module Cadmium::Tokenizer::Diacritics

Direct including types

Defined in:

cadmium/tokenizer/diacritics.cr

Constant Summary

DIACRITICS = [{base: "A", letters: ::Regex.new("AⒶAÀÁÂẦẤẪẨÃĀĂẰẮẴẲȦǠÄǞẢÅǺǍȀȂẠẬẶḀĄȺⱯ]/")}, {base: "AA", letters: ::Regex.new("Ꜳ]/")}, {base: "AE", letters: ::Regex.new("ÆǼǢ]/")}, {base: "AO", letters: ::Regex.new("Ꜵ]/")}, {base: "AU", letters: ::Regex.new("Ꜷ]/")}, {base: "AV", letters: ::Regex.new("ꜸꜺ]/")}, {base: "AY", letters: ::Regex.new("Ꜽ]/")}, {base: "B", letters: ::Regex.new("BⒷBḂḄḆɃƂƁ]/")}, {base: "C", letters: ::Regex.new("CⒸCĆĈĊČÇḈƇȻꜾ]/")}, {base: "D", letters: ::Regex.new("DⒹDḊĎḌḐḒḎĐƋƊƉꝹ]/")}, {base: "DZ", letters: ::Regex.new("DZDŽ]/")}, {base: "Dz", letters: ::Regex.new("DzDž]/")}, {base: "E", letters: ::Regex.new("EⒺEÈÉÊỀẾỄỂẼĒḔḖĔĖËẺĚȄȆẸỆȨḜĘḘḚƐƎ]/")}, {base: "F", letters: ::Regex.new("FⒻFḞƑꝻ]/")}, {base: "G", letters: ::Regex.new("GⒼGǴĜḠĞĠǦĢǤƓꞠꝽꝾ]/")}, {base: "H", letters: ::Regex.new("HⒽHĤḢḦȞḤḨḪĦⱧⱵꞍ]/")}, {base: "I", letters: ::Regex.new("IⒾIÌÍÎĨĪĬİÏḮỈǏȈȊỊĮḬƗ]/")}, {base: "J", letters: ::Regex.new("JⒿJĴɈ]/")}, {base: "K", letters: ::Regex.new("KⓀKḰǨḲĶḴƘⱩꝀꝂꝄꞢ]/")}, {base: "L", letters: ::Regex.new("LⓁLĿĹĽḶḸĻḼḺŁȽⱢⱠꝈꝆꞀ]/")}, {base: "LJ", letters: ::Regex.new("LJ]/")}, {base: "Lj", letters: ::Regex.new("Lj]/")}, {base: "M", letters: ::Regex.new("MⓂMḾṀṂⱮƜ]/")}, {base: "N", letters: ::Regex.new("NⓃNǸŃÑṄŇṆŅṊṈȠƝꞐꞤ]/")}, {base: "NJ", letters: ::Regex.new("NJ]/")}, {base: "Nj", letters: ::Regex.new("Nj]/")}, {base: "O", letters: ::Regex.new("OⓄOÒÓÔỒỐỖỔÕṌȬṎŌṐṒŎȮȰÖȪỎŐǑȌȎƠỜỚỠỞỢỌỘǪǬØǾƆƟꝊꝌ]/")}, {base: "OE", letters: ::Regex.new("Œ]/")}, {base: "OI", letters: ::Regex.new("Ƣ]/")}, {base: "OO", letters: ::Regex.new("Ꝏ]/")}, {base: "OU", letters: ::Regex.new("Ȣ]/")}, {base: "P", letters: ::Regex.new("PⓅPṔṖƤⱣꝐꝒꝔ]/")}, {base: "Q", letters: ::Regex.new("QⓆQꝖꝘɊ]/")}, {base: "R", letters: ::Regex.new("RⓇRŔṘŘȐȒṚṜŖṞɌⱤꝚꞦꞂ]/")}, {base: "S", letters: ::Regex.new("SⓈSẞŚṤŜṠŠṦṢṨȘŞⱾꞨꞄ]/")}, {base: "T", letters: ::Regex.new("TⓉTṪŤṬȚŢṰṮŦƬƮȾꞆ]/")}, {base: "TZ", letters: ::Regex.new("Ꜩ]/")}, {base: "U", letters: ::Regex.new("UⓊUÙÚÛŨṸŪṺŬÜǛǗǕǙỦŮŰǓȔȖƯỪỨỮỬỰỤṲŲṶṴɄ]/")}, {base: "V", letters: 
::Regex.new("VⓋVṼṾƲꝞɅ]/")}, {base: "VY", letters: ::Regex.new("Ꝡ]/")}, {base: "W", letters: ::Regex.new("WⓌWẀẂŴẆẄẈⱲ]/")}, {base: "X", letters: ::Regex.new("XⓍXẊẌ]/")}, {base: "Y", letters: ::Regex.new("YⓎYỲÝŶỸȲẎŸỶỴƳɎỾ]/")}, {base: "Z", letters: ::Regex.new("ZⓏZŹẐŻŽẒẔƵȤⱿⱫꝢ]/")}, {base: "a", letters: ::Regex.new("aⓐaẚàáâầấẫẩãāăằắẵẳȧǡäǟảåǻǎȁȃạậặḁąⱥɐ]/")}, {base: "aa", letters: ::Regex.new("ꜳ]/")}, {base: "ae", letters: ::Regex.new("æǽǣ]/")}, {base: "ao", letters: ::Regex.new("ꜵ]/")}, {base: "au", letters: ::Regex.new("ꜷ]/")}, {base: "av", letters: ::Regex.new("ꜹꜻ]/")}, {base: "ay", letters: ::Regex.new("ꜽ]/")}, {base: "b", letters: ::Regex.new("bⓑbḃḅḇƀƃɓ]/")}, {base: "c", letters: ::Regex.new("cⓒcćĉċčçḉƈȼꜿↄ]/")}, {base: "d", letters: ::Regex.new("dⓓdḋďḍḑḓḏđƌɖɗꝺ]/")}, {base: "dz", letters: ::Regex.new("dzdž]/")}, {base: "e", letters: ::Regex.new("eⓔeèéêềếễểẽēḕḗĕėëẻěȅȇẹệȩḝęḙḛɇɛǝ]/")}, {base: "f", letters: ::Regex.new("fⓕfḟƒꝼ]/")}, {base: "g", letters: ::Regex.new("gⓖgǵĝḡğġǧģǥɠꞡᵹꝿ]/")}, {base: "h", letters: ::Regex.new("hⓗhĥḣḧȟḥḩḫẖħⱨⱶɥ]/")}, {base: "hv", letters: ::Regex.new("ƕ]/")}, {base: "i", letters: ::Regex.new("iⓘiìíîĩīĭïḯỉǐȉȋịįḭɨı]/")}, {base: "j", letters: ::Regex.new("jⓙjĵǰɉ]/")}, {base: "k", letters: ::Regex.new("kⓚkḱǩḳķḵƙⱪꝁꝃꝅꞣ]/")}, {base: "l", letters: ::Regex.new("lⓛlŀĺľḷḹļḽḻſłƚɫⱡꝉꞁꝇ]/")}, {base: "lj", letters: ::Regex.new("lj]/")}, {base: "m", letters: ::Regex.new("mⓜmḿṁṃɱɯ]/")}, {base: "n", letters: ::Regex.new("nⓝnǹńñṅňṇņṋṉƞɲʼnꞑꞥ]/")}, {base: "nj", letters: ::Regex.new("nj]/")}, {base: "o", letters: ::Regex.new("oⓞoòóôồốỗổõṍȭṏōṑṓŏȯȱöȫỏőǒȍȏơờớỡởợọộǫǭøǿɔꝋꝍɵ]/")}, {base: "oe", letters: ::Regex.new("œ]/")}, {base: "oi", letters: ::Regex.new("ƣ]/")}, {base: "ou", letters: ::Regex.new("ȣ]/")}, {base: "oo", letters: ::Regex.new("ꝏ]/")}, {base: "p", letters: ::Regex.new("pⓟpṕṗƥᵽꝑꝓꝕ]/")}, {base: "q", letters: ::Regex.new("qⓠqɋꝗꝙ]/")}, {base: "r", letters: ::Regex.new("rⓡrŕṙřȑȓṛṝŗṟɍɽꝛꞧꞃ]/")}, {base: "s", letters: ::Regex.new("sⓢsßśṥŝṡšṧṣṩșşȿꞩꞅẛ]/")}, {base: "t", 
letters: ::Regex.new("tⓣtṫẗťṭțţṱṯŧƭʈⱦꞇ]/")}, {base: "tz", letters: ::Regex.new("ꜩ]/")}, {base: "u", letters: ::Regex.new("uⓤuùúûũṹūṻŭüǜǘǖǚủůűǔȕȗưừứữửựụṳųṷṵʉ]/")}, {base: "v", letters: ::Regex.new("vⓥvṽṿʋꝟʌ]/")}, {base: "vy", letters: ::Regex.new("ꝡ]/")}, {base: "w", letters: ::Regex.new("wⓦwẁẃŵẇẅẘẉⱳ]/")}, {base: "x", letters: ::Regex.new("xⓧxẋẍ]/")}, {base: "y", letters: ::Regex.new("yⓨyỳýŷỹȳẏÿỷẙỵƴɏỿ]/")}, {base: "z", letters: ::Regex.new("zⓩzźẑżžẓẕƶȥɀⱬꝣ]/")}]

Instance Method Summary

Instance Method Detail

def remove_diacritics(str : String)

[View source]