module TextUtil

Extended Modules

Defined in:

util/text_util.cr

Constant Summary

BR_RE = /\<br\s*\/?\>|\s{4,+}/i
FIX_MARKS = {"òa" => "oà", "óa" => "oá", "ỏa" => "oả", "õa" => "oã", "ọa" => "oạ", "òe" => "oè", "óe" => "oé", "ỏe" => "oẻ", "õe" => "oẽ", "ọe" => "oẹ", "ùy" => "uỳ", "úy" => "uý", "ủy" => "uỷ", "ũy" => "uỹ", "ụy" => "uỵ"}
MARK_RE = Regex.new(FIX_MARKS.keys.join('|'))

Instance Method Summary

Instance Method Detail

def canon_clean(input : String, upcase : Bool = false) : String #

Convert all halfwidth characters to fullwidth and group similar characters.


[View source]
def capitalize(input : String) : String #

Smart capitalization:

  • Doesn't downcase the remaining characters
  • Treats Unicode alphanumeric characters as upcase-able

[View source]
def clean_and_trim(input : String) : String #

[View source]
def clean_spaces(input : String) : String #

[View source]
def fix_viet(str : String) #

[View source]
def normalize(input : String) : String #

Convert Chinese punctuation to English punctuation and full-width characters to ASCII characters.


[View source]
def normalize(input : Array(Char)) : Array(Char) #

Convert Chinese punctuation to English punctuation and full-width characters to ASCII characters.


[View source]
def slugify(input : String, tones = false) : String #

Make a URL-friendly string.


[View source]
def split_html(input : String, fix_br : Bool = true) : Array(String) #

[View source]
def split_spaces(input : String) #

[View source]
def split_text(input : String, spaces_as_newline = true) : Array(String) #

[View source]
def titleize(input : String) : String #

Capitalize all words.


[View source]
def tokenize(input : String, tones = false) : Array(String) #

Split input into words.


[View source]
def truncate(input : String, limit = 100) #

[View source]
def unaccent(input : String) : String #

Strip Vietnamese accents.


[View source]
def uniformize(input : String, upcase : Bool = false) : String #

Convert all halfwidth characters to fullwidth and group similar characters.


[View source]