module TextUtil
Extended Modules
Defined in:
util/text_util.cr
Constant Summary
-
BR_RE =
/\<br\s*\/?\>|\s{4,+}/i
-
FIX_MARKS =
{"òa" => "oà", "óa" => "oá", "ỏa" => "oả", "õa" => "oã", "ọa" => "oạ", "òe" => "oè", "óe" => "oé", "ỏe" => "oẻ", "õe" => "oẽ", "ọe" => "oẹ", "ùy" => "uỳ", "úy" => "uý", "ủy" => "uỷ", "ũy" => "uỹ", "ụy" => "uỵ"}
-
MARK_RE =
Regex.new(FIX_MARKS.keys.join('|'))
Instance Method Summary
-
#canon_clean(input : String, upcase : Bool = false) : String
convert all halfwidth to fullwidth and group similar characters
-
#capitalize(input : String) : String
Smart capitalize: does not downcase the remaining characters, and treats Unicode alphanumeric characters as upcase-able.
- #clean_and_trim(input : String) : String
- #clean_spaces(input : String) : String
- #fix_viet(str : String)
-
#normalize(input : String) : String
Convert Chinese punctuation to English punctuation and full-width characters to ASCII characters.
-
#normalize(input : Array(Char)) : Array(Char)
Convert Chinese punctuation to English punctuation and full-width characters to ASCII characters.
-
#slugify(input : String, tones = false) : String
Make a URL-friendly string.
- #split_html(input : String, fix_br : Bool = true) : Array(String)
- #split_spaces(input : String)
- #split_text(input : String, spaces_as_newline = true) : Array(String)
-
#titleize(input : String) : String
capitalize all words
-
#tokenize(input : String, tones = false) : Array(String)
split input to words
- #truncate(input : String, limit = 100)
-
#unaccent(input : String) : String
Strip Vietnamese accents.
-
#uniformize(input : String, upcase : Bool = false) : String
convert all halfwidth to fullwidth and group similar characters
Instance Method Detail
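#canon_clean(input : String, upcase : Bool = false) : String
convert all halfwidth to fullwidth and group similar characters
#capitalize(input : String) : String
smart capitalize:
- don't downcase extra characters
- treat unicode alphanumeric chars as upcase-able
#normalize(input : String) : String
Convert Chinese punctuation to English punctuation and full-width characters to ASCII characters
#normalize(input : Array(Char)) : Array(Char)
Convert Chinese punctuation to English punctuation and full-width characters to ASCII characters
#uniformize(input : String, upcase : Bool = false) : String
convert all halfwidth to fullwidth and group similar characters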