class Cadmium::Tokenizer::VisibleChar

Defined in:

cadmium/tokenizer/visible_char.cr

Constant Summary

REGEX_PATTERN = /\s+|(?<=[\P{Cc}])(?=[\P{Cc}])/

Constructors

Instance methods inherited from class Cadmium::Tokenizer::Regex

tokenize(string : String) : Array(String)

Constructor methods inherited from class Cadmium::Tokenizer::Regex

new(pattern : ::Regex, *, gaps = nil, discard_empty = nil)

Instance methods inherited from class Cadmium::Tokenizer::Base

tokenize(string : String) : Array(String), trim(arr)

Instance methods inherited from module Cadmium::Tokenizer::Diacritics

remove_diacritics(str : String)

Instance methods inherited from module Cadmium::Tokenizer::StopWords

add_stopwords_list(language : Symbol)

Constructor Detail

def self.new

[View source]