class Cadmium::WordPunctuationTokenizer

Defined in:

cadmium/tokenizer/word_punctuation_tokenizer.cr

Constant Summary

REGEX_PATTERN = /(\w+|[а-я0-9_]+|\.|\!|\'|\"")/i

Constructors

Instance methods inherited from class Cadmium::RegexTokenizer

tokenize(string : String) : Array(String)

Constructor methods inherited from class Cadmium::RegexTokenizer

new(pattern : Regex, *, gaps = nil, discard_empty = nil)

Instance methods inherited from class Cadmium::Tokenizer

tokenize(string : String) : Array(String), trim(arr)

Constructor Detail

def self.new

[View source]