class Cadmium::Tokenizer::Word

Defined in:

cadmium/tokenizer/word.cr

Constant Summary

REGEX_PATTERN = /[^A-Za-zА-Яа-я0-9_]+/

Constructors

Instance methods inherited from class Cadmium::Tokenizer::Regex

tokenize(string : String) : Array(String)

Constructor methods inherited from class Cadmium::Tokenizer::Regex

new(pattern : ::Regex, *, gaps = nil, discard_empty = nil)

Instance methods inherited from class Cadmium::Tokenizer::Base

tokenize(string : String) : Array(String), trim(arr)

Instance methods inherited from module Cadmium::Tokenizer::Diacritics

remove_diacritics(str : String)

Instance methods inherited from module Cadmium::Tokenizer::StopWords

add_stopwords_list(language : Symbol)

Constructor Detail

def self.new

[View source]