class Cadmium::PragmaticTokenizer::Languages::Common

Direct Known Subclasses

ABBREVIATIONS = Set(String).new
ALNUM_QUOTE = /(\w|\D)'(?!')(?=\W|$)/ — Single quotes handling
CONTRACTIONS = {} of String => String
PUNCTUATION_MAP = {"。" => "♳", "．" => "♴", "." => "♵", "！" => "♶", "!" => "♷", "?" => "♸", "？" => "♹", "、" => "♺", "¡" => "⚀", "¿" => "⚁", "„" => "⚂", "“" => "⚃", "[" => "⚄", "]" => "⚅", "\"" => "☇", "#" => "☈", "$" => "☉", "%" => "☊", "&" => "☋", "(" => "☌", ")" => "☍", "*" => "☠", "+" => "☢", "," => "☣", ":" => "☤", ";" => "☥", "<" => "☦", "=" => "☧", ">" => "☀", "@" => "☁", "^" => "☂", "_" => "☃", "`" => "☄", "'" => "☮", "{" => "♔", "|" => "♕", "}" => "♖", "~" => "♗", "-" => "♘", "«" => "♙", "»" => "♚", "”" => "⚘", "‘" => "⚭"}
QUOTE_NOT_TWAS1 = /(\W|^)'(?!twas)/i
QUOTE_NOT_TWAS2 = /(\W|^)‘(?!twas)/i
QUOTE_WORD = /(\W|^)'(?=\w)/
STOP_WORDS = Set(String).new

.abbreviations
.contractions
.handle_single_quotes(text)
This 'special treatment' of single quotes is actually relevant for many other tests.
.punctuation_map
.stop_words

def self.abbreviations

[View source]

def self.contractions

[View source]

def self.handle_single_quotes(text)

This 'special treatment' of single quotes is actually relevant for many other tests. Alter the core regular expressions!

[View source]

def self.punctuation_map

[View source]

def self.stop_words

[View source]