struct Hansa::Classifier

Included Modules

Defined in:

hansa.cr

Constructors

Instance Method Summary

Constructor Detail

def self.new(pull : JSON::PullParser) #

[View source]

Instance Method Detail

def classify(content : String) #

[View source]
def common_extract_and_replace(content : String, re : Regex) : Tuple(Array(String), String) #

[View source]
def extract_and_replace_operator(content : String) : Tuple(Array(String), String) #

[View source]
def extract_and_replace_punctuation(content : String) : Tuple(Array(String), String) #

[View source]
def extract_and_replace_regular(content : String) : Tuple(Array(String), String) #

[View source]
def extract_and_replace_sgml(content : String) : Tuple(Array(String), String) #

[View source]
def extract_and_replace_shebang(content : String) : Tuple(Array(String), String) #

[View source]
def extract_remainders(content : String) : Tuple(Array(String), String) #

[View source]
def get_sgml_attributes(sgml_tag : String) : Array(String) #

[View source]
def known_languages : Array(String) #

Despite the name, this only reports the 100 most common languages in the corpus, to avoid highly unlikely false positives for obscure languages.


[View source]
def languages_log_probabilities : Hash(String, Float64) #

[View source]
def languages_log_probabilities=(languages_log_probabilities : Hash(String, Float64)) #

[View source]
def skip_comments_and_literals(content : String) : Tuple(Array(String), String) #

[View source]
def tokenize(content : String) : Array(String) #

[View source]
def tokens_log_probabilities : Hash(String, Hash(String, Float64)) #

[View source]
def tokens_log_probabilities=(tokens_log_probabilities : Hash(String, Hash(String, Float64))) #

[View source]
def tokens_log_probability(tokens : Array(String), language : String) : Float64 #

[View source]