struct Hansa::Classifier

Hansa::Classifier
Struct
Value
Object

Included Modules

JSON::Serializable

Defined in:

hansa.cr

Constructors

.new(pull : JSON::PullParser)

Instance Method Summary

#classify(content : String)
#common_extract_and_replace(content : String, re : Regex) : Tuple(Array(String), String)
#extract_and_replace_operator(content : String) : Tuple(Array(String), String)
#extract_and_replace_punctuation(content : String) : Tuple(Array(String), String)
#extract_and_replace_regular(content : String) : Tuple(Array(String), String)
#extract_and_replace_sgml(content : String) : Tuple(Array(String), String)
#extract_and_replace_shebang(content : String) : Tuple(Array(String), String)
#extract_remainders(content : String) : Tuple(Array(String), String)
#get_sgml_attributes(sgml_tag : String) : Array(String)
#known_languages : Array(String)
Despite the name, this only reports the 100 most common languages in the corpus, to avoid super unlikely false positives for obscure languages
#languages_log_probabilities : Hash(String, Float64)
#languages_log_probabilities=(languages_log_probabilities : Hash(String, Float64))
#skip_comments_and_literals(content : String) : Tuple(Array(String), String)
#tokenize(content : String) : Array(String)
#tokens_log_probabilities : Hash(String, Hash(String, Float64))
#tokens_log_probabilities=(tokens_log_probabilities : Hash(String, Hash(String, Float64)))
#tokens_log_probability(tokens : Array(String), language : String) : Float64

Constructor Detail

def self.new(pull : JSON::PullParser) #

[View source]

Instance Method Detail

def classify(content : String) #

[View source]

def common_extract_and_replace(content : String, re : Regex) : Tuple(Array(String), String) #

[View source]

def extract_and_replace_operator(content : String) : Tuple(Array(String), String) #

[View source]

def extract_and_replace_punctuation(content : String) : Tuple(Array(String), String) #

[View source]

def extract_and_replace_regular(content : String) : Tuple(Array(String), String) #

[View source]

def extract_and_replace_sgml(content : String) : Tuple(Array(String), String) #

[View source]

def extract_and_replace_shebang(content : String) : Tuple(Array(String), String) #

[View source]

def extract_remainders(content : String) : Tuple(Array(String), String) #

[View source]

def get_sgml_attributes(sgml_tag : String) : Array(String) #

[View source]

def known_languages : Array(String) #

Despite the name, this only reports the 100 most common languages in the corpus, to avoid super unlikely false positives for obscure languages

[View source]

def languages_log_probabilities : Hash(String, Float64) #

[View source]

def languages_log_probabilities=(languages_log_probabilities : Hash(String, Float64)) #

[View source]

def skip_comments_and_literals(content : String) : Tuple(Array(String), String) #

[View source]

def tokenize(content : String) : Array(String) #

[View source]

def tokens_log_probabilities : Hash(String, Hash(String, Float64)) #

[View source]

def tokens_log_probabilities=(tokens_log_probabilities : Hash(String, Hash(String, Float64))) #

[View source]

def tokens_log_probability(tokens : Array(String), language : String) : Float64 #

[View source]

CrystalDoc.info

hansa

struct Hansa::Classifier

Included Modules

Defined in:

Constructors

Instance Method Summary

Constructor Detail

Instance Method Detail