struct Hansa::Classifier
- Hansa::Classifier
- Struct
- Value
- Object
Included Modules
- JSON::Serializable
Defined in:
hansa.cr
Constructors
Instance Method Summary
- #classify(content : String)
- #common_extract_and_replace(content : String, re : Regex) : Tuple(Array(String), String)
- #extract_and_replace_operator(content : String) : Tuple(Array(String), String)
- #extract_and_replace_punctuation(content : String) : Tuple(Array(String), String)
- #extract_and_replace_regular(content : String) : Tuple(Array(String), String)
- #extract_and_replace_sgml(content : String) : Tuple(Array(String), String)
- #extract_and_replace_shebang(content : String) : Tuple(Array(String), String)
- #extract_remainders(content : String) : Tuple(Array(String), String)
- #get_sgml_attributes(sgml_tag : String) : Array(String)
- #known_languages : Array(String)
Despite the name, this only reports the 100 most common languages in the corpus, to avoid very unlikely false positives for obscure languages.
- #languages_log_probabilities : Hash(String, Float64)
- #languages_log_probabilities=(languages_log_probabilities : Hash(String, Float64))
- #skip_comments_and_literals(content : String) : Tuple(Array(String), String)
- #tokenize(content : String) : Array(String)
- #tokens_log_probabilities : Hash(String, Hash(String, Float64))
- #tokens_log_probabilities=(tokens_log_probabilities : Hash(String, Hash(String, Float64)))
- #tokens_log_probability(tokens : Array(String), language : String) : Float64
Constructor Detail
Instance Method Detail
def common_extract_and_replace(content : String, re : Regex) : Tuple(Array(String), String)
#
def extract_and_replace_punctuation(content : String) : Tuple(Array(String), String)
#
def known_languages : Array(String)
#
Despite the name, this only reports the 100 most common languages in the corpus, to avoid very unlikely false positives for obscure languages.
def languages_log_probabilities=(languages_log_probabilities : Hash(String, Float64))
#
def tokens_log_probabilities=(tokens_log_probabilities : Hash(String, Hash(String, Float64)))
#