class Cadmium::Classifier::Viterbi

Overview

TODO: add Kneser-Ney smoothing. This is a Hidden Markov Model classifier which uses the Viterbi algorithm. It is efficient at predicting a state given prior observations matched to states. In NLP, it is often used to attribute POS tags to the words of a text. As such, it is used by Cadmium::POSTagger.

Included Modules

Defined in:

cadmium/classifier/viterbi.cr

Constructors

Instance Method Summary

Constructor Detail

def self.new(ngrams_size : Int32 = 3) #

[View source]

Instance Method Detail

def classify(sequence_of_observations : Array(String)) : Hash(String, String) #

[View source]
def emission_matrix : Matrix(Float64) #

[View source]
def epsilon : Float64 #

[View source]
def label_count : Hash(String, Int32) #

[View source]
def load_model(filename : String = "model.zip") #

[View source]
def lookup_table : Hash(String, Int32) #

[View source]
def ngram_label_count : Hash(Array(String), Int32) #

[View source]
def ngrams_size : Int32 #

property initial_probabilities


[View source]
def observation_space : Set(String) #

[View source]
def predicted_states : Array(String) #

[View source]
def prior_ngram_label_count : Hash(Array(String), Int32) #

[View source]
def save_model(filename : String = "model.zip") #

[View source]
def sequence_of_ngrams : Array(Array(Tuple(String, String))) #

[View source]
def sequence_of_observations : Array(String) #

[View source]
def sequence_of_prior_ngrams : Array(Array(Tuple(String, String))) #

[View source]
def state_space : Set(String) #

[View source]
def token_count : Hash(String, Int32) #

observation_space, state_space, initial_probabilities, sequence_of_observations, transition_matrix, emission_matrix


[View source]
def token_label_count : Hash(Tuple(String, String), Int32) #

[View source]
def train(training_data : Array(Tuple(String, String))) #

[View source]
def training_data : Array(Tuple(String, String)) #

[View source]
def transition_matrix : Matrix(Float64) #

[View source]