class Cadmium::Glove::Model

Defined in:

glove/model.cr

Constant Summary

BIAS_FILE = "word_biases.json"
COOC_FILE = "cooc_matrix.json"
CORPUS_FILE = "corpus.json"
VEC_FILE = "word_vectors.json"

Constructors

Class Method Summary

Instance Method Summary

Constructor Detail

def self.new(num_components : Int32 = 30, epochs : Int32 = 25, threads : Int32 = 4, learning_rate : Float64 = 0.05, alpha : Float64 = 0.75, max_count : Int32 = 100) #

Creates a new Glove::Model instance.


[View source]

Class Method Detail

def self.load(dir, corpus_file = CORPUS_FILE, cooc_file = COOC_FILE, vec_file = VEC_FILE, bias_file = BIAS_FILE, **options) #

Create a new model from an existing dataset.


[View source]

Instance Method Detail

def alpha : Float64 #

[View source]
def alpha=(alpha : Float64) #

[View source]
def analogy_words(word1, word2, target, num = 3, accuracy = 1e-4) #

Get the words (num of them, 3 by default) that relate to target the way word1 relates to word2.

Example:

model.analogy_words("quantum", "physics", "atom")
# => [{"electron", 0.98583}, {"energi", 0.98151}, {"photon", 0.96650}]

[View source]
def cooc_matrix : Apatite::Matrix(Float64) #

[View source]
def cooc_matrix=(cooc_matrix : Apatite::Matrix(Float64)) #

[View source]
def corpus #

[View source]
def corpus=(corpus : Corpus | Nil) #

[View source]
def epochs : Int32 #

[View source]
def epochs=(epochs : Int32) #

[View source]
def fit(text, **options) #

Fit a String or Glove::Corpus instance and build a co-occurrence matrix.


[View source]
def learning_rate : Float64 #

[View source]
def learning_rate=(learning_rate : Float64) #

[View source]
def load(dir, corpus_file = CORPUS_FILE, cooc_file = COOC_FILE, vec_file = VEC_FILE, bias_file = BIAS_FILE) #

Loads training data from already existing files.


[View source]
def max_count : Int32 #

[View source]
def max_count=(max_count : Int32) #

[View source]
def most_similar(word, num = 3) #

Get the num words most similar to word (3 by default).


[View source]
def num_components : Int32 #

[View source]
def num_components=(num_components : Int32) #

[View source]
def save(outdir, corpus_file = CORPUS_FILE, cooc_file = COOC_FILE, vec_file = VEC_FILE, bias_file = BIAS_FILE) #

Saves the trained data to files in outdir.


[View source]
def threads : Int32 #

[View source]
def threads=(threads : Int32) #

[View source]
def token_index : Hash(String, Int32) #

[View source]
def token_index=(token_index : Hash(String, Int32)) #

[View source]
def token_pairs : Array(TokenPair) #

[View source]
def token_pairs=(token_pairs : Array(TokenPair)) #

[View source]
def train #

Train the model. #fit must be called prior to this.


[View source]
def vector(word) #

Find the vector row of @word_vec for a given word.


[View source]
def vector_distance(word : String | Apatite::Vector) #

Calculates the cosine distance of every word in the vocabulary against a given word. The results are then sorted in descending order.


[View source]
def visualize #

TODO: Generate a graph of the word vector matrix.


[View source]
def word_biases : Array(Float64) #

[View source]
def word_biases=(word_biases : Array(Float64)) #

[View source]
def word_vec : Apatite::Matrix(Float64) #

[View source]
def word_vec=(word_vec : Apatite::Matrix(Float64)) #

[View source]