struct Llama::Tokenizer

Defined in:

llama/tokenizer.cr

Constructors

Instance Method Summary

Constructor Detail

def self.new(vocab : Array(String) = Array(String).new, vocab_scores : Array(Float32) = Array(Float32).new, sorted_vocab : Array(TokenIndex) = Array(TokenIndex).new, vocab_size : Int32 = 0, max_token_length : UInt32 = 0_u32, byte_pieces : Array(UInt8) = Array(UInt8).new(512, 0_u8))

[View source]

Instance Method Detail

def build(tokenizer_path : String)

[View source]

def byte_pieces : Array(UInt8)

[View source]
def byte_pieces=(byte_pieces : Array(UInt8))

[View source]
def decode(prev_token : Int32, token : Int32) : String

[View source]
def encode(text : String, bos : Bool, eos : Bool) : Array(Int32)

[View source]
def max_token_length : UInt32

[View source]
def max_token_length=(max_token_length : UInt32)

[View source]
def sorted_vocab : Array(TokenIndex)

[View source]
def sorted_vocab=(sorted_vocab : Array(TokenIndex))

[View source]
def vocab : Array(String)

[View source]
def vocab=(vocab : Array(String))

[View source]
def vocab_scores : Array(Float32)

[View source]
def vocab_scores=(vocab_scores : Array(Float32))

[View source]
def vocab_size : Int32

[View source]
def vocab_size=(vocab_size : Int32)

[View source]