struct Llama::Tokenizer
- Llama::Tokenizer
- Struct
- Value
- Object
Defined in:
llama/tokenizer.cr
Constructors
Instance Method Summary
- #build(tokenizer_path : String)
- #byte_pieces : Array(UInt8)
- #byte_pieces=(byte_pieces : Array(UInt8))
- #decode(prev_token : Int32, token : Int32) : String
- #encode(text : String, bos : Bool, eos : Bool) : Array(Int32)
- #max_token_length : UInt32
- #max_token_length=(max_token_length : UInt32)
- #sorted_vocab : Array(TokenIndex)
- #sorted_vocab=(sorted_vocab : Array(TokenIndex))
- #vocab : Array(String)
- #vocab=(vocab : Array(String))
- #vocab_scores : Array(Float32)
- #vocab_scores=(vocab_scores : Array(Float32))
- #vocab_size : Int32
- #vocab_size=(vocab_size : Int32)
Constructor Detail
def self.new(vocab : Array(String) = Array(String).new, vocab_scores : Array(Float32) = Array(Float32).new, sorted_vocab : Array(TokenIndex) = Array(TokenIndex).new, vocab_size : Int32 = 0, max_token_length : UInt32 = 0_u32, byte_pieces : Array(UInt8) = Array(UInt8).new(512, 0_u8))
#