class Llama::Vocab

Llama::Vocab
Reference
Object

Overview

Wrapper for the llama_vocab structure

Defined in:

llama/vocab.cr

Constructors

.new(handle : Pointer(LibLlama::LlamaVocab))
Creates a new Vocab instance from a raw pointer

Instance Method Summary

#add_bos? : Bool
Returns whether the model adds a BOS token by default
#add_eos? : Bool
Returns whether the model adds an EOS token by default
#bos : Int32
Returns the beginning-of-sentence token ID
#eos : Int32
Returns the end-of-sentence token ID
#eot : Int32
Returns the end-of-turn token ID
#format_token(token : Int32, show_id : Bool = true, show_text : Bool = true) : String
Formats a token for display
#is_control(token : Int32) : Bool
Checks if a token is a control token
#is_eog(token : Int32) : Bool
Checks if a token is an end-of-generation token
#n_tokens : Int32
Returns the number of tokens in the vocabulary
#nl : Int32
Returns the newline token ID
#pad : Int32
Returns the padding token ID
#to_unsafe : Pointer(Llama::LibLlama::LlamaVocab)
Returns the raw pointer to the underlying llama_vocab structure
#token_to_piece(token : Int32, lstrip : Int32 = 0, special : Bool = false) : String
Converts a token to a piece of text. This is similar to token_to_text but provides more control over the output format.
#token_to_text(token : Int32) : String
Returns the text representation of a token
#tokenize(text : String, add_special : Bool = true, parse_special : Bool = true) : Array(Int32)
Tokenizes a string into an array of token IDs

Constructor Detail

def self.new(handle : Pointer(LibLlama::LlamaVocab)) #

Creates a new Vocab instance from a raw pointer

Note: This constructor is intended for internal use. Users should obtain Vocab instances through Model#vocab.

[View source]

Instance Method Detail

def add_bos? : Bool #

Returns whether the model adds a BOS token by default

[View source]

def add_eos? : Bool #

Returns whether the model adds an EOS token by default

[View source]

def bos : Int32 #

Returns the beginning-of-sentence token ID

[View source]

def eos : Int32 #

Returns the end-of-sentence token ID

[View source]

def eot : Int32 #

Returns the end-of-turn token ID

[View source]

def format_token(token : Int32, show_id : Bool = true, show_text : Bool = true) : String #

Formats a token for display

Parameters:

token: The token to format
show_id: Whether to show the token ID
show_text: Whether to show the token text

Returns:

A formatted string representation of the token

[View source]

def is_control(token : Int32) : Bool #

Checks if a token is a control token

[View source]

def is_eog(token : Int32) : Bool #

Checks if a token is an end-of-generation token

[View source]

def n_tokens : Int32 #

Returns the number of tokens in the vocabulary

[View source]

def nl : Int32 #

Returns the newline token ID

[View source]

def pad : Int32 #

Returns the padding token ID

[View source]

def to_unsafe : Pointer(Llama::LibLlama::LlamaVocab) #

Returns the raw pointer to the underlying llama_vocab structure

[View source]

def token_to_piece(token : Int32, lstrip : Int32 = 0, special : Bool = false) : String #

Converts a token to a piece of text. This is similar to token_to_text but provides more control over the output format.

Parameters:

token: The token to convert
lstrip: Whether to strip leading spaces (0 = no, 1 = yes)
special: Whether to render special tokens

Returns:

The text representation of the token

[View source]

def token_to_text(token : Int32) : String #

Returns the text representation of a token

[View source]

def tokenize(text : String, add_special : Bool = true, parse_special : Bool = true) : Array(Int32) #

Tokenizes a string into an array of token IDs

[View source]

CrystalDoc.info

llama

class Llama::Vocab

Overview

Defined in:

Constructors

Instance Method Summary

Constructor Detail

Instance Method Detail