module Memo::Storage

Overview

Low-level storage operations for embeddings and chunks

Extended Modules

Memo::Storage

Defined in:

memo/storage.cr

Instance Method Summary

#compute_hash(text : String) : Bytes
Compute SHA256 hash for text content
#create_chunk(db : DB::Database, hash : Bytes, source_type : String, source_id : Int64, offset : Int32 | Nil, size : Int32, pair_id : Int64 | Nil = nil, parent_id : Int64 | Nil = nil) : Int64
Create chunk reference
#deserialize_embedding(blob : Bytes) : Array(Float64)
Deserialize embedding from binary blob
#get_embedding(db : DB::Database, hash : Bytes) : Array(Float64) | Nil
Get embedding by hash
#increment_match_count(db : DB::Database, chunk_ids : Array(Int64))
Increment match_count for chunks
#increment_read_count(db : DB::Database, chunk_ids : Array(Int64))
Increment read_count for chunks
#register_service(db : DB::Database, provider : String, model : String, version : String | Nil, dimensions : Int32, max_tokens : Int32) : Int64
Register or get existing service
#serialize_embedding(embedding : Array(Float64)) : Bytes
Serialize embedding to binary blob (little-endian Float32 for space efficiency)
#store_embedding(db : DB::Database, hash : Bytes, embedding : Array(Float64), token_count : Int32, service_id : Int64) : Bool
Store embedding in database (deduplicated by hash)

Instance Method Detail

def compute_hash(text : String) : Bytes #

Compute SHA256 hash for text content

[View source]

def create_chunk(db : DB::Database, hash : Bytes, source_type : String, source_id : Int64, offset : Int32 | Nil, size : Int32, pair_id : Int64 | Nil = nil, parent_id : Int64 | Nil = nil) : Int64 #

Create chunk reference

Links a hash to a source with optional relationships

[View source]

def deserialize_embedding(blob : Bytes) : Array(Float64) #

Deserialize embedding from binary blob

[View source]

def get_embedding(db : DB::Database, hash : Bytes) : Array(Float64) | Nil #

Get embedding by hash

Returns nil if not found

[View source]

def increment_match_count(db : DB::Database, chunk_ids : Array(Int64)) #

Increment match_count for chunks

[View source]

def increment_read_count(db : DB::Database, chunk_ids : Array(Int64)) #

Increment read_count for chunks

[View source]

def register_service(db : DB::Database, provider : String, model : String, version : String | Nil, dimensions : Int32, max_tokens : Int32) : Int64 #

Register or get existing service

Returns service_id for the provider/model combination

[View source]

def serialize_embedding(embedding : Array(Float64)) : Bytes #

Serialize embedding to binary blob (little-endian Float32 for space efficiency)

TODO: Consider int16 quantization of embeddings to reduce storage by 50% relative to Float32 (1536 dims: 6 KB -> 3 KB). Precision loss is ~0.003% for normalized vectors. This would require mapping the float range [-1, 1] to the int16 range [-32768, 32767].

[View source]

def store_embedding(db : DB::Database, hash : Bytes, embedding : Array(Float64), token_count : Int32, service_id : Int64) : Bool #

Store embedding in database (deduplicated by hash)

Returns true if inserted, false if already exists

[View source]

CrystalDoc.info

memo

module Memo::Storage

Overview

Extended Modules

Defined in:

Instance Method Summary

Instance Method Detail