module Memo::Storage

Overview

Low-level storage operations for embeddings and chunks

Extended Modules

Defined in:

memo/storage.cr

Instance Method Summary

Instance Method Detail

def compute_hash(text : String) : Bytes #

Compute the SHA-256 hash of the given text content


[View source]
def create_chunk(db : DB::Database, hash : Bytes, source_type : String, source_id : Int64, offset : Int32 | Nil, size : Int32, pair_id : Int64 | Nil = nil, parent_id : Int64 | Nil = nil) : Int64 #

Create chunk reference

Links a hash to a source, with optional `pair_id` and `parent_id` relationships


[View source]
def deserialize_embedding(blob : Bytes) : Array(Float64) #

Deserialize embedding from binary blob


[View source]
def get_embedding(db : DB::Database, hash : Bytes) : Array(Float64) | Nil #

Get embedding by hash

Returns nil if not found


[View source]
def increment_match_count(db : DB::Database, chunk_ids : Array(Int64)) #

Increment match_count for chunks


[View source]
def increment_read_count(db : DB::Database, chunk_ids : Array(Int64)) #

Increment read_count for chunks


[View source]
def register_service(db : DB::Database, provider : String, model : String, version : String | Nil, dimensions : Int32, max_tokens : Int32) : Int64 #

Register or get existing service

Returns service_id for the provider/model combination


[View source]
def serialize_embedding(embedding : Array(Float64)) : Bytes #

Serialize embedding to a binary blob of little-endian Float32 values (the Float64 inputs are narrowed to Float32 for space efficiency)

TODO: Consider int16 normalization for embeddings to reduce storage by 50% relative to Float32 (1536 dims: 6 KB -> 3 KB). Precision loss is ~0.003% for normalized vectors. This would require mapping the float range [-1, 1] to the int16 range [-32768, 32767].


[View source]
def store_embedding(db : DB::Database, hash : Bytes, embedding : Array(Float64), token_count : Int32, service_id : Int64) : Bool #

Store embedding in database (deduplicated by hash)

Returns true if the embedding was inserted, false if an embedding with this hash already exists


[View source]