class Cadmium::Classifier::Tabular::KNN

Overview

K-Nearest Neighbors classifier for multi-feature tabular data.

This classifier stores all training data and makes predictions by finding the k most similar training examples and taking a majority vote.

Features

Example

classifier = Cadmium::Classifier::Tabular::KNN.new(k: 3)

features = [
  [1.0, 2.0, 3.0],
  [1.1, 2.1, 3.1],
  [5.0, 6.0, 7.0],
]
labels = ["class_a", "class_a", "class_b"]

classifier.train(features, labels)

# Predict a new sample
result = classifier.classify([1.05, 2.05, 3.05])
# => "class_a"

# Get detailed results with vote counts
details = classifier.classify_details([1.05, 2.05, 3.05])
# => {"class_a" => 2, "class_b" => 1}

Defined in:

cadmium/classifier/tabular/knn.cr

Constructors

Instance Method Summary

Constructor Detail

def self.load_model(path : String) : self #

Load a trained model from a file.

classifier = Cadmium::Classifier::Tabular::KNN.load_model("knn_model.msgpack")

[View source]
def self.new(k : Int32 = 5, distance_metric : DistanceMetric = DistanceMetric::Euclidean) #

[View source]

Instance Method Detail

def classify(features : Array(Float64)) : String #

Classify a new sample and return the predicted label.

classifier.classify([1.0, 2.0, 3.0]) # => "class_a"

[View source]
def classify_batch(features_batch : Array(Array(Float64))) : Array(String) #

Classify multiple samples at once.

results = classifier.classify_batch([[1.0, 2.0], [3.0, 4.0]])
# => ["class_a", "class_b"]

[View source]
def classify_details(features : Array(Float64)) : Hash(String, Int32) #

Classify a new sample and return detailed vote counts.

details = classifier.classify_details([1.0, 2.0, 3.0])
# => {"class_a" => 3, "class_b" => 2}

[View source]
def distance_metric : DistanceMetric #

The distance metric used to find the nearest neighbors.


[View source]
def k : Int32 #

The number of nearest neighbors (k) considered when voting.


[View source]
def save_model(path : String) : Nil #

Save the trained model to a file.

classifier.save_model("knn_model.msgpack")

[View source]
def train(features : Array(Array(Float64)), labels : Array(String)) : self #

Train the classifier by storing feature vectors and labels.

features = [[1.0, 2.0], [3.0, 4.0]]
labels = ["a", "b"]
classifier.train(features, labels)

[View source]
def train(features : Array(Float64), label : String) : self #

Train with a single sample.

classifier.train([1.0, 2.0, 3.0], "class_a")

[View source]