Skip to content

Vector Search Accuracy Test Suite

Vector Search Accuracy Test Suite

Overview

This document defines comprehensive tests for HeliosDB’s vector search capabilities, including ANN index accuracy (HNSW, IVF), filtered search, recall metrics, and TOAST storage strategy validation.

Test Categories

1. HNSW Index Accuracy Tests

2. IVF Index Accuracy Tests

3. Filtered ANN Search Tests

4. Vector Storage (TOAST) Tests

5. Recall and Performance Benchmarks

1. HNSW Index Accuracy Tests

tests/vector/hnsw_accuracy_test.rs
use heliosdb_vector::hnsw::*;
use heliosdb_vector::metrics::*;
#[tokio::test]
async fn test_hnsw_recall_at_k() {
    // VEC-001: HNSW recall@10 > 95%
    //
    // Builds an HNSW index over 10k random 384-dim vectors, then compares
    // approximate search results against brute-force ground truth across
    // 100 random queries drawn from the dataset itself.
    let dataset = generate_test_vectors(10_000, 384);

    // dimensions / M (connections per layer) / efConstruction
    let index = HNSWIndex::new(384, 16, 200).await;

    // Insert every vector, using its position as the id and no metadata.
    for (id, vector) in dataset.iter().enumerate() {
        index.insert(id as u64, vector, vec![]).await.unwrap();
    }

    // Average recall over 100 random queries.
    let mut recall_sum = 0.0;
    for _ in 0..100 {
        let probe = &dataset[rand::random::<usize>() % dataset.len()];

        // Approximate (HNSW) vs. exact (brute-force) top-10.
        let approximate = index.search(probe, 10, None).await.unwrap();
        let exact = brute_force_knn(&dataset, probe, 10);

        recall_sum += calculate_recall(&approximate, &exact);
    }

    let avg_recall = recall_sum / 100.0;
    println!("HNSW Recall@10: {:.2}%", avg_recall * 100.0);
    assert!(avg_recall > 0.95, "HNSW recall too low: {:.2}%", avg_recall * 100.0);
}
#[tokio::test]
async fn test_hnsw_ef_search_tradeoff() {
    // VEC-002: Higher ef_search improves recall but increases latency
    //
    // Uses one fixed query (the first dataset vector) and sweeps
    // ef_search, printing recall/latency so the tradeoff can be
    // inspected manually (no hard assertion).
    let dataset = generate_test_vectors(10_000, 384);
    let index = HNSWIndex::new(384, 16, 200).await;
    for (id, vector) in dataset.iter().enumerate() {
        index.insert(id as u64, vector, vec![]).await.unwrap();
    }

    let probe = &dataset[0];
    let ground_truth = brute_force_knn(&dataset, probe, 10);

    // Sweep ef_search and report recall vs. latency at each setting.
    for ef_search in [10, 50, 100, 200] {
        index.set_ef_search(ef_search).await;

        let started = std::time::Instant::now();
        let results = index.search(probe, 10, None).await.unwrap();
        let latency = started.elapsed();

        let recall = calculate_recall(&results, &ground_truth);
        println!("ef_search={}: recall={:.2}%, latency={:?}",
            ef_search, recall * 100.0, latency);
    }
}
#[tokio::test]
async fn test_hnsw_memory_usage() {
    // VEC-003: HNSW memory usage proportional to graph size
    //
    // Inserts 100k vectors and checks the per-vector memory footprint
    // stays under 2KB (expected ~1KB for 384-dim with M=16).
    let index = HNSWIndex::new(384, 16, 200).await;
    let baseline = index.memory_usage().await;

    // Insert 100k vectors; constant content is fine for a sizing check.
    let vector = vec![0.1f32; 384];
    for id in 0..100_000 {
        index.insert(id, &vector, vec![]).await.unwrap();
    }

    let after = index.memory_usage().await;
    let per_vector = (after - baseline) / 100_000;
    println!("Memory per vector: {} bytes", per_vector);

    // Expect ~1KB per vector for 384-dim with M=16
    assert!(
        per_vector < 2000,
        "Memory usage too high: {} bytes per vector",
        per_vector
    );
}

2. IVF Index Accuracy Tests

tests/vector/ivf_accuracy_test.rs
use heliosdb_vector::ivf::*;
#[tokio::test]
async fn test_ivf_recall_at_k() {
    // VEC-004: IVF recall@10 > 90% with nprobe tuning
    //
    // Trains an IVF index (k-means over a 10k sample), inserts 100k
    // vectors, then measures recall@10 at several nprobe settings.
    // The hard assertion only applies once nprobe >= 10.
    let dataset = generate_test_vectors(100_000, 384);

    // dimensions / n_lists (clusters) / distance metric
    let index = IVFIndex::new(384, 256, Metric::L2).await;

    // Train index (k-means clustering) on the first 10k vectors.
    index.train(&dataset[..10_000]).await.unwrap();

    for (id, vector) in dataset.iter().enumerate() {
        index.insert(id as u64, vector, vec![]).await.unwrap();
    }

    // Sweep nprobe; recall should rise as more clusters are probed.
    for nprobe in [1, 5, 10, 20] {
        index.set_nprobe(nprobe).await;

        let mut recall_sum = 0.0;
        for _ in 0..100 {
            let probe = &dataset[rand::random::<usize>() % dataset.len()];
            let approximate = index.search(probe, 10, None).await.unwrap();
            let exact = brute_force_knn(&dataset, probe, 10);
            recall_sum += calculate_recall(&approximate, &exact);
        }

        let avg_recall = recall_sum / 100.0;
        println!("IVF nprobe={}: Recall@10={:.2}%", nprobe, avg_recall * 100.0);
        if nprobe >= 10 {
            assert!(avg_recall > 0.90, "IVF recall too low at nprobe={}", nprobe);
        }
    }
}
#[tokio::test]
async fn test_ivf_build_time() {
    // VEC-005: IVF builds faster than HNSW
    //
    // Times a full build (train + insert for IVF, insert-only for HNSW)
    // of the same 100k-vector dataset and asserts IVF wins.
    let dataset = generate_test_vectors(100_000, 384);

    // IVF build: train on a 10k sample, then insert everything.
    let ivf_start = std::time::Instant::now();
    let ivf_index = IVFIndex::new(384, 256, Metric::L2).await;
    ivf_index.train(&dataset[..10_000]).await.unwrap();
    for (id, vector) in dataset.iter().enumerate() {
        ivf_index.insert(id as u64, vector, vec![]).await.unwrap();
    }
    let ivf_duration = ivf_start.elapsed();

    // HNSW build: graph construction happens incrementally per insert.
    let hnsw_start = std::time::Instant::now();
    let hnsw_index = HNSWIndex::new(384, 16, 200).await;
    for (id, vector) in dataset.iter().enumerate() {
        hnsw_index.insert(id as u64, vector, vec![]).await.unwrap();
    }
    let hnsw_duration = hnsw_start.elapsed();

    println!("IVF build time: {:?}", ivf_duration);
    println!("HNSW build time: {:?}", hnsw_duration);
    assert!(
        ivf_duration < hnsw_duration,
        "IVF build should be faster than HNSW"
    );
}

3. Filtered ANN Search Tests

tests/vector/filtered_search_test.rs
use heliosdb_vector::filtered_hnsw::*;
#[tokio::test]
async fn test_filtered_hnsw_recall() {
    // VEC-006: Filtered HNSW maintains high recall
    //
    // Inserts 10k vectors tagged with category/price/availability
    // metadata, then checks recall@10 stays above 90% across filter
    // sets of varying selectivity.
    let index = FilteredHNSWIndex::new(384, 16, 200).await;

    // Insert vectors with metadata derived from the insertion index.
    for id in 0..10_000 {
        let vector = generate_vector(384);
        let metadata = vec![
            ("category", format!("cat_{}", id % 10)),
            ("price", (id % 1000).to_string()),
            ("available", (id % 2 == 0).to_string()),
        ];
        index.insert(id as u64, &vector, metadata).await.unwrap();
    }

    // Bitmap indexes back the metadata filters.
    index.build_bitmap_indexes().await.unwrap();

    let probe = generate_vector(384);

    // Filter sets ordered by intended selectivity.
    let filter_sets = vec![
        // High selectivity (10% of data)
        vec![Filter::Equals("category", "cat_0")],
        // Medium selectivity (50% of data)
        vec![Filter::Equals("available", "true")],
        // Low selectivity + range (5% of data)
        vec![
            Filter::Equals("category", "cat_0"),
            Filter::LessThan("price", "500"),
        ],
    ];

    for filter_set in filter_sets {
        // Approximate filtered search vs. brute-force filtered ground truth.
        let approximate = index
            .search(&probe, 10, Some(filter_set.clone()))
            .await
            .unwrap();
        let exact = brute_force_filtered_knn(
            &index.get_all_vectors().await,
            &probe,
            10,
            &filter_set,
        );

        let recall = calculate_recall(&approximate, &exact);
        println!("Filtered recall (filters={:?}): {:.2}%",
            filter_set, recall * 100.0);
        assert!(
            recall > 0.90,
            "Filtered recall too low: {:.2}%",
            recall * 100.0
        );
    }
}
#[tokio::test]
async fn test_filter_aware_traversal() {
    // VEC-007: Filter-aware traversal handles disconnected graph islands
    //
    // Builds 10 tight clusters (each one-hot along a distinct axis) so
    // the HNSW graph forms islands, then filters to a single cluster and
    // verifies traversal still returns a full result set from it.
    let index = FilteredHNSWIndex::new(384, 16, 200).await;

    for cluster in 0..10 {
        for offset in 0..1_000 {
            // One-hot along the cluster's dimension keeps clusters far apart.
            let mut vector = vec![0.0f32; 384];
            vector[cluster] = 1.0;
            index
                .insert(
                    (cluster * 1_000 + offset) as u64,
                    &vector,
                    vec![("cluster", cluster.to_string())],
                )
                .await
                .unwrap();
        }
    }

    // Query aimed at cluster 5, which the filter isolates from the rest.
    let mut probe = vec![0.0f32; 384];
    probe[5] = 1.0;
    let results = index
        .search(&probe, 10, Some(vec![Filter::Equals("cluster", "5")]))
        .await
        .unwrap();

    // Should find results despite graph islands.
    assert_eq!(results.len(), 10, "Filter-aware traversal failed on isolated cluster");

    // Every hit must carry the cluster-5 tag.
    for result in results {
        let metadata = index.get_metadata(result.id).await.unwrap();
        assert_eq!(metadata.get("cluster"), Some(&"5".to_string()));
    }
}
#[tokio::test]
async fn test_bitmap_index_efficiency() {
// VEC-008: Bitmap indexes enable fast pre-filtering
let index = FilteredHNSWIndex::new(384, 16, 200).await;
for i in 0..100_000 {
index.insert(
i as u64,
&generate_vector(384),
vec![
("status", if i % 10 == 0 { "active" } else { "inactive" }),
("region", format!("region_{}", i % 5)),
]
).await.unwrap();
}
index.build_bitmap_indexes().await.unwrap();
let query = generate_vector(384);
// Measure bitmap filter time
let start = std::time::Instant::now();
let bitmap = index.build_filter_bitmap(&vec![
Filter::Equals("status", "active"),
Filter::Equals("region", "region_2"),
]).await.unwrap();
let filter_time = start.elapsed();
println!("Bitmap filter time: {:?}", filter_time);
println!("Candidates after filter: {}", bitmap.count_ones());
// Bitmap construction should be very fast (< 10ms for 100k items)
assert!(filter_time < Duration::from_millis(10));
// Should reduce search space significantly
assert!(bitmap.count_ones() < 5_000); // ~2% of 100k
}

4. Vector Storage (TOAST) Tests

tests/vector/toast_storage_test.rs
use heliosdb_vector::storage::*;
#[tokio::test]
async fn test_toast_inline_threshold() {
    // VEC-009: Small vectors stored in-line, large vectors out-of-line
    //
    // With StorageHint::Auto the engine decides placement by size:
    // 128 dims (512 bytes) should stay in the page, 1536 dims (6144
    // bytes) should go out-of-line. Both must round-trip unchanged.
    let storage = VectorStorage::new().await;

    // Small vector (128 dims = 512 bytes, should be in-line)
    let small = vec![0.1f32; 128];
    storage.insert(1, &small, StorageHint::Auto).await.unwrap();
    assert!(storage.is_inline(1).await, "Small vector not stored in-line");

    // Large vector (1536 dims = 6144 bytes, should be out-of-line)
    let large = vec![0.1f32; 1536];
    storage.insert(2, &large, StorageHint::Auto).await.unwrap();
    assert!(!storage.is_inline(2).await, "Large vector not stored out-of-line");

    // Verify both are retrievable correctly
    assert_eq!(storage.get(1).await.unwrap(), small);
    assert_eq!(storage.get(2).await.unwrap(), large);
}
#[tokio::test]
async fn test_toast_io_overhead() {
// VEC-010: Out-of-line storage adds I/O overhead
let storage = VectorStorage::new().await;
// Store inline vector
let inline_vec = vec![0.1f32; 128];
storage.insert(1, &inline_vec, StorageHint::Plain).await.unwrap();
// Store out-of-line vector
let external_vec = vec![0.1f32; 1536];
storage.insert(2, &external_vec, StorageHint::External).await.unwrap();
// Measure retrieval I/O
let inline_io = storage.measure_io_operations(|| {
storage.get(1).await.unwrap()
}).await;
let external_io = storage.measure_io_operations(|| {
storage.get(2).await.unwrap()
}).await;
println!("Inline I/O operations: {}", inline_io.read_count);
println!("External I/O operations: {}", external_io.read_count);
// External requires 2 reads (pointer + data), inline only 1
assert!(external_io.read_count > inline_io.read_count);
}
#[tokio::test]
async fn test_force_plain_storage() {
    // VEC-011: PLAIN storage forces in-line (or errors if too large)
    //
    // StorageHint::Plain must keep vectors in the page regardless of
    // size, and reject vectors that cannot possibly fit.
    let storage = VectorStorage::new().await;

    // Force small vector in-line (should work)
    let small = vec![0.1f32; 128];
    storage.insert(1, &small, StorageHint::Plain).await.unwrap();
    assert!(storage.is_inline(1).await);

    // Force medium vector in-line (should work but be suboptimal)
    let medium = vec![0.1f32; 512];
    storage.insert(2, &medium, StorageHint::Plain).await.unwrap();
    assert!(storage.is_inline(2).await);

    // Very large vector with PLAIN should error (won't fit in page)
    let huge = vec![0.1f32; 10_000]; // 40KB
    let outcome = storage.insert(3, &huge, StorageHint::Plain).await;
    assert!(outcome.is_err(), "Huge vector should fail with PLAIN storage");
}

5. Recall and Performance Benchmarks

tests/vector/benchmark_test.rs
use heliosdb_vector::benchmark::*;
#[tokio::test]
async fn test_hnsw_vs_ivf_tradeoffs() {
    // VEC-012: Compare HNSW and IVF on recall/latency/memory
    //
    // Builds both index types over the same 100k-vector dataset, runs an
    // identical 100-query benchmark, and asserts the expected tradeoffs:
    // HNSW wins on recall and tail latency, IVF wins on memory.
    let dataset = generate_test_vectors(100_000, 384);

    // Build HNSW
    let hnsw = HNSWIndex::new(384, 16, 200).await;
    for (id, vector) in dataset.iter().enumerate() {
        hnsw.insert(id as u64, vector, vec![]).await.unwrap();
    }

    // Build IVF (trained on the first 10k vectors)
    let ivf = IVFIndex::new(384, 256, Metric::L2).await;
    ivf.train(&dataset[..10_000]).await.unwrap();
    for (id, vector) in dataset.iter().enumerate() {
        ivf.insert(id as u64, vector, vec![]).await.unwrap();
    }

    // The same 100 random queries are used for both indexes.
    let queries: Vec<_> = (0..100)
        .map(|_| dataset[rand::random::<usize>() % dataset.len()].clone())
        .collect();

    let hnsw_metrics = benchmark_index(&hnsw, &dataset, &queries, 10).await;
    let ivf_metrics = benchmark_index(&ivf, &dataset, &queries, 10).await;

    println!("\nHNSW Metrics:");
    println!(" Recall@10: {:.2}%", hnsw_metrics.recall * 100.0);
    println!(" P50 latency: {:?}", hnsw_metrics.latency_p50);
    println!(" P99 latency: {:?}", hnsw_metrics.latency_p99);
    println!(" Memory: {} MB", hnsw_metrics.memory_mb);

    println!("\nIVF Metrics (nprobe=10):");
    println!(" Recall@10: {:.2}%", ivf_metrics.recall * 100.0);
    println!(" P50 latency: {:?}", ivf_metrics.latency_p50);
    println!(" P99 latency: {:?}", ivf_metrics.latency_p99);
    println!(" Memory: {} MB", ivf_metrics.memory_mb);

    // HNSW should have higher recall and lower latency
    assert!(hnsw_metrics.recall > ivf_metrics.recall);
    assert!(hnsw_metrics.latency_p99 < ivf_metrics.latency_p99);
    // IVF should use less memory
    assert!(ivf_metrics.memory_mb < hnsw_metrics.memory_mb);
}
#[tokio::test]
async fn test_recall_vs_latency_curve() {
    // VEC-013: Generate recall-latency tradeoff curve
    //
    // Sweeps ef_search on a 50k-vector HNSW index and prints a markdown
    // table of recall vs. P50/P99 latency (output only; no assertions).
    let dataset = generate_test_vectors(50_000, 384);
    let index = HNSWIndex::new(384, 16, 200).await;
    for (id, vector) in dataset.iter().enumerate() {
        index.insert(id as u64, vector, vec![]).await.unwrap();
    }

    println!("\n| ef_search | Recall@10 | P50 Latency | P99 Latency |");
    println!("|-----------|-----------|-------------|-------------|");
    for ef in [10, 20, 50, 100, 200, 500] {
        index.set_ef_search(ef).await;
        let metrics = benchmark_index(&index, &dataset, &generate_queries(100, 384), 10).await;
        println!("| {:9} | {:8.2}% | {:11?} | {:11?} |",
            ef,
            metrics.recall * 100.0,
            metrics.latency_p50,
            metrics.latency_p99
        );
    }
}

Test Execution

Terminal window
# Run all vector tests
cargo test --test vector
# Run specific categories
cargo test --test hnsw_accuracy
cargo test --test ivf_accuracy
cargo test --test filtered_search
cargo test --test toast_storage
# Run benchmarks
cargo test --test benchmark -- --nocapture --ignored
# Generate recall curves
cargo test test_recall_vs_latency_curve -- --nocapture

Quality Metrics

Accuracy Targets

  • HNSW Recall@10 > 95%
  • IVF Recall@10 > 90% (nprobe=10)
  • Filtered HNSW Recall@10 > 90%
  • Bitmap filter construction < 10ms for 100k items

Performance Targets

  • HNSW P99 query latency < 10ms (100k vectors)
  • IVF P99 query latency < 20ms (100k vectors)
  • Memory: < 1.5KB per vector (HNSW), < 500 bytes per vector (IVF)
  • TOAST inline retrieval: 1 I/O, out-of-line: 2 I/O

Index Comparison Matrix

| Metric        | HNSW             | IVF               |
|---------------|------------------|-------------------|
| Recall@10     | >95%             | >90% (nprobe=10)  |
| Query Latency | Very Low         | Low-Medium        |
| Build Time    | High             | Low               |
| Memory Usage  | High             | Low               |
| Use Case      | Real-time search | Large-scale batch |