Skip to content

Graph Database: Performance Optimization

Graph Database: Performance Optimization

Part of: Graph Database User Guide


Index Creation

Create indexes on frequently queried properties for faster lookups.

// Label index (automatically created)
let people = storage.get_vertices_by_label("Person").await?;
// Property index
let users_in_sf = storage.get_vertices_by_property("city", &json!("San Francisco")).await?;
// For best performance, create indexes during graph construction

Query Optimization

Use BFS/DFS Instead of Cypher for Simple Traversals

// Slower: Cypher query with variable-length path
// let query = CypherQuery { ... };
// Faster: Direct BFS call
let result = bfs(&storage, start_id, BfsOptions {
max_depth: 3,
max_nodes: 10000,
enable_early_termination: true,
target: Some(target_id),
}).await?;

Filter Early

// Good: Filter by label/properties first, then traverse
let tech_companies = storage.get_vertices_by_label("Company").await?
    .into_iter()
    // Keep only nodes whose "industry" property equals "Technology".
    // map_or(false, ..) excludes nodes that lack the property instead of
    // panicking the way a bare `.unwrap()` would.
    .filter(|node| {
        node.properties
            .get("industry")
            .map_or(false, |industry| industry == "Technology")
    })
    .collect::<Vec<_>>();
// Then run algorithms on filtered set

Limit Result Sets

// Always use limits for large result sets
let query = CypherQuery {
// ...
limit: Some(100), // Prevent OOM on large graphs
// ...
};

Batch Loading

For bulk data import, use batch updates:

use heliosdb_graph::updates::BatchUpdateManager;
// The batch manager buffers writes and flushes them in groups of
// `batch_size` instead of issuing one storage call per node/edge.
let batch_mgr = BatchUpdateManager::new(
Arc::new(storage),
1000 // batch size
);
// Load 1M nodes efficiently
for i in 0..1_000_000 {
let node = Node {
// NOTE(review): every node is created with id 0 here — presumably the
// storage layer assigns the real id on insert (the application-level id
// is carried in the "id" property below). Confirm; otherwise each node
// needs a unique id.
id: 0,
label: "Person".to_string(),
properties: HashMap::from([
("id".to_string(), json!(i)),
("name".to_string(), json!(format!("User{}", i))),
]),
};
// Queued in the batch; actual writes happen on flush.
batch_mgr.add_node(node).await?;
}
// Flush any remaining buffered updates and report totals.
let (nodes_added, edges_added) = batch_mgr.flush_all().await?;
println!("Loaded {} nodes, {} edges", nodes_added, edges_added);

Performance: 500K+ inserts/second with batching (vs 450K without)

Graph Projection

For iterative algorithms, project subgraphs to reduce overhead:

// Create a subgraph for community detection
let active_users: Vec<NodeId> = storage.get_vertices_by_label("User").await?
    .into_iter()
    // Treat a missing or non-boolean "active" property as inactive
    // rather than panicking on chained `.unwrap()` calls.
    .filter(|user| {
        user.properties
            .get("active")
            .and_then(|v| v.as_bool())
            .unwrap_or(false)
    })
    .map(|user| user.id)
    .collect();
// Run algorithm on projected graph
let communities = louvain(&storage, &active_users, 1.0).await?;

Caching Strategies

Cache frequently accessed data:

use std::sync::Arc;
use dashmap::DashMap;
// Cache for hot nodes
let node_cache: Arc<DashMap<NodeId, Node>> = Arc::new(DashMap::new());
/// Fetch a node by id, consulting the shared cache before hitting storage.
///
/// On a cache miss the node (when present in storage) is inserted into the
/// cache so later lookups for the same id are served from memory.
/// Returns `Ok(None)` when the node exists in neither cache nor storage.
async fn get_node_cached(
    storage: &GraphStorage,
    cache: &Arc<DashMap<NodeId, Node>>,
    node_id: NodeId,
) -> Result<Option<Node>> {
    // Fast path: cache hit, clone out of the guard and return.
    if let Some(hit) = cache.get(&node_id) {
        return Ok(Some(hit.clone()));
    }
    // Slow path: consult storage; populate the cache only on success.
    match storage.get_vertex(node_id).await? {
        Some(node) => {
            cache.insert(node_id, node.clone());
            Ok(Some(node))
        }
        None => Ok(None),
    }
}

Storage Optimization

Optimize storage periodically:

// Rebuild CSR format for faster traversals
storage.optimize().await?;
// Compact storage to reclaim space
storage.compact().await?;
// Get storage statistics
let stats = storage.get_stats().await?;
println!("Nodes: {}, Edges: {}", stats.vertex_count, stats.edge_count);
println!("Memory usage: {} MB", stats.memory_usage_mb);



Version: 6.5 Last Updated: November 17, 2025