HeliosDB High Availability Operational Procedures
HeliosDB High Availability Operational Procedures
Version: 1.0
Last Updated: 2025-11-30
Cluster Setup
-- Create HA cluster with 3 nodes
SELECT create_ha_cluster(
    cluster_name => 'production-cluster',
    nodes => ARRAY['node1:5432', 'node2:5432', 'node3:5432'],
    replication_mode => 'synchronous',
    quorum => 2
);
-- Verify cluster status
SELECT * FROM cluster_status;

Monitoring
-- Real-time health check
SELECT
    node_name,
    status,
    lag_bytes,
    uptime_seconds,
    cpu_usage_pct,
    memory_usage_pct
FROM cluster_health;
-- Alert setup
CREATE ALERT node_down AS
    WHEN ANY NODE status != 'HEALTHY'
    THEN NOTIFY 'ops-team' SEVERITY HIGH;

Failover Procedures
Automatic Failover
-- Enable automatic failover
ALTER CLUSTER SET (
    auto_failover = true,
    failover_timeout = 30,
    failover_min_replicas = 1
);

Manual Failover
-- Initiate switchover
SELECT manual_failover_to('node2');
-- Monitor progress
SELECT * FROM failover_progress;

Scaling
-- Add node
SELECT add_cluster_node('node4:5432');
-- Remove node
SELECT remove_cluster_node('node3');
-- Rebalance
SELECT rebalance_cluster();

Recovery
-- Rebuild failed node
SELECT rebuild_node('node3');
-- Reset replica
SELECT reset_replica('node2');

Best Practices
- Use synchronous replication for critical data
- Monitor node health continuously
- Test failover monthly
- Keep replicas in sync
- Set up automated alerts
Related Documentation: