Skip to content

HeliosDB Replication Operations Guide

HeliosDB Replication Operations Guide

Version: 1.0 | Last Updated: 2025-11-30


Quick Start

-- Step 1: switch replication on
ALTER SYSTEM SET primary_replication_enabled = on;

-- Step 2: register a replica by name, host and port
SELECT pg_create_replica(
    replica_name => 'replica-1',
    host         => '192.168.1.100',
    port         => 5432
);

-- Step 3: inspect the replication sessions
SELECT * FROM pg_stat_replication;

Replication Setup

Configure Primary

-- Primary configuration
-- One parameter per ALTER SYSTEM statement: PostgreSQL's ALTER SYSTEM grammar
-- accepts a single configuration parameter, so a comma-separated list of
-- assignments is rejected there.
ALTER SYSTEM SET wal_level = replica;
ALTER SYSTEM SET max_wal_senders = 10;
ALTER SYSTEM SET wal_keep_size = '1GB';
ALTER SYSTEM SET hot_standby = on;

-- Ask the server to re-read its configuration.
-- NOTE(review): in PostgreSQL, wal_level and max_wal_senders only take effect
-- after a server restart, not a reload -- confirm whether HeliosDB differs.
SELECT pg_reload_conf();

Configure Replica

-- Base backup
-- Shell command, not SQL: copies the data directory from primary.example.com
-- into /var/lib/postgresql/data, connecting as user "replication".
-- Presumably run on the replica host -- confirm.
pg_basebackup -h primary.example.com -D /var/lib/postgresql/data -U replication
-- Standby configuration
-- Configuration-file settings, not SQL statements.
-- NOTE(review): PostgreSQL 12+ removed standby_mode in favour of a
-- standby.signal file -- confirm which form HeliosDB expects.
-- NOTE(review): primary_conninfo names no user, while the base backup above
-- connects as "replication" -- confirm whether user=... is needed here.
standby_mode = 'on'
primary_conninfo = 'host=primary.example.com port=5432'

Replication Modes

Asynchronous Replication

-- Default mode: faster, risk of data loss
-- NOTE(review): in PostgreSQL the default for synchronous_commit is "on", and
-- "off" also stops commits from waiting for the local WAL flush -- confirm
-- that "default mode" and these semantics hold for HeliosDB.
ALTER SYSTEM SET synchronous_commit = off;
-- Reload the configuration after changing the setting.
SELECT pg_reload_conf();

Synchronous Replication

-- Replicas must acknowledge writes
-- One parameter per ALTER SYSTEM statement: PostgreSQL's ALTER SYSTEM grammar
-- accepts a single configuration parameter, so a comma-separated list of
-- assignments is rejected there.
ALTER SYSTEM SET synchronous_commit = on;
-- NOTE(review): PostgreSQL requires standby names that are not plain SQL
-- identifiers (such as names containing '-') to be double-quoted inside this
-- value, e.g. '"replica-1", "replica-2"' -- confirm what HeliosDB expects.
ALTER SYSTEM SET synchronous_standby_names = 'replica-1, replica-2';

-- Reload the configuration after changing the settings.
SELECT pg_reload_conf();

Quorum-Based Replication

-- Quorum of replicas must acknowledge
-- One parameter per ALTER SYSTEM statement: PostgreSQL's ALTER SYSTEM grammar
-- accepts a single configuration parameter, so a comma-separated list of
-- assignments is rejected there.
-- NOTE(review): stock PostgreSQL has no synchronous_commit = 'quorum'; there,
-- quorum commit is written as synchronous_standby_names = 'ANY 2 (...)', and
-- a bare '2 (...)' means priority-based (FIRST 2). Confirm HeliosDB semantics.
ALTER SYSTEM SET synchronous_commit = 'quorum';
ALTER SYSTEM SET synchronous_standby_names = '2 (replica-1, replica-2, replica-3)';

-- Reload the configuration after changing the settings.
SELECT pg_reload_conf();

Monitoring Replication

Replication Status

-- Per-session replication metrics: who is connected, in which state, and how
-- far each session has written, flushed and replayed WAL.
SELECT
    pid,
    usename,
    application_name,
    client_addr,
    state,
    write_lsn,
    flush_lsn,
    replay_lsn,
    sync_state
FROM pg_stat_replication;
-- Replication lag in seconds, one row per replication session.
-- Uses the replay_lag interval column of pg_stat_replication. The view has no
-- backend_xmin_horizon column, and the similarly named backend_xmin is a
-- transaction ID rather than a timestamp, so it cannot be subtracted from NOW().
-- lag_seconds is NULL when the view reports no replay lag for a session.
SELECT
    application_name,
    EXTRACT(EPOCH FROM replay_lag) AS lag_seconds
FROM pg_stat_replication;

Failover & Recovery

Planned Failover

-- On primary: initiate switchover
-- The argument names the replica that is to take over ('replica-1').
SELECT pg_ctl_switchover('replica-1');
-- On new primary: promote replica
-- Run this on the node that is taking over, not on the old primary.
-- NOTE(review): presumably executed only after the switchover call above has
-- completed -- confirm the required ordering and any wait condition.
SELECT pg_ctl_promote('replica-1');

Emergency Promotion

-- Promote replica immediately
-- SQL statement; presumably executed on the replica being promoted -- confirm.
SELECT pg_promote();
-- Restart as primary
-- Shell command, not SQL: restarts the server whose data directory is /data.
-- NOTE(review): in PostgreSQL no restart is required after pg_promote() --
-- confirm whether HeliosDB needs this step.
pg_ctl restart -D /data

Conflict Resolution

During Multi-Master Replication

-- Configure conflict resolution
-- Sets the resolution method and the column used for last-write-wins
-- comparison ('updated_at').
-- NOTE(review): no database name follows ALTER DATABASE; PostgreSQL requires
-- ALTER DATABASE <name> SET ... with one parameter per statement -- confirm
-- the HeliosDB syntax.
-- NOTE(review): 'CLOUD_WINS' is paired with last_write_wins_field -- confirm
-- whether the method was meant to be a last-write-wins setting.
ALTER DATABASE SET
conflict_resolution_method = 'CLOUD_WINS',
last_write_wins_field = 'updated_at';
-- Custom resolution function
-- Picks between two conflicting row versions by comparing their "version"
-- fields.
--   local_row, remote_row: the two candidate rows; each must expose a
--                          "version" field.
--   Returns local_row when its version is strictly greater; otherwise
--   remote_row (ties, and comparisons involving NULL, fall to remote_row).
-- Both parameters are declared with an explicit type: without one, PostgreSQL
-- parses "local_row" and "remote_row" as type names of unnamed parameters and
-- the references in the body do not resolve.
-- NOTE(review): if the target engine does not accept "record" parameters in
-- PL/pgSQL, substitute the replicated table's row type.
CREATE FUNCTION resolve_conflict(local_row record, remote_row record)
RETURNS record AS $$
BEGIN
    IF local_row.version > remote_row.version THEN
        RETURN local_row;
    ELSE
        RETURN remote_row;
    END IF;
END;
$$ LANGUAGE plpgsql;

Monitoring & Alerts

-- Overall replication health
SELECT * FROM replication_health_check;

-- Lag alert: notify 'ops-team' when the largest replay backlog across all
-- replication sessions exceeds the threshold. The byte difference is divided
-- by 1024 twice, so the comparison is in megabytes.
CREATE ALERT replication_lag_alert AS
WHEN (
    SELECT MAX(pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn) / 1024 / 1024)
    FROM pg_stat_replication
) > 100 -- 100MB lag
THEN NOTIFY 'ops-team';

Best Practices

  1. Monitor replication lag
  2. Use synchronous replication for critical data
  3. Plan for failover scenarios
  4. Test failover regularly
  5. Keep replicas in sync
  6. Monitor backup logs

Related Documentation: