HeliosDB v3.0 SQL Reference
HeliosDB v3.0 SQL Reference
Complete SQL Language Reference for HeliosDB v3.0
Table of Contents
- Data Definition Language (DDL)
- Data Manipulation Language (DML)
- Data Query Language (DQL)
- v3.0 Extensions
- Functions and Operators
Data Definition Language (DDL)
CREATE TABLE
-- Basic tableCREATE TABLE users ( id INTEGER PRIMARY KEY, name VARCHAR(100) NOT NULL, email VARCHAR(255) UNIQUE, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP);
-- Time-series tableCREATE TABLE metrics ( timestamp TIMESTAMP NOT NULL, sensor_id INTEGER, value DOUBLE, PRIMARY KEY (timestamp, sensor_id)) WITH ( timeseries_retention = '30 days', timeseries_downsample = 'avg:5m', timeseries_compression = 'gorilla');
-- Temporal table (system-versioned)CREATE TABLE employees ( id INTEGER PRIMARY KEY, name VARCHAR(100), salary DECIMAL(10,2)) WITH SYSTEM VERSIONING;
-- Encrypted columnsCREATE TABLE sensitive_data ( id INTEGER PRIMARY KEY, name VARCHAR(100), ssn CHAR(11) ENCRYPTED SEARCHABLE, email VARCHAR(255) ENCRYPTED);
-- Sharded tableCREATE TABLE orders ( id INTEGER PRIMARY KEY, user_id INTEGER, amount DECIMAL(10,2)) SHARD BY HASH(user_id) SHARDS 8;
-- Geospatial tableCREATE TABLE locations ( id INTEGER PRIMARY KEY, name VARCHAR(100), coordinates GEOMETRY(Point, 4326));CREATE INDEX
-- B-tree indexCREATE INDEX idx_users_email ON users(email);
-- Full-text indexCREATE FULLTEXT INDEX fts_products ON products(name, description)WITH LANGUAGE 'English';
-- Geospatial indexCREATE SPATIAL INDEX idx_locations_coords ON locations USING RTREE(coordinates);
-- Global secondary index (cross-shard)CREATE GLOBAL INDEX idx_orders_amount ON orders(amount);
-- Partial indexCREATE INDEX idx_active_users ON users(email) WHERE active = true;
-- Covering indexCREATE INDEX idx_users_email_name ON users(email) INCLUDE (name);CREATE MATERIALIZED VIEW
-- Basic materialized viewCREATE MATERIALIZED VIEW sales_summary ASSELECT region, DATE(order_date) as date, SUM(amount) as totalFROM salesGROUP BY region, DATE(order_date);
-- Auto-refresh on commitCREATE MATERIALIZED VIEW daily_metrics ASSELECT DATE(timestamp), AVG(value)FROM metricsGROUP BY DATE(timestamp)WITH REFRESH ON COMMIT;
-- Scheduled refreshCREATE MATERIALIZED VIEW weekly_report ASSELECT * FROM sales WHERE week = CURRENT_WEEK()WITH REFRESH SCHEDULE '0 0 * * 0'; -- Every Sunday midnightCREATE REGION
-- Multi-region deploymentCREATE REGION us_east DATACENTER 'aws-us-east-1';CREATE REGION eu_west DATACENTER 'aws-eu-west-1';CREATE REGION ap_south DATACENTER 'aws-ap-south-1';CREATE READ REPLICA
CREATE READ REPLICA replica_1WITH ( source => 'primary', async_lag_limit => '1 second', region => 'us_east');CREATE SAMPLE
-- Stratified sample for approximate queriesCREATE SAMPLE sales_sample ON salesSTRATIFIED BY region, categoryWITH SIZE 5% ERROR 5%;
-- Uniform sampleCREATE SAMPLE users_sample ON usersWITH SIZE 1%;CREATE FOREIGN TABLE
-- S3 Parquet dataCREATE FOREIGN TABLE s3_logs ( timestamp TIMESTAMP, level VARCHAR(10), message TEXT) SERVER s3_server OPTIONS ( bucket => 'my-logs', format => 'parquet', compression => 'snappy');
-- PostgreSQL foreign tableCREATE FOREIGN TABLE pg_users ( id INTEGER, name VARCHAR(100)) SERVER postgres_server OPTIONS ( schema => 'public', table => 'users');
-- REST API foreign tableCREATE FOREIGN TABLE api_users ( id INTEGER, name VARCHAR(100)) SERVER rest_server OPTIONS ( endpoint => 'https://api.example.com/users', method => 'GET', auth => 'bearer_token');CREATE PROCEDURE
-- PL/pgSQL procedureCREATE PROCEDURE calculate_bonus(emp_id INTEGER)LANGUAGE plpgsqlAS $$DECLARE v_salary DECIMAL(10,2); v_bonus DECIMAL(10,2);BEGIN SELECT salary INTO v_salary FROM employees WHERE id = emp_id; v_bonus := v_salary * 0.10; UPDATE employees SET bonus = v_bonus WHERE id = emp_id;END;$$;
-- PL/SQL procedure (Oracle-compatible)CREATE PROCEDURE process_orders(p_date DATE)LANGUAGE plsqlASBEGIN FOR rec IN (SELECT * FROM orders WHERE order_date = p_date) LOOP -- Process each order UPDATE order_status SET status = 'processed' WHERE order_id = rec.id; END LOOP; COMMIT;EXCEPTION WHEN OTHERS THEN ROLLBACK; RAISE;END;/CREATE TRIGGER
-- BEFORE triggerCREATE TRIGGER audit_user_changesBEFORE UPDATE ON usersFOR EACH ROWEXECUTE FUNCTION log_user_changes();
-- AFTER triggerCREATE TRIGGER update_statisticsAFTER INSERT ON salesFOR EACH ROWEXECUTE FUNCTION update_sales_stats();
-- INSTEAD OF trigger (for views)CREATE TRIGGER user_view_insertINSTEAD OF INSERT ON user_viewFOR EACH ROWEXECUTE FUNCTION handle_user_insert();Row-Level Security
-- Create policyCREATE POLICY user_isolation ON user_dataFOR SELECTUSING (user_id = current_user_id());
CREATE POLICY tenant_isolation ON tenant_dataFOR ALLUSING (tenant_id = current_tenant_id());
-- Enable RLSALTER TABLE user_data ENABLE ROW LEVEL SECURITY;ALTER TABLE tenant_data ENABLE ROW LEVEL SECURITY;Data Manipulation Language (DML)
INSERT
-- Basic insertINSERT INTO users (id, name, email) VALUES (1, 'Alice', 'alice@example.com');
-- Multi-row insertINSERT INTO users (id, name, email) VALUES (1, 'Alice', 'alice@example.com'), (2, 'Bob', 'bob@example.com'), (3, 'Charlie', 'charlie@example.com');
-- Insert from SELECTINSERT INTO archived_ordersSELECT * FROM orders WHERE order_date < '2024-01-01';
-- Insert with RETURNINGINSERT INTO users (name, email) VALUES ('David', 'david@example.com')RETURNING id, created_at;UPDATE
-- Basic updateUPDATE users SET email = 'newemail@example.com' WHERE id = 1;
-- Multi-column updateUPDATE employeesSET salary = salary * 1.1, last_raise = CURRENT_DATEWHERE department = 'engineering';
-- Update with JOINUPDATE orders oSET status = 'shipped'FROM shipments sWHERE o.id = s.order_id AND s.shipped_date = CURRENT_DATE;DELETE
-- Basic deleteDELETE FROM users WHERE id = 1;
-- Delete with subqueryDELETE FROM orders WHERE user_id IN ( SELECT id FROM users WHERE inactive = true);Data Query Language (DQL)
SELECT
-- Basic SELECTSELECT id, name, email FROM users;
-- WHERE clauseSELECT * FROM users WHERE age > 25 AND active = true;
-- ORDER BYSELECT * FROM users ORDER BY created_at DESC LIMIT 10;
-- GROUP BY with HAVINGSELECT department, AVG(salary) as avg_salaryFROM employeesGROUP BY departmentHAVING AVG(salary) > 50000;
-- JOINsSELECT u.name, o.amountFROM users uINNER JOIN orders o ON u.id = o.user_id;
-- SubqueriesSELECT * FROM users WHERE id IN ( SELECT DISTINCT user_id FROM orders WHERE amount > 1000);
-- Common Table Expressions (CTE)WITH high_value_customers AS ( SELECT user_id, SUM(amount) as total FROM orders GROUP BY user_id HAVING SUM(amount) > 10000)SELECT u.name, h.totalFROM users uJOIN high_value_customers h ON u.id = h.user_id;
-- Window functionsSELECT name, salary, RANK() OVER (PARTITION BY department ORDER BY salary DESC) as rankFROM employees;v3.0 Extensions
Time-Series Queries
-- Query time rangeSELECT * FROM metricsWHERE timestamp BETWEEN '2025-01-01' AND '2025-01-31';
-- Downsampled aggregationSELECT TIME_BUCKET('5 minutes', timestamp) as bucket, AVG(value) as avg_valueFROM metricsGROUP BY bucket;Temporal Queries
-- Point-in-time querySELECT * FROM employeesFOR SYSTEM_TIME AS OF '2024-01-01 00:00:00';
-- Time range querySELECT * FROM employeesFOR SYSTEM_TIME BETWEEN '2024-01-01' AND '2024-12-31';
-- All versionsSELECT * FROM employeesFOR SYSTEM_TIME ALLWHERE id = 123;Approximate Queries
-- Approximate COUNT with confidence boundsSELECT region, APPROX_COUNT(*) as count, APPROX_SUM(amount) as totalFROM sales_sampleGROUP BY regionWITH CONFIDENCE 95%;
-- Approximate percentileSELECT APPROX_PERCENTILE(amount, 0.95) FROM sales_sample;
-- Result with error bounds-- count = 1,000,000 ± 50,000 (95% confidence)Streaming Queries
-- Tumbling window (5-minute aggregations)SELECT TUMBLE(timestamp, INTERVAL '5 minutes') as window, COUNT(*) as event_count, AVG(value) as avg_valueFROM event_streamGROUP BY TUMBLE(timestamp, INTERVAL '5 minutes');
-- Sliding windowSELECT HOP(timestamp, INTERVAL '1 minute', INTERVAL '5 minutes') as window, COUNT(*) as event_countFROM event_streamGROUP BY HOP(timestamp, INTERVAL '1 minute', INTERVAL '5 minutes');
-- Session window (10-minute inactivity gap)SELECT user_id, SESSION(timestamp, INTERVAL '10 minutes') as session_id, COUNT(*) as events_per_sessionFROM user_eventsGROUP BY user_id, SESSION(timestamp, INTERVAL '10 minutes');Machine Learning
-- Register ONNX modelREGISTER MODEL fraud_detector FROM '/models/fraud_v1.onnx';
-- In-database inferenceSELECT transaction_id, PREDICT(fraud_detector, amount, merchant_category, user_age) as fraud_scoreFROM transactionsWHERE fraud_score > 0.8;
-- Batch predictionINSERT INTO fraud_predictionsSELECT id, PREDICT(fraud_detector, amount, merchant_category, user_age) as scoreFROM transactionsWHERE processed = false;Graph Queries
-- Recursive CTE (find all descendants)WITH RECURSIVE descendants AS ( SELECT id, name, manager_id, 0 AS depth FROM employees WHERE id = 1
UNION ALL
SELECT e.id, e.name, e.manager_id, d.depth + 1 FROM employees e JOIN descendants d ON e.manager_id = d.id)SELECT * FROM descendants;
-- Shortest pathSELECT shortest_path( graph => 'org_chart', source => 1, target => 100, algorithm => 'dijkstra');
-- Cycle detectionSELECT detect_cycles('dependency_graph');Geospatial Queries
-- Distance querySELECT id, nameFROM locationsWHERE ST_Distance(coordinates, ST_Point(0, 0)) < 1000;
-- Contains querySELECT * FROM locationsWHERE ST_Contains( ST_Polygon('POLYGON((0 0, 10 0, 10 10, 0 10, 0 0))'), coordinates);
-- K-Nearest NeighborsSELECT id, name, ST_Distance(coordinates, ST_Point(5, 5)) as distanceFROM locationsORDER BY distanceLIMIT 10;
-- IntersectionSELECT l1.name, l2.nameFROM locations l1, locations l2WHERE ST_Intersects(l1.coordinates, l2.coordinates) AND l1.id < l2.id;Full-Text Search
-- Basic searchSELECT * FROM productsWHERE MATCH(name, description) AGAINST('laptop');
-- Boolean searchSELECT * FROM productsWHERE MATCH(name, description) AGAINST('+laptop -refurbished' IN BOOLEAN MODE);
-- Phrase searchSELECT * FROM productsWHERE MATCH(name, description) AGAINST('"gaming laptop"' IN BOOLEAN MODE);
-- Ranked resultsSELECT id, name, MATCH(name, description) AGAINST('laptop') as relevanceFROM productsWHERE MATCH(name, description) AGAINST('laptop')ORDER BY relevance DESC;Functions and Operators
Aggregate Functions
-- Standard aggregatesCOUNT(*), SUM(column), AVG(column), MIN(column), MAX(column)
-- StatisticalSTDDEV(column), VARIANCE(column)
-- Approximate (on samples)APPROX_COUNT(*), APPROX_SUM(column), APPROX_AVG(column)APPROX_PERCENTILE(column, percentile)Window Functions
ROW_NUMBER() OVER (...)RANK() OVER (...)DENSE_RANK() OVER (...)LAG(column, offset) OVER (...)LEAD(column, offset) OVER (...)FIRST_VALUE(column) OVER (...)LAST_VALUE(column) OVER (...)String Functions
CONCAT(str1, str2, ...)SUBSTRING(str, start, length)UPPER(str), LOWER(str)TRIM(str), LTRIM(str), RTRIM(str)LENGTH(str)REPLACE(str, old, new)Date/Time Functions
CURRENT_DATE, CURRENT_TIME, CURRENT_TIMESTAMPDATE_ADD(date, INTERVAL n unit)DATE_SUB(date, INTERVAL n unit)EXTRACT(part FROM date)TIME_BUCKET(interval, timestamp)Geospatial Functions
-- ConstructorsST_Point(x, y), ST_LineString(points), ST_Polygon(rings)
-- MeasurementsST_Distance(geom1, geom2)ST_Area(geom)ST_Length(geom)
-- RelationshipsST_Contains(geom1, geom2)ST_Within(geom1, geom2)ST_Intersects(geom1, geom2)ST_Overlaps(geom1, geom2)
-- TransformationsST_Transform(geom, srid)ST_Buffer(geom, distance)Cryptographic Functions
-- HashingSHA256(data), MD5(data)
-- Encryption (requires column encryption)ENCRYPT(data, key, algorithm)DECRYPT(data, key, algorithm)System Tables
Information Schema
-- List tablesSELECT * FROM information_schema.tables;
-- List columnsSELECT * FROM information_schema.columns WHERE table_name = 'users';
-- List indexesSELECT * FROM information_schema.indexes;HeliosDB System Tables
-- Cache statisticsSELECT * FROM heliosdb_cache_stats;
-- Index recommendationsSELECT * FROM heliosdb_index_recommendations;
-- Slow queriesSELECT * FROM heliosdb_slow_queries ORDER BY duration DESC;
-- Replication lagSELECT * FROM heliosdb_replication_status;
-- Shard distributionSELECT * FROM heliosdb_shard_distribution;Data Types
Numeric Types
INTEGER,BIGINT,SMALLINTDECIMAL(p, s),NUMERIC(p, s)FLOAT,DOUBLE
String Types
VARCHAR(n),CHAR(n)TEXTVARCHAR(n) ENCRYPTED(v3.0)
Date/Time Types
DATETIMETIMESTAMPINTERVAL
Boolean
BOOLEAN
Geospatial Types (v3.0)
GEOMETRY(type, srid)GEOGRAPHY(type, srid)POINT,LINESTRING,POLYGON
Special Types
JSON,JSONBARRAYUUID
Transaction Control
-- Begin transactionBEGIN;START TRANSACTION;
-- CommitCOMMIT;
-- RollbackROLLBACK;
-- SavepointSAVEPOINT sp1;ROLLBACK TO SAVEPOINT sp1;RELEASE SAVEPOINT sp1;
-- Isolation levelsSET TRANSACTION ISOLATION LEVEL READ COMMITTED;SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;Examples by Use Case
E-Commerce Platform
-- Create schemaCREATE TABLE products ( id INTEGER PRIMARY KEY, name VARCHAR(200), description TEXT, price DECIMAL(10,2));
CREATE FULLTEXT INDEX fts_products ON products(name, description);
-- Search productsSELECT id, name, price, MATCH(name, description) AGAINST('laptop') as relevanceFROM productsWHERE MATCH(name, description) AGAINST('laptop')ORDER BY relevance DESCLIMIT 10;IoT Platform
-- Create time-series tableCREATE TABLE sensor_data ( timestamp TIMESTAMP, sensor_id INTEGER, temperature DOUBLE, humidity DOUBLE, PRIMARY KEY (timestamp, sensor_id)) WITH ( timeseries_retention = '90 days', timeseries_downsample = 'avg:5m');
-- Query aggregated dataSELECT TIME_BUCKET('1 hour', timestamp) as hour, AVG(temperature) as avg_temp, MAX(temperature) as max_tempFROM sensor_dataWHERE timestamp >= NOW() - INTERVAL '24 hours'GROUP BY hour;Multi-Tenant SaaS
-- Enable row-level securityCREATE TABLE tenant_data ( id INTEGER PRIMARY KEY, tenant_id INTEGER, data TEXT);
CREATE POLICY tenant_isolation ON tenant_dataFOR ALLUSING (tenant_id = current_tenant_id());
ALTER TABLE tenant_data ENABLE ROW LEVEL SECURITY;Performance Tips
- Use indexes wisely: Create indexes on columns used in WHERE, JOIN, ORDER BY
- Use materialized views: Pre-compute expensive aggregations
- Enable query caching: For repeated queries
- Use approximate queries: For exploratory analytics on large datasets
- Partition large tables: Use time-based or hash partitioning
- Use connection pooling: Enabled by default in v3.0