Integrating DuckDB as a Custom Table Access Method and Executor in PostgreSQL
Embedding an analytical engine like DuckDB directly into PostgreSQL requires bridging two distinct execution models. By implementing a custom extension that hooks in to PostgreSQL's executor and Table Access Method (TAM) interfaces, it is possible to route specific workloads to DuckDB's vectorized, push-based pipeline while maintaining standard SQL compatibility. The integration relies on intercepting query execution paths and delegating storage operations to an embedded DuckDB instance.
Configuration and Hook Initialization
The extension introduces a custom GUC parameter, quack.storage_path, to define the filesystem location for DuckDB data base files. During extension load, two primary initialization routines are invoked: init_duckdb_tam() registers the custom table access method, and install_executor_hooks() overrides PostgreSQL's standard query processing callbacks.
Executor Interception
Query interception is split in to two pathways: one handles Data Manipulation Language (DML) operations, while the other manages Data Definition Language (DDL) and utility commands. Both pathways leverage DuckDB's embedded C API, requiring only the linkage of the core library and header files.
DML Execution Path
void route_dml_to_duckdb(QueryDesc *qdesc) {
duckdb_database duck_ctx = open_embedded_db(CurrentDatabaseId, false);
duckdb_connection sess = establish_session(duck_ctx);
duckdb_result res_set;
if (duckdb_query(sess, qdesc->sourceText, &res_set) == DuckDBError) {
handle_execution_error(&res_set);
return;
}
TupleTableSlot *pg_tuple_slot = MakeTupleTableSlot(qdesc->tupDesc, &TTSOpsHeapTuple);
idx_t total_rows = duckdb_row_count(&res_set);
idx_t total_cols = duckdb_column_count(&res_set);
for (idx_t r = 0; r < total_rows; r++) {
ExecClearTuple(pg_tuple_slot);
for (idx_t c = 0; c < total_cols; c++) {
if (duckdb_value_is_null(&res_set, c, r)) {
pg_tuple_slot->tts_isnull[c] = true;
} else {
pg_tuple_slot->tts_isnull[c] = false;
extract_column_value(pg_tuple_slot, &res_set, c, r);
}
}
ExecStoreVirtualTuple(pg_tuple_slot);
qdesc->dest->receiveSlot(pg_tuple_slot, qdesc->dest);
cleanup_row_buffers(&res_set, r);
}
duckdb_destroy_result(&res_set);
duckdb_disconnect(&sess);
duckdb_close(&duck_ctx);
}
DDL and Utility Path
void process_ddl_via_duckdb(const char *utility_stmt) {
char db_path[MAXPGPATH];
snprintf(db_path, sizeof(db_path), "%s/%u.duckdb", quack_storage_path, CurrentDatabaseId);
duckdb_database duck_ctx = open_embedded_db(CurrentDatabaseId, true);
duckdb_connection sess;
duckdb_connect(duck_ctx, &sess);
duckdb_query(sess, utility_stmt, NULL);
duckdb_disconnect(&sess);
duckdb_close(&duck_ctx);
}
Table Access Method Integration
To expose DuckDB tables natively within PostgreSQL, the extension implements the TableAmRoutine interface. This structure maps PostgreSQL's storage operations to DuckDB-backed equivalents, enabling the planner and executor to interact with the analytical engine as if it were a native heap table.
static const TableAmRoutine duckdb_tam_routine = {
.type = T_TableAmRoutine,
.slot_callbacks = duckdb_slot_ops,
.scan_begin = duckdb_scan_start,
.scan_end = duckdb_scan_finish,
.scan_rescan = duckdb_scan_reset,
.scan_getnextslot = duckdb_fetch_next_row,
.parallelscan_estimate = duckdb_parallel_size_estimate,
.parallelscan_initialize = duckdb_parallel_init,
.parallelscan_reinitialize = duckdb_parallel_restart,
.index_fetch_begin = duckdb_idx_lookup_start,
.index_fetch_reset = duckdb_idx_lookup_reset,
.index_fetch_end = duckdb_idx_lookup_finish,
.index_fetch_tuple = duckdb_idx_retrieve_tuple,
.tuple_fetch_row_version = duckdb_get_row_version,
.tuple_tid_valid = duckdb_validate_tid,
.tuple_get_latest_tid = duckdb_resolve_latest_tid,
.tuple_satisfies_snapshot = duckdb_check_visibility,
.index_delete_tuples = duckdb_purge_index_entries,
.tuple_insert = duckdb_insert_row,
.tuple_insert_speculative = duckdb_speculative_insert,
.tuple_complete_speculative = duckdb_finalize_speculative,
.multi_insert = duckdb_bulk_insert,
.tuple_delete = duckdb_remove_row,
.tuple_update = duckdb_modify_row,
.tuple_lock = duckdb_acquire_row_lock,
.finish_bulk_insert = duckdb_flush_inserts,
.relation_set_new_filenode = duckdb_assign_filenode,
.relation_nontransactional_truncate = duckdb_truncate_table,
.relation_copy_data = duckdb_clone_relation,
.relation_copy_for_cluster = duckdb_cluster_copy,
.relation_vacuum = duckdb_vacuum_table,
.scan_analyze_next_block = duckdb_analyze_block,
.scan_analyze_next_tuple = duckdb_analyze_row,
.index_build_range_scan = duckdb_build_index_range,
.index_validate_scan = duckdb_verify_index,
.relation_size = duckdb_calc_relation_size,
.relation_needs_toast_table = duckdb_requires_toast,
.relation_estimate_size = duckdb_estimate_storage,
.scan_bitmap_next_block = NULL,
.scan_bitmap_next_tuple = NULL,
.scan_sample_next_block = duckdb_sample_block,
.scan_sample_next_tuple = duckdb_sample_row
};