LadybugDB · adsharma · May 28, 2026 · May 21, 2026 · May 28, 2026 · May 28, 2026
diff --git a/astro.config.mjs b/astro.config.mjs
@@ -151,6 +151,7 @@ export default defineConfig({
                                 { label: 'Functions, expressions, & operators', link: '/cypher/expressions' },
                                 { label: 'Data definition language (DDL)', link: '/cypher/data-definition' },
                                 { label: 'Data manipulation clauses', link: '/cypher/data-manipulation-clauses' },
+                                { label: 'Indexes', link: '/cypher/indexes' },
                                 { label: 'Subqueries', link: '/cypher/subquery' },
                                 { label: 'Macros', link: '/cypher/macro' },
                                 { label: 'Transactions', link: '/cypher/transaction' },
@@ -195,6 +196,7 @@ export default defineConfig({
                                 { label: 'Copy from DataFrame', link: '/import/copy-from-dataframe' },
                                 { label: 'Copy from subquery', link: '/import/copy-from-subquery' },
                                 { label: 'Copy from JSON', link: '/import/copy-from-json' },
+                                { label: 'Icebug', link: '/import/icebug' },
                                 { label: 'Graph databases', link: '/import/graph-databases' },
                             ]
                         },
@@ -299,6 +301,7 @@ export default defineConfig({
                         },
                         { label: 'LLM', link: '/extensions/llm', badge: { text: 'New' }},
                         { label: 'Neo4j', link: '/extensions/neo4j'},
+                        { label: 'ADBC', link: '/extensions/adbc' },
                         {
                             label: 'Relational databases',
                             collapsed: true,

diff --git a/src/content/docs/cypher/expressions/aggregate-functions.md b/src/content/docs/cypher/expressions/aggregate-functions.md
@@ -13,5 +13,6 @@ description: Aggregate functions are used to compute a single result from a set
 | `max(arg)` | returns the maximum value of arg | `max(a.length)` |
 | `sum(arg)` | returns the sum value of all tuples in arg | `sum(a.length)` |
 | `collect(arg)` | returns a list of values returned by arg expression | `collect(a.age)` |
+| `percentileDisc(arg, percentile)` | returns the value that corresponds to the given discrete percentile of the input values; `percentile` must be a literal between `0.0` and `1.0` | `percentileDisc(a.age, 0.5)` |
 
 </div>
diff --git a/src/content/docs/cypher/indexes.md b/src/content/docs/cypher/indexes.md
@@ -0,0 +1,58 @@
+---
+title: Indexes
+description: Create and manage primary key indexes on node tables using HASH or ART index types
+---
+
+Ladybug automatically creates a hash-based primary key index on every node table to enforce uniqueness
+and accelerate primary-key lookups. ART indexes are also supported for faster range queries on primary keys. In addition, Ladybug automatically maintains **zone maps** (min/max indexes) on all columns — these are used to skip irrelevant node groups during scans and to answer `COUNT(*)` queries without reading column data.
+
+## Default HASH index
+
+When you create a node table, Ladybug automatically builds a hash-based primary-key index.
+No extra DDL is required.
+
+### Space amplification
+
+The hash index stores one entry per node and adds roughly **15–25 bytes per row** on top of the column data, depending on the primary key type:
+
+| Primary key type | Index overhead |
+|---|---|
+| `INT32` | ~14 bytes/row |
+| `INT64` | ~18 bytes/row |
+| `STRING` | ~18 bytes/row + key length |
+
+The column data is stored with compression (Zstandard by default) and is typically similar in size to the source Parquet file. So the total on-disk footprint of a node table is roughly:
+
+```
+total size ≈ compressed column data + (num_rows × ~15–25 bytes)
+```
+
+**Example**: a 300 MB Parquet file resulted in a **1.2 GB** `.lbdb` database with the default hash index enabled. Disabling the hash index brought it down to **1 GB** — roughly 16% smaller.
+
+If you want to disable the default HASH index to save space, you can do so by setting the `enable_default_hash_index` property to `false` before creating any node tables:
+
+```cypher
+CALL enable_default_hash_index = false;
+```
+
+Note: This setting resets when the database is closed, so you need to run the command at the start of every new session if you want to keep the default index disabled.
+
+## Creating indexes manually
+
+If you want to create an index on a node table when the `enable_default_hash_index` config is set to false, you can run one of the index creation commands:
+
+To create the built-in HASH index, use either of the following forms:
+```cypher
+CREATE HASH INDEX <index_name> FOR (<alias>:<NodeTable>) ON (<alias>.<property>);
+```
+
+```cypher
+CREATE INDEX <index_name> FOR (<alias>:<NodeTable>) ON (<alias>.<property>);
+```
+
+To create the ART index:
+```cypher
+CREATE ART INDEX <index_name> FOR (<alias>:<NodeTable>) ON (<alias>.<property>);
+```
+
+Note: Only one primary key index can exist on a node table at any given time.
diff --git a/src/content/docs/developer-guide/database-internal/index.md b/src/content/docs/developer-guide/database-internal/index.md
@@ -48,7 +48,7 @@ The catalog module contains schema-level information that is generated through D
 The storage module contains data that needs to be persistent to disk. Specifically:
 
 - **BufferManager**: manages all memory being used in the system (except for small memory allocations from the OS); caches recently read pages in memory.
-- **Index**: Hash index for primary keys.
+- **Index**: Hash index and ART (Adaptive Radix Tree) index for primary keys.
 - **Column**: Vanilla column data structure.
 - **List**: CSR-like data structure.
 - **NodeTable**: A collection of multiple columns.

diff --git a/src/content/docs/extensions/adbc.mdx b/src/content/docs/extensions/adbc.mdx
@@ -0,0 +1,130 @@
+---
+title: ADBC extension
+description: Connect to any ADBC-compatible database (DuckDB, PostgreSQL, SQLite, Snowflake, etc.) using the Apache Arrow ADBC standard.
+---
+
+The `adbc` extension allows you to attach any database that exposes an [Apache Arrow Database Connectivity (ADBC)](https://arrow.apache.org/adbc/) driver. ADBC is a vendor-neutral standard for database connectivity, and drivers are available for PostgreSQL, DuckDB, SQLite, Snowflake, and more.
+
+Use ADBC when:
+- You need to connect to a database that doesn't have its own dedicated Ladybug extension.
+- You want to use a single, uniform interface across multiple backend databases.
+
+## Dependencies
+
+The `adbc` extension requires the ADBC driver for the database you want to connect to. You must have the driver library available on your system before using this extension.
+
+Common ADBC drivers can be installed via `pip`:
+
+```bash
+# PostgreSQL
+pip install adbc-driver-postgresql
+
+# DuckDB
+pip install adbc-driver-duckdb
+
+# SQLite
+pip install adbc-driver-sqlite
+
+# Snowflake
+pip install adbc-driver-snowflake
+```
+
+Each package installs a shared library (e.g., `libadbc_driver_postgresql.so` on Linux) that ADBC can load by name.
+
+## Usage
+
+Before getting started, please see [Install an extension](/extensions#install-an-extension) and [Load an extension](/extensions#load-an-extension).
+
+### Attach syntax
+
+```cypher
+ATTACH [DB_PATH] [AS alias]
+  (dbtype adbc, driver = 'DRIVER_NAME', tables = 'TABLE1[,TABLE2,...]' [, schema = 'SCHEMA_NAME'] [, KEY = 'VALUE' ...])
+```
+
+- **`DB_PATH`**: Path or URI to the database. Paths are passed to the driver as `path`; URIs containing `://` are passed as `uri`.
+- **`alias`**: Optional name to reference this database in Ladybug queries.
+- **`driver`** (required): ADBC driver name or path to its shared library.
+- **`tables`** (required): Comma-separated list of table names to expose in Ladybug.
+- **`schema`** (optional): Schema name to look up tables in. Defaults to `main`.
+- Any additional key-value options are forwarded directly to the ADBC driver (e.g., connection credentials).
+
+:::note[Note]
+Unlike other database extensions, the ADBC extension currently requires you to explicitly list the tables you want to attach via `tables = 'table1,table2,...'`. Automatic table discovery is not yet supported.
+:::
+
+### Example: Attach a DuckDB database
+
+First, install and load the `adbc` extension:
+
+```cypher
+INSTALL adbc;
+LOAD adbc;
+```
+
+Then attach a local DuckDB file:
+
+```cypher
+ATTACH 'games.duckdb' AS games_db (dbtype adbc, driver='duckdb', tables='games');
+```
+
+Scan the table:
+
+```cypher
+LOAD FROM games_db.games RETURN id, title, score ORDER BY id;
+```
+
+```table
+┌────┬─────────┬───────┐
+│ id │ title   │ score │
+├────┼─────────┼───────┤
+│ 1  │ Portal  │ 95    │
+│ 2  │ Celeste │ 94    │
+│ 3  │ Hades   │ 93    │
+└────┴─────────┴───────┘
+```
+
+### Example: Attach a PostgreSQL database
+
+```cypher
+ATTACH 'postgresql://user:password@localhost:5432/mydb' AS pg
+  (dbtype adbc, driver='adbc_driver_postgresql', tables='orders,customers');
+```
+
+### Example: Attach a Snowflake database
+
+```cypher
+ATTACH '' AS sf (
+  dbtype adbc,
+  driver = 'adbc_driver_snowflake',
+  tables = 'employees',
+  adbc.snowflake.sql.account = 'myaccount',
+  username = 'myuser',
+  password = 'mypassword'
+);
+```
+
+### Detach a database
+
+```cypher
+DETACH games_db;
+```
+
+## Copy data into Ladybug
+
+You can import data from an ADBC-attached table using `COPY FROM`:
+
+```cypher
+CREATE NODE TABLE Game (id INT64 PRIMARY KEY, title STRING, score INT64);
+COPY Game FROM games_db.games;
+```
+
+Or selectively with a subquery:
+
+```cypher
+COPY Game FROM (LOAD FROM games_db.games WHERE score >= 94 RETURN id, title, score);
+```
+
+## Comparison to dedicated extensions
+
+The `adbc` extension trades per-database optimizations (e.g., push-down SQL queries via `SQL_QUERY`) for breadth: any ADBC driver works. Dedicated extensions such as [`duckdb`](/extensions/attach/duckdb) and [`postgres`](/extensions/attach/postgres) support features like `SQL_QUERY` that bypass Ladybug's query engine entirely for filtering, and may offer better type coverage. Prefer a dedicated extension when one is available.
diff --git a/src/content/docs/extensions/attach/rdbms.mdx b/src/content/docs/extensions/attach/rdbms.mdx
@@ -14,6 +14,11 @@ The currently available relational database extensions are shown below:
 
 Extension Name | Description | Minimum Version
 :---:|:---:|:---:
+[`adbc`](/extensions/adbc) | Scan from any ADBC-compatible database | 1.10.0
 [`duckdb`](/extensions/attach/duckdb) | Scan from an attached DuckDB database | 0.10.0
 [`postgres`](/extensions/attach/postgres) | Scan from an attached PostgreSQL database | 14.0
 [`sqlite`](/extensions/attach/sqlite) | Scan from an attached SQLite database | 3.3.0
+
+:::note[Note]
+The `adbc` extension is a generic adapter that works with any database exposing an [ADBC driver](https://arrow.apache.org/adbc/). Prefer a dedicated extension (DuckDB, PostgreSQL, SQLite) when one is available, as dedicated extensions offer additional features such as arbitrary SQL pass-through via `SQL_QUERY`.
+:::
diff --git a/src/content/docs/extensions/index.mdx b/src/content/docs/extensions/index.mdx
@@ -13,6 +13,7 @@ The following extensions are currently implemented:
 
 | Extension | Description |
 |----------|----------|
+| [adbc](/extensions/adbc) | Scan data from any ADBC-compatible database (DuckDB, PostgreSQL, SQLite, Snowflake, etc.) |
 | [algo](/extensions/algo) | Graph algorithms |
 | [azure](/extensions/azure) | Scan from Azure Blob Storage and Azure Data Lake Storage (ADLS) |
 | [delta](/extensions/attach/delta) | Scan data from Delta Lake tables |

diff --git a/src/content/docs/get-started/scan.mdx b/src/content/docs/get-started/scan.mdx
@@ -115,7 +115,8 @@ Query data directly from external sources without importing. Unlike `LOAD FROM`,
 
 ### Supported sources
 
-- Parquet files
+- Icebug disk (CSR Parquet files with a Cypher schema)
+- Icebug memory (CSR Arrow tables)
 - Arrow memory
 - DuckDB tables
 - PostgreSQL (coming soon)
@@ -132,16 +133,78 @@ WITH (storage='path/to/employee.parquet');
 
 ```cypher
 CREATE REL TABLE WorksIn (from Person, to Company, since INT32)
-WITH (storage='path/to/works_in.parquet');
+WITH (storage='path/to/works_in.parquet', format='icebug-disk');
 ```
 
-### Arrow memory
-
-You can query Arrow memory directly by registering it with the database. The `arrowId` is obtained when you register Arrow memory (e.g., via the Python API):
+### Icebug disk
 
+**NODE tables:**
 ```cypher
 CREATE NODE TABLE Employee (id INT64, name STRING, PRIMARY KEY (id))
-WITH (storage='arrow://my_arrow_table_id');
+WITH (storage='icebug-disk/', format='icebug-disk');
+
+CREATE NODE TABLE Company (id INT64, name STRING, PRIMARY KEY (id))
+WITH (storage='icebug-disk/company.parquet', format='icebug-disk');
+```
+
+**REL table:**
+```cypher
+CREATE REL TABLE WorksIn (from Employee, to Company, since INT32)
+WITH (storage='icebug-disk/', format='icebug-disk');
+```
+
+For more details about generating and using icebug-disk files, see the [Icebug documentation](/import/icebug/).
+
+### Icebug memory
+
+```python
+import ladybug as lb
+
+db = lb.Database()
+conn = lb.Connection(db)
+
+# Create node table
+conn.create_arrow_table(
+    table_name="employee",           # node table name to be used in ladybug
+    dataframe=pa_employee            # node table as a pa.Table
+)
+
+# create rel table
+conn.create_arrow_rel_table(
+    table_name="worksin",            # rel table name to be used in ladybug
+    src_table_name="employee",       # src node table name from table creation earlier
+    dst_table_name="company",        # dst node table name from table creation earlier
+    layout="CSR",
+    dataframe=pa_company_indices,    # rel table with 'source' and 'target' columns
+    dst_col_name="target",           # dst col name in the indices table
+    indptr=pa_company_indptr,        # row pointers for indices table
+)
+```
+
+For more details about generating and using icebug-memory tables, see the [Icebug documentation](/import/icebug/).
+
+### Arrow memory
+
+```python
+import ladybug as lb
+
+db = lb.Database()
+conn = lb.Connection(db)
+
+# Create node table
+conn.create_arrow_table(
+    table_name="employee",           # node table name to be used in ladybug
+    dataframe=pa_employee            # node table as a pa.Table
+)
+
+# create rel table
+conn.create_arrow_rel_table(
+    table_name="worksin",            # rel table name to be used in ladybug
+    src_table_name="employee",       # src node table name from table creation earlier
+    dst_table_name="company",        # dst node table name from table creation earlier
+    layout="FLAT",
+    dataframe=pa_company,            # rel table with 'from' and 'to' columns
+)
 ```
 
 ### DuckDB

diff --git a/src/content/docs/import/graph-databases.md b/src/content/docs/import/graph-databases.md
@@ -37,37 +37,4 @@ To use GraphAr data in Ladybug, convert it to Icebug format first:
 uvx icebug-format --graphar <path to graphar archive>
 ```
 
-This generates a directory of Parquet files plus a Cypher schema file that can be loaded directly with `lbug -i`. See the [Icebug format](#icebug-format) section for details.
-
-## Icebug format
-
-[Icebug](https://github.com/Ladybug-Memory/icebug-format) is a Ladybug-native graph-aware Parquet format designed for ingestion-free graph analytics. Unlike general-purpose Parquet files, Icebug preserves graph structure (node and relationship tables) and enables direct querying without preprocessing.
-
-### Generating Icebug files
-
-Use the `icebug-format` tool to generate Icebug files from existing databases:
-
-```bash
-# From a DuckDB database
-uvx icebug-format --source-db demo-db.duckdb --schema schema.cypher
-
-# From a GraphAr archive
-uvx icebug-format --graphar <path to archive>
-```
-
-This generates a directory of Parquet files (for nodes and relationships) plus a Cypher schema file.
-
-### Using Icebug files
-
-Start Ladybug with the generated schema file using the `-i` flag:
-
-```bash
-lbug -i csr_graph/schema.cypher
-```
-
-Then query the graph directly:
-
-```cypher
-MATCH (a:User)-[b:LivesIn]->(c:City)
-RETURN a.*, b.*, c.*;
-```
+This generates a directory of Parquet files plus a Cypher schema file that can be loaded directly with `lbug -i`. See the [Icebug format](/import/icebug) page for details.