Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .changeset/brave-mangos-visit.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
'hive': minor
---

Add configurable data retention TTL for self-hosted Hive instances. Self-hosted users can now configure retention periods via environment variables instead of hardcoded values.

New environment variables:
- `CLICKHOUSE_TTL_TABLES` - Retention for ClickHouse mergetree tables (Default: 1 YEAR)
- `CLICKHOUSE_TTL_DAILY_MV_TABLES` - Retention for daily materialized view tables (Default: 1 YEAR)
- `CLICKHOUSE_TTL_HOURLY_MV_TABLES` - Retention for hourly materialized view tables (Default: 30 DAYS)
- `CLICKHOUSE_TTL_MINUTELY_MV_TABLES` - Retention for minutely materialized view tables (Default: 24 HOURS)

Supports both numeric days (e.g., `365`) and ClickHouse interval syntax (e.g., `"1 YEAR"`, `"30 DAY"`, `"24 HOUR"`).

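The retention update runs automatically at the end of the migrations run whenever at least one of these environment variables is set to a non-empty value. If the update fails, the error is logged and the migrations run still completes successfully.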
4 changes: 4 additions & 0 deletions docker/docker-compose.community.yml
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,10 @@ services:
CLICKHOUSE_PORT: '8123'
CLICKHOUSE_USERNAME: '${CLICKHOUSE_USER}'
CLICKHOUSE_PASSWORD: '${CLICKHOUSE_PASSWORD}'
CLICKHOUSE_TTL_TABLES: '${CLICKHOUSE_TTL_TABLES:-}'
CLICKHOUSE_TTL_DAILY_MV_TABLES: '${CLICKHOUSE_TTL_DAILY_MV_TABLES:-}'
CLICKHOUSE_TTL_HOURLY_MV_TABLES: '${CLICKHOUSE_TTL_HOURLY_MV_TABLES:-}'
CLICKHOUSE_TTL_MINUTELY_MV_TABLES: '${CLICKHOUSE_TTL_MINUTELY_MV_TABLES:-}'
TS_NODE_TRANSPILE_ONLY: 'true'
LOG_LEVEL: '${LOG_LEVEL:-debug}'

Expand Down
87 changes: 87 additions & 0 deletions integration-tests/tests/migrations/retention.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import { clickHouseQuery } from '../../testkit/clickhouse';
import { ensureEnv } from '../../testkit/env';
import { getServiceHost } from '../../testkit/utils';

/**
 * Runs the retention script with all four TTL variables set and checks that
 * the resulting TTL expressions show up in `system.tables.engine_full` for
 * the `operations` table and for the inner table backing `operations_daily`.
 */
test('update-retention script applies TTL to ClickHouse tables', async () => {
  // Snapshot of the environment so every mutation below can be undone.
  const originalEnv = { ...process.env };

  try {
    // Set up ClickHouse connection env vars for migrations module
    const clickhouseAddress = await getServiceHost('clickhouse', 8123);
    const [host, port] = clickhouseAddress.split(':');
    process.env.CLICKHOUSE_PROTOCOL = 'http';
    process.env.CLICKHOUSE_HOST = host;
    process.env.CLICKHOUSE_PORT = port;
    process.env.CLICKHOUSE_USERNAME = ensureEnv('CLICKHOUSE_USER');
    process.env.CLICKHOUSE_PASSWORD = ensureEnv('CLICKHOUSE_PASSWORD');

    // Set retention TTL values
    process.env.CLICKHOUSE_TTL_TABLES = '1 YEAR';
    process.env.CLICKHOUSE_TTL_DAILY_MV_TABLES = '30 DAY';
    process.env.CLICKHOUSE_TTL_HOURLY_MV_TABLES = '7 DAY';
    process.env.CLICKHOUSE_TTL_MINUTELY_MV_TABLES = '1 DAY';

    // Drop any cached module instance first, so the dynamic import below is
    // evaluated against the env vars set above regardless of test order.
    vi.resetModules();
    const { updateRetention } = await import(
      '../../../packages/migrations/src/scripts/update-retention'
    );

    await updateRetention();

    // Verify TTL was applied to a MergeTree table
    const operationsTable = await clickHouseQuery<{ engine_full: string }>(`
      SELECT engine_full
      FROM system.tables
      WHERE database = 'default' AND name = 'operations'
      LIMIT 1
    `);

    expect(operationsTable.rows).toBe(1);
    expect(operationsTable.data[0].engine_full).toContain('TTL');
    expect(operationsTable.data[0].engine_full).toContain('toIntervalYear(1)');

    // Verify TTL was applied to a daily materialized view inner table.
    // The inner table is looked up by the `.inner_id.<uuid>` name derived
    // from the view's uuid.
    const operationsDailyTable = await clickHouseQuery<{ uuid: string }>(`
      SELECT uuid
      FROM system.tables
      WHERE database = 'default' AND name = 'operations_daily'
      LIMIT 1
    `);

    expect(operationsDailyTable.rows).toBe(1);
    const innerTableName = `.inner_id.${operationsDailyTable.data[0].uuid}`;
    const innerTable = await clickHouseQuery<{ engine_full: string }>(`
      SELECT engine_full
      FROM system.tables
      WHERE database = 'default' AND name = '${innerTableName}'
      LIMIT 1
    `);

    expect(innerTable.rows).toBe(1);
    expect(innerTable.data[0].engine_full).toContain('TTL');
    expect(innerTable.data[0].engine_full).toContain('toIntervalDay(30)');
  } finally {
    // Restore the environment in place. Reassigning `process.env` would swap
    // Node's special environment object for a plain object copy.
    for (const key of Object.keys(process.env)) {
      if (!(key in originalEnv)) {
        delete process.env[key];
      }
    }
    Object.assign(process.env, originalEnv);
  }
});

/**
 * With none of the four retention variables present, `updateRetention()`
 * is expected to resolve to `undefined` without throwing.
 */
test('update-retention script skips gracefully when no env vars are set', async () => {
  // Snapshot of the environment so the deletions below can be undone.
  const originalEnv = { ...process.env };

  try {
    delete process.env.CLICKHOUSE_TTL_TABLES;
    delete process.env.CLICKHOUSE_TTL_DAILY_MV_TABLES;
    delete process.env.CLICKHOUSE_TTL_HOURLY_MV_TABLES;
    delete process.env.CLICKHOUSE_TTL_MINUTELY_MV_TABLES;

    // Force a fresh module evaluation so a previously imported instance
    // (loaded while the variables were set) is not reused.
    vi.resetModules();
    const { updateRetention } = await import(
      '../../../packages/migrations/src/scripts/update-retention'
    );

    // Should not throw
    await expect(updateRetention()).resolves.toBeUndefined();
  } finally {
    // Restore the environment in place. Reassigning `process.env` would swap
    // Node's special environment object for a plain object copy.
    for (const key of Object.keys(process.env)) {
      if (!(key in originalEnv)) {
        delete process.env[key];
      }
    }
    Object.assign(process.env, originalEnv);
  }
});
4 changes: 4 additions & 0 deletions packages/migrations/.env.template
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ CLICKHOUSE_HOST="localhost"
CLICKHOUSE_PORT="8123"
CLICKHOUSE_USERNAME="test"
CLICKHOUSE_PASSWORD="test"
# CLICKHOUSE_TTL_TABLES="1 YEAR"
# CLICKHOUSE_TTL_DAILY_MV_TABLES="30 DAYS"
# CLICKHOUSE_TTL_HOURLY_MV_TABLES="7 DAYS"
# CLICKHOUSE_TTL_MINUTELY_MV_TABLES="1 DAY"

POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
Expand Down
22 changes: 22 additions & 0 deletions packages/migrations/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { migrateClickHouse } from './clickhouse';
import { createConnectionString } from './connection-string';
import { env } from './environment';
import { runPGMigrations } from './run-pg-migrations';
import { updateRetention } from './scripts/update-retention';

const slonik = await createPool(createConnectionString(env.postgres), {
// 10 minute timeout per statement
Expand Down Expand Up @@ -38,6 +39,27 @@ try {
env.clickhouse,
);
}

// Automatically apply retention if any retention setting is configured.
// The check is a truthiness test, so a variable that is present but set to
// an empty string counts as "not configured".
if (
// eslint-disable-next-line no-process-env
process.env.CLICKHOUSE_TTL_TABLES ||
// eslint-disable-next-line no-process-env
process.env.CLICKHOUSE_TTL_DAILY_MV_TABLES ||
// eslint-disable-next-line no-process-env
process.env.CLICKHOUSE_TTL_HOURLY_MV_TABLES ||
// eslint-disable-next-line no-process-env
process.env.CLICKHOUSE_TTL_MINUTELY_MV_TABLES
) {
console.log('Applying clickhouse retention settings...');
try {
await updateRetention();
} catch (error) {
// Best-effort by design: a retention failure is logged but does not fail
// the run; execution continues to the successful exit below.
console.error('Failed to update retention (non-fatal):', error);
}
}

process.exit(0);
} catch (error) {
console.error(error);
Expand Down
Loading
Loading