diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 988dc0661..9928d0cdd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,6 +53,29 @@ jobs: - name: Run TCD validation tests run: npm run test -w tcd + sqlite: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Setup Node.js + uses: actions/setup-node@v6 + with: + node-version: "24" + + - name: Install dependencies + run: npm install + + - name: Build project + run: npm run build + + - name: Build SQLite database + run: npm run build -w sqlite + + - name: Run SQLite validation tests + run: npm run test -w sqlite + lint: runs-on: ubuntu-latest steps: diff --git a/packages/sqlite/README.md b/packages/sqlite/README.md new file mode 100644 index 000000000..a929d6b10 --- /dev/null +++ b/packages/sqlite/README.md @@ -0,0 +1,70 @@ +# Neaps Tide Database - TideBase (SQLite) + +This package generates a [TideBase](./spec.md) file — an SQLite database containing all stations from the Neaps Tide Database with harmonic constituents, tidal datums, subordinate station offsets, and precomputed equilibrium arguments and node factors for tide prediction. + +TideBase is a modern, portable, and queryable SQLite database format containing everything needed for tide prediction. 
+ +## Usage + +Download the latest `*.tidebase` file from [GitHub Releases](https://github.com/openwatersio/tide-database/releases) and query it with any SQLite client: + +```sh +sqlite3 neaps*.tidebase +``` + +### Find a station + +```sql +SELECT station_id, name, latitude, longitude +FROM stations +WHERE station_id = 'noaa/9414290'; +``` + +### Find nearby stations + +```sql +SELECT station_id, name, latitude, longitude +FROM stations +WHERE latitude BETWEEN 37.5 AND 38.0 + AND longitude BETWEEN -122.6 AND -122.2 +ORDER BY name; +``` + +### Get prediction data for a station and year + +```sql +SELECT sc.constituent, c.speed, sc.amplitude, sc.phase, + ea.value AS eq_argument, nf.value AS node_factor +FROM station_constituents sc +JOIN stations s ON s.id = sc.station_id +JOIN constituents c ON c.name = sc.constituent +LEFT JOIN equilibrium_arguments ea + ON ea.constituent = sc.constituent AND ea.year = 2026 +LEFT JOIN node_factors nf + ON nf.constituent = sc.constituent AND nf.year = 2026 +WHERE s.station_id = 'noaa/9414290' +ORDER BY c.speed; +``` + +See [examples/](./examples/) for more queries. See [spec.md](./spec.md) for the full specification. + +## Contributing + +Build the database: + +```sh +npm run build +``` + +### Testing + +The test suite validates that the built database correctly preserves all station data from the source JSON files, and that all example queries execute successfully. 
+ +```sh +npm test +``` + +## References + +- [TideBase Specification](./spec.md) +- [@neaps/tide-predictor](https://github.com/openwatersio/neaps/tree/main/packages/tide-predictor) diff --git a/packages/sqlite/build b/packages/sqlite/build new file mode 100755 index 000000000..76f45c5e9 --- /dev/null +++ b/packages/sqlite/build @@ -0,0 +1,5 @@ +#!/bin/bash +set -e +cd "$(dirname "$0")" +rm -rf dist && mkdir -p dist +node build.ts diff --git a/packages/sqlite/build.ts b/packages/sqlite/build.ts new file mode 100644 index 000000000..1f91618e7 --- /dev/null +++ b/packages/sqlite/build.ts @@ -0,0 +1,379 @@ +#!/usr/bin/env node +/** + * Generates an SQLite database from station JSON data. + * + * The database is a normalized, self-contained replacement for the TCD format, + * with tables for stations, constituents, datums, offsets, and precomputed + * equilibrium arguments and node factors for tide prediction. + */ + +import { readFileSync } from "fs"; +import { dirname, join } from "path"; +import { fileURLToPath } from "url"; +import { DatabaseSync } from "node:sqlite"; +import tidePredictor, { astro } from "@neaps/tide-predictor"; +import { + stations, + constituents as constituentDefs, + type Station, +} from "@neaps/tide-database"; + +const tpConstituents = tidePredictor.constituents; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const outDir = join(__dirname, "dist"); +const dbPath = join(outDir, "tides.tidebase"); +const SCHEMA = readFileSync(join(__dirname, "schema.sql"), "utf-8"); + +// --------------------------------------------------------------------------- +// Configuration +// --------------------------------------------------------------------------- + +const START_YEAR = 1970; +const END_YEAR = 2100; + +// --------------------------------------------------------------------------- +// Helpers (reused from packages/tcd/build.ts) +// --------------------------------------------------------------------------- + +function modulus(a: 
number, b: number): number { + return ((a % b) + b) % b; +} + +function computeEquilibriumArgument(name: string, time: Date): number { + const constituent = tpConstituents[name]; + if (!constituent) return 0; + const astroData = astro(time); + const V0 = constituent.value(astroData); + const { u } = constituent.correction(astroData); + return modulus(V0 + u, 360); +} + +function computeNodeFactor(name: string, time: Date): number { + const constituent = tpConstituents[name]; + if (!constituent) return 1; + const astroData = astro(time); + const { f } = constituent.correction(astroData); + return f; +} + +/** + * Resolve a station constituent name to its canonical name in tide-predictor. + * Returns canonical name or null if not found. + */ +function resolveConstituentName( + stationName: string, + knownNames: Set, +): string | null { + if (knownNames.has(stationName)) return stationName; + const tp = tpConstituents[stationName]; + if (tp && knownNames.has(tp.name)) return tp.name; + return null; +} + +// --------------------------------------------------------------------------- +// Build +// --------------------------------------------------------------------------- + +function main() { + console.error("Loading stations..."); + + const referenceStations = stations.filter( + (s: Station) => s.type === "reference", + ); + const subordinateStations = stations.filter( + (s: Station) => s.type === "subordinate", + ); + + console.error( + `Found ${stations.length} stations (${referenceStations.length} reference, ${subordinateStations.length} subordinate)`, + ); + + // ----------------------------------------------------------------------- + // Build constituent list: canonical tide-predictor constituents + any + // additional constituents found only in station data + // ----------------------------------------------------------------------- + + // Start with all constituents from the canonical definitions file + const constituentMap = new Map< + string, + { description: 
string | null; speed: number } + >(); + for (const c of constituentDefs) { + constituentMap.set(c.name, { description: c.description, speed: c.speed }); + } + + // Scan station data for any constituents not in the canonical list + for (const station of referenceStations) { + for (const hc of station.harmonic_constituents) { + // Try to resolve to canonical name + const tp = tpConstituents[hc.name]; + const canonicalName = tp ? tp.name : hc.name; + if (!constituentMap.has(canonicalName)) { + constituentMap.set(canonicalName, { + description: null, + speed: tp?.speed ?? hc.speed ?? 0, + }); + } + } + } + + const constituentNames = [...constituentMap.keys()]; + console.error(`Constituent list: ${constituentNames.length} constituents`); + + // Build set for resolution lookups + const constituentNameSet = new Set(constituentNames); + + // ----------------------------------------------------------------------- + // Open database + // ----------------------------------------------------------------------- + + console.error(`Creating database at ${dbPath}...`); + const db = new DatabaseSync(dbPath); + + // Performance pragmas for bulk loading + db.exec("PRAGMA journal_mode = OFF"); + db.exec("PRAGMA synchronous = OFF"); + db.exec("PRAGMA locking_mode = EXCLUSIVE"); + db.exec("PRAGMA cache_size = -64000"); // 64MB cache + + // Create schema + db.exec(SCHEMA); + + // ----------------------------------------------------------------------- + // Insert data in a transaction + // ----------------------------------------------------------------------- + + db.exec("BEGIN TRANSACTION"); + + // --- Metadata --- + const insertMeta = db.prepare( + "INSERT INTO metadata (key, value) VALUES (?, ?)", + ); + insertMeta.run( + "generator", + "tide-database (https://openwaters.io/tides/database)", + ); + insertMeta.run("generated_at", new Date().toISOString()); + insertMeta.run("start_year", String(START_YEAR)); + insertMeta.run("end_year", String(END_YEAR)); + + // --- Constituents --- + 
console.error("Inserting constituents..."); + const insertConstituent = db.prepare( + "INSERT INTO constituents (name, description, speed) VALUES (?, ?, ?)", + ); + for (const [name, { description, speed }] of constituentMap) { + insertConstituent.run(name, description, speed); + } + + // --- Sources (deduplicated) --- + console.error("Inserting sources..."); + const insertSource = db.prepare( + "INSERT INTO sources (name, url) VALUES (?, ?)", + ); + const getSourceId = db.prepare( + "SELECT id FROM sources WHERE name = ? AND url = ?", + ); + const sourceIdCache = new Map(); + + function getOrCreateSource(source: Station["source"]): number { + const cacheKey = `${source.name}|${source.url}`; + let id = sourceIdCache.get(cacheKey); + if (id !== undefined) return id; + + insertSource.run(source.name, source.url); + const row = getSourceId.get(source.name, source.url) as { id: number }; + id = row.id; + sourceIdCache.set(cacheKey, id); + return id; + } + + // --- Stations --- + console.error("Inserting stations..."); + const insertStation = db.prepare(` + INSERT INTO stations ( + station_id, name, type, latitude, longitude, + continent, country, region, timezone, disclaimers, + source_id, source_station_id, + license, commercial_use, license_url, license_notes, + epoch_start, epoch_end + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + `); + // Map from text station_id to integer id + const stationIdMap = new Map(); + + for (const station of stations) { + const sourceId = getOrCreateSource(station.source); + const result = insertStation.run( + station.id, + station.name, + station.type, + station.latitude, + station.longitude, + station.continent, + station.country, + station.region ?? null, + station.timezone, + station.disclaimers || null, + sourceId, + station.source.id, + station.license.type, + station.license.commercial_use ? 1 : 0, + station.license.url, + station.license.notes ?? null, + station.epoch?.start ?? null, + station.epoch?.end ?? 
null, + ); + + const intId = Number(result.lastInsertRowid); + stationIdMap.set(station.id, intId); + } + + // --- Station constituents --- + console.error("Inserting station constituents..."); + const insertStationConstituent = db.prepare( + "INSERT INTO station_constituents (station_id, constituent, amplitude, phase) VALUES (?, ?, ?, ?)", + ); + + let totalHC = 0; + let insertedHC = 0; + + for (const station of referenceStations) { + const intId = stationIdMap.get(station.id)!; + for (const hc of station.harmonic_constituents) { + totalHC++; + const resolved = resolveConstituentName(hc.name, constituentNameSet); + if (resolved) { + insertStationConstituent.run( + intId, + resolved, + hc.amplitude, + modulus(hc.phase, 360), + ); + insertedHC++; + } + } + } + console.error( + ` Constituent coverage: ${insertedHC}/${totalHC} (${((insertedHC / totalHC) * 100).toFixed(1)}%)`, + ); + + // --- Station offsets --- + console.error("Inserting station offsets..."); + const insertOffset = db.prepare(` + INSERT INTO station_offsets ( + station_id, reference_id, height_type, + height_high, height_low, time_high, time_low + ) VALUES (?, ?, ?, ?, ?, ?, ?) 
+ `); + + let offsetCount = 0; + for (const station of subordinateStations) { + if (!station.offsets) continue; + const intId = stationIdMap.get(station.id)!; + const refIntId = stationIdMap.get(station.offsets.reference); + if (!refIntId) { + console.error( + `WARNING: Subordinate "${station.name}" references unknown station "${station.offsets.reference}", skipping`, + ); + continue; + } + + const offsets = station.offsets; + insertOffset.run( + intId, + refIntId, + offsets.height.type, + offsets.height.high, + offsets.height.low, + offsets.time.high, + offsets.time.low, + ); + offsetCount++; + } + console.error(` Inserted ${offsetCount} subordinate offsets`); + + // --- Station datums --- + console.error("Inserting station datums..."); + const insertDatum = db.prepare( + "INSERT INTO station_datums (station_id, datum, value) VALUES (?, ?, ?)", + ); + + let datumCount = 0; + for (const station of stations) { + if (!station.datums) continue; + const intId = stationIdMap.get(station.id)!; + for (const [name, value] of Object.entries(station.datums)) { + insertDatum.run(intId, name, value); + datumCount++; + } + } + console.error(` Inserted ${datumCount} datum values`); + + // --- Equilibrium arguments --- + console.error("Computing equilibrium arguments..."); + const insertEqArg = db.prepare( + "INSERT INTO equilibrium_arguments (constituent, year, value) VALUES (?, ?, ?)", + ); + + let eqCount = 0; + for (const name of constituentNames) { + // Only compute for constituents known to tide-predictor + if (!tpConstituents[name]) continue; + for (let year = START_YEAR; year <= END_YEAR; year++) { + const time = new Date(Date.UTC(year, 0, 1, 0, 0, 0)); + const value = computeEquilibriumArgument(name, time); + insertEqArg.run(name, year, value); + eqCount++; + } + } + console.error(` Inserted ${eqCount} equilibrium arguments`); + + // --- Node factors --- + console.error("Computing node factors..."); + const insertNodeFactor = db.prepare( + "INSERT INTO node_factors 
(constituent, year, value) VALUES (?, ?, ?)", + ); + + let nfCount = 0; + for (const name of constituentNames) { + if (!tpConstituents[name]) continue; + for (let year = START_YEAR; year <= END_YEAR; year++) { + const time = new Date(Date.UTC(year, 6, 1, 0, 0, 0)); + const value = computeNodeFactor(name, time); + insertNodeFactor.run(name, year, value); + nfCount++; + } + } + console.error(` Inserted ${nfCount} node factors`); + + // --- Final metadata --- + insertMeta.run("station_count", String(stations.length)); + insertMeta.run("constituent_count", String(constituentNames.length)); + + db.exec("COMMIT"); + + // ----------------------------------------------------------------------- + // Finalize + // ----------------------------------------------------------------------- + + console.error("Running ANALYZE..."); + db.exec("ANALYZE"); + + console.error("Running VACUUM..."); + db.exec("VACUUM"); + + db.close(); + + console.error(`\nDatabase written to ${dbPath}`); + console.error(` Stations: ${stations.length}`); + console.error(` Constituents: ${constituentNames.length}`); + console.error(` Station constituents: ${insertedHC}`); + console.error(` Station offsets: ${offsetCount}`); + console.error(` Station datums: ${datumCount}`); + console.error(` Equilibrium arguments: ${eqCount}`); + console.error(` Node factors: ${nfCount}`); +} + +main(); diff --git a/packages/sqlite/examples/find-by-bounding-box.sql b/packages/sqlite/examples/find-by-bounding-box.sql new file mode 100644 index 000000000..a08940e4e --- /dev/null +++ b/packages/sqlite/examples/find-by-bounding-box.sql @@ -0,0 +1,7 @@ +-- Find all stations within a bounding box +-- Example: San Francisco Bay area (roughly 37.4°N to 38.0°N, -122.6°W to -122.0°W) +SELECT station_id, name, latitude, longitude, type +FROM stations +WHERE latitude BETWEEN 37.4 AND 38.0 + AND longitude BETWEEN -122.6 AND -122.0 +ORDER BY name; diff --git a/packages/sqlite/examples/find-by-country.sql 
b/packages/sqlite/examples/find-by-country.sql new file mode 100644 index 000000000..c11d800f3 --- /dev/null +++ b/packages/sqlite/examples/find-by-country.sql @@ -0,0 +1,18 @@ +-- Find all stations in a country +SELECT s.station_id, s.name, s.type, s.latitude, s.longitude +FROM stations s +WHERE s.country = 'United States' +ORDER BY s.name; + +-- Count stations by continent +SELECT continent, count(*) AS station_count +FROM stations +GROUP BY continent +ORDER BY station_count DESC; + +-- Count stations by country within Europe +SELECT country, count(*) AS station_count +FROM stations +WHERE continent = 'Europe' +GROUP BY country +ORDER BY station_count DESC; diff --git a/packages/sqlite/examples/find-by-id.sql b/packages/sqlite/examples/find-by-id.sql new file mode 100644 index 000000000..ea90c0c86 --- /dev/null +++ b/packages/sqlite/examples/find-by-id.sql @@ -0,0 +1,5 @@ +-- Find a station by its text ID +SELECT s.*, src.name AS source_name, src.url AS source_url +FROM stations s +JOIN sources src ON src.id = s.source_id +WHERE s.station_id = 'noaa/9414290'; diff --git a/packages/sqlite/examples/find-commercial.sql b/packages/sqlite/examples/find-commercial.sql new file mode 100644 index 000000000..76b485135 --- /dev/null +++ b/packages/sqlite/examples/find-commercial.sql @@ -0,0 +1,11 @@ +-- Find all stations with commercially-usable licenses +SELECT s.station_id, s.name, s.country, s.license +FROM stations s +WHERE s.commercial_use = 1 +ORDER BY s.country, s.name; + +-- Count stations by license type +SELECT license, commercial_use, count(*) AS station_count +FROM stations +GROUP BY license, commercial_use +ORDER BY station_count DESC; diff --git a/packages/sqlite/examples/find-nearest.sql b/packages/sqlite/examples/find-nearest.sql new file mode 100644 index 000000000..bfacadc9c --- /dev/null +++ b/packages/sqlite/examples/find-nearest.sql @@ -0,0 +1,21 @@ +-- Find the 10 nearest stations to a point +-- Example: near San Francisco (37.7749°N, -122.4194°W) +-- 
+-- Uses the equirectangular approximation which is fast and accurate +-- enough for nearby stations. For exact distances, use the full haversine formula. + +WITH nearby AS ( + SELECT station_id, name, type, latitude, longitude, + (latitude - 37.7749) * (latitude - 37.7749) + + ((longitude - (-122.4194)) * cos(37.7749 * 3.14159265 / 180)) * + ((longitude - (-122.4194)) * cos(37.7749 * 3.14159265 / 180)) + AS dist_sq + FROM stations + WHERE latitude BETWEEN 37.7749 - 2 AND 37.7749 + 2 + AND longitude BETWEEN -122.4194 - 2 AND -122.4194 + 2 +) +SELECT station_id, name, type, latitude, longitude, + round(sqrt(dist_sq) * 111.32, 2) AS approx_distance_km +FROM nearby +ORDER BY dist_sq +LIMIT 10; diff --git a/packages/sqlite/examples/prediction-data.sql b/packages/sqlite/examples/prediction-data.sql new file mode 100644 index 000000000..ce2ac093b --- /dev/null +++ b/packages/sqlite/examples/prediction-data.sql @@ -0,0 +1,29 @@ +-- Get everything needed for tide prediction at a station for a given year: +-- constituent amplitudes, phases, speeds, equilibrium arguments, and node factors +-- +-- This query returns all the data needed to compute tide heights using: +-- h(t) = Z₀ + Σ f·H·cos(ωt + V₀+u - κ) +-- where: +-- f = node factor (from node_factors table) +-- H = amplitude (from station_constituents) +-- ω = speed in deg/hr (from constituents) +-- V₀+u = equilibrium argument (from equilibrium_arguments) +-- κ = phase (from station_constituents) +-- Z₀ = datum offset (MSL - MLLW, from station_datums) + +SELECT + sc.constituent, + c.speed AS speed_deg_per_hr, + sc.amplitude AS amplitude_m, + sc.phase AS phase_deg, + ea.value AS eq_argument_deg, + nf.value AS node_factor +FROM station_constituents sc +JOIN stations s ON s.id = sc.station_id +JOIN constituents c ON c.name = sc.constituent +LEFT JOIN equilibrium_arguments ea + ON ea.constituent = sc.constituent AND ea.year = 2026 +LEFT JOIN node_factors nf + ON nf.constituent = sc.constituent AND nf.year = 2026 +WHERE 
s.station_id = 'noaa/9414290' +ORDER BY c.speed; diff --git a/packages/sqlite/examples/station-constituents.sql b/packages/sqlite/examples/station-constituents.sql new file mode 100644 index 000000000..b60435a6e --- /dev/null +++ b/packages/sqlite/examples/station-constituents.sql @@ -0,0 +1,7 @@ +-- Get all harmonic constituents for a station, with speeds and descriptions +SELECT c.name, c.description, c.speed, sc.amplitude, sc.phase +FROM station_constituents sc +JOIN constituents c ON c.name = sc.constituent +JOIN stations s ON s.id = sc.station_id +WHERE s.station_id = 'noaa/9414290' +ORDER BY c.speed; diff --git a/packages/sqlite/examples/station-datums.sql b/packages/sqlite/examples/station-datums.sql new file mode 100644 index 000000000..c893e8164 --- /dev/null +++ b/packages/sqlite/examples/station-datums.sql @@ -0,0 +1,6 @@ +-- Get all datum values for a station (in meters) +SELECT d.datum, d.value +FROM station_datums d +JOIN stations s ON s.id = d.station_id +WHERE s.station_id = 'noaa/9414290' +ORDER BY d.value DESC; diff --git a/packages/sqlite/examples/subordinate-stations.sql b/packages/sqlite/examples/subordinate-stations.sql new file mode 100644 index 000000000..a84f5a622 --- /dev/null +++ b/packages/sqlite/examples/subordinate-stations.sql @@ -0,0 +1,9 @@ +-- Find all subordinate stations for a given reference station +SELECT s.station_id, s.name, + o.height_type, o.height_high, o.height_low, + o.time_high, o.time_low +FROM station_offsets o +JOIN stations s ON s.id = o.station_id +JOIN stations ref ON ref.id = o.reference_id +WHERE ref.station_id = 'noaa/8443970' +ORDER BY s.name; diff --git a/packages/sqlite/package.json b/packages/sqlite/package.json new file mode 100644 index 000000000..7968f8b33 --- /dev/null +++ b/packages/sqlite/package.json @@ -0,0 +1,14 @@ +{ + "name": "sqlite", + "private": true, + "type": "module", + "scripts": { + "build": "./build", + "pretest": "npm run build", + "test": "vitest" + }, + "devDependencies": { + 
"@neaps/tide-database": "file:../..", + "@neaps/tide-predictor": "*" + } +} diff --git a/packages/sqlite/schema.sql b/packages/sqlite/schema.sql new file mode 100644 index 000000000..26340169f --- /dev/null +++ b/packages/sqlite/schema.sql @@ -0,0 +1,84 @@ +CREATE TABLE metadata ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +) WITHOUT ROWID; + +CREATE TABLE constituents ( + name TEXT PRIMARY KEY, + description TEXT, + speed REAL NOT NULL +) WITHOUT ROWID; + +CREATE TABLE sources ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + url TEXT NOT NULL, + UNIQUE(name, url) +); + +CREATE TABLE stations ( + id INTEGER PRIMARY KEY, + station_id TEXT NOT NULL UNIQUE, + name TEXT NOT NULL, + type TEXT NOT NULL CHECK (type IN ('reference', 'subordinate')), + latitude REAL NOT NULL, + longitude REAL NOT NULL, + continent TEXT NOT NULL, + country TEXT NOT NULL, + region TEXT, + timezone TEXT NOT NULL, + disclaimers TEXT, + source_id INTEGER NOT NULL REFERENCES sources(id), + source_station_id TEXT NOT NULL, + license TEXT NOT NULL, + commercial_use INTEGER NOT NULL DEFAULT 0, + license_url TEXT NOT NULL, + license_notes TEXT, + epoch_start TEXT, + epoch_end TEXT +); + +CREATE INDEX idx_stations_type ON stations(type); +CREATE INDEX idx_stations_country ON stations(country); +CREATE INDEX idx_stations_continent ON stations(continent); + +CREATE TABLE station_constituents ( + station_id INTEGER NOT NULL REFERENCES stations(id), + constituent TEXT NOT NULL REFERENCES constituents(name), + amplitude REAL NOT NULL, + phase REAL NOT NULL, + PRIMARY KEY (station_id, constituent) +) WITHOUT ROWID; + +CREATE TABLE station_offsets ( + station_id INTEGER PRIMARY KEY REFERENCES stations(id), + reference_id INTEGER NOT NULL REFERENCES stations(id), + height_type TEXT NOT NULL CHECK (height_type IN ('ratio', 'fixed')), + height_high REAL NOT NULL, + height_low REAL NOT NULL, + time_high INTEGER NOT NULL, + time_low INTEGER NOT NULL +); + +CREATE INDEX idx_station_offsets_reference ON 
station_offsets(reference_id); + +CREATE TABLE station_datums ( + station_id INTEGER NOT NULL REFERENCES stations(id), + datum TEXT NOT NULL, + value REAL NOT NULL, + PRIMARY KEY (station_id, datum) +) WITHOUT ROWID; + +CREATE TABLE equilibrium_arguments ( + constituent TEXT NOT NULL REFERENCES constituents(name), + year INTEGER NOT NULL, + value REAL NOT NULL, + PRIMARY KEY (constituent, year) +) WITHOUT ROWID; + +CREATE TABLE node_factors ( + constituent TEXT NOT NULL REFERENCES constituents(name), + year INTEGER NOT NULL, + value REAL NOT NULL, + PRIMARY KEY (constituent, year) +) WITHOUT ROWID; diff --git a/packages/sqlite/spec.md b/packages/sqlite/spec.md new file mode 100644 index 000000000..7e4777d18 --- /dev/null +++ b/packages/sqlite/spec.md @@ -0,0 +1,306 @@ +# TideBase Specification + +**Version 1.0 (Draft)** + +## Abstract + +TideBase is an open specification for storing tidal harmonic data in [SQLite](https://www.sqlite.org/) databases. A single TideBase file contains everything needed for tide prediction: station metadata, harmonic constituents, tidal datums, subordinate station offsets, and precomputed astronomical parameters. TideBase is designed as a modern, portable, and queryable successor to the [Tidal Constituent Database (TCD)](https://flaterco.com/xtide/files.html) format. + +TideBase files use the extension **`.tidebase`**. + +## Definitions + +The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in [RFC 2119](https://www.ietf.org/rfc/rfc2119.txt). + +## Database + +A TideBase file MUST be a valid SQLite database of [version 3](https://www.sqlite.org/formatchng.html) or higher. Only core SQLite features are used. A TideBase file SHALL NOT require any SQLite extensions. + +### Charset + +All text values in a TideBase database MUST be encoded as UTF-8. 
+ +### Units + +All height and amplitude values MUST be in **meters**. All speed values MUST be in **degrees per solar hour**. All phase and equilibrium argument values MUST be in **degrees** (0-360) relative to UTC. All time offsets MUST be in **minutes**. + +## Database Specification + +### `metadata` + +#### Schema + +```sql +CREATE TABLE metadata ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +) WITHOUT ROWID; +``` + +#### Content + +The `metadata` table MUST contain the following rows: + +| Key | Value | +| ------------------- | ------------------------------------------------------------------------------------- | +| `generator` | The name and/or URL of the software that generated the file. | +| `generated_at` | ISO 8601 timestamp of when the file was generated. | +| `station_count` | Total number of stations in the `stations` table, as a string. | +| `constituent_count` | Total number of constituents in the `constituents` table, as a string. | +| `start_year` | First year covered by `equilibrium_arguments` and `node_factors` tables, as a string. | +| `end_year` | Last year covered by `equilibrium_arguments` and `node_factors` tables, as a string. | + +Producers MAY include additional metadata keys. Consumers SHOULD ignore keys they do not recognize. + +### `constituents` + +The `constituents` table defines tidal harmonic constituents (M2, S2, K1, O1, etc.) used by stations in the database. + +#### Schema + +```sql +CREATE TABLE constituents ( + name TEXT PRIMARY KEY, + description TEXT, + speed REAL NOT NULL +) WITHOUT ROWID; +``` + +#### Content + +Each row defines a single tidal constituent. + +- `name` -- The canonical short name of the constituent (e.g. `M2`, `S2`, `K1`). Primary key, referenced by `station_constituents`, `equilibrium_arguments`, and `node_factors`. +- `description` -- OPTIONAL human-readable description of the constituent. +- `speed` -- Angular speed in degrees per solar hour. 
+ +The table MUST contain at least the principal tidal constituents (M2, S2, N2, K2, K1, O1, P1, Q1). It SHOULD contain all constituents referenced by any station in the database. + +### `sources` + +The `sources` table identifies the organizations or services that provided station data. + +#### Schema + +```sql +CREATE TABLE sources ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + url TEXT NOT NULL, + UNIQUE(name, url) +); +``` + +#### Content + +- `id` -- Unique integer identifier. Referenced by `stations.source_id`. +- `name` -- Name of the data source (e.g. `NOAA`, `CMEMS`). +- `url` -- URL for the data source. + +### `stations` + +The `stations` table contains one row per tide station. + +#### Schema + +```sql +CREATE TABLE stations ( + id INTEGER PRIMARY KEY, + station_id TEXT NOT NULL UNIQUE, + name TEXT NOT NULL, + type TEXT NOT NULL CHECK (type IN ('reference', 'subordinate')), + latitude REAL NOT NULL, + longitude REAL NOT NULL, + continent TEXT NOT NULL, + country TEXT NOT NULL, + region TEXT, + timezone TEXT NOT NULL, + disclaimers TEXT, + source_id INTEGER NOT NULL REFERENCES sources(id), + source_station_id TEXT NOT NULL, + license TEXT NOT NULL, + commercial_use INTEGER NOT NULL DEFAULT 0, + license_url TEXT NOT NULL, + license_notes TEXT, + epoch_start TEXT, + epoch_end TEXT +); + +CREATE INDEX idx_stations_type ON stations(type); +CREATE INDEX idx_stations_country ON stations(country); +CREATE INDEX idx_stations_continent ON stations(continent); +``` + +#### Content + +- `id` -- Auto-incrementing integer primary key. Used as foreign key in all related tables. +- `station_id` -- Unique text identifier in the format `source/id` (e.g. `noaa/9414290`). Consumers SHOULD use this field for lookups and display. +- `name` -- Human-readable station name. +- `type` -- MUST be one of `reference` or `subordinate`. Reference stations have harmonic constituents in `station_constituents`. Subordinate stations have offsets in `station_offsets`. 
+- `latitude` -- Station latitude in decimal degrees (WGS 84). Positive is north. +- `longitude` -- Station longitude in decimal degrees (WGS 84). Positive is east. +- `continent` -- Continent where the station is located. +- `country` -- ISO 3166-1 alpha-2 country code. +- `region` -- OPTIONAL sub-national region or state. +- `timezone` -- IANA timezone name (e.g. `America/Los_Angeles`). +- `disclaimers` -- OPTIONAL text disclaimers about data quality or usage. +- `source_id` -- Foreign key to `sources.id`. +- `source_station_id` -- The station identifier as used by the original data source. +- `license` -- SPDX license identifier or description (e.g. `Public Domain`, `CC-BY-4.0`). +- `commercial_use` -- `1` if the data MAY be used commercially, `0` otherwise. +- `license_url` -- URL to the full license text. +- `license_notes` -- OPTIONAL additional licensing information. +- `epoch_start` -- OPTIONAL start date of the harmonic analysis epoch, as `YYYY-MM-DD`. +- `epoch_end` -- OPTIONAL end date of the harmonic analysis epoch, as `YYYY-MM-DD`. + +### `station_constituents` + +The `station_constituents` table stores the harmonic constants for reference stations. + +#### Schema + +```sql +CREATE TABLE station_constituents ( + station_id INTEGER NOT NULL REFERENCES stations(id), + constituent TEXT NOT NULL REFERENCES constituents(name), + amplitude REAL NOT NULL, + phase REAL NOT NULL, + PRIMARY KEY (station_id, constituent) +) WITHOUT ROWID; +``` + +#### Content + +- `station_id` -- Foreign key to `stations.id`. The referenced station MUST have `type = 'reference'`. +- `constituent` -- Foreign key to `constituents.name`. +- `amplitude` -- Amplitude in meters. +- `phase` -- Phase lag (epoch) in degrees, in the range [0, 360). + +Each reference station SHOULD have at least the principal constituents (M2, S2, K1, O1). + +### `station_offsets` + +The `station_offsets` table stores prediction offsets for subordinate stations relative to a reference station. 
+ +#### Schema + +```sql +CREATE TABLE station_offsets ( + station_id INTEGER PRIMARY KEY REFERENCES stations(id), + reference_id INTEGER NOT NULL REFERENCES stations(id), + height_type TEXT NOT NULL CHECK (height_type IN ('ratio', 'fixed')), + height_high REAL NOT NULL, + height_low REAL NOT NULL, + time_high INTEGER NOT NULL, + time_low INTEGER NOT NULL +); + +CREATE INDEX idx_station_offsets_reference ON station_offsets(reference_id); +``` + +#### Content + +- `station_id` -- Foreign key to `stations.id`. The referenced station MUST have `type = 'subordinate'`. +- `reference_id` -- Foreign key to `stations.id`. The referenced station MUST have `type = 'reference'`. +- `height_type` -- MUST be `ratio` (multiply reference heights) or `fixed` (add to reference heights in meters). +- `height_high` -- Height adjustment for high tides. If `height_type` is `ratio`, this is a dimensionless multiplier. If `fixed`, this is in meters. +- `height_low` -- Height adjustment for low tides (same units as `height_high`). +- `time_high` -- Time offset for high tides, in minutes. Positive values shift later. +- `time_low` -- Time offset for low tides, in minutes. Positive values shift later. + +### `station_datums` + +The `station_datums` table stores tidal datum values for stations. + +#### Schema + +```sql +CREATE TABLE station_datums ( + station_id INTEGER NOT NULL REFERENCES stations(id), + datum TEXT NOT NULL, + value REAL NOT NULL, + PRIMARY KEY (station_id, datum) +) WITHOUT ROWID; +``` + +#### Content + +- `station_id` -- Foreign key to `stations.id`. +- `datum` -- Datum identifier. Common values include `MHHW`, `MHW`, `MTL`, `MSL`, `MLW`, `MLLW`, `LAT`, `HAT`. +- `value` -- Datum height in meters, relative to the station's chart datum. + +### `equilibrium_arguments` + +The `equilibrium_arguments` table stores precomputed equilibrium arguments (V₀ + u) for each constituent at the start of each year. These values enable tide prediction without an astronomy library. 
+ +#### Schema + +```sql +CREATE TABLE equilibrium_arguments ( + constituent TEXT NOT NULL REFERENCES constituents(name), + year INTEGER NOT NULL, + value REAL NOT NULL, + PRIMARY KEY (constituent, year) +) WITHOUT ROWID; +``` + +#### Content + +- `constituent` -- Foreign key to `constituents.name`. +- `year` -- Calendar year. +- `value` -- Equilibrium argument in degrees, in the range [0, 360), computed at 00:00 UTC on January 1 of the given year. + +The year range MUST span at least `start_year` through `end_year` as declared in `metadata`. Equilibrium arguments MAY be absent for constituents that lack a computable astronomical formula. + +### `node_factors` + +The `node_factors` table stores precomputed node factors (f) for each constituent at the middle of each year. + +#### Schema + +```sql +CREATE TABLE node_factors ( + constituent TEXT NOT NULL REFERENCES constituents(name), + year INTEGER NOT NULL, + value REAL NOT NULL, + PRIMARY KEY (constituent, year) +) WITHOUT ROWID; +``` + +#### Content + +- `constituent` -- Foreign key to `constituents.name`. +- `year` -- Calendar year. +- `value` -- Node factor (dimensionless multiplier), computed at 00:00 UTC on July 1 of the given year. + +The year range MUST match `equilibrium_arguments`. Node factors MAY be absent for constituents that lack a computable astronomical formula. A node factor of `1.0` indicates no correction. 
+ +## Tide Prediction + +A consumer can compute tide predictions from a TideBase file using the standard harmonic method: + +$$h(t) = Z_0 + \sum_i f_i \cdot H_i \cdot \cos(\omega_i \cdot t + V_{0_i} + u_i - \kappa_i)$$ + +Where for each constituent $i$: + +- $Z_0$ is the mean sea level (the `MSL` datum from `station_datums`, or 0) +- $f_i$ is the node factor from `node_factors` for the prediction year +- $H_i$ is the amplitude from `station_constituents` +- $\omega_i$ is the speed from `constituents` (converted to radians/hour) +- $t$ is hours elapsed since 00:00 UTC January 1 of the prediction year +- $V_{0_i} + u_i$ is the equilibrium argument from `equilibrium_arguments` for the prediction year (stored in degrees; convert to radians) +- $\kappa_i$ is the phase from `station_constituents` (stored in degrees; convert to radians) + +For subordinate stations, first compute predictions at the reference station, then apply the time and height offsets from `station_offsets`. + +## Future Considerations + +The following features are being considered for future versions of this specification: + +- **Tidal current data** -- velocity constituents for current prediction stations +- **Datum conversions** -- relationships between vertical datums at each station +- **Confidence intervals** -- uncertainty estimates for harmonic constants + +## License + +This specification is released under [CC0 1.0 Universal](https://creativecommons.org/publicdomain/zero/1.0/). diff --git a/packages/sqlite/test/sqlite.test.ts b/packages/sqlite/test/sqlite.test.ts new file mode 100644 index 000000000..603ffe221 --- /dev/null +++ b/packages/sqlite/test/sqlite.test.ts @@ -0,0 +1,285 @@ +/** + * SQLite database validation tests. + * + * These tests validate that the SQLite build correctly preserves all station + * data from the source JSON files. 
/**
 * Validation suite for the generated TideBase (SQLite) database.
 *
 * Every test reads `dist/tides.tidebase` in read-only mode and compares its
 * contents with the station data exported by `@neaps/tide-database`, or with
 * values recomputed through `@neaps/tide-predictor`.
 */

import { describe, test, expect, beforeAll, afterAll } from "vitest";
import { existsSync, readdirSync, readFileSync, statSync } from "fs";
import { basename, join } from "path";
import { DatabaseSync } from "node:sqlite";
import {
  stations,
  constituents as constituentDefs,
} from "@neaps/tide-database";
import tidePredictor, { astro } from "@neaps/tide-predictor";

const dbPath = join(import.meta.dirname, "..", "dist", "tides.tidebase");

/** First and last year the suite expects in `metadata` and the yearly tables. */
const START_YEAR = 1970;
const END_YEAR = 2100;

let db: DatabaseSync;

beforeAll(() => {
  db = new DatabaseSync(dbPath, { readOnly: true });
});

afterAll(() => {
  // `beforeAll` may have thrown before assigning `db` (e.g. missing build
  // output), so guard the close instead of masking the original failure.
  db?.close();
});

/** Removes lines that consist solely of a `--` SQL comment. */
function stripCommentLines(sql: string): string {
  return sql
    .split(/\r?\n/)
    .filter((line) => !line.trimStart().startsWith("--"))
    .join("\n");
}

/**
 * Splits the text of an example file into individual SQL statements.
 *
 * Comment-only lines are removed before splitting on `;` so that a `;` inside
 * a comment cannot cut a statement in half, and once more per chunk so that a
 * statement preceded by a comment is kept (rather than discarded because the
 * chunk happens to start with `--`). Chunks with nothing left are dropped.
 *
 * @param sql Raw contents of a `.sql` example file.
 * @returns The non-empty statements, without their terminating `;`.
 */
function splitSqlStatements(sql: string): string[] {
  return stripCommentLines(sql)
    .split(";")
    .map((chunk) => stripCommentLines(chunk).trim())
    .filter((chunk) => chunk.length > 0);
}

describe("Database file", () => {
  test("exists and has reasonable size", () => {
    expect(existsSync(dbPath)).toBe(true);
    const stats = statSync(dbPath);
    expect(stats.size).toBeGreaterThan(1_000_000); // At least 1MB
  });
});

describe("Metadata", () => {
  test("all expected keys are present", () => {
    const rows = db.prepare("SELECT key, value FROM metadata").all() as {
      key: string;
      value: string;
    }[];
    const meta = Object.fromEntries(rows.map((r) => [r.key, r.value]));

    expect(meta["generator"]).toContain("tide-database");
    expect(meta["generated_at"]).toBeTruthy();
    expect(Number(meta["station_count"])).toBe(stations.length);
    expect(Number(meta["constituent_count"])).toBeGreaterThan(0);
    expect(Number(meta["start_year"])).toBe(START_YEAR);
    expect(Number(meta["end_year"])).toBe(END_YEAR);
  });
});

describe("Constituents", () => {
  test("all canonical constituents are present with correct speeds", () => {
    // Prepare once; the same statement is executed for every constituent.
    const speedByName = db.prepare(
      "SELECT speed FROM constituents WHERE name = ?",
    );
    for (const c of constituentDefs) {
      const row = speedByName.get(c.name) as { speed: number } | undefined;
      expect(row, `Constituent ${c.name} not found`).toBeDefined();
      expect(row!.speed).toBeCloseTo(c.speed, 4);
    }
  });

  test("total count includes station-only constituents", () => {
    const row = db
      .prepare("SELECT count(*) AS cnt FROM constituents")
      .get() as { cnt: number };
    expect(row.cnt).toBeGreaterThanOrEqual(constituentDefs.length);
  });
});

describe("Stations", () => {
  test("total count matches source data", () => {
    const row = db.prepare("SELECT count(*) AS cnt FROM stations").get() as {
      cnt: number;
    };
    expect(row.cnt).toBe(stations.length);
  });

  test("reference station count matches", () => {
    const expected = stations.filter((s) => s.type === "reference").length;
    const row = db
      .prepare("SELECT count(*) AS cnt FROM stations WHERE type = 'reference'")
      .get() as { cnt: number };
    expect(row.cnt).toBe(expected);
  });

  test("subordinate station count matches", () => {
    const expected = stations.filter((s) => s.type === "subordinate").length;
    const row = db
      .prepare(
        "SELECT count(*) AS cnt FROM stations WHERE type = 'subordinate'",
      )
      .get() as { cnt: number };
    expect(row.cnt).toBe(expected);
  });

  test("sample reference station has correct data", () => {
    const station = stations.find((s) => s.id === "noaa/9414290")!;
    const row = db
      .prepare("SELECT * FROM stations WHERE station_id = ?")
      .get("noaa/9414290") as Record<string, unknown>;

    expect(row).toBeDefined();
    expect(row["name"]).toBe(station.name);
    expect(row["type"]).toBe("reference");
    expect(row["latitude"]).toBeCloseTo(station.latitude, 4);
    expect(row["longitude"]).toBeCloseTo(station.longitude, 4);
    expect(row["timezone"]).toBe(station.timezone);
    expect(row["country"]).toBe(station.country);
    expect(row["continent"]).toBe(station.continent);
  });

  test("sample subordinate station has correct offsets", () => {
    const station = stations.find((s) => s.id === "noaa/1610367")!;
    const sRow = db
      .prepare("SELECT id FROM stations WHERE station_id = ?")
      .get("noaa/1610367") as { id: number };
    const oRow = db
      .prepare("SELECT * FROM station_offsets WHERE station_id = ?")
      .get(sRow.id) as {
      reference_id: number;
      height_type: string;
      height_high: number;
      height_low: number;
      time_high: number;
      time_low: number;
    };

    expect(oRow).toBeDefined();
    expect(oRow.height_type).toBe(station.offsets!.height.type);
    expect(oRow.height_high).toBeCloseTo(station.offsets!.height.high, 4);
    expect(oRow.height_low).toBeCloseTo(station.offsets!.height.low, 4);
    expect(oRow.time_high).toBe(station.offsets!.time.high);
    expect(oRow.time_low).toBe(station.offsets!.time.low);

    // Verify reference points to correct station
    const refRow = db
      .prepare("SELECT station_id FROM stations WHERE id = ?")
      .get(oRow.reference_id) as { station_id: string };
    expect(refRow.station_id).toBe(station.offsets!.reference);
  });
});

describe("Station constituents", () => {
  test("sample station has all constituents preserved", () => {
    const station = stations.find((s) => s.id === "noaa/9414290")!;
    const sRow = db
      .prepare("SELECT id FROM stations WHERE station_id = ?")
      .get("noaa/9414290") as { id: number };

    const rows = db
      .prepare(
        `SELECT sc.constituent AS name, sc.amplitude, sc.phase
         FROM station_constituents sc
         WHERE sc.station_id = ?`,
      )
      .all(sRow.id) as { name: string; amplitude: number; phase: number }[];

    // Should have at least as many as the source (may resolve aliases)
    expect(rows.length).toBeGreaterThanOrEqual(
      station.harmonic_constituents.filter(
        (hc) => hc.amplitude !== 0 || hc.phase !== 0,
      ).length,
    );

    // Check M2 specifically
    const m2Source = station.harmonic_constituents.find(
      (hc) => hc.name === "M2",
    )!;
    const m2Db = rows.find((r) => r.name === "M2")!;
    expect(m2Db).toBeDefined();
    expect(m2Db.amplitude).toBeCloseTo(m2Source.amplitude, 4);
    expect(m2Db.phase).toBeCloseTo(m2Source.phase, 2);
  });

  test("all subordinate offset references are valid", () => {
    const row = db
      .prepare(
        `SELECT count(*) AS cnt FROM station_offsets o
         WHERE NOT EXISTS (
           SELECT 1 FROM stations s WHERE s.id = o.reference_id
         )`,
      )
      .get() as { cnt: number };
    expect(row.cnt).toBe(0);
  });
});

describe("Datums", () => {
  test("sample station has correct datum values", () => {
    const station = stations.find((s) => s.id === "noaa/9414290")!;
    const sRow = db
      .prepare("SELECT id FROM stations WHERE station_id = ?")
      .get("noaa/9414290") as { id: number };

    const rows = db
      .prepare("SELECT datum, value FROM station_datums WHERE station_id = ?")
      .all(sRow.id) as { datum: string; value: number }[];

    const dbDatums = Object.fromEntries(rows.map((r) => [r.datum, r.value]));

    for (const [name, value] of Object.entries(station.datums) as [
      string,
      number,
    ][]) {
      expect(dbDatums[name], `Datum ${name}`).toBeCloseTo(value, 4);
    }
  });
});

describe("Equilibrium arguments and node factors", () => {
  test("spot check M2 equilibrium argument for 2026", () => {
    const eaRow = db
      .prepare(
        "SELECT value FROM equilibrium_arguments WHERE constituent = 'M2' AND year = 2026",
      )
      .get() as { value: number };

    // Compute expected value directly
    const time = new Date(Date.UTC(2026, 0, 1, 0, 0, 0));
    const constituent = tidePredictor.constituents["M2"]!;
    const astroData = astro(time);
    const V0 = constituent.value(astroData);
    const { u } = constituent.correction(astroData);
    // Double modulo maps negative sums into [0, 360) as well.
    const expected = (((V0 + u) % 360) + 360) % 360;

    expect(eaRow.value).toBeCloseTo(expected, 2);
  });

  test("spot check M2 node factor for 2026", () => {
    const nfRow = db
      .prepare(
        "SELECT value FROM node_factors WHERE constituent = 'M2' AND year = 2026",
      )
      .get() as { value: number };

    // Compute expected value directly
    const time = new Date(Date.UTC(2026, 6, 1, 0, 0, 0));
    const constituent = tidePredictor.constituents["M2"]!;
    const astroData = astro(time);
    const { f } = constituent.correction(astroData);

    expect(nfRow.value).toBeCloseTo(f, 4);
  });

  test("all constituents with tide-predictor support have eq args and node factors", () => {
    const constituentCount = db
      .prepare(
        `SELECT count(DISTINCT ea.constituent) AS cnt
         FROM equilibrium_arguments ea`,
      )
      .get() as { cnt: number };

    // Should be a substantial number (the canonical tide-predictor constituents)
    expect(constituentCount.cnt).toBeGreaterThan(50);

    // Each should have entries for the full year range
    const yearCount = db
      .prepare(
        `SELECT count(*) AS cnt FROM equilibrium_arguments
         WHERE constituent = 'M2'`,
      )
      .get() as { cnt: number };
    expect(yearCount.cnt).toBe(END_YEAR - START_YEAR + 1);
  });
});

describe("Example queries", () => {
  const examplesDir = join(import.meta.dirname, "..", "examples");
  const files = readdirSync(examplesDir).filter((f) => f.endsWith(".sql"));

  for (const file of files) {
    test(`${basename(file, ".sql")} executes without error`, () => {
      const sql = readFileSync(join(examplesDir, file), "utf-8");
      const statements = splitSqlStatements(sql);

      // Guard against a vacuous pass when nothing was extracted from the file.
      expect(
        statements.length,
        `${file} contains no executable SQL statements`,
      ).toBeGreaterThan(0);

      for (const stmt of statements) {
        // Preparing and running the statement throws on any SQL error, which
        // fails the test; the row contents themselves are not inspected.
        const rows = db.prepare(stmt).all();
        expect(Array.isArray(rows)).toBe(true);
      }
    });
  }
});