From 118618de66e49f1693cc4ab2329ae236d2b50da1 Mon Sep 17 00:00:00 2001 From: Sandu K Date: Wed, 3 Sep 2025 17:08:08 +0300 Subject: [PATCH 01/15] add docs website deploy job --- .github/workflows/docs.yml | 26 ++++++++++++++++++++++++++ docs/en/01-quickstart.md | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/docs.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000..a71e74ce --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,26 @@ +name: Deploy docs + +on: + push: + branches: [ main ] + paths: # only if PR touched these paths + - '/docs/**' + +jobs: + do_stuff: + if: github.event.pull_request.merged == true + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + # Check out the merge commit on the base branch + ref: ${{ github.event.pull_request.merge_commit_sha }} + - name: Your step + run: | + curl -L \  ✔  13:58:31 + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer $secrets.DOCS_REPO_TOKEN" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + https://api.github.com/repos/ozontech/seq-db-docs/dispatches \ + -d '{"event_type":"dispatch-event"}' \ No newline at end of file diff --git a/docs/en/01-quickstart.md b/docs/en/01-quickstart.md index f8847548..cf72fc43 100644 --- a/docs/en/01-quickstart.md +++ b/docs/en/01-quickstart.md @@ -15,7 +15,7 @@ Welcome to the seq-db quickstart guide! 
In just a few minutes, you'll learn how ### Single node mode -Before launch you need to create config file: +Before launch you need to create a config file: config.yaml: From 953463235324adf7d9faefc45b94113a126e2177 Mon Sep 17 00:00:00 2001 From: Sandu K Date: Wed, 3 Sep 2025 17:10:15 +0300 Subject: [PATCH 02/15] fix --- .github/workflows/docs.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index a71e74ce..b91e04eb 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -8,7 +8,6 @@ on: jobs: do_stuff: - if: github.event.pull_request.merged == true runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 From 75faf4e0efaf33681beea762933aa8c628ae7e76 Mon Sep 17 00:00:00 2001 From: Sandu K Date: Wed, 3 Sep 2025 17:11:01 +0300 Subject: [PATCH 03/15] fix --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index b91e04eb..a383d032 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,7 +1,7 @@ name: Deploy docs on: - push: + pull_request: branches: [ main ] paths: # only if PR touched these paths - '/docs/**' From bf0b6684c6d411022c83de30f86121c3f53327e9 Mon Sep 17 00:00:00 2001 From: Sandu K Date: Wed, 3 Sep 2025 17:13:05 +0300 Subject: [PATCH 04/15] fix --- .github/workflows/docs.yml | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index a383d032..899961df 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -3,8 +3,6 @@ name: Deploy docs on: pull_request: branches: [ main ] - paths: # only if PR touched these paths - - '/docs/**' jobs: do_stuff: @@ -15,11 +13,11 @@ jobs: # Check out the merge commit on the base branch ref: ${{ github.event.pull_request.merge_commit_sha }} - name: Your step - run: | - curl -L \  ✔  13:58:31 - -X POST \ - -H 
"Accept: application/vnd.github+json" \ - -H "Authorization: Bearer $secrets.DOCS_REPO_TOKEN" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - https://api.github.com/repos/ozontech/seq-db-docs/dispatches \ - -d '{"event_type":"dispatch-event"}' \ No newline at end of file + run: echo "hello world" +# curl -L \  ✔  13:58:31 +# -X POST \ +# -H "Accept: application/vnd.github+json" \ +# -H "Authorization: Bearer $secrets.DOCS_REPO_TOKEN" \ +# -H "X-GitHub-Api-Version: 2022-11-28" \ +# https://api.github.com/repos/ozontech/seq-db-docs/dispatches \ +# -d '{"event_type":"dispatch-event"}' \ No newline at end of file From c6acc5b2c8be9c0aec51413e6cf618fe87f95e9d Mon Sep 17 00:00:00 2001 From: Sandu K Date: Wed, 3 Sep 2025 17:21:04 +0300 Subject: [PATCH 05/15] fix --- .github/workflows/docs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 899961df..4966cd3b 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -3,6 +3,8 @@ name: Deploy docs on: pull_request: branches: [ main ] + paths: + - "docs/**" jobs: do_stuff: From 447f5327372c065d9abba8ff739410a5966aa1b1 Mon Sep 17 00:00:00 2001 From: Sandu K Date: Wed, 3 Sep 2025 17:22:36 +0300 Subject: [PATCH 06/15] fix --- docs/en/01-quickstart.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/01-quickstart.md b/docs/en/01-quickstart.md index cf72fc43..f8847548 100644 --- a/docs/en/01-quickstart.md +++ b/docs/en/01-quickstart.md @@ -15,7 +15,7 @@ Welcome to the seq-db quickstart guide! 
In just a few minutes, you'll learn how ### Single node mode -Before launch you need to create a config file: +Before launch you need to create config file: config.yaml: From b0a8de57fddbea31960b1808c097b58ddac0e374 Mon Sep 17 00:00:00 2001 From: Sandu K Date: Wed, 3 Sep 2025 17:23:43 +0300 Subject: [PATCH 07/15] fix --- .github/workflows/docs.yml | 16 ++++++++-------- docs/en/01-quickstart.md | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 4966cd3b..19a19f73 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -15,11 +15,11 @@ jobs: # Check out the merge commit on the base branch ref: ${{ github.event.pull_request.merge_commit_sha }} - name: Your step - run: echo "hello world" -# curl -L \  ✔  13:58:31 -# -X POST \ -# -H "Accept: application/vnd.github+json" \ -# -H "Authorization: Bearer $secrets.DOCS_REPO_TOKEN" \ -# -H "X-GitHub-Api-Version: 2022-11-28" \ -# https://api.github.com/repos/ozontech/seq-db-docs/dispatches \ -# -d '{"event_type":"dispatch-event"}' \ No newline at end of file + run: | + curl -L \  ✔  13:58:31 + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer $secrets.DOCS_REPO_TOKEN" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + https://api.github.com/repos/ozontech/seq-db-docs/dispatches \ + -d '{"event_type":"dispatch-event"}' \ No newline at end of file diff --git a/docs/en/01-quickstart.md b/docs/en/01-quickstart.md index f8847548..cf72fc43 100644 --- a/docs/en/01-quickstart.md +++ b/docs/en/01-quickstart.md @@ -15,7 +15,7 @@ Welcome to the seq-db quickstart guide! 
In just a few minutes, you'll learn how ### Single node mode -Before launch you need to create config file: +Before launch you need to create a config file: config.yaml: From 8f69a49ce824dfd1063ed33680d2162a64661d97 Mon Sep 17 00:00:00 2001 From: Sandu K Date: Wed, 3 Sep 2025 17:24:44 +0300 Subject: [PATCH 08/15] fix --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 19a19f73..74c7ab6a 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -16,7 +16,7 @@ jobs: ref: ${{ github.event.pull_request.merge_commit_sha }} - name: Your step run: | - curl -L \  ✔  13:58:31 + curl -L -X POST \ -H "Accept: application/vnd.github+json" \ -H "Authorization: Bearer $secrets.DOCS_REPO_TOKEN" \ From 87972bb882fb9ee57226f4deedda6bf62282b700 Mon Sep 17 00:00:00 2001 From: Sandu K Date: Wed, 3 Sep 2025 17:27:38 +0300 Subject: [PATCH 09/15] fix --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 74c7ab6a..91f392df 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -16,7 +16,7 @@ jobs: ref: ${{ github.event.pull_request.merge_commit_sha }} - name: Your step run: | - curl -L + curl -L \ -X POST \ -H "Accept: application/vnd.github+json" \ -H "Authorization: Bearer $secrets.DOCS_REPO_TOKEN" \ From 5d3b5673e4753e3cb2f5766152b39260693e08ab Mon Sep 17 00:00:00 2001 From: Sandu K Date: Wed, 3 Sep 2025 17:28:49 +0300 Subject: [PATCH 10/15] fix --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 91f392df..53b5cabf 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -19,7 +19,7 @@ jobs: curl -L \ -X POST \ -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer $secrets.DOCS_REPO_TOKEN" \ + -H 
"Authorization: Bearer ${{ secrets.DOCS_REPO_TOKEN }}" \ -H "X-GitHub-Api-Version: 2022-11-28" \ https://api.github.com/repos/ozontech/seq-db-docs/dispatches \ -d '{"event_type":"dispatch-event"}' \ No newline at end of file From 2620a6326ef7ecdc6a67c61df7fd4dfc40cbcc45 Mon Sep 17 00:00:00 2001 From: Sandu K Date: Wed, 3 Sep 2025 17:42:55 +0300 Subject: [PATCH 11/15] fix --- .github/workflows/docs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 53b5cabf..a655333a 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -3,12 +3,14 @@ name: Deploy docs on: pull_request: branches: [ main ] + types: [ closed ] paths: - "docs/**" jobs: do_stuff: runs-on: ubuntu-latest + if: github.event.pull_request.merged == true steps: - uses: actions/checkout@v4 with: From 101d955a8b535387916d5ebc9c77659ba1a3f5ec Mon Sep 17 00:00:00 2001 From: Sandu K Date: Mon, 8 Sep 2025 12:25:19 +0300 Subject: [PATCH 12/15] fix --- docs/en/05-seq-ql.md | 2 +- docs/en/{12-architecture.md => 13-architecture.md} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename docs/en/{12-architecture.md => 13-architecture.md} (98%) diff --git a/docs/en/05-seq-ql.md b/docs/en/05-seq-ql.md index 3a74da83..c101d021 100644 --- a/docs/en/05-seq-ql.md +++ b/docs/en/05-seq-ql.md @@ -84,7 +84,7 @@ The language supports the following symbols: - `*` — replaces any number of characters. These symbols can be used to search within tokens or parts of tokens. -For example, a query on the [keyword](03-index-types.md#keyword) index `source_type:access*` will match all documents starting +For example, a query on the [keyword](03-index-types.md) index `source_type:access*` will match all documents starting with `access`. 
## Filter `range` diff --git a/docs/en/12-architecture.md b/docs/en/13-architecture.md similarity index 98% rename from docs/en/12-architecture.md rename to docs/en/13-architecture.md index e1bb5a3b..cd59e81b 100644 --- a/docs/en/12-architecture.md +++ b/docs/en/13-architecture.md @@ -32,7 +32,7 @@ Because the dataset is stored in these three file types, moving or restoring a shard is straightforward: simply `cp` / `rsync` the directory to the target node and start the pod. -Read more about file types and their internal structure [here](./internal/fractions.md). +Read more about file types and their internal structure [here](internal/fractions.md). #### Durability A write operation is acknowledged only after the payload is safely persisted: From 946d5631736ca461f87323d599e07dc75061a7cd Mon Sep 17 00:00:00 2001 From: Sandu K Date: Mon, 8 Sep 2025 12:29:22 +0300 Subject: [PATCH 13/15] fix --- .github/workflows/docs.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index a655333a..309830a3 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -8,15 +8,15 @@ on: - "docs/**" jobs: - do_stuff: + trigger_docs_website_deploy: runs-on: ubuntu-latest if: github.event.pull_request.merged == true steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: # Check out the merge commit on the base branch ref: ${{ github.event.pull_request.merge_commit_sha }} - - name: Your step + - name: Send trigger for remote website deploy run: | curl -L \ -X POST \ From bbe9e145e16e4c76d2ad36d0dbaf87f84b5046b5 Mon Sep 17 00:00:00 2001 From: Sandu K Date: Mon, 8 Sep 2025 13:01:51 +0300 Subject: [PATCH 14/15] fix --- .github/workflows/docs.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 309830a3..67371095 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -17,11 
+17,14 @@ jobs: # Check out the merge commit on the base branch ref: ${{ github.event.pull_request.merge_commit_sha }} - name: Send trigger for remote website deploy + env: + DOCS_REPO_TOKEN: ${{ secrets.DOCS_REPO_TOKEN }} run: | - curl -L \ + set -euo pipefail + curl -sS -L --fail-with-body \ -X POST \ -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer ${{ secrets.DOCS_REPO_TOKEN }}" \ + -H "Authorization: Bearer ${DOCS_REPO_TOKEN}" \ -H "X-GitHub-Api-Version: 2022-11-28" \ https://api.github.com/repos/ozontech/seq-db-docs/dispatches \ -d '{"event_type":"dispatch-event"}' \ No newline at end of file From c487ea481571b96bd64055ac50b5b6ed0701647e Mon Sep 17 00:00:00 2001 From: Sandu K Date: Mon, 8 Sep 2025 13:22:11 +0300 Subject: [PATCH 15/15] add architecture docs (en) --- .github/workflows/docs.yml | 3 - docs/ru/13-architecture.md | 160 +++++++++++++++++++++++++++++++++++++ 2 files changed, 160 insertions(+), 3 deletions(-) create mode 100644 docs/ru/13-architecture.md diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 67371095..9e0fe8a5 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -13,9 +13,6 @@ jobs: if: github.event.pull_request.merged == true steps: - uses: actions/checkout@v5 - with: - # Check out the merge commit on the base branch - ref: ${{ github.event.pull_request.merge_commit_sha }} - name: Send trigger for remote website deploy env: DOCS_REPO_TOKEN: ${{ secrets.DOCS_REPO_TOKEN }} diff --git a/docs/ru/13-architecture.md b/docs/ru/13-architecture.md new file mode 100644 index 00000000..cd59e81b --- /dev/null +++ b/docs/ru/13-architecture.md @@ -0,0 +1,160 @@ +# Cluster-Mode Architecture + +## Components overview + +In cluster mode, seq-db consists of two main components: + - seq-db store (seq-db instance running with `--mode=store flag`) + - seq-db proxy (seq-db instance running with `--mode=proxy flag`). 
+ +### seq-db store +seq-db store is the stateful storage component that keeps all the +written documents and handles both reads and writes. +All data written into seq-db eventually makes its way to one or multiple stores. + + +#### Key characteristics +- Deployed as k8s `StatefulSet` +- Share-nothing architecture: a seq-db store instance is unaware of any other stores. +- Maintains in-memory and on-disk inverted indexes, allowing search on indexed fields. + + +#### File layout +seq-db store keeps all document data in three file types: + +| File type | Purpose | +|-----------|------------------------------------------------| +| `.docs` | Stores compressed batches of raw log documents | +| `.meta` | Tokenized metadata stream (used for recovery) | +| `.index` | On-disk inverted index | + + +Because the dataset is stored in these three file types, moving or restoring a +shard is straightforward: simply `cp` / `rsync` the directory +to the target node and start the pod. + +Read more about file types and their internal structure [here](internal/fractions.md). + +#### Durability +A write operation is acknowledged only after the payload is safely persisted: + +``` +write, fsync # .meta file +write, fsync # .docs file +``` +That is, two write system calls followed by two fsync +calls—guaranteeing the data survives a node +crash or restart before the client receives a success response. +Indexing occurs asynchronously, so it usually takes under 1 +second before the newly written documents are available for search queries. +Note that this value may be slightly higher when bulk load spikes happen. + +### seq-db proxy +seq-db proxy is a stateless coordinator for all read & write traffic. 
+It maintains a user-defined cluster topology, and allows changes in read-write +traffic distribution without changes to the stateful components. + + +#### Key characteristics +- Deployed as k8s `Deployment` +- Performs logical replication between stores +- Routes traffic between storage tiers (hot/cold stores) + +seq-db proxy tokenizes every incoming document +and compresses batches with zstd / lz4 +before sending batches to seq-db stores. + +### Read-path & write-path (rf=2) +Let's take a look at an example architecture with 4 seq-db shards and replication-factor=2 +(each log must be stored in two separate seq-db stores). +Note that replicas of a shard can be located in different availability zones. + +### Write-path +The write commits only after seq-db proxy receives an ack **from all replicas of the addressed shard**. + +```mermaid +sequenceDiagram + + participant Client + participant Proxy as seq-db proxy + + box Shard1 + participant A as seq-db store
shard1 replica A + participant B as seq-db store
shard1 replica B + end + + box Shard2 + participant C as seq-db store
shard2 replica A + participant D as seq-db store
shard2 replica B + end + + Note over Proxy,B: seq-db proxy chooses a random shard + Client->>Proxy: write(batch1) + Proxy->>A: write(batch1) + Proxy->>B: write(batch1) + + A-->>Proxy: ack + B-->>Proxy: ack + Proxy-->>Client: ack + + Note over Proxy,B: the write is done if acks received
from both replicas of a shard + + Client->>Proxy: write(batch2) + Proxy->>C: write(batch2) + Proxy->>D: write(batch2) + + C-->>Proxy: ack + D-->>Proxy: ack + + Proxy-->>Client: ack +``` + +### Read-path +While the written document must be acknowledged by all replicas +of a shard, +a read is successful when **at least one replica of each shard** returns a response. + +```mermaid +sequenceDiagram + + participant Client + participant Proxy as seq-db proxy + + box Shard1 + participant A as seq-db store
shard1 replica A + participant B as seq-db store
shard1 replica B + end + + box Shard2 + participant C as seq-db store
shard2 replica A + participant D as seq-db store
shard2 replica B + end + + Note over Proxy,C: seq-db proxy chooses
a random replica of each shard + Client->>Proxy: request 1 + Proxy->>A: request 1 + Proxy->>C: request 1 + + A-->>Proxy: response 1 (shard1 replica A) + C-->>Proxy: response 1 (shard2 replica A) + Note over Proxy: seq-db proxy merges the returned responses + Proxy-->>Client: merge(res1_s1rA, res1_s2rA) + + + Client->>Proxy: request 2 + Proxy->>B: request 2 + Proxy->>D: request 2 + + B-->>Proxy: response 2 (shard1 replica B) + D-->>Proxy: response 2 (shard2 replica B) + + Proxy-->>Client: merge(res2_s1rB, res2_s2rB) +``` + +## Notes about replication & consistency +seq-db doesn't have any mechanism to keep replicas consistent with each other. +That is, if a write operation succeeds on one replica of a shard and fails on another replica, the replicas +would be out of sync and won't be (automatically) synced. +The only guarantee given is that a write operation succeeds only once at least RF replicas are saved on disk. +This optimization allows seq-db to achieve higher ingestion throughput than alternatives, +at the obvious price of possible inconsistencies in retrieval and aggregation queries. +seq-db was designed as a database for logs/traces with this tradeoff in mind. \ No newline at end of file