diff --git a/.gitmodules b/.gitmodules
index eead28035..a51aabbcd 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -52,3 +52,6 @@
 [submodule "thirdparty/limonp/limonp-v1.0.2"]
 	path = thirdparty/limonp/limonp-v1.0.2
 	url = https://github.com/yanyiwu/limonp.git
+[submodule "thirdparty/snowball/snowball-3.1.1"]
+	path = thirdparty/snowball/snowball-3.1.1
+	url = https://github.com/snowballstem/snowball.git
diff --git a/src/db/CMakeLists.txt b/src/db/CMakeLists.txt
index 5dfb081d7..02e2db7be 100644
--- a/src/db/CMakeLists.txt
+++ b/src/db/CMakeLists.txt
@@ -45,6 +45,7 @@ cc_library(
     libprotobuf
     FastPFOR
     cppjieba
+    snowball
     Arrow::arrow_static
     Arrow::parquet_static
     Arrow::arrow_compute
diff --git a/src/db/index/CMakeLists.txt b/src/db/index/CMakeLists.txt
index 8231718da..85d7fbe9b 100644
--- a/src/db/index/CMakeLists.txt
+++ b/src/db/index/CMakeLists.txt
@@ -29,6 +29,7 @@ cc_library(
          Arrow::arrow_compute
          Arrow::arrow_dataset
          cppjieba
+         snowball
          FastPFOR
     INCS .  ${PROJECT_ROOT_DIR}/src
     VERSION "${PROXIMA_ZVEC_VERSION}"
diff --git a/src/db/index/column/fts_column/tokenizer/stemmer_token_filter.cc b/src/db/index/column/fts_column/tokenizer/stemmer_token_filter.cc
new file mode 100644
index 000000000..a52ef3262
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/stemmer_token_filter.cc
@@ -0,0 +1,80 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "stemmer_token_filter.h"
+#include <unordered_map>
+#include <zvec/ailego/logger/logger.h>
+
+extern "C" {
+#include <libstemmer.h>
+}
+
+namespace zvec::fts {
+
+// Per-thread cache of Snowball stemmers keyed by language; owns its handles.
+struct ThreadLocalStemmerCache {
+  std::unordered_map<std::string, struct sb_stemmer *> stemmers;
+  ThreadLocalStemmerCache() = default;
+  // Non-copyable: a copy would sb_stemmer_delete() every handle twice.
+  ThreadLocalStemmerCache(const ThreadLocalStemmerCache &) = delete;
+  ThreadLocalStemmerCache &operator=(const ThreadLocalStemmerCache &) = delete;
+
+  ~ThreadLocalStemmerCache() {
+    for (auto &entry : stemmers) sb_stemmer_delete(entry.second);
+  }
+
+  // Returns the stemmer cached for `lang`, creating it on first use.
+  // Yields nullptr (and caches nothing) when sb_stemmer_new() fails.
+  struct sb_stemmer *get(const std::string &lang) {
+    if (auto it = stemmers.find(lang); it != stemmers.end()) return it->second;
+    auto *s = sb_stemmer_new(lang.c_str(), nullptr);  // null charenc: UTF-8
+    if (s) stemmers.emplace(lang, s);  // single insert, no second lookup
+    return s;
+  }
+};
+
+// Applies the optional "stemmer_lang" setting, then probes Snowball for it.
+bool StemmerTokenFilter::init(const ailego::JsonObject &config) {
+  std::string requested;
+  if (config.get("stemmer_lang", &requested) && !requested.empty())
+    language_ = requested;
+  struct sb_stemmer *probe = sb_stemmer_new(language_.c_str(), nullptr);
+  if (probe != nullptr) {
+    sb_stemmer_delete(probe);  // only created to validate the language
+    return true;
+  }
+  LOG_ERROR("[StemmerTokenFilter] failed to create stemmer for language: %s",
+            language_.c_str());
+  return false;
+}
+
+// Replaces each token's text with its stem, using the calling thread's cached
+// stemmer for language_. Returns the tokens unchanged if none can be obtained.
+std::vector<Token> StemmerTokenFilter::filter(std::vector<Token> tokens) const {
+  static thread_local ThreadLocalStemmerCache tls_cache;
+  struct sb_stemmer *const handle = tls_cache.get(language_);
+  if (handle == nullptr) return tokens;
+  for (Token &tok : tokens) {
+    const auto *word = reinterpret_cast<const unsigned char *>(tok.text.data());
+    const auto *stem =
+        sb_stemmer_stem(handle, word, static_cast<int>(tok.text.size()));
+    if (stem == nullptr) continue;  // no result: keep the token text as-is
+    // The stem's length comes from sb_stemmer_length(), queried after stemming.
+    tok.text.assign(reinterpret_cast<const char *>(stem),
+                    sb_stemmer_length(handle));
+  }
+  return tokens;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/tokenizer/stemmer_token_filter.h b/src/db/index/column/fts_column/tokenizer/stemmer_token_filter.h
new file mode 100644
index 000000000..8838dbced
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/stemmer_token_filter.h
@@ -0,0 +1,42 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "token_filter.h"
+
+namespace zvec::fts {
+
+/*! Token filter that rewrites each token's text to its Snowball stem.
+ *  The language defaults to "english"; init() may override it from config. */
+class StemmerTokenFilter : public TokenFilter {
+ public:
+  StemmerTokenFilter() = default;
+  StemmerTokenFilter(const StemmerTokenFilter &) = delete;
+  StemmerTokenFilter &operator=(const StemmerTokenFilter &) = delete;
+  ~StemmerTokenFilter() override = default;
+
+  const char *name() const override {
+    return "stemmer";
+  }
+  bool init(const ailego::JsonObject &config) override;
+  std::vector<Token> filter(std::vector<Token> tokens) const override;
+
+ private:
+  std::string language_ = "english";
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/tokenizer/token_filter.h b/src/db/index/column/fts_column/tokenizer/token_filter.h
index ce11fbe14..1bc9752a8 100644
--- a/src/db/index/column/fts_column/tokenizer/token_filter.h
+++ b/src/db/index/column/fts_column/tokenizer/token_filter.h
@@ -17,6 +17,7 @@
 #include <memory>
 #include <string>
 #include <vector>
+#include <zvec/ailego/encoding/json/mod_json_plus.h>
 #include "tokenizer.h"
 
 namespace zvec::fts {
@@ -29,6 +30,15 @@ class TokenFilter {
  public:
   virtual ~TokenFilter() = default;
 
+  /*! Initialise the filter from JSON config; call once before filter().
+   *  The default implementation ignores config and always returns true.
+   *  \param config  JSON object containing filter-specific parameters.
+   *  \return        true on success, false on failure.
+   */
+  virtual bool init(const ailego::JsonObject & /*config*/) {
+    return true;
+  }
+
   /*! Filter/transform a list of tokens.
    *  \param tokens  input token list (may be modified in place)
    *  \return        processed token list
diff --git a/src/db/index/column/fts_column/tokenizer/tokenizer_factory.cc b/src/db/index/column/fts_column/tokenizer/tokenizer_factory.cc
index ec775678e..9150f71d8 100644
--- a/src/db/index/column/fts_column/tokenizer/tokenizer_factory.cc
+++ b/src/db/index/column/fts_column/tokenizer/tokenizer_factory.cc
@@ -17,6 +17,7 @@
 #include <zvec/ailego/logger/logger.h>
 #include "jieba_tokenizer.h"
 #include "standard_tokenizer.h"
+#include "stemmer_token_filter.h"
 #include "whitespace_tokenizer.h"
 
 namespace zvec::fts {
@@ -55,6 +56,11 @@ TokenizerPipelinePtr TokenizerFactory::create(const FtsIndexParams &params) {
                 filter_name.c_str());
       return nullptr;
     }
+    if (!filter->init(extra_json)) {
+      LOG_ERROR("[TokenizerFactory] failed to init filter: %s",
+                filter_name.c_str());
+      return nullptr;
+    }
     filters.push_back(std::move(filter));
   }
 
@@ -96,6 +102,8 @@ TokenizerPtr TokenizerFactory::create_tokenizer(
 TokenFilterPtr TokenizerFactory::create_filter(const std::string &filter_name) {
   if (filter_name == "lowercase") {
     return std::make_shared<LowercaseTokenFilter>();
+  } else if (filter_name == "stemmer") {
+    return std::make_shared<StemmerTokenFilter>();
   }
   LOG_ERROR("[TokenizerFactory] unknown filter name: %s", filter_name.c_str());
   return nullptr;
diff --git a/tests/db/index/column/fts_column/fts_column_indexer_test.cc b/tests/db/index/column/fts_column/fts_column_indexer_test.cc
index 5bce2c5f6..e9b816e26 100644
--- a/tests/db/index/column/fts_column/fts_column_indexer_test.cc
+++ b/tests/db/index/column/fts_column/fts_column_indexer_test.cc
@@ -1832,3 +1832,89 @@ TEST_F(FtsColumnIndexerTest, FilterPushdownNullFilterUnchanged) {
     EXPECT_FLOAT_EQ(baseline[i].score, with_null[i].score);
   }
 }
+
+// ============================================================
+// Stemmer token filter end-to-end tests
+// ============================================================
+
+static zvec::fts::TokenizerPipelinePtr make_stemmer_pipeline() {
+  zvec::fts::FtsIndexParams cfg;  // standard tokenizer, lowercase + stemmer
+  cfg.filters = {"lowercase", "stemmer"};
+  cfg.tokenizer_name = "standard";
+  return zvec::fts::TokenizerFactory::create(cfg);
+}
+
+class FtsStemmerIndexerTest : public FtsColumnIndexerTest {
+ protected:
+  std::unique_ptr<FtsColumnIndexer> make_stemmer_indexer(
+      const std::string &field_name = "content") {
+    auto idx = std::make_unique<FtsColumnIndexer>();
+    auto analysis = std::make_shared<zvec::FtsIndexParams>(
+        "standard", std::vector<std::string>{"lowercase", "stemmer"}, "");
+    auto meta = make_test_field_meta(field_name, analysis);
+    auto opened = idx->open(meta, &db_, postings_cf_, positions_cf_,
+                            term_freq_cf_, max_tf_cf_, doc_len_cf_, stat_cf_);
+    EXPECT_TRUE(opened.has_value());  // report a failed open() in the test
+    return idx;
+  }
+};
+
+TEST_F(FtsStemmerIndexerTest, StemmedTermMatchesMorphologicalVariants) {
+  auto indexer = make_stemmer_indexer();
+  EXPECT_TRUE(indexer->insert(0, "the cats are running quickly").has_value());
+  EXPECT_TRUE(indexer->insert(1, "a dog runs slowly").has_value());
+  EXPECT_TRUE(indexer->insert(2, "birds fly high").has_value());
+
+  auto pipeline = make_stemmer_pipeline();
+
+  // "running" stems to "run", matches doc 0 ("running") and doc 1 ("runs")
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "running", 10, &results, pipeline));
+  EXPECT_EQ(results.size(), 2u);
+
+  // "cats" stems to "cat", matches only doc 0
+  results.clear();
+  EXPECT_TRUE(search_ok(*indexer, "cats", 10, &results, pipeline));
+  ASSERT_EQ(results.size(), 1u);  // fatal: results[0] is read just below
+  EXPECT_EQ(results[0].doc_id, 0ull);
+}
+
+TEST_F(FtsStemmerIndexerTest, QueryWithBaseFormMatchesVariants) {
+  auto idx = make_stemmer_indexer();
+  EXPECT_TRUE(idx->insert(0, "connected connections").has_value());
+  EXPECT_TRUE(idx->insert(1, "connecting wires").has_value());
+  EXPECT_TRUE(idx->insert(2, "unrelated text").has_value());
+
+  // The query term is already in stem form; docs 0 and 1 contain inflected
+  // variants of it, doc 2 does not.
+  auto query_pipeline = make_stemmer_pipeline();
+  std::vector<FtsResult> hits;
+  EXPECT_TRUE(search_ok(*idx, "connect", 10, &hits, query_pipeline));
+  EXPECT_EQ(hits.size(), 2u);
+}
+
+TEST_F(FtsStemmerIndexerTest, StemmerWithAndQuery) {
+  auto indexer = make_stemmer_indexer();
+  EXPECT_TRUE(indexer->insert(0, "dogs running fast").has_value());
+  EXPECT_TRUE(indexer->insert(1, "cats running slow").has_value());
+  EXPECT_TRUE(indexer->insert(2, "dogs sleeping").has_value());
+
+  auto pipeline = make_stemmer_pipeline();
+
+  // "dogs AND running" -> stems to "dog AND run" -> doc 0 only
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "dogs AND running", 10, &results, pipeline));
+  ASSERT_EQ(results.size(), 1u);  // fatal: results[0] is read just below
+  EXPECT_EQ(results[0].doc_id, 0ull);
+}
+
+TEST_F(FtsStemmerIndexerTest, StemmerNoMatchAfterStemming) {
+  auto idx = make_stemmer_indexer();
+  EXPECT_TRUE(idx->insert(0, "hello world").has_value());
+
+  // The query term does not occur in the only indexed document.
+  auto query_pipeline = make_stemmer_pipeline();
+  std::vector<FtsResult> hits;
+  EXPECT_TRUE(search_ok(*idx, "nonexistent", 10, &hits, query_pipeline));
+  EXPECT_TRUE(hits.empty());
+}
diff --git a/tests/db/index/column/fts_column/stemmer_token_filter_test.cc b/tests/db/index/column/fts_column/stemmer_token_filter_test.cc
new file mode 100644
index 000000000..0aecbd867
--- /dev/null
+++ b/tests/db/index/column/fts_column/stemmer_token_filter_test.cc
@@ -0,0 +1,158 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string>
+#include <vector>
+#include <gtest/gtest.h>
+#include "db/index/column/fts_column/fts_types.h"
+#include "db/index/column/fts_column/tokenizer/tokenizer_factory.h"
+
+using namespace zvec::fts;
+
+// ============================================================
+// Helpers
+// ============================================================
+
+// Standard-tokenizer params; a non-empty `lang` is passed as "stemmer_lang".
+static FtsIndexParams make_stemmer_params(
+    const std::string &lang = "",
+    const std::vector<std::string> &filters = {"lowercase", "stemmer"}) {
+  FtsIndexParams out;
+  out.filters = filters;
+  out.tokenizer_name = "standard";
+  if (lang.empty()) return out;
+  out.extra_params = R"({"stemmer_lang":")" + lang + R"("})";
+  return out;
+}
+
+// ============================================================
+// Pipeline creation
+// ============================================================
+
+TEST(StemmerTokenFilterTest, CreatePipelineDefaultEnglish) {
+  const auto default_params = make_stemmer_params();  // no "stemmer_lang"
+  ASSERT_NE(TokenizerFactory::create(default_params), nullptr);
+}
+
+TEST(StemmerTokenFilterTest, CreatePipelineExplicitLanguage) {
+  const auto german_params = make_stemmer_params("german");
+  ASSERT_NE(TokenizerFactory::create(german_params), nullptr);
+}
+
+TEST(StemmerTokenFilterTest, CreatePipelineInvalidLanguageFails) {
+  // Creation is expected to fail for a language name that cannot be resolved.
+  const auto bad_params = make_stemmer_params("nonexistent_lang");
+  EXPECT_EQ(TokenizerFactory::create(bad_params), nullptr);
+}
+
+// ============================================================
+// English stemming
+// ============================================================
+
+TEST(StemmerTokenFilterTest, EnglishStemming) {
+  auto analyzer = TokenizerFactory::create(make_stemmer_params());
+  ASSERT_NE(analyzer, nullptr);
+  // One expected stem per input word, in input order.
+  const std::vector<std::string> expected{"run", "cat", "easili", "connect"};
+  auto stems = analyzer->process("running cats easily connection");
+  ASSERT_EQ(stems.size(), expected.size());
+  for (size_t i = 0; i < expected.size(); ++i) {
+    EXPECT_EQ(stems[i].text, expected[i]);
+  }
+}
+
+TEST(StemmerTokenFilterTest, AlreadyStemmedWordsUnchanged) {
+  auto analyzer = TokenizerFactory::create(make_stemmer_params());
+  ASSERT_NE(analyzer, nullptr);
+  // Both inputs are expected to come back exactly as written.
+  auto stems = analyzer->process("run cat");
+  ASSERT_EQ(stems.size(), 2u);
+  EXPECT_EQ(stems[0].text, "run");
+  EXPECT_EQ(stems[1].text, "cat");
+}
+
+TEST(StemmerTokenFilterTest, EmptyInput) {
+  auto analyzer = TokenizerFactory::create(make_stemmer_params());
+  ASSERT_NE(analyzer, nullptr);
+
+  // An empty document must produce an empty token list.
+  EXPECT_TRUE(analyzer->process("").empty());
+}
+
+TEST(StemmerTokenFilterTest, PreservesOffsetAndPosition) {
+  auto analyzer = TokenizerFactory::create(make_stemmer_params());
+  ASSERT_NE(analyzer, nullptr);
+  // "dogs" starts 8 characters into the input ("running" plus one space).
+  auto stems = analyzer->process("running dogs");
+  ASSERT_EQ(stems.size(), 2u);
+  EXPECT_EQ(stems[0].offset, 0u);
+  EXPECT_EQ(stems[0].position, 0u);
+  EXPECT_EQ(stems[1].offset, 8u);
+  EXPECT_EQ(stems[1].position, 1u);
+}
+
+// ============================================================
+// Lowercase + stemmer chain
+// ============================================================
+
+TEST(StemmerTokenFilterTest, LowercaseThenStem) {
+  auto analyzer = TokenizerFactory::create(make_stemmer_params());
+  ASSERT_NE(analyzer, nullptr);
+  // Mixed-case input goes through the default lowercase + stemmer chain.
+  auto stems = analyzer->process("Running Cats EASILY");
+  ASSERT_EQ(stems.size(), 3u);
+  EXPECT_EQ(stems[0].text, "run");
+  EXPECT_EQ(stems[1].text, "cat");
+  EXPECT_EQ(stems[2].text, "easili");
+}
+
+// ============================================================
+// Stemmer-only (no lowercase)
+// ============================================================
+
+TEST(StemmerTokenFilterTest, StemmerOnlyNoLowercase) {
+  // Filter chain contains the stemmer alone; the input is already lowercase.
+  const auto stem_only = make_stemmer_params("", {"stemmer"});
+  auto analyzer = TokenizerFactory::create(stem_only);
+  ASSERT_NE(analyzer, nullptr);
+  auto stems = analyzer->process("running");
+  ASSERT_EQ(stems.size(), 1u);
+  EXPECT_EQ(stems[0].text, "run");
+}
+
+// ============================================================
+// Non-English language
+// ============================================================
+
+TEST(StemmerTokenFilterTest, GermanStemming) {
+  auto analyzer = TokenizerFactory::create(make_stemmer_params("german"));
+  ASSERT_NE(analyzer, nullptr);
+  // Uses the stemmer selected through "stemmer_lang" instead of the default.
+  auto stems = analyzer->process("laufen");
+  ASSERT_EQ(stems.size(), 1u);
+  EXPECT_EQ(stems[0].text, "lauf");
+}
+
+// ============================================================
+// ISO code as language
+// ============================================================
+
+TEST(StemmerTokenFilterTest, LanguageByISOCode) {
+  auto analyzer = TokenizerFactory::create(make_stemmer_params("en"));
+  ASSERT_NE(analyzer, nullptr);
+  // The short code "en" is expected to stem like the default English setup.
+  auto stems = analyzer->process("running");
+  ASSERT_EQ(stems.size(), 1u);
+  EXPECT_EQ(stems[0].text, "run");
+}
diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt
index c7b227245..14a39d7ed 100644
--- a/thirdparty/CMakeLists.txt
+++ b/thirdparty/CMakeLists.txt
@@ -30,3 +30,4 @@ add_subdirectory(CRoaring CRoaring EXCLUDE_FROM_ALL)
 add_subdirectory(FastPFOR FastPFOR EXCLUDE_FROM_ALL)
 add_subdirectory(limonp limonp EXCLUDE_FROM_ALL)
 add_subdirectory(cppjieba cppjieba EXCLUDE_FROM_ALL)
+add_subdirectory(snowball snowball EXCLUDE_FROM_ALL)
diff --git a/thirdparty/snowball/CMakeLists.txt b/thirdparty/snowball/CMakeLists.txt
new file mode 100644
index 000000000..92d6756b9
--- /dev/null
+++ b/thirdparty/snowball/CMakeLists.txt
@@ -0,0 +1,100 @@
+include(ExternalProject)
+# Snowball source tree and the build-tree copy in which code generation runs.
+set(SNOWBALL_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/snowball-3.1.1")
+set(SNOWBALL_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/snowball-codegen")
+set(SNOWBALL_HOST_CC "" CACHE STRING
+    "Optional host C compiler for building the Snowball code generator")
+find_program(_SNOWBALL_MAKE NAMES make gmake REQUIRED)  # NOTE(review): REQUIRED needs CMake >= 3.18; confirm the project minimum
+
+# ---------------------------------------------------------------------------
+# Parse modules.txt → UTF-8 algorithm list
+# ---------------------------------------------------------------------------
+set(_snowball_gen_srcs)      # absolute paths of the generated stem_UTF_8_*.c
+set(_snowball_gen_hdrs)      # absolute paths of the generated stem_UTF_8_*.h
+set(_snowball_make_targets)  # the same .c files as relative make targets
+file(STRINGS "${SNOWBALL_SOURCE_DIR}/libstemmer/modules.txt" _lines)
+foreach(_line IN LISTS _lines)
+  if(_line MATCHES "^#" OR _line MATCHES "^[ \t]*$")
+    continue()  # skip comment lines and blank lines
+  endif()
+  if(_line MATCHES "^([a-z_]+)[ \t]+([A-Z_0-9,]+)")  # <algorithm> <encodings>
+    set(_alg "${CMAKE_MATCH_1}")  # NOTE(review): group 2 (encodings) is never checked for UTF_8
+    list(APPEND _snowball_gen_srcs
+      "${SNOWBALL_BUILD_DIR}/src_c/stem_UTF_8_${_alg}.c")
+    list(APPEND _snowball_gen_hdrs
+      "${SNOWBALL_BUILD_DIR}/src_c/stem_UTF_8_${_alg}.h")
+    list(APPEND _snowball_make_targets
+      "src_c/stem_UTF_8_${_alg}.c")
+  endif()
+endforeach()
+
+set(_snowball_make_args "CFLAGS=-O2")  # extra arguments for the make command line
+if(NOT SNOWBALL_HOST_CC STREQUAL "")
+  list(APPEND _snowball_make_args "CC=${SNOWBALL_HOST_CC}")  # explicit host compiler
+endif()
+
+# ---------------------------------------------------------------------------
+# Phase 1 (host): build snowball compiler & generate UTF-8 sources only
+# ---------------------------------------------------------------------------
+# Copy source tree into the build directory so the original stays clean.
+# Request only the UTF-8 stemmer sources, the utf8 libstemmer entry point,
+# and the utf8 modules header — no ISO-8859/KOI8 stemmers, no host .a.
+# Each src_c/stem_UTF_8_*.c target implicitly builds the snowball compiler
+# (host executable) as a dependency.
+# By default make uses system `cc`; set SNOWBALL_HOST_CC to override when
+# the environment CC points to a cross-compiler.
+ExternalProject_Add(snowball_codegen
+  DOWNLOAD_COMMAND  ${CMAKE_COMMAND} -E copy_directory
+                    ${SNOWBALL_SOURCE_DIR} ${SNOWBALL_BUILD_DIR}
+  SOURCE_DIR        ${SNOWBALL_BUILD_DIR}
+  CONFIGURE_COMMAND ""  # no configure step
+  BUILD_COMMAND     ${_SNOWBALL_MAKE}
+                    libstemmer/libstemmer_utf8.c
+                    libstemmer/modules_utf8.h
+                    ${_snowball_make_targets}
+                    ${_snowball_make_args}
+  BUILD_IN_SOURCE   TRUE  # make runs inside the copied tree
+  INSTALL_COMMAND   ""  # no install step
+  BUILD_BYPRODUCTS  # files that the snowball library target compiles
+    ${SNOWBALL_BUILD_DIR}/runtime/api.c
+    ${SNOWBALL_BUILD_DIR}/runtime/utilities.c
+    ${SNOWBALL_BUILD_DIR}/libstemmer/libstemmer_utf8.c
+    ${SNOWBALL_BUILD_DIR}/libstemmer/modules_utf8.h
+    ${_snowball_gen_srcs}
+    ${_snowball_gen_hdrs}
+)
+
+# ---------------------------------------------------------------------------
+# Phase 2 (target): compile generated sources with the project toolchain
+# ---------------------------------------------------------------------------
+set(_snowball_target_srcs
+  ${SNOWBALL_BUILD_DIR}/runtime/api.c
+  ${SNOWBALL_BUILD_DIR}/runtime/utilities.c
+  ${SNOWBALL_BUILD_DIR}/libstemmer/libstemmer_utf8.c
+  ${_snowball_gen_srcs}
+)
+# These paths live in the codegen build directory, so mark them GENERATED.
+set_source_files_properties(${_snowball_target_srcs}
+  PROPERTIES GENERATED TRUE)
+
+if(NOT TARGET snowball)
+  add_library(snowball STATIC ${_snowball_target_srcs})
+  add_dependencies(snowball snowball_codegen)  # run codegen before compiling
+  # Public include points to the SOURCE directory — libstemmer.h exists at
+  # configure time and does not depend on the codegen step.
+  target_include_directories(snowball SYSTEM PUBLIC
+    ${SNOWBALL_SOURCE_DIR}/include
+  )
+  # Private includes for generated headers (modules_utf8.h, stem_*.h).
+  target_include_directories(snowball PRIVATE
+    ${SNOWBALL_BUILD_DIR}
+    ${SNOWBALL_BUILD_DIR}/libstemmer
+    ${SNOWBALL_BUILD_DIR}/src_c
+  )
+  set_target_properties(snowball PROPERTIES
+    POSITION_INDEPENDENT_CODE ON
+    C_STANDARD 99
+  )
+endif()
+
+set(snowball_FOUND TRUE PARENT_SCOPE)  # set in the parent directory's scope
diff --git a/thirdparty/snowball/snowball-3.1.1 b/thirdparty/snowball/snowball-3.1.1
new file mode 160000
index 000000000..cd195b51e
--- /dev/null
+++ b/thirdparty/snowball/snowball-3.1.1
@@ -0,0 +1 @@
+Subproject commit cd195b51e948a902a4312f023f4a14392516a543