diff --git a/examples/run_lexical_baselines.py b/examples/run_lexical_baselines.py new file mode 100644 index 0000000..8ff3ed3 --- /dev/null +++ b/examples/run_lexical_baselines.py @@ -0,0 +1,41 @@ +""" +Reproduce benchmark results. +""" + +import workrb + +if __name__ == "__main__": + # 1. Setup model and tasks + models = [ + workrb.models.RandomRankingModel(), + workrb.models.TfIdfModel(tokenization="word"), + workrb.models.TfIdfModel(lowercase=False, tokenization="word"), + workrb.models.TfIdfModel(tokenization="char"), + workrb.models.TfIdfModel(lowercase=False, tokenization="char"), + workrb.models.BM25Model(), + workrb.models.BM25Model(lowercase=False), + workrb.models.EditDistanceModel(), + workrb.models.EditDistanceModel(lowercase=False), + ] + + # Config + langs = [ + "en", + "fr", + "de", + "es", + "nl", + ] + split = "test" + + tasks = [ + workrb.tasks.JobTitleSimilarityRanking(split=split, languages=langs), + ] + + results = workrb.evaluate_multiple_models( + models=models, + tasks=tasks, + output_folder_template="../results/lexical_baselines/{model_name}", + description="WorkRB demo with lexical baselines", + force_restart=True, + ) diff --git a/pyproject.toml b/pyproject.toml index a1ee243..2af1d08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,8 @@ dependencies = [ "PyYAML>=6.0", "sentence-transformers>=5.1.0", "pydantic>=2.11.9", + "rank-bm25>=0.2.2", + "rapidfuzz>=3.9.4", ] [dependency-groups] # https://docs.astral.sh/uv/concepts/projects/dependencies/#development-dependencies diff --git a/src/workrb/models/__init__.py b/src/workrb/models/__init__.py index 00cd8ac..58d15c4 100644 --- a/src/workrb/models/__init__.py +++ b/src/workrb/models/__init__.py @@ -6,12 +6,22 @@ from workrb.models.bi_encoder import BiEncoderModel, ConTeXTMatchModel, JobBERTModel from workrb.models.classification_model import RndESCOClassificationModel from workrb.models.curriculum_encoder import CurriculumMatchModel +from workrb.models.lexical_baselines import ( + BM25Model, + EditDistanceModel, + RandomRankingModel, + TfIdfModel, +) __all__ = [ + "BM25Model", "BiEncoderModel", "ConTeXTMatchModel", "CurriculumMatchModel", + "EditDistanceModel", "JobBERTModel", "ModelInterface", + "RandomRankingModel", "RndESCOClassificationModel", + "TfIdfModel", ] diff --git a/src/workrb/models/lexical_baselines.py b/src/workrb/models/lexical_baselines.py new file mode 100644 index 0000000..85f1085 --- /dev/null +++ b/src/workrb/models/lexical_baselines.py @@ -0,0 +1,470 @@ +"""Lexical baseline models for ranking tasks in WorkRB. + +These models provide fast, CPU-based baselines for ranking tasks. They are useful for +establishing performance bounds and enabling rapid iteration without GPU dependencies. +""" + +import random +import unicodedata + +import numpy as np +import torch +from rank_bm25 import BM25Okapi +from rapidfuzz import fuzz +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.metrics.pairwise import cosine_similarity + +from workrb.models.base import ModelInterface +from workrb.registry import register_model +from workrb.types import ModelInputType + + +@register_model() +class BM25Model(ModelInterface): + """BM25 Okapi probabilistic ranking baseline. + + Parameters + ---------- + lowercase : bool, default=True + Whether to lowercase texts before computing scores. 
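+        Case-folding is applied after NFKD Unicode normalization, and tokens
+        are produced by simple whitespace splitting; no stemming or stop-word
+        removal is performed.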
+ + Example + ------- + >>> model = BM25Model() + >>> queries = ["python developer", "data scientist"] + >>> targets = ["python programming", "machine learning", "software engineer"] + >>> scores = model.compute_rankings(queries, targets, None, None) + >>> scores.shape + torch.Size([2, 3]) + """ + + def __init__(self, lowercase: bool = True): + self.lowercase = lowercase + + @property + def name(self) -> str: + """Return the model name.""" + suffix = "lower" if self.lowercase else "cased" + return f"BM25-{suffix}" + + @property + def description(self) -> str: + """Return the model description.""" + return "BM25 Okapi probabilistic ranking baseline" + + @property + def classification_label_space(self) -> list[str] | None: + """Return None as this model has no fixed label space.""" + return None + + def _preprocess(self, text: str) -> str: + """Preprocess text by normalizing Unicode and optionally lowercasing.""" + text = unicodedata.normalize("NFKD", text) + if self.lowercase: + return text.lower() + return text + + def _compute_rankings( + self, + queries: list[str], + targets: list[str], + query_input_type: ModelInputType | None = None, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute BM25 ranking scores. + + Parameters + ---------- + queries : list[str] + List of query texts + targets : list[str] + List of target texts (corpus) + query_input_type : ModelInputType | None + Type of query input (ignored by this model) + target_input_type : ModelInputType | None + Type of target input (ignored by this model) + + Returns + ------- + torch.Tensor + Tensor of shape (n_queries, n_targets) with BM25 scores + """ + # Preprocess and tokenize corpus + tokenized_corpus = [self._preprocess(target).split() for target in targets] + + # Build BM25 index + bm25 = BM25Okapi(tokenized_corpus) + + # Compute scores for each query + scores = [] + for query in queries: + preprocessed_query = self._preprocess(query) + tokenized_query = preprocessed_query.split() + query_scores = bm25.get_scores(tokenized_query) + scores.append(query_scores) + + scores_array = np.array(scores) + return torch.tensor(scores_array, dtype=torch.float32) + + def _compute_classification( + self, + texts: list[str], + targets: list[str], + input_type: ModelInputType, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute classification scores by ranking texts against target labels. + + Parameters + ---------- + texts : list[str] + List of input texts to classify + targets : list[str] + List of target class labels (as text) + input_type : ModelInputType + Type of input + target_input_type : ModelInputType | None + Type of target. If None, uses input_type. + + Returns + ------- + torch.Tensor + Tensor of shape (n_texts, n_classes) with BM25 scores + """ + if target_input_type is None: + target_input_type = input_type + + return self._compute_rankings(texts, targets, input_type, target_input_type) + + +@register_model() +class TfIdfModel(ModelInterface): + """TF-IDF baseline with configurable tokenization. + + Uses TF-IDF (Term Frequency-Inverse Document Frequency) vectorization followed by + cosine similarity. Supports both word-level and character n-gram tokenization. + + Parameters + ---------- + lowercase : bool, default=True + Whether to lowercase texts before computing scores. + tokenization : str, default="word" + Tokenization strategy. 
Options: + - "word": Word-level tokenization (default) + - "char": Character n-gram tokenization (1-3 grams) + """ + + def __init__(self, lowercase: bool = True, tokenization: str = "word"): + if tokenization not in ["word", "char"]: + raise ValueError(f"Invalid tokenization: {tokenization}. Must be 'word' or 'char'.") + + self.lowercase = lowercase + self.tokenization = tokenization + + @property + def name(self) -> str: + """Return the model name.""" + suffix = "lower" if self.lowercase else "cased" + return f"TfIdf-{self.tokenization}-{suffix}" + + @property + def description(self) -> str: + """Return the model description.""" + if self.tokenization == "word": + return "TF-IDF baseline with word-level tokenization" + return "TF-IDF baseline with character n-gram tokenization (1-3)" + + @property + def classification_label_space(self) -> list[str] | None: + """Return None as this model has no fixed label space.""" + return None + + def _preprocess(self, text: str) -> str: + """Preprocess text by normalizing Unicode and optionally lowercasing.""" + text = unicodedata.normalize("NFKD", text) + if self.lowercase: + return text.lower() + return text + + def _compute_rankings( + self, + queries: list[str], + targets: list[str], + query_input_type: ModelInputType | None = None, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute TF-IDF ranking scores. + + Parameters + ---------- + queries : list[str] + List of query texts + targets : list[str] + List of target texts (corpus) + query_input_type : ModelInputType | None + Type of query input (ignored by this model) + target_input_type : ModelInputType | None + Type of target input (ignored by this model) + + Returns + ------- + torch.Tensor + Tensor of shape (n_queries, n_targets) with cosine similarity scores + """ + # Preprocess corpus + processed_corpus = [self._preprocess(target) for target in targets] + + # Configure vectorizer based on tokenization strategy + if self.tokenization == "char": + vectorizer = TfidfVectorizer(analyzer="char", ngram_range=(1, 3)) + else: + vectorizer = TfidfVectorizer() + + # Fit vectorizer on corpus + tfidf_matrix = vectorizer.fit_transform(processed_corpus) + + # Compute scores for each query + scores = [] + for query in queries: + preprocessed_query = self._preprocess(query) + query_vector = vectorizer.transform([preprocessed_query]) + query_scores = cosine_similarity(query_vector, tfidf_matrix).flatten() + scores.append(query_scores) + + scores_array = np.array(scores) + return torch.tensor(scores_array, dtype=torch.float32) + + def _compute_classification( + self, + texts: list[str], + targets: list[str], + input_type: ModelInputType, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute classification scores by ranking texts against target labels. + + Parameters + ---------- + texts : list[str] + List of input texts to classify + targets : list[str] + List of target class labels (as text) + input_type : ModelInputType + Type of input + target_input_type : ModelInputType | None + Type of target. If None, uses input_type. + + Returns + ------- + torch.Tensor + Tensor of shape (n_texts, n_classes) with cosine similarity scores + """ + if target_input_type is None: + target_input_type = input_type + + return self._compute_rankings(texts, targets, input_type, target_input_type) + + +@register_model() +class EditDistanceModel(ModelInterface): + """Edit distance (Levenshtein ratio) baseline. 
+ + Computes the Levenshtein ratio between query and target strings. The ratio is + normalized to [0, 100] where 100 indicates identical strings. This model is + effective for near-exact matches and normalization tasks. + + Parameters + ---------- + lowercase : bool, default=True + Whether to lowercase texts before computing scores. + """ + + def __init__(self, lowercase: bool = True): + self.lowercase = lowercase + + @property + def name(self) -> str: + """Return the model name.""" + suffix = "lower" if self.lowercase else "cased" + return f"EditDistance-{suffix}" + + @property + def description(self) -> str: + """Return the model description.""" + return "Levenshtein ratio baseline for string similarity" + + @property + def classification_label_space(self) -> list[str] | None: + """Return None as this model has no fixed label space.""" + return None + + def _preprocess(self, text: str) -> str: + """Preprocess text by normalizing Unicode and optionally lowercasing.""" + text = unicodedata.normalize("NFKD", text) + if self.lowercase: + return text.lower() + return text + + def _compute_rankings( + self, + queries: list[str], + targets: list[str], + query_input_type: ModelInputType | None = None, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute edit distance ranking scores. + + Parameters + ---------- + queries : list[str] + List of query texts + targets : list[str] + List of target texts + query_input_type : ModelInputType | None + Type of query input (ignored by this model) + target_input_type : ModelInputType | None + Type of target input (ignored by this model) + + Returns + ------- + torch.Tensor + Tensor of shape (n_queries, n_targets) with Levenshtein ratio scores [0-100] + """ + scores = [] + for query in queries: + query_preprocessed = self._preprocess(query) + query_scores = [] + for target in targets: + target_preprocessed = self._preprocess(target) + score = fuzz.ratio(query_preprocessed, target_preprocessed) + query_scores.append(score) + scores.append(query_scores) + + return torch.tensor(scores, dtype=torch.float32) + + def _compute_classification( + self, + texts: list[str], + targets: list[str], + input_type: ModelInputType, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute classification scores by ranking texts against target labels. + + Parameters + ---------- + texts : list[str] + List of input texts to classify + targets : list[str] + List of target class labels (as text) + input_type : ModelInputType + Type of input + target_input_type : ModelInputType | None + Type of target. If None, uses input_type. + + Returns + ------- + torch.Tensor + Tensor of shape (n_texts, n_classes) with Levenshtein ratio scores + """ + if target_input_type is None: + target_input_type = input_type + + return self._compute_rankings(texts, targets, input_type, target_input_type) + + +@register_model() +class RandomRankingModel(ModelInterface): + """Random ranking baseline for sanity checking. + + Generates random scores between 0 and 1 for all query-target pairs. This serves + as a control baseline to verify that evaluation metrics and pipelines are working + correctly. Any reasonable model should significantly outperform random scoring. + + Parameters + ---------- + seed : int | None, default=None + Random seed for reproducibility. If None, results will vary between runs. 
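+
+    Example
+    -------
+    >>> model = RandomRankingModel(seed=42)
+    >>> scores = model.compute_rankings(["query"], ["a", "b", "c"], None, None)
+    >>> scores.shape
+    torch.Size([1, 3])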
+ """ + + def __init__(self, seed: int | None = None): + self.seed = seed + if seed is not None: + random.seed(seed) + + @property + def name(self) -> str: + """Return the model name.""" + return "RandomRanking" + + @property + def description(self) -> str: + """Return the model description.""" + return "Random ranking baseline for sanity checking" + + @property + def classification_label_space(self) -> list[str] | None: + """Return None as this model has no fixed label space.""" + return None + + def _compute_rankings( + self, + queries: list[str], + targets: list[str], + query_input_type: ModelInputType | None = None, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute random ranking scores. + + Parameters + ---------- + queries : list[str] + List of query texts + targets : list[str] + List of target texts + query_input_type : ModelInputType | None + Type of query input (ignored by this model) + target_input_type : ModelInputType | None + Type of target input (ignored by this model) + + Returns + ------- + torch.Tensor + Tensor of shape (n_queries, n_targets) with random scores [0-1] + """ + scores = [] + for _ in queries: + query_scores = [random.random() for _ in targets] + scores.append(query_scores) + + return torch.tensor(scores, dtype=torch.float32) + + def _compute_classification( + self, + texts: list[str], + targets: list[str], + input_type: ModelInputType, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute random classification scores. + + Parameters + ---------- + texts : list[str] + List of input texts to classify + targets : list[str] + List of target class labels (as text) + input_type : ModelInputType + Type of input + target_input_type : ModelInputType | None + Type of target. If None, uses input_type. 
+ + Returns + ------- + torch.Tensor + Tensor of shape (n_texts, n_classes) with random scores [0-1] + """ + if target_input_type is None: + target_input_type = input_type + + return self._compute_rankings(texts, targets, input_type, target_input_type) diff --git a/tests/test_lexical_baselines.py b/tests/test_lexical_baselines.py new file mode 100644 index 0000000..2982556 --- /dev/null +++ b/tests/test_lexical_baselines.py @@ -0,0 +1,414 @@ +"""Unit tests for lexical baseline models.""" + +import torch + +from workrb.models.lexical_baselines import ( + BM25Model, + EditDistanceModel, + RandomRankingModel, + TfIdfModel, +) +from workrb.types import ModelInputType + + +class TestBM25Model: + """Test BM25Model initialization and functionality.""" + + def test_model_initialization_default(self): + """Test BM25Model initialization with default parameters.""" + model = BM25Model() + assert model is not None + assert model.lowercase is True + + def test_model_initialization_custom_params(self): + """Test BM25Model initialization with custom parameters.""" + model = BM25Model(lowercase=False) + assert model.lowercase is False + + def test_model_properties(self): + """Test BM25Model name and description properties.""" + model = BM25Model() + assert model.name == "BM25-lower" + assert isinstance(model.description, str) + assert len(model.description) > 0 + assert model.classification_label_space is None + + def test_model_name_cased(self): + """Test BM25Model name with lowercase=False.""" + model = BM25Model(lowercase=False) + assert model.name == "BM25-cased" + + def test_compute_rankings_basic(self): + """Test basic BM25 ranking computation.""" + model = BM25Model() + queries = ["python developer", "data scientist"] + targets = ["python programming", "machine learning", "java developer"] + + scores = model._compute_rankings( + queries=queries, + targets=targets, + query_input_type=ModelInputType.JOB_TITLE, + target_input_type=ModelInputType.SKILL_NAME, + ) + + # Check output shape + assert scores.shape == (len(queries), len(targets)) + assert isinstance(scores, torch.Tensor) + assert scores.dtype == torch.float32 + + # Scores should be finite and non-negative (BM25 scores are >= 0) + assert torch.isfinite(scores).all() + assert (scores >= 0).all() + + def test_compute_rankings_lowercase_sensitivity(self): + """Test that lowercase parameter affects preprocessing.""" + model_lower = BM25Model(lowercase=True) + model_no_lower = BM25Model(lowercase=False) + + # Test the preprocessing method directly + text = "Python Developer" + + assert model_lower._preprocess(text) == "python developer" + assert model_no_lower._preprocess(text) == "Python Developer" + + def test_compute_classification(self): + """Test BM25 classification computation.""" + model = BM25Model() + texts = ["python developer", "data scientist"] + targets = ["python", "machine learning", "statistics"] + + scores = model._compute_classification( + texts=texts, + targets=targets, + input_type=ModelInputType.JOB_TITLE, + ) + + assert scores.shape == (len(texts), len(targets)) + assert isinstance(scores, torch.Tensor) + assert torch.isfinite(scores).all() + + +class TestTfIdfModel: + """Test TfIdfModel initialization and functionality.""" + + def test_model_initialization_default(self): + """Test TfIdfModel initialization with default parameters.""" + model = TfIdfModel() + assert model is not None + assert model.lowercase is True + assert model.tokenization == "word" + + def test_model_initialization_word_tokenization(self): + """Test 
TfIdfModel initialization with word tokenization.""" + model = TfIdfModel(lowercase=True, tokenization="word") + assert model.lowercase is True + assert model.tokenization == "word" + + def test_model_initialization_char_tokenization(self): + """Test TfIdfModel initialization with char tokenization.""" + model = TfIdfModel(lowercase=False, tokenization="char") + assert model.lowercase is False + assert model.tokenization == "char" + + def test_model_initialization_invalid_tokenization(self): + """Test that invalid tokenization raises ValueError.""" + try: + TfIdfModel(tokenization="invalid") + assert False, "Should have raised ValueError" + except ValueError as e: + assert "Invalid tokenization" in str(e) + assert "Must be 'word' or 'char'" in str(e) + + def test_model_properties_word(self): + """Test TfIdfModel properties with word tokenization.""" + model = TfIdfModel(tokenization="word") + assert model.name == "TfIdf-word-lower" + assert "word-level" in model.description + assert model.classification_label_space is None + + def test_model_properties_char(self): + """Test TfIdfModel properties with char tokenization.""" + model = TfIdfModel(tokenization="char") + assert model.name == "TfIdf-char-lower" + assert "character n-gram" in model.description + assert model.classification_label_space is None + + def test_model_name_cased(self): + """Test TfIdfModel name with lowercase=False.""" + model_word = TfIdfModel(lowercase=False, tokenization="word") + assert model_word.name == "TfIdf-word-cased" + + model_char = TfIdfModel(lowercase=False, tokenization="char") + assert model_char.name == "TfIdf-char-cased" + + def test_compute_rankings_word_tokenization(self): + """Test TF-IDF ranking with word tokenization.""" + model = TfIdfModel(tokenization="word") + queries = ["python developer", "data scientist"] + targets = ["python programming", "machine learning", "java developer"] + + scores = model._compute_rankings( + queries=queries, + targets=targets, + query_input_type=ModelInputType.JOB_TITLE, + target_input_type=ModelInputType.SKILL_NAME, + ) + + # Check output shape + assert scores.shape == (len(queries), len(targets)) + assert isinstance(scores, torch.Tensor) + assert scores.dtype == torch.float32 + + # Cosine similarity scores should be in [-1, 1], but typically [0, 1] for TF-IDF + assert torch.isfinite(scores).all() + assert (scores >= -1).all() and (scores <= 1).all() + + def test_compute_rankings_char_tokenization(self): + """Test TF-IDF ranking with character n-gram tokenization.""" + model = TfIdfModel(tokenization="char") + queries = ["python", "java"] + targets = ["python", "pithon", "java"] # pithon is similar to python + + scores = model._compute_rankings(queries, targets) + + # Check output shape + assert scores.shape == (len(queries), len(targets)) + assert isinstance(scores, torch.Tensor) + + # Character n-grams should give high similarity for "python" vs "pithon" + # query[0]="python" should have higher score with target[0]="python" than target[2]="java" + assert scores[0, 0].item() > scores[0, 2].item() + # query[0]="python" should have higher score with target[1]="pithon" than target[2]="java" + assert scores[0, 1].item() > scores[0, 2].item() + + def test_compute_rankings_word_vs_char(self): + """Test that word and char tokenization produce different results.""" + queries = ["software engineer"] + targets = ["software development", "sofware engineering"] # typo in second + + # Word tokenization + model_word = TfIdfModel(tokenization="word") + scores_word = 
model_word._compute_rankings(queries, targets) + + # Character tokenization + model_char = TfIdfModel(tokenization="char") + scores_char = model_char._compute_rankings(queries, targets) + + # Character n-grams should handle the typo better + # So the relative scores should be different + assert not torch.allclose(scores_word, scores_char, atol=0.01) + + def test_compute_classification(self): + """Test TF-IDF classification computation.""" + model = TfIdfModel() + texts = ["python developer", "data scientist"] + targets = ["python", "machine learning", "statistics"] + + scores = model._compute_classification( + texts=texts, + targets=targets, + input_type=ModelInputType.JOB_TITLE, + ) + + assert scores.shape == (len(texts), len(targets)) + assert isinstance(scores, torch.Tensor) + assert torch.isfinite(scores).all() + + +class TestEditDistanceModel: + """Test EditDistanceModel initialization and functionality.""" + + def test_model_initialization_default(self): + """Test EditDistanceModel initialization with default parameters.""" + model = EditDistanceModel() + assert model is not None + assert model.lowercase is True + + def test_model_initialization_custom_params(self): + """Test EditDistanceModel initialization with custom parameters.""" + model = EditDistanceModel(lowercase=False) + assert model.lowercase is False + + def test_model_properties(self): + """Test EditDistanceModel name and description properties.""" + model = EditDistanceModel() + assert model.name == "EditDistance-lower" + assert isinstance(model.description, str) + assert "Levenshtein" in model.description + assert model.classification_label_space is None + + def test_model_name_cased(self): + """Test EditDistanceModel name with lowercase=False.""" + model = EditDistanceModel(lowercase=False) + assert model.name == "EditDistance-cased" + + def test_compute_rankings_basic(self): + """Test basic edit distance ranking computation.""" + model = EditDistanceModel() + queries = ["python developer", "data scientist"] + targets = ["python developer", "python developper", "java engineer"] + + scores = model._compute_rankings( + queries=queries, + targets=targets, + query_input_type=ModelInputType.JOB_TITLE, + target_input_type=ModelInputType.SKILL_NAME, + ) + + # Check output shape + assert scores.shape == (len(queries), len(targets)) + assert isinstance(scores, torch.Tensor) + assert scores.dtype == torch.float32 + + # Levenshtein ratio scores are in [0, 100] + assert torch.isfinite(scores).all() + assert (scores >= 0).all() and (scores <= 100).all() + + # Exact match should have score 100 + assert scores[0, 0].item() == 100.0 + + # Similar strings should have high scores + assert scores[0, 1].item() > 80 # "python developer" vs "python developper" + + def test_compute_rankings_exact_matches(self): + """Test that exact matches get score of 100.""" + model = EditDistanceModel() + queries = ["test", "example"] + targets = ["test", "example", "different"] + + scores = model._compute_rankings(queries, targets) + + # Exact matches should be 100 + assert scores[0, 0].item() == 100.0 # "test" vs "test" + assert scores[1, 1].item() == 100.0 # "example" vs "example" + + def test_compute_rankings_lowercase_sensitivity(self): + """Test that lowercase parameter affects edit distance scores.""" + queries = ["Python"] + targets = ["python", "PYTHON"] + + # With lowercase=True (default) + model_lower = EditDistanceModel(lowercase=True) + scores_lower = model_lower._compute_rankings(queries, targets) + + # Both should be exact matches after 
lowercasing + assert scores_lower[0, 0].item() == 100.0 + assert scores_lower[0, 1].item() == 100.0 + + # With lowercase=False + model_no_lower = EditDistanceModel(lowercase=False) + scores_no_lower = model_no_lower._compute_rankings(queries, targets) + + # Should not be exact matches without lowercasing + assert scores_no_lower[0, 0].item() < 100.0 + assert scores_no_lower[0, 1].item() < 100.0 + + def test_compute_classification(self): + """Test edit distance classification computation.""" + model = EditDistanceModel() + texts = ["python", "java"] + targets = ["python", "javascript", "ruby"] + + scores = model._compute_classification( + texts=texts, + targets=targets, + input_type=ModelInputType.SKILL_NAME, + ) + + assert scores.shape == (len(texts), len(targets)) + assert isinstance(scores, torch.Tensor) + assert torch.isfinite(scores).all() + assert (scores >= 0).all() and (scores <= 100).all() + + +class TestRandomRankingModel: + """Test RandomRankingModel initialization and functionality.""" + + def test_model_initialization_default(self): + """Test RandomRankingModel initialization with default parameters.""" + model = RandomRankingModel() + assert model is not None + assert model.seed is None + + def test_model_initialization_with_seed(self): + """Test RandomRankingModel initialization with seed.""" + model = RandomRankingModel(seed=42) + assert model.seed == 42 + + def test_model_properties(self): + """Test RandomRankingModel name and description properties.""" + model = RandomRankingModel() + assert model.name == "RandomRanking" + assert isinstance(model.description, str) + assert "random" in model.description.lower() or "Random" in model.description + assert model.classification_label_space is None + + def test_compute_rankings_basic(self): + """Test basic random ranking computation.""" + model = RandomRankingModel(seed=42) + queries = ["python developer", "data scientist"] + targets = ["python programming", "machine learning", "java developer"] + + scores = model._compute_rankings( + queries=queries, + targets=targets, + query_input_type=ModelInputType.JOB_TITLE, + target_input_type=ModelInputType.SKILL_NAME, + ) + + # Check output shape + assert scores.shape == (len(queries), len(targets)) + assert isinstance(scores, torch.Tensor) + assert scores.dtype == torch.float32 + + # Random scores should be in [0, 1] + assert torch.isfinite(scores).all() + assert (scores >= 0).all() and (scores <= 1).all() + + def test_compute_rankings_reproducibility_with_seed(self): + """Test that same seed produces same results.""" + queries = ["test query"] + targets = ["target1", "target2", "target3"] + + # First run with seed + model1 = RandomRankingModel(seed=12345) + scores1 = model1._compute_rankings(queries, targets) + + # Second run with same seed + model2 = RandomRankingModel(seed=12345) + scores2 = model2._compute_rankings(queries, targets) + + # Should produce identical results + assert torch.allclose(scores1, scores2) + + def test_compute_rankings_different_without_seed(self): + """Test that different seeds produce different results.""" + queries = ["test query"] + targets = ["target1", "target2", "target3"] + + # Run with different seeds + model1 = RandomRankingModel(seed=1) + scores1 = model1._compute_rankings(queries, targets) + + model2 = RandomRankingModel(seed=2) + scores2 = model2._compute_rankings(queries, targets) + + # Should produce different results + assert not torch.allclose(scores1, scores2) + + def test_compute_classification(self): + """Test random classification 
computation.""" + model = RandomRankingModel(seed=42) + texts = ["python developer", "data scientist"] + targets = ["python", "machine learning", "statistics"] + + scores = model._compute_classification( + texts=texts, + targets=targets, + input_type=ModelInputType.JOB_TITLE, + ) + + assert scores.shape == (len(texts), len(targets)) + assert isinstance(scores, torch.Tensor) + assert torch.isfinite(scores).all() + assert (scores >= 0).all() and (scores <= 1).all()