diff --git a/examples/run_lexical_baselines.py b/examples/run_lexical_baselines.py new file mode 100644 index 0000000..8ff3ed3 --- /dev/null +++ b/examples/run_lexical_baselines.py @@ -0,0 +1,41 @@ +""" +Reproduce benchmark results. +""" + +import workrb + +if __name__ == "__main__": + # 1. Setup model and tasks + models = [ + workrb.models.RandomRankingModel(), + workrb.models.TfIdfModel(tokenization="word"), + workrb.models.TfIdfModel(lowercase=False, tokenization="word"), + workrb.models.TfIdfModel(tokenization="char"), + workrb.models.TfIdfModel(lowercase=False, tokenization="char"), + workrb.models.BM25Model(), + workrb.models.BM25Model(lowercase=False), + workrb.models.EditDistanceModel(), + workrb.models.EditDistanceModel(lowercase=False), + ] + + # Config + langs = [ + "en", + "fr", + "de", + "es", + "nl", + ] + split = "test" + + tasks = [ + workrb.tasks.JobTitleSimilarityRanking(split=split, languages=langs), + ] + + results = workrb.evaluate_multiple_models( + models=models, + tasks=tasks, + output_folder_template="../results/lexical_baselines/{model_name}", + description="WorkRB demo with lexical baselines", + force_restart=True, + ) diff --git a/pyproject.toml b/pyproject.toml index a1ee243..2af1d08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,8 @@ dependencies = [ "PyYAML>=6.0", "sentence-transformers>=5.1.0", "pydantic>=2.11.9", + "rank-bm25>=0.2.2", + "rapidfuzz>=3.9.4", ] [dependency-groups] # https://docs.astral.sh/uv/concepts/projects/dependencies/#development-dependencies diff --git a/src/workrb/models/__init__.py b/src/workrb/models/__init__.py index 00cd8ac..58d15c4 100644 --- a/src/workrb/models/__init__.py +++ b/src/workrb/models/__init__.py @@ -6,12 +6,22 @@ from workrb.models.bi_encoder import BiEncoderModel, ConTeXTMatchModel, JobBERTModel from workrb.models.classification_model import RndESCOClassificationModel from workrb.models.curriculum_encoder import CurriculumMatchModel +from workrb.models.lexical_baselines import ( + BM25Model, + EditDistanceModel, + RandomRankingModel, + TfIdfModel, +) __all__ = [ + "BM25Model", "BiEncoderModel", "ConTeXTMatchModel", "CurriculumMatchModel", + "EditDistanceModel", "JobBERTModel", "ModelInterface", + "RandomRankingModel", "RndESCOClassificationModel", + "TfIdfModel", ] diff --git a/src/workrb/models/lexical_baselines.py b/src/workrb/models/lexical_baselines.py new file mode 100644 index 0000000..85f1085 --- /dev/null +++ b/src/workrb/models/lexical_baselines.py @@ -0,0 +1,470 @@ +"""Lexical baseline models for ranking tasks in WorkRB. + +These models provide fast, CPU-based baselines for ranking tasks. They are useful for +establishing performance bounds and enabling rapid iteration without GPU dependencies. +""" + +import random +import unicodedata + +import numpy as np +import torch +from rank_bm25 import BM25Okapi +from rapidfuzz import fuzz +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.metrics.pairwise import cosine_similarity + +from workrb.models.base import ModelInterface +from workrb.registry import register_model +from workrb.types import ModelInputType + + +@register_model() +class BM25Model(ModelInterface): + """BM25 Okapi probabilistic ranking baseline. + + Parameters + ---------- + lowercase : bool, default=True + Whether to lowercase texts before computing scores. 
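+        Case-folding is applied after NFKD Unicode normalization, and tokens
+        are produced by simple whitespace splitting; no stemming or stop-word
+        removal is performed.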
+ + Example + ------- + >>> model = BM25Model() + >>> queries = ["python developer", "data scientist"] + >>> targets = ["python programming", "machine learning", "software engineer"] + >>> scores = model.compute_rankings(queries, targets, None, None) + >>> scores.shape + torch.Size([2, 3]) + """ + + def __init__(self, lowercase: bool = True): + self.lowercase = lowercase + + @property + def name(self) -> str: + """Return the model name.""" + suffix = "lower" if self.lowercase else "cased" + return f"BM25-{suffix}" + + @property + def description(self) -> str: + """Return the model description.""" + return "BM25 Okapi probabilistic ranking baseline" + + @property + def classification_label_space(self) -> list[str] | None: + """Return None as this model has no fixed label space.""" + return None + + def _preprocess(self, text: str) -> str: + """Preprocess text by normalizing Unicode and optionally lowercasing.""" + text = unicodedata.normalize("NFKD", text) + if self.lowercase: + return text.lower() + return text + + def _compute_rankings( + self, + queries: list[str], + targets: list[str], + query_input_type: ModelInputType | None = None, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute BM25 ranking scores. + + Parameters + ---------- + queries : list[str] + List of query texts + targets : list[str] + List of target texts (corpus) + query_input_type : ModelInputType | None + Type of query input (ignored by this model) + target_input_type : ModelInputType | None + Type of target input (ignored by this model) + + Returns + ------- + torch.Tensor + Tensor of shape (n_queries, n_targets) with BM25 scores + """ + # Preprocess and tokenize corpus + tokenized_corpus = [self._preprocess(target).split() for target in targets] + + # Build BM25 index + bm25 = BM25Okapi(tokenized_corpus) + + # Compute scores for each query + scores = [] + for query in queries: + preprocessed_query = self._preprocess(query) + tokenized_query = preprocessed_query.split() + query_scores = bm25.get_scores(tokenized_query) + scores.append(query_scores) + + scores_array = np.array(scores) + return torch.tensor(scores_array, dtype=torch.float32) + + def _compute_classification( + self, + texts: list[str], + targets: list[str], + input_type: ModelInputType, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute classification scores by ranking texts against target labels. + + Parameters + ---------- + texts : list[str] + List of input texts to classify + targets : list[str] + List of target class labels (as text) + input_type : ModelInputType + Type of input + target_input_type : ModelInputType | None + Type of target. If None, uses input_type. + + Returns + ------- + torch.Tensor + Tensor of shape (n_texts, n_classes) with BM25 scores + """ + if target_input_type is None: + target_input_type = input_type + + return self._compute_rankings(texts, targets, input_type, target_input_type) + + +@register_model() +class TfIdfModel(ModelInterface): + """TF-IDF baseline with configurable tokenization. + + Uses TF-IDF (Term Frequency-Inverse Document Frequency) vectorization followed by + cosine similarity. Supports both word-level and character n-gram tokenization. + + Parameters + ---------- + lowercase : bool, default=True + Whether to lowercase texts before computing scores. + tokenization : str, default="word" + Tokenization strategy. 
Options: + - "word": Word-level tokenization (default) + - "char": Character n-gram tokenization (1-3 grams) + """ + + def __init__(self, lowercase: bool = True, tokenization: str = "word"): + if tokenization not in ["word", "char"]: + raise ValueError(f"Invalid tokenization: {tokenization}. Must be 'word' or 'char'.") + + self.lowercase = lowercase + self.tokenization = tokenization + + @property + def name(self) -> str: + """Return the model name.""" + suffix = "lower" if self.lowercase else "cased" + return f"TfIdf-{self.tokenization}-{suffix}" + + @property + def description(self) -> str: + """Return the model description.""" + if self.tokenization == "word": + return "TF-IDF baseline with word-level tokenization" + return "TF-IDF baseline with character n-gram tokenization (1-3)" + + @property + def classification_label_space(self) -> list[str] | None: + """Return None as this model has no fixed label space.""" + return None + + def _preprocess(self, text: str) -> str: + """Preprocess text by normalizing Unicode and optionally lowercasing.""" + text = unicodedata.normalize("NFKD", text) + if self.lowercase: + return text.lower() + return text + + def _compute_rankings( + self, + queries: list[str], + targets: list[str], + query_input_type: ModelInputType | None = None, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute TF-IDF ranking scores. + + Parameters + ---------- + queries : list[str] + List of query texts + targets : list[str] + List of target texts (corpus) + query_input_type : ModelInputType | None + Type of query input (ignored by this model) + target_input_type : ModelInputType | None + Type of target input (ignored by this model) + + Returns + ------- + torch.Tensor + Tensor of shape (n_queries, n_targets) with cosine similarity scores + """ + # Preprocess corpus + processed_corpus = [self._preprocess(target) for target in targets] + + # Configure vectorizer based on tokenization strategy + if self.tokenization == "char": + vectorizer = TfidfVectorizer(analyzer="char", ngram_range=(1, 3)) + else: + vectorizer = TfidfVectorizer() + + # Fit vectorizer on corpus + tfidf_matrix = vectorizer.fit_transform(processed_corpus) + + # Compute scores for each query + scores = [] + for query in queries: + preprocessed_query = self._preprocess(query) + query_vector = vectorizer.transform([preprocessed_query]) + query_scores = cosine_similarity(query_vector, tfidf_matrix).flatten() + scores.append(query_scores) + + scores_array = np.array(scores) + return torch.tensor(scores_array, dtype=torch.float32) + + def _compute_classification( + self, + texts: list[str], + targets: list[str], + input_type: ModelInputType, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute classification scores by ranking texts against target labels. + + Parameters + ---------- + texts : list[str] + List of input texts to classify + targets : list[str] + List of target class labels (as text) + input_type : ModelInputType + Type of input + target_input_type : ModelInputType | None + Type of target. If None, uses input_type. + + Returns + ------- + torch.Tensor + Tensor of shape (n_texts, n_classes) with cosine similarity scores + """ + if target_input_type is None: + target_input_type = input_type + + return self._compute_rankings(texts, targets, input_type, target_input_type) + + +@register_model() +class EditDistanceModel(ModelInterface): + """Edit distance (Levenshtein ratio) baseline. 
+ + Computes the Levenshtein ratio between query and target strings. The ratio is + normalized to [0, 100] where 100 indicates identical strings. This model is + effective for near-exact matches and normalization tasks. + + Parameters + ---------- + lowercase : bool, default=True + Whether to lowercase texts before computing scores. + """ + + def __init__(self, lowercase: bool = True): + self.lowercase = lowercase + + @property + def name(self) -> str: + """Return the model name.""" + suffix = "lower" if self.lowercase else "cased" + return f"EditDistance-{suffix}" + + @property + def description(self) -> str: + """Return the model description.""" + return "Levenshtein ratio baseline for string similarity" + + @property + def classification_label_space(self) -> list[str] | None: + """Return None as this model has no fixed label space.""" + return None + + def _preprocess(self, text: str) -> str: + """Preprocess text by normalizing Unicode and optionally lowercasing.""" + text = unicodedata.normalize("NFKD", text) + if self.lowercase: + return text.lower() + return text + + def _compute_rankings( + self, + queries: list[str], + targets: list[str], + query_input_type: ModelInputType | None = None, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute edit distance ranking scores. + + Parameters + ---------- + queries : list[str] + List of query texts + targets : list[str] + List of target texts + query_input_type : ModelInputType | None + Type of query input (ignored by this model) + target_input_type : ModelInputType | None + Type of target input (ignored by this model) + + Returns + ------- + torch.Tensor + Tensor of shape (n_queries, n_targets) with Levenshtein ratio scores [0-100] + """ + scores = [] + for query in queries: + query_preprocessed = self._preprocess(query) + query_scores = [] + for target in targets: + target_preprocessed = self._preprocess(target) + score = fuzz.ratio(query_preprocessed, target_preprocessed) + query_scores.append(score) + scores.append(query_scores) + + return torch.tensor(scores, dtype=torch.float32) + + def _compute_classification( + self, + texts: list[str], + targets: list[str], + input_type: ModelInputType, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute classification scores by ranking texts against target labels. + + Parameters + ---------- + texts : list[str] + List of input texts to classify + targets : list[str] + List of target class labels (as text) + input_type : ModelInputType + Type of input + target_input_type : ModelInputType | None + Type of target. If None, uses input_type. + + Returns + ------- + torch.Tensor + Tensor of shape (n_texts, n_classes) with Levenshtein ratio scores + """ + if target_input_type is None: + target_input_type = input_type + + return self._compute_rankings(texts, targets, input_type, target_input_type) + + +@register_model() +class RandomRankingModel(ModelInterface): + """Random ranking baseline for sanity checking. + + Generates random scores between 0 and 1 for all query-target pairs. This serves + as a control baseline to verify that evaluation metrics and pipelines are working + correctly. Any reasonable model should significantly outperform random scoring. + + Parameters + ---------- + seed : int | None, default=None + Random seed for reproducibility. If None, results will vary between runs. 
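+
+    Example
+    -------
+    >>> model = RandomRankingModel(seed=42)
+    >>> scores = model.compute_rankings(["query"], ["a", "b", "c"], None, None)
+    >>> scores.shape
+    torch.Size([1, 3])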
+ """ + + def __init__(self, seed: int | None = None): + self.seed = seed + if seed is not None: + random.seed(seed) + + @property + def name(self) -> str: + """Return the model name.""" + return "RandomRanking" + + @property + def description(self) -> str: + """Return the model description.""" + return "Random ranking baseline for sanity checking" + + @property + def classification_label_space(self) -> list[str] | None: + """Return None as this model has no fixed label space.""" + return None + + def _compute_rankings( + self, + queries: list[str], + targets: list[str], + query_input_type: ModelInputType | None = None, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute random ranking scores. + + Parameters + ---------- + queries : list[str] + List of query texts + targets : list[str] + List of target texts + query_input_type : ModelInputType | None + Type of query input (ignored by this model) + target_input_type : ModelInputType | None + Type of target input (ignored by this model) + + Returns + ------- + torch.Tensor + Tensor of shape (n_queries, n_targets) with random scores [0-1] + """ + scores = [] + for _ in queries: + query_scores = [random.random() for _ in targets] + scores.append(query_scores) + + return torch.tensor(scores, dtype=torch.float32) + + def _compute_classification( + self, + texts: list[str], + targets: list[str], + input_type: ModelInputType, + target_input_type: ModelInputType | None = None, + ) -> torch.Tensor: + """Compute random classification scores. + + Parameters + ---------- + texts : list[str] + List of input texts to classify + targets : list[str] + List of target class labels (as text) + input_type : ModelInputType + Type of input + target_input_type : ModelInputType | None + Type of target. If None, uses input_type. 
+ + Returns + ------- + torch.Tensor + Tensor of shape (n_texts, n_classes) with random scores [0-1] + """ + if target_input_type is None: + target_input_type = input_type + + return self._compute_rankings(texts, targets, input_type, target_input_type) diff --git a/tests/test_lexical_baselines.py b/tests/test_lexical_baselines.py new file mode 100644 index 0000000..2982556 --- /dev/null +++ b/tests/test_lexical_baselines.py @@ -0,0 +1,414 @@ +"""Unit tests for lexical baseline models.""" + +import torch + +from workrb.models.lexical_baselines import ( + BM25Model, + EditDistanceModel, + RandomRankingModel, + TfIdfModel, +) +from workrb.types import ModelInputType + + +class TestBM25Model: + """Test BM25Model initialization and functionality.""" + + def test_model_initialization_default(self): + """Test BM25Model initialization with default parameters.""" + model = BM25Model() + assert model is not None + assert model.lowercase is True + + def test_model_initialization_custom_params(self): + """Test BM25Model initialization with custom parameters.""" + model = BM25Model(lowercase=False) + assert model.lowercase is False + + def test_model_properties(self): + """Test BM25Model name and description properties.""" + model = BM25Model() + assert model.name == "BM25-lower" + assert isinstance(model.description, str) + assert len(model.description) > 0 + assert model.classification_label_space is None + + def test_model_name_cased(self): + """Test BM25Model name with lowercase=False.""" + model = BM25Model(lowercase=False) + assert model.name == "BM25-cased" + + def test_compute_rankings_basic(self): + """Test basic BM25 ranking computation.""" + model = BM25Model() + queries = ["python developer", "data scientist"] + targets = ["python programming", "machine learning", "java developer"] + + scores = model._compute_rankings( + queries=queries, + targets=targets, + query_input_type=ModelInputType.JOB_TITLE, + target_input_type=ModelInputType.SKILL_NAME, + ) + + # Check output shape + assert scores.shape == (len(queries), len(targets)) + assert isinstance(scores, torch.Tensor) + assert scores.dtype == torch.float32 + + # Scores should be finite and non-negative (BM25 scores are >= 0) + assert torch.isfinite(scores).all() + assert (scores >= 0).all() + + def test_compute_rankings_lowercase_sensitivity(self): + """Test that lowercase parameter affects preprocessing.""" + model_lower = BM25Model(lowercase=True) + model_no_lower = BM25Model(lowercase=False) + + # Test the preprocessing method directly + text = "Python Developer" + + assert model_lower._preprocess(text) == "python developer" + assert model_no_lower._preprocess(text) == "Python Developer" + + def test_compute_classification(self): + """Test BM25 classification computation.""" + model = BM25Model() + texts = ["python developer", "data scientist"] + targets = ["python", "machine learning", "statistics"] + + scores = model._compute_classification( + texts=texts, + targets=targets, + input_type=ModelInputType.JOB_TITLE, + ) + + assert scores.shape == (len(texts), len(targets)) + assert isinstance(scores, torch.Tensor) + assert torch.isfinite(scores).all() + + +class TestTfIdfModel: + """Test TfIdfModel initialization and functionality.""" + + def test_model_initialization_default(self): + """Test TfIdfModel initialization with default parameters.""" + model = TfIdfModel() + assert model is not None + assert model.lowercase is True + assert model.tokenization == "word" + + def test_model_initialization_word_tokenization(self): + """Test 
TfIdfModel initialization with word tokenization.""" + model = TfIdfModel(lowercase=True, tokenization="word") + assert model.lowercase is True + assert model.tokenization == "word" + + def test_model_initialization_char_tokenization(self): + """Test TfIdfModel initialization with char tokenization.""" + model = TfIdfModel(lowercase=False, tokenization="char") + assert model.lowercase is False + assert model.tokenization == "char" + + def test_model_initialization_invalid_tokenization(self): + """Test that invalid tokenization raises ValueError.""" + try: + TfIdfModel(tokenization="invalid") + assert False, "Should have raised ValueError" + except ValueError as e: + assert "Invalid tokenization" in str(e) + assert "Must be 'word' or 'char'" in str(e) + + def test_model_properties_word(self): + """Test TfIdfModel properties with word tokenization.""" + model = TfIdfModel(tokenization="word") + assert model.name == "TfIdf-word-lower" + assert "word-level" in model.description + assert model.classification_label_space is None + + def test_model_properties_char(self): + """Test TfIdfModel properties with char tokenization.""" + model = TfIdfModel(tokenization="char") + assert model.name == "TfIdf-char-lower" + assert "character n-gram" in model.description + assert model.classification_label_space is None + + def test_model_name_cased(self): + """Test TfIdfModel name with lowercase=False.""" + model_word = TfIdfModel(lowercase=False, tokenization="word") + assert model_word.name == "TfIdf-word-cased" + + model_char = TfIdfModel(lowercase=False, tokenization="char") + assert model_char.name == "TfIdf-char-cased" + + def test_compute_rankings_word_tokenization(self): + """Test TF-IDF ranking with word tokenization.""" + model = TfIdfModel(tokenization="word") + queries = ["python developer", "data scientist"] + targets = ["python programming", "machine learning", "java developer"] + + scores = model._compute_rankings( + queries=queries, + targets=targets, + query_input_type=ModelInputType.JOB_TITLE, + target_input_type=ModelInputType.SKILL_NAME, + ) + + # Check output shape + assert scores.shape == (len(queries), len(targets)) + assert isinstance(scores, torch.Tensor) + assert scores.dtype == torch.float32 + + # Cosine similarity scores should be in [-1, 1], but typically [0, 1] for TF-IDF + assert torch.isfinite(scores).all() + assert (scores >= -1).all() and (scores <= 1).all() + + def test_compute_rankings_char_tokenization(self): + """Test TF-IDF ranking with character n-gram tokenization.""" + model = TfIdfModel(tokenization="char") + queries = ["python", "java"] + targets = ["python", "pithon", "java"] # pithon is similar to python + + scores = model._compute_rankings(queries, targets) + + # Check output shape + assert scores.shape == (len(queries), len(targets)) + assert isinstance(scores, torch.Tensor) + + # Character n-grams should give high similarity for "python" vs "pithon" + # query[0]="python" should have higher score with target[0]="python" than target[2]="java" + assert scores[0, 0].item() > scores[0, 2].item() + # query[0]="python" should have higher score with target[1]="pithon" than target[2]="java" + assert scores[0, 1].item() > scores[0, 2].item() + + def test_compute_rankings_word_vs_char(self): + """Test that word and char tokenization produce different results.""" + queries = ["software engineer"] + targets = ["software development", "sofware engineering"] # typo in second + + # Word tokenization + model_word = TfIdfModel(tokenization="word") + scores_word = 
model_word._compute_rankings(queries, targets) + + # Character tokenization + model_char = TfIdfModel(tokenization="char") + scores_char = model_char._compute_rankings(queries, targets) + + # Character n-grams should handle the typo better + # So the relative scores should be different + assert not torch.allclose(scores_word, scores_char, atol=0.01) + + def test_compute_classification(self): + """Test TF-IDF classification computation.""" + model = TfIdfModel() + texts = ["python developer", "data scientist"] + targets = ["python", "machine learning", "statistics"] + + scores = model._compute_classification( + texts=texts, + targets=targets, + input_type=ModelInputType.JOB_TITLE, + ) + + assert scores.shape == (len(texts), len(targets)) + assert isinstance(scores, torch.Tensor) + assert torch.isfinite(scores).all() + + +class TestEditDistanceModel: + """Test EditDistanceModel initialization and functionality.""" + + def test_model_initialization_default(self): + """Test EditDistanceModel initialization with default parameters.""" + model = EditDistanceModel() + assert model is not None + assert model.lowercase is True + + def test_model_initialization_custom_params(self): + """Test EditDistanceModel initialization with custom parameters.""" + model = EditDistanceModel(lowercase=False) + assert model.lowercase is False + + def test_model_properties(self): + """Test EditDistanceModel name and description properties.""" + model = EditDistanceModel() + assert model.name == "EditDistance-lower" + assert isinstance(model.description, str) + assert "Levenshtein" in model.description + assert model.classification_label_space is None + + def test_model_name_cased(self): + """Test EditDistanceModel name with lowercase=False.""" + model = EditDistanceModel(lowercase=False) + assert model.name == "EditDistance-cased" + + def test_compute_rankings_basic(self): + """Test basic edit distance ranking computation.""" + model = EditDistanceModel() + queries = ["python developer", "data scientist"] + targets = ["python developer", "python developper", "java engineer"] + + scores = model._compute_rankings( + queries=queries, + targets=targets, + query_input_type=ModelInputType.JOB_TITLE, + target_input_type=ModelInputType.SKILL_NAME, + ) + + # Check output shape + assert scores.shape == (len(queries), len(targets)) + assert isinstance(scores, torch.Tensor) + assert scores.dtype == torch.float32 + + # Levenshtein ratio scores are in [0, 100] + assert torch.isfinite(scores).all() + assert (scores >= 0).all() and (scores <= 100).all() + + # Exact match should have score 100 + assert scores[0, 0].item() == 100.0 + + # Similar strings should have high scores + assert scores[0, 1].item() > 80 # "python developer" vs "python developper" + + def test_compute_rankings_exact_matches(self): + """Test that exact matches get score of 100.""" + model = EditDistanceModel() + queries = ["test", "example"] + targets = ["test", "example", "different"] + + scores = model._compute_rankings(queries, targets) + + # Exact matches should be 100 + assert scores[0, 0].item() == 100.0 # "test" vs "test" + assert scores[1, 1].item() == 100.0 # "example" vs "example" + + def test_compute_rankings_lowercase_sensitivity(self): + """Test that lowercase parameter affects edit distance scores.""" + queries = ["Python"] + targets = ["python", "PYTHON"] + + # With lowercase=True (default) + model_lower = EditDistanceModel(lowercase=True) + scores_lower = model_lower._compute_rankings(queries, targets) + + # Both should be exact matches after 
lowercasing + assert scores_lower[0, 0].item() == 100.0 + assert scores_lower[0, 1].item() == 100.0 + + # With lowercase=False + model_no_lower = EditDistanceModel(lowercase=False) + scores_no_lower = model_no_lower._compute_rankings(queries, targets) + + # Should not be exact matches without lowercasing + assert scores_no_lower[0, 0].item() < 100.0 + assert scores_no_lower[0, 1].item() < 100.0 + + def test_compute_classification(self): + """Test edit distance classification computation.""" + model = EditDistanceModel() + texts = ["python", "java"] + targets = ["python", "javascript", "ruby"] + + scores = model._compute_classification( + texts=texts, + targets=targets, + input_type=ModelInputType.SKILL_NAME, + ) + + assert scores.shape == (len(texts), len(targets)) + assert isinstance(scores, torch.Tensor) + assert torch.isfinite(scores).all() + assert (scores >= 0).all() and (scores <= 100).all() + + +class TestRandomRankingModel: + """Test RandomRankingModel initialization and functionality.""" + + def test_model_initialization_default(self): + """Test RandomRankingModel initialization with default parameters.""" + model = RandomRankingModel() + assert model is not None + assert model.seed is None + + def test_model_initialization_with_seed(self): + """Test RandomRankingModel initialization with seed.""" + model = RandomRankingModel(seed=42) + assert model.seed == 42 + + def test_model_properties(self): + """Test RandomRankingModel name and description properties.""" + model = RandomRankingModel() + assert model.name == "RandomRanking" + assert isinstance(model.description, str) + assert "random" in model.description.lower() or "Random" in model.description + assert model.classification_label_space is None + + def test_compute_rankings_basic(self): + """Test basic random ranking computation.""" + model = RandomRankingModel(seed=42) + queries = ["python developer", "data scientist"] + targets = ["python programming", "machine learning", "java developer"] + + scores = model._compute_rankings( + queries=queries, + targets=targets, + query_input_type=ModelInputType.JOB_TITLE, + target_input_type=ModelInputType.SKILL_NAME, + ) + + # Check output shape + assert scores.shape == (len(queries), len(targets)) + assert isinstance(scores, torch.Tensor) + assert scores.dtype == torch.float32 + + # Random scores should be in [0, 1] + assert torch.isfinite(scores).all() + assert (scores >= 0).all() and (scores <= 1).all() + + def test_compute_rankings_reproducibility_with_seed(self): + """Test that same seed produces same results.""" + queries = ["test query"] + targets = ["target1", "target2", "target3"] + + # First run with seed + model1 = RandomRankingModel(seed=12345) + scores1 = model1._compute_rankings(queries, targets) + + # Second run with same seed + model2 = RandomRankingModel(seed=12345) + scores2 = model2._compute_rankings(queries, targets) + + # Should produce identical results + assert torch.allclose(scores1, scores2) + + def test_compute_rankings_different_without_seed(self): + """Test that different seeds produce different results.""" + queries = ["test query"] + targets = ["target1", "target2", "target3"] + + # Run with different seeds + model1 = RandomRankingModel(seed=1) + scores1 = model1._compute_rankings(queries, targets) + + model2 = RandomRankingModel(seed=2) + scores2 = model2._compute_rankings(queries, targets) + + # Should produce different results + assert not torch.allclose(scores1, scores2) + + def test_compute_classification(self): + """Test random classification 
computation.""" + model = RandomRankingModel(seed=42) + texts = ["python developer", "data scientist"] + targets = ["python", "machine learning", "statistics"] + + scores = model._compute_classification( + texts=texts, + targets=targets, + input_type=ModelInputType.JOB_TITLE, + ) + + assert scores.shape == (len(texts), len(targets)) + assert isinstance(scores, torch.Tensor) + assert torch.isfinite(scores).all() + assert (scores >= 0).all() and (scores <= 1).all()