diff --git a/README.md b/README.md
index 5e542f4..9a2e1ae 100644
--- a/README.md
+++ b/README.md
@@ -206,6 +206,7 @@ lang_result_ci = summary["mean_per_language/en/f1_macro/ci_margin"]
 
 | **Ranking** | Job to Skills | multi_label | 3039 queries x 13939 targets | 28 |
 | Job Normalization | multi_class | 15463 queries x 2942 targets | 28 |
+| Job Title Similarity | multi_label | 105 queries x 2619 targets | 11 |
 | Skill to Job | multi_label | 13492 queries x 3039 targets | 28 |
 | Skill Extraction House | multi_label | 262 queries x 13891 targets | 28 |
 | Skill Extraction Tech | multi_label | 338 queries x 13891 targets | 28 |
diff --git a/src/workrb/tasks/__init__.py b/src/workrb/tasks/__init__.py
index 7e1a35a..c8c9453 100644
--- a/src/workrb/tasks/__init__.py
+++ b/src/workrb/tasks/__init__.py
@@ -9,6 +9,7 @@
 # Task implementations
 from .classification.job2skill import ESCOJob2SkillClassification
 from .ranking.job2skill import ESCOJob2SkillRanking
+from .ranking.job_similarity import JobTitleSimilarityRanking
 from .ranking.jobnorm import JobBERTJobNormRanking
 from .ranking.skill2job import ESCOSkill2JobRanking
 from .ranking.skill_extraction import HouseSkillExtractRanking, TechSkillExtractRanking
@@ -29,6 +30,7 @@
     "ESCOSkill2JobRanking",
     "ESCOSkillNormRanking",
     "JobBERTJobNormRanking",
+    "JobTitleSimilarityRanking",
     "HouseSkillExtractRanking",
     "TechSkillExtractRanking",
     "SkillMatch1kSkillSimilarityRanking",
diff --git a/src/workrb/tasks/abstract/ranking_base.py b/src/workrb/tasks/abstract/ranking_base.py
index 3c1bcc7..e980168 100644
--- a/src/workrb/tasks/abstract/ranking_base.py
+++ b/src/workrb/tasks/abstract/ranking_base.py
@@ -22,6 +22,7 @@ class RankingTaskGroup(BaseTaskGroup, str, Enum):
 
     JOB_NORMALIZATION = f"{_prefix}job_normalization"
     JOB2SKILL = f"{_prefix}job2skill"
+    JOBSIM = f"{_prefix}jobsim"
     SKILL2JOB = f"{_prefix}skill2job"
     SKILL_NORMALIZATION = f"{_prefix}skill_normalization"
     SKILL_EXTRACTION = f"{_prefix}skill_extraction"
diff --git a/src/workrb/tasks/ranking/__init__.py b/src/workrb/tasks/ranking/__init__.py
index 4e0dc62..3724c02 100644
--- a/src/workrb/tasks/ranking/__init__.py
+++ b/src/workrb/tasks/ranking/__init__.py
@@ -8,6 +8,7 @@
 """
 
 from workrb.tasks.ranking.job2skill import ESCOJob2SkillRanking
+from workrb.tasks.ranking.job_similarity import JobTitleSimilarityRanking
 from workrb.tasks.ranking.jobnorm import JobBERTJobNormRanking
 from workrb.tasks.ranking.skill2job import ESCOSkill2JobRanking
 from workrb.tasks.ranking.skill_extraction import (
@@ -23,6 +24,7 @@
     "ESCOSkillNormRanking",
     "HouseSkillExtractRanking",
     "JobBERTJobNormRanking",
+    "JobTitleSimilarityRanking",
     "SkillMatch1kSkillSimilarityRanking",
     "TechSkillExtractRanking",
 ]
diff --git a/src/workrb/tasks/ranking/job_similarity.py b/src/workrb/tasks/ranking/job_similarity.py
new file mode 100644
index 0000000..4edc043
--- /dev/null
+++ b/src/workrb/tasks/ranking/job_similarity.py
@@ -0,0 +1,154 @@
+"""Job Title Similarity ranking task using Zbib et al. (2022) and Deniz et al. (2024) datasets."""
+
+from datasets import load_dataset
+
+from workrb.registry import register_task
+from workrb.tasks.abstract.base import DatasetSplit, LabelType, Language
+from workrb.tasks.abstract.ranking_base import RankingDataset, RankingTask, RankingTaskGroup
+from workrb.types import ModelInputType
+
+
+@register_task()
+class JobTitleSimilarityRanking(RankingTask):
+    """
+    Job Title Similarity ranking task based on Zbib et al. (2022) and Deniz et al. (2024).
+
+    This task evaluates a model's ability to rank job titles by semantic similarity to a
+    query job title. Given a query job title, the model must rank corpus job titles such
+    that semantically similar ones appear higher than non-similar ones.
+
+    Notes
+    -----
+    HuggingFace Dataset: https://huggingface.co/datasets/Avature/Job-Title-Similarity
+
+    Languages: en, de, es, fr, it, ja, ko, nl, pl, pt, zh.
+
+    Each language is a HuggingFace subset, and contains two splits:
+    - ``queries``: ~105 query job titles with indices of relevant corpus elements
+    - ``corpus``: ~2,500 job titles to rank
+
+    Each query has binary relevance annotations indicating which corpus job titles are
+    semantically similar (multi-label).
+
+    Example (English):
+    - Query: "Food Technologist"
+    - This query has 9 relevant corpus titles out of 2,619: "Food Scientist",
+      "Food Engineer", "Flavorist", among others.
+
+    Difference between this task and Job Normalization:
+    Job Normalization measures whether the model identifies the single canonical form of a
+    job title. Job Title Similarity measures whether the model ranks semantically similar
+    titles above dissimilar ones.
+    """
+
+    SUPPORTED_DATASET_LANGUAGES = [
+        Language.DE,
+        Language.EN,
+        Language.ES,
+        Language.FR,
+        Language.IT,
+        Language.JA,
+        Language.KO,
+        Language.NL,
+        Language.PL,
+        Language.PT,
+        Language.ZH,
+    ]
+
+    @property
+    def name(self) -> str:
+        """Job Title Similarity task name."""
+        return "Job Title Similarity"
+
+    @property
+    def description(self) -> str:
+        """Job Title Similarity task description."""
+        return "Rank job titles in a corpus based on their semantic similarity to query job titles."
+
+    @property
+    def default_metrics(self) -> list[str]:
+        return ["map", "rp@5", "rp@10", "mrr"]
+
+    @property
+    def task_group(self) -> RankingTaskGroup:
+        """Job Title Similarity task group."""
+        return RankingTaskGroup.JOBSIM
+
+    @property
+    def supported_query_languages(self) -> list[Language]:
+        """Supported query languages."""
+        return self.SUPPORTED_DATASET_LANGUAGES
+
+    @property
+    def supported_target_languages(self) -> list[Language]:
+        """Supported target languages."""
+        return self.SUPPORTED_DATASET_LANGUAGES
+
+    @property
+    def split_test_fraction(self) -> float:
+        """Fraction of data to use for test split."""
+        return 1.0
+
+    @property
+    def label_type(self) -> LabelType:
+        """Multi-label ranking for semantically similar job titles."""
+        return LabelType.MULTI_LABEL
+
+    @property
+    def query_input_type(self) -> ModelInputType:
+        """Query input type for job titles."""
+        return ModelInputType.JOB_TITLE
+
+    @property
+    def target_input_type(self) -> ModelInputType:
+        """Target input type for job titles."""
+        return ModelInputType.JOB_TITLE
+
+    def load_monolingual_data(self, split: DatasetSplit, language: Language) -> RankingDataset:
+        """Load Job Title Similarity data from the HuggingFace dataset."""
+        if split != DatasetSplit.TEST:
+            raise ValueError(f"Split '{split}' not supported. Use TEST")
+
+        if language not in self.SUPPORTED_DATASET_LANGUAGES:
+            raise ValueError(f"Language '{language}' not supported.")
+
+        ds = load_dataset("Avature/Job-Title-Similarity", language.value)
+
+        queries = list(ds["queries"]["text"])
+        relevancy_labels = list(ds["queries"]["labels"])
+        corpus = list(ds["corpus"]["text"])
+
+        return RankingDataset(queries, relevancy_labels, corpus, language=language)
+
+    @property
+    def citation(self) -> str:
+        """Job Title Similarity task citation."""
+        return """
+@article{zbib2022Learning,
+  title={{Learning Job Titles Similarity from Noisy Skill Labels}},
+  author={Rabih Zbib and
+          Lucas Alvarez Lacasa and
+          Federico Retyk and
+          Rus Poves and
+          Juan Aizpuru and
+          Hermenegildo Fabregat and
+          Vaidotas Šimkus and
+          Emilia García-Casademont},
+  journal={{FEAST, ECML-PKDD 2022 Workshop}},
+  year={{2022}},
+  url="https://feast-ecmlpkdd.github.io/archive/2022/papers/FEAST2022_paper_4972.pdf"
+}
+@inproceedings{deniz2024Combined,
+  title = {Combined Unsupervised and Contrastive Learning for Multilingual Job Recommendations},
+  author = {Daniel Deniz and
+            Federico Retyk and
+            Laura García-Sardiña and
+            Hermenegildo Fabregat and
+            Luis Gasco and
+            Rabih Zbib},
+  booktitle = {Proceedings of the 4th Workshop on Recommender Systems for Human Resources
+               (RecSys in {HR} 2024), in conjunction with the 18th {ACM} Conference on
+               Recommender Systems},
+  year = {2024},
+}
+"""
diff --git a/src/workrb/types.py b/src/workrb/types.py
index 4446ce5..8acedc6 100644
--- a/src/workrb/types.py
+++ b/src/workrb/types.py
@@ -34,6 +34,9 @@ class Language(str, Enum):
     NO = "no"
     AR = "ar"
     UK = "uk"
+    JA = "ja"
+    KO = "ko"
+    ZH = "zh"
 
 
 class LabelType(str, Enum):
diff --git a/tests/test_task_loading.py b/tests/test_task_loading.py
index 6f21066..4eba058 100644
--- a/tests/test_task_loading.py
+++ b/tests/test_task_loading.py
@@ -12,6 +12,7 @@
     ESCOSkillNormRanking,
     HouseSkillExtractRanking,
     JobBERTJobNormRanking,
+    JobTitleSimilarityRanking,
     SkillMatch1kSkillSimilarityRanking,
     TechSkillExtractRanking,
 )
@@ -41,11 +42,16 @@ def test_ranking_tasks_init_en_splits():
         ("ESCOSkill2JobRanking", ESCOSkill2JobRanking),
         ("ESCOSkillNormRanking", ESCOSkillNormRanking),
         ("JobNormRanking", JobBERTJobNormRanking),
+        ("JobTitleSimilarityRanking", JobTitleSimilarityRanking),
         ("SkillExtractHouseRanking", HouseSkillExtractRanking),
         ("SkillExtractTechRanking", TechSkillExtractRanking),
         ("SkillSimilarityRanking", SkillMatch1kSkillSimilarityRanking),
     ]
 
+    tasks_with_only_test_set = [
+        "JobTitleSimilarityRanking",
+    ]
+
     results = {"success": [], "failures": []}
     languages = [Language.EN]
     splits = [split for split in DatasetSplit]
@@ -53,6 +59,8 @@
     nb_total = 0
     for split in splits:
         for task_name, task_class in ranking_tasks:
+            if split != DatasetSplit.TEST and task_name in tasks_with_only_test_set:
+                continue
             nb_total += 1
             try:
                 # Try to instantiate with minimal parameters