From 95324ead08e838508da4dadfed2efedc6edcc053 Mon Sep 17 00:00:00 2001
From: Osma Suominen
Date: Tue, 31 Aug 2021 13:34:57 +0300
Subject: [PATCH] Make prefLabel policies deterministic by using label string sort order to break ties. Related to #81

---
 skosify/check.py   |  2 +-
 test/test_check.py | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/skosify/check.py b/skosify/check.py
index 07c37c1..7eae2ee 100644
--- a/skosify/check.py
+++ b/skosify/check.py
@@ -147,7 +147,7 @@ def preflabel_uniqueness(rdf, policy='all'):
         return
 
     def key_fn(label):
-        return [policy_fn[p](label) for p in policies]
+        return [policy_fn[p](label) for p in policies] + [str(label)]
 
     for res in sorted(resources):
         prefLabels = {}
diff --git a/test/test_check.py b/test/test_check.py
index e0a1074..47712a3 100644
--- a/test/test_check.py
+++ b/test/test_check.py
@@ -171,3 +171,35 @@ def test_preflabel_uniqueness_shortest_uppercase():
     assert (a, SKOS.altLabel, Literal('short', 'en')) in rdf
     assert (a, SKOS.altLabel, Literal('longer', 'en')) in rdf
     assert (a, SKOS.altLabel, Literal('Longer', 'en')) in rdf
+
+
+def test_preflabel_uniqueness_is_deterministic():
+    rdf = Graph()
+    a = BNode()
+
+    rdf.add((a, RDF.type, SKOS.Concept))
+    # all English labels have the same length, tie must be broken
+    rdf.add((a, SKOS.prefLabel, Literal('bab', 'en')))  # remove
+    rdf.add((a, SKOS.prefLabel, Literal('bba', 'en')))  # remove
+    rdf.add((a, SKOS.prefLabel, Literal('aab', 'en')))  # keep
+    rdf.add((a, SKOS.prefLabel, Literal('aba', 'en')))  # remove
+
+    # ditto for Finnish labels
+    rdf.add((a, SKOS.prefLabel, Literal('ba', 'fi')))  # remove
+    rdf.add((a, SKOS.prefLabel, Literal('bb', 'fi')))  # remove
+    rdf.add((a, SKOS.prefLabel, Literal('aa', 'fi')))  # keep
+    rdf.add((a, SKOS.prefLabel, Literal('ab', 'fi')))  # remove
+
+    len_before = len(rdf)
+
+    skosify.check.preflabel_uniqueness(rdf, policy=['shortest'])
+    assert len(rdf) == len_before
+    assert (a, SKOS.prefLabel, Literal('aab', 'en')) in rdf
+    assert (a, SKOS.altLabel, Literal('bab', 'en')) in rdf
+    assert (a, SKOS.altLabel, Literal('bba', 'en')) in rdf
+    assert (a, SKOS.altLabel, Literal('aba', 'en')) in rdf
+
+    assert (a, SKOS.prefLabel, Literal('aa', 'fi')) in rdf
+    assert (a, SKOS.altLabel, Literal('ba', 'fi')) in rdf
+    assert (a, SKOS.altLabel, Literal('bb', 'fi')) in rdf
+    assert (a, SKOS.altLabel, Literal('ab', 'fi')) in rdf