Skip to content

Commit bd181cb

Browse files
plugin priority remove duplicates
1 parent 0b6fe92 commit bd181cb

1 file changed

Lines changed: 31 additions & 1 deletion

File tree

_cite/cite.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@
88
from dotenv import load_dotenv
99
from util import *
1010

11+
# Rank of each plugin file when the same work is reported by several plugins:
# a LOWER number wins. Plugins missing from this table are looked up with a
# fallback rank of 999, so they lose against every plugin listed here.
PLUGIN_PRIORITY = {
    "orcid.py": 1,
    "pubmed.py": 2,
    "google-scholar.py": 3,
    "sources.py": 4,  # optional catch-all fallback
}
17+
1118

1219
# load environment variables
1320
load_dotenv()
@@ -95,6 +102,27 @@
95102

96103
log("Merging sources by id")
97104

105+
# deduplicate sources by title and plugin priority
#
# Sources sharing the same normalized title (trimmed, lower-cased) are treated
# as one work; only the copy from the plugin with the lowest PLUGIN_PRIORITY
# number is kept. Ties keep the copy seen first.
#
# Sources with a blank/missing title cannot be compared this way, so they are
# passed through untouched instead of being dropped: the id-based merge and the
# citation step that follow still need them.
merged_sources = {}
for index, source in enumerate(sources):
    # "or ''" + str() guard against a title that is present but null/non-string
    title = str(get_safe(source, "title", "") or "").strip().lower()

    # titles are always str, so a tuple key can never collide with one; the
    # index makes every untitled source its own unique entry
    key = title or ("untitled", index)

    plugin = get_safe(source, "plugin", "")
    priority = PLUGIN_PRIORITY.get(plugin, 999)

    existing = merged_sources.get(key)
    if existing is None or priority < existing[1]:
        # first sighting, or this copy replaces a lower-quality source;
        # re-assigning an existing key keeps its original position in the dict
        merged_sources[key] = (source, priority)

# extract final list of sources (dict preserves first-seen order)
sources = [entry[0] for entry in merged_sources.values()]
125+
98126
# merge sources with matching (non-blank) ids
99127
for a in range(0, len(sources)):
100128
a_id = get_safe(sources, f"{a}.id", "")
@@ -136,7 +164,7 @@
136164

137165
# Manubot doesn't work without an id
138166
plugin = get_safe(source, "plugin", "")
139-
print(source)
167+
# print(source)
140168

141169
if _id and plugin != "google-scholar.py":
142170
log("Using Manubot to generate citation", 1)
@@ -172,6 +200,8 @@
172200
# add new citation to list
173201
citations.append(citation)
174202

203+
print("all citations:")
204+
print(citations)
175205

176206
log()
177207

0 commit comments

Comments
 (0)