|
8 | 8 | from dotenv import load_dotenv |
9 | 9 | from util import * |
10 | 10 |
|
# rank of each plugin when the same title shows up in more than one of them:
# plugins are listed from most to least preferred and numbered from 1, so a
# lower number wins during de-duplication
PLUGIN_PRIORITY = {
    plugin_name: rank
    for rank, plugin_name in enumerate(
        (
            "orcid.py",
            "pubmed.py",
            "google-scholar.py",
            "sources.py",  # optional catch-all fallback
        ),
        start=1,
    )
}
| 17 | + |
11 | 18 |
|
12 | 19 | # load environment variables |
13 | 20 | load_dotenv() |
|
95 | 102 |
|
96 | 103 | log("Merging sources by id") |
97 | 104 |
|
# deduplicate sources by title and plugin priority
# (a lower PLUGIN_PRIORITY number is preferred; plugins not listed there rank
# last via the 999 fallback, and on a tie the first source seen is kept)
merged_sources = {}
for index, source in enumerate(sources):
    # normalize so titles differing only in case/surrounding whitespace match;
    # str(... or "") guards against a None or non-string title value
    title = str(get_safe(source, "title", "") or "").strip().lower()

    plugin = get_safe(source, "plugin", "")
    priority = PLUGIN_PRIORITY.get(plugin, 999)

    if not title:
        # nothing to compare on (e.g. an entry that only has an id): keep it
        # under a unique key rather than dropping it, so later steps still see it
        merged_sources[(index,)] = (source, priority)
        continue

    existing = merged_sources.get(title)
    if existing is None or priority < existing[1]:
        # first occurrence, or a higher-priority plugin replaces a
        # lower-priority one; the slot keeps its original position
        merged_sources[title] = (source, priority)

# extract final list of sources
sources = [entry[0] for entry in merged_sources.values()]
| 125 | + |
98 | 126 | # merge sources with matching (non-blank) ids |
99 | 127 | for a in range(0, len(sources)): |
100 | 128 | a_id = get_safe(sources, f"{a}.id", "") |
|
136 | 164 |
|
137 | 165 | # Manubot doesn't work without an id |
138 | 166 | plugin = get_safe(source, "plugin", "") |
139 | | - print(source) |
| 167 | + # print(source) |
140 | 168 |
|
141 | 169 | if _id and plugin != "google-scholar.py": |
142 | 170 | log("Using Manubot to generate citation", 1) |
|
172 | 200 | # add new citation to list |
173 | 201 | citations.append(citation) |
174 | 202 |
|
# report how many citations were produced through the script's log helper
# instead of dumping every citation object to stdout with a raw print
log(f"Generated {len(citations)} citation(s)")
175 | 205 |
|
176 | 206 | log() |
177 | 207 |
|
|
0 commit comments