11#!/usr/bin/env python3
22import sys
33import re
4- from peyutil import read_as_json
4+ import os
5+ from peyutil import read_as_json , write_as_json
56
67no_app_pat = re .compile (r"^amende?ment [#](\d+) not applied:" )
78homonym_pat = re .compile (r"^amende?ment [#](\d+) not applied: ([A-Za-z][-A-Za-z0-9 ]+[A-Za-z0-9]) is a homonym of (\d+)" )
89
10+ src_fn_pat = re .compile (r"^(additions\-\d+\-\d+):(\d+)$" )
11+ by_study_id = {}
12+
913def check_if_del_works (amend_num , name , ott_id , edott , amendments_repo ):
1014 exp_amend_idx = amend_num - 1
1115 rel_amends = []
@@ -20,13 +24,28 @@ def check_if_del_works(amend_num, name, ott_id, edott, amendments_repo):
2024 return (False , "solo" )
2125 if rel_amends [0 ][0 ] == exp_amend_idx :
2226 return False , "first"
23- found = False
27+ found = None
2428 for ra in rel_amends [1 :]:
2529 if ra [0 ] == exp_amend_idx :
26- found = True
30+ found = ra [ 1 ]
2731 break
28- if not found :
32+ if found is None :
2933 return False , "notfound"
34+ taxon = found ["taxon" ]
35+ src = taxon ["sourceinfo" ]
36+ m = src_fn_pat .match (src )
37+ if not m :
38+ raise ValueError (f"'sourceinfo' { src } does not fit pattern." )
39+ fn_frag = m .group (1 )
40+ bogus_id = int (m .group (2 ))
41+ fn = f"{ fn_frag } .json"
42+ fp = os .path .join (amendments_repo , "amendments" , fn )
43+ if not os .path .isfile (fp ):
44+ raise RuntimeError (f"amendments file { fp } does not exist" )
45+ offending_amend = read_as_json (fp )
46+ study_id = offending_amend ['study_id' ]
47+ name_set = by_study_id .setdefault (study_id , set ())
48+ name_set .add (name )
3049 return True , ""
3150
3251def main (edott_fp ,
@@ -43,22 +62,42 @@ def main(edott_fp,
4362 amend_num = int (hm .group (1 ))
4463 name = hm .group (2 )
4564 ott_id = int (hm .group (3 ))
46- print (amend_num , name , ott_id )
4765 rc = check_if_del_works (amend_num , name , ott_id , edott , amendments_repo )
4866 if rc [0 ]:
4967 to_del .append (amend_num )
5068 else :
5169 prob = rc [1 ]
5270 if prob == "solo" :
53- print ( "Atypical homonym. Solo in amendments {amend_num}:" , edott [amend_num - 1 ])
71+ sys . stderr . write ( f "Atypical homonym. Solo in amendments { amend_num } : { edott [amend_num - 1 ]} \n " )
5472 elif prob == "notfound" :
55- print ( "PROBLEM {amend_num} does not match:" , edott [amend_num - 1 ])
73+ sys . stderr . write ( f "PROBLEM { amend_num } does not match: { edott [amend_num - 1 ]} \n " )
5674 else :
5775 assert (prob == "first" )
58- print ( "Atypical homonym. First in amendments {amend_num} is bad:" , edott [amend_num - 1 ])
76+ sys . stderr . write ( f "Atypical homonym. First in amendments { amend_num } is bad: { edott [amend_num - 1 ]} \n " )
5977 else :
60- print (m .group (1 ), "not a homonym" )
78+ sys .stderr .write (m .group (1 ) + " not a homonym\n" )  # write() appends no newline, unlike the print() it replaces
79+ sk = list (by_study_id .keys ())
80+ sk .sort ()
81+ for study_id in sk :
82+ name_set = by_study_id [study_id ]
83+ if len (name_set ) == 1 :
84+ name = next (iter (name_set ))  # a set is iterable but not an iterator; bare next(set) raises TypeError
85+ sys .stderr .write (f"In https://tree.opentreeoflife.org/curator/study/view/{ study_id } need to remap 1 taxon: \" { name } \" \n " )
86+ else :
87+ sys .stderr .write (f"In https://tree.opentreeoflife.org/curator/study/view/{ study_id } need to remap { len (name_set )} taxa:\n " )
88+ nl = list (name_set )
89+ nl .sort ()
90+ for name in nl :
91+ sys .stderr .write (f" \" { name } \" \n " )
6192
93+ if to_del :
94+ tds = set ([i - 1 for i in to_del ])
95+ new_edott = []
96+ for amend_idx , amend in enumerate (edott ):
97+ if amend_idx not in tds :
98+ new_edott .append (amend )
99+ write_as_json (new_edott , sys .stdout , indent = 2 )
100+
62101if __name__ == "__main__" :
63102 try :
64103 _args = list (sys .argv [1 :4 ])
0 commit comments