From 9e6a165b8ff9eb643bd4c7f39e4d6d07f616c4c9 Mon Sep 17 00:00:00 2001 From: Awes Date: Thu, 14 Dec 2023 17:34:36 +0300 Subject: [PATCH 1/3] feat: estonia loader --- src/loaders/EstoniaLoader/EstoniaLoader.py | 128 +++++++++++++++++++++ src/loaders/EstoniaLoader/readme.txt.txt | 4 + 2 files changed, 132 insertions(+) create mode 100644 src/loaders/EstoniaLoader/EstoniaLoader.py create mode 100644 src/loaders/EstoniaLoader/readme.txt.txt diff --git a/src/loaders/EstoniaLoader/EstoniaLoader.py b/src/loaders/EstoniaLoader/EstoniaLoader.py new file mode 100644 index 0000000..a43178e --- /dev/null +++ b/src/loaders/EstoniaLoader/EstoniaLoader.py @@ -0,0 +1,128 @@ +import requests +import pandas as pd +from io import BytesIO +from googletrans import Translator + +class EstoniaLoader(): + def load_everything(self, file_path): + table_url = 'https://ravimiregister.ee/Data/XML/hum_medProducts.csv' + response = requests.get(table_url) + drugs_table = pd.read_csv(BytesIO(response.content), delimiter = ';',encoding="unicode_escape") + + + for i in range(len(drugs_table)): + original_name = drugs_table['Name'][i].lower() + name = self.remove_dozage(original_name).strip(' ').replace(' ', '_') + if isinstance(drugs_table['Strength of active substance'][i], str): + strengths = drugs_table['Strength of active substance'][i].replace('/', ' ').replace('+', ' ').split() + strengths = self.delete_nonexisting_dosages(strengths) + else: + strengths = [] + company = drugs_table['Marketing autorization holder or manufacturer'][i] + company = self.delete_wierd_characters(company).strip().replace(' ', '_').replace('&', 'and') + routes = drugs_table['Route of administration'][i].split(',') + form = drugs_table['Dosage form'][i].replace('-', '_') + actives = drugs_table['Name of active substance'][i].split('+') + for strength in strengths: + file = open(file_path, 'a+', encoding = 'utf-8') + number, measurement = self.seperate_dosage(strength) + 
file.write(f"{name}_{number}_{measurement}=[*\n") + file.write(f"concept_{name}_{number}_{measurement}\n<-sc_node_not_relation;\n<-consept_medication(*<-sc_node_not_relation;;*);") + file.close() + self.write_atc(drugs_table['ATC code'][i], file_path) + file = open(file_path, 'a+', encoding = 'utf-8') + file.write(f"\n=>nrel_main_idtf:[{name} ({strength})](*<-lang_en;;*);\n") + file.write(f"=>nrel_company:{company};\n") + file.write(f"=>nrel_countries_of_sale:...(*->country_estonia;;*);\n") + for route in routes: + route = route.strip(' ').replace(' ', '_') + route = route.replace('use', 'route') + file.write(f"=>nrel_route_of_administration:concept_{route}(*<-sc_node_not_relation;;*);\n") + file.write(f"=>nrel_dosage_form:concept_{form.replace(' ', '_')}(*<-sc_node_not_relation;;*);\n") + file.write(f"=>nrel_dosage:...(*\n\t<-sc_node_not_relation;;\n\t<-concept_dosage(*<-sc_node_not_relation;;*);;") + file.write(f"\n\t=>nrel_measurement_in_{measurement}:{number}(*<-concept_number(*<-sc_node_not_relation;;*);;*);;*);") + for active in actives: + active = active.split(',')[-1] + active = active.lower().replace(' ', '_') + file.write(f"\n=>nrel_active_substances:...(*->{active}(*\n\t<-sc_node_not_relation;;") + file.write(f"\n\t<-concept_pharmacologic_substance(*<-sc_node_not_relation;;*);;*);;*);") + file.write(';\n*];;\n\n') + file.close() + + def write_atc(self, atc_code, file_path): + atc_url = 'https://ravimiregister.ee/en/Data/XML/atc.csv' + response = requests.get(atc_url) + atc_tree = pd.read_csv(BytesIO(response.content), delimiter = ';',encoding="unicode_escape") + + with open(file_path, 'a+') as file: + layers = [atc_code[0], atc_code[:3], atc_code[:4], atc_code[:5], atc_code] + for layer in layers: + description = '' + left = 0 + right = len(atc_tree)-1 + while left<=right: + mid = (left+right)//2 + if atc_tree['ATC kood'][mid] == layer: + description = atc_tree['Nimi'][mid].lower() + description = self.translate(atc_tree['Nimi'][mid], 'et', 'en').lower() 
+ description = self.delete_excess(description) + break + elif atc_tree['ATC kood'][mid] > layer: + right = mid-1 + else: + left = mid+1 + if description != '': + file.write(f"\n<-concept_{layer.lower()}_{description}(*<-sc_node_not_relation;;*);\n=>nrel_atc_code:[{layer}];") + + def translate(self, text, origin_lan, dest_lan): + translator = Translator() + result = translator.translate(text, src = origin_lan, dest = dest_lan).text + return result + + def delete_wierd_characters(self, string): + i = 0 + while i < len(string): + if ord(string[i]) < 128: + i+=1 + else: + string = string[:i] + string[i+1:] + return string + + def delete_excess(self, string): + forbidden_characters = [' ', '-', '+'] + left_bracket, right_bracket = string.find('('), string.find(')') + if left_bracket != -1 and right_bracket != -1: + string = string[:left_bracket].strip() +' '+ string[right_bracket+1:].strip() + string = string.split(',')[0] + for character in forbidden_characters: + string = string.replace(character, '_') + string = string.replace('&', 'and') + return string + + def remove_dozage(self, drug): + i = 0 + while i Date: Thu, 14 Dec 2023 17:41:03 +0300 Subject: [PATCH 2/3] fix --- src/loaders/EstoniaLoader/readme.txt.txt | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 src/loaders/EstoniaLoader/readme.txt.txt diff --git a/src/loaders/EstoniaLoader/readme.txt.txt b/src/loaders/EstoniaLoader/readme.txt.txt deleted file mode 100644 index 342eb8b..0000000 --- a/src/loaders/EstoniaLoader/readme.txt.txt +++ /dev/null @@ -1,4 +0,0 @@ -Для запуска функции, записывающую все препараты в текстовый файл, нужно создать объект класса EstoniaLoader и с помощью него вызвать функцию load_everything(), передав туда путь к файлу, в который будет всё записано. 
- -loader = EstoniaLoader() -loader.load_everything(путь к файлу) \ No newline at end of file From 5040df738704a719f6d0bcae83be2a74b208e856 Mon Sep 17 00:00:00 2001 From: Awes Date: Thu, 14 Dec 2023 17:48:28 +0300 Subject: [PATCH 3/3] add readme --- src/loaders/EstoniaLoader/README.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 src/loaders/EstoniaLoader/README.md diff --git a/src/loaders/EstoniaLoader/README.md b/src/loaders/EstoniaLoader/README.md new file mode 100644 index 0000000..342eb8b --- /dev/null +++ b/src/loaders/EstoniaLoader/README.md @@ -0,0 +1,4 @@ +Для запуска функции, записывающей все препараты в текстовый файл, нужно создать объект класса EstoniaLoader и с помощью него вызвать функцию load_everything(), передав туда путь к файлу, в который будет всё записано. + +loader = EstoniaLoader() +loader.load_everything(путь к файлу) \ No newline at end of file