diff --git a/scripts/sedos_structure_parser.py b/scripts/sedos_structure_parser.py
new file mode 100644
index 0000000..1ba4ab9
--- /dev/null
+++ b/scripts/sedos_structure_parser.py
@@ -0,0 +1,198 @@
+"""Parse and harmonise the SEDOS model structure tables."""
+
+import re
+from functools import lru_cache
+
+import pandas as pd
+
+# Locations on the original author's machine; override these module constants to run elsewhere.
+SEDOS_STRUCTURE_FILE = r"C:\Users\christoph.muschner\CWM\Python\SEDOS\SEDOS_Modellstruktur.xlsx"
+PROCESS_SET_CSV = r"C:\Users\christoph.muschner\CWM\Python\SEDOS_DB\process_set_new_com.csv"
+
+# Literal (non-regex) character substitutions applied to every structure column.
+# NOTE(review): the original mapping in map_old_to_new_commodity_names() listed the " " key twice;
+# confirm the second entry was not meant to be another whitespace character (tab, non-breaking space).
+CHAR_REPLACE_DICT = {"[": "", "]": "", "+": ",", " ": "", ".": "_"}
+
+# Commodity names that replace_string() found unchanged by the mapping, collected across all calls.
+total_list = []
+
+
+@lru_cache(maxsize=None)
+def _load_commodity_names() -> pd.DataFrame:
+    """Read the "Nomenclature_Commodities" sheet of the SEDOS structure workbook (cached)."""
+    return pd.read_excel(
+        io=SEDOS_STRUCTURE_FILE,
+        engine="openpyxl",
+        sheet_name="Nomenclature_Commodities",
+        usecols=["old name", "new name suggestion"],
+    )
+
+
+@lru_cache(maxsize=None)
+def _load_process_set() -> pd.DataFrame:
+    """Read the "Process_Set" sheet of the SEDOS structure workbook (cached)."""
+    return pd.read_excel(
+        io=SEDOS_STRUCTURE_FILE,
+        engine="openpyxl",
+        sheet_name="Process_Set",
+        usecols=["input", "process", "output"],
+    )
+
+
+@lru_cache(maxsize=None)
+def _commodity_mapping() -> dict:
+    """Build the old -> new commodity name mapping once instead of once per cell."""
+    commodities = _load_commodity_names()
+    return dict(zip(commodities["old name"], commodities["new name suggestion"]))
+
+
+def __getattr__(name: str):
+    """Serve the former module globals ``df_com`` and ``process_set`` lazily (PEP 562)."""
+    # Reading the workbook on first access instead of at import time keeps the module importable
+    # on machines where the hard-coded workbook does not exist (e.g. for the test suite).
+    if name == "df_com":
+        return _load_commodity_names()
+    if name == "process_set":
+        return _load_process_set()
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+def replace_string(row, commodity_mapping=None):
+    """
+    Replace old commodity names in one cell with their new name suggestions.
+
+    Parameters
+    ----------
+    row
+        Cell content with commodity names separated by "," or "+". Any non-string cell is printed
+        and replaced by an empty string.
+    commodity_mapping: dict, optional
+        Mapping of old to new commodity names. Read from the "Nomenclature_Commodities" sheet if omitted.
+
+    Returns
+    -------
+    str
+        Mapped commodity names joined by ", ".
+    """
+    if not isinstance(row, str):
+        print("Row is type:", type(row), row)
+        return ""
+
+    if commodity_mapping is None:
+        commodity_mapping = _commodity_mapping()
+
+    row_list = re.split(r",|\+", row)
+    new_list = [commodity_mapping.get(value, value) for value in row_list]
+
+    # names present before and after the mapping were not renamed; record them in total_list
+    total_list.extend(set(row_list).intersection(new_list))
+
+    return ", ".join(str(elem) for elem in new_list)
+
+
+def map_old_to_new_commodity_names():
+    """
+    Clean the process set and map old commodity names in "input" and "output" to new ones.
+
+    The result is also written to ``PROCESS_SET_CSV`` as ";"-separated csv.
+
+    Returns
+    -------
+    pd.DataFrame
+        Process set with cleaned columns and replaced commodity names.
+    """
+    process_set = _load_process_set()
+
+    for col in process_set.columns:
+        for key, value in CHAR_REPLACE_DICT.items():
+            # regex=False: "[", "+" and "." must be replaced literally, not read as regex syntax
+            process_set[col] = process_set[col].str.replace(key, value, regex=False)
+
+    commodity_mapping = _commodity_mapping()
+    for col in ["input", "output"]:
+        process_set[col] = process_set[col].apply(replace_string, commodity_mapping=commodity_mapping)
+
+    process_set.to_csv(PROCESS_SET_CSV, sep=";")
+
+    return process_set
+
+
+def read_sedos_bwshare_excel(file_path: str) -> dict:
+    """
+    Read SEDOS B&W-share excel file.
+
+    Parameters
+    ----------
+    file_path
+        Path to downloaded B&W share file.
+
+    Returns
+    -------
+    dict of dataframes
+        "processes" and "input_output" sheets of the file.
+    """
+    processes = pd.read_excel(
+        io=file_path, engine="openpyxl", sheet_name="Processes", usecols=["Input", "Process", "Output"]
+    )
+
+    input_output = pd.read_excel(
+        io=file_path, engine="openpyxl", sheet_name="input_output", usecols=["parameter", "process", "input", "output"]
+    )
+
+    return {"processes": processes, "input_output": input_output}
+
+
+def parse_es_structure(sedos_es_dict: dict) -> pd.DataFrame:
+    """
+    Parse the es_structure in SEDOS project from two different B&W share tables.
+
+    Parameters
+    ----------
+    sedos_es_dict: dict
+        Dict with dataframe of "processes" and "input_output" sheet
+
+    Returns
+    -------
+    es_structure: pd.DataFrame
+        Structure of energy system with default and parameter-specific inputs & outputs per process
+    """
+
+    processes = sedos_es_dict["processes"]
+    input_output = sedos_es_dict["input_output"]
+
+    inputs_outputs_default = pd.DataFrame(
+        data={
+            "parameter": "default",
+            "process": processes.Process,
+            "input": processes.Input,
+            "output": processes.Output,
+        }
+    )
+
+    es_structure = pd.concat([inputs_outputs_default, input_output], axis=0)
+
+    # clean sheet and replace unwanted characters
+    for col in es_structure.columns:
+        for key, value in CHAR_REPLACE_DICT.items():
+            # regex=False: "[", "+" and "." must be replaced literally, not read as regex syntax
+            es_structure[col] = es_structure[col].str.replace(key, value, regex=False)
+
+    # sort values
+    es_structure = es_structure.sort_values(by=["process", "parameter"]).reset_index(drop=True)
+
+    return es_structure
+
+
+def write_es_structure_file(es_structure: pd.DataFrame, output_path: str) -> None:
+    """
+    Write the energy system structure to an excel file without the index column.
+
+    Parameters
+    ----------
+    es_structure: pd.DataFrame
+        Structure of energy system as returned by parse_es_structure().
+    output_path: str
+        Path of the excel file to write.
+    """
+    es_structure.to_excel(output_path, index=False)
diff --git a/tests/test_data/test_structures/SEDOS_Prozesse&Parameter.xlsx b/tests/test_data/test_structures/SEDOS_Prozesse&Parameter.xlsx
new file mode 100644
index 0000000..267973b
Binary files /dev/null and b/tests/test_data/test_structures/SEDOS_Prozesse&Parameter.xlsx differ
diff --git a/tests/test_scripts.py b/tests/test_scripts.py
new file mode 100644
index 0000000..e8d9fb2
--- /dev/null
+++ b/tests/test_scripts.py
@@ -0,0 +1,49 @@
+from pathlib import Path
+
+import pandas as pd
+from pandas.testing import assert_frame_equal
+
+from scripts.sedos_structure_parser import parse_es_structure, read_sedos_bwshare_excel
+
+# Resolve the fixture relative to this file so the test does not depend on the working directory.
+TEST_STRUCTURE_FILE = Path(__file__).parent / "test_data" / "test_structures" / "SEDOS_Prozesse&Parameter.xlsx"
+
+
+def test_parse_es_structure():
+    data = {
+        "parameter": ["ACT_BND", "ACT_COST", "FLO_EFF", "default", "default", "default", "default"],
+        "process": [
+            "ind_cement_rk_ccs_1",
+            "ind_cement_rk_ccs_1",
+            "ind_cement_rk_ccs_1",
+            "pow_combustion_gt",
+            "pow_combustion_gt_SNG",
+            "pow_combustion_gt_biogas",
+            "pow_combustion_gt_natgas",
+        ],
+        "input": [
+            "coal,coke,coke_oven_gas,heavy_fuel_oil,natgas,hydrogen,SNG,,biomass,waste,sludge,elec,"
+            "cement_rawmeal_mats",
+            "coal,coke,coke_oven_gas,heavy_fuel_oil,natgas,hydrogen,SNG,,biomass,"
+            "waste,sludge,elec,cement_rawmeal_mats",
+            "coal,coke,coke_oven_gas,heavy_fuel_oil,natgas,hydrogen,SNG,,biomass,waste,sludge,elec,cement_rawmeal_mats",
+            "biogas,natgas,SNG_ren,SNG_conv,hydrogen_ren,hydrogen_conv,heating_oil",
+            "SNG_ren,SNG_conv",
+            "biogas",
+            "natgas",
+        ],
+        "output": [
+            "cement_clinker_mats,CO2p,CO2f,CH4f,N2Of",
+            "cement_clinker_mats,CO2p,CO2f,CH4f,N2Of",
+            "cement_clinker_mats,CO2p,CO2f,CH4f,N2Of",
+            "elec_ren,elec_conv,CO2",
+            "elec_ren,elec_conv,CO2",
+            "elec_ren",
+            "elec_conv,CO2",
+        ],
+    }
+    expected_output = pd.DataFrame(data)
+
+    function_df = parse_es_structure(sedos_es_dict=read_sedos_bwshare_excel(str(TEST_STRUCTURE_FILE)))
+
+    assert_frame_equal(expected_output, function_df)