-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathfpcg_engine.py
More file actions
99 lines (79 loc) · 4.34 KB
/
fpcg_engine.py
File metadata and controls
99 lines (79 loc) · 4.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import pandas as pd
import numpy as np
import re
import math
class FPCG_Engine(object):
    """Run callback-based evaluations over tabular CSV data.

    Every configuration is a mapping from which this class reads:

    * ``"paths"``: CSV sources, each handed to ``pandas.read_csv``.
    * ``"rename_patterns"``: sequence of ``(regex, new_name)`` pairs; a
      column whose name matches ``regex`` (via ``re.match``) is renamed to
      ``new_name``. The first matching pair wins.
    * ``"rowIDs"``: list of column names (after renaming) used as row index.
    * ``"evaluations"``: list of mappings with the keys ``"name"`` (result
      column), ``"columns"`` (regex patterns selecting input columns),
      ``"nullfilter"`` (truthy to skip null data) and ``"callback"``.
    """

    def __init__(self, configurations):
        """Store the configurations to evaluate.

        Args:
            configurations: Sequence of configuration mappings (see the
                class docstring for the keys that are read).
        """
        self.m_configurations = configurations

    def evaluate(self, index=-1):
        """Load the data, pre-process it, and run every evaluation on it.

        Args:
            index: Position of the single configuration to evaluate, or
                ``-1`` (default) to evaluate all stored configurations.

        Returns:
            A list holding one result ``DataFrame`` per evaluated
            configuration, in configuration order.
        """
        # What to evaluate?
        print("Processing the Configurations: ")
        print(self.m_configurations)
        if index == -1:
            configurations = self.m_configurations
        else:
            configurations = [self.m_configurations[index]]

        # Run the engine on the configurations
        tables = []
        for configuration in configurations:
            table = self._load_configuration_data(configuration)
            # Empty template: only the index names and result columns exist.
            result_table = self._create_result_table(configuration)
            for evaluation in configuration["evaluations"]:
                result_table = self._process(
                    table, configuration, evaluation, result_table
                )
            tables.append(result_table)
        return tables

    def _load_configuration_data(self, configuration):
        """Load all CSV sources of a configuration into one DataFrame.

        Each source is read, pre-processed (columns renamed, row IDs set as
        index) and the results are stacked row-wise.

        Args:
            configuration: Configuration mapping; ``"paths"`` is read here.

        Returns:
            The combined ``DataFrame``; an empty one when there are no paths.

        Raises:
            ValueError: If the combined row index contains duplicates
                (``verify_integrity=True``).
        """
        frames = [
            self._pre_process_data(pd.read_csv(path, index_col=False), configuration)
            for path in configuration["paths"]
        ]
        if not frames:
            return pd.DataFrame()
        # Concatenate once: avoids re-copying the accumulated rows for every
        # file and avoids mixing an empty placeholder frame into the result.
        return pd.concat(frames, verify_integrity=True)

    def _pre_process_data(self, table, configuration):
        """Rename the columns of ``table`` and set its row-ID index in place.

        Args:
            table: DataFrame to modify (mutated in place).
            configuration: Configuration mapping; ``"rename_patterns"`` and
                ``"rowIDs"`` are read here.

        Returns:
            The same ``table`` object, for call chaining.

        Raises:
            ValueError: If the row-ID index contains duplicates.
        """
        rename_patterns = configuration["rename_patterns"]

        def maybe_rename(col_name):
            # First pattern that matches the start of the name wins.
            for pattern in rename_patterns:
                if re.match(pattern[0], col_name):
                    return pattern[1]
            return col_name

        # ``copy=False`` was dropped: it has no effect on an in-place rename.
        table.rename(columns=maybe_rename, inplace=True)
        table.set_index(configuration["rowIDs"], inplace=True, verify_integrity=True)
        return table

    def _create_result_table(self, configuration):
        """Create the empty result table for a configuration.

        Args:
            configuration: Configuration mapping; ``"rowIDs"`` and the
                ``"name"`` of every entry in ``"evaluations"`` are read.

        Returns:
            An empty ``DataFrame`` indexed by the row-ID columns, with one
            column per evaluation name.
        """
        row_ids = configuration["rowIDs"]
        result_names = [
            evaluation["name"] for evaluation in configuration["evaluations"]
        ]
        table = pd.DataFrame(columns=row_ids + result_names)
        table.set_index(row_ids, inplace=True, verify_integrity=True)
        return table

    def _process(self, table, configuration, evaluation, result_table):
        """Apply one evaluation to every row and store the results.

        For each row the callback is invoked as ``callback(row_id, data)``
        where ``row_id`` is the list of row-ID values and ``data`` is a list
        of ``(column_name, value)`` pairs for the selected columns. The
        returned value is written to ``result_table`` at that row, in the
        column named ``evaluation["name"]``.

        Args:
            table: Pre-processed input data indexed by row IDs.
            configuration: Configuration mapping (not read by this method).
            evaluation: Mapping with ``"name"``, ``"columns"``,
                ``"nullfilter"`` and ``"callback"``.
            result_table: Table to fill (mutated in place).

        Returns:
            ``result_table``.
        """
        # Select the columns matching any pattern. De-duplicate while keeping
        # first-seen order so callbacks get a deterministic column order.
        patterns = [re.compile(pattern) for pattern in evaluation["columns"]]
        matched = []
        for pattern in patterns:
            for table_col in table.columns:
                if pattern.match(table_col):
                    matched.append(table_col)
        desired_columns = list(dict.fromkeys(matched))

        temp = table.loc[:, desired_columns]
        null_filter = evaluation["nullfilter"]
        # With the null filter on, rows whose ID contains a null are skipped.
        indices = temp.index.to_frame().dropna().index if null_filter else temp.index
        result_column = evaluation["name"]

        for rowID in indices:
            row_data = temp.loc[rowID, :]
            if null_filter:
                row_data = row_data.dropna()
            if len(row_data) == 0:
                continue
            # Pair labels with values explicitly instead of relying on
            # integer keys being treated as positions by ``Series[...]``.
            data = list(row_data.items())
            # A multi-column index yields tuples; a single-column index
            # yields a scalar that must be wrapped, not iterated.
            row_id = list(rowID) if isinstance(rowID, tuple) else [rowID]
            result_table.loc[rowID, result_column] = evaluation["callback"](row_id, data)
        return result_table