-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathGC9.py
More file actions
81 lines (50 loc) · 2.19 KB
/
GC9.py
File metadata and controls
81 lines (50 loc) · 2.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import sys
sys.path.append('../../')
from utils.packages import *
from utils.ml_fairness import *
from utils.standard_data import *
# In[2]:
# Load the German credit data, binarise the protected attribute `age`, and
# one-hot encode the categorical columns.
filepath = '../../data/german/german.data'
# The raw file has no header row, so column names are supplied explicitly.
column_names = [
    'status', 'month', 'credit_history',
    'purpose', 'credit_amount', 'savings', 'employment',
    'investment_as_income_percentage', 'personal_status',
    'other_debtors', 'residence_since', 'property', 'age',
    'installment_plans', 'housing', 'number_of_credits',
    'skill_level', 'people_liable_for', 'telephone',
    'foreign_worker', 'credit',
]
na_values = []
df = pd.read_csv(filepath, sep=' ', header=None, names=column_names, na_values=na_values)

# Binarise age: 1.0 when age >= 26, otherwise 0.0.
# The original used `np.float`, an alias of the builtin `float` that was
# deprecated in NumPy 1.20 and removed in 1.24; the vectorised comparison
# below produces the same float column without it.
df['age'] = (df['age'] >= 26).astype(float)

# Project helper from `utils`; its exact transformations are not visible here.
df = german_custom_preprocessing(df)

feat_to_drop = ['personal_status']
df = df.drop(feat_to_drop, axis=1)

# Categorical columns are expanded into indicator columns named
# "<column>=<value>".
cat_feat = [
    'status', 'credit_history', 'purpose', 'savings', 'employment',
    'other_debtors', 'property', 'installment_plans', 'housing',
    'skill_level', 'telephone', 'foreign_worker',
]
df = pd.get_dummies(df, columns=cat_feat, prefix_sep='=')

# Numeric columns, listed for reference; not used further in this script.
num_feat = [
    'residence_since', 'age', 'investment_as_income_percentage',
    'credit_amount', 'number_of_credits', 'people_liable_for', 'month',
]
# In[3]:
##### Pipeline #####
from collections import defaultdict
from imblearn.over_sampling import SMOTE
import xgboost as xgb
from xgboost import XGBClassifier
# In[4]:
# Protected attribute configuration passed to the dataset loader.
pro_att_name = ['age']  # ['sex', 'age']
priv_class = [1]
reamining_cat_feat = []

# A new seed is drawn on every run, so the 70/30 split differs between runs.
seed = randrange(100)
y1_train, y1_test = train_test_split(
    df,
    test_size=0.3,
    random_state=seed,
)

# Build the dataset object, feature matrix and label vector for each split
# using the project loader (training split first, then test split).
y1_data_orig_train, y1_X_train, y1_y_train = load_german_data(
    y1_train, pro_att_name, priv_class, reamining_cat_feat
)
y1_data_orig_test, y1_X_test, y1_y_test = load_german_data(
    y1_test, pro_att_name, priv_class, reamining_cat_feat
)
# In[5]:
# Balanced Dataset
# Oversample the training split only; the test split is left as drawn.
sm = SMOTE(sampling_strategy='auto')
# `fit_sample` was deprecated in imbalanced-learn 0.4 and later removed;
# `fit_resample` is the supported method with the same arguments.
y1_X_train, y1_y_train = sm.fit_resample(y1_X_train, y1_y_train)
# In[6]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree with scikit-learn's default hyperparameters on the
# resampled training data.
y1_model = DecisionTreeClassifier()
y1_model.fit(y1_X_train, y1_y_train)
# fit() returns the estimator itself, so both names refer to the same object.
y1_mdl = y1_model
# In[7]:
# Evaluate the fitted model on the held-out test split via the project helper.
plot_model_performance(y1_mdl, y1_X_test, y1_y_test)