# overfitting_underfitting.py
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import (
    Sequential, callbacks, constraints, layers, losses,
    metrics, optimizers, regularizers, utils)
FEATURES = 28
N_ROWS = 100000
N_VALIDATION = 0.2
BATCH_SIZE = 500
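# Optimizer steps per epoch on the training split: 80,000 rows / 500 = 160.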
STEPS_PER_EPOCH = (N_ROWS - (N_ROWS * N_VALIDATION)) // BATCH_SIZE
MAX_EPOCHS = 20
COLUMN_NAMES = ['class_label', 'jet_1_b-tag', 'jet_1_eta', 'jet_1_phi',
'jet_1_pt', 'jet_2_b-tag', 'jet_2_eta', 'jet_2_phi', 'jet_2_pt',
'jet_3_b-tag', 'jet_3_eta', 'jet_3_phi', 'jet_3_pt', 'jet_4_b-tag',
'jet_4_eta', 'jet_4_phi', 'jet_4_pt', 'lepton_eta', 'lepton_pT',
'lepton_phi', 'm_bb', 'm_jj', 'm_jjj', 'm_jlv', 'm_lv', 'm_wbb',
'm_wwbb', 'missing_energy_magnitude', 'missing_energy_phi']
# Download the HIGGS dataset (cached under ~/.keras/datasets) and keep
# the path that get_file returns, since the file stays gzip-compressed.
dataset_path = utils.get_file(
    'HIGGS.csv.gz',
    'http://mlphysics.ics.uci.edu/data/higgs/HIGGS.csv.gz')
# pandas infers gzip compression from the .csv.gz extension.
X = pd.read_csv(
    dataset_path,
    names=COLUMN_NAMES,
    nrows=N_ROWS)
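# Split off the binary class label, leaving the 28 physics features in X.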
y = X.pop(COLUMN_NAMES[0])
# Gradually reduce the learning rate during training
lr_schedule = optimizers.schedules.InverseTimeDecay(
0.001,
decay_steps=STEPS_PER_EPOCH * 10,
decay_rate=1,
staircase=False)
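# InverseTimeDecay with decay_rate=1 yields
#   lr(step) = 0.001 / (1 + step / decay_steps),
# i.e. half the base rate after 10 epochs and a third after 20.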
'''
Weight regularization puts constraints on the complexity of
a network by forcing its weights to take only small values,
which makes the distribution of weight values more "regular".
Dropout consists of randomly "dropping out" (i.e. setting to
zero) a number of output features of a layer during training.
The max-norm constraint additionally rescales a unit's weight
vector whenever its L2 norm exceeds the given limit.
'''
model = Sequential([
layers.Input((FEATURES,)),
layers.Dense(
512, activation='elu',
kernel_regularizer=regularizers.l2(0.0001),
kernel_constraint=constraints.max_norm(1.)),
layers.Dropout(0.5),
layers.Dense(
512, activation='elu',
kernel_regularizer=regularizers.l2(0.0001),
kernel_constraint=constraints.max_norm(1.)),
layers.Dropout(0.5),
layers.Dense(
512, activation='elu',
kernel_regularizer=regularizers.l2(0.0001),
kernel_constraint=constraints.max_norm(1.)),
layers.Dropout(0.5),
layers.Dense(
512, activation='elu',
kernel_regularizer=regularizers.l2(0.0001),
kernel_constraint=constraints.max_norm(1.)),
layers.Dropout(0.5),
layers.Dense(1)
])
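# The final Dense(1) layer outputs a raw logit, so both the loss and
# the monitoring metric below are built with from_logits=True.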
model.compile(
optimizer=optimizers.Adam(lr_schedule),
loss=losses.BinaryCrossentropy(from_logits=True),
metrics=[
metrics.BinaryCrossentropy(
from_logits=True, name='binary_crossentropy'),
'accuracy'])
model.summary()
model.fit(
    X, y, epochs=MAX_EPOCHS, batch_size=BATCH_SIZE,
    validation_split=N_VALIDATION,
    # Stop early once validation crossentropy stops improving; the
    # patience must stay below MAX_EPOCHS for this to ever trigger.
    callbacks=[callbacks.EarlyStopping(
        monitor='val_binary_crossentropy', patience=5)],
    verbose=1)
# Monte Carlo Dropout: keep dropout active at inference time by calling
# the model with training=True, then average several stochastic
# predictions to obtain a better-calibrated estimate.
# Make 10 stochastic predictions over the training features; the model
# outputs logits, so apply a sigmoid to turn them into probabilities.
y_mcd_probas = np.stack(
    [tf.sigmoid(model(X.values.astype(np.float32), training=True)).numpy()
     for _ in range(10)])
# Average the 10 passes over the first dimension
y_mcd = y_mcd_probas.mean(axis=0)
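# The spread across the stochastic passes gives a simple per-example
# uncertainty estimate that a single deterministic pass cannot provide.
y_mcd_std = y_mcd_probas.std(axis=0)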
# Compare a single deterministic prediction with the MC Dropout
# estimate for the first example (both as probabilities).
print('Predict: ', tf.sigmoid(model.predict(X[:1])).numpy(), y[:1])
print('Predict MCD: ', y_mcd[:1])