Skip to content

Commit 8763913

Browse files
committed
first commit
0 parents  commit 8763913

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

46 files changed

+3397
-0
lines changed

.gitignore

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,14 @@
1+
.DS_Store
2+
*.pyc
3+
*.pyo
4+
__pycache__
5+
*.h5
6+
*.json
7+
runs/
8+
check*
9+
omnet/*/out/
10+
omnet/*/networkRL
11+
omnet/*/messages/*.cc
12+
omnet/*/messages/*.h
13+
omnet/*/NetworkAll*
14+
traffic/

ActorNetwork.py

Lines changed: 69 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,69 @@
1+
"""
2+
ActorNetwork.py
3+
"""
4+
__author__ = "giorgio@ac.upc.edu"
5+
__credits__ = "https://github.com/yanpanlau"
6+
7+
from keras.initializations import normal, glorot_normal
8+
from keras.activations import relu
9+
from keras.layers import Dense, Input, BatchNormalization
10+
from keras.models import Model
11+
from keras.regularizers import l2
12+
import keras.backend as K
13+
import tensorflow as tf
14+
15+
from helper import selu
16+
17+
18+
class ActorNetwork(object):
    """Actor network of the DDPG agent, paired with a target copy.

    Two identically shaped Keras models are built: ``model`` (trained
    through ``train``) and ``target_model`` (only ever updated through
    ``target_train``).

    Args:
        sess: TensorFlow session used for every graph execution; it is also
            registered as the Keras backend session.
        state_size: Width of the state input layer.
        action_size: Number of units of the output (action) layer.
        DDPG_config: Mapping that must provide ``HIDDEN1_UNITS``,
            ``HIDDEN2_UNITS``, ``BATCH_SIZE``, ``TAU``, ``LRA``, ``ACTUM``
            and ``HACTI``.

    Raises:
        KeyError: If a required key is missing from ``DDPG_config``.
        ValueError: If ``DDPG_config['ACTUM']`` is neither ``'NEW'`` nor
            ``'DELTA'``.
    """

    def __init__(self, sess, state_size, action_size, DDPG_config):
        self.HIDDEN1_UNITS = DDPG_config['HIDDEN1_UNITS']
        self.HIDDEN2_UNITS = DDPG_config['HIDDEN2_UNITS']

        self.sess = sess
        self.BATCH_SIZE = DDPG_config['BATCH_SIZE']
        self.TAU = DDPG_config['TAU']
        self.LEARNING_RATE = DDPG_config['LRA']
        self.ACTUM = DDPG_config['ACTUM']

        # Output-layer activation depends on the action mode.
        if self.ACTUM == 'NEW':
            self.acti = 'sigmoid'
        elif self.ACTUM == 'DELTA':
            self.acti = 'tanh'
        else:
            # Fail here with a clear message instead of hitting an
            # AttributeError on self.acti while the network is being built.
            raise ValueError(
                "Unsupported ACTUM {!r}: expected 'NEW' or 'DELTA'".format(
                    self.ACTUM))

        # Hidden layers use relu unless the config explicitly asks for selu.
        self.h_acti = relu
        if DDPG_config['HACTI'] == 'selu':
            self.h_acti = selu

        K.set_session(sess)

        # Now create the model
        self.model, self.weights, self.state = \
            self.create_actor_network(state_size, action_size)
        self.target_model, self.target_weights, self.target_state = \
            self.create_actor_network(state_size, action_size)

        # Fed by train(); one row per sample, one column per action unit.
        self.action_gradient = tf.placeholder(tf.float32, [None, action_size])
        # The negated action gradient is passed as grad_ys, so the Adam
        # minimisation step moves the weights along +action_gradient.
        self.params_grad = tf.gradients(
            self.model.output, self.weights, -self.action_gradient)
        grads = zip(self.params_grad, self.weights)
        self.optimize = tf.train.AdamOptimizer(
            self.LEARNING_RATE).apply_gradients(grads)
        self.sess.run(tf.global_variables_initializer())

    def train(self, states, action_grads):
        """Run one optimiser step on the online model.

        Args:
            states: Batch fed to the model's state input.
            action_grads: Batch fed to ``self.action_gradient``; presumably
                the output of ``CriticNetwork.gradients`` - confirm against
                the caller.
        """
        self.sess.run(self.optimize, feed_dict={
            self.state: states,
            self.action_gradient: action_grads
        })

    def target_train(self):
        """Blend the online weights into the target model.

        Each target weight becomes
        ``TAU * online + (1 - TAU) * target``.
        """
        actor_weights = self.model.get_weights()
        actor_target_weights = self.target_model.get_weights()
        for i, online in enumerate(actor_weights):
            actor_target_weights[i] = (
                self.TAU * online + (1 - self.TAU) * actor_target_weights[i])
        self.target_model.set_weights(actor_target_weights)

    def create_actor_network(self, state_size, action_dim):
        """Build one actor model.

        Args:
            state_size: Width of the input layer.
            action_dim: Number of units of the output layer.

        Returns:
            Tuple ``(model, trainable_weights, state_input)``.
        """
        S = Input(shape=[state_size], name='a_S')
        h0 = Dense(self.HIDDEN1_UNITS, activation=self.h_acti,
                   init=glorot_normal, name='a_h0')(S)
        h1 = Dense(self.HIDDEN2_UNITS, activation=self.h_acti,
                   init=glorot_normal, name='a_h1')(h0)
        # https://github.com/fchollet/keras/issues/374
        V = Dense(action_dim, activation=self.acti,
                  init=glorot_normal, name='a_V')(h1)
        model = Model(input=S, output=V)
        return model, model.trainable_weights, S

CriticNetwork.py

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,67 @@
1+
"""
2+
CriticNetwork.py
3+
"""
4+
__author__ = "giorgio@ac.upc.edu"
5+
__credits__ = "https://github.com/yanpanlau"
6+
7+
from keras.initializations import normal, glorot_normal
8+
from keras.activations import relu
9+
from keras.layers import Dense, Input, merge, BatchNormalization
10+
from keras.models import Model
11+
from keras.optimizers import Adam
12+
from keras.regularizers import l2
13+
import keras.backend as K
14+
import tensorflow as tf
15+
16+
from helper import selu
17+
18+
19+
class CriticNetwork(object):
    """Critic network of the DDPG agent, paired with a target copy.

    Two identically shaped Keras models are built: ``model`` (compiled with
    an MSE loss and an Adam optimiser) and ``target_model`` (updated through
    ``target_train``). The gradient of the online model's output with
    respect to its action input is exposed through ``gradients``.

    Args:
        sess: TensorFlow session used for every graph execution; it is also
            registered as the Keras backend session.
        state_size: Width of the state input layer.
        action_size: Width of the action input layer.
        DDPG_config: Mapping that must provide ``HIDDEN1_UNITS``,
            ``HIDDEN2_UNITS``, ``BATCH_SIZE``, ``TAU``, ``LRC`` and
            ``HACTI``.

    Raises:
        KeyError: If a required key is missing from ``DDPG_config``.
    """

    def __init__(self, sess, state_size, action_size, DDPG_config):
        self.HIDDEN1_UNITS = DDPG_config['HIDDEN1_UNITS']
        self.HIDDEN2_UNITS = DDPG_config['HIDDEN2_UNITS']

        self.sess = sess
        self.BATCH_SIZE = DDPG_config['BATCH_SIZE']
        self.TAU = DDPG_config['TAU']
        self.LEARNING_RATE = DDPG_config['LRC']
        self.action_size = action_size

        # Hidden layers use relu unless the config explicitly asks for selu.
        self.h_acti = relu
        if DDPG_config['HACTI'] == 'selu':
            self.h_acti = selu

        K.set_session(sess)

        # Now create the model
        self.model, self.action, self.state = \
            self.create_critic_network(state_size, action_size)
        self.target_model, self.target_action, self.target_state = \
            self.create_critic_network(state_size, action_size)
        # GRADIENTS for policy update
        self.action_grads = tf.gradients(self.model.output, self.action)
        self.sess.run(tf.global_variables_initializer())

    def gradients(self, states, actions):
        """Evaluate the gradient of the model output w.r.t. the action input.

        Args:
            states: Batch fed to the state input.
            actions: Batch fed to the action input.

        Returns:
            The first (and only) entry of the ``tf.gradients`` result, i.e.
            the gradient with respect to the action input.
        """
        return self.sess.run(self.action_grads, feed_dict={
            self.state: states,
            self.action: actions
        })[0]

    def target_train(self):
        """Blend the online weights into the target model.

        Each target weight becomes
        ``TAU * online + (1 - TAU) * target``.
        """
        critic_weights = self.model.get_weights()
        critic_target_weights = self.target_model.get_weights()
        for i, online in enumerate(critic_weights):
            critic_target_weights[i] = (
                self.TAU * online + (1 - self.TAU) * critic_target_weights[i])
        self.target_model.set_weights(critic_target_weights)

    def create_critic_network(self, state_size, action_dim):
        """Build and compile one critic model.

        The state passes through two dense layers and the action through
        one; both branches are summed element-wise before a further hidden
        layer and the linear output layer.

        Args:
            state_size: Width of the state input layer.
            action_dim: Width of the action input layer and of the output
                layer.

        Returns:
            Tuple ``(model, action_input, state_input)``.
        """
        S = Input(shape=[state_size], name='c_S')
        A = Input(shape=[action_dim], name='c_A')
        w1 = Dense(self.HIDDEN1_UNITS, activation=self.h_acti,
                   init=glorot_normal, name='c_w1')(S)
        a1 = Dense(self.HIDDEN2_UNITS, activation='linear',
                   init=glorot_normal, name='c_a1')(A)
        h1 = Dense(self.HIDDEN2_UNITS, activation='linear',
                   init=glorot_normal, name='c_h1')(w1)
        h2 = merge([h1, a1], mode='sum', name='c_h2')
        h3 = Dense(self.HIDDEN2_UNITS, activation=self.h_acti,
                   init=glorot_normal, name='c_h3')(h2)
        # NOTE(review): the output has action_dim units rather than a single
        # scalar - confirm this is intended by the training loop.
        V = Dense(action_dim, activation='linear',
                  init=glorot_normal, name='c_V')(h3)
        model = Model(input=[S, A], output=V)
        adam = Adam(lr=self.LEARNING_RATE)
        model.compile(loss='mse', optimizer=adam)
        return model, A, S

DDPG.json

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,28 @@
1+
{
2+
"ACTIVE_NODES": 14,
3+
"ACTUM": "NEW",
4+
"BATCH_SIZE": 32,
5+
"BN": "reward",
6+
"BUFFER_SIZE": 1600,
7+
"ENV": "label",
8+
"EPISODE_COUNT": 100,
9+
"EXPLORE": 0.8,
10+
"GAMMA": 0.99,
11+
"HACTI": "selu",
12+
"HIDDEN1_UNITS": 91,
13+
"HIDDEN2_UNITS": 42,
14+
"LRA": 0.0001,
15+
"LRC": 0.001,
16+
"MAX_DELTA": 0.1,
17+
"MAX_STEPS": 1000,
18+
"MU": 0.0,
19+
"PRAEMIUM": "AVG",
20+
"PRINT": false,
21+
"ROUTING": "Linkweight",
22+
"RSEED": null,
23+
"SIGMA": 0.4,
24+
"STATUM": "T",
25+
"TAU": 0.001,
26+
"THETA": 0.2,
27+
"TRAFFIC": "EXP"
28+
}

0 commit comments

Comments
 (0)