-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathtesting.py
More file actions
93 lines (82 loc) · 3.12 KB
/
testing.py
File metadata and controls
93 lines (82 loc) · 3.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from Environment.environment import Environment
from Agent.agent import Agent
import pickle
import numpy as np
import matplotlib.pyplot as plt
def test_monte_carlo(iterations=1000000, n0=100):
print "\n-------------------"
print "Monte Carlo control"
print "run for n. iterations: "+str(iterations)
print "win percentage: "
# learn
game = Environment()
agent = Agent(game, n0)
agent.MC_control(iterations)
# plot and store
agent.show_statevalue_function()
agent.store_Qvalue_function()
def test_sarsa(iterations=1000, mlambda=None, n0=100, avg_it=50):
print "\n-------------------"
print "TD control Sarsa"
print "run for n. iterations: "+str(iterations)
print "plot graph mse vs episodes for lambda equal 0 and lambda equal 1"
print "list (standard output) win percentage for values of lambda 0, 0.1, 0.2, ..., 0.9, 1"
monte_carlo_Q = pickle.load(open("Data/Qval_func_1000000_MC_control.pkl", "rb"))
n_elements = monte_carlo_Q.shape[0]*monte_carlo_Q.shape[1]*2
mse = []
if not isinstance(mlambda,list):
# if no value is passed for lambda, default 0.5
l = 0.5 if mlambda==None else mlambda
# learn
game = Environment()
agent = Agent(game, n0)
agent.TD_control(iterations, l, avg_it)
# plot results
agent.show_statevalue_function()
else:
# test each value of lambda
for l in mlambda:
game = Environment()
agent = Agent(game, n0)
l_mse = agent.TD_control(iterations, l, avg_it)
mse.append(l_mse)
plt.plot(mlambda,mse)
plt.ylabel('mse')
plt.show()
def test_linear_sarsa(iterations=1000, mlambda=None, n0=100, avg_it=100):
print "\n-------------------"
print "TD control Sarsa, with Linear function approximation"
print "run for n. iterations: "+str(iterations)
print "plot graph mse vs episodes for lambda equal 0 and lambda equal 1"
print "list (std output) win percentage for values of lambda 0, 0.1, 0.2, ..., 0.9, 1"
monte_carlo_Q = pickle.load(open("Data/Qval_func_1000000_MC_control.pkl", "rb"))
n_elements = monte_carlo_Q.shape[0]*monte_carlo_Q.shape[1]*2
mse = []
if not isinstance(mlambda,list):
# if no value is passed for lambda, default 0.5
l = 0.5 if mlambda==None else mlambda
# learn
game = Environment()
agent = Agent(game, n0)
agent.TD_control_linear(iterations,l,avg_it)
agent.show_statevalue_function()
else:
# test each value of lambda
for l in mlambda:
game = Environment()
agent = Agent(game, n0)
l_mse = agent.TD_control_linear(iterations,l,avg_it)
mse.append(l_mse)
plt.plot(mlambda,mse)
plt.ylabel('mse')
plt.show()
if __name__ == '__main__':
# parameters
lambdas = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
iterationsMC = 1000000
iterationsSRS = 1000
n0 = 500
# testing
test_monte_carlo(iterationsMC,n0)
test_sarsa(iterationsSRS,lambdas,n0, avg_it=1)
test_linear_sarsa(iterationsSRS,lambdas,n0, avg_it=1)