-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathreply_buffer.js
More file actions
147 lines (118 loc) · 4.64 KB
/
reply_buffer.js
File metadata and controls
147 lines (118 loc) · 4.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/**
* Returns a random integer between min (inclusive) and max (inclusive).
* The value is no lower than min (or the next integer greater than min
* if min isn't an integer) and no greater than max (or the next integer
* lower than max if max isn't an integer).
* Using Math.round() will give you a non-uniform distribution!
* https://stackoverflow.com/questions/1527803/generating-random-whole-numbers-in-javascript-in-a-specific-range
*/
const getRandomInt = (min, max) => {
min = Math.ceil(min)
max = Math.floor(max)
return Math.floor(Math.random() * (max - min + 1)) + min
}
/**
* Reply Buffer.
*/
class ReplyBuffer {
/**
* Constructor.
*
* @param {*} limit maximum number of transitions
* @param {*} onDiscard callback triggered on discard a transition
*/
constructor(limit = 500, onDiscard = () => {}) {
this._limit = limit
this._onDiscard = onDiscard
this._buffer = new Array(limit).fill()
this._pool = []
this.size = 0
}
/**
* Add a new transition to the reply buffer.
* Transition doesn't contain the next state. The next state is derived when sampling.
*
* @param {{id: number, priority: number, state: array, action, reward: number}} transition transition
*/
add(transition) {
let { id, priority = 1 } = transition
if (id === undefined || id < 0 || priority < 1)
throw new Error('Invalid arguments')
id = id % this._limit
if (this._buffer[id]) {
const start = this._pool.indexOf(id)
let deleteCount = 0
while (this._pool[start + deleteCount] == id)
deleteCount++
this._pool.splice(start, deleteCount)
this._onDiscard(this._buffer[id])
} else
this.size++
while (priority--)
this._pool.push(id)
this._buffer[id] = transition
}
/**
* Return `n` random samples from the buffer.
* Returns an empty array if impossible provide required number of samples.
*
* @param {number} [n = 1] - number of samples
* @returns array of exactly `n` samples
*/
sample(n = 1) {
if (this.size < n)
return []
const
sampleIndices = new Set(),
samples = []
let counter = n
while (counter--)
while (sampleIndices.size < this.size) {
const randomIndex = this._pool[getRandomInt(0, this._pool.length - 1)]
if (sampleIndices.has(randomIndex))
continue
sampleIndices.add(randomIndex)
const { id, state, action, reward } = this._buffer[randomIndex]
const nextId = id + 1
const next = this._buffer[nextId % this._limit]
if (next && next.id === nextId) { // the case when sampled the last element that still waiting for next state
samples.push({ state, action, reward, nextState: next.state})
break
}
}
return samples.length == n ? samples : []
}
}
/** TESTS */
(() => {
return
const rb = new ReplyBuffer(5)
rb.add({id: 0, state: 0})
rb.add({id: 1, state: 1})
rb.add({id: 2, state: 2, priority: 3})
console.assert(rb.size === 3)
console.assert(rb._pool.length === 5)
console.assert(rb._buffer[0].id === 0)
rb.add({id: 2, state: 2})
rb.add({id: 4, state: 4, priority: 2})
console.assert(rb.size === 4)
console.assert(rb._pool.length === 5)
console.assert(JSON.stringify(rb._pool) === '[0,1,2,4,4]')
rb.add({id: 5, state: 0, priority: 2}) // 5%5 = 0 => state = 0
console.assert(rb.size === 4)
console.assert(rb._pool.length === 6)
console.assert(rb._buffer.length === 5)
console.assert(rb._buffer[0].id === 5)
console.assert(JSON.stringify(rb._pool) === '[1,2,4,4,0,0]')
console.assert(rb.sample(999).length === 0, 'Too many samples')
let samples1 = rb.sample(2)
console.assert(samples1.length === 2, 'Only two samples possible')
console.assert(samples1[0].nextState === (samples1[0].state + 1) % 5, 'Next state should be valid', samples1)
rb.add({id: 506, state: 506, priority: 3})
let samples2 = rb.sample(1)
console.assert(samples2.length === 1, 'Only one suitable sample with valid next state')
console.assert(samples2[0].state === 4, 'Sample with id:4')
console.assert(rb._buffer[1].id === 506, '506 % 5 = 1')
console.assert(rb.sample(2).length === 0,
'Can not sample 2 transitions since next state is available only for one state')
})()