Commit da538d7

fix comments and align both custom NN
1 parent 8c3f815 commit da538d7

File tree

2 files changed: +43 -41 lines changed


docs/src/examples/custom-relu.jl

Lines changed: 23 additions & 22 deletions
@@ -43,7 +43,7 @@ function ChainRulesCore.rrule(::typeof(matrix_relu), y::Matrix{T}) where T
     function pullback_matrix_relu(dl_dx)
         ## some value from the backpropagation (e.g., loss) is denoted by `l`
         ## so `dl_dy` is the derivative of `l` wrt `y`
-        x = model[:x] ## load decision variable `x` into scope
+        x = model[:x] # load decision variable `x` into scope
         dl_dy = zeros(T, size(dl_dx))
         dl_dq = zeros(T, size(dl_dx))
         ## set sensitivities
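
Context for the hunk above: `model` is the JuMP program that the forward pass of `matrix_relu` builds and solves (defined earlier in custom-relu.jl, outside this diff), which is why the pullback first brings the decision variable `x = model[:x]` into scope. Below is a minimal sketch of that kind of forward projection, assuming the usual formulation of ReLU as a projection onto the nonnegative orthant; the packages and the Ipopt solver are assumptions, not what the example necessarily uses.

## Sketch only: the real forward pass is `matrix_relu` in custom-relu.jl, not this function.
using JuMP, DiffOpt, Ipopt  # assumed packages; the example may wire up a different solver

function relu_via_projection(y::Vector{Float64})
    ## argmin over x >= 0 of ||x - y||^2 equals max.(y, 0) elementwise, and the objective
    ## expands to x'x - 2y'x plus a constant, hence the `q'x = -2y'x` term the pullback reads.
    model = JuMP.Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
    JuMP.set_silent(model)
    JuMP.@variable(model, x[1:length(y)] >= 0)
    JuMP.@objective(model, Min, x' * x - 2 * y' * x)
    JuMP.optimize!(model)
    return JuMP.value.(x)  # numerically close to max.(y, 0)
end
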
@@ -54,50 +54,48 @@ function ChainRulesCore.rrule(::typeof(matrix_relu), y::Matrix{T}) where T
         obj_exp = MOI.get(model, DiffOpt.ReverseObjectiveFunction())
         ## coeff of `x` in q'x = -2y'x
         dl_dq[:] .= JuMP.coefficient.(obj_exp, x[:])
-        dq_dy = -2 ## dq/dy = -2
+        dq_dy = -2 # dq/dy = -2
         dl_dy[:] .= dl_dq[:] * dq_dy
         return (ChainRulesCore.NoTangent(), dl_dy,)
     end
     return pv, pullback_matrix_relu
 end

 # For more details about backpropagation, visit [Introduction, ChainRulesCore.jl](https://juliadiff.org/ChainRulesCore.jl/dev/).
-# ## prepare data
-N = 1000 ## batch size
-imgs = MLDatasets.MNIST.traintensor(1:N)
-labels = MLDatasets.MNIST.trainlabels(1:N);
-
-# Preprocessing
-train_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), N)) ## stack all the images
-train_Y = Flux.onehotbatch(labels, 0:9);

-test_imgs = MLDatasets.MNIST.testtensor(1:N)
-test_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), N))
-test_Y = Flux.onehotbatch(MLDatasets.MNIST.testlabels(1:N), 0:9);
-
-# ## Define the Network
-
-# Network structure
+# ## Define the network

 layer_size = 10
-
 m = Flux.Chain(
-    Flux.Dense(784, layer_size), ## 784 being image linear dimension (28 x 28)
+    Flux.Dense(784, layer_size), # 784 being image linear dimension (28 x 28)
     matrix_relu,
-    Flux.Dense(layer_size, 10), ## 10 being the number of outcomes (0 to 9)
+    Flux.Dense(layer_size, 10), # 10 being the number of outcomes (0 to 9)
     Flux.softmax,
 )

+# ## Prepare data
+
+N = 1000 # batch size
+## Preprocessing train data
+imgs = MLDatasets.MNIST.traintensor(1:N)
+labels = MLDatasets.MNIST.trainlabels(1:N)
+train_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), N)) # stack images
+train_Y = Flux.onehotbatch(labels, 0:9);
+## Preprocessing test data
+test_imgs = MLDatasets.MNIST.testtensor(1:N)
+test_labels = MLDatasets.MNIST.testlabels(1:N)
+test_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), N))
+test_Y = Flux.onehotbatch(test_labels, 0:9);
+
 # Define input data
 # The original data is repeated `epochs` times because `Flux.train!` only
 # loops through the data set once

 epochs = 50 # ~1 minute (i7 8th gen with 16gb RAM)
 ## epochs = 100 # leads to 77.8% in about 2 minutes
-
 dataset = repeated((train_X, train_Y), epochs);

-# Parameters for the network training
+# ## Network training

 # training loss function, Flux optimizer
 custom_loss(x, y) = Flux.crossentropy(m(x), y)
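
A quick shape check for the data-preparation block added above; the digits passed to `Flux.onehotbatch` are illustrative, and the sketch assumes the same MLDatasets API the example already uses:

using Flux, MLDatasets

imgs = MLDatasets.MNIST.traintensor(1:4)   # 28 by 28 by 4 array of pixel intensities
X = float.(reshape(imgs, 28 * 28, 4))      # 784 by 4: one flattened image per column
Y = Flux.onehotbatch([5, 0, 4, 1], 0:9)    # 10 by 4 one-hot matrix over the digits 0 to 9
size(X), size(Y)                           # ((784, 4), (10, 4)): columns line up per sample
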
@@ -111,7 +109,10 @@ evalcb = () -> @show(custom_loss(train_X, train_Y))
 # Although our custom implementation takes time, it is able to reach similar
 # accuracy as the usual ReLU function implementation.

+# ## Accuracy results
+
 # Average of correct guesses
+
 accuracy(x, y) = Statistics.mean(Flux.onecold(m(x)) .== Flux.onecold(y));

 # Training accuracy
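
On the new "Accuracy results" section: `Flux.onecold` maps each column to the index of its largest entry (1-based), so predicted and true labels are both shifted by one in the same way and can be compared directly. A small illustrative check, assuming nothing beyond Flux and Statistics:

using Flux, Statistics

probs = Flux.softmax(randn(10, 3))    # stand-in network output: 10 classes by 3 samples
y = Flux.onehotbatch([2, 7, 7], 0:9)  # true labels for the 3 samples
Flux.onecold(probs)                   # index (1 to 10) of the largest entry in each column
Flux.onecold(y)                       # index of the 1 in each column, i.e. label + 1
Statistics.mean(Flux.onecold(probs) .== Flux.onecold(y))  # fraction of correct guesses
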

docs/src/examples/polyhedral_project.jl

Lines changed: 20 additions & 19 deletions
@@ -101,39 +101,38 @@ function ChainRulesCore.rrule(polytope::Polytope{N}, y::AbstractMatrix) where {N
     return xv, pullback_matrix_projection
 end

-# ## Prepare data
-M = 500 ## batch size
-imgs = MLDatasets.MNIST.traintensor(1:M)
-labels = MLDatasets.MNIST.trainlabels(1:M);
-
-# Preprocessing
-train_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), M)) ## stack all the images
-train_Y = Flux.onehotbatch(labels, 0:9);
-
-test_imgs = MLDatasets.MNIST.testtensor(1:M)
-test_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), M))
-test_Y = Flux.onehotbatch(MLDatasets.MNIST.testlabels(1:M), 0:9);
-
 # ## Define the Network

 layer_size = 20
-
 m = Flux.Chain(
-    Flux.Dense(784, layer_size), ## 784 being image linear dimension (28 x 28)
+    Flux.Dense(784, layer_size), # 784 being image linear dimension (28 x 28)
     Polytope((randn(layer_size), randn(layer_size), randn(layer_size))),
-    Flux.Dense(layer_size, 10), ## 10 being the number of outcomes (0 to 9)
+    Flux.Dense(layer_size, 10), # 10 being the number of outcomes (0 to 9)
     Flux.softmax,
 )

+# ## Prepare data
+
+M = 500 # batch size
+## Preprocessing train data
+imgs = MLDatasets.MNIST.traintensor(1:M)
+labels = MLDatasets.MNIST.trainlabels(1:M);
+train_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), M)) # stack images
+train_Y = Flux.onehotbatch(labels, 0:9);
+## Preprocessing test data
+test_imgs = MLDatasets.MNIST.testtensor(1:M)
+test_labels = MLDatasets.MNIST.testlabels(1:M)
+test_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), M))
+test_Y = Flux.onehotbatch(test_labels, 0:9);
+
 # Define input data
 # The original data is repeated `epochs` times because `Flux.train!` only
 # loops through the data set once

 epochs = 50
-
 dataset = repeated((train_X, train_Y), epochs);

-# Parameters for the network training
+# ## Network training

 # training loss function, Flux optimizer
 custom_loss(x, y) = Flux.crossentropy(m(x), y)
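
As the comment in the hunk above notes, `Flux.train!` makes exactly one pass over whatever iterator it is given, so repeating the `(train_X, train_Y)` tuple `epochs` times stands in for an explicit epoch loop. A self-contained illustration with stand-in arrays:

using Base.Iterators: repeated

epochs = 3                             # small value just for illustration
batch = (rand(784, 10), rand(10, 10))  # stand-in for (train_X, train_Y)
dataset = repeated(batch, epochs)
length(collect(dataset))               # 3: Flux.train! would visit the same batch 3 times
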
@@ -142,11 +141,13 @@ evalcb = () -> @show(custom_loss(train_X, train_Y))

 # Train to optimize network parameters

-Flux.train!(custom_loss, Flux.params(m), dataset, opt, cb = Flux.throttle(evalcb, 5));
+@time Flux.train!(custom_loss, Flux.params(m), dataset, opt, cb = Flux.throttle(evalcb, 5));

 # Although our custom implementation takes time, it is able to reach similar
 # accuracy as the usual ReLU function implementation.

+# ## Accuracy results
+
 # Average of correct guesses
 accuracy(x, y) = Statistics.mean(Flux.onecold(m(x)) .== Flux.onecold(y));
