Skip to content

Commit b64038a

Browse files
committed
Implemented locally connected 1d
1 parent 52f958f commit b64038a

File tree

3 files changed

+25
-60
lines changed

3 files changed

+25
-60
lines changed

example/cnn_mnist_1d.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ program cnn_mnist_1d
2121
net = network([ &
2222
input(784), &
2323
reshape2d([28,28]), &
24-
conv1d(filters=8, kernel_size=3, activation=relu()), &
24+
locally_connected_1d(filters=8, kernel_size=3, activation=relu()), &
2525
maxpool1d(pool_size=2), &
26-
conv1d(filters=16, kernel_size=3, activation=relu()), &
26+
locally_connected_1d(filters=16, kernel_size=3, activation=relu()), &
2727
maxpool1d(pool_size=2), &
2828
dense(10, activation=softmax()) &
2929
])

src/nf/nf_locally_connected_1d.f90

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@ module nf_locally_connected_1d_layer
1616
integer :: kernel_size
1717
integer :: filters
1818

19-
real, allocatable :: biases(:) ! size(filters)
20-
real, allocatable :: kernel(:,:,:) ! filters x channels x window x window
19+
real, allocatable :: biases(:,:) ! filters x output width (one bias per filter per output position)
20+
real, allocatable :: kernel(:,:,:,:) ! filters x output width x channels x kernel size (untied weights per position)
2121
real, allocatable :: output(:,:) ! filters x output width
2222
real, allocatable :: z(:,:) ! kernel .dot. input + bias
2323

24-
real, allocatable :: dw(:,:,:) ! weight (kernel) gradients
25-
real, allocatable :: db(:) ! bias gradients
24+
real, allocatable :: dw(:,:,:,:) ! weight (kernel) gradients
25+
real, allocatable :: db(:,:) ! bias gradients
2626
real, allocatable :: gradient(:,:)
2727

2828
class(activation_function), allocatable :: activation

src/nf/nf_locally_connected_1d_submodule.f90

Lines changed: 19 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ module function locally_connected_1d_layer_cons(filters, kernel_size, activation
1717
res % kernel_size = kernel_size
1818
res % filters = filters
1919
res % activation_name = activation % get_name()
20-
allocate( res % activation, source = activation )
20+
allocate(res % activation, source = activation)
2121
end function locally_connected_1d_layer_cons
2222

2323
module subroutine init(self, input_shape)
@@ -28,16 +28,14 @@ module subroutine init(self, input_shape)
2828
self % channels = input_shape(1)
2929
self % width = input_shape(2) - self % kernel_size + 1
3030

31-
! Output of shape: filters x width
3231
allocate(self % output(self % filters, self % width))
3332
self % output = 0
3433

35-
! Kernel of shape: filters x channels x kernel_size
36-
allocate(self % kernel(self % filters, self % channels, self % kernel_size))
34+
allocate(self % kernel(self % filters, self % width, self % channels, self % kernel_size))
3735
call random_normal(self % kernel)
3836
self % kernel = self % kernel / real(self % kernel_size**2)
3937

40-
allocate(self % biases(self % filters))
38+
allocate(self % biases(self % filters, self % width))
4139
self % biases = 0
4240

4341
allocate(self % z, mold=self % output)
@@ -51,7 +49,6 @@ module subroutine init(self, input_shape)
5149

5250
allocate(self % db, mold=self % biases)
5351
self % db = 0
54-
5552
end subroutine init
5653

5754
pure module subroutine forward(self, input)
@@ -60,113 +57,81 @@ pure module subroutine forward(self, input)
6057
real, intent(in) :: input(:,:)
6158
integer :: input_channels, input_width
6259
integer :: j, n
63-
integer :: iws, iwe, half_window
60+
integer :: iws, iwe
6461

6562
input_channels = size(input, dim=1)
6663
input_width = size(input, dim=2)
67-
half_window = self % kernel_size / 2
6864

69-
! Loop over output positions.
7065
do j = 1, self % width
71-
! Compute the input window corresponding to output index j.
72-
! In forward: center index = j + half_window, so window = indices j to j+kernel_size-1.
7366
iws = j
7467
iwe = j + self % kernel_size - 1
75-
76-
! For each filter, compute the convolution (inner product over channels and kernel width).
7768
do concurrent (n = 1:self % filters)
78-
self % z(n, j) = sum(self % kernel(n, :, :) * input(:, iws:iwe))
69+
self % z(n, j) = sum(self % kernel(n, j, :, :) * input(:, iws:iwe)) + self % biases(n, j)
7970
end do
80-
81-
! Add the bias for each filter.
82-
self % z(:, j) = self % z(:, j) + self % biases
8371
end do
84-
85-
! Apply the activation function.
8672
self % output = self % activation % eval(self % z)
8773
end subroutine forward
8874

8975
pure module subroutine backward(self, input, gradient)
9076
implicit none
9177
class(locally_connected_1d_layer), intent(in out) :: self
92-
! 'input' has shape: (channels, input_width)
93-
! 'gradient' (dL/dy) has shape: (filters, output_width)
9478
real, intent(in) :: input(:,:)
9579
real, intent(in) :: gradient(:,:)
96-
9780
integer :: input_channels, input_width, output_width
9881
integer :: j, n, k
99-
integer :: iws, iwe, half_window
100-
real :: gdz_val
82+
integer :: iws, iwe
83+
real :: gdz(self % filters, self % width)
84+
real :: db_local(self % filters, self % width)
85+
real :: dw_local(self % filters, self % width, self % channels, self % kernel_size)
10186

102-
! Local arrays to accumulate gradients.
103-
real :: gdz(self % filters, self % width) ! local gradient (dL/dz)
104-
real :: db_local(self % filters)
105-
real :: dw_local(self % filters, self % channels, self % kernel_size)
106-
107-
! Determine dimensions.
10887
input_channels = size(input, dim=1)
10988
input_width = size(input, dim=2)
110-
output_width = self % width ! Note: output_width = input_width - kernel_size + 1
111-
112-
half_window = self % kernel_size / 2
89+
output_width = self % width
11390

114-
!--- Compute the local gradient gdz = (dL/dy) * sigma'(z) for each output.
11591
do j = 1, output_width
11692
gdz(:, j) = gradient(:, j) * self % activation % eval_prime(self % z(:, j))
11793
end do
11894

119-
!--- Compute bias gradients: db(n) = sum_j gdz(n, j)
12095
do n = 1, self % filters
121-
db_local(n) = sum(gdz(n, :))
96+
do j = 1, output_width
97+
db_local(n, j) = gdz(n, j)
98+
end do
12299
end do
123100

124-
!--- Initialize weight gradient and input gradient accumulators.
125101
dw_local = 0.0
126102
self % gradient = 0.0
127103

128-
!--- Accumulate gradients over each output position.
129-
! In the forward pass the window for output index j was:
130-
! iws = j, iwe = j + kernel_size - 1.
131104
do n = 1, self % filters
132105
do j = 1, output_width
133106
iws = j
134107
iwe = j + self % kernel_size - 1
135108
do k = 1, self % channels
136-
! Weight gradient: accumulate contribution from the input window.
137-
dw_local(n, k, :) = dw_local(n, k, :) + input(k, iws:iwe) * gdz(n, j)
138-
! Input gradient: propagate gradient back to the input window.
139-
self % gradient(k, iws:iwe) = self % gradient(k, iws:iwe) + self % kernel(n, k, :) * gdz(n, j)
109+
dw_local(n, j, k, :) = dw_local(n, j, k, :) + input(k, iws:iwe) * gdz(n, j)
110+
self % gradient(k, iws:iwe) = self % gradient(k, iws:iwe) + self % kernel(n, j, k, :) * gdz(n, j)
140111
end do
141112
end do
142113
end do
143114

144-
!--- Update stored gradients.
145115
self % dw = self % dw + dw_local
146116
self % db = self % db + db_local
147-
148117
end subroutine backward
149118

150119
pure module function get_num_params(self) result(num_params)
151120
class(locally_connected_1d_layer), intent(in) :: self
152121
integer :: num_params
153-
num_params = product(shape(self % kernel)) + size(self % biases)
122+
num_params = product(shape(self % kernel)) + product(shape(self % biases))
154123
end function get_num_params
155124

156125
module function get_params(self) result(params)
157126
class(locally_connected_1d_layer), intent(in), target :: self
158127
real, allocatable :: params(:)
159-
real, pointer :: w_(:) => null()
160-
w_(1:size(self % kernel)) => self % kernel
161-
params = [ w_, self % biases ]
128+
params = [reshape(self % kernel, [size(self % kernel)]), reshape(self % biases, [size(self % biases)])]
162129
end function get_params
163130

164131
module function get_gradients(self) result(gradients)
165132
class(locally_connected_1d_layer), intent(in), target :: self
166133
real, allocatable :: gradients(:)
167-
real, pointer :: dw_(:) => null()
168-
dw_(1:size(self % dw)) => self % dw
169-
gradients = [ dw_, self % db ]
134+
gradients = [reshape(self % dw, [size(self % dw)]), reshape(self % db, [size(self % db)])]
170135
end function get_gradients
171136

172137
module subroutine set_params(self, params)
@@ -179,7 +144,7 @@ module subroutine set_params(self, params)
179144

180145
self % kernel = reshape(params(:product(shape(self % kernel))), shape(self % kernel))
181146
associate(n => product(shape(self % kernel)))
182-
self % biases = params(n + 1 : n + self % filters)
147+
self % biases = reshape(params(n + 1 :), shape(self % biases))
183148
end associate
184149

185150
end subroutine set_params

0 commit comments

Comments
 (0)