
Commit 5d62b13

Bug fixed; Added conv1d; Conv1d and maxpool backward still not working
1 parent 9a4f710 commit 5d62b13

15 files changed, +889 −148 lines

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -18,6 +18,8 @@ add_library(neural-fortran
   src/nf.f90
   src/nf/nf_activation.f90
   src/nf/nf_base_layer.f90
+  src/nf/nf_conv1d_layer.f90
+  src/nf/nf_conv1d_layer_submodule.f90
   src/nf/nf_conv2d_layer.f90
   src/nf/nf_conv2d_layer_submodule.f90
   src/nf/nf_cross_attention_layer.f90

example/cnn_mnist_1d.f90

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
-program cnn_mnist
+program cnn_mnist_1d
 
   use nf, only: network, sgd, &
     input, conv2d, maxpool1d, maxpool2d, flatten, dense, reshape, reshape2d, locally_connected_1d, &

@@ -63,5 +63,5 @@ real function accuracy(net, x, y)
     accuracy = real(good) / size(x, dim=2)
   end function accuracy
 
-end program cnn_mnist
+end program cnn_mnist_1d

src/nf.f90

Lines changed: 2 additions & 10 deletions
@@ -3,16 +3,8 @@ module nf
   use nf_datasets_mnist, only: label_digits, load_mnist
   use nf_layer, only: layer
   use nf_layer_constructors, only: &
-    conv2d, dense, flatten, input, maxpool1d, maxpool2d, reshape, reshape2d, locally_connected_1d
-    conv2d, &
-    dense, &
-    dropout, &
-    flatten, &
-    input, &
-    linear2d, &
-    maxpool2d, &
-    reshape, &
-    self_attention
+    conv1d, conv2d, dense, dropout, flatten, input, linear2d, locally_connected_1d, &
+    maxpool1d, maxpool2d, reshape, reshape2d, self_attention
   use nf_loss, only: mse, quadratic
   use nf_metrics, only: corr, maxabs
   use nf_network, only: network

src/nf/nf_conv1d_layer.f90

Lines changed: 119 additions & 0 deletions
@@ -0,0 +1,119 @@
+module nf_conv1d_layer
+  !! This module provides a 1-d convolutional `conv1d` type.
+
+  use nf_activation, only: activation_function
+  use nf_base_layer, only: base_layer
+  implicit none
+
+  private
+  public :: conv1d_layer
+
+  type, extends(base_layer) :: conv1d_layer
+
+    integer :: width
+    integer :: height
+    integer :: channels
+    integer :: kernel_size
+    integer :: filters
+
+    real, allocatable :: biases(:) ! size(filters)
+    real, allocatable :: kernel(:,:,:) ! filters x channels x kernel_size
+    real, allocatable :: output(:,:) ! filters x output_width
+    real, allocatable :: z(:,:) ! kernel .dot. input + bias
+
+    real, allocatable :: dw(:,:,:) ! weight (kernel) gradients
+    real, allocatable :: db(:) ! bias gradients
+    real, allocatable :: gradient(:,:)
+
+    class(activation_function), allocatable :: activation
+
+  contains
+
+    procedure :: forward
+    procedure :: backward
+    procedure :: get_gradients
+    procedure :: get_num_params
+    procedure :: get_params
+    procedure :: init
+    procedure :: set_params
+
+  end type conv1d_layer
+
+  interface conv1d_layer
+    module function conv1d_layer_cons(filters, kernel_size, activation) &
+      result(res)
+      !! `conv1d_layer` constructor function
+      integer, intent(in) :: filters
+      integer, intent(in) :: kernel_size
+      class(activation_function), intent(in) :: activation
+      type(conv1d_layer) :: res
+    end function conv1d_layer_cons
+  end interface conv1d_layer
+
+  interface
+
+    module subroutine init(self, input_shape)
+      !! Initialize the layer data structures.
+      !!
+      !! This is a deferred procedure from the `base_layer` abstract type.
+      class(conv1d_layer), intent(in out) :: self
+        !! A `conv1d_layer` instance
+      integer, intent(in) :: input_shape(:)
+        !! Input layer dimensions
+    end subroutine init
+
+    pure module subroutine forward(self, input)
+      !! Apply a forward pass on the `conv1d` layer.
+      class(conv1d_layer), intent(in out) :: self
+        !! A `conv1d_layer` instance
+      real, intent(in) :: input(:,:)
+        !! Input data
+    end subroutine forward
+
+    pure module subroutine backward(self, input, gradient)
+      !! Apply a backward pass on the `conv1d` layer.
+      class(conv1d_layer), intent(in out) :: self
+        !! A `conv1d_layer` instance
+      real, intent(in) :: input(:,:)
+        !! Input data (previous layer)
+      real, intent(in) :: gradient(:,:)
+        !! Gradient (next layer)
+    end subroutine backward
+
+    pure module function get_num_params(self) result(num_params)
+      !! Get the number of parameters in the layer.
+      class(conv1d_layer), intent(in) :: self
+        !! A `conv1d_layer` instance
+      integer :: num_params
+        !! Number of parameters
+    end function get_num_params
+
+    module function get_params(self) result(params)
+      !! Return the parameters (weights and biases) of this layer.
+      !! The parameters are ordered as weights first, biases second.
+      class(conv1d_layer), intent(in), target :: self
+        !! A `conv1d_layer` instance
+      real, allocatable :: params(:)
+        !! Parameters to get
+    end function get_params
+
+    module function get_gradients(self) result(gradients)
+      !! Return the gradients of this layer.
+      !! The gradients are ordered as weights first, biases second.
+      class(conv1d_layer), intent(in), target :: self
+        !! A `conv1d_layer` instance
+      real, allocatable :: gradients(:)
+        !! Gradients to get
+    end function get_gradients
+
+    module subroutine set_params(self, params)
+      !! Set the parameters of the layer.
+      class(conv1d_layer), intent(in out) :: self
+        !! A `conv1d_layer` instance
+      real, intent(in) :: params(:)
+        !! Parameters to set
+    end subroutine set_params
+
+  end interface
+
+end module nf_conv1d_layer
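
For reference, a minimal smoke-test sketch (not part of this commit) of how the new `conv1d_layer` type could be exercised directly, assuming the `relu` activation type exported by `nf_activation`; the output width follows the valid-correlation rule width = input_width - kernel_size + 1 implemented in the submodule below:

```fortran
! Hedged sketch, not part of this commit: exercise conv1d_layer directly.
! Assumes the relu activation type is available from nf_activation.
program conv1d_smoke_test
  use nf_conv1d_layer, only: conv1d_layer
  use nf_activation, only: relu
  implicit none
  type(conv1d_layer) :: conv
  real :: input(3, 32)

  call random_number(input)

  ! 8 filters with a window of 3, initialized for a 3-channel signal of length 32.
  conv = conv1d_layer(filters=8, kernel_size=3, activation=relu())
  call conv % init([3, 32])

  ! Forward pass; output shape is (filters, input_width - kernel_size + 1) = (8, 30).
  call conv % forward(input)
  print *, shape(conv % output)

end program conv1d_smoke_test
```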
src/nf/nf_conv1d_layer_submodule.f90

Lines changed: 187 additions & 0 deletions
@@ -0,0 +1,187 @@
+submodule(nf_conv1d_layer) nf_conv1d_layer_submodule
+
+  use nf_activation, only: activation_function
+  use nf_random, only: random_normal
+
+  implicit none
+
+contains
+
+  module function conv1d_layer_cons(filters, kernel_size, activation) result(res)
+    implicit none
+    integer, intent(in) :: filters
+    integer, intent(in) :: kernel_size
+    class(activation_function), intent(in) :: activation
+    type(conv1d_layer) :: res
+
+    res % kernel_size = kernel_size
+    res % filters = filters
+    res % activation_name = activation % get_name()
+    allocate( res % activation, source = activation )
+  end function conv1d_layer_cons
+
+  module subroutine init(self, input_shape)
+    implicit none
+    class(conv1d_layer), intent(in out) :: self
+    integer, intent(in) :: input_shape(:)
+
+    self % channels = input_shape(1)
+    self % width = input_shape(2) - self % kernel_size + 1
+
+    ! Output of shape: filters x width
+    allocate(self % output(self % filters, self % width))
+    self % output = 0
+
+    ! Kernel of shape: filters x channels x kernel_size
+    allocate(self % kernel(self % filters, self % channels, self % kernel_size))
+    call random_normal(self % kernel)
+    self % kernel = self % kernel / real(self % kernel_size**2)
+
+    allocate(self % biases(self % filters))
+    self % biases = 0
+
+    allocate(self % z, mold=self % output)
+    self % z = 0
+
+    allocate(self % gradient(input_shape(1), input_shape(2)))
+    self % gradient = 0
+
+    allocate(self % dw, mold=self % kernel)
+    self % dw = 0
+
+    allocate(self % db, mold=self % biases)
+    self % db = 0
+
+  end subroutine init
+
+  pure module subroutine forward(self, input)
+    implicit none
+    class(conv1d_layer), intent(in out) :: self
+    real, intent(in) :: input(:,:)
+    integer :: input_channels, input_width
+    integer :: j, n
+    integer :: iws, iwe, half_window
+
+    input_channels = size(input, dim=1)
+    input_width = size(input, dim=2)
+    half_window = self % kernel_size / 2
+
+    ! Loop over output positions.
+    do j = 1, self % width
+      ! Compute the input window corresponding to output index j.
+      ! In forward: center index = j + half_window, so window = indices j to j+kernel_size-1.
+      iws = j
+      iwe = j + self % kernel_size - 1
+
+      ! For each filter, compute the convolution (inner product over channels and kernel width).
+      do concurrent (n = 1:self % filters)
+        self % z(n, j) = sum(self % kernel(n, :, :) * input(:, iws:iwe))
+      end do
+
+      ! Add the bias for each filter.
+      self % z(:, j) = self % z(:, j) + self % biases
+    end do
+
+    ! Apply the activation function.
+    self % output = self % activation % eval(self % z)
+  end subroutine forward
+
+  pure module subroutine backward(self, input, gradient)
+    implicit none
+    class(conv1d_layer), intent(in out) :: self
+    ! 'input' has shape: (channels, input_width)
+    ! 'gradient' (dL/dy) has shape: (filters, output_width)
+    real, intent(in) :: input(:,:)
+    real, intent(in) :: gradient(:,:)
+
+    integer :: input_channels, input_width, output_width
+    integer :: j, n, k
+    integer :: iws, iwe, half_window
+    real :: gdz_val
+
+    ! Local arrays to accumulate gradients.
+    real :: gdz(self % filters, self % width) ! local gradient (dL/dz)
+    real :: db_local(self % filters)
+    real :: dw_local(self % filters, self % channels, self % kernel_size)
+
+    ! Determine dimensions.
+    input_channels = size(input, dim=1)
+    input_width = size(input, dim=2)
+    output_width = self % width ! Note: output_width = input_width - kernel_size + 1
+
+    half_window = self % kernel_size / 2
+
+    !--- Compute the local gradient gdz = (dL/dy) * sigma'(z) for each output.
+    do j = 1, output_width
+      gdz(:, j) = gradient(:, j) * self % activation % eval_prime(self % z(:, j))
+    end do
+
+    !--- Compute bias gradients: db(n) = sum_j gdz(n, j)
+    do n = 1, self % filters
+      db_local(n) = sum(gdz(n, :))
+    end do
+
+    !--- Initialize weight gradient and input gradient accumulators.
+    dw_local = 0.0
+    self % gradient = 0.0
+
+    !--- Accumulate gradients over each output position.
+    ! In the forward pass the window for output index j was:
+    !   iws = j, iwe = j + kernel_size - 1.
+    do n = 1, self % filters
+      do j = 1, output_width
+        iws = j
+        iwe = j + self % kernel_size - 1
+        do k = 1, self % channels
+          ! Weight gradient: accumulate contribution from the input window.
+          dw_local(n, k, :) = dw_local(n, k, :) + input(k, iws:iwe) * gdz(n, j)
+          ! Input gradient: propagate gradient back to the input window.
+          self % gradient(k, iws:iwe) = self % gradient(k, iws:iwe) + self % kernel(n, k, :) * gdz(n, j)
+        end do
+      end do
+    end do
+
+    !--- Update stored gradients.
+    self % dw = self % dw + dw_local
+    self % db = self % db + db_local
+
+  end subroutine backward
+
+  pure module function get_num_params(self) result(num_params)
+    class(conv1d_layer), intent(in) :: self
+    integer :: num_params
+    num_params = product(shape(self % kernel)) + size(self % biases)
+  end function get_num_params
+
+  module function get_params(self) result(params)
+    class(conv1d_layer), intent(in), target :: self
+    real, allocatable :: params(:)
+    real, pointer :: w_(:) => null()
+    w_(1:size(self % kernel)) => self % kernel
+    params = [ w_, self % biases ]
+  end function get_params
+
+  module function get_gradients(self) result(gradients)
+    class(conv1d_layer), intent(in), target :: self
+    real, allocatable :: gradients(:)
+    real, pointer :: dw_(:) => null()
+    dw_(1:size(self % dw)) => self % dw
+    gradients = [ dw_, self % db ]
+  end function get_gradients
+
+  module subroutine set_params(self, params)
+    class(conv1d_layer), intent(in out) :: self
+    real, intent(in) :: params(:)
+
+    if (size(params) /= self % get_num_params()) then
+      error stop 'conv1d_layer % set_params: Number of parameters does not match'
+    end if
+
+    self % kernel = reshape(params(:product(shape(self % kernel))), shape(self % kernel))
+    associate(n => product(shape(self % kernel)))
+      self % biases = params(n + 1 : n + self % filters)
+    end associate
+
+  end subroutine set_params
+
+end submodule nf_conv1d_layer_submodule
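
As a sanity check of the window arithmetic above, here is a small standalone sketch (not part of this commit) of the same valid, unpadded correlation the forward pass performs for a single filter and a single channel:

```fortran
! Standalone sketch of the sliding-window sum performed by conv1d's forward pass
! for one filter and one channel (valid correlation, no padding).
program conv1d_window_demo
  implicit none
  integer, parameter :: kernel_size = 3, input_width = 8
  real :: input(input_width), kernel(kernel_size), bias
  real :: output(input_width - kernel_size + 1)
  integer :: j

  input = [1., 2., 3., 4., 5., 6., 7., 8.]
  kernel = [0.25, 0.5, 0.25]
  bias = 0.

  ! The window for output index j spans input indices j to j + kernel_size - 1.
  do j = 1, size(output)
    output(j) = sum(kernel * input(j:j+kernel_size-1)) + bias
  end do

  print *, output  ! 6 values for an 8-sample input and a window of 3
end program conv1d_window_demo
```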

src/nf/nf_layer_constructors.f90

Lines changed: 29 additions & 0 deletions
@@ -9,6 +9,7 @@ module nf_layer_constructors
 
   private
   public :: &
+    conv1d, &
     conv2d, &
     dense, &
     dropout, &
@@ -152,6 +153,34 @@ module function flatten() result(res)
     !! Resulting layer instance
   end function flatten
 
+  module function conv1d(filters, kernel_size, activation) result(res)
+    !! 1-d convolutional layer constructor.
+    !!
+    !! This layer is for building 1-d convolutional networks.
+    !! Although the established convention is to call these layers 1-d,
+    !! the shape of the data is actually 2-d: the number of channels
+    !! and the signal length.
+    !! A conv1d layer must not be the first layer in the network.
+    !!
+    !! Example:
+    !!
+    !! ```
+    !! use nf, only: conv1d, layer
+    !! type(layer) :: conv1d_layer
+    !! conv1d_layer = conv1d(filters=32, kernel_size=3)
+    !! ```
+    integer, intent(in) :: filters
+      !! Number of filters in the output of the layer
+    integer, intent(in) :: kernel_size
+      !! Width of the convolution window, commonly 3 or 5
+    class(activation_function), intent(in), optional :: activation
+      !! Activation function (default sigmoid)
+    type(layer) :: res
+      !! Resulting layer instance
+  end function conv1d
+
   module function conv2d(filters, kernel_size, activation) result(res)
     !! 2-d convolutional layer constructor.
     !!
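
A hedged usage sketch (not part of this diff) of the constructor documented above, assuming conv1d is exported from `nf` as added in src/nf.f90 in this commit; kernel and bias arrays are allocated later, when the layer is initialized for a concrete input shape:

```fortran
! Hedged sketch: construct a generic layer with the new conv1d constructor.
program conv1d_constructor_demo
  use nf, only: conv1d, layer
  implicit none
  type(layer) :: conv

  ! Default activation (sigmoid) is used when none is passed.
  conv = conv1d(filters=32, kernel_size=3)

  ! Once initialized for, say, a 3-channel input, this layer would hold
  ! 32*3*3 = 288 kernel weights plus 32 biases, i.e. 320 parameters,
  ! matching get_num_params = product(shape(kernel)) + size(biases).
end program conv1d_constructor_demo
```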
