diff --git a/Cargo.toml b/Cargo.toml index ba016e9..f810966 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,16 +1,16 @@ [package] -name = "cognius" +name = "athena" # versions version = "0.1.0" -edition = "2021" -rust-version = "1.73.0" +edition = "2024" +rust-version = "1.95.0" # info authors = ["commanderxa"] readme = "README.md" -repository = "https://github.com/CommanderXA/cognius" +repository = "https://github.com/CommanderXA/athena" license-file = "LICENSE.txt" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -rand = "0.8.5" +rand = "0.10.1" diff --git a/README.md b/README.md index 8277fe1..8285825 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -# COGNIUS +# ATHENA A pure `Rust` implementation of `micrograd`. ## Requirements -Rust >= 1.73 +Rust >= 1.95 ## Usage diff --git a/examples/linear.rs b/examples/linear.rs index 680d011..64ff4fd 100644 --- a/examples/linear.rs +++ b/examples/linear.rs @@ -1,4 +1,4 @@ -use cognius::{ +use athena::{ module::Forward, nn::{functional as F, Linear}, Tensor, diff --git a/examples/matmul.rs b/examples/matmul.rs index 48604f3..524eea8 100644 --- a/examples/matmul.rs +++ b/examples/matmul.rs @@ -1,4 +1,4 @@ -use cognius::{linalg, Tensor}; +use athena::{linalg, Tensor}; fn main() { let a = Tensor::randn(&[2, 3, 2]); diff --git a/examples/mlp.rs b/examples/mlp.rs index 9eef732..73fdc6b 100644 --- a/examples/mlp.rs +++ b/examples/mlp.rs @@ -1,4 +1,4 @@ -use cognius::{ +use athena::{ module::{Forward, Module}, nn::{functional as F, Linear, MSELoss}, optim::{Optim, SGD}, @@ -7,9 +7,9 @@ use cognius::{ fn main() { let epochs = 10; - let criterion = MSELoss::new(); - let mlp = MLP::new([2, 1]); - let optim = SGD::new(mlp.parameters(), 3e-1); + let criterion = MSELoss::new(None); + let mlp = MLP::new([2, 16, 1]); + let optim = SGD::new(mlp.parameters(), 3e-3); let data = vec![ Tensor::tensor(&[9., 3.], &[2]), @@ -76,17 +76,18 @@ fn main() { ); loss.backward(); } - println!("MODEL: {:?}", mlp.parameters()); } struct MLP { linear: Linear, + linear1: Linear, } impl MLP { - pub fn new(features: [usize; 2]) -> Self { + pub fn new(features: [usize; 3]) -> Self { Self { linear: Linear::new(features[0], features[1]), + linear1: Linear::new(features[1], features[2]), } } } @@ -97,7 +98,8 @@ impl Module for MLP { } fn parameters(&self) -> Vec { - let parameters = self.linear.parameters(); + let mut parameters = self.linear.parameters(); + parameters.append(&mut self.linear1.parameters()); parameters } } @@ -105,6 +107,8 @@ impl Module for MLP { impl Forward for MLP { fn forward(&self, x: Tensor) -> Tensor { let x = self.linear.forward(x); + let x = F::relu(x); + let x = self.linear1.forward(x); F::sigmoid(x) } } diff --git a/examples/mlp2.rs b/examples/mlp2.rs index 24c6b66..4f9e455 100644 --- a/examples/mlp2.rs +++ b/examples/mlp2.rs @@ -1,4 +1,4 @@ -use cognius::{ +use athena::{ module::{Forward, Module}, nn::{Linear, MSELoss}, optim::{Optim, SGD}, @@ -7,7 +7,7 @@ use cognius::{ fn main() { let epochs = 10; - let criterion = MSELoss::new(); + let criterion = MSELoss::new(None); let mlp = MLP::new([1, 1]); let optim = SGD::new(mlp.parameters(), 3e-3); diff --git a/examples/softmax.rs b/examples/softmax.rs new file mode 100644 index 0000000..5ba956d --- /dev/null +++ b/examples/softmax.rs @@ -0,0 +1,8 @@ +use athena::{nn::functional as F, Tensor}; + +fn main() { + let x = Tensor::tensor(&[0.24, 0.1, 0.5, 0.8, 1.2, 2.2], &[1, 2, 3]); + println!("IN:\n{x}\n\n"); + let x = F::softmax(x, 2); + println!("OUT:\n{x}"); +} diff --git a/src/backward.rs b/src/backward.rs index ff7b9da..a8a3613 100644 --- a/src/backward.rs +++ b/src/backward.rs @@ -118,7 +118,26 @@ impl Backward for Op { t._prev[0].add_to_grad(dx); } - Op::MSE => { + Op::Softmax(x, _) => { + let t = tensor.inner.borrow(); + let n = x.length(); + let s = x.item(); + let mut jacobian = vec![0.0; n * n]; + for i in 0..n { + for j in 0..n { + if i == j { + jacobian[i * j] = s[i] * (1.0 - s[i]); + } else { + jacobian[i * j] = -s[i] * s[j]; + } + } + } + let a = Tensor::tensor(&jacobian, &[n, n]).t(); + t._prev[0].add_to_grad(a.item()); + } + + Op::MSE(n) => { + let n = *n as f64; let t = tensor.inner.borrow(); let t_prev = t._prev[0].inner.borrow(); let t_sub = t_prev._prev[0].inner.borrow(); @@ -127,7 +146,7 @@ impl Backward for Op { let grad = out .iter() .zip(target) - .map(|(x, y)| 2.0 * (x - y)) + .map(|(x, y)| 2.0 / n * (x - y)) .collect::>(); drop(t_sub); drop(t_prev); diff --git a/src/data/dataloader.rs b/src/data/dataloader.rs index aec2c4b..20aa353 100644 --- a/src/data/dataloader.rs +++ b/src/data/dataloader.rs @@ -1,6 +1,6 @@ use std::{cell::RefCell, rc::Rc}; -use rand::{seq::SliceRandom, thread_rng}; +use rand::{seq::SliceRandom, rng}; use super::dataset::Dataset; @@ -26,7 +26,7 @@ impl DataloaderInner { /// Mix the indices up to obtain random sequence fn shuffle_indices(&mut self) { - self.indices.shuffle(&mut thread_rng()); + self.indices.shuffle(&mut rng()); } } @@ -48,7 +48,7 @@ impl Dataloader { } // shuffle all indices if it is specified so if shuffle { - indices.shuffle(&mut thread_rng()); + indices.shuffle(&mut rng()); } Self(Rc::new(RefCell::new(DataloaderInner { dataset, @@ -68,6 +68,10 @@ impl Dataloader { pub fn is_shuffle(&self) -> bool { self.0.borrow().shuffle } + + pub fn len(&self) -> usize { + self.0.borrow().dataset.len() + } } impl Iterator for Dataloader { diff --git a/src/nn/criterions.rs b/src/nn/criterions.rs index 1b7ebd9..1565fcf 100644 --- a/src/nn/criterions.rs +++ b/src/nn/criterions.rs @@ -1,22 +1,43 @@ use crate::{op::Op, tensor_data::TensorData, Tensor}; -pub struct MSELoss {} +#[derive(Clone, Copy, PartialEq, PartialOrd)] +pub enum Reduction { + SUM, + MEAN, +} + +#[derive(Clone)] +pub struct MSELoss { + reduction: Option, +} impl MSELoss { - pub fn new() -> Self { - Self {} + pub fn new(reduction: Option) -> Self { + Self { + reduction: reduction, + } } pub fn measure(&self, a: Tensor, b: Tensor) -> Tensor { let t = (a - b).pow(2); let shape = t.shape(); - let inner = TensorData::from_op(t.item(), vec![t], Op::MSE); + let a = t.item(); + let t_len = t.length() as f64; + let mut s = 0.0; + if let Some(reduction) = self.reduction { + s = a.iter().sum::(); + if reduction == Reduction::MEAN { + s = s / t_len; + } + } + s /= t_len; + let inner = TensorData::from_op(vec![s], vec![t], Op::MSE(t_len as usize)); Tensor::new(inner, &shape) } } impl Default for MSELoss { fn default() -> Self { - Self::new() + Self::new(None) } } diff --git a/src/nn/functional.rs b/src/nn/functional.rs index 206eb1a..216cdf2 100644 --- a/src/nn/functional.rs +++ b/src/nn/functional.rs @@ -15,3 +15,41 @@ pub fn sigmoid(x: Tensor) -> Tensor { let inner = TensorData::from_op(data.item(), vec![x.clone()], Op::Sigmoid(x)); Tensor::new(inner, &data.shape) } + +pub fn softmax(x: Tensor, dim: usize) -> Tensor { + let shape = x.shape(); + let mut shape2 = shape.clone(); + assert_eq!( + dim, + shape.len() - 1, + "Softmax for dimensions other than the last one is not supported." + ); + let mut result = vec![0.0; x.length()]; + let data = x.item(); + // get batch dimensions if they exist + let mut batches: Vec = vec![]; + for i in 2..shape.len() { + batches.push(shape[i - 2]); + } + // remove batch dimensions from the A tensor shape + shape2.drain(0..batches.len()); + let batch_prod = batches.iter().product::(); + let m = shape2[0]; + let n = shape2[1]; + // iterate over the batch dimensions + // `k` is a batch dimension + for k in 0..batch_prod { + for i in 0..m { + let _x = &data[(k * m + i * n)..(k * m + i * n + n)]; + // do operations + let max_x = _x.iter().cloned().fold(f64::NEG_INFINITY, f64::max); + let exp_x: Vec = _x.iter().map(|&xi| (xi - max_x).exp()).collect(); + let sum_exp_x: f64 = exp_x.iter().sum(); + result[(k * m + i * n)..(k * m + i * n + n)] + .copy_from_slice(&exp_x.iter().map(|&ei| ei / sum_exp_x).collect::>()); + } + } + // create new tensor + let inner = TensorData::from_op(result, vec![x.clone()], Op::Softmax(x, dim)); + Tensor::new(inner, &shape) +} diff --git a/src/op.rs b/src/op.rs index 5f11312..29ef74c 100644 --- a/src/op.rs +++ b/src/op.rs @@ -13,7 +13,8 @@ pub enum Op { Cross, ReLU, Sigmoid(Tensor), - MSE, + Softmax(Tensor, usize), + MSE(usize), } impl std::fmt::Display for Op { @@ -28,7 +29,8 @@ impl std::fmt::Display for Op { Op::Cross => write!(f, "Cross"), Op::ReLU => write!(f, "ReLU"), Op::Sigmoid(n) => write!(f, "Sigmoid({n})"), - Op::MSE => write!(f, "MSE"), + Op::Softmax(n, dim) => write!(f, "Softmax({n},{dim})"), + Op::MSE(n) => write!(f, "MSE({n})"), } } } diff --git a/src/optim.rs b/src/optim.rs index 6919bad..948b034 100644 --- a/src/optim.rs +++ b/src/optim.rs @@ -1,3 +1,4 @@ +pub mod lr_scheduler; pub mod sgd; // Short paths for algorithms @@ -10,4 +11,7 @@ pub trait Optim { /// Sets gradients to zero. fn zero_grad(&self); + + /// changes the learning rate by gamma + fn change_lr(&mut self, gamma: f64); } diff --git a/src/optim/lr_scheduler.rs b/src/optim/lr_scheduler.rs new file mode 100644 index 0000000..f05d647 --- /dev/null +++ b/src/optim/lr_scheduler.rs @@ -0,0 +1,5 @@ +pub mod multistep_lr; + +pub trait Scheduler { + fn step(&mut self) -> (); +} diff --git a/src/optim/lr_scheduler/multistep_lr.rs b/src/optim/lr_scheduler/multistep_lr.rs new file mode 100644 index 0000000..847c663 --- /dev/null +++ b/src/optim/lr_scheduler/multistep_lr.rs @@ -0,0 +1,33 @@ +use crate::optim::Optim; + +use super::Scheduler; + +pub struct MultiStepLR { + optimizer: Box, + pub milestones: Vec, + pub gamma: f64, + count: usize, +} + +impl MultiStepLR { + pub fn new(optimizer: Box, milestones: &[usize], gamma: f64) -> Self { + Self { + optimizer, + milestones: milestones.to_vec(), + gamma: gamma, + count: 0, + } + } +} + +impl Scheduler for MultiStepLR { + fn step(&mut self) -> () { + for m in &self.milestones { + if self.count == *m { + self.optimizer.as_mut().change_lr(self.gamma); + } + } + self.optimizer.as_mut().step(); + self.count += 1; + } +} diff --git a/src/optim/sgd.rs b/src/optim/sgd.rs index 462eb27..049a84c 100644 --- a/src/optim/sgd.rs +++ b/src/optim/sgd.rs @@ -9,6 +9,7 @@ use super::Optim; /// It has: /// - parameters of the model /// - learning rate +#[derive(Clone)] pub struct SGD { parameters: Vec, lr: f64, @@ -57,4 +58,8 @@ impl Optim for SGD { self.parameters[i].inner.borrow_mut().zero_grad(); } } + + fn change_lr(&mut self, gamma: f64) { + self.lr *= gamma + } } diff --git a/src/tensor.rs b/src/tensor.rs index 910663e..229f20c 100644 --- a/src/tensor.rs +++ b/src/tensor.rs @@ -6,8 +6,6 @@ use std::{ rc::Rc, }; -use rand::Rng; - use crate::{backward::Backward, op::Op, tensor_data::TensorData}; #[derive(Clone, Debug, PartialEq)] @@ -140,7 +138,7 @@ impl Tensor { /// between -1 and 1. fn fill_tensor(tensor: &mut TensorData, range: Range) { for i in 0..tensor.data.len() { - let data = rand::thread_rng().gen_range(range.clone()); + let data = rand::random_range(range.clone()); tensor.data[i] = data; tensor.grad.as_mut().unwrap().push(0.0); } @@ -232,7 +230,9 @@ impl Tensor { ); let mut t = self.clone(); if self.stride.iter().product::() == 0 { - panic!("view size is not compatible with size and stride of input tensor. Use .reshape(...) instead"); + panic!( + "view size is not compatible with size and stride of input tensor. Use .reshape(...) instead" + ); } let mut stride = vec![1; shape.len()]; // compute stride @@ -358,7 +358,12 @@ impl Tensor { /// /// Takes new shape. pub fn expand(&self, new_shape: &[usize]) -> Self { - assert!(self.shape().len() <= new_shape.len(), "The number of sizes provided ({:?}) must be equal or greater than the number of sizes in the tensor ({:?})", self.shape().len(), new_shape.len()); + assert!( + self.shape().len() <= new_shape.len(), + "The number of sizes provided ({:?}) must be equal or greater than the number of sizes in the tensor ({:?})", + self.shape().len(), + new_shape.len() + ); let mut t = self.clone(); let mut _old_shape = self.shape(); // check if batch dims have to be added in th front @@ -374,7 +379,7 @@ impl Tensor { for i in (0..new_shape.len()).rev() { assert!( old_shape[i] == new_shape[i] || (old_shape[i] == 1), - "The expanded size of the tensor ({}) must match the existing size ({}) at dimension ({})", + "The expanded size of the tensor ({}) must match the existing size ({}) at dimension ({})", new_shape[i], old_shape[i], i, @@ -431,7 +436,7 @@ impl Tensor { /// Computes the gradients of all the tensors that have been interacting and /// have `requires_grad` set to `true`. pub fn backward(&self) { - let end_grad = self.inner.borrow()._prev[0] + let end_grad: Vec = self.inner.borrow()._prev[0] .item() .iter() .map(|a| a * 2.0) diff --git a/tests/activations_test.rs b/tests/activations_test.rs index 0f9f4d4..de5bd37 100644 --- a/tests/activations_test.rs +++ b/tests/activations_test.rs @@ -1,6 +1,6 @@ #[cfg(test)] mod tests { - use cognius::{nn::functional as F, Tensor}; + use athena::{nn::functional as F, Tensor}; #[test] fn sigmoid() { diff --git a/tests/data_test.rs b/tests/data_test.rs index 5f539ee..98bdea7 100644 --- a/tests/data_test.rs +++ b/tests/data_test.rs @@ -1,6 +1,6 @@ #[cfg(test)] mod tests { - use cognius::{ + use athena::{ data::{dataloader::Dataloader, dataset::Dataset, sample::Sample}, Tensor, }; diff --git a/tests/linalg_test.rs b/tests/linalg_test.rs index 7c507e7..9ee8a46 100644 --- a/tests/linalg_test.rs +++ b/tests/linalg_test.rs @@ -1,6 +1,6 @@ #[cfg(test)] mod tests { - use cognius::{linalg, Tensor}; + use athena::{linalg, Tensor}; #[test] /// Matrix multiplication @@ -68,7 +68,7 @@ mod tests { ], &[3, 1, 3, 1, 2, 3], ); - let c = cognius::linalg::cross(a.clone(), b.clone()); + let c = athena::linalg::cross(a.clone(), b.clone()); let correct = vec![ -0.5866, 1.0262, -0.4396, -1.9130, -0.8710, 2.7839, -0.5866, 1.0262, -0.4396, -1.9130, -0.8710, 2.7839, 0.8525, 0.7091, -1.5616, -1.0111, 3.0523, -2.0412, 0.8525, 0.7091, diff --git a/tests/optim_test.rs b/tests/optim_test.rs index 25169bf..b923d51 100644 --- a/tests/optim_test.rs +++ b/tests/optim_test.rs @@ -1,6 +1,6 @@ #[cfg(test)] mod tests { - use cognius::{ + use athena::{ module::{Forward, Module}, nn::{self, functional as F, Linear}, optim::{Optim, SGD}, @@ -27,7 +27,7 @@ mod tests { let mlp = MLP::new([4, 2, 4, 2, 1]); let optim = SGD::new(mlp.parameters(), 1e-1); let x = Tensor::randn(&[10, 4]); - let criterion = nn::MSELoss::new(); + let criterion = nn::MSELoss::new(None); let mut out = mlp.forward(x.clone()); out = out.squeeze(&[]); diff --git a/tests/tensor_test.rs b/tests/tensor_test.rs index 55cfa8d..5d7b9be 100644 --- a/tests/tensor_test.rs +++ b/tests/tensor_test.rs @@ -1,6 +1,6 @@ #[cfg(test)] mod tests { - use cognius::{randn, Tensor}; + use athena::{randn, Tensor}; #[test] /// Valid shape of the tensor