Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
[package]
name = "cognius"
name = "athena"
# versions
version = "0.1.0"
edition = "2021"
rust-version = "1.73.0"
edition = "2024"
rust-version = "1.95.0"
# info
authors = ["commanderxa"]
readme = "README.md"
repository = "https://github.com/CommanderXA/cognius"
repository = "https://github.com/CommanderXA/athena"
license-file = "LICENSE.txt"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
rand = "0.8.5"
rand = "0.10.1"
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# COGNIUS
# ATHENA

A pure `Rust` implementation of `micrograd`.

## Requirements

Rust >= 1.73
Rust >= 1.95

## Usage

Expand Down
2 changes: 1 addition & 1 deletion examples/linear.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use cognius::{
use athena::{
module::Forward,
nn::{functional as F, Linear},
Tensor,
Expand Down
2 changes: 1 addition & 1 deletion examples/matmul.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use cognius::{linalg, Tensor};
use athena::{linalg, Tensor};

fn main() {
let a = Tensor::randn(&[2, 3, 2]);
Expand Down
18 changes: 11 additions & 7 deletions examples/mlp.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use cognius::{
use athena::{
module::{Forward, Module},
nn::{functional as F, Linear, MSELoss},
optim::{Optim, SGD},
Expand All @@ -7,9 +7,9 @@ use cognius::{

fn main() {
let epochs = 10;
let criterion = MSELoss::new();
let mlp = MLP::new([2, 1]);
let optim = SGD::new(mlp.parameters(), 3e-1);
let criterion = MSELoss::new(None);
let mlp = MLP::new([2, 16, 1]);
let optim = SGD::new(mlp.parameters(), 3e-3);

let data = vec![
Tensor::tensor(&[9., 3.], &[2]),
Expand Down Expand Up @@ -76,17 +76,18 @@ fn main() {
);
loss.backward();
}
println!("MODEL: {:?}", mlp.parameters());
}

struct MLP {
linear: Linear,
linear1: Linear,
}

impl MLP {
pub fn new(features: [usize; 2]) -> Self {
pub fn new(features: [usize; 3]) -> Self {
Self {
linear: Linear::new(features[0], features[1]),
linear1: Linear::new(features[1], features[2]),
}
}
}
Expand All @@ -97,14 +98,17 @@ impl Module for MLP {
}

fn parameters(&self) -> Vec<Tensor> {
let parameters = self.linear.parameters();
let mut parameters = self.linear.parameters();
parameters.append(&mut self.linear1.parameters());
parameters
}
}

impl Forward for MLP {
fn forward(&self, x: Tensor) -> Tensor {
let x = self.linear.forward(x);
let x = F::relu(x);
let x = self.linear1.forward(x);
F::sigmoid(x)
}
}
4 changes: 2 additions & 2 deletions examples/mlp2.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use cognius::{
use athena::{
module::{Forward, Module},
nn::{Linear, MSELoss},
optim::{Optim, SGD},
Expand All @@ -7,7 +7,7 @@ use cognius::{

fn main() {
let epochs = 10;
let criterion = MSELoss::new();
let criterion = MSELoss::new(None);
let mlp = MLP::new([1, 1]);
let optim = SGD::new(mlp.parameters(), 3e-3);

Expand Down
8 changes: 8 additions & 0 deletions examples/softmax.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
use athena::{nn::functional as F, Tensor};

/// Prints a small 1x2x3 tensor, then prints its softmax taken over the last
/// dimension (index 2).
fn main() {
    let values = [0.24, 0.1, 0.5, 0.8, 1.2, 2.2];
    let dims = [1, 2, 3];

    let x = Tensor::tensor(&values, &dims);
    println!("IN:\n{x}\n\n");

    // Softmax consumes the input tensor; shadow it with the result.
    let x = F::softmax(x, 2);
    println!("OUT:\n{x}");
}
23 changes: 21 additions & 2 deletions src/backward.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,26 @@ impl Backward for Op {
t._prev[0].add_to_grad(dx);
}

Op::MSE => {
Op::Softmax(x, _) => {
    // Builds the n x n matrix J[i][j] = s[i] * (delta_ij - s[j]) from the
    // values of the tensor stored in the op and adds it to the gradient of
    // the previous node.
    //
    // NOTE(review): `x` is the tensor stored in `Op::Softmax`; the forward
    // pass appears to store its *input* there, so `s` may hold pre-softmax
    // values rather than the softmax output — confirm which was intended.
    // NOTE(review): `n` is the total element count, so for batched input the
    // matrix couples elements from different rows, and the n*n values are
    // passed to `add_to_grad` without being combined with an upstream
    // gradient — confirm against `add_to_grad`'s contract.
    let t = tensor.inner.borrow();
    let n = x.length();
    let s = x.item();
    let mut jacobian = vec![0.0; n * n];
    for i in 0..n {
        for j in 0..n {
            // Row-major slot of entry (i, j). Indexing with `i * j` would
            // make distinct entries collide and leave most slots at zero.
            jacobian[i * n + j] = if i == j {
                s[i] * (1.0 - s[i])
            } else {
                -s[i] * s[j]
            };
        }
    }
    let a = Tensor::tensor(&jacobian, &[n, n]).t();
    t._prev[0].add_to_grad(a.item());
}

Op::MSE(n) => {
let n = *n as f64;
let t = tensor.inner.borrow();
let t_prev = t._prev[0].inner.borrow();
let t_sub = t_prev._prev[0].inner.borrow();
Expand All @@ -127,7 +146,7 @@ impl Backward for Op {
let grad = out
.iter()
.zip(target)
.map(|(x, y)| 2.0 * (x - y))
.map(|(x, y)| 2.0 / n * (x - y))
.collect::<Vec<f64>>();
drop(t_sub);
drop(t_prev);
Expand Down
10 changes: 7 additions & 3 deletions src/data/dataloader.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::{cell::RefCell, rc::Rc};

use rand::{seq::SliceRandom, thread_rng};
use rand::{seq::SliceRandom, rng};

use super::dataset::Dataset;

Expand All @@ -26,7 +26,7 @@ impl<T> DataloaderInner<T> {

/// Shuffles the indices into a random order.
fn shuffle_indices(&mut self) {
self.indices.shuffle(&mut thread_rng());
self.indices.shuffle(&mut rng());
}
}

Expand All @@ -48,7 +48,7 @@ impl<T> Dataloader<T> {
}
// shuffle the indices up front when shuffling was requested
if shuffle {
indices.shuffle(&mut thread_rng());
indices.shuffle(&mut rng());
}
Self(Rc::new(RefCell::new(DataloaderInner {
dataset,
Expand All @@ -68,6 +68,10 @@ impl<T> Dataloader<T> {
pub fn is_shuffle(&self) -> bool {
self.0.borrow().shuffle
}

/// Returns the length reported by the wrapped dataset.
pub fn len(&self) -> usize {
    let inner = self.0.borrow();
    inner.dataset.len()
}
}

impl<T> Iterator for Dataloader<T> {
Expand Down
31 changes: 26 additions & 5 deletions src/nn/criterions.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,43 @@
use crate::{op::Op, tensor_data::TensorData, Tensor};

pub struct MSELoss {}
/// Selects how a criterion collapses its element-wise errors into one value.
///
/// The variant names are kept in upper case so existing callers that write
/// `Reduction::SUM` / `Reduction::MEAN` keep compiling.
///
/// NOTE(review): `MSELoss::measure` divides by the element count once more
/// after applying the reduction — confirm that is intended.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd)]
pub enum Reduction {
    /// Sum the element-wise squared errors.
    SUM,
    /// Sum the element-wise squared errors, then divide by the element count.
    MEAN,
}

/// Squared-error criterion; `measure` builds its value from `(a - b).pow(2)`.
#[derive(Clone)]
pub struct MSELoss {
    /// Reduction applied by `measure`. With `None`, `measure` performs no
    /// summation of the squared errors.
    reduction: Option<Reduction>,
}

impl MSELoss {
pub fn new() -> Self {
Self {}
pub fn new(reduction: Option<Reduction>) -> Self {
Self {
reduction: reduction,
}
}

pub fn measure(&self, a: Tensor, b: Tensor) -> Tensor {
let t = (a - b).pow(2);
let shape = t.shape();
let inner = TensorData::from_op(t.item(), vec![t], Op::MSE);
let a = t.item();
let t_len = t.length() as f64;
let mut s = 0.0;
if let Some(reduction) = self.reduction {
s = a.iter().sum::<f64>();
if reduction == Reduction::MEAN {
s = s / t_len;
}
}
s /= t_len;
let inner = TensorData::from_op(vec![s], vec![t], Op::MSE(t_len as usize));
Tensor::new(inner, &shape)
}
}

impl Default for MSELoss {
fn default() -> Self {
Self::new()
Self::new(None)
}
}
38 changes: 38 additions & 0 deletions src/nn/functional.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,41 @@ pub fn sigmoid(x: Tensor) -> Tensor {
let inner = TensorData::from_op(data.item(), vec![x.clone()], Op::Sigmoid(x));
Tensor::new(inner, &data.shape)
}

/// Applies softmax along the last dimension of `x`.
///
/// The flat data of `x` is treated as consecutive rows of `shape[dim]`
/// values; each row is normalised independently. All leading dimensions
/// (including none at all, for a 1-D tensor) are therefore handled uniformly
/// as batch dimensions.
///
/// # Arguments
/// * `x` - input tensor; it becomes the parent of the returned tensor.
/// * `dim` - dimension to normalise over; must be the last dimension of `x`.
///
/// # Returns
/// A tensor with the same shape as `x`, recorded with `Op::Softmax(x, dim)`.
///
/// # Panics
/// Panics if `dim` is not the index of the last dimension of `x`.
pub fn softmax(x: Tensor, dim: usize) -> Tensor {
    let shape = x.shape();
    // `checked_sub` turns a shape with no dimensions into a clean assertion
    // failure instead of an integer underflow.
    assert_eq!(
        Some(dim),
        shape.len().checked_sub(1),
        "Softmax for dimensions other than the last one is not supported."
    );
    let n = shape[dim];
    let data = x.item();
    let mut result = vec![0.0; x.length()];
    // `chunks` panics on a chunk size of zero; a zero-length last dimension
    // has nothing to normalise anyway.
    if n > 0 {
        // Rows along the last dimension are contiguous, so walking the flat
        // buffer in chunks of `n` visits every row of every batch exactly once.
        for (out_row, in_row) in result.chunks_mut(n).zip(data.chunks(n)) {
            // Subtract the row maximum before exponentiating for numerical
            // stability.
            let max_x = in_row.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
            let mut sum_exp_x = 0.0;
            for (out, &xi) in out_row.iter_mut().zip(in_row) {
                *out = (xi - max_x).exp();
                sum_exp_x += *out;
            }
            for out in out_row.iter_mut() {
                *out /= sum_exp_x;
            }
        }
    }
    // create new tensor
    let inner = TensorData::from_op(result, vec![x.clone()], Op::Softmax(x, dim));
    Tensor::new(inner, &shape)
}
6 changes: 4 additions & 2 deletions src/op.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ pub enum Op {
Cross,
ReLU,
Sigmoid(Tensor),
MSE,
Softmax(Tensor, usize),
MSE(usize),
}

impl std::fmt::Display for Op {
Expand All @@ -28,7 +29,8 @@ impl std::fmt::Display for Op {
Op::Cross => write!(f, "Cross"),
Op::ReLU => write!(f, "ReLU"),
Op::Sigmoid(n) => write!(f, "Sigmoid({n})"),
Op::MSE => write!(f, "MSE"),
Op::Softmax(n, dim) => write!(f, "Softmax({n},{dim})"),
Op::MSE(n) => write!(f, "MSE({n})"),
}
}
}
4 changes: 4 additions & 0 deletions src/optim.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod lr_scheduler;
pub mod sgd;

// Short paths for algorithms
Expand All @@ -10,4 +11,7 @@ pub trait Optim {

/// Sets gradients to zero.
fn zero_grad(&self);

/// Scales the learning rate by multiplying it by `gamma`.
fn change_lr(&mut self, gamma: f64);
}
5 changes: 5 additions & 0 deletions src/optim/lr_scheduler.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pub mod multistep_lr;

/// Common interface for learning-rate schedulers.
pub trait Scheduler {
    /// Advances the scheduler by one step, mutating its internal state.
    fn step(&mut self);
}
33 changes: 33 additions & 0 deletions src/optim/lr_scheduler/multistep_lr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
use crate::optim::Optim;

use super::Scheduler;

/// Scheduler that rescales an optimizer's learning rate at fixed step counts.
pub struct MultiStepLR {
    /// Optimizer that is stepped, and whose learning rate is changed, by `step`.
    optimizer: Box<dyn Optim>,
    /// Step counts at which the learning rate is changed.
    pub milestones: Vec<usize>,
    /// Factor handed to the optimizer's `change_lr` at each milestone.
    pub gamma: f64,
    /// Number of `step` calls performed so far.
    count: usize,
}

impl MultiStepLR {
    /// Creates a scheduler that owns `optimizer`, keeps its own copy of
    /// `milestones`, and starts counting steps from zero.
    pub fn new(optimizer: Box<dyn Optim>, milestones: &[usize], gamma: f64) -> Self {
        let milestones = milestones.to_vec();
        Self {
            optimizer,
            milestones,
            gamma,
            count: 0,
        }
    }
}

impl Scheduler for MultiStepLR {
    /// Applies `gamma` once for every milestone equal to the current step
    /// count, then steps the wrapped optimizer and advances the counter.
    fn step(&mut self) {
        let current = self.count;
        // A milestone listed several times rescales the rate several times.
        let hits = self.milestones.iter().filter(|&&m| m == current).count();
        for _ in 0..hits {
            self.optimizer.as_mut().change_lr(self.gamma);
        }
        self.optimizer.as_mut().step();
        self.count += 1;
    }
}
5 changes: 5 additions & 0 deletions src/optim/sgd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use super::Optim;
/// It has:
/// - parameters of the model
/// - learning rate
#[derive(Clone)]
pub struct SGD {
parameters: Vec<Tensor>,
lr: f64,
Expand Down Expand Up @@ -57,4 +58,8 @@ impl Optim for SGD {
self.parameters[i].inner.borrow_mut().zero_grad();
}
}

/// Multiplies the stored learning rate by `gamma` in place.
fn change_lr(&mut self, gamma: f64) {
    self.lr *= gamma;
}
}
Loading
Loading