Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
24f2cda
Added MSE metric for time series forecasting models.
Jasmine-Yuting-Zhang Mar 17, 2026
8232690
Added config file for TimesFM model with EV availability forecasting.
Jasmine-Yuting-Zhang Mar 17, 2026
a19e5ac
ruff format . .
Jasmine-Yuting-Zhang Mar 17, 2026
f8e1c3a
Added ev_charging datasource and the data normalization for time seri…
Jasmine-Yuting-Zhang Mar 17, 2026
536f846
Added ev availability datasource in the registry.
Jasmine-Yuting-Zhang Mar 17, 2026
907268a
Added TimesFM model and TimeSeriesUtils for time-series forecasting t…
Jasmine-Yuting-Zhang Mar 17, 2026
6fb4af7
Refactored timeseries models from HuggingFace.
Jasmine-Yuting-Zhang Mar 23, 2026
ed2e026
Updated config files for time-series models.
Jasmine-Yuting-Zhang Mar 23, 2026
2ed6e0f
Added two new config files for testing data influence, may clean up l…
Jasmine-Yuting-Zhang Mar 29, 2026
c757fd4
Updated prediction length for pretrained model.
Jasmine-Yuting-Zhang Mar 30, 2026
c2a3c13
Added TimesFM transformer model from HuggingFace.
Jasmine-Yuting-Zhang Mar 30, 2026
2c0c5ff
Merged remote-tracking branch 'origin/main' into TimeFM-time-series.
Jasmine-Yuting-Zhang Apr 9, 2026
d45f4b2
Document the DiLoCo implementation contract.
baochunli Apr 29, 2026
0378a57
Implemented DiLoCo outer aggregation.
baochunli Apr 29, 2026
6287b0f
Added exact local step limits for trainers.
baochunli Apr 29, 2026
b91d97d
Fixed local step counting for accumulation.
baochunli Apr 29, 2026
6846f50
Added DiLoCo parameter eligibility policy.
baochunli Apr 29, 2026
d122831
Handled adapter payload names in DiLoCo eligibility.
baochunli Apr 29, 2026
a9d8f3b
Avoided DiLoCo adapter alias overmatching.
baochunli Apr 29, 2026
679c9f6
Persisted in-process optimizer state for DiLoCo.
baochunli Apr 29, 2026
114f2a7
Wired DiLoCo server selection.
baochunli Apr 29, 2026
da0f341
Persisted optimizer state across train subprocesses.
baochunli Apr 29, 2026
c730732
Hardened subprocess optimizer state handoff.
baochunli Apr 29, 2026
33129d0
Added DiLoCo payload safety coverage.
baochunli Apr 29, 2026
21bf980
Added round-aware local-step sampling.
baochunli Apr 29, 2026
f6e8196
Handled non-materializable local-step samplers.
baochunli Apr 29, 2026
711fdb1
Added exact DiLoCo smoke configuration.
baochunli Apr 29, 2026
082aaf1
Added end-to-end DiLoCo validation coverage.
baochunli Apr 29, 2026
1313d30
Restored optimizer state after moving models to device.
baochunli Apr 29, 2026
f359d78
Logged DiLoCo outer optimizer application.
baochunli Apr 30, 2026
c365751
Added DiLoCo comparison configs and step-based scheduling.
baochunli Apr 30, 2026
bcc8073
Added MNIST DiLoCo comparison configs.
baochunli Apr 30, 2026
6ffb475
Aligned DiLoCo comparison budgets.
baochunli Apr 30, 2026
bd817d3
Added MSE in metrics for time series forecasting.
Jasmine-Yuting-Zhang May 10, 2026
ef6c606
Added functions for customizing the time series models from configura…
Jasmine-Yuting-Zhang May 10, 2026
8e30a87
Added functions to save the personalized models.
Jasmine-Yuting-Zhang May 10, 2026
6b791cf
Merge remote-tracking branch 'origin/diloco-faithful-implementation' …
Jasmine-Yuting-Zhang May 10, 2026
2c3f2c3
Added config file for TimesFM25 with diloco.
Jasmine-Yuting-Zhang May 10, 2026
dda867e
Updated DiLoCo steps to match the FedAvg.
Jasmine-Yuting-Zhang May 11, 2026
fb5d231
Ruff format .
Jasmine-Yuting-Zhang May 11, 2026
34cf705
Updated documents for time series models, including the models and a …
Jasmine-Yuting-Zhang May 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions configs/CIFAR10/diloco_resnet18.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
[clients]

# Type
type = "simple"

# The total number of clients
total_clients = 50

# The number of clients selected in each round
per_round = 50

# Should the clients compute test accuracy locally?
do_test = false

[server]
# DiLoCo-specific server: aggregates client updates and applies an outer
# optimizer once per synchronization round.
type = "diloco"
address = "127.0.0.1"
port = 8021

[server.diloco]
# Outer (server-side) optimizer applied to the averaged client deltas.
# Nesterov momentum with lr 0.7 / momentum 0.9 matches the DiLoCo paper's
# recommended outer configuration.
outer_optimizer = "nesterov"
outer_learning_rate = 0.7
outer_momentum = 0.9
# "uniform": every participating client contributes equally to the average.
aggregation_weighting = "uniform"
# Apply the outer optimizer to the model parameters.
# NOTE(review): the set of accepted values depends on the DiLoCo server
# implementation — confirm there before changing.
apply_outer_optimizer_to = "parameters"

[data]

# The training and testing dataset
datasource = "Torchvision"
dataset_name = "CIFAR10"
download = true

# Number of samples in each partition
partition_size = 1000

# IID or non-IID?
sampler = "iid"

[trainer]

# The type of the trainer
type = "basic"

# The maximum number of training rounds
rounds = 20

# The maximum number of clients running concurrently
max_concurrency = 7

# The target accuracy
target_accuracy = 0.9

# Number of local optimizer steps per DiLoCo synchronization.
# This is H in the DiLoCo paper; with rounds = 20 the total inner-step
# budget is 20 * 500 = 10,000 steps.
local_steps_per_round = 500
# Keep the inner (AdamW) optimizer state across rounds instead of
# re-initializing it each round — presumably required for a faithful
# DiLoCo inner loop; confirm against the trainer implementation.
preserve_optimizer_state = true

# DiLoCo paper inner-optimizer settings.
# 5 epochs over a 1000-sample partition at batch size 10 gives
# 100 steps/epoch, i.e. 500 optimizer steps per round (= H above).
epochs = 5
batch_size = 10
optimizer = "AdamW"
lr_scheduler = "LambdaLR"

# The machine learning model
model_name = "resnet_18"

[algorithm]

# Weight extraction and model update path reused by DiLoCo.
type = "fedavg"

[parameters]

[parameters.optimizer]
# Inner-optimizer (AdamW) hyperparameters.
lr = 0.0004
weight_decay = 0.1

[parameters.learning_rate]
# Warmup schedule for the LambdaLR scheduler; the "it" suffix presumably
# denotes iterations (1000 warmup iterations) — confirm against the
# scheduler-parsing code.
warmup_steps = "1000it"
68 changes: 68 additions & 0 deletions configs/CIFAR10/fedavg_resnet18_diloco_comparison.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
[clients]

# Type
type = "simple"

# The total number of clients
total_clients = 50

# The number of clients selected in each round
per_round = 50

# Should the clients compute test accuracy locally?
do_test = false

[server]
# Plain FedAvg server (no `type = "diloco"`): this config is the FedAvg
# baseline for comparison against configs/CIFAR10/diloco_resnet18.toml.
address = "127.0.0.1"
port = 8022

[data]

# The training and testing dataset
datasource = "Torchvision"
dataset_name = "CIFAR10"
download = true

# Number of samples in each partition
partition_size = 1000

# IID or non-IID?
sampler = "iid"

[trainer]

# The type of the trainer
type = "basic"

# The maximum number of training rounds
rounds = 20

# The maximum number of clients running concurrently
max_concurrency = 7

# The target accuracy
target_accuracy = 0.9

# Match the original FedAvg local training shape while keeping 500 optimizer
# steps per round, equal to DiLoCo's H.
# (5 epochs * 1000 samples / batch 10 = 500 steps/round; 20 rounds gives the
# same 10,000-step total budget as the DiLoCo run.)
epochs = 5
batch_size = 10
optimizer = "AdamW"
lr_scheduler = "LambdaLR"

# The machine learning model
model_name = "resnet_18"

[algorithm]

# Aggregation algorithm
type = "fedavg"

[parameters]

[parameters.optimizer]
# Local (AdamW) optimizer hyperparameters, kept identical to the DiLoCo
# config so the comparison isolates the aggregation strategy.
lr = 0.0004
weight_decay = 0.1

[parameters.learning_rate]
# Warmup for the LambdaLR scheduler; "it" presumably denotes iterations —
# confirm against the scheduler-parsing code.
warmup_steps = "1000it"
75 changes: 75 additions & 0 deletions configs/MNIST/diloco_lenet5.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
[clients]

# Type
type = "simple"

# The total number of clients
total_clients = 50

# The number of clients selected in each round
per_round = 50

# Should the clients compute test accuracy locally?
do_test = false

[server]
# DiLoCo-specific server: aggregates client updates and applies an outer
# optimizer once per synchronization round.
type = "diloco"
address = "127.0.0.1"
port = 8001
# Fixed seed for reproducibility of client selection/sampling.
random_seed = 1
simulate_wall_time = true

[server.diloco]
# Outer (server-side) optimizer applied to the averaged client deltas.
# Nesterov momentum with lr 0.7 / momentum 0.9 matches the DiLoCo paper's
# recommended outer configuration.
outer_optimizer = "nesterov"
outer_learning_rate = 0.7
outer_momentum = 0.9
# "uniform": every participating client contributes equally to the average.
aggregation_weighting = "uniform"
# Apply the outer optimizer to the model parameters.
# NOTE(review): the set of accepted values depends on the DiLoCo server
# implementation — confirm there before changing.
apply_outer_optimizer_to = "parameters"

[data]
# Shared MNIST IID data settings; this file only overrides partition_size.
include = "mnist_iid.toml"
partition_size = 1000

[trainer]

# The type of the trainer
type = "basic"

# The maximum number of training rounds
rounds = 20

# The maximum number of clients running concurrently
max_concurrency = 7

# The target accuracy
target_accuracy = 0.99

# The machine learning model
model_name = "lenet5"

# Number of local optimizer steps per DiLoCo synchronization.
# This is H in the DiLoCo paper; with rounds = 20 the total inner-step
# budget is 20 * 500 = 10,000 steps.
local_steps_per_round = 500
# Keep the inner (AdamW) optimizer state across rounds instead of
# re-initializing it each round — presumably required for a faithful
# DiLoCo inner loop; confirm against the trainer implementation.
preserve_optimizer_state = true

# DiLoCo paper inner-optimizer settings.
epochs = 5
batch_size = 32
optimizer = "AdamW"
lr_scheduler = "LambdaLR"

[algorithm]

# Weight extraction and model update path reused by DiLoCo.
type = "fedavg"

[parameters]

[parameters.model]
num_classes = 10

[parameters.optimizer]
# Inner-optimizer (AdamW) hyperparameters.
lr = 0.0004
weight_decay = 0.1

[parameters.learning_rate]
# Warmup schedule for the LambdaLR scheduler; the "it" suffix presumably
# denotes iterations (1000 warmup iterations) — confirm against the
# scheduler-parsing code.
warmup_steps = "1000it"
66 changes: 66 additions & 0 deletions configs/MNIST/fedavg_lenet5_diloco_comparison.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
[clients]

# Type
type = "simple"

# The total number of clients
total_clients = 50

# The number of clients selected in each round
per_round = 50

# Should the clients compute test accuracy locally?
do_test = false

[server]
# Plain FedAvg server (no `type = "diloco"`): this config is the FedAvg
# baseline for comparison against configs/MNIST/diloco_lenet5.toml.
address = "127.0.0.1"
port = 8002
# Same seed as the DiLoCo config so both runs see comparable sampling.
random_seed = 1
simulate_wall_time = true

[data]
# Shared MNIST IID data settings; this file only overrides partition_size.
include = "mnist_iid.toml"
partition_size = 1000

[trainer]

# The type of the trainer
type = "basic"

# The maximum number of training rounds
rounds = 63

# The maximum number of clients running concurrently
max_concurrency = 7

# The target accuracy
target_accuracy = 0.99

# The machine learning model
model_name = "lenet5"

# Match the DiLoCo paper-style inner optimizer settings used by the DiLoCo run.
# 5 epochs over 1000 samples at batch size 32 gives 160 optimizer steps per
# round. With 63 rounds, FedAvg gets 10,080 local steps, closely matching
# DiLoCo's 20 * H=500 = 10,000-step total budget.
epochs = 5
batch_size = 32
optimizer = "AdamW"
lr_scheduler = "LambdaLR"

[algorithm]

# Aggregation algorithm
type = "fedavg"

[parameters]

[parameters.model]
num_classes = 10

[parameters.optimizer]
# Local (AdamW) optimizer hyperparameters, kept identical to the DiLoCo
# config so the comparison isolates the aggregation strategy.
lr = 0.0004
weight_decay = 0.1

[parameters.learning_rate]
# Warmup for the LambdaLR scheduler; "it" presumably denotes iterations —
# confirm against the scheduler-parsing code.
warmup_steps = "1000it"
93 changes: 93 additions & 0 deletions configs/TimeSeries/patchtsmixer_ev_charging.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Federated Learning with PatchTSMixer for EV Charging Prediction
#
# Task: Given the past 28 days (672 h) of a user's EV charging behaviour,
# predict whether they will be charging in each of the next 168 hours.
#
# Dataset: "EV Charging Reports" – AdO1 garage, 4 users
# https://data.mendeley.com/datasets/jbks2rcwyj/1
#
# Federated setup: 4 clients, one user each. All clients participate every round.
#
# Model: PatchTSMixer (trained from scratch)
# - uses all 6 input features jointly via mix_channel mode
# - predicts only the is_charging channel
#
# Usage:
# uv run plato.py -c configs/TimeSeries/patchtsmixer_ev_charging.toml

[clients]
type = "simple"
total_clients = 4
per_round = 4
# Clients evaluate test accuracy locally each round.
do_test = true

[server]
address = "127.0.0.1"
port = 8000
simulate_wall_time = false
# Where server checkpoints and saved models are written.
checkpoint_path = "checkpoints/timeseries/patchtsmixer_ev"
model_path = "models/timeseries/patchtsmixer_ev"

[data]
datasource = "EVCharging"

# Local CSV export of the Mendeley dataset referenced above.
datasource_path = "runtime/data/ado1/dataset1_ev_charging_reports.csv"

garage = "AdO1" # garage id

# Explicit user IDs to include — one client per user.
users = ["AdO1-1", "AdO1-2", "AdO1-3", "AdO1-4"]
# Each client trains on all of its own user's data (no subsampling).
sampler = "all_inclusive"
random_seed = 42

[trainer]
type = "HuggingFace"
rounds = 100
max_concurrency = 4
# "scratch" suffix: the model is randomly initialized, not loaded from a
# pretrained checkpoint.
model_name = "patchtsmixer_scratch"
model_type = "patchtsmixer"
model_task = "forecasting"

context_length = 672 # 4 × 7 × 24
prediction_length = 168 # 7 × 24

# Number of input channels: is_charging, energy_scaled,
# hour_sin, hour_cos, dow_sin, dow_cos
num_input_channels = 6

# Predict and evaluate only the is_charging channel (index 0)
prediction_channel_indices = [0]

# PatchTSMixer architecture hyperparameters.
# 672-step context with patch_length = patch_stride = 8 yields
# non-overlapping patches.
patch_length = 8
patch_stride = 8
d_model = 64
num_layers = 4
expansion_factor = 2
dropout = 0.1
head_dropout = 0.1

# Mix all channels so the model can use time features jointly.
mode = "mix_channel"
gated_attn = true
scaling = "std"

# Sliding-window stride for dataset creation
stride = 1 # advance 1 hour at a time to maximize training windows

# Local training settings per round.
epochs = 10
batch_size = 16
optimizer = "Adam"

# Chronological train/val split fractions; the remaining 15% is
# presumably held out for testing — confirm against the datasource code.
train_ratio = 0.70
val_ratio = 0.15

[algorithm]
type = "fedavg"

[parameters]
[parameters.optimizer]
lr = 0.0005
weight_decay = 1e-4

[results]
# Columns recorded in the per-round results CSV; "mse" is the time-series
# forecasting metric added for this task.
types = "round, elapsed_time, mse"
Loading
Loading