From 59d5ba2ef8b701aeab9e18bf09c65eb6f79198c1 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Tue, 3 Jun 2025 12:10:42 -0400 Subject: [PATCH 01/23] initial commit --- .gitignore | 3 + pufferlib/config/ocean/ants.ini | 23 ++ pufferlib/ocean/ants/README.md | 11 + pufferlib/ocean/ants/__init__.py | 0 pufferlib/ocean/ants/ants.c | 209 +++++++++++ pufferlib/ocean/ants/ants.h | 601 +++++++++++++++++++++++++++++++ pufferlib/ocean/ants/ants.py | 161 +++++++++ pufferlib/ocean/ants/binding.c | 25 ++ pufferlib/ocean/environment.py | 2 +- save_net_flat.py | 125 +++++++ 10 files changed, 1159 insertions(+), 1 deletion(-) create mode 100644 pufferlib/config/ocean/ants.ini create mode 100644 pufferlib/ocean/ants/README.md create mode 100644 pufferlib/ocean/ants/__init__.py create mode 100644 pufferlib/ocean/ants/ants.c create mode 100644 pufferlib/ocean/ants/ants.h create mode 100644 pufferlib/ocean/ants/ants.py create mode 100644 pufferlib/ocean/ants/binding.c create mode 100644 save_net_flat.py diff --git a/.gitignore b/.gitignore index ffec20ae7..21db3195e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,9 @@ c_*.c pufferlib/extensions.c pufferlib/puffernet.c +# Annoying mac files +.DS_Store + # Raylib raylib_wasm/ diff --git a/pufferlib/config/ocean/ants.ini b/pufferlib/config/ocean/ants.ini new file mode 100644 index 000000000..9855666e8 --- /dev/null +++ b/pufferlib/config/ocean/ants.ini @@ -0,0 +1,23 @@ +[base] +package = ocean +env_name = puffer_ants +policy_name = Policy +rnn_name = Recurrent + +[env] +num_envs = 64 + +[train] +total_timesteps = 100_000_000 +gamma = 0.99 +learning_rate = 0.0003 +batch_size = 131072 +minibatch_size = 4096 +update_epochs = 1 +clip_coef = 0.2 +vf_coef = 0.5 +ent_coef = 0.01 +max_grad_norm = 0.5 +target_kl = 0.015 +gae_lambda = 0.95 +bptt_horizon = 8 \ No newline at end of file diff --git a/pufferlib/ocean/ants/README.md b/pufferlib/ocean/ants/README.md new file mode 100644 index 000000000..913da7709 --- /dev/null +++ 
b/pufferlib/ocean/ants/README.md @@ -0,0 +1,11 @@ +``` +puffer train puffer_ants --train.device cpu --train.optimizer adam --neptune --neptune-name "matanitah" --neptune-project "ant-sim" +``` + +``` +puffer eval puffer_ants --load-model-path experiments/ANTS-XXX.pt --train.device cpu --train.optimizer adam --neptune +``` + +``` +scripts/build_ocean.sh ants +``` \ No newline at end of file diff --git a/pufferlib/ocean/ants/__init__.py b/pufferlib/ocean/ants/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pufferlib/ocean/ants/ants.c b/pufferlib/ocean/ants/ants.c new file mode 100644 index 000000000..66c2a88b9 --- /dev/null +++ b/pufferlib/ocean/ants/ants.c @@ -0,0 +1,209 @@ +// for local testing of c code,build with: +// bash scripts/build_ocean.sh ants local + +#include +#include "ants.h" +#include "puffernet.h" + +int demo() { + // Initialize environment with proper parameters - FOLLOWING SNAKE PATTERN + AntsEnv env = { + .num_ants = NUM_COLONIES * MAX_ANTS_PER_COLONY, + .width = WINDOW_WIDTH, + .height = WINDOW_HEIGHT, + .reward_food = 0.1f, + .reward_delivery = 1.0f, + .reward_death = -1.0f, + .cell_size = 1, + }; + + // Allocate memory - CRITICAL: USING PROPER ALLOCATION PATTERN + allocate_ants_env(&env); + c_reset(&env); + + // Load trained weights if available + Weights* weights = NULL; + LinearLSTM* net = NULL; + FILE* f = fopen("resources/ants_weights.bin", "rb"); + if (f) { + fclose(f); + weights = load_weights("resources/ants_weights.bin", 266501); + if (weights) { + int logit_sizes[1] = {4}; + net = make_linearlstm(weights, env.num_ants, env.obs_size, logit_sizes, 4); + } + } + + printf("Environment initialized. Starting render loop...\n"); + printf("Ants: %d, Observation size: %d\n", env.num_ants, env.obs_size); + if (!net) { + printf("No trained weights found. 
Running with random actions.\n"); + } + + // Initialize rendering client + env.client = make_client(1, env.width, env.height); + + // Main loop - FOLLOWING SNAKE PATTERN + while (!WindowShouldClose()) { + // User can take control with shift key + if (IsKeyDown(KEY_LEFT_SHIFT)) { + // Control first ant of colony 1 for demo + env.actions[0] = ACTION_MOVE_FORWARD; + if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) env.actions[0] = ACTION_TURN_LEFT; + if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) env.actions[0] = ACTION_TURN_RIGHT; + if (IsKeyDown(KEY_SPACE)) env.actions[0] = ACTION_DROP_PHEROMONE; + + // Rest of ants act via scripted behaviors + for (int i = 1; i < env.num_ants; i++) { + Ant* ant = &env.ants[i]; + if (ant->has_food) { + // If ant has food, return to colony + Colony* colony = &env.colonies[ant->colony_id]; + float angle_to_colony = get_angle(ant->position, colony->position); + float angle_diff = wrap_angle(angle_to_colony - ant->direction); + + // Turn towards colony + if (angle_diff > 0.1) { + env.actions[i] = ACTION_TURN_RIGHT; + } else if (angle_diff < -0.1) { + env.actions[i] = ACTION_TURN_LEFT; + } else { + env.actions[i] = ACTION_MOVE_FORWARD; + } + } else { + // If ant doesn't have food, seek nearest food source + float closest_food_dist_sq = env.width * env.width; + Vector2D closest_food_pos = {0, 0}; + bool found_food = false; + + for (int j = 0; j < env.num_food_sources; j++) { + if (env.food_sources[j].amount > 0) { + float dist_sq = distance_squared(ant->position, env.food_sources[j].position); + if (dist_sq < closest_food_dist_sq && is_in_vision(ant->position, env.food_sources[j].position)) { + closest_food_dist_sq = dist_sq; + closest_food_pos = env.food_sources[j].position; + found_food = true; + } + } + } + + if (found_food) { + // Turn towards food + float angle_to_food = get_angle(ant->position, closest_food_pos); + float angle_diff = wrap_angle(angle_to_food - ant->direction); + + if (angle_diff > 0.1) { + env.actions[i] = 
ACTION_TURN_RIGHT; + } else if (angle_diff < -0.1) { + env.actions[i] = ACTION_TURN_LEFT; + } else { + env.actions[i] = ACTION_MOVE_FORWARD; + } + } else { + // If no food in sight, move forward and occasionally turn + env.actions[i] = (rand() % 100 < 5) ? (rand() % 2 ? ACTION_TURN_LEFT : ACTION_TURN_RIGHT) : ACTION_MOVE_FORWARD; + } + } + } + } else if (net) { + // Use neural network for all ants + forward_linearlstm(net, env.observations, env.actions); + } else { + // All ants act randomly + for (int i = 0; i < env.num_ants; i++) { + env.actions[i] = rand() % 4; + } + } + + c_step(&env); + c_render(&env); + + // Print stats periodically + if (env.tick % 1000 == 0 && env.log.n > 0) { + printf("Tick %d: Episodes completed: %.0f, Avg score: %.2f, Avg return: %.2f\n", + env.tick, env.log.n, env.log.score / env.log.n, env.log.episode_return / env.log.n); + } + } + + printf("Closing environment...\n"); + + // Clean up - PROPER CLEANUP FOLLOWING SNAKE PATTERN + if (net) { + free_linearlstm(net); + } + if (weights) { + free(weights); + } + close_client(env.client); + free_ants_env(&env); + + return 0; +} + +void test_performance(float test_time) { + // Performance test environment + AntsEnv env = { + .num_ants = 2048, + .width = 1280, + .height = 720, + .reward_food = 0.1f, + .reward_delivery = 1.0f, + .reward_death = -1.0f, + .cell_size = 1, + }; + + allocate_ants_env(&env); + c_reset(&env); + + int start = time(NULL); + int steps = 0; + + while (time(NULL) - start < test_time) { + // Random actions for performance test + for (int i = 0; i < env.num_ants; i++) { + env.actions[i] = rand() % 4; + } + + c_step(&env); + steps++; + + // Print intermediate stats + if (steps % 1000 == 0 && env.log.n > 0) { + printf("Step %d: Episodes: %.0f, Avg performance: %.4f\n", + steps, env.log.n, env.log.perf / env.log.n); + } + } + + int end = time(NULL); + float sps = (float)env.num_ants * steps / (end - start); + printf("Ant Colony Environment SPS: %.0f\n", sps); + printf("Total ant 
steps: %.0f\n", sps); + printf("Episodes completed: %.0f\n", env.log.n); + if (env.log.n > 0) { + printf("Average score: %.2f\n", env.log.score / env.log.n); + printf("Average performance: %.4f\n", env.log.perf / env.log.n); + } + + // Clean up + free_ants_env(&env); +} + +int main() { + // Initialize random seed + srand(time(NULL)); + + printf("Ant Colony Environment Demo\n"); + printf("Controls:\n"); + printf("- Hold SHIFT to control the first ant\n"); + printf("- A/D or LEFT/RIGHT to turn\n"); + printf("- SPACE to drop pheromone\n"); + printf("- ESC to exit\n\n"); + + demo(); + + // Uncomment for performance testing + // printf("\nRunning performance test...\n"); + // test_performance(10); + + return 0; +} \ No newline at end of file diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h new file mode 100644 index 000000000..8e1bb10f3 --- /dev/null +++ b/pufferlib/ocean/ants/ants.h @@ -0,0 +1,601 @@ +#include +#include +#include +#include +#include +#include +#include "raylib.h" + +// Constants for the simulation +#define WINDOW_WIDTH 1280 +#define WINDOW_HEIGHT 720 +#define MAX_ANTS_PER_COLONY 50 +#define NUM_COLONIES 2 +#define MAX_FOOD_SOURCES 20 +#define MAX_FOOD_PER_SOURCE 100 +#define ANT_SPEED 1.0f +#define ANT_SIZE 4 +#define FOOD_SIZE 6 +#define COLONY_SIZE 20 +#define PHEROMONE_EVAPORATION_RATE 0.001f +#define PHEROMONE_DEPOSIT_AMOUNT 1.0f +#define MAX_PHEROMONES 5000 +#define PHEROMONE_SIZE 2 +#define ANT_VISION_RANGE 50.0f +#define ANT_VISION_ANGLE (M_PI / 2) +#define TURN_ANGLE (M_PI / 20) +#define MIN_FOOD_COLONY_DISTANCE 100.0f +#define ANT_LIFETIME 100000 + +// Actions +#define ACTION_MOVE_FORWARD 0 +#define ACTION_TURN_LEFT 1 +#define ACTION_TURN_RIGHT 2 +#define ACTION_DROP_PHEROMONE 3 + +// Colors +#define COLONY1_COLOR (Color){220, 0, 0, 255} +#define COLONY2_COLOR (Color){0, 0, 220, 255} +#define PHEROMONE1_COLOR (Color){255, 200, 200, 100} +#define PHEROMONE2_COLOR (Color){200, 200, 255, 100} +#define FOOD_COLOR (Color){0, 
200, 0, 255} +#define BACKGROUND_COLOR (Color){50, 50, 50, 255} + +// Required Log struct for PufferLib +typedef struct Log Log; +struct Log { + float perf; // Performance metric + float score; // Total score + float episode_return; // Cumulative rewards + float episode_length; // Episode duration + float n; // Episode count - REQUIRED AS LAST FIELD +}; + +// Forward declarations +typedef struct Client Client; +typedef struct AntsEnv AntsEnv; + +// Environment structs +typedef struct { + float x, y; +} Vector2D; + +typedef struct { + Vector2D position; + int amount; +} FoodSource; + +typedef struct { + Vector2D position; + float strength; + int colony_id; +} Pheromone; + +typedef struct { + Vector2D position; + float direction; + int colony_id; + bool has_food; + int lifetime; // Track ant lifetime for performance metrics +} Ant; + +typedef struct { + Vector2D position; + int food_collected; +} Colony; + +// Raylib client structure - FOLLOWING SNAKE PATTERN +struct Client { + int cell_size; + int width; + int height; +}; + +// Main environment struct - RESTRUCTURED FOLLOWING SNAKE PATTERN +struct AntsEnv { + // Required PufferLib fields - IDENTICAL TO SNAKE + float* observations; // Flattened observations for all ants + int* actions; // Actions for all ants + float* rewards; // Rewards for all ants + unsigned char* terminals; // Terminal flags + Log log; // Main aggregated log + Log* ant_logs; // Individual ant logs - CRITICAL ADDITION + + // Environment state + Colony colonies[NUM_COLONIES]; + Ant* ants; // Dynamic array of all ants + FoodSource food_sources[MAX_FOOD_SOURCES]; + Pheromone pheromones[MAX_PHEROMONES]; + int num_pheromones; + int num_food_sources; + + // Environment parameters + int num_ants; // Total number of ants + int width; // Environment width + int height; // Environment height + int obs_size; // Observation size per ant + int tick; // Current timestep + + // Reward parameters + float reward_food; + float reward_delivery; + float reward_death; 
+ + // Rendering + Client* client; // Raylib client + int cell_size; +}; + +/** + * Add an ant's log to the main log when the ant's episode ends. + * CRITICAL FUNCTION - COPIED FROM SNAKE PATTERN + * This should only be called during termination conditions for a specific ant. + * Accumulates the ant's stats into the main log and resets the ant's individual log. + */ +void add_log(AntsEnv* env, int ant_id) { + env->log.perf += env->ant_logs[ant_id].perf; + env->log.score += env->ant_logs[ant_id].score; + env->log.episode_return += env->ant_logs[ant_id].episode_return; + env->log.episode_length += env->ant_logs[ant_id].episode_length; + env->log.n += 1; + + // Reset individual ant log + env->ant_logs[ant_id] = (Log){0}; +} + +// Memory management functions - FOLLOWING SNAKE PATTERN +void init_ants_env(AntsEnv* env) { + env->ants = (Ant*)calloc(env->num_ants, sizeof(Ant)); + env->ant_logs = (Log*)calloc(env->num_ants, sizeof(Log)); + env->tick = 0; + env->client = NULL; + env->num_pheromones = 0; + + // Initialize food sources + env->num_food_sources = MAX_FOOD_SOURCES; + for (int i = 0; i < env->num_food_sources; i++) { + env->food_sources[i].amount = 0; // Will be set in reset + } + + // Initialize colonies + env->colonies[0].position = (Vector2D){env->width / 4, env->height / 2}; + env->colonies[1].position = (Vector2D){3 * env->width / 4, env->height / 2}; + env->colonies[0].food_collected = 0; + env->colonies[1].food_collected = 0; +} + +void allocate_ants_env(AntsEnv* env) { + env->obs_size = 9; // Fixed observation size per ant + env->observations = (float*)calloc(env->num_ants * env->obs_size, sizeof(float)); + env->actions = (int*)calloc(env->num_ants, sizeof(int)); + env->rewards = (float*)calloc(env->num_ants, sizeof(float)); + env->terminals = (unsigned char*)calloc(env->num_ants, sizeof(unsigned char)); + init_ants_env(env); +} + +void c_close(AntsEnv* env) { + if (env->ants) { + free(env->ants); + env->ants = NULL; + } + if (env->ant_logs) { + 
free(env->ant_logs); + env->ant_logs = NULL; + } +} + +void free_ants_env(AntsEnv* env) { + c_close(env); + if (env->observations) { + free(env->observations); + env->observations = NULL; + } + if (env->actions) { + free(env->actions); + env->actions = NULL; + } + if (env->rewards) { + free(env->rewards); + env->rewards = NULL; + } + if (env->terminals) { + free(env->terminals); + env->terminals = NULL; + } +} + +// Helper function implementations +static inline float random_float(float min, float max) { + return min + (max - min) * ((float)rand() / (float)RAND_MAX); +} + +static inline float wrap_angle(float angle) { + while (angle > M_PI) angle -= 2 * M_PI; + while (angle < -M_PI) angle += 2 * M_PI; + return angle; +} + +static inline float distance_squared(Vector2D a, Vector2D b) { + float dx = a.x - b.x; + float dy = a.y - b.y; + return dx * dx + dy * dy; +} + +static inline float get_angle(Vector2D a, Vector2D b) { + return atan2(b.y - a.y, b.x - a.x); +} + +static inline bool is_in_vision(Vector2D ant_pos, Vector2D target) { + float dist_sq = distance_squared(ant_pos, target); + if (dist_sq > ANT_VISION_RANGE * ANT_VISION_RANGE) { + return false; + } + else { + return true; + } +} + + + +static inline void add_pheromone(AntsEnv* env, Vector2D position, int colony_id) { + if (env->num_pheromones >= MAX_PHEROMONES) { + // Replace oldest pheromone + for (int i = 0; i < env->num_pheromones - 1; i++) { + env->pheromones[i] = env->pheromones[i + 1]; + } + env->num_pheromones--; + } + + env->pheromones[env->num_pheromones].position = position; + env->pheromones[env->num_pheromones].strength = PHEROMONE_DEPOSIT_AMOUNT; + env->pheromones[env->num_pheromones].colony_id = colony_id; + env->num_pheromones++; +} + +void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { + Ant* ant = &env->ants[ant_idx]; + Colony* colony = &env->colonies[ant->colony_id]; + + // Observation structure (9 elements): + // [0-1]: ant position (normalized) + // [2]: ant direction 
(normalized to 0-1) + // [3]: has_food (0 or 1) + // [4]: angle to colony (normalized to 0-1) + // [5]: distance to colony (normalized) + // [6]: closest food direction (normalized to 0-1) + // [7]: closest food distance (normalized) + // [8]: strongest pheromone direction (normalized to 0-1) COMMENTED OUT + + obs[0] = ant->position.x / env->width; + obs[1] = ant->position.y / env->height; + obs[2] = (ant->direction + M_PI) / (2 * M_PI); + obs[3] = ant->has_food ? 1.0f : 0.0f; + + // Relative position to colony + obs[4] = (get_angle(ant->position, colony->position) + M_PI) / (2 * M_PI); + obs[5] = distance_squared(ant->position, colony->position) / (env->width * env->width + env->height * env->height); + + // Find closest visible food + float closest_food_dist_sq = env->width * env->width; + Vector2D closest_food_pos = {0, 0}; + for (int i = 0; i < env->num_food_sources; i++) { + if (env->food_sources[i].amount > 0) { + float dist_sq = distance_squared(ant->position, env->food_sources[i].position); + if ( + dist_sq < closest_food_dist_sq && + is_in_vision(ant->position, env->food_sources[i].position) + ) { + closest_food_dist_sq = dist_sq; + closest_food_pos.x = env->food_sources[i].position.x; + closest_food_pos.y = env->food_sources[i].position.y; + } + } + } + // Direction to closest visible food + obs[6] = (get_angle(ant->position, closest_food_pos) + M_PI) / (2 * M_PI); + // Distance to closest visible food + obs[7] = sqrt(closest_food_dist_sq) / sqrt(env->width * env->width + env->height * env->height); + + // Find strongest visible pheromone + // float strongest_pheromone = 0; + // Vector2D pheromone_pos = {0, 0}; + // for (int i = 0; i < env->num_pheromones; i++) { + // if (env->pheromones[i].colony_id == ant->colony_id) { + // float dist_sq = distance_squared(ant->position, env->pheromones[i].position); + // if ( + // is_in_vision(ant->position, env->pheromones[i].position) + // ) { + // float strength = env->pheromones[i].strength / (sqrt(dist_sq) + 1); + 
// if (strength > strongest_pheromone) { + // strongest_pheromone = strength; + // pheromone_pos.x = env->pheromones[i].position.x; + // pheromone_pos.y = env->pheromones[i].position.y; + // } + // } + // } + // } + // obs[8] = get_angle(ant->position, pheromone_pos); +} + +void compute_observations(AntsEnv* env) { + for (int i = 0; i < env->num_ants; i++) { + get_observation_for_ant(env, i, &env->observations[i * env->obs_size]); + } +} + +void spawn_ant(AntsEnv* env, int ant_id) { + Ant* ant = &env->ants[ant_id]; + Colony* colony = &env->colonies[ant->colony_id]; + + ant->position = colony->position; + ant->direction = random_float(0, 2 * M_PI); + ant->has_food = false; + ant->lifetime = 0; + + // Reset individual ant log + env->ant_logs[ant_id] = (Log){0}; +} + +void spawn_food(AntsEnv* env) { + int idx; + bool valid_position; + int attempts = 0; + + do { + float x = random_float(50, env->width - 50); + float y = random_float(50, env->height - 50); + + valid_position = true; + for (int j = 0; j < NUM_COLONIES; j++) { + float dist_sq = distance_squared((Vector2D){x, y}, env->colonies[j].position); + if (dist_sq < MIN_FOOD_COLONY_DISTANCE * MIN_FOOD_COLONY_DISTANCE) { + valid_position = false; + break; + } + } + + if (valid_position) { + // Find an empty food source slot + for (idx = 0; idx < env->num_food_sources; idx++) { + if (env->food_sources[idx].amount == 0) { + env->food_sources[idx].position.x = x; + env->food_sources[idx].position.y = y; + env->food_sources[idx].amount = MAX_FOOD_PER_SOURCE; + return; + } + } + } + attempts++; + } while (!valid_position && attempts < 100); +} + +void c_reset(AntsEnv* env) { + env->tick = 0; + env->log = (Log){0}; + env->num_pheromones = 0; + + // Reset colonies + env->colonies[0].food_collected = 0; + env->colonies[1].food_collected = 0; + + // Initialize all ants + int ant_idx = 0; + for (int i = 0; i < NUM_COLONIES; i++) { + for (int j = 0; j < env->num_ants / NUM_COLONIES; j++) { + env->ants[ant_idx].colony_id = i; + 
spawn_ant(env, ant_idx); + ant_idx++; + } + } + + // Clear food sources and spawn new ones + for (int i = 0; i < env->num_food_sources; i++) { + env->food_sources[i].amount = 0; + } + + for (int i = 0; i < env->num_food_sources; i++) { + spawn_food(env); + } + + // Clear buffers + memset(env->rewards, 0, env->num_ants * sizeof(float)); + memset(env->terminals, 0, env->num_ants * sizeof(unsigned char)); + + // Generate initial observations + compute_observations(env); +} + +void step_ant(AntsEnv* env, int ant_id) { + Ant* ant = &env->ants[ant_id]; + env->ant_logs[ant_id].episode_length += 1; + ant->lifetime++; + + int action = env->actions[ant_id]; + + // Execute action + switch (action) { + case ACTION_TURN_LEFT: + ant->direction -= TURN_ANGLE; + ant->direction = wrap_angle(ant->direction); + break; + case ACTION_TURN_RIGHT: + ant->direction += TURN_ANGLE; + ant->direction = wrap_angle(ant->direction); + break; + case ACTION_DROP_PHEROMONE: + add_pheromone(env, ant->position, ant->colony_id); + break; + } + + // Always move forward + ant->position.x += ANT_SPEED * cos(ant->direction); + ant->position.y += ANT_SPEED * sin(ant->direction); + + // Wrap around edges + if (ant->position.x < 0) ant->position.x = env->width; + if (ant->position.x > env->width) ant->position.x = 0; + if (ant->position.y < 0) ant->position.y = env->height; + if (ant->position.y > env->height) ant->position.y = 0; + + // Check for food collection + if (!ant->has_food) { + for (int j = 0; j < env->num_food_sources; j++) { + if (env->food_sources[j].amount > 0) { + float dist_sq = distance_squared(ant->position, env->food_sources[j].position); + if (dist_sq < (ANT_SIZE + FOOD_SIZE) * (ANT_SIZE + FOOD_SIZE)) { + // printf("Ant %d collected food\n", ant_id); + ant->has_food = true; + env->food_sources[j].amount--; + env->rewards[ant_id] = env->reward_food; + env->ant_logs[ant_id].episode_return += env->reward_food; + break; + } + } + } + } + + // Check for food delivery + if (ant->has_food) { + 
Colony* colony = &env->colonies[ant->colony_id]; + float dist_sq = distance_squared(ant->position, colony->position); + if (dist_sq < (ANT_SIZE + COLONY_SIZE) * (ANT_SIZE + COLONY_SIZE)) { + ant->has_food = false; + colony->food_collected++; + env->rewards[ant_id] = env->reward_delivery; + env->ant_logs[ant_id].episode_return += env->reward_delivery; + env->ant_logs[ant_id].score += 1; // Score based on deliveries + } + } + + // MULTIPLE TERMINAL CONDITIONS FOR FREQUENT LOG GENERATION + bool should_terminate = false; + + // Terminal Condition 1: Shorter lifetime limit (similar to snake death frequency) + if (ant->lifetime > ANT_LIFETIME) { + should_terminate = true; + } + + // // Terminal Condition 2: Random death chance (0.1% per step after 50 steps) + // if (ant->lifetime > 50 && (rand() % 1000) < 1) { + // should_terminate = true; + // } + + // // Terminal Condition 3: Performance-based termination after food delivery + // if (env->ant_logs[ant_id].score > 0 && (rand() % 100) < 5) { + // should_terminate = true; + // } + + // Execute termination and log aggregation + if (should_terminate) { + env->ant_logs[ant_id].perf = env->ant_logs[ant_id].episode_length > 0 ? 
+ env->ant_logs[ant_id].score / env->ant_logs[ant_id].episode_length : 0; + add_log(env, ant_id); + spawn_ant(env, ant_id); + env->terminals[ant_id] = 1; + + // Debug output for terminal condition verification + if (env->tick % 100 == 0) { + printf("Ant %d terminated at tick %d, lifetime %d, score %.1f\n", + ant_id, env->tick, ant->lifetime, env->ant_logs[ant_id].score); + } + } +} + +void c_step(AntsEnv* env) { + env->tick++; + + // Clear rewards and terminals + memset(env->rewards, 0, env->num_ants * sizeof(float)); + memset(env->terminals, 0, env->num_ants * sizeof(unsigned char)); + + // Step all ants + for (int i = 0; i < env->num_ants; i++) { + step_ant(env, i); + } + + // Update pheromones + for (int i = 0; i < env->num_pheromones; i++) { + env->pheromones[i].strength -= PHEROMONE_EVAPORATION_RATE; + if (env->pheromones[i].strength <= 0) { + // Remove evaporated pheromone + env->pheromones[i] = env->pheromones[env->num_pheromones - 1]; + env->num_pheromones--; + i--; + } + } + + // Generate new observations + compute_observations(env); +} + +// Raylib client functions - FOLLOWING SNAKE PATTERN +Client* make_client(int cell_size, int width, int height) { + Client* client = (Client*)malloc(sizeof(Client)); + client->cell_size = cell_size; + client->width = width; + client->height = height; + InitWindow(width, height, "PufferLib Ant Colony"); + SetTargetFPS(60); + return client; +} + +void close_client(Client* client) { + CloseWindow(); + free(client); +} + +void c_render(AntsEnv* env) { + if (IsKeyDown(KEY_ESCAPE)) { + exit(0); + } + + if (env->client == NULL) { + env->client = make_client(1, env->width, env->height); + } + + BeginDrawing(); + ClearBackground(BACKGROUND_COLOR); + + // Draw colonies + for (int i = 0; i < NUM_COLONIES; i++) { + Color colony_color = (i == 0) ? 
COLONY1_COLOR : COLONY2_COLOR; + DrawCircle(env->colonies[i].position.x, env->colonies[i].position.y, COLONY_SIZE, colony_color); + } + + // Draw food sources + for (int i = 0; i < env->num_food_sources; i++) { + if (env->food_sources[i].amount > 0) { + DrawCircle(env->food_sources[i].position.x, env->food_sources[i].position.y, + FOOD_SIZE, FOOD_COLOR); + DrawText(TextFormat("%d", env->food_sources[i].amount), + env->food_sources[i].position.x, env->food_sources[i].position.y, 10, RAYWHITE); + } + } + + // Draw pheromones + for (int i = 0; i < env->num_pheromones; i++) { + Color pheromone_color = (env->pheromones[i].colony_id == 0) ? PHEROMONE1_COLOR : PHEROMONE2_COLOR; + pheromone_color.a = (unsigned char)(100 * env->pheromones[i].strength); + DrawCircle(env->pheromones[i].position.x, env->pheromones[i].position.y, + PHEROMONE_SIZE, pheromone_color); + } + + // Draw ants + for (int i = 0; i < env->num_ants; i++) { + Ant* ant = &env->ants[i]; + Color ant_color = (ant->colony_id == 0) ? COLONY1_COLOR : COLONY2_COLOR; + DrawCircle(ant->position.x, ant->position.y, ANT_SIZE, ant->has_food ? 
FOOD_COLOR : ant_color); + + // Draw direction indicator + float dir_x = ant->position.x + (ANT_SIZE * 1.5f) * cos(ant->direction); + float dir_y = ant->position.y + (ANT_SIZE * 1.5f) * sin(ant->direction); + DrawLine(ant->position.x, ant->position.y, dir_x, dir_y, RAYWHITE); + } + + // Draw UI + DrawText(TextFormat("Colony 1 Food: %d", env->colonies[0].food_collected), 20, 20, 20, COLONY1_COLOR); + DrawText(TextFormat("Colony 2 Food: %d", env->colonies[1].food_collected), 20, 50, 20, COLONY2_COLOR); + DrawText(TextFormat("Tick: %d", env->tick), env->width - 120, 20, 20, RAYWHITE); + + EndDrawing(); +} diff --git a/pufferlib/ocean/ants/ants.py b/pufferlib/ocean/ants/ants.py new file mode 100644 index 000000000..73034a389 --- /dev/null +++ b/pufferlib/ocean/ants/ants.py @@ -0,0 +1,161 @@ +import numpy as np +import gymnasium + +import pufferlib +from pufferlib import APIUsageError +from pufferlib.ocean.ants import binding + +class AntsEnv(pufferlib.PufferEnv): + """ + Ant Colony Simulation Environment + + Each ant receives observations about its surroundings and can: + - Move forward (always happens) + - Turn left/right + - Drop pheromone trails + + Two colonies compete to collect food from the environment. + Following multiagent architecture patterns from snake environment. 
+ """ + + def __init__( + self, + num_envs=64, + width=1280, + height=720, + num_ants=32, + reward_food=0.1, + reward_delivery=1.0, + reward_death=-1.0, + report_interval=1, + render_mode=None, + buf=None, + seed=0): + + if num_envs is not None: + num_ants = num_envs * [num_ants] + width = num_envs * [width] + height = num_envs * [height] + + if not (len(num_ants) == len(width) == len(height)): + raise APIUsageError('num_ants, width, height must be lists of equal length') + + for w, h in zip(width, height): + if w < 100 or h < 100: + raise APIUsageError('width and height must be at least 100') + + self.report_interval = report_interval + + self.single_observation_space = gymnasium.spaces.Box( + low=0.0, high=1.0, shape=(8,), dtype=np.float32 # obs_size + ) + self.single_action_space = gymnasium.spaces.Discrete(4) + self.num_agents = sum(num_ants) + self.render_mode = render_mode + self.tick = 0 + + # Calculate cell size for rendering + self.cell_size = int(np.ceil(1280 / max(max(width), max(height)))) + + super().__init__(buf) + + c_envs = [] + offset = 0 + for i in range(num_envs): + na = num_ants[i] + obs_slice = self.observations[offset:offset+na*8] # Multiply by obs_size + act_slice = self.actions[offset:offset+na] + rew_slice = self.rewards[offset:offset+na] + term_slice = self.terminals[offset:offset+na] + trunc_slice = self.truncations[offset:offset+na] + + # Seed each env uniquely: i + seed * num_envs + env_seed = i + seed * num_envs + env_id = binding.env_init( + obs_slice, + act_slice, + rew_slice, + term_slice, + trunc_slice, + env_seed, + width=width[i], + height=height[i], + num_ants=na, + reward_food=reward_food, + reward_delivery=reward_delivery, + reward_death=reward_death, + cell_size=self.cell_size + ) + c_envs.append(env_id) + offset += na * 8 # Multiply by obs_size + + # VECTORIZE ENVIRONMENTS - FOLLOWING SNAKE PATTERN + self.c_envs = binding.vectorize(*c_envs) + + def reset(self, seed=None): + """Reset all environments""" + self.tick = 0 + if 
seed is None: + binding.vec_reset(self.c_envs, 0) + else: + binding.vec_reset(self.c_envs, seed) + return self.observations, [] + + def step(self, actions): + """Execute one step for all agents""" + self.actions[:] = actions + self.tick += 1 + binding.vec_step(self.c_envs) + + info = [] + if self.tick % self.report_interval == 0: + log_data = binding.vec_log(self.c_envs) + if log_data: + # Add computed metrics + info.append(log_data) + + return (self.observations, self.rewards, + self.terminals, self.truncations, info) + + def render(self): + """Render the first environment""" + binding.vec_render(self.c_envs, 0) + + def close(self): + """Clean up resources""" + binding.vec_close(self.c_envs) + + +def test_performance(timeout=10, atn_cache=1024): + """Performance test following snake pattern""" + env = AntsEnv(num_envs=64, num_ants=50) + env.reset() + tick = 0 + + total_ants = env.num_agents + actions = np.random.randint(0, 4, (atn_cache, total_ants)) + + import time + start = time.time() + while time.time() - start < timeout: + atns = actions[tick % atn_cache] + obs, rewards, dones, truncs, info = env.step(atns) + + # Print info when available + if info: + for log_data in info: + if 'n' in log_data and log_data['n'] > 0: + print(f"Tick {tick}: Episodes: {log_data['n']:.0f}, " + f"Avg score: {log_data.get('score', 0) / log_data['n']:.2f}, " + f"Avg return: {log_data.get('episode_return', 0) / log_data['n']:.2f}") + + tick += 1 + + elapsed = time.time() - start + sps = total_ants * tick / elapsed + print(f'Ant SPS: {sps:.0f} ({tick} environment steps)') + env.close() + + +if __name__ == '__main__': + test_performance() \ No newline at end of file diff --git a/pufferlib/ocean/ants/binding.c b/pufferlib/ocean/ants/binding.c new file mode 100644 index 000000000..9a3b3ecd5 --- /dev/null +++ b/pufferlib/ocean/ants/binding.c @@ -0,0 +1,25 @@ +#include "ants.h" + +#define Env AntsEnv +#include "../env_binding.h" + +static int my_init(Env* env, PyObject* args, PyObject* 
kwargs) { + env->width = unpack(kwargs, "width"); + env->height = unpack(kwargs, "height"); + env->num_ants = unpack(kwargs, "num_ants"); + env->reward_food = unpack(kwargs, "reward_food"); + env->reward_delivery = unpack(kwargs, "reward_delivery"); + env->cell_size = unpack(kwargs, "cell_size"); + + init_ants_env(env); + return 0; +} + +static int my_log(PyObject* dict, Log* log) { + assign_to_dict(dict, "perf", log->perf); + assign_to_dict(dict, "score", log->score); + assign_to_dict(dict, "episode_return", log->episode_return); + assign_to_dict(dict, "episode_length", log->episode_length); + assign_to_dict(dict, "n", log->n); + return 0; +} \ No newline at end of file diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py index 4c0c0791a..adeb1f9de 100644 --- a/pufferlib/ocean/environment.py +++ b/pufferlib/ocean/environment.py @@ -117,9 +117,9 @@ def make_multiagent(buf=None, **kwargs): return pufferlib.emulation.PettingZooPufferEnv(env=env, buf=buf) MAKE_FUNCTIONS = { + 'ants': 'AntsEnv', 'breakout': 'Breakout', 'blastar': 'Blastar', - 'convert': 'Convert', 'pong': 'Pong', 'enduro': 'Enduro', 'cartpole': 'Cartpole', diff --git a/save_net_flat.py b/save_net_flat.py new file mode 100644 index 000000000..327cde38e --- /dev/null +++ b/save_net_flat.py @@ -0,0 +1,125 @@ +import torch +from torch.nn import functional as F +import numpy as np +import collections + +def save_model_weights(model, filename): + weights = [] + for name, param in model.named_parameters(): + weights.append(param.data.cpu().numpy().flatten()) + print(name, param.shape, param.data.cpu().numpy().ravel()[0]) + + weights = np.concatenate(weights) + print('Num weights:', len(weights)) + weights.tofile(filename) + # Save the model architecture (you may want to adjust this based on your specific model) + #with open(filename + "_architecture.txt", "w") as f: + # for name, param in model.named_parameters(): + # f.write(f"{name}: {param.shape}\n") + +def test_model(model): + 
model = model.cpu().policy + batch_size = 16 + obs_window = 11 + obs_window_channels = 4 + obs_flat = 26 + x = torch.arange( + 0, batch_size*(obs_window*obs_window*obs_window_channels + obs_flat) + ).reshape(batch_size, -1) % 16 + + cnn_features = x[:, :-obs_flat].view( + batch_size, obs_window, obs_window, obs_window_channels).long() + map_features = F.one_hot(cnn_features[:, :, :, 0], 16).permute(0, 3, 1, 2).float() + extra_map_features = (cnn_features[:, :, :, -3:].float() / 255.0).permute(0, 3, 1, 2) + cnn_features = torch.cat([map_features, extra_map_features], dim=1) + cnn = model.policy.cnn + + cnn_features = torch.from_numpy( + np.arange(batch_size*11*11*19).reshape( + batch_size, 19, obs_window, obs_window) + ).float() + conv1_out = cnn[0](cnn_features) + + #(cnn[0].weight[0] * cnn_features[0, :, :5, :5]).sum() + cnn[0].bias[0] + + breakpoint() + hidden = model.encoder(x) + output = model.decoder(hidden) + atn = output.argmax(dim=1) + print('Encode weight sum:', model.encoder.weight.sum()) + print('encode decode weight and bias sum:', model.encoder.weight.sum() + model.encoder.bias.sum() + model.decoder.weight.sum() + model.decoder.bias.sum()) + print('X sum:', x.sum()) + print('Hidden sum:', hidden.sum()) + print('Hidden 1-10:', hidden[0, :10]) + print('Output sum:', output.sum()) + print('Atn sum:', atn.sum()) + breakpoint() + exit(0) + +def test_lstm(): + batch_size = 16 + input_size = 128 + hidden_size = 128 + + input = torch.arange(batch_size*input_size).reshape(1, batch_size, -1).float()/ 100000 + state = ( + torch.arange(batch_size*hidden_size).reshape(1, batch_size, -1).float()/ 100000, + torch.arange(batch_size*hidden_size).reshape(1, batch_size, -1).float() / 100000 + ) + weights_input = torch.arange(4*hidden_size*input_size).reshape(4*hidden_size, -1).float()/ 100000 + weights_state = torch.arange(4*hidden_size*hidden_size).reshape(4*hidden_size, -1).float()/ 100000 + bias_input = torch.arange(4*hidden_size).reshape(4*hidden_size).float() / 
100000 + bias_state = torch.arange(4*hidden_size).reshape(4*hidden_size).float() / 100000 + + lstm = torch.nn.LSTM(input_size=128, hidden_size=128, num_layers=1) + lstm.weight_ih_l0.data = weights_input + lstm.weight_hh_l0.data = weights_state + lstm.bias_ih_l0.data = bias_input + lstm.bias_hh_l0.data = bias_state + + output, new_state = lstm(input, state) + + input = input.squeeze(0) + h, c = state + + buffer = ( + torch.matmul(input, weights_input.T) + bias_input + + torch.matmul(h, weights_state.T) + bias_state + )[0] + + i, f, g, o = torch.split(buffer, hidden_size, dim=1) + + i = torch.sigmoid(i) + f = torch.sigmoid(f) + g = torch.tanh(g) + o = torch.sigmoid(o) + + c = f*c + i*g + h = o*torch.tanh(c) + + breakpoint() + print('Output:', output) + +def test_model_forward(model): + data = torch.arange(10*(11*11*4 + 26)) % 16 + data[(11*11*4 + 26):] = 0 + data = data.reshape(10, -1).float() + output = model(data) + breakpoint() + pass + + +if __name__ == '__main__': + #test_lstm() + model = torch.load('experiments/174870735516/ants.pt', map_location='cpu') + if isinstance(model, collections.OrderedDict): + state_dict = model + class StateDictWrapper: + def named_parameters(self): + return state_dict.items() + model = StateDictWrapper() + #test_model_forward(model) + #test_model(model) + + save_model_weights(model, 'ants_weights.bin') + print('saved') \ No newline at end of file From fa01598817b8866cc2ed12dd4c130ee43e082839 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Tue, 3 Jun 2025 18:09:51 -0400 Subject: [PATCH 02/23] updates with joseph and dan --- pufferlib/config/ocean/ants.ini | 3 +- pufferlib/ocean/ants/ants.c | 2 +- pufferlib/ocean/ants/ants.h | 59 ++++++++++++++++++++++----------- pufferlib/ocean/ants/ants.py | 13 ++++---- pufferlib/ocean/ants/binding.c | 1 + 5 files changed, 50 insertions(+), 28 deletions(-) diff --git a/pufferlib/config/ocean/ants.ini b/pufferlib/config/ocean/ants.ini index 9855666e8..448e0bbde 100644 --- 
a/pufferlib/config/ocean/ants.ini +++ b/pufferlib/config/ocean/ants.ini @@ -5,7 +5,7 @@ policy_name = Policy rnn_name = Recurrent [env] -num_envs = 64 +num_envs = 1 [train] total_timesteps = 100_000_000 @@ -14,6 +14,7 @@ learning_rate = 0.0003 batch_size = 131072 minibatch_size = 4096 update_epochs = 1 +checkpoint_interval = 50 clip_coef = 0.2 vf_coef = 0.5 ent_coef = 0.01 diff --git a/pufferlib/ocean/ants/ants.c b/pufferlib/ocean/ants/ants.c index 66c2a88b9..0d8e1ea6e 100644 --- a/pufferlib/ocean/ants/ants.c +++ b/pufferlib/ocean/ants/ants.c @@ -8,7 +8,7 @@ int demo() { // Initialize environment with proper parameters - FOLLOWING SNAKE PATTERN AntsEnv env = { - .num_ants = NUM_COLONIES * MAX_ANTS_PER_COLONY, + .num_ants = 32, .width = WINDOW_WIDTH, .height = WINDOW_HEIGHT, .reward_food = 0.1f, diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index 8e1bb10f3..540a1da49 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -9,11 +9,10 @@ // Constants for the simulation #define WINDOW_WIDTH 1280 #define WINDOW_HEIGHT 720 -#define MAX_ANTS_PER_COLONY 50 #define NUM_COLONIES 2 #define MAX_FOOD_SOURCES 20 #define MAX_FOOD_PER_SOURCE 100 -#define ANT_SPEED 1.0f +#define ANT_SPEED 5.0f #define ANT_SIZE 4 #define FOOD_SIZE 6 #define COLONY_SIZE 20 @@ -21,11 +20,11 @@ #define PHEROMONE_DEPOSIT_AMOUNT 1.0f #define MAX_PHEROMONES 5000 #define PHEROMONE_SIZE 2 -#define ANT_VISION_RANGE 50.0f +#define ANT_VISION_RANGE 5000.0f #define ANT_VISION_ANGLE (M_PI / 2) #define TURN_ANGLE (M_PI / 20) #define MIN_FOOD_COLONY_DISTANCE 100.0f -#define ANT_LIFETIME 100000 +#define ANT_LIFETIME 5000 // Actions #define ACTION_MOVE_FORWARD 0 @@ -49,6 +48,7 @@ struct Log { float episode_return; // Cumulative rewards float episode_length; // Episode duration float n; // Episode count - REQUIRED AS LAST FIELD + float reward; }; // Forward declarations @@ -138,7 +138,7 @@ void add_log(AntsEnv* env, int ant_id) { env->log.episode_return += 
env->ant_logs[ant_id].episode_return; env->log.episode_length += env->ant_logs[ant_id].episode_length; env->log.n += 1; - + env->log.reward += env->ant_logs[ant_id].reward; // Reset individual ant log env->ant_logs[ant_id] = (Log){0}; } @@ -282,8 +282,8 @@ void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { if (env->food_sources[i].amount > 0) { float dist_sq = distance_squared(ant->position, env->food_sources[i].position); if ( - dist_sq < closest_food_dist_sq && - is_in_vision(ant->position, env->food_sources[i].position) + dist_sq < closest_food_dist_sq + && is_in_vision(ant->position, env->food_sources[i].position) ) { closest_food_dist_sq = dist_sq; closest_food_pos.x = env->food_sources[i].position.x; @@ -291,10 +291,16 @@ void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { } } } - // Direction to closest visible food - obs[6] = (get_angle(ant->position, closest_food_pos) + M_PI) / (2 * M_PI); - // Distance to closest visible food - obs[7] = sqrt(closest_food_dist_sq) / sqrt(env->width * env->width + env->height * env->height); + if(closest_food_pos.x == 0 && closest_food_pos.y == 0) { + obs[6] = -1.0f; + obs[7] = -1.0f; + } + else { + // Direction to closest visible food + obs[6] = (get_angle(ant->position, closest_food_pos) + M_PI) / (2 * M_PI); + // Distance to closest visible food + obs[7] = sqrt(closest_food_dist_sq) / sqrt(env->width * env->width + env->height * env->height); + } // Find strongest visible pheromone // float strongest_pheromone = 0; @@ -328,9 +334,9 @@ void spawn_ant(AntsEnv* env, int ant_id) { Colony* colony = &env->colonies[ant->colony_id]; ant->position = colony->position; - ant->direction = random_float(0, 2 * M_PI); + ant->direction = wrap_angle(random_float(0, 2 * M_PI)); ant->has_food = false; - ant->lifetime = 0; + ant->lifetime = random_float(0, ANT_LIFETIME); // Reset individual ant log env->ant_logs[ant_id] = (Log){0}; @@ -448,6 +454,7 @@ void step_ant(AntsEnv* env, int ant_id) { 
env->food_sources[j].amount--; env->rewards[ant_id] = env->reward_food; env->ant_logs[ant_id].episode_return += env->reward_food; + env->ant_logs[ant_id].reward += env->reward_food; break; } } @@ -464,22 +471,36 @@ void step_ant(AntsEnv* env, int ant_id) { env->rewards[ant_id] = env->reward_delivery; env->ant_logs[ant_id].episode_return += env->reward_delivery; env->ant_logs[ant_id].score += 1; // Score based on deliveries + env->ant_logs[ant_id].reward += 1; + + } + float angle_to_colony = get_angle(ant->position, colony->position); + float angle_diff = wrap_angle(angle_to_colony - ant->direction); + + if(angle_diff > 0.1 && env->actions[ant_id] == ACTION_TURN_RIGHT){ + env->rewards[ant_id] += 0.0001; + env->ant_logs[ant_id].reward += 0.0001; + } + else if (angle_diff < -0.1 && env->actions[ant_id] == ACTION_TURN_LEFT){ + env->rewards[ant_id] += 0.0001; + env->ant_logs[ant_id].reward += 0.0001; } + } // MULTIPLE TERMINAL CONDITIONS FOR FREQUENT LOG GENERATION bool should_terminate = false; // Terminal Condition 1: Shorter lifetime limit (similar to snake death frequency) - if (ant->lifetime > ANT_LIFETIME) { - should_terminate = true; - } - - // // Terminal Condition 2: Random death chance (0.1% per step after 50 steps) - // if (ant->lifetime > 50 && (rand() % 1000) < 1) { + // if (ant->lifetime > ANT_LIFETIME) { // should_terminate = true; // } + // // Terminal Condition 2: Random death chance (0.1% per step after lifetime) + if (ant->lifetime > ANT_LIFETIME && (rand() % 1000) < 1) { + should_terminate = true; + } + // // Terminal Condition 3: Performance-based termination after food delivery // if (env->ant_logs[ant_id].score > 0 && (rand() % 100) < 5) { // should_terminate = true; diff --git a/pufferlib/ocean/ants/ants.py b/pufferlib/ocean/ants/ants.py index 73034a389..bf679ede4 100644 --- a/pufferlib/ocean/ants/ants.py +++ b/pufferlib/ocean/ants/ants.py @@ -20,13 +20,13 @@ class AntsEnv(pufferlib.PufferEnv): def __init__( self, - num_envs=64, + num_envs=1, 
width=1280, height=720, num_ants=32, reward_food=0.1, reward_delivery=1.0, - reward_death=-1.0, + reward_death=0.0, report_interval=1, render_mode=None, buf=None, @@ -45,14 +45,13 @@ def __init__( raise APIUsageError('width and height must be at least 100') self.report_interval = report_interval - - self.single_observation_space = gymnasium.spaces.Box( - low=0.0, high=1.0, shape=(8,), dtype=np.float32 # obs_size - ) - self.single_action_space = gymnasium.spaces.Discrete(4) self.num_agents = sum(num_ants) self.render_mode = render_mode self.tick = 0 + self.single_action_space = gymnasium.spaces.Discrete(4) + self.single_observation_space = gymnasium.spaces.Box( + low=0.0, high=1.0, shape=(8,), dtype=np.float32 + ) # Calculate cell size for rendering self.cell_size = int(np.ceil(1280 / max(max(width), max(height)))) diff --git a/pufferlib/ocean/ants/binding.c b/pufferlib/ocean/ants/binding.c index 9a3b3ecd5..f8561b49a 100644 --- a/pufferlib/ocean/ants/binding.c +++ b/pufferlib/ocean/ants/binding.c @@ -21,5 +21,6 @@ static int my_log(PyObject* dict, Log* log) { assign_to_dict(dict, "episode_return", log->episode_return); assign_to_dict(dict, "episode_length", log->episode_length); assign_to_dict(dict, "n", log->n); + assign_to_dict(dict, "reward", log->reward); return 0; } \ No newline at end of file From 3eeb3cf1af73c376ba76b81a31b8fd73737f0496 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Wed, 4 Jun 2025 18:40:38 -0400 Subject: [PATCH 03/23] added food respawn --- pufferlib/ocean/ants/ants.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index 540a1da49..675cca734 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -452,6 +452,12 @@ void step_ant(AntsEnv* env, int ant_id) { // printf("Ant %d collected food\n", ant_id); ant->has_food = true; env->food_sources[j].amount--; + + // If food source is exhausted, respawn it + if (env->food_sources[j].amount <= 0) { + 
spawn_food(env); + } + env->rewards[ant_id] = env->reward_food; env->ant_logs[ant_id].episode_return += env->reward_food; env->ant_logs[ant_id].reward += env->reward_food; From 97d9e3b6902c0a7a30a172bac27028597e0e4875 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Sun, 8 Jun 2025 21:09:24 -0400 Subject: [PATCH 04/23] more modifications to the rewards --- pufferlib/ocean/ants/ants.h | 76 ++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 26 deletions(-) diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index 675cca734..bd14b8649 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -12,7 +12,7 @@ #define NUM_COLONIES 2 #define MAX_FOOD_SOURCES 20 #define MAX_FOOD_PER_SOURCE 100 -#define ANT_SPEED 5.0f +#define ANT_SPEED 3.0f #define ANT_SIZE 4 #define FOOD_SIZE 6 #define COLONY_SIZE 20 @@ -20,9 +20,9 @@ #define PHEROMONE_DEPOSIT_AMOUNT 1.0f #define MAX_PHEROMONES 5000 #define PHEROMONE_SIZE 2 -#define ANT_VISION_RANGE 5000.0f +#define ANT_VISION_RANGE 500.0f #define ANT_VISION_ANGLE (M_PI / 2) -#define TURN_ANGLE (M_PI / 20) +#define TURN_ANGLE (M_PI / 36) #define MIN_FOOD_COLONY_DISTANCE 100.0f #define ANT_LIFETIME 5000 @@ -260,9 +260,9 @@ void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { // [0-1]: ant position (normalized) // [2]: ant direction (normalized to 0-1) // [3]: has_food (0 or 1) - // [4]: angle to colony (normalized to 0-1) + // [4]: angle diff to colony (normalized to 0-1) // [5]: distance to colony (normalized) - // [6]: closest food direction (normalized to 0-1) + // [6]: angle diff to closest food (normalized to 0-1) // [7]: closest food distance (normalized) // [8]: strongest pheromone direction (normalized to 0-1) COMMENTED OUT @@ -272,7 +272,9 @@ void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { obs[3] = ant->has_food ? 
1.0f : 0.0f; // Relative position to colony - obs[4] = (get_angle(ant->position, colony->position) + M_PI) / (2 * M_PI); + float angle_to_colony = get_angle(ant->position, colony->position); + float angle_diff_colony = wrap_angle(angle_to_colony - ant->direction); + obs[4] = (angle_diff_colony + M_PI) / (2 * M_PI); obs[5] = distance_squared(ant->position, colony->position) / (env->width * env->width + env->height * env->height); // Find closest visible food @@ -296,8 +298,10 @@ void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { obs[7] = -1.0f; } else { - // Direction to closest visible food - obs[6] = (get_angle(ant->position, closest_food_pos) + M_PI) / (2 * M_PI); + // Direction difference to closest visible food + float angle_to_food = get_angle(ant->position, closest_food_pos); + float angle_diff_food = wrap_angle(angle_to_food - ant->direction); + obs[6] = (angle_diff_food + M_PI) / (2 * M_PI); // Distance to closest visible food obs[7] = sqrt(closest_food_dist_sq) / sqrt(env->width * env->width + env->height * env->height); } @@ -449,7 +453,6 @@ void step_ant(AntsEnv* env, int ant_id) { if (env->food_sources[j].amount > 0) { float dist_sq = distance_squared(ant->position, env->food_sources[j].position); if (dist_sq < (ANT_SIZE + FOOD_SIZE) * (ANT_SIZE + FOOD_SIZE)) { - // printf("Ant %d collected food\n", ant_id); ant->has_food = true; env->food_sources[j].amount--; @@ -458,9 +461,30 @@ void step_ant(AntsEnv* env, int ant_id) { spawn_food(env); } - env->rewards[ant_id] = env->reward_food; - env->ant_logs[ant_id].episode_return += env->reward_food; - env->ant_logs[ant_id].reward += env->reward_food; + env->rewards[ant_id] = 1.0f; + env->ant_logs[ant_id].episode_return += 1.0f; + env->ant_logs[ant_id].reward += 1.0f; + break; + } + } + } + + // Small negative reward for wandering without food + // env->rewards[ant_id] -= 0.001f; + // env->ant_logs[ant_id].reward -= 0.001f; + + // Small positive reward for heading towards visible food + for 
(int j = 0; j < env->num_food_sources; j++) { + if (env->food_sources[j].amount > 0) { + // float dist_sq = distance_squared(ant->position, env->food_sources[j].position); + if (is_in_vision(ant->position, env->food_sources[j].position)) { + float angle_to_food = get_angle(ant->position, env->food_sources[j].position); + float angle_diff = wrap_angle(angle_to_food - ant->direction); + + if (fabs(angle_diff) < TURN_ANGLE) { + env->rewards[ant_id] += 0.0005f; + env->ant_logs[ant_id].reward += 0.0005f; + } break; } } @@ -474,24 +498,24 @@ void step_ant(AntsEnv* env, int ant_id) { if (dist_sq < (ANT_SIZE + COLONY_SIZE) * (ANT_SIZE + COLONY_SIZE)) { ant->has_food = false; colony->food_collected++; - env->rewards[ant_id] = env->reward_delivery; - env->ant_logs[ant_id].episode_return += env->reward_delivery; - env->ant_logs[ant_id].score += 1; // Score based on deliveries - env->ant_logs[ant_id].reward += 1; - + env->rewards[ant_id] += 5.0f; // Larger reward for food delivery + env->ant_logs[ant_id].episode_return += 5.0f; + env->ant_logs[ant_id].score += 1; + env->ant_logs[ant_id].reward += 5.0f; } + + // Reward for heading towards colony when carrying food float angle_to_colony = get_angle(ant->position, colony->position); float angle_diff = wrap_angle(angle_to_colony - ant->direction); - - if(angle_diff > 0.1 && env->actions[ant_id] == ACTION_TURN_RIGHT){ - env->rewards[ant_id] += 0.0001; - env->ant_logs[ant_id].reward += 0.0001; - } - else if (angle_diff < -0.1 && env->actions[ant_id] == ACTION_TURN_LEFT){ - env->rewards[ant_id] += 0.0001; - env->ant_logs[ant_id].reward += 0.0001; - } + if (fabs(angle_diff) < TURN_ANGLE) { + env->rewards[ant_id] += 0.001f; + env->ant_logs[ant_id].reward += 0.001f; + } else { + // Small negative reward for not heading towards colony when carrying food + env->rewards[ant_id] -= 0.0005f; + env->ant_logs[ant_id].reward -= 0.0005f; + } } // MULTIPLE TERMINAL CONDITIONS FOR FREQUENT LOG GENERATION From 
2db483c788f3b566576780ca042197c890a6b892 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Mon, 16 Jun 2025 20:09:37 -0400 Subject: [PATCH 05/23] more changes --- pufferlib/ocean/ants/ants.c | 32 +++++++- pufferlib/ocean/ants/ants.h | 140 +++++++++++++++-------------------- pufferlib/ocean/ants/ants.py | 2 +- 3 files changed, 91 insertions(+), 83 deletions(-) diff --git a/pufferlib/ocean/ants/ants.c b/pufferlib/ocean/ants/ants.c index 0d8e1ea6e..5d14ef0cb 100644 --- a/pufferlib/ocean/ants/ants.c +++ b/pufferlib/ocean/ants/ants.c @@ -43,15 +43,41 @@ int demo() { // Initialize rendering client env.client = make_client(1, env.width, env.height); + // Track key states for single-press detection + bool left_pressed = false; + bool right_pressed = false; + bool space_pressed = false; + // Main loop - FOLLOWING SNAKE PATTERN while (!WindowShouldClose()) { // User can take control with shift key if (IsKeyDown(KEY_LEFT_SHIFT)) { // Control first ant of colony 1 for demo env.actions[0] = ACTION_MOVE_FORWARD; - if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) env.actions[0] = ACTION_TURN_LEFT; - if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) env.actions[0] = ACTION_TURN_RIGHT; - if (IsKeyDown(KEY_SPACE)) env.actions[0] = ACTION_DROP_PHEROMONE; + + // Handle left turn + if ((IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) && !left_pressed) { + env.actions[0] = ACTION_TURN_LEFT; + left_pressed = true; + } else if (!IsKeyDown(KEY_LEFT) && !IsKeyDown(KEY_A)) { + left_pressed = false; + } + + // Handle right turn + if ((IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) && !right_pressed) { + env.actions[0] = ACTION_TURN_RIGHT; + right_pressed = true; + } else if (!IsKeyDown(KEY_RIGHT) && !IsKeyDown(KEY_D)) { + right_pressed = false; + } + + // Handle pheromone drop + if (IsKeyDown(KEY_SPACE) && !space_pressed) { + env.actions[0] = ACTION_DROP_PHEROMONE; + space_pressed = true; + } else if (!IsKeyDown(KEY_SPACE)) { + space_pressed = false; + } // Rest of ants act via scripted behaviors for (int i = 1; i 
< env.num_ants; i++) { diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index bd14b8649..b32ea9477 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -11,8 +11,8 @@ #define WINDOW_HEIGHT 720 #define NUM_COLONIES 2 #define MAX_FOOD_SOURCES 20 -#define MAX_FOOD_PER_SOURCE 100 -#define ANT_SPEED 3.0f +#define MAX_FOOD_PER_SOURCE 20 +#define ANT_SPEED 5.0f #define ANT_SIZE 4 #define FOOD_SIZE 6 #define COLONY_SIZE 20 @@ -22,15 +22,15 @@ #define PHEROMONE_SIZE 2 #define ANT_VISION_RANGE 500.0f #define ANT_VISION_ANGLE (M_PI / 2) -#define TURN_ANGLE (M_PI / 36) -#define MIN_FOOD_COLONY_DISTANCE 100.0f +#define TURN_ANGLE (M_PI / 2) +#define MIN_FOOD_COLONY_DISTANCE 50.0f #define ANT_LIFETIME 5000 // Actions -#define ACTION_MOVE_FORWARD 0 -#define ACTION_TURN_LEFT 1 -#define ACTION_TURN_RIGHT 2 -#define ACTION_DROP_PHEROMONE 3 +#define ACTION_TURN_LEFT 0 +#define ACTION_TURN_RIGHT 1 +#define ACTION_DROP_PHEROMONE 2 +#define ACTION_MOVE_FORWARD 3 // Colors #define COLONY1_COLOR (Color){220, 0, 0, 255} @@ -226,13 +226,14 @@ static inline float get_angle(Vector2D a, Vector2D b) { } static inline bool is_in_vision(Vector2D ant_pos, Vector2D target) { - float dist_sq = distance_squared(ant_pos, target); - if (dist_sq > ANT_VISION_RANGE * ANT_VISION_RANGE) { - return false; - } - else { - return true; - } +// float dist_sq = distance_squared(ant_pos, target); +// if (dist_sq > (ANT_VISION_RANGE * ANT_VISION_RANGE)) { +// return false; +// } +// else { +// return true; +// } + return true; } @@ -258,23 +259,26 @@ void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { // Observation structure (9 elements): // [0-1]: ant position (normalized) - // [2]: ant direction (normalized to 0-1) + // [2]: ant direction (normalized between 0 and 1) // [3]: has_food (0 or 1) - // [4]: angle diff to colony (normalized to 0-1) + // [4]: direction to colony (normalized between 0 and 1) // [5]: distance to colony (normalized) - // 
[6]: angle diff to closest food (normalized to 0-1) + // [6]: direction to closest food (normalized between 0 and 1) // [7]: closest food distance (normalized) // [8]: strongest pheromone direction (normalized to 0-1) COMMENTED OUT obs[0] = ant->position.x / env->width; obs[1] = ant->position.y / env->height; + + // Normalize direction to 0-1 range (0 = right, 0.25 = up, 0.5 = left, 0.75 = down) obs[2] = (ant->direction + M_PI) / (2 * M_PI); + obs[3] = ant->has_food ? 1.0f : 0.0f; - // Relative position to colony + // Get direction to colony (normalized between 0 and 1) float angle_to_colony = get_angle(ant->position, colony->position); - float angle_diff_colony = wrap_angle(angle_to_colony - ant->direction); - obs[4] = (angle_diff_colony + M_PI) / (2 * M_PI); + obs[4] = (angle_to_colony + M_PI) / (2 * M_PI); + obs[5] = distance_squared(ant->position, colony->position) / (env->width * env->width + env->height * env->height); // Find closest visible food @@ -293,38 +297,17 @@ void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { } } } + if(closest_food_pos.x == 0 && closest_food_pos.y == 0) { obs[6] = -1.0f; obs[7] = -1.0f; } else { - // Direction difference to closest visible food + // Get direction to closest food (normalized between 0 and 1) float angle_to_food = get_angle(ant->position, closest_food_pos); - float angle_diff_food = wrap_angle(angle_to_food - ant->direction); - obs[6] = (angle_diff_food + M_PI) / (2 * M_PI); - // Distance to closest visible food + obs[6] = (angle_to_food + M_PI) / (2 * M_PI); obs[7] = sqrt(closest_food_dist_sq) / sqrt(env->width * env->width + env->height * env->height); } - - // Find strongest visible pheromone - // float strongest_pheromone = 0; - // Vector2D pheromone_pos = {0, 0}; - // for (int i = 0; i < env->num_pheromones; i++) { - // if (env->pheromones[i].colony_id == ant->colony_id) { - // float dist_sq = distance_squared(ant->position, env->pheromones[i].position); - // if ( - // 
is_in_vision(ant->position, env->pheromones[i].position) - // ) { - // float strength = env->pheromones[i].strength / (sqrt(dist_sq) + 1); - // if (strength > strongest_pheromone) { - // strongest_pheromone = strength; - // pheromone_pos.x = env->pheromones[i].position.x; - // pheromone_pos.y = env->pheromones[i].position.y; - // } - // } - // } - // } - // obs[8] = get_angle(ant->position, pheromone_pos); } void compute_observations(AntsEnv* env) { @@ -338,7 +321,7 @@ void spawn_ant(AntsEnv* env, int ant_id) { Colony* colony = &env->colonies[ant->colony_id]; ant->position = colony->position; - ant->direction = wrap_angle(random_float(0, 2 * M_PI)); + ant->direction = wrap_angle((rand() % 4) * (M_PI / 2)); // Randomly choose between 0, 90, 180, or 270 degrees ant->has_food = false; ant->lifetime = random_float(0, ANT_LIFETIME); @@ -435,6 +418,8 @@ void step_ant(AntsEnv* env, int ant_id) { case ACTION_DROP_PHEROMONE: add_pheromone(env, ant->position, ant->colony_id); break; + case ACTION_MOVE_FORWARD: + break; } // Always move forward @@ -461,7 +446,7 @@ void step_ant(AntsEnv* env, int ant_id) { spawn_food(env); } - env->rewards[ant_id] = 1.0f; + env->rewards[ant_id] += 1.0f; env->ant_logs[ant_id].episode_return += 1.0f; env->ant_logs[ant_id].reward += 1.0f; break; @@ -469,26 +454,23 @@ void step_ant(AntsEnv* env, int ant_id) { } } - // Small negative reward for wandering without food - // env->rewards[ant_id] -= 0.001f; - // env->ant_logs[ant_id].reward -= 0.001f; // Small positive reward for heading towards visible food - for (int j = 0; j < env->num_food_sources; j++) { - if (env->food_sources[j].amount > 0) { - // float dist_sq = distance_squared(ant->position, env->food_sources[j].position); - if (is_in_vision(ant->position, env->food_sources[j].position)) { - float angle_to_food = get_angle(ant->position, env->food_sources[j].position); - float angle_diff = wrap_angle(angle_to_food - ant->direction); + // for (int j = 0; j < env->num_food_sources; j++) { + // 
if (env->food_sources[j].amount > 0) { + // // float dist_sq = distance_squared(ant->position, env->food_sources[j].position); + // if (is_in_vision(ant->position, env->food_sources[j].position)) { + // float angle_to_food = get_angle(ant->position, env->food_sources[j].position); + // float angle_diff = wrap_angle(angle_to_food - ant->direction); - if (fabs(angle_diff) < TURN_ANGLE) { - env->rewards[ant_id] += 0.0005f; - env->ant_logs[ant_id].reward += 0.0005f; - } - break; - } - } - } + // if (fabs(angle_diff) < TURN_ANGLE) { + // env->rewards[ant_id] += 0.0005f; + // env->ant_logs[ant_id].reward += 0.0005f; + // } + // break; + // } + // } + // } } // Check for food delivery @@ -498,24 +480,24 @@ void step_ant(AntsEnv* env, int ant_id) { if (dist_sq < (ANT_SIZE + COLONY_SIZE) * (ANT_SIZE + COLONY_SIZE)) { ant->has_food = false; colony->food_collected++; - env->rewards[ant_id] += 5.0f; // Larger reward for food delivery - env->ant_logs[ant_id].episode_return += 5.0f; + env->rewards[ant_id] += 50; // Larger reward for food delivery + env->ant_logs[ant_id].episode_return += 50; env->ant_logs[ant_id].score += 1; - env->ant_logs[ant_id].reward += 5.0f; + env->ant_logs[ant_id].reward += 50; } - // Reward for heading towards colony when carrying food - float angle_to_colony = get_angle(ant->position, colony->position); - float angle_diff = wrap_angle(angle_to_colony - ant->direction); + // // Reward for heading towards colony when carrying food + // float angle_to_colony = get_angle(ant->position, colony->position); + // float angle_diff = wrap_angle(angle_to_colony - ant->direction); - if (fabs(angle_diff) < TURN_ANGLE) { - env->rewards[ant_id] += 0.001f; - env->ant_logs[ant_id].reward += 0.001f; - } else { - // Small negative reward for not heading towards colony when carrying food - env->rewards[ant_id] -= 0.0005f; - env->ant_logs[ant_id].reward -= 0.0005f; - } + // if (fabs(angle_diff) < TURN_ANGLE) { + // env->rewards[ant_id] += 0.01f; + // 
env->ant_logs[ant_id].reward += 0.01f; + // } else { + // // Small negative reward for not heading towards colony when carrying food + // env->rewards[ant_id] -= 0.0005f; + // env->ant_logs[ant_id].reward -= 0.0005f; + // } } // MULTIPLE TERMINAL CONDITIONS FOR FREQUENT LOG GENERATION @@ -541,7 +523,7 @@ void step_ant(AntsEnv* env, int ant_id) { env->ant_logs[ant_id].perf = env->ant_logs[ant_id].episode_length > 0 ? env->ant_logs[ant_id].score / env->ant_logs[ant_id].episode_length : 0; add_log(env, ant_id); - spawn_ant(env, ant_id); + spawn_ant(env, ant_id); //Respawn the ant env->terminals[ant_id] = 1; // Debug output for terminal condition verification diff --git a/pufferlib/ocean/ants/ants.py b/pufferlib/ocean/ants/ants.py index bf679ede4..90bab910a 100644 --- a/pufferlib/ocean/ants/ants.py +++ b/pufferlib/ocean/ants/ants.py @@ -50,7 +50,7 @@ def __init__( self.tick = 0 self.single_action_space = gymnasium.spaces.Discrete(4) self.single_observation_space = gymnasium.spaces.Box( - low=0.0, high=1.0, shape=(8,), dtype=np.float32 + low=-1.0, high=1.0, shape=(8,), dtype=np.float32 ) # Calculate cell size for rendering From 73f03d22fe0e9ae240304853baf06fcbb640d1a5 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Sat, 21 Jun 2025 20:46:03 -0400 Subject: [PATCH 06/23] saving current state of .h file --- pufferlib/ocean/ants/ants.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index b32ea9477..8c76c9d76 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -47,8 +47,8 @@ struct Log { float score; // Total score float episode_return; // Cumulative rewards float episode_length; // Episode duration + float reward; // Reward for the current step float n; // Episode count - REQUIRED AS LAST FIELD - float reward; }; // Forward declarations @@ -165,7 +165,7 @@ void init_ants_env(AntsEnv* env) { } void allocate_ants_env(AntsEnv* env) { - env->obs_size 
= 9; // Fixed observation size per ant + env->obs_size = 8; // Fixed observation size per ant env->observations = (float*)calloc(env->num_ants * env->obs_size, sizeof(float)); env->actions = (int*)calloc(env->num_ants, sizeof(int)); env->rewards = (float*)calloc(env->num_ants, sizeof(float)); @@ -218,7 +218,7 @@ static inline float wrap_angle(float angle) { static inline float distance_squared(Vector2D a, Vector2D b) { float dx = a.x - b.x; float dy = a.y - b.y; - return dx * dx + dy * dy; + return (dx * dx) + (dy * dy); } static inline float get_angle(Vector2D a, Vector2D b) { @@ -276,7 +276,7 @@ void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { obs[3] = ant->has_food ? 1.0f : 0.0f; // Get direction to colony (normalized between 0 and 1) - float angle_to_colony = get_angle(ant->position, colony->position); + float angle_to_colony = wrap_angle(get_angle(ant->position, colony->position)); obs[4] = (angle_to_colony + M_PI) / (2 * M_PI); obs[5] = distance_squared(ant->position, colony->position) / (env->width * env->width + env->height * env->height); @@ -304,9 +304,9 @@ void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { } else { // Get direction to closest food (normalized between 0 and 1) - float angle_to_food = get_angle(ant->position, closest_food_pos); + float angle_to_food = wrap_angle(get_angle(ant->position, closest_food_pos)); obs[6] = (angle_to_food + M_PI) / (2 * M_PI); - obs[7] = sqrt(closest_food_dist_sq) / sqrt(env->width * env->width + env->height * env->height); + obs[7] = closest_food_dist_sq / ((env->width * env->width) + (env->height * env->height)); } } @@ -446,9 +446,9 @@ void step_ant(AntsEnv* env, int ant_id) { spawn_food(env); } - env->rewards[ant_id] += 1.0f; - env->ant_logs[ant_id].episode_return += 1.0f; - env->ant_logs[ant_id].reward += 1.0f; + env->rewards[ant_id] += env->reward_food; + env->ant_logs[ant_id].episode_return += env->reward_food; + env->ant_logs[ant_id].reward += env->reward_food; break; 
} } @@ -480,10 +480,10 @@ void step_ant(AntsEnv* env, int ant_id) { if (dist_sq < (ANT_SIZE + COLONY_SIZE) * (ANT_SIZE + COLONY_SIZE)) { ant->has_food = false; colony->food_collected++; - env->rewards[ant_id] += 50; // Larger reward for food delivery - env->ant_logs[ant_id].episode_return += 50; + env->rewards[ant_id] += env->reward_delivery; // Larger reward for food delivery + env->ant_logs[ant_id].episode_return += env->reward_delivery; env->ant_logs[ant_id].score += 1; - env->ant_logs[ant_id].reward += 50; + env->ant_logs[ant_id].reward += env->reward_delivery; } // // Reward for heading towards colony when carrying food From afe0d070871fbc2c296b4a73b5714be7360aa41c Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Tue, 1 Jul 2025 23:38:18 -0400 Subject: [PATCH 07/23] added observation rendering to c file --- pufferlib/ocean/ants/ants.c | 100 +++++++++++++++++++++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/pufferlib/ocean/ants/ants.c b/pufferlib/ocean/ants/ants.c index 5d14ef0cb..277a8cac6 100644 --- a/pufferlib/ocean/ants/ants.c +++ b/pufferlib/ocean/ants/ants.c @@ -5,6 +5,99 @@ #include "ants.h" #include "puffernet.h" +// Function to visualize ant 1's observations +void render_ant_observations(AntsEnv* env, int ant_id) { + if (ant_id >= env->num_ants) return; + + // Get ant 1's observations + float* obs = &env->observations[ant_id * env->obs_size]; + Ant* ant = &env->ants[ant_id]; + + // Define UI panel position + int panel_x = 20; + int panel_y = 100; + int panel_width = 300; + int panel_height = 200; + + // Draw semi-transparent background panel + DrawRectangle(panel_x - 10, panel_y - 10, panel_width + 20, panel_height + 20, + (Color){0, 0, 0, 180}); + DrawRectangleLines(panel_x - 10, panel_y - 10, panel_width + 20, panel_height + 20, RAYWHITE); + + // Title + DrawText(TextFormat("ANT %d OBSERVATIONS", ant_id), panel_x, panel_y, 16, YELLOW); + + int y_offset = panel_y + 25; + int line_height = 18; + + // Display each 
observation with description + DrawText(TextFormat("Position X: %.3f", obs[0]), panel_x, y_offset, 14, RAYWHITE); + y_offset += line_height; + + DrawText(TextFormat("Position Y: %.3f", obs[1]), panel_x, y_offset, 14, RAYWHITE); + y_offset += line_height; + + DrawText(TextFormat("Direction: %.3f", obs[2]), panel_x, y_offset, 14, RAYWHITE); + y_offset += line_height; + + DrawText(TextFormat("Has Food: %s", obs[3] > 0.5f ? "YES" : "NO"), + panel_x, y_offset, 14, obs[3] > 0.5f ? GREEN : RED); + y_offset += line_height; + + DrawText(TextFormat("Colony Dir: %.3f", obs[4]), panel_x, y_offset, 14, RAYWHITE); + y_offset += line_height; + + DrawText(TextFormat("Colony Dist: %.3f", obs[5]), panel_x, y_offset, 14, RAYWHITE); + y_offset += line_height; + + DrawText(TextFormat("Food Dir: %.3f", obs[6]), panel_x, y_offset, 14, + obs[6] < 0 ? GRAY : RAYWHITE); + y_offset += line_height; + + DrawText(TextFormat("Food Dist: %.3f", obs[7]), panel_x, y_offset, 14, + obs[7] < 0 ? GRAY : RAYWHITE); + + // Visual indicators on the ant + Vector2D ant_pos = ant->position; + + // Highlight the selected ant + DrawCircleLines(ant_pos.x, ant_pos.y, ANT_SIZE + 3, YELLOW); + DrawCircleLines(ant_pos.x, ant_pos.y, ANT_SIZE + 5, YELLOW); + + // Draw direction to colony (if valid) + if (obs[4] >= 0) { + float colony_angle = (obs[4] * 2 * M_PI) - M_PI; + float line_length = 40.0f; + Vector2D colony_end = { + ant_pos.x + line_length * cos(colony_angle), + ant_pos.y + line_length * sin(colony_angle) + }; + DrawLineEx((Vector2){ant_pos.x, ant_pos.y}, (Vector2){colony_end.x, colony_end.y}, 3, BLUE); + DrawText("COLONY", colony_end.x + 5, colony_end.y - 10, 12, BLUE); + } + + // Draw direction to food (if visible) + if (obs[6] >= 0) { + float food_angle = (obs[6] * 2 * M_PI) - M_PI; + float line_length = 30.0f; + Vector2D food_end = { + ant_pos.x + line_length * cos(food_angle), + ant_pos.y + line_length * sin(food_angle) + }; + DrawLineEx((Vector2){ant_pos.x, ant_pos.y}, (Vector2){food_end.x, 
food_end.y}, 2, GREEN); + DrawText("FOOD", food_end.x + 5, food_end.y - 10, 12, GREEN); + } + + // Draw current direction + float current_angle = (obs[2] * 2 * M_PI) - M_PI; + float dir_length = 25.0f; + Vector2D dir_end = { + ant_pos.x + dir_length * cos(current_angle), + ant_pos.y + dir_length * sin(current_angle) + }; + DrawLineEx((Vector2){ant_pos.x, ant_pos.y}, (Vector2){dir_end.x, dir_end.y}, 4, YELLOW); +} + int demo() { // Initialize environment with proper parameters - FOLLOWING SNAKE PATTERN AntsEnv env = { @@ -144,6 +237,11 @@ int demo() { c_step(&env); c_render(&env); + // Visualize ant observations when shift is pressed + if (IsKeyDown(KEY_LEFT_SHIFT)) { + render_ant_observations(&env, 0); + } + // Print stats periodically if (env.tick % 1000 == 0 && env.log.n > 0) { printf("Tick %d: Episodes completed: %.0f, Avg score: %.2f, Avg return: %.2f\n", @@ -220,7 +318,7 @@ int main() { printf("Ant Colony Environment Demo\n"); printf("Controls:\n"); - printf("- Hold SHIFT to control the first ant\n"); + printf("- Hold SHIFT to control the first ant AND view ant 1's observations\n"); printf("- A/D or LEFT/RIGHT to turn\n"); printf("- SPACE to drop pheromone\n"); printf("- ESC to exit\n\n"); From 3ae697e967fefabf7129f3e83eba00eb4732f8aa Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Tue, 1 Jul 2025 23:50:45 -0400 Subject: [PATCH 08/23] cleanup --- pufferlib/ocean/ants/ants.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pufferlib/ocean/ants/ants.c b/pufferlib/ocean/ants/ants.c index 277a8cac6..6aead4e87 100644 --- a/pufferlib/ocean/ants/ants.c +++ b/pufferlib/ocean/ants/ants.c @@ -5,11 +5,11 @@ #include "ants.h" #include "puffernet.h" -// Function to visualize ant 1's observations +// Function to visualize ant observations void render_ant_observations(AntsEnv* env, int ant_id) { if (ant_id >= env->num_ants) return; - // Get ant 1's observations + // Get ant observations float* obs = &env->observations[ant_id * env->obs_size]; Ant* ant 
= &env->ants[ant_id]; @@ -40,7 +40,7 @@ void render_ant_observations(AntsEnv* env, int ant_id) { DrawText(TextFormat("Direction: %.3f", obs[2]), panel_x, y_offset, 14, RAYWHITE); y_offset += line_height; - DrawText(TextFormat("Has Food: %s", obs[3] > 0.5f ? "YES" : "NO"), + DrawText(TextFormat("Has Food: %s", obs[3] == 1 ? "YES" : "NO"), panel_x, y_offset, 14, obs[3] > 0.5f ? GREEN : RED); y_offset += line_height; From 9f93ba68205baa75f0106e65fddbff0da49d640f Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Wed, 16 Jul 2025 20:30:10 -0400 Subject: [PATCH 09/23] updated num_envs --- pufferlib/config/ocean/ants.ini | 20 +++++--------------- pufferlib/ocean/ants/ants.py | 2 +- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/pufferlib/config/ocean/ants.ini b/pufferlib/config/ocean/ants.ini index 448e0bbde..a23a9c901 100644 --- a/pufferlib/config/ocean/ants.ini +++ b/pufferlib/config/ocean/ants.ini @@ -4,21 +4,11 @@ env_name = puffer_ants policy_name = Policy rnn_name = Recurrent +[vec] +num_envs = 8 + [env] -num_envs = 1 +num_envs = 32 [train] -total_timesteps = 100_000_000 -gamma = 0.99 -learning_rate = 0.0003 -batch_size = 131072 -minibatch_size = 4096 -update_epochs = 1 -checkpoint_interval = 50 -clip_coef = 0.2 -vf_coef = 0.5 -ent_coef = 0.01 -max_grad_norm = 0.5 -target_kl = 0.015 -gae_lambda = 0.95 -bptt_horizon = 8 \ No newline at end of file +total_timesteps = 500_000_000 \ No newline at end of file diff --git a/pufferlib/ocean/ants/ants.py b/pufferlib/ocean/ants/ants.py index 90bab910a..70184b32f 100644 --- a/pufferlib/ocean/ants/ants.py +++ b/pufferlib/ocean/ants/ants.py @@ -20,7 +20,7 @@ class AntsEnv(pufferlib.PufferEnv): def __init__( self, - num_envs=1, + num_envs=32, width=1280, height=720, num_ants=32, From f6fde22430686c041b6c4b9997ce2f35bcc7ca4f Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Sat, 1 Nov 2025 09:12:35 -0400 Subject: [PATCH 10/23] update readme with build command --- pufferlib/ocean/ants/README.md | 4 ++++ 1 file changed, 
4 insertions(+) diff --git a/pufferlib/ocean/ants/README.md b/pufferlib/ocean/ants/README.md index 913da7709..5b8a6c603 100644 --- a/pufferlib/ocean/ants/README.md +++ b/pufferlib/ocean/ants/README.md @@ -8,4 +8,8 @@ puffer eval puffer_ants --load-model-path experiments/ANTS-XXX.pt --train.device ``` scripts/build_ocean.sh ants +``` + +``` +python setup.py build_ext --inplace ``` \ No newline at end of file From 527672ab3a6a7e423c31db4a4696c92700ec83e8 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Thu, 11 Dec 2025 22:13:46 -0500 Subject: [PATCH 11/23] added latest ants env with demo rewards --- pufferlib/ocean/ants/ants.c | 2 + pufferlib/ocean/ants/ants.h | 174 +++++++++++++++++++++++++-------- pufferlib/ocean/ants/ants.py | 20 ++-- pufferlib/ocean/ants/binding.c | 6 +- 4 files changed, 151 insertions(+), 51 deletions(-) diff --git a/pufferlib/ocean/ants/ants.c b/pufferlib/ocean/ants/ants.c index 6aead4e87..8d848b5fb 100644 --- a/pufferlib/ocean/ants/ants.c +++ b/pufferlib/ocean/ants/ants.c @@ -107,6 +107,8 @@ int demo() { .reward_food = 0.1f, .reward_delivery = 1.0f, .reward_death = -1.0f, + .reward_demo_match = 0.01f, // Small reward for matching demo + .reward_demo_mismatch = -0.01f, // Small penalty for not matching demo .cell_size = 1, }; diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index 8c76c9d76..2ade8d48e 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -77,6 +77,7 @@ typedef struct { int colony_id; bool has_food; int lifetime; // Track ant lifetime for performance metrics + float prev_target_dist; // Previous distance to target for reward calculation } Ant; typedef struct { @@ -120,6 +121,8 @@ struct AntsEnv { float reward_food; float reward_delivery; float reward_death; + float reward_demo_match; // Reward for matching demo action + float reward_demo_mismatch; // Penalty for not matching demo action // Rendering Client* client; // Raylib client @@ -236,6 +239,30 @@ static inline bool 
is_in_vision(Vector2D ant_pos, Vector2D target) { return true; } +// Get the target position for an ant (colony if carrying food, nearest food otherwise) +static inline Vector2D get_ant_target(AntsEnv* env, Ant* ant) { + if (ant->has_food) { + // Target is the colony when carrying food + return env->colonies[ant->colony_id].position; + } else { + // Target is the nearest food source when not carrying food + float closest_food_dist_sq = env->width * env->width + env->height * env->height; + Vector2D closest_food_pos = ant->position; // Default to current position if no food found + + for (int i = 0; i < env->num_food_sources; i++) { + if (env->food_sources[i].amount > 0) { + float dist_sq = distance_squared(ant->position, env->food_sources[i].position); + if (dist_sq < closest_food_dist_sq) { + closest_food_dist_sq = dist_sq; + closest_food_pos = env->food_sources[i].position; + } + } + } + + return closest_food_pos; + } +} + static inline void add_pheromone(AntsEnv* env, Vector2D position, int colony_id) { @@ -319,12 +346,16 @@ void compute_observations(AntsEnv* env) { void spawn_ant(AntsEnv* env, int ant_id) { Ant* ant = &env->ants[ant_id]; Colony* colony = &env->colonies[ant->colony_id]; - + ant->position = colony->position; ant->direction = wrap_angle((rand() % 4) * (M_PI / 2)); // Randomly choose between 0, 90, 180, or 270 degrees ant->has_food = false; ant->lifetime = random_float(0, ANT_LIFETIME); - + + // Initialize previous target distance + Vector2D target = get_ant_target(env, ant); + ant->prev_target_dist = sqrtf(distance_squared(ant->position, target)); + // Reset individual ant log env->ant_logs[ant_id] = (Log){0}; } @@ -398,13 +429,87 @@ void c_reset(AntsEnv* env) { compute_observations(env); } +// Compute the hardcoded demo action for an ant +// This replicates the logic from demo() lines 176-226 +int get_demo_action(AntsEnv* env, int ant_id) { + Ant* ant = &env->ants[ant_id]; + + if (ant->has_food) { + // If ant has food, return to colony + Colony* 
colony = &env->colonies[ant->colony_id]; + float angle_to_colony = get_angle(ant->position, colony->position); + float angle_diff = wrap_angle(angle_to_colony - ant->direction); + + // Turn towards colony + if (angle_diff > 0.1) { + return ACTION_TURN_RIGHT; + } else if (angle_diff < -0.1) { + return ACTION_TURN_LEFT; + } else { + return ACTION_MOVE_FORWARD; + } + } else { + // If ant doesn't have food, seek nearest food source + float closest_food_dist_sq = env->width * env->width; + Vector2D closest_food_pos = {0, 0}; + bool found_food = false; + + for (int j = 0; j < env->num_food_sources; j++) { + if (env->food_sources[j].amount > 0) { + float dist_sq = distance_squared(ant->position, env->food_sources[j].position); + if (dist_sq < closest_food_dist_sq && is_in_vision(ant->position, env->food_sources[j].position)) { + closest_food_dist_sq = dist_sq; + closest_food_pos = env->food_sources[j].position; + found_food = true; + } + } + } + + if (found_food) { + // Turn towards food + float angle_to_food = get_angle(ant->position, closest_food_pos); + float angle_diff = wrap_angle(angle_to_food - ant->direction); + + if (angle_diff > 0.1) { + return ACTION_TURN_RIGHT; + } else if (angle_diff < -0.1) { + return ACTION_TURN_LEFT; + } else { + return ACTION_MOVE_FORWARD; + } + } else { + // If no food in sight, move forward (we'll use this as the "default" demo action) + // Note: The random turning behavior is not deterministic, so we default to forward + return ACTION_MOVE_FORWARD; + } + } +} + void step_ant(AntsEnv* env, int ant_id) { Ant* ant = &env->ants[ant_id]; env->ant_logs[ant_id].episode_length += 1; ant->lifetime++; - + int action = env->actions[ant_id]; - + + // Store previous target for reward calculation before action execution + Vector2D prev_target = get_ant_target(env, ant); + float prev_dist_to_target = sqrtf(distance_squared(ant->position, prev_target)); + + // Compute demo action and compare with agent's action + int demo_action = get_demo_action(env, 
ant_id); + if (action == demo_action) { + // Reward for matching the demo action + env->rewards[ant_id] += env->reward_demo_match; + env->ant_logs[ant_id].episode_return += env->reward_demo_match; + env->ant_logs[ant_id].reward += env->reward_demo_match; + } else { + // Punish for not matching the demo action + env->rewards[ant_id] += env->reward_demo_mismatch; + env->ant_logs[ant_id].episode_return += env->reward_demo_mismatch; + env->ant_logs[ant_id].reward += env->reward_demo_mismatch; + } + // Execute action switch (action) { case ACTION_TURN_LEFT: @@ -421,11 +526,11 @@ void step_ant(AntsEnv* env, int ant_id) { case ACTION_MOVE_FORWARD: break; } - + // Always move forward ant->position.x += ANT_SPEED * cos(ant->direction); ant->position.y += ANT_SPEED * sin(ant->direction); - + // Wrap around edges if (ant->position.x < 0) ant->position.x = env->width; if (ant->position.x > env->width) ant->position.x = 0; @@ -440,12 +545,12 @@ void step_ant(AntsEnv* env, int ant_id) { if (dist_sq < (ANT_SIZE + FOOD_SIZE) * (ANT_SIZE + FOOD_SIZE)) { ant->has_food = true; env->food_sources[j].amount--; - + // If food source is exhausted, respawn it if (env->food_sources[j].amount <= 0) { spawn_food(env); } - + env->rewards[ant_id] += env->reward_food; env->ant_logs[ant_id].episode_return += env->reward_food; env->ant_logs[ant_id].reward += env->reward_food; @@ -453,26 +558,8 @@ void step_ant(AntsEnv* env, int ant_id) { } } } - - - // Small positive reward for heading towards visible food - // for (int j = 0; j < env->num_food_sources; j++) { - // if (env->food_sources[j].amount > 0) { - // // float dist_sq = distance_squared(ant->position, env->food_sources[j].position); - // if (is_in_vision(ant->position, env->food_sources[j].position)) { - // float angle_to_food = get_angle(ant->position, env->food_sources[j].position); - // float angle_diff = wrap_angle(angle_to_food - ant->direction); - - // if (fabs(angle_diff) < TURN_ANGLE) { - // env->rewards[ant_id] += 0.0005f; - // 
env->ant_logs[ant_id].reward += 0.0005f; - // } - // break; - // } - // } - // } } - + // Check for food delivery if (ant->has_food) { Colony* colony = &env->colonies[ant->colony_id]; @@ -480,25 +567,30 @@ void step_ant(AntsEnv* env, int ant_id) { if (dist_sq < (ANT_SIZE + COLONY_SIZE) * (ANT_SIZE + COLONY_SIZE)) { ant->has_food = false; colony->food_collected++; - env->rewards[ant_id] += env->reward_delivery; // Larger reward for food delivery + env->rewards[ant_id] += env->reward_delivery; env->ant_logs[ant_id].episode_return += env->reward_delivery; env->ant_logs[ant_id].score += 1; env->ant_logs[ant_id].reward += env->reward_delivery; } - - // // Reward for heading towards colony when carrying food - // float angle_to_colony = get_angle(ant->position, colony->position); - // float angle_diff = wrap_angle(angle_to_colony - ant->direction); - - // if (fabs(angle_diff) < TURN_ANGLE) { - // env->rewards[ant_id] += 0.01f; - // env->ant_logs[ant_id].reward += 0.01f; - // } else { - // // Small negative reward for not heading towards colony when carrying food - // env->rewards[ant_id] -= 0.0005f; - // env->ant_logs[ant_id].reward -= 0.0005f; - // } } + + // Distance-based reward: reward for getting closer to target, punish for getting further + // Get current target (may have changed if ant picked up or delivered food) + Vector2D current_target = get_ant_target(env, ant); + float current_dist_to_target = sqrtf(distance_squared(ant->position, current_target)); + + // Calculate distance change (negative means got closer, positive means got further) + float distance_change = current_dist_to_target - prev_dist_to_target; + + // Reward proportional to reduction in distance (negative distance_change is good) + float distance_reward = -distance_change * 0.01f; // Scale factor to adjust reward magnitude + + env->rewards[ant_id] += distance_reward; + env->ant_logs[ant_id].episode_return += distance_reward; + env->ant_logs[ant_id].reward += distance_reward; + + // Update 
previous distance for next step + ant->prev_target_dist = current_dist_to_target; // MULTIPLE TERMINAL CONDITIONS FOR FREQUENT LOG GENERATION bool should_terminate = false; diff --git a/pufferlib/ocean/ants/ants.py b/pufferlib/ocean/ants/ants.py index 70184b32f..99c23decc 100644 --- a/pufferlib/ocean/ants/ants.py +++ b/pufferlib/ocean/ants/ants.py @@ -19,17 +19,19 @@ class AntsEnv(pufferlib.PufferEnv): """ def __init__( - self, - num_envs=32, - width=1280, + self, + num_envs=32, + width=1280, height=720, num_ants=32, - reward_food=0.1, - reward_delivery=1.0, + reward_food=0.1, + reward_delivery=1.0, reward_death=0.0, - report_interval=1, - render_mode=None, - buf=None, + reward_demo_match=0.01, + reward_demo_mismatch=-0.01, + report_interval=1, + render_mode=None, + buf=None, seed=0): if num_envs is not None: @@ -83,6 +85,8 @@ def __init__( reward_food=reward_food, reward_delivery=reward_delivery, reward_death=reward_death, + reward_demo_match=reward_demo_match, + reward_demo_mismatch=reward_demo_mismatch, cell_size=self.cell_size ) c_envs.append(env_id) diff --git a/pufferlib/ocean/ants/binding.c b/pufferlib/ocean/ants/binding.c index f8561b49a..6110c3b44 100644 --- a/pufferlib/ocean/ants/binding.c +++ b/pufferlib/ocean/ants/binding.c @@ -9,9 +9,11 @@ static int my_init(Env* env, PyObject* args, PyObject* kwargs) { env->num_ants = unpack(kwargs, "num_ants"); env->reward_food = unpack(kwargs, "reward_food"); env->reward_delivery = unpack(kwargs, "reward_delivery"); + env->reward_demo_match = unpack(kwargs, "reward_demo_match"); + env->reward_demo_mismatch = unpack(kwargs, "reward_demo_mismatch"); env->cell_size = unpack(kwargs, "cell_size"); - - init_ants_env(env); + + init_ants_env(env); return 0; } From fdc606020a34e3763cf11edcc878caa04d5a3725 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Mon, 15 Dec 2025 10:30:37 -0500 Subject: [PATCH 12/23] updated simulation to use discrete moves more and to allow more movement freedom for the ants --- 
pufferlib/ocean/ants/ants.c | 71 +++++++++++++++++++++++------------ pufferlib/ocean/ants/ants.h | 74 +++++++++++++++++++++++++------------ 2 files changed, 98 insertions(+), 47 deletions(-) diff --git a/pufferlib/ocean/ants/ants.c b/pufferlib/ocean/ants/ants.c index 8d848b5fb..b38d5e5cb 100644 --- a/pufferlib/ocean/ants/ants.c +++ b/pufferlib/ocean/ants/ants.c @@ -17,7 +17,7 @@ void render_ant_observations(AntsEnv* env, int ant_id) { int panel_x = 20; int panel_y = 100; int panel_width = 300; - int panel_height = 200; + int panel_height = 240; // Increased for 2 additional pheromone observations // Draw semi-transparent background panel DrawRectangle(panel_x - 10, panel_y - 10, panel_width + 20, panel_height + 20, @@ -50,12 +50,20 @@ void render_ant_observations(AntsEnv* env, int ant_id) { DrawText(TextFormat("Colony Dist: %.3f", obs[5]), panel_x, y_offset, 14, RAYWHITE); y_offset += line_height; - DrawText(TextFormat("Food Dir: %.3f", obs[6]), panel_x, y_offset, 14, + DrawText(TextFormat("Food Dir: %.3f", obs[6]), panel_x, y_offset, 14, obs[6] < 0 ? GRAY : RAYWHITE); y_offset += line_height; - - DrawText(TextFormat("Food Dist: %.3f", obs[7]), panel_x, y_offset, 14, + + DrawText(TextFormat("Food Dist: %.3f", obs[7]), panel_x, y_offset, 14, obs[7] < 0 ? GRAY : RAYWHITE); + y_offset += line_height; + + DrawText(TextFormat("Pheromone Dir: %.3f", obs[8]), panel_x, y_offset, 14, + obs[8] < 0 ? GRAY : RAYWHITE); + y_offset += line_height; + + DrawText(TextFormat("Pheromone Str: %.3f", obs[9]), panel_x, y_offset, 14, + obs[9] < 0 ? 
GRAY : RAYWHITE); // Visual indicators on the ant Vector2D ant_pos = ant->position; @@ -87,7 +95,19 @@ void render_ant_observations(AntsEnv* env, int ant_id) { DrawLineEx((Vector2){ant_pos.x, ant_pos.y}, (Vector2){food_end.x, food_end.y}, 2, GREEN); DrawText("FOOD", food_end.x + 5, food_end.y - 10, 12, GREEN); } - + + // Draw direction to pheromone (if detected) + if (obs[8] >= 0) { + float pheromone_angle = (obs[8] * 2 * M_PI) - M_PI; + float line_length = 35.0f; + Vector2D pheromone_end = { + ant_pos.x + line_length * cos(pheromone_angle), + ant_pos.y + line_length * sin(pheromone_angle) + }; + DrawLineEx((Vector2){ant_pos.x, ant_pos.y}, (Vector2){pheromone_end.x, pheromone_end.y}, 2, MAGENTA); + DrawText("PHEROMONE", pheromone_end.x + 5, pheromone_end.y - 10, 12, MAGENTA); + } + // Draw current direction float current_angle = (obs[2] * 2 * M_PI) - M_PI; float dir_length = 25.0f; @@ -148,25 +168,26 @@ int demo() { // User can take control with shift key if (IsKeyDown(KEY_LEFT_SHIFT)) { // Control first ant of colony 1 for demo + // Default to move forward when no other action is pressed env.actions[0] = ACTION_MOVE_FORWARD; - - // Handle left turn + + // Handle left turn (overrides movement) if ((IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) && !left_pressed) { env.actions[0] = ACTION_TURN_LEFT; left_pressed = true; } else if (!IsKeyDown(KEY_LEFT) && !IsKeyDown(KEY_A)) { left_pressed = false; } - - // Handle right turn + + // Handle right turn (overrides movement) if ((IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) && !right_pressed) { env.actions[0] = ACTION_TURN_RIGHT; right_pressed = true; } else if (!IsKeyDown(KEY_RIGHT) && !IsKeyDown(KEY_D)) { right_pressed = false; } - - // Handle pheromone drop + + // Handle pheromone drop (overrides movement) if (IsKeyDown(KEY_SPACE) && !space_pressed) { env.actions[0] = ACTION_DROP_PHEROMONE; space_pressed = true; @@ -175,6 +196,9 @@ int demo() { } // Rest of ants act via scripted behaviors + // Threshold is half of turn angle to 
avoid oscillation with 45-degree turns + const float turn_threshold = TURN_ANGLE / 2.0f; // ~22.5 degrees + for (int i = 1; i < env.num_ants; i++) { Ant* ant = &env.ants[i]; if (ant->has_food) { @@ -182,11 +206,11 @@ int demo() { Colony* colony = &env.colonies[ant->colony_id]; float angle_to_colony = get_angle(ant->position, colony->position); float angle_diff = wrap_angle(angle_to_colony - ant->direction); - - // Turn towards colony - if (angle_diff > 0.1) { + + // Turn towards colony if angle difference is significant + if (angle_diff > turn_threshold) { env.actions[i] = ACTION_TURN_RIGHT; - } else if (angle_diff < -0.1) { + } else if (angle_diff < -turn_threshold) { env.actions[i] = ACTION_TURN_LEFT; } else { env.actions[i] = ACTION_MOVE_FORWARD; @@ -196,7 +220,7 @@ int demo() { float closest_food_dist_sq = env.width * env.width; Vector2D closest_food_pos = {0, 0}; bool found_food = false; - + for (int j = 0; j < env.num_food_sources; j++) { if (env.food_sources[j].amount > 0) { float dist_sq = distance_squared(ant->position, env.food_sources[j].position); @@ -207,15 +231,15 @@ int demo() { } } } - + if (found_food) { - // Turn towards food + // Turn towards food if angle difference is significant float angle_to_food = get_angle(ant->position, closest_food_pos); float angle_diff = wrap_angle(angle_to_food - ant->direction); - - if (angle_diff > 0.1) { + + if (angle_diff > turn_threshold) { env.actions[i] = ACTION_TURN_RIGHT; - } else if (angle_diff < -0.1) { + } else if (angle_diff < -turn_threshold) { env.actions[i] = ACTION_TURN_LEFT; } else { env.actions[i] = ACTION_MOVE_FORWARD; @@ -321,8 +345,9 @@ int main() { printf("Ant Colony Environment Demo\n"); printf("Controls:\n"); printf("- Hold SHIFT to control the first ant AND view ant 1's observations\n"); - printf("- A/D or LEFT/RIGHT to turn\n"); - printf("- SPACE to drop pheromone\n"); + printf("- While holding SHIFT: ant moves forward by default\n"); + printf("- A/D or LEFT/RIGHT to turn 45 degrees (stops 
movement)\n"); + printf("- SPACE to drop pheromone (stops movement)\n"); printf("- ESC to exit\n\n"); demo(); diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index 2ade8d48e..e479df8cb 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -22,7 +22,7 @@ #define PHEROMONE_SIZE 2 #define ANT_VISION_RANGE 500.0f #define ANT_VISION_ANGLE (M_PI / 2) -#define TURN_ANGLE (M_PI / 2) +#define TURN_ANGLE (M_PI / 4) #define MIN_FOOD_COLONY_DISTANCE 50.0f #define ANT_LIFETIME 5000 @@ -168,7 +168,7 @@ void init_ants_env(AntsEnv* env) { } void allocate_ants_env(AntsEnv* env) { - env->obs_size = 8; // Fixed observation size per ant + env->obs_size = 10; // Fixed observation size per ant (added 2 for pheromone sensing) env->observations = (float*)calloc(env->num_ants * env->obs_size, sizeof(float)); env->actions = (int*)calloc(env->num_ants, sizeof(int)); env->rewards = (float*)calloc(env->num_ants, sizeof(float)); @@ -283,8 +283,8 @@ static inline void add_pheromone(AntsEnv* env, Vector2D position, int colony_id) void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { Ant* ant = &env->ants[ant_idx]; Colony* colony = &env->colonies[ant->colony_id]; - - // Observation structure (9 elements): + + // Observation structure (10 elements): // [0-1]: ant position (normalized) // [2]: ant direction (normalized between 0 and 1) // [3]: has_food (0 or 1) @@ -292,22 +292,23 @@ void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { // [5]: distance to colony (normalized) // [6]: direction to closest food (normalized between 0 and 1) // [7]: closest food distance (normalized) - // [8]: strongest pheromone direction (normalized to 0-1) COMMENTED OUT - + // [8]: direction to strongest pheromone (normalized to 0-1, -1 if none) + // [9]: strongest pheromone strength (normalized, -1 if none) + obs[0] = ant->position.x / env->width; obs[1] = ant->position.y / env->height; - + // Normalize direction to 0-1 range (0 = right, 
0.25 = up, 0.5 = left, 0.75 = down) obs[2] = (ant->direction + M_PI) / (2 * M_PI); - + obs[3] = ant->has_food ? 1.0f : 0.0f; - + // Get direction to colony (normalized between 0 and 1) float angle_to_colony = wrap_angle(get_angle(ant->position, colony->position)); obs[4] = (angle_to_colony + M_PI) / (2 * M_PI); - + obs[5] = distance_squared(ant->position, colony->position) / (env->width * env->width + env->height * env->height); - + // Find closest visible food float closest_food_dist_sq = env->width * env->width; Vector2D closest_food_pos = {0, 0}; @@ -324,7 +325,7 @@ void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { } } } - + if(closest_food_pos.x == 0 && closest_food_pos.y == 0) { obs[6] = -1.0f; obs[7] = -1.0f; @@ -335,6 +336,27 @@ void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { obs[6] = (angle_to_food + M_PI) / (2 * M_PI); obs[7] = closest_food_dist_sq / ((env->width * env->width) + (env->height * env->height)); } + + // Find strongest pheromone from same colony + float strongest_pheromone = 0.0f; + Vector2D strongest_pheromone_pos = {0, 0}; + for (int i = 0; i < env->num_pheromones; i++) { + if (env->pheromones[i].colony_id == ant->colony_id) { + if (env->pheromones[i].strength > strongest_pheromone) { + strongest_pheromone = env->pheromones[i].strength; + strongest_pheromone_pos = env->pheromones[i].position; + } + } + } + + if (strongest_pheromone > 0.0f) { + float angle_to_pheromone = wrap_angle(get_angle(ant->position, strongest_pheromone_pos)); + obs[8] = (angle_to_pheromone + M_PI) / (2 * M_PI); + obs[9] = strongest_pheromone / PHEROMONE_DEPOSIT_AMOUNT; // Normalize by max strength + } else { + obs[8] = -1.0f; + obs[9] = -1.0f; + } } void compute_observations(AntsEnv* env) { @@ -348,7 +370,7 @@ void spawn_ant(AntsEnv* env, int ant_id) { Colony* colony = &env->colonies[ant->colony_id]; ant->position = colony->position; - ant->direction = wrap_angle((rand() % 4) * (M_PI / 2)); // Randomly choose between 0, 90, 
180, or 270 degrees + ant->direction = wrap_angle((rand() % 8) * (M_PI / 4)); // Randomly choose between 8 directions (0, 45, 90, 135, 180, 225, 270, 315 degrees) ant->has_food = false; ant->lifetime = random_float(0, ANT_LIFETIME); @@ -433,6 +455,8 @@ void c_reset(AntsEnv* env) { // This replicates the logic from demo() lines 176-226 int get_demo_action(AntsEnv* env, int ant_id) { Ant* ant = &env->ants[ant_id]; + // Threshold is half of turn angle to avoid oscillation with 45-degree turns + const float turn_threshold = TURN_ANGLE / 2.0f; // ~22.5 degrees if (ant->has_food) { // If ant has food, return to colony @@ -440,10 +464,10 @@ int get_demo_action(AntsEnv* env, int ant_id) { float angle_to_colony = get_angle(ant->position, colony->position); float angle_diff = wrap_angle(angle_to_colony - ant->direction); - // Turn towards colony - if (angle_diff > 0.1) { + // Turn towards colony if angle difference is significant + if (angle_diff > turn_threshold) { return ACTION_TURN_RIGHT; - } else if (angle_diff < -0.1) { + } else if (angle_diff < -turn_threshold) { return ACTION_TURN_LEFT; } else { return ACTION_MOVE_FORWARD; @@ -466,13 +490,13 @@ int get_demo_action(AntsEnv* env, int ant_id) { } if (found_food) { - // Turn towards food + // Turn towards food if angle difference is significant float angle_to_food = get_angle(ant->position, closest_food_pos); float angle_diff = wrap_angle(angle_to_food - ant->direction); - if (angle_diff > 0.1) { + if (angle_diff > turn_threshold) { return ACTION_TURN_RIGHT; - } else if (angle_diff < -0.1) { + } else if (angle_diff < -turn_threshold) { return ACTION_TURN_LEFT; } else { return ACTION_MOVE_FORWARD; @@ -521,16 +545,18 @@ void step_ant(AntsEnv* env, int ant_id) { ant->direction = wrap_angle(ant->direction); break; case ACTION_DROP_PHEROMONE: - add_pheromone(env, ant->position, ant->colony_id); + // Only drop pheromones when carrying food + if (ant->has_food) { + add_pheromone(env, ant->position, ant->colony_id); + } break; 
case ACTION_MOVE_FORWARD: + // Move forward only when this action is selected + ant->position.x += ANT_SPEED * cos(ant->direction); + ant->position.y += ANT_SPEED * sin(ant->direction); break; } - // Always move forward - ant->position.x += ANT_SPEED * cos(ant->direction); - ant->position.y += ANT_SPEED * sin(ant->direction); - // Wrap around edges if (ant->position.x < 0) ant->position.x = env->width; if (ant->position.x > env->width) ant->position.x = 0; From 96bea1b750d77e7b874781a66c9e10b2d14863cf Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Mon, 15 Dec 2025 12:16:10 -0500 Subject: [PATCH 13/23] updated reward func to only include demo and delivery rewards --- pufferlib/ocean/ants/ants.h | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index e479df8cb..7c1cfb7a1 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -77,7 +77,6 @@ typedef struct { int colony_id; bool has_food; int lifetime; // Track ant lifetime for performance metrics - float prev_target_dist; // Previous distance to target for reward calculation } Ant; typedef struct { @@ -374,10 +373,6 @@ void spawn_ant(AntsEnv* env, int ant_id) { ant->has_food = false; ant->lifetime = random_float(0, ANT_LIFETIME); - // Initialize previous target distance - Vector2D target = get_ant_target(env, ant); - ant->prev_target_dist = sqrtf(distance_squared(ant->position, target)); - // Reset individual ant log env->ant_logs[ant_id] = (Log){0}; } @@ -516,10 +511,6 @@ void step_ant(AntsEnv* env, int ant_id) { int action = env->actions[ant_id]; - // Store previous target for reward calculation before action execution - Vector2D prev_target = get_ant_target(env, ant); - float prev_dist_to_target = sqrtf(distance_squared(ant->position, prev_target)); - // Compute demo action and compare with agent's action int demo_action = get_demo_action(env, ant_id); if (action == demo_action) { @@ -600,24 +591,6 @@ void 
step_ant(AntsEnv* env, int ant_id) { } } - // Distance-based reward: reward for getting closer to target, punish for getting further - // Get current target (may have changed if ant picked up or delivered food) - Vector2D current_target = get_ant_target(env, ant); - float current_dist_to_target = sqrtf(distance_squared(ant->position, current_target)); - - // Calculate distance change (negative means got closer, positive means got further) - float distance_change = current_dist_to_target - prev_dist_to_target; - - // Reward proportional to reduction in distance (negative distance_change is good) - float distance_reward = -distance_change * 0.01f; // Scale factor to adjust reward magnitude - - env->rewards[ant_id] += distance_reward; - env->ant_logs[ant_id].episode_return += distance_reward; - env->ant_logs[ant_id].reward += distance_reward; - - // Update previous distance for next step - ant->prev_target_dist = current_dist_to_target; - // MULTIPLE TERMINAL CONDITIONS FOR FREQUENT LOG GENERATION bool should_terminate = false; From 34d42d1fdb3a79a94eab852428af60a727817dc2 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Mon, 15 Dec 2025 12:52:55 -0500 Subject: [PATCH 14/23] removed pheromones and eliminated vision constraints --- pufferlib/ocean/ants/__init__.py | 3 + pufferlib/ocean/ants/ants.h | 145 +++++++++++++------------------ pufferlib/ocean/ants/ants.py | 8 +- 3 files changed, 65 insertions(+), 91 deletions(-) diff --git a/pufferlib/ocean/ants/__init__.py b/pufferlib/ocean/ants/__init__.py index e69de29bb..55804d502 100644 --- a/pufferlib/ocean/ants/__init__.py +++ b/pufferlib/ocean/ants/__init__.py @@ -0,0 +1,3 @@ +from pufferlib.ocean.ants.ants import AntsEnv + +__all__ = ['AntsEnv'] diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index 7c1cfb7a1..d94d261f7 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -16,12 +16,12 @@ #define ANT_SIZE 4 #define FOOD_SIZE 6 #define COLONY_SIZE 20 -#define 
PHEROMONE_EVAPORATION_RATE 0.001f -#define PHEROMONE_DEPOSIT_AMOUNT 1.0f -#define MAX_PHEROMONES 5000 -#define PHEROMONE_SIZE 2 -#define ANT_VISION_RANGE 500.0f -#define ANT_VISION_ANGLE (M_PI / 2) +// #define PHEROMONE_EVAPORATION_RATE 0.001f +// #define PHEROMONE_DEPOSIT_AMOUNT 1.0f +// #define MAX_PHEROMONES 5000 +// #define PHEROMONE_SIZE 2 +// #define ANT_VISION_RANGE 500.0f +// #define ANT_VISION_ANGLE (M_PI / 2) #define TURN_ANGLE (M_PI / 4) #define MIN_FOOD_COLONY_DISTANCE 50.0f #define ANT_LIFETIME 5000 @@ -29,14 +29,14 @@ // Actions #define ACTION_TURN_LEFT 0 #define ACTION_TURN_RIGHT 1 -#define ACTION_DROP_PHEROMONE 2 +// #define ACTION_DROP_PHEROMONE 2 #define ACTION_MOVE_FORWARD 3 // Colors #define COLONY1_COLOR (Color){220, 0, 0, 255} #define COLONY2_COLOR (Color){0, 0, 220, 255} -#define PHEROMONE1_COLOR (Color){255, 200, 200, 100} -#define PHEROMONE2_COLOR (Color){200, 200, 255, 100} +// #define PHEROMONE1_COLOR (Color){255, 200, 200, 100} +// #define PHEROMONE2_COLOR (Color){200, 200, 255, 100} #define FOOD_COLOR (Color){0, 200, 0, 255} #define BACKGROUND_COLOR (Color){50, 50, 50, 255} @@ -65,11 +65,11 @@ typedef struct { int amount; } FoodSource; -typedef struct { - Vector2D position; - float strength; - int colony_id; -} Pheromone; +// typedef struct { +// Vector2D position; +// float strength; +// int colony_id; +// } Pheromone; typedef struct { Vector2D position; @@ -105,8 +105,8 @@ struct AntsEnv { Colony colonies[NUM_COLONIES]; Ant* ants; // Dynamic array of all ants FoodSource food_sources[MAX_FOOD_SOURCES]; - Pheromone pheromones[MAX_PHEROMONES]; - int num_pheromones; + // Pheromone pheromones[MAX_PHEROMONES]; + // int num_pheromones; int num_food_sources; // Environment parameters @@ -151,7 +151,7 @@ void init_ants_env(AntsEnv* env) { env->ant_logs = (Log*)calloc(env->num_ants, sizeof(Log)); env->tick = 0; env->client = NULL; - env->num_pheromones = 0; + // env->num_pheromones = 0; // Initialize food sources env->num_food_sources = 
MAX_FOOD_SOURCES; @@ -167,7 +167,7 @@ void init_ants_env(AntsEnv* env) { } void allocate_ants_env(AntsEnv* env) { - env->obs_size = 10; // Fixed observation size per ant (added 2 for pheromone sensing) + env->obs_size = 8; // Fixed observation size per ant (removed 2 pheromone slots) env->observations = (float*)calloc(env->num_ants * env->obs_size, sizeof(float)); env->actions = (int*)calloc(env->num_ants, sizeof(int)); env->rewards = (float*)calloc(env->num_ants, sizeof(float)); @@ -228,13 +228,7 @@ static inline float get_angle(Vector2D a, Vector2D b) { } static inline bool is_in_vision(Vector2D ant_pos, Vector2D target) { -// float dist_sq = distance_squared(ant_pos, target); -// if (dist_sq > (ANT_VISION_RANGE * ANT_VISION_RANGE)) { -// return false; -// } -// else { -// return true; -// } + // Vision range removed - ants can see everything return true; } @@ -264,26 +258,26 @@ static inline Vector2D get_ant_target(AntsEnv* env, Ant* ant) { -static inline void add_pheromone(AntsEnv* env, Vector2D position, int colony_id) { - if (env->num_pheromones >= MAX_PHEROMONES) { - // Replace oldest pheromone - for (int i = 0; i < env->num_pheromones - 1; i++) { - env->pheromones[i] = env->pheromones[i + 1]; - } - env->num_pheromones--; - } - - env->pheromones[env->num_pheromones].position = position; - env->pheromones[env->num_pheromones].strength = PHEROMONE_DEPOSIT_AMOUNT; - env->pheromones[env->num_pheromones].colony_id = colony_id; - env->num_pheromones++; -} +// static inline void add_pheromone(AntsEnv* env, Vector2D position, int colony_id) { +// if (env->num_pheromones >= MAX_PHEROMONES) { +// // Replace oldest pheromone +// for (int i = 0; i < env->num_pheromones - 1; i++) { +// env->pheromones[i] = env->pheromones[i + 1]; +// } +// env->num_pheromones--; +// } +// +// env->pheromones[env->num_pheromones].position = position; +// env->pheromones[env->num_pheromones].strength = PHEROMONE_DEPOSIT_AMOUNT; +// env->pheromones[env->num_pheromones].colony_id = 
colony_id; +// env->num_pheromones++; +// } void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { Ant* ant = &env->ants[ant_idx]; Colony* colony = &env->colonies[ant->colony_id]; - // Observation structure (10 elements): + // Observation structure (8 elements): // [0-1]: ant position (normalized) // [2]: ant direction (normalized between 0 and 1) // [3]: has_food (0 or 1) @@ -291,8 +285,6 @@ void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { // [5]: distance to colony (normalized) // [6]: direction to closest food (normalized between 0 and 1) // [7]: closest food distance (normalized) - // [8]: direction to strongest pheromone (normalized to 0-1, -1 if none) - // [9]: strongest pheromone strength (normalized, -1 if none) obs[0] = ant->position.x / env->width; obs[1] = ant->position.y / env->height; @@ -335,27 +327,6 @@ void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { obs[6] = (angle_to_food + M_PI) / (2 * M_PI); obs[7] = closest_food_dist_sq / ((env->width * env->width) + (env->height * env->height)); } - - // Find strongest pheromone from same colony - float strongest_pheromone = 0.0f; - Vector2D strongest_pheromone_pos = {0, 0}; - for (int i = 0; i < env->num_pheromones; i++) { - if (env->pheromones[i].colony_id == ant->colony_id) { - if (env->pheromones[i].strength > strongest_pheromone) { - strongest_pheromone = env->pheromones[i].strength; - strongest_pheromone_pos = env->pheromones[i].position; - } - } - } - - if (strongest_pheromone > 0.0f) { - float angle_to_pheromone = wrap_angle(get_angle(ant->position, strongest_pheromone_pos)); - obs[8] = (angle_to_pheromone + M_PI) / (2 * M_PI); - obs[9] = strongest_pheromone / PHEROMONE_DEPOSIT_AMOUNT; // Normalize by max strength - } else { - obs[8] = -1.0f; - obs[9] = -1.0f; - } } void compute_observations(AntsEnv* env) { @@ -413,7 +384,7 @@ void spawn_food(AntsEnv* env) { void c_reset(AntsEnv* env) { env->tick = 0; env->log = (Log){0}; - env->num_pheromones = 
0; + // env->num_pheromones = 0; // Reset colonies env->colonies[0].food_collected = 0; @@ -535,12 +506,12 @@ void step_ant(AntsEnv* env, int ant_id) { ant->direction += TURN_ANGLE; ant->direction = wrap_angle(ant->direction); break; - case ACTION_DROP_PHEROMONE: - // Only drop pheromones when carrying food - if (ant->has_food) { - add_pheromone(env, ant->position, ant->colony_id); - } - break; + // case ACTION_DROP_PHEROMONE: + // // Only drop pheromones when carrying food + // if (ant->has_food) { + // add_pheromone(env, ant->position, ant->colony_id); + // } + // break; case ACTION_MOVE_FORWARD: // Move forward only when this action is selected ant->position.x += ANT_SPEED * cos(ant->direction); @@ -636,18 +607,18 @@ void c_step(AntsEnv* env) { for (int i = 0; i < env->num_ants; i++) { step_ant(env, i); } - + // Update pheromones - for (int i = 0; i < env->num_pheromones; i++) { - env->pheromones[i].strength -= PHEROMONE_EVAPORATION_RATE; - if (env->pheromones[i].strength <= 0) { - // Remove evaporated pheromone - env->pheromones[i] = env->pheromones[env->num_pheromones - 1]; - env->num_pheromones--; - i--; - } - } - + // for (int i = 0; i < env->num_pheromones; i++) { + // env->pheromones[i].strength -= PHEROMONE_EVAPORATION_RATE; + // if (env->pheromones[i].strength <= 0) { + // // Remove evaporated pheromone + // env->pheromones[i] = env->pheromones[env->num_pheromones - 1]; + // env->num_pheromones--; + // i--; + // } + // } + // Generate new observations compute_observations(env); } @@ -697,12 +668,12 @@ void c_render(AntsEnv* env) { } // Draw pheromones - for (int i = 0; i < env->num_pheromones; i++) { - Color pheromone_color = (env->pheromones[i].colony_id == 0) ? 
PHEROMONE1_COLOR : PHEROMONE2_COLOR; - pheromone_color.a = (unsigned char)(100 * env->pheromones[i].strength); - DrawCircle(env->pheromones[i].position.x, env->pheromones[i].position.y, - PHEROMONE_SIZE, pheromone_color); - } + // for (int i = 0; i < env->num_pheromones; i++) { + // Color pheromone_color = (env->pheromones[i].colony_id == 0) ? PHEROMONE1_COLOR : PHEROMONE2_COLOR; + // pheromone_color.a = (unsigned char)(100 * env->pheromones[i].strength); + // DrawCircle(env->pheromones[i].position.x, env->pheromones[i].position.y, + // PHEROMONE_SIZE, pheromone_color); + // } // Draw ants for (int i = 0; i < env->num_ants; i++) { diff --git a/pufferlib/ocean/ants/ants.py b/pufferlib/ocean/ants/ants.py index 99c23decc..3460fe7b6 100644 --- a/pufferlib/ocean/ants/ants.py +++ b/pufferlib/ocean/ants/ants.py @@ -25,10 +25,10 @@ def __init__( height=720, num_ants=32, reward_food=0.1, - reward_delivery=1.0, + reward_delivery=5.0, reward_death=0.0, - reward_demo_match=0.01, - reward_demo_mismatch=-0.01, + reward_demo_match=0.001, + reward_demo_mismatch=-0.001, report_interval=1, render_mode=None, buf=None, @@ -69,7 +69,7 @@ def __init__( rew_slice = self.rewards[offset:offset+na] term_slice = self.terminals[offset:offset+na] trunc_slice = self.truncations[offset:offset+na] - + # Seed each env uniquely: i + seed * num_envs env_seed = i + seed * num_envs env_id = binding.env_init( From e32fbc36ec6f2656dfba0c009f57cc82c6b1fb6f Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Mon, 15 Dec 2025 17:25:25 -0500 Subject: [PATCH 15/23] added more complex reward vars --- pufferlib/ocean/ants/ants.c | 31 +-------- pufferlib/ocean/ants/ants.h | 112 +++++++++++++++++++++++++++++---- pufferlib/ocean/ants/ants.py | 10 ++- pufferlib/ocean/ants/binding.c | 4 ++ 4 files changed, 115 insertions(+), 42 deletions(-) diff --git a/pufferlib/ocean/ants/ants.c b/pufferlib/ocean/ants/ants.c index b38d5e5cb..bf6581c40 100644 --- a/pufferlib/ocean/ants/ants.c +++ b/pufferlib/ocean/ants/ants.c @@ -17,7 
+17,7 @@ void render_ant_observations(AntsEnv* env, int ant_id) { int panel_x = 20; int panel_y = 100; int panel_width = 300; - int panel_height = 240; // Increased for 2 additional pheromone observations + int panel_height = 180; // Adjusted for 8 observations (no pheromones) // Draw semi-transparent background panel DrawRectangle(panel_x - 10, panel_y - 10, panel_width + 20, panel_height + 20, @@ -56,14 +56,6 @@ void render_ant_observations(AntsEnv* env, int ant_id) { DrawText(TextFormat("Food Dist: %.3f", obs[7]), panel_x, y_offset, 14, obs[7] < 0 ? GRAY : RAYWHITE); - y_offset += line_height; - - DrawText(TextFormat("Pheromone Dir: %.3f", obs[8]), panel_x, y_offset, 14, - obs[8] < 0 ? GRAY : RAYWHITE); - y_offset += line_height; - - DrawText(TextFormat("Pheromone Str: %.3f", obs[9]), panel_x, y_offset, 14, - obs[9] < 0 ? GRAY : RAYWHITE); // Visual indicators on the ant Vector2D ant_pos = ant->position; @@ -96,18 +88,6 @@ void render_ant_observations(AntsEnv* env, int ant_id) { DrawText("FOOD", food_end.x + 5, food_end.y - 10, 12, GREEN); } - // Draw direction to pheromone (if detected) - if (obs[8] >= 0) { - float pheromone_angle = (obs[8] * 2 * M_PI) - M_PI; - float line_length = 35.0f; - Vector2D pheromone_end = { - ant_pos.x + line_length * cos(pheromone_angle), - ant_pos.y + line_length * sin(pheromone_angle) - }; - DrawLineEx((Vector2){ant_pos.x, ant_pos.y}, (Vector2){pheromone_end.x, pheromone_end.y}, 2, MAGENTA); - DrawText("PHEROMONE", pheromone_end.x + 5, pheromone_end.y - 10, 12, MAGENTA); - } - // Draw current direction float current_angle = (obs[2] * 2 * M_PI) - M_PI; float dir_length = 25.0f; @@ -161,7 +141,6 @@ int demo() { // Track key states for single-press detection bool left_pressed = false; bool right_pressed = false; - bool space_pressed = false; // Main loop - FOLLOWING SNAKE PATTERN while (!WindowShouldClose()) { @@ -187,13 +166,6 @@ int demo() { right_pressed = false; } - // Handle pheromone drop (overrides movement) - if 
(IsKeyDown(KEY_SPACE) && !space_pressed) { - env.actions[0] = ACTION_DROP_PHEROMONE; - space_pressed = true; - } else if (!IsKeyDown(KEY_SPACE)) { - space_pressed = false; - } // Rest of ants act via scripted behaviors // Threshold is half of turn angle to avoid oscillation with 45-degree turns @@ -347,7 +319,6 @@ int main() { printf("- Hold SHIFT to control the first ant AND view ant 1's observations\n"); printf("- While holding SHIFT: ant moves forward by default\n"); printf("- A/D or LEFT/RIGHT to turn 45 degrees (stops movement)\n"); - printf("- SPACE to drop pheromone (stops movement)\n"); printf("- ESC to exit\n\n"); demo(); diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index d94d261f7..850497b1c 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -77,6 +77,10 @@ typedef struct { int colony_id; bool has_food; int lifetime; // Track ant lifetime for performance metrics + + // Tracking for reward shaping + float prev_dist_to_objective; // Previous distance to current objective (food or colony) + int steps_with_food; // Steps taken while carrying food (for efficiency bonus) } Ant; typedef struct { @@ -122,6 +126,12 @@ struct AntsEnv { float reward_death; float reward_demo_match; // Reward for matching demo action float reward_demo_mismatch; // Penalty for not matching demo action + + // New reward shaping parameters + float reward_progress; // Reward for moving closer to objective + float reward_time_penalty; // Small penalty per step (encourages efficiency) + float reward_wrong_direction; // Penalty for moving away from objective + float reward_efficiency_bonus; // Bonus multiplier for fast deliveries // Rendering Client* client; // Raylib client @@ -344,6 +354,10 @@ void spawn_ant(AntsEnv* env, int ant_id) { ant->has_food = false; ant->lifetime = random_float(0, ANT_LIFETIME); + // Initialize reward shaping tracking + ant->prev_dist_to_objective = -1.0f; // -1 indicates uninitialized + ant->steps_with_food = 
0; + // Reset individual ant log env->ant_logs[ant_id] = (Log){0}; } @@ -524,7 +538,64 @@ void step_ant(AntsEnv* env, int ant_id) { if (ant->position.x > env->width) ant->position.x = 0; if (ant->position.y < 0) ant->position.y = env->height; if (ant->position.y > env->height) ant->position.y = 0; - + + // REWARD SHAPING: Progress-based rewards + // Give rewards for moving toward objective, penalty for moving away + Vector2D objective_pos; + if (ant->has_food) { + // Objective is home colony + objective_pos = env->colonies[ant->colony_id].position; + ant->steps_with_food++; + } else { + // Objective is nearest food source + float closest_food_dist_sq = env->width * env->width + env->height * env->height; + for (int j = 0; j < env->num_food_sources; j++) { + if (env->food_sources[j].amount > 0) { + float dist_sq = distance_squared(ant->position, env->food_sources[j].position); + if (dist_sq < closest_food_dist_sq) { + closest_food_dist_sq = dist_sq; + objective_pos = env->food_sources[j].position; + } + } + } + } + + // Calculate current distance to objective + float current_dist = sqrtf(distance_squared(ant->position, objective_pos)); + + // On first step or after picking up food, initialize previous distance + if (ant->prev_dist_to_objective < 0) { + ant->prev_dist_to_objective = current_dist; + } + + // Calculate progress (positive if moving closer, negative if moving away) + float progress = ant->prev_dist_to_objective - current_dist; + + // Only give progress rewards if ant actually moved (action was MOVE_FORWARD) + if (action == ACTION_MOVE_FORWARD) { + if (progress > 0) { + // Moving closer to objective + float progress_reward = env->reward_progress * progress; + env->rewards[ant_id] += progress_reward; + env->ant_logs[ant_id].episode_return += progress_reward; + env->ant_logs[ant_id].reward += progress_reward; + } else if (progress < 0) { + // Moving away from objective (penalty) + float wrong_dir_penalty = env->reward_wrong_direction * progress; // progress 
is negative + env->rewards[ant_id] += wrong_dir_penalty; + env->ant_logs[ant_id].episode_return += wrong_dir_penalty; + env->ant_logs[ant_id].reward += wrong_dir_penalty; + } + } + + // Update previous distance for next step + ant->prev_dist_to_objective = current_dist; + + // Time penalty (encourages efficiency) + env->rewards[ant_id] += env->reward_time_penalty; + env->ant_logs[ant_id].episode_return += env->reward_time_penalty; + env->ant_logs[ant_id].reward += env->reward_time_penalty; + // Check for food collection if (!ant->has_food) { for (int j = 0; j < env->num_food_sources; j++) { @@ -542,6 +613,10 @@ void step_ant(AntsEnv* env, int ant_id) { env->rewards[ant_id] += env->reward_food; env->ant_logs[ant_id].episode_return += env->reward_food; env->ant_logs[ant_id].reward += env->reward_food; + + // Reset tracking for new objective (now need to return to colony) + ant->prev_dist_to_objective = -1.0f; + ant->steps_with_food = 0; break; } } @@ -555,10 +630,31 @@ void step_ant(AntsEnv* env, int ant_id) { if (dist_sq < (ANT_SIZE + COLONY_SIZE) * (ANT_SIZE + COLONY_SIZE)) { ant->has_food = false; colony->food_collected++; - env->rewards[ant_id] += env->reward_delivery; - env->ant_logs[ant_id].episode_return += env->reward_delivery; + + // Base delivery reward + float delivery_reward = env->reward_delivery; + + // Efficiency bonus: reward faster deliveries + // Normalize by expected optimal steps (width/2 / ANT_SPEED = ~128 steps average) + // Bonus decreases as steps_with_food increases + if (env->reward_efficiency_bonus > 0 && ant->steps_with_food > 0) { + float expected_steps = env->width / (2.0f * ANT_SPEED); + float efficiency_ratio = expected_steps / (float)ant->steps_with_food; + // Only give bonus if delivery was faster than expected + if (efficiency_ratio > 1.0f) { + float efficiency_bonus = env->reward_efficiency_bonus * (efficiency_ratio - 1.0f); + delivery_reward += efficiency_bonus; + } + } + + env->rewards[ant_id] += delivery_reward; + 
env->ant_logs[ant_id].episode_return += delivery_reward; env->ant_logs[ant_id].score += 1; - env->ant_logs[ant_id].reward += env->reward_delivery; + env->ant_logs[ant_id].reward += delivery_reward; + + // Reset tracking for new foraging trip + ant->prev_dist_to_objective = -1.0f; + ant->steps_with_food = 0; } } @@ -582,17 +678,11 @@ void step_ant(AntsEnv* env, int ant_id) { // Execute termination and log aggregation if (should_terminate) { - env->ant_logs[ant_id].perf = env->ant_logs[ant_id].episode_length > 0 ? + env->ant_logs[ant_id].perf = env->ant_logs[ant_id].episode_length > 0 ? env->ant_logs[ant_id].score / env->ant_logs[ant_id].episode_length : 0; add_log(env, ant_id); spawn_ant(env, ant_id); //Respawn the ant env->terminals[ant_id] = 1; - - // Debug output for terminal condition verification - if (env->tick % 100 == 0) { - printf("Ant %d terminated at tick %d, lifetime %d, score %.1f\n", - ant_id, env->tick, ant->lifetime, env->ant_logs[ant_id].score); - } } } diff --git a/pufferlib/ocean/ants/ants.py b/pufferlib/ocean/ants/ants.py index 3460fe7b6..2cf5b7a8d 100644 --- a/pufferlib/ocean/ants/ants.py +++ b/pufferlib/ocean/ants/ants.py @@ -25,10 +25,14 @@ def __init__( height=720, num_ants=32, reward_food=0.1, - reward_delivery=5.0, + reward_delivery=10.0, reward_death=0.0, reward_demo_match=0.001, reward_demo_mismatch=-0.001, + reward_progress=0.01, + reward_time_penalty=-0.001, + reward_wrong_direction=-0.005, + reward_efficiency_bonus=2.0, report_interval=1, render_mode=None, buf=None, @@ -87,6 +91,10 @@ def __init__( reward_death=reward_death, reward_demo_match=reward_demo_match, reward_demo_mismatch=reward_demo_mismatch, + reward_progress=reward_progress, + reward_time_penalty=reward_time_penalty, + reward_wrong_direction=reward_wrong_direction, + reward_efficiency_bonus=reward_efficiency_bonus, cell_size=self.cell_size ) c_envs.append(env_id) diff --git a/pufferlib/ocean/ants/binding.c b/pufferlib/ocean/ants/binding.c index 6110c3b44..b8b56adb0 100644 
--- a/pufferlib/ocean/ants/binding.c +++ b/pufferlib/ocean/ants/binding.c @@ -11,6 +11,10 @@ static int my_init(Env* env, PyObject* args, PyObject* kwargs) { env->reward_delivery = unpack(kwargs, "reward_delivery"); env->reward_demo_match = unpack(kwargs, "reward_demo_match"); env->reward_demo_mismatch = unpack(kwargs, "reward_demo_mismatch"); + env->reward_progress = unpack(kwargs, "reward_progress"); + env->reward_time_penalty = unpack(kwargs, "reward_time_penalty"); + env->reward_wrong_direction = unpack(kwargs, "reward_wrong_direction"); + env->reward_efficiency_bonus = unpack(kwargs, "reward_efficiency_bonus"); env->cell_size = unpack(kwargs, "cell_size"); init_ants_env(env); From 09482acd768a9d020bc59d5352da6d0512c902d4 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Tue, 16 Dec 2025 22:58:55 -0500 Subject: [PATCH 16/23] updated to use same structure as target env --- pufferlib/ocean/ants/ants.c | 393 ++++----------- pufferlib/ocean/ants/ants.h | 857 ++++++++++----------------------- pufferlib/ocean/ants/ants.py | 193 +++----- pufferlib/ocean/ants/binding.c | 14 +- 4 files changed, 430 insertions(+), 1027 deletions(-) diff --git a/pufferlib/ocean/ants/ants.c b/pufferlib/ocean/ants/ants.c index bf6581c40..543a78742 100644 --- a/pufferlib/ocean/ants/ants.c +++ b/pufferlib/ocean/ants/ants.c @@ -1,331 +1,116 @@ -// for local testing of c code,build with: -// bash scripts/build_ocean.sh ants local - -#include +/* Ants: Pure C demo file for testing the environment. + * Build it with: + * bash scripts/build_ocean.sh ants local (debug) + * bash scripts/build_ocean.sh ants fast + * + * Following the Target env pattern for consistency. 
+ */ +#include #include "ants.h" -#include "puffernet.h" - -// Function to visualize ant observations -void render_ant_observations(AntsEnv* env, int ant_id) { - if (ant_id >= env->num_ants) return; - - // Get ant observations - float* obs = &env->observations[ant_id * env->obs_size]; - Ant* ant = &env->ants[ant_id]; - - // Define UI panel position - int panel_x = 20; - int panel_y = 100; - int panel_width = 300; - int panel_height = 180; // Adjusted for 8 observations (no pheromones) - - // Draw semi-transparent background panel - DrawRectangle(panel_x - 10, panel_y - 10, panel_width + 20, panel_height + 20, - (Color){0, 0, 0, 180}); - DrawRectangleLines(panel_x - 10, panel_y - 10, panel_width + 20, panel_height + 20, RAYWHITE); - - // Title - DrawText(TextFormat("ANT %d OBSERVATIONS", ant_id), panel_x, panel_y, 16, YELLOW); - - int y_offset = panel_y + 25; - int line_height = 18; - - // Display each observation with description - DrawText(TextFormat("Position X: %.3f", obs[0]), panel_x, y_offset, 14, RAYWHITE); - y_offset += line_height; - - DrawText(TextFormat("Position Y: %.3f", obs[1]), panel_x, y_offset, 14, RAYWHITE); - y_offset += line_height; - - DrawText(TextFormat("Direction: %.3f", obs[2]), panel_x, y_offset, 14, RAYWHITE); - y_offset += line_height; - - DrawText(TextFormat("Has Food: %s", obs[3] == 1 ? "YES" : "NO"), - panel_x, y_offset, 14, obs[3] > 0.5f ? GREEN : RED); - y_offset += line_height; - - DrawText(TextFormat("Colony Dir: %.3f", obs[4]), panel_x, y_offset, 14, RAYWHITE); - y_offset += line_height; - - DrawText(TextFormat("Colony Dist: %.3f", obs[5]), panel_x, y_offset, 14, RAYWHITE); - y_offset += line_height; - - DrawText(TextFormat("Food Dir: %.3f", obs[6]), panel_x, y_offset, 14, - obs[6] < 0 ? GRAY : RAYWHITE); - y_offset += line_height; - DrawText(TextFormat("Food Dist: %.3f", obs[7]), panel_x, y_offset, 14, - obs[7] < 0 ? 
GRAY : RAYWHITE); - - // Visual indicators on the ant - Vector2D ant_pos = ant->position; - - // Highlight the selected ant - DrawCircleLines(ant_pos.x, ant_pos.y, ANT_SIZE + 3, YELLOW); - DrawCircleLines(ant_pos.x, ant_pos.y, ANT_SIZE + 5, YELLOW); - - // Draw direction to colony (if valid) - if (obs[4] >= 0) { - float colony_angle = (obs[4] * 2 * M_PI) - M_PI; - float line_length = 40.0f; - Vector2D colony_end = { - ant_pos.x + line_length * cos(colony_angle), - ant_pos.y + line_length * sin(colony_angle) - }; - DrawLineEx((Vector2){ant_pos.x, ant_pos.y}, (Vector2){colony_end.x, colony_end.y}, 3, BLUE); - DrawText("COLONY", colony_end.x + 5, colony_end.y - 10, 12, BLUE); - } - - // Draw direction to food (if visible) - if (obs[6] >= 0) { - float food_angle = (obs[6] * 2 * M_PI) - M_PI; - float line_length = 30.0f; - Vector2D food_end = { - ant_pos.x + line_length * cos(food_angle), - ant_pos.y + line_length * sin(food_angle) - }; - DrawLineEx((Vector2){ant_pos.x, ant_pos.y}, (Vector2){food_end.x, food_end.y}, 2, GREEN); - DrawText("FOOD", food_end.x + 5, food_end.y - 10, 12, GREEN); - } - - // Draw current direction - float current_angle = (obs[2] * 2 * M_PI) - M_PI; - float dir_length = 25.0f; - Vector2D dir_end = { - ant_pos.x + dir_length * cos(current_angle), - ant_pos.y + dir_length * sin(current_angle) - }; - DrawLineEx((Vector2){ant_pos.x, ant_pos.y}, (Vector2){dir_end.x, dir_end.y}, 4, YELLOW); -} +int main() { + int num_ants = 64; + int num_obs = 6; // Simplified observation space -int demo() { - // Initialize environment with proper parameters - FOLLOWING SNAKE PATTERN AntsEnv env = { - .num_ants = 32, - .width = WINDOW_WIDTH, - .height = WINDOW_HEIGHT, - .reward_food = 0.1f, - .reward_delivery = 1.0f, - .reward_death = -1.0f, - .reward_demo_match = 0.01f, // Small reward for matching demo - .reward_demo_mismatch = -0.01f, // Small penalty for not matching demo - .cell_size = 1, + .width = 1280, + .height = 720, + .num_ants = num_ants, + 
.reward_food_pickup = 0.1f, + .reward_delivery = 10.0f }; - - // Allocate memory - CRITICAL: USING PROPER ALLOCATION PATTERN - allocate_ants_env(&env); - c_reset(&env); - - // Load trained weights if available - Weights* weights = NULL; - LinearLSTM* net = NULL; - FILE* f = fopen("resources/ants_weights.bin", "rb"); - if (f) { - fclose(f); - weights = load_weights("resources/ants_weights.bin", 266501); - if (weights) { - int logit_sizes[1] = {4}; - net = make_linearlstm(weights, env.num_ants, env.obs_size, logit_sizes, 4); - } - } - - printf("Environment initialized. Starting render loop...\n"); - printf("Ants: %d, Observation size: %d\n", env.num_ants, env.obs_size); - if (!net) { - printf("No trained weights found. Running with random actions.\n"); - } - - // Initialize rendering client - env.client = make_client(1, env.width, env.height); - - // Track key states for single-press detection - bool left_pressed = false; - bool right_pressed = false; - - // Main loop - FOLLOWING SNAKE PATTERN - while (!WindowShouldClose()) { - // User can take control with shift key - if (IsKeyDown(KEY_LEFT_SHIFT)) { - // Control first ant of colony 1 for demo - // Default to move forward when no other action is pressed - env.actions[0] = ACTION_MOVE_FORWARD; - // Handle left turn (overrides movement) - if ((IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) && !left_pressed) { - env.actions[0] = ACTION_TURN_LEFT; - left_pressed = true; - } else if (!IsKeyDown(KEY_LEFT) && !IsKeyDown(KEY_A)) { - left_pressed = false; - } + init(&env); - // Handle right turn (overrides movement) - if ((IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) && !right_pressed) { - env.actions[0] = ACTION_TURN_RIGHT; - right_pressed = true; - } else if (!IsKeyDown(KEY_RIGHT) && !IsKeyDown(KEY_D)) { - right_pressed = false; - } + // Allocate buffers manually (normally passed from Python) + env.observations = calloc(env.num_ants * num_obs, sizeof(float)); + env.actions = calloc(env.num_ants, sizeof(int)); + env.rewards = 
calloc(env.num_ants, sizeof(float)); + env.terminals = calloc(env.num_ants, sizeof(unsigned char)); - - // Rest of ants act via scripted behaviors - // Threshold is half of turn angle to avoid oscillation with 45-degree turns - const float turn_threshold = TURN_ANGLE / 2.0f; // ~22.5 degrees + // Always call reset and render first + c_reset(&env); + c_render(&env); - for (int i = 1; i < env.num_ants; i++) { - Ant* ant = &env.ants[i]; - if (ant->has_food) { - // If ant has food, return to colony - Colony* colony = &env.colonies[ant->colony_id]; - float angle_to_colony = get_angle(ant->position, colony->position); - float angle_diff = wrap_angle(angle_to_colony - ant->direction); + printf("Ant Colony Demo\n"); + printf("Controls: ESC to exit\n"); + printf("Using simple heuristic AI: seek food -> return to colony\n\n"); - // Turn towards colony if angle difference is significant - if (angle_diff > turn_threshold) { - env.actions[i] = ACTION_TURN_RIGHT; - } else if (angle_diff < -turn_threshold) { - env.actions[i] = ACTION_TURN_LEFT; - } else { - env.actions[i] = ACTION_MOVE_FORWARD; - } + // Main loop - exit with ESC or close window + while (!WindowShouldClose()) { + // Simple demo AI: seek food when empty, return when full + for (int i = 0; i < env.num_ants; i++) { + Ant* ant = &env.ants[i]; + + // Simple heuristic AI + if (ant->has_food) { + // Return to colony + Colony* colony = &env.colonies[ant->colony_id]; + float angle_to_colony = get_angle(ant->position, colony->position); + float angle_diff = wrap_angle(angle_to_colony - ant->direction); + + if (angle_diff > M_PI / 8) { + env.actions[i] = ACTION_TURN_RIGHT; + } else if (angle_diff < -M_PI / 8) { + env.actions[i] = ACTION_TURN_LEFT; } else { - // If ant doesn't have food, seek nearest food source - float closest_food_dist_sq = env.width * env.width; - Vector2D closest_food_pos = {0, 0}; - bool found_food = false; - - for (int j = 0; j < env.num_food_sources; j++) { - if (env.food_sources[j].amount > 0) { - 
float dist_sq = distance_squared(ant->position, env.food_sources[j].position); - if (dist_sq < closest_food_dist_sq && is_in_vision(ant->position, env.food_sources[j].position)) { - closest_food_dist_sq = dist_sq; - closest_food_pos = env.food_sources[j].position; - found_food = true; - } + env.actions[i] = ACTION_MOVE_FORWARD; + } + } else { + // Seek nearest food + float closest_dist_sq = env.width * env.width; + Vector2D closest_food = {0, 0}; + bool found = false; + + for (int f = 0; f < env.num_food_sources; f++) { + if (env.food_sources[f].amount > 0) { + float dist_sq = distance_squared(ant->position, env.food_sources[f].position); + if (dist_sq < closest_dist_sq) { + closest_dist_sq = dist_sq; + closest_food = env.food_sources[f].position; + found = true; } } + } - if (found_food) { - // Turn towards food if angle difference is significant - float angle_to_food = get_angle(ant->position, closest_food_pos); - float angle_diff = wrap_angle(angle_to_food - ant->direction); + if (found) { + float angle_to_food = get_angle(ant->position, closest_food); + float angle_diff = wrap_angle(angle_to_food - ant->direction); - if (angle_diff > turn_threshold) { - env.actions[i] = ACTION_TURN_RIGHT; - } else if (angle_diff < -turn_threshold) { - env.actions[i] = ACTION_TURN_LEFT; - } else { - env.actions[i] = ACTION_MOVE_FORWARD; - } + if (angle_diff > M_PI / 8) { + env.actions[i] = ACTION_TURN_RIGHT; + } else if (angle_diff < -M_PI / 8) { + env.actions[i] = ACTION_TURN_LEFT; } else { - // If no food in sight, move forward and occasionally turn - env.actions[i] = (rand() % 100 < 5) ? (rand() % 2 ? 
ACTION_TURN_LEFT : ACTION_TURN_RIGHT) : ACTION_MOVE_FORWARD; + env.actions[i] = ACTION_MOVE_FORWARD; } + } else { + // No food visible, just move forward + env.actions[i] = ACTION_MOVE_FORWARD; } } - } else if (net) { - // Use neural network for all ants - forward_linearlstm(net, env.observations, env.actions); - } else { - // All ants act randomly - for (int i = 0; i < env.num_ants; i++) { - env.actions[i] = rand() % 4; - } } - + c_step(&env); c_render(&env); - - // Visualize ant observations when shift is pressed - if (IsKeyDown(KEY_LEFT_SHIFT)) { - render_ant_observations(&env, 0); - } - - // Print stats periodically - if (env.tick % 1000 == 0 && env.log.n > 0) { - printf("Tick %d: Episodes completed: %.0f, Avg score: %.2f, Avg return: %.2f\n", - env.tick, env.log.n, env.log.score / env.log.n, env.log.episode_return / env.log.n); - } - } - - printf("Closing environment...\n"); - - // Clean up - PROPER CLEANUP FOLLOWING SNAKE PATTERN - if (net) { - free_linearlstm(net); - } - if (weights) { - free(weights); - } - close_client(env.client); - free_ants_env(&env); - - return 0; -} -void test_performance(float test_time) { - // Performance test environment - AntsEnv env = { - .num_ants = 2048, - .width = 1280, - .height = 720, - .reward_food = 0.1f, - .reward_delivery = 1.0f, - .reward_death = -1.0f, - .cell_size = 1, - }; - - allocate_ants_env(&env); - c_reset(&env); - - int start = time(NULL); - int steps = 0; - - while (time(NULL) - start < test_time) { - // Random actions for performance test - for (int i = 0; i < env.num_ants; i++) { - env.actions[i] = rand() % 4; - } - - c_step(&env); - steps++; - - // Print intermediate stats - if (steps % 1000 == 0 && env.log.n > 0) { - printf("Step %d: Episodes: %.0f, Avg performance: %.4f\n", - steps, env.log.n, env.log.perf / env.log.n); + // Print stats every 60 frames + if (env.tick % 60 == 0) { + printf("Tick: %d | Colony 1: %d | Colony 2: %d | Episodes: %.0f | Avg Score: %.2f\n", + env.tick, + 
env.colonies[0].food_collected, + env.colonies[1].food_collected, + env.log.n, + env.log.n > 0 ? env.log.score / env.log.n : 0.0f); } } - - int end = time(NULL); - float sps = (float)env.num_ants * steps / (end - start); - printf("Ant Colony Environment SPS: %.0f\n", sps); - printf("Total ant steps: %.0f\n", sps); - printf("Episodes completed: %.0f\n", env.log.n); - if (env.log.n > 0) { - printf("Average score: %.2f\n", env.log.score / env.log.n); - printf("Average performance: %.4f\n", env.log.perf / env.log.n); - } - - // Clean up - free_ants_env(&env); -} -int main() { - // Initialize random seed - srand(time(NULL)); - - printf("Ant Colony Environment Demo\n"); - printf("Controls:\n"); - printf("- Hold SHIFT to control the first ant AND view ant 1's observations\n"); - printf("- While holding SHIFT: ant moves forward by default\n"); - printf("- A/D or LEFT/RIGHT to turn 45 degrees (stops movement)\n"); - printf("- ESC to exit\n\n"); - - demo(); - - // Uncomment for performance testing - // printf("\nRunning performance test...\n"); - // test_performance(10); - + // Cleanup + free(env.observations); + free(env.actions); + free(env.rewards); + free(env.terminals); + c_close(&env); + return 0; -} \ No newline at end of file +} diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index 850497b1c..60d557abd 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -1,12 +1,20 @@ -#include +/* Ants: A multiagent foraging environment inspired by ant colonies. + * Two colonies compete to collect food from the environment. + * Follows the Target env pattern for simplicity and clarity. 
+ */ + +#define _USE_MATH_DEFINES #include -#include -#include #include -#include +#include +#include #include "raylib.h" -// Constants for the simulation +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +// Environment constants #define WINDOW_WIDTH 1280 #define WINDOW_HEIGHT 720 #define NUM_COLONIES 2 @@ -16,207 +24,95 @@ #define ANT_SIZE 4 #define FOOD_SIZE 6 #define COLONY_SIZE 20 -// #define PHEROMONE_EVAPORATION_RATE 0.001f -// #define PHEROMONE_DEPOSIT_AMOUNT 1.0f -// #define MAX_PHEROMONES 5000 -// #define PHEROMONE_SIZE 2 -// #define ANT_VISION_RANGE 500.0f -// #define ANT_VISION_ANGLE (M_PI / 2) #define TURN_ANGLE (M_PI / 4) #define MIN_FOOD_COLONY_DISTANCE 50.0f -#define ANT_LIFETIME 5000 +#define ANT_RESET_INTERVAL 2048 // Reset ant every N steps (like target.c) // Actions #define ACTION_TURN_LEFT 0 #define ACTION_TURN_RIGHT 1 -// #define ACTION_DROP_PHEROMONE 2 -#define ACTION_MOVE_FORWARD 3 +#define ACTION_MOVE_FORWARD 2 +#define ACTION_NOOP 3 // Colors #define COLONY1_COLOR (Color){220, 0, 0, 255} #define COLONY2_COLOR (Color){0, 0, 220, 255} -// #define PHEROMONE1_COLOR (Color){255, 200, 200, 100} -// #define PHEROMONE2_COLOR (Color){200, 200, 255, 100} #define FOOD_COLOR (Color){0, 200, 0, 255} #define BACKGROUND_COLOR (Color){50, 50, 50, 255} // Required Log struct for PufferLib -typedef struct Log Log; -struct Log { - float perf; // Performance metric - float score; // Total score +typedef struct { + float perf; // Performance metric (score/length) + float score; // Total score (food deliveries) float episode_return; // Cumulative rewards float episode_length; // Episode duration - float reward; // Reward for the current step float n; // Episode count - REQUIRED AS LAST FIELD -}; +} Log; // Forward declarations typedef struct Client Client; typedef struct AntsEnv AntsEnv; -// Environment structs +// Simple 2D vector typedef struct { float x, y; } Vector2D; +// Food source in the environment typedef struct { Vector2D position; int 
amount; } FoodSource; -// typedef struct { -// Vector2D position; -// float strength; -// int colony_id; -// } Pheromone; - +// Individual ant agent typedef struct { Vector2D position; float direction; int colony_id; bool has_food; - int lifetime; // Track ant lifetime for performance metrics - - // Tracking for reward shaping - float prev_dist_to_objective; // Previous distance to current objective (food or colony) - int steps_with_food; // Steps taken while carrying food (for efficiency bonus) + int steps_alive; // Track steps for periodic reset } Ant; +// Colony home base typedef struct { Vector2D position; int food_collected; } Colony; -// Raylib client structure - FOLLOWING SNAKE PATTERN +// Raylib rendering client struct Client { int cell_size; int width; int height; }; -// Main environment struct - RESTRUCTURED FOLLOWING SNAKE PATTERN +// Main environment struct - FOLLOWING TARGET PATTERN struct AntsEnv { - // Required PufferLib fields - IDENTICAL TO SNAKE - float* observations; // Flattened observations for all ants - int* actions; // Actions for all ants - float* rewards; // Rewards for all ants - unsigned char* terminals; // Terminal flags - Log log; // Main aggregated log - Log* ant_logs; // Individual ant logs - CRITICAL ADDITION - - // Environment state + Log log; // Required: aggregated log for all agents + Client* client; // Rendering client + Ant* ants; // Dynamic array of ants Colony colonies[NUM_COLONIES]; - Ant* ants; // Dynamic array of all ants FoodSource food_sources[MAX_FOOD_SOURCES]; - // Pheromone pheromones[MAX_PHEROMONES]; - // int num_pheromones; - int num_food_sources; - - // Environment parameters - int num_ants; // Total number of ants - int width; // Environment width - int height; // Environment height - int obs_size; // Observation size per ant - int tick; // Current timestep - - // Reward parameters - float reward_food; - float reward_delivery; - float reward_death; - float reward_demo_match; // Reward for matching demo action - 
float reward_demo_mismatch; // Penalty for not matching demo action - - // New reward shaping parameters - float reward_progress; // Reward for moving closer to objective - float reward_time_penalty; // Small penalty per step (encourages efficiency) - float reward_wrong_direction; // Penalty for moving away from objective - float reward_efficiency_bonus; // Bonus multiplier for fast deliveries - - // Rendering - Client* client; // Raylib client - int cell_size; -}; - -/** - * Add an ant's log to the main log when the ant's episode ends. - * CRITICAL FUNCTION - COPIED FROM SNAKE PATTERN - * This should only be called during termination conditions for a specific ant. - * Accumulates the ant's stats into the main log and resets the ant's individual log. - */ -void add_log(AntsEnv* env, int ant_id) { - env->log.perf += env->ant_logs[ant_id].perf; - env->log.score += env->ant_logs[ant_id].score; - env->log.episode_return += env->ant_logs[ant_id].episode_return; - env->log.episode_length += env->ant_logs[ant_id].episode_length; - env->log.n += 1; - env->log.reward += env->ant_logs[ant_id].reward; - // Reset individual ant log - env->ant_logs[ant_id] = (Log){0}; -} - -// Memory management functions - FOLLOWING SNAKE PATTERN -void init_ants_env(AntsEnv* env) { - env->ants = (Ant*)calloc(env->num_ants, sizeof(Ant)); - env->ant_logs = (Log*)calloc(env->num_ants, sizeof(Log)); - env->tick = 0; - env->client = NULL; - // env->num_pheromones = 0; - - // Initialize food sources - env->num_food_sources = MAX_FOOD_SOURCES; - for (int i = 0; i < env->num_food_sources; i++) { - env->food_sources[i].amount = 0; // Will be set in reset - } - - // Initialize colonies - env->colonies[0].position = (Vector2D){env->width / 4, env->height / 2}; - env->colonies[1].position = (Vector2D){3 * env->width / 4, env->height / 2}; - env->colonies[0].food_collected = 0; - env->colonies[1].food_collected = 0; -} - -void allocate_ants_env(AntsEnv* env) { - env->obs_size = 8; // Fixed observation size 
per ant (removed 2 pheromone slots) - env->observations = (float*)calloc(env->num_ants * env->obs_size, sizeof(float)); - env->actions = (int*)calloc(env->num_ants, sizeof(int)); - env->rewards = (float*)calloc(env->num_ants, sizeof(float)); - env->terminals = (unsigned char*)calloc(env->num_ants, sizeof(unsigned char)); - init_ants_env(env); -} -void c_close(AntsEnv* env) { - if (env->ants) { - free(env->ants); - env->ants = NULL; - } - if (env->ant_logs) { - free(env->ant_logs); - env->ant_logs = NULL; - } -} + // Required PufferLib fields + float* observations; // Flattened observations + int* actions; // Actions for all ants + float* rewards; // Rewards for all ants + unsigned char* terminals; // Terminal flags -void free_ants_env(AntsEnv* env) { - c_close(env); - if (env->observations) { - free(env->observations); - env->observations = NULL; - } - if (env->actions) { - free(env->actions); - env->actions = NULL; - } - if (env->rewards) { - free(env->rewards); - env->rewards = NULL; - } - if (env->terminals) { - free(env->terminals); - env->terminals = NULL; - } -} + // Environment parameters + int num_ants; // Total number of ants + int width; // Environment width + int height; // Environment height + int num_food_sources; // Active food sources + int tick; // Current timestep + + // Simple reward parameters (like target.c) + float reward_food_pickup; // Reward for picking up food + float reward_delivery; // Reward for delivering food to colony +}; -// Helper function implementations +// Helper functions static inline float random_float(float min, float max) { return min + (max - min) * ((float)rand() / (float)RAND_MAX); } @@ -237,550 +133,329 @@ static inline float get_angle(Vector2D a, Vector2D b) { return atan2(b.y - a.y, b.x - a.x); } -static inline bool is_in_vision(Vector2D ant_pos, Vector2D target) { - // Vision range removed - ants can see everything - return true; -} - -// Get the target position for an ant (colony if carrying food, nearest food 
otherwise) -static inline Vector2D get_ant_target(AntsEnv* env, Ant* ant) { - if (ant->has_food) { - // Target is the colony when carrying food - return env->colonies[ant->colony_id].position; - } else { - // Target is the nearest food source when not carrying food - float closest_food_dist_sq = env->width * env->width + env->height * env->height; - Vector2D closest_food_pos = ant->position; // Default to current position if no food found - - for (int i = 0; i < env->num_food_sources; i++) { - if (env->food_sources[i].amount > 0) { - float dist_sq = distance_squared(ant->position, env->food_sources[i].position); - if (dist_sq < closest_food_dist_sq) { - closest_food_dist_sq = dist_sq; - closest_food_pos = env->food_sources[i].position; - } - } - } - - return closest_food_pos; - } -} - - - -// static inline void add_pheromone(AntsEnv* env, Vector2D position, int colony_id) { -// if (env->num_pheromones >= MAX_PHEROMONES) { -// // Replace oldest pheromone -// for (int i = 0; i < env->num_pheromones - 1; i++) { -// env->pheromones[i] = env->pheromones[i + 1]; -// } -// env->num_pheromones--; -// } -// -// env->pheromones[env->num_pheromones].position = position; -// env->pheromones[env->num_pheromones].strength = PHEROMONE_DEPOSIT_AMOUNT; -// env->pheromones[env->num_pheromones].colony_id = colony_id; -// env->num_pheromones++; -// } - -void get_observation_for_ant(AntsEnv* env, int ant_idx, float* obs) { - Ant* ant = &env->ants[ant_idx]; - Colony* colony = &env->colonies[ant->colony_id]; - - // Observation structure (8 elements): - // [0-1]: ant position (normalized) - // [2]: ant direction (normalized between 0 and 1) - // [3]: has_food (0 or 1) - // [4]: direction to colony (normalized between 0 and 1) - // [5]: distance to colony (normalized) - // [6]: direction to closest food (normalized between 0 and 1) - // [7]: closest food distance (normalized) - - obs[0] = ant->position.x / env->width; - obs[1] = ant->position.y / env->height; - - // Normalize direction to 
0-1 range (0 = right, 0.25 = up, 0.5 = left, 0.75 = down) - obs[2] = (ant->direction + M_PI) / (2 * M_PI); - - obs[3] = ant->has_food ? 1.0f : 0.0f; - - // Get direction to colony (normalized between 0 and 1) - float angle_to_colony = wrap_angle(get_angle(ant->position, colony->position)); - obs[4] = (angle_to_colony + M_PI) / (2 * M_PI); - - obs[5] = distance_squared(ant->position, colony->position) / (env->width * env->width + env->height * env->height); - - // Find closest visible food - float closest_food_dist_sq = env->width * env->width; - Vector2D closest_food_pos = {0, 0}; - for (int i = 0; i < env->num_food_sources; i++) { - if (env->food_sources[i].amount > 0) { - float dist_sq = distance_squared(ant->position, env->food_sources[i].position); - if ( - dist_sq < closest_food_dist_sq - && is_in_vision(ant->position, env->food_sources[i].position) - ) { - closest_food_dist_sq = dist_sq; - closest_food_pos.x = env->food_sources[i].position.x; - closest_food_pos.y = env->food_sources[i].position.y; - } - } - } - - if(closest_food_pos.x == 0 && closest_food_pos.y == 0) { - obs[6] = -1.0f; - obs[7] = -1.0f; - } - else { - // Get direction to closest food (normalized between 0 and 1) - float angle_to_food = wrap_angle(get_angle(ant->position, closest_food_pos)); - obs[6] = (angle_to_food + M_PI) / (2 * M_PI); - obs[7] = closest_food_dist_sq / ((env->width * env->width) + (env->height * env->height)); - } -} - -void compute_observations(AntsEnv* env) { - for (int i = 0; i < env->num_ants; i++) { - get_observation_for_ant(env, i, &env->observations[i * env->obs_size]); - } +static inline float clip(float val, float min, float max) { + if (val < min) return min; + if (val > max) return max; + return val; } +// Spawn a new ant at its colony void spawn_ant(AntsEnv* env, int ant_id) { Ant* ant = &env->ants[ant_id]; Colony* colony = &env->colonies[ant->colony_id]; ant->position = colony->position; - ant->direction = wrap_angle((rand() % 8) * (M_PI / 4)); // Randomly 
choose between 8 directions (0, 45, 90, 135, 180, 225, 270, 315 degrees) + ant->direction = wrap_angle((rand() % 8) * (M_PI / 4)); ant->has_food = false; - ant->lifetime = random_float(0, ANT_LIFETIME); - - // Initialize reward shaping tracking - ant->prev_dist_to_objective = -1.0f; // -1 indicates uninitialized - ant->steps_with_food = 0; - - // Reset individual ant log - env->ant_logs[ant_id] = (Log){0}; + ant->steps_alive = 0; } +// Spawn food at a valid location void spawn_food(AntsEnv* env) { - int idx; - bool valid_position; int attempts = 0; - - do { + + while (attempts < 100) { float x = random_float(50, env->width - 50); float y = random_float(50, env->height - 50); - - valid_position = true; + + // Check distance from colonies + bool valid = true; for (int j = 0; j < NUM_COLONIES; j++) { float dist_sq = distance_squared((Vector2D){x, y}, env->colonies[j].position); if (dist_sq < MIN_FOOD_COLONY_DISTANCE * MIN_FOOD_COLONY_DISTANCE) { - valid_position = false; + valid = false; break; } } - - if (valid_position) { - // Find an empty food source slot - for (idx = 0; idx < env->num_food_sources; idx++) { - if (env->food_sources[idx].amount == 0) { - env->food_sources[idx].position.x = x; - env->food_sources[idx].position.y = y; - env->food_sources[idx].amount = MAX_FOOD_PER_SOURCE; + + if (valid) { + // Find empty slot + for (int i = 0; i < MAX_FOOD_SOURCES; i++) { + if (env->food_sources[i].amount == 0) { + env->food_sources[i].position.x = x; + env->food_sources[i].position.y = y; + env->food_sources[i].amount = MAX_FOOD_PER_SOURCE; return; } } } attempts++; - } while (!valid_position && attempts < 100); + } } -void c_reset(AntsEnv* env) { +// Initialize environment memory +void init(AntsEnv* env) { + env->ants = (Ant*)calloc(env->num_ants, sizeof(Ant)); env->tick = 0; - env->log = (Log){0}; - // env->num_pheromones = 0; - - // Reset colonies + env->client = NULL; + + // Initialize colonies + env->colonies[0].position = (Vector2D){env->width / 4, env->height 
/ 2}; + env->colonies[1].position = (Vector2D){3 * env->width / 4, env->height / 2}; env->colonies[0].food_collected = 0; env->colonies[1].food_collected = 0; - - // Initialize all ants - int ant_idx = 0; - for (int i = 0; i < NUM_COLONIES; i++) { - for (int j = 0; j < env->num_ants / NUM_COLONIES; j++) { - env->ants[ant_idx].colony_id = i; - spawn_ant(env, ant_idx); - ant_idx++; - } - } - - // Clear food sources and spawn new ones + + // Initialize food sources + env->num_food_sources = MAX_FOOD_SOURCES; for (int i = 0; i < env->num_food_sources; i++) { env->food_sources[i].amount = 0; } - - for (int i = 0; i < env->num_food_sources; i++) { - spawn_food(env); - } - - // Clear buffers - memset(env->rewards, 0, env->num_ants * sizeof(float)); - memset(env->terminals, 0, env->num_ants * sizeof(unsigned char)); - - // Generate initial observations - compute_observations(env); } -// Compute the hardcoded demo action for an ant -// This replicates the logic from demo() lines 176-226 -int get_demo_action(AntsEnv* env, int ant_id) { - Ant* ant = &env->ants[ant_id]; - // Threshold is half of turn angle to avoid oscillation with 45-degree turns - const float turn_threshold = TURN_ANGLE / 2.0f; // ~22.5 degrees +// Compute observations for all ants - FOLLOWING TARGET PATTERN +void compute_observations(AntsEnv* env) { + int obs_idx = 0; - if (ant->has_food) { - // If ant has food, return to colony + for (int a = 0; a < env->num_ants; a++) { + Ant* ant = &env->ants[a]; Colony* colony = &env->colonies[ant->colony_id]; - float angle_to_colony = get_angle(ant->position, colony->position); - float angle_diff = wrap_angle(angle_to_colony - ant->direction); - - // Turn towards colony if angle difference is significant - if (angle_diff > turn_threshold) { - return ACTION_TURN_RIGHT; - } else if (angle_diff < -turn_threshold) { - return ACTION_TURN_LEFT; - } else { - return ACTION_MOVE_FORWARD; - } - } else { - // If ant doesn't have food, seek nearest food source - float 
closest_food_dist_sq = env->width * env->width; + + // Find closest food source + float closest_food_dist_sq = env->width * env->width + env->height * env->height; Vector2D closest_food_pos = {0, 0}; bool found_food = false; - for (int j = 0; j < env->num_food_sources; j++) { - if (env->food_sources[j].amount > 0) { - float dist_sq = distance_squared(ant->position, env->food_sources[j].position); - if (dist_sq < closest_food_dist_sq && is_in_vision(ant->position, env->food_sources[j].position)) { + for (int i = 0; i < env->num_food_sources; i++) { + if (env->food_sources[i].amount > 0) { + float dist_sq = distance_squared(ant->position, env->food_sources[i].position); + if (dist_sq < closest_food_dist_sq) { closest_food_dist_sq = dist_sq; - closest_food_pos = env->food_sources[j].position; + closest_food_pos = env->food_sources[i].position; found_food = true; } } } + // Observation: [colony_dx, colony_dy, food_dx, food_dy, has_food, heading] + // Normalized to roughly -1 to 1 range + env->observations[obs_idx++] = (colony->position.x - ant->position.x) / env->width; + env->observations[obs_idx++] = (colony->position.y - ant->position.y) / env->height; + if (found_food) { - // Turn towards food if angle difference is significant - float angle_to_food = get_angle(ant->position, closest_food_pos); - float angle_diff = wrap_angle(angle_to_food - ant->direction); - - if (angle_diff > turn_threshold) { - return ACTION_TURN_RIGHT; - } else if (angle_diff < -turn_threshold) { - return ACTION_TURN_LEFT; - } else { - return ACTION_MOVE_FORWARD; - } + env->observations[obs_idx++] = (closest_food_pos.x - ant->position.x) / env->width; + env->observations[obs_idx++] = (closest_food_pos.y - ant->position.y) / env->height; } else { - // If no food in sight, move forward (we'll use this as the "default" demo action) - // Note: The random turning behavior is not deterministic, so we default to forward - return ACTION_MOVE_FORWARD; + env->observations[obs_idx++] = 0.0f; + 
env->observations[obs_idx++] = 0.0f; } + + env->observations[obs_idx++] = ant->has_food ? 1.0f : 0.0f; + env->observations[obs_idx++] = ant->direction / (2 * M_PI); } } -void step_ant(AntsEnv* env, int ant_id) { - Ant* ant = &env->ants[ant_id]; - env->ant_logs[ant_id].episode_length += 1; - ant->lifetime++; - - int action = env->actions[ant_id]; - - // Compute demo action and compare with agent's action - int demo_action = get_demo_action(env, ant_id); - if (action == demo_action) { - // Reward for matching the demo action - env->rewards[ant_id] += env->reward_demo_match; - env->ant_logs[ant_id].episode_return += env->reward_demo_match; - env->ant_logs[ant_id].reward += env->reward_demo_match; - } else { - // Punish for not matching the demo action - env->rewards[ant_id] += env->reward_demo_mismatch; - env->ant_logs[ant_id].episode_return += env->reward_demo_mismatch; - env->ant_logs[ant_id].reward += env->reward_demo_mismatch; - } +// Required function: reset environment +void c_reset(AntsEnv* env) { + env->tick = 0; + env->log = (Log){0}; - // Execute action - switch (action) { - case ACTION_TURN_LEFT: - ant->direction -= TURN_ANGLE; - ant->direction = wrap_angle(ant->direction); - break; - case ACTION_TURN_RIGHT: - ant->direction += TURN_ANGLE; - ant->direction = wrap_angle(ant->direction); - break; - // case ACTION_DROP_PHEROMONE: - // // Only drop pheromones when carrying food - // if (ant->has_food) { - // add_pheromone(env, ant->position, ant->colony_id); - // } - // break; - case ACTION_MOVE_FORWARD: - // Move forward only when this action is selected - ant->position.x += ANT_SPEED * cos(ant->direction); - ant->position.y += ANT_SPEED * sin(ant->direction); - break; - } + // Reset colonies + env->colonies[0].food_collected = 0; + env->colonies[1].food_collected = 0; - // Wrap around edges - if (ant->position.x < 0) ant->position.x = env->width; - if (ant->position.x > env->width) ant->position.x = 0; - if (ant->position.y < 0) ant->position.y = env->height; 
- if (ant->position.y > env->height) ant->position.y = 0; - - // REWARD SHAPING: Progress-based rewards - // Give rewards for moving toward objective, penalty for moving away - Vector2D objective_pos; - if (ant->has_food) { - // Objective is home colony - objective_pos = env->colonies[ant->colony_id].position; - ant->steps_with_food++; - } else { - // Objective is nearest food source - float closest_food_dist_sq = env->width * env->width + env->height * env->height; - for (int j = 0; j < env->num_food_sources; j++) { - if (env->food_sources[j].amount > 0) { - float dist_sq = distance_squared(ant->position, env->food_sources[j].position); - if (dist_sq < closest_food_dist_sq) { - closest_food_dist_sq = dist_sq; - objective_pos = env->food_sources[j].position; - } - } + // Initialize ants + int ants_per_colony = env->num_ants / NUM_COLONIES; + for (int i = 0; i < env->num_ants; i++) { + env->ants[i].colony_id = i / ants_per_colony; + if (env->ants[i].colony_id >= NUM_COLONIES) { + env->ants[i].colony_id = NUM_COLONIES - 1; } + spawn_ant(env, i); } - // Calculate current distance to objective - float current_dist = sqrtf(distance_squared(ant->position, objective_pos)); - - // On first step or after picking up food, initialize previous distance - if (ant->prev_dist_to_objective < 0) { - ant->prev_dist_to_objective = current_dist; + // Clear and respawn food + for (int i = 0; i < env->num_food_sources; i++) { + env->food_sources[i].amount = 0; } - - // Calculate progress (positive if moving closer, negative if moving away) - float progress = ant->prev_dist_to_objective - current_dist; - - // Only give progress rewards if ant actually moved (action was MOVE_FORWARD) - if (action == ACTION_MOVE_FORWARD) { - if (progress > 0) { - // Moving closer to objective - float progress_reward = env->reward_progress * progress; - env->rewards[ant_id] += progress_reward; - env->ant_logs[ant_id].episode_return += progress_reward; - env->ant_logs[ant_id].reward += progress_reward; - } 
else if (progress < 0) { - // Moving away from objective (penalty) - float wrong_dir_penalty = env->reward_wrong_direction * progress; // progress is negative - env->rewards[ant_id] += wrong_dir_penalty; - env->ant_logs[ant_id].episode_return += wrong_dir_penalty; - env->ant_logs[ant_id].reward += wrong_dir_penalty; - } + for (int i = 0; i < env->num_food_sources; i++) { + spawn_food(env); } - // Update previous distance for next step - ant->prev_dist_to_objective = current_dist; - - // Time penalty (encourages efficiency) - env->rewards[ant_id] += env->reward_time_penalty; - env->ant_logs[ant_id].episode_return += env->reward_time_penalty; - env->ant_logs[ant_id].reward += env->reward_time_penalty; - - // Check for food collection - if (!ant->has_food) { - for (int j = 0; j < env->num_food_sources; j++) { - if (env->food_sources[j].amount > 0) { - float dist_sq = distance_squared(ant->position, env->food_sources[j].position); - if (dist_sq < (ANT_SIZE + FOOD_SIZE) * (ANT_SIZE + FOOD_SIZE)) { - ant->has_food = true; - env->food_sources[j].amount--; - - // If food source is exhausted, respawn it - if (env->food_sources[j].amount <= 0) { - spawn_food(env); - } + // Clear buffers + memset(env->rewards, 0, env->num_ants * sizeof(float)); + memset(env->terminals, 0, env->num_ants * sizeof(unsigned char)); - env->rewards[ant_id] += env->reward_food; - env->ant_logs[ant_id].episode_return += env->reward_food; - env->ant_logs[ant_id].reward += env->reward_food; + compute_observations(env); +} - // Reset tracking for new objective (now need to return to colony) - ant->prev_dist_to_objective = -1.0f; - ant->steps_with_food = 0; - break; +// Update food collection and delivery - FOLLOWING TARGET update_goals PATTERN +void update_food_interactions(AntsEnv* env) { + for (int a = 0; a < env->num_ants; a++) { + Ant* ant = &env->ants[a]; + + // Check for food pickup + if (!ant->has_food) { + for (int f = 0; f < env->num_food_sources; f++) { + if (env->food_sources[f].amount > 0) { 
+ float dist_sq = distance_squared(ant->position, env->food_sources[f].position); + if (dist_sq < (ANT_SIZE + FOOD_SIZE) * (ANT_SIZE + FOOD_SIZE)) { + ant->has_food = true; + env->food_sources[f].amount--; + + // Respawn food if depleted + if (env->food_sources[f].amount <= 0) { + spawn_food(env); + } + + // Simple reward + env->rewards[a] += env->reward_food_pickup; + env->log.episode_return += env->reward_food_pickup; + break; + } } } } - } - // Check for food delivery - if (ant->has_food) { - Colony* colony = &env->colonies[ant->colony_id]; - float dist_sq = distance_squared(ant->position, colony->position); - if (dist_sq < (ANT_SIZE + COLONY_SIZE) * (ANT_SIZE + COLONY_SIZE)) { - ant->has_food = false; - colony->food_collected++; - - // Base delivery reward - float delivery_reward = env->reward_delivery; - - // Efficiency bonus: reward faster deliveries - // Normalize by expected optimal steps (width/2 / ANT_SPEED = ~128 steps average) - // Bonus decreases as steps_with_food increases - if (env->reward_efficiency_bonus > 0 && ant->steps_with_food > 0) { - float expected_steps = env->width / (2.0f * ANT_SPEED); - float efficiency_ratio = expected_steps / (float)ant->steps_with_food; - // Only give bonus if delivery was faster than expected - if (efficiency_ratio > 1.0f) { - float efficiency_bonus = env->reward_efficiency_bonus * (efficiency_ratio - 1.0f); - delivery_reward += efficiency_bonus; - } + // Check for food delivery + if (ant->has_food) { + Colony* colony = &env->colonies[ant->colony_id]; + float dist_sq = distance_squared(ant->position, colony->position); + if (dist_sq < (ANT_SIZE + COLONY_SIZE) * (ANT_SIZE + COLONY_SIZE)) { + ant->has_food = false; + colony->food_collected++; + + // Reward and log update - LIKE TARGET + env->rewards[a] += env->reward_delivery; + env->log.perf += 1.0f; // Performance metric (food delivered) + env->log.score += 1.0f; // Score (food delivered) + env->log.episode_return += env->reward_delivery; + env->log.episode_length 
+= ant->steps_alive; + env->log.n += 1; // Episode count (number of deliveries) + + // Reset ant after delivery + ant->steps_alive = 0; } - - env->rewards[ant_id] += delivery_reward; - env->ant_logs[ant_id].episode_return += delivery_reward; - env->ant_logs[ant_id].score += 1; - env->ant_logs[ant_id].reward += delivery_reward; - - // Reset tracking for new foraging trip - ant->prev_dist_to_objective = -1.0f; - ant->steps_with_food = 0; } } - - // MULTIPLE TERMINAL CONDITIONS FOR FREQUENT LOG GENERATION - bool should_terminate = false; - - // Terminal Condition 1: Shorter lifetime limit (similar to snake death frequency) - // if (ant->lifetime > ANT_LIFETIME) { - // should_terminate = true; - // } - - // // Terminal Condition 2: Random death chance (0.1% per step after lifetime) - if (ant->lifetime > ANT_LIFETIME && (rand() % 1000) < 1) { - should_terminate = true; - } - - // // Terminal Condition 3: Performance-based termination after food delivery - // if (env->ant_logs[ant_id].score > 0 && (rand() % 100) < 5) { - // should_terminate = true; - // } - - // Execute termination and log aggregation - if (should_terminate) { - env->ant_logs[ant_id].perf = env->ant_logs[ant_id].episode_length > 0 ? 
- env->ant_logs[ant_id].score / env->ant_logs[ant_id].episode_length : 0; - add_log(env, ant_id); - spawn_ant(env, ant_id); //Respawn the ant - env->terminals[ant_id] = 1; - } } +// Required function: step environment void c_step(AntsEnv* env) { env->tick++; - + // Clear rewards and terminals memset(env->rewards, 0, env->num_ants * sizeof(float)); memset(env->terminals, 0, env->num_ants * sizeof(unsigned char)); - - // Step all ants + + // Update all ants - SIMPLIFIED LIKE TARGET for (int i = 0; i < env->num_ants; i++) { - step_ant(env, i); - } + Ant* ant = &env->ants[i]; + ant->steps_alive++; + + // Execute action + int action = env->actions[i]; + switch (action) { + case ACTION_TURN_LEFT: + ant->direction -= TURN_ANGLE; + ant->direction = wrap_angle(ant->direction); + break; + case ACTION_TURN_RIGHT: + ant->direction += TURN_ANGLE; + ant->direction = wrap_angle(ant->direction); + break; + case ACTION_MOVE_FORWARD: + ant->position.x += ANT_SPEED * cos(ant->direction); + ant->position.y += ANT_SPEED * sin(ant->direction); + break; + case ACTION_NOOP: + // Do nothing + break; + } - // Update pheromones - // for (int i = 0; i < env->num_pheromones; i++) { - // env->pheromones[i].strength -= PHEROMONE_EVAPORATION_RATE; - // if (env->pheromones[i].strength <= 0) { - // // Remove evaporated pheromone - // env->pheromones[i] = env->pheromones[env->num_pheromones - 1]; - // env->num_pheromones--; - // i--; - // } - // } - - // Generate new observations - compute_observations(env); -} + // Wrap around edges + if (ant->position.x < 0) ant->position.x = env->width; + if (ant->position.x > env->width) ant->position.x = 0; + if (ant->position.y < 0) ant->position.y = env->height; + if (ant->position.y > env->height) ant->position.y = 0; -// Raylib client functions - FOLLOWING SNAKE PATTERN -Client* make_client(int cell_size, int width, int height) { - Client* client = (Client*)malloc(sizeof(Client)); - client->cell_size = cell_size; - client->width = width; - client->height = 
height; - InitWindow(width, height, "PufferLib Ant Colony"); - SetTargetFPS(60); - return client; -} + // Periodic reset like target.c (lines 158-161) + if (ant->steps_alive % ANT_RESET_INTERVAL == 0) { + spawn_ant(env, i); + env->terminals[i] = 1; + } + } -void close_client(Client* client) { - CloseWindow(); - free(client); + // Update food interactions + update_food_interactions(env); + + // Compute new observations + compute_observations(env); } +// Required function: render (with lazy client initialization) void c_render(AntsEnv* env) { + if (env->client == NULL) { + InitWindow(env->width, env->height, "PufferLib Ants"); + SetTargetFPS(60); + env->client = (Client*)calloc(1, sizeof(Client)); + env->client->cell_size = 1; + env->client->width = env->width; + env->client->height = env->height; + } + + // Standard exit key if (IsKeyDown(KEY_ESCAPE)) { exit(0); } - - if (env->client == NULL) { - env->client = make_client(1, env->width, env->height); - } - + BeginDrawing(); ClearBackground(BACKGROUND_COLOR); - + // Draw colonies for (int i = 0; i < NUM_COLONIES; i++) { - Color colony_color = (i == 0) ? COLONY1_COLOR : COLONY2_COLOR; - DrawCircle(env->colonies[i].position.x, env->colonies[i].position.y, COLONY_SIZE, colony_color); + Color color = (i == 0) ? 
COLONY1_COLOR : COLONY2_COLOR; + DrawCircle(env->colonies[i].position.x, env->colonies[i].position.y, + COLONY_SIZE, color); } - - // Draw food sources + + // Draw food for (int i = 0; i < env->num_food_sources; i++) { if (env->food_sources[i].amount > 0) { - DrawCircle(env->food_sources[i].position.x, env->food_sources[i].position.y, + DrawCircle(env->food_sources[i].position.x, env->food_sources[i].position.y, FOOD_SIZE, FOOD_COLOR); - DrawText(TextFormat("%d", env->food_sources[i].amount), - env->food_sources[i].position.x, env->food_sources[i].position.y, 10, RAYWHITE); + DrawText(TextFormat("%d", env->food_sources[i].amount), + env->food_sources[i].position.x - 5, + env->food_sources[i].position.y - 5, 10, RAYWHITE); } } - - // Draw pheromones - // for (int i = 0; i < env->num_pheromones; i++) { - // Color pheromone_color = (env->pheromones[i].colony_id == 0) ? PHEROMONE1_COLOR : PHEROMONE2_COLOR; - // pheromone_color.a = (unsigned char)(100 * env->pheromones[i].strength); - // DrawCircle(env->pheromones[i].position.x, env->pheromones[i].position.y, - // PHEROMONE_SIZE, pheromone_color); - // } - + // Draw ants for (int i = 0; i < env->num_ants; i++) { Ant* ant = &env->ants[i]; - Color ant_color = (ant->colony_id == 0) ? COLONY1_COLOR : COLONY2_COLOR; - DrawCircle(ant->position.x, ant->position.y, ANT_SIZE, ant->has_food ? FOOD_COLOR : ant_color); - - // Draw direction indicator + Color color = (ant->colony_id == 0) ? 
COLONY1_COLOR : COLONY2_COLOR; + + // Change color if carrying food + if (ant->has_food) { + color = FOOD_COLOR; + } + + DrawCircle(ant->position.x, ant->position.y, ANT_SIZE, color); + + // Direction indicator float dir_x = ant->position.x + (ANT_SIZE * 1.5f) * cos(ant->direction); float dir_y = ant->position.y + (ANT_SIZE * 1.5f) * sin(ant->direction); DrawLine(ant->position.x, ant->position.y, dir_x, dir_y, RAYWHITE); } - + // Draw UI - DrawText(TextFormat("Colony 1 Food: %d", env->colonies[0].food_collected), 20, 20, 20, COLONY1_COLOR); - DrawText(TextFormat("Colony 2 Food: %d", env->colonies[1].food_collected), 20, 50, 20, COLONY2_COLOR); + DrawText(TextFormat("Colony 1: %d", env->colonies[0].food_collected), + 20, 20, 20, COLONY1_COLOR); + DrawText(TextFormat("Colony 2: %d", env->colonies[1].food_collected), + 20, 50, 20, COLONY2_COLOR); DrawText(TextFormat("Tick: %d", env->tick), env->width - 120, 20, 20, RAYWHITE); - + EndDrawing(); } + +// Required function: cleanup +void c_close(AntsEnv* env) { + if (env->ants) { + free(env->ants); + env->ants = NULL; + } + if (env->client != NULL) { + CloseWindow(); + free(env->client); + env->client = NULL; + } +} diff --git a/pufferlib/ocean/ants/ants.py b/pufferlib/ocean/ants/ants.py index 2cf5b7a8d..02a4d705e 100644 --- a/pufferlib/ocean/ants/ants.py +++ b/pufferlib/ocean/ants/ants.py @@ -1,172 +1,123 @@ +'''Ant Colony Simulation Environment - Simplified following Target pattern''' + import numpy as np import gymnasium import pufferlib -from pufferlib import APIUsageError from pufferlib.ocean.ants import binding class AntsEnv(pufferlib.PufferEnv): """ Ant Colony Simulation Environment - - Each ant receives observations about its surroundings and can: - - Move forward (always happens) - - Turn left/right - - Drop pheromone trails - + Two colonies compete to collect food from the environment. - Following multiagent architecture patterns from snake environment. 
+ Simplified architecture following the Target environment pattern. + + Observations (6 per ant): + - colony_dx, colony_dy: Direction to home colony (normalized) + - food_dx, food_dy: Direction to nearest food (normalized) + - has_food: Binary flag (0 or 1) + - heading: Ant's current direction (normalized) + + Actions (Discrete 4): + 0: Turn left + 1: Turn right + 2: Move forward + 3: No-op """ - + def __init__( self, - num_envs=32, + num_envs=1, width=1280, height=720, num_ants=32, - reward_food=0.1, + reward_food_pickup=0.1, reward_delivery=10.0, - reward_death=0.0, - reward_demo_match=0.001, - reward_demo_mismatch=-0.001, - reward_progress=0.01, - reward_time_penalty=-0.001, - reward_wrong_direction=-0.005, - reward_efficiency_bonus=2.0, - report_interval=1, render_mode=None, + log_interval=128, buf=None, seed=0): - - if num_envs is not None: - num_ants = num_envs * [num_ants] - width = num_envs * [width] - height = num_envs * [height] - - if not (len(num_ants) == len(width) == len(height)): - raise APIUsageError('num_ants, width, height must be lists of equal length') - - for w, h in zip(width, height): - if w < 100 or h < 100: - raise APIUsageError('width and height must be at least 100') - - self.report_interval = report_interval - self.num_agents = sum(num_ants) - self.render_mode = render_mode - self.tick = 0 - self.single_action_space = gymnasium.spaces.Discrete(4) + + # Simple observation space: 6 values per ant self.single_observation_space = gymnasium.spaces.Box( - low=-1.0, high=1.0, shape=(8,), dtype=np.float32 + low=-1.0, high=1.0, shape=(6,), dtype=np.float32 ) - - # Calculate cell size for rendering - self.cell_size = int(np.ceil(1280 / max(max(width), max(height)))) + # Discrete action space: turn left, turn right, move forward, noop + self.single_action_space = gymnasium.spaces.Discrete(4) + + self.render_mode = render_mode + self.num_agents = num_envs * num_ants + self.log_interval = log_interval super().__init__(buf) c_envs = [] - offset = 0 
for i in range(num_envs): - na = num_ants[i] - obs_slice = self.observations[offset:offset+na*8] # Multiply by obs_size - act_slice = self.actions[offset:offset+na] - rew_slice = self.rewards[offset:offset+na] - term_slice = self.terminals[offset:offset+na] - trunc_slice = self.truncations[offset:offset+na] - - # Seed each env uniquely: i + seed * num_envs - env_seed = i + seed * num_envs - env_id = binding.env_init( - obs_slice, - act_slice, - rew_slice, - term_slice, - trunc_slice, - env_seed, - width=width[i], - height=height[i], - num_ants=na, - reward_food=reward_food, - reward_delivery=reward_delivery, - reward_death=reward_death, - reward_demo_match=reward_demo_match, - reward_demo_mismatch=reward_demo_mismatch, - reward_progress=reward_progress, - reward_time_penalty=reward_time_penalty, - reward_wrong_direction=reward_wrong_direction, - reward_efficiency_bonus=reward_efficiency_bonus, - cell_size=self.cell_size + c_env = binding.env_init( + self.observations[i*num_ants:(i+1)*num_ants], + self.actions[i*num_ants:(i+1)*num_ants], + self.rewards[i*num_ants:(i+1)*num_ants], + self.terminals[i*num_ants:(i+1)*num_ants], + self.truncations[i*num_ants:(i+1)*num_ants], + seed + i, # Unique seed per env + width=width, + height=height, + num_ants=num_ants, + reward_food_pickup=reward_food_pickup, + reward_delivery=reward_delivery ) - c_envs.append(env_id) - offset += na * 8 # Multiply by obs_size - - # VECTORIZE ENVIRONMENTS - FOLLOWING SNAKE PATTERN + c_envs.append(c_env) + self.c_envs = binding.vectorize(*c_envs) - - def reset(self, seed=None): + + def reset(self, seed=0): """Reset all environments""" + binding.vec_reset(self.c_envs, seed) self.tick = 0 - if seed is None: - binding.vec_reset(self.c_envs, 0) - else: - binding.vec_reset(self.c_envs, seed) return self.observations, [] - + def step(self, actions): """Execute one step for all agents""" - self.actions[:] = actions self.tick += 1 + self.actions[:] = actions binding.vec_step(self.c_envs) - + info = [] - if 
self.tick % self.report_interval == 0: - log_data = binding.vec_log(self.c_envs) - if log_data: - # Add computed metrics - info.append(log_data) - + if self.tick % self.log_interval == 0: + log = binding.vec_log(self.c_envs) + if log: + info.append(log) + return (self.observations, self.rewards, self.terminals, self.truncations, info) - + def render(self): """Render the first environment""" binding.vec_render(self.c_envs, 0) - + def close(self): """Clean up resources""" binding.vec_close(self.c_envs) -def test_performance(timeout=10, atn_cache=1024): - """Performance test following snake pattern""" - env = AntsEnv(num_envs=64, num_ants=50) +if __name__ == '__main__': + # Performance test following target pattern + N = 512 + + env = AntsEnv(num_envs=N) env.reset() - tick = 0 - - total_ants = env.num_agents - actions = np.random.randint(0, 4, (atn_cache, total_ants)) - + steps = 0 + + CACHE = 1024 + actions = np.random.randint(4, size=(CACHE, N)) + + i = 0 import time start = time.time() - while time.time() - start < timeout: - atns = actions[tick % atn_cache] - obs, rewards, dones, truncs, info = env.step(atns) - - # Print info when available - if info: - for log_data in info: - if 'n' in log_data and log_data['n'] > 0: - print(f"Tick {tick}: Episodes: {log_data['n']:.0f}, " - f"Avg score: {log_data.get('score', 0) / log_data['n']:.2f}, " - f"Avg return: {log_data.get('episode_return', 0) / log_data['n']:.2f}") - - tick += 1 - - elapsed = time.time() - start - sps = total_ants * tick / elapsed - print(f'Ant SPS: {sps:.0f} ({tick} environment steps)') - env.close() - + while time.time() - start < 10: + env.step(actions[i % CACHE]) + steps += env.num_agents + i += 1 -if __name__ == '__main__': - test_performance() \ No newline at end of file + print('Ants SPS:', int(steps / (time.time() - start))) diff --git a/pufferlib/ocean/ants/binding.c b/pufferlib/ocean/ants/binding.c index b8b56adb0..835cbab9e 100644 --- a/pufferlib/ocean/ants/binding.c +++ 
b/pufferlib/ocean/ants/binding.c @@ -7,17 +7,10 @@ static int my_init(Env* env, PyObject* args, PyObject* kwargs) { env->width = unpack(kwargs, "width"); env->height = unpack(kwargs, "height"); env->num_ants = unpack(kwargs, "num_ants"); - env->reward_food = unpack(kwargs, "reward_food"); + env->reward_food_pickup = unpack(kwargs, "reward_food_pickup"); env->reward_delivery = unpack(kwargs, "reward_delivery"); - env->reward_demo_match = unpack(kwargs, "reward_demo_match"); - env->reward_demo_mismatch = unpack(kwargs, "reward_demo_mismatch"); - env->reward_progress = unpack(kwargs, "reward_progress"); - env->reward_time_penalty = unpack(kwargs, "reward_time_penalty"); - env->reward_wrong_direction = unpack(kwargs, "reward_wrong_direction"); - env->reward_efficiency_bonus = unpack(kwargs, "reward_efficiency_bonus"); - env->cell_size = unpack(kwargs, "cell_size"); - init_ants_env(env); + init(env); return 0; } @@ -27,6 +20,5 @@ static int my_log(PyObject* dict, Log* log) { assign_to_dict(dict, "episode_return", log->episode_return); assign_to_dict(dict, "episode_length", log->episode_length); assign_to_dict(dict, "n", log->n); - assign_to_dict(dict, "reward", log->reward); return 0; -} \ No newline at end of file +} From 73b1346e9ae026e321b356db2c57ead518220aa3 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Wed, 17 Dec 2025 10:22:50 -0500 Subject: [PATCH 17/23] reduced cone of vision and enabled pheromones --- pufferlib/ocean/ants/ants.c | 16 +++- pufferlib/ocean/ants/ants.h | 180 ++++++++++++++++++++++++++++++++--- pufferlib/ocean/ants/ants.py | 20 +++- 3 files changed, 193 insertions(+), 23 deletions(-) diff --git a/pufferlib/ocean/ants/ants.c b/pufferlib/ocean/ants/ants.c index 543a78742..29fabcee4 100644 --- a/pufferlib/ocean/ants/ants.c +++ b/pufferlib/ocean/ants/ants.c @@ -10,7 +10,7 @@ int main() { int num_ants = 64; - int num_obs = 6; // Simplified observation space + int num_obs = 10; // Observation space with pheromones and neighbor awareness AntsEnv env = 
{ .width = 1280, @@ -32,9 +32,17 @@ int main() { c_reset(&env); c_render(&env); - printf("Ant Colony Demo\n"); - printf("Controls: ESC to exit\n"); - printf("Using simple heuristic AI: seek food -> return to colony\n\n"); + printf("Ant Colony Demo with Pheromones, Vision & Neighbor Awareness\n"); + printf("=============================================================\n"); + printf("Controls:\n"); + printf(" [V] - Toggle vision cone visualization\n"); + printf(" [ESC] - Exit\n\n"); + printf("Features:\n"); + printf(" - Very limited vision: 100px range, 30° narrow beam\n"); + printf(" - Neighbor awareness: ants can see nearby colony members\n"); + printf(" - Automatic pheromone trails when carrying food\n"); + printf(" - Pheromone evaporation (1000 step lifetime)\n"); + printf(" - Simple heuristic AI: seek food -> return to colony\n\n"); // Main loop - exit with ESC or close window while (!WindowShouldClose()) { diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index 60d557abd..3caf7ac98 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -28,6 +28,17 @@ #define MIN_FOOD_COLONY_DISTANCE 50.0f #define ANT_RESET_INTERVAL 2048 // Reset ant every N steps (like target.c) +// Pheromone system constants +#define MAX_PHEROMONES 5000 +#define PHEROMONE_DEPOSIT_AMOUNT 1.0f +#define PHEROMONE_EVAPORATION_RATE 0.001f +#define PHEROMONE_SIZE 2 +#define PHEROMONE_DROP_INTERVAL 5 // Drop pheromone every N steps while carrying food + +// Vision system constants +#define ANT_VISION_RANGE 100.0f +#define ANT_VISION_ANGLE (M_PI / 6.0f) // 30 degrees (π/6) + // Actions #define ACTION_TURN_LEFT 0 #define ACTION_TURN_RIGHT 1 @@ -37,6 +48,8 @@ // Colors #define COLONY1_COLOR (Color){220, 0, 0, 255} #define COLONY2_COLOR (Color){0, 0, 220, 255} +#define PHEROMONE1_COLOR (Color){255, 200, 200, 100} +#define PHEROMONE2_COLOR (Color){200, 200, 255, 100} #define FOOD_COLOR (Color){0, 200, 0, 255} #define BACKGROUND_COLOR (Color){50, 50, 50, 255} @@ 
-64,13 +77,21 @@ typedef struct { int amount; } FoodSource; +// Pheromone trail marker +typedef struct { + Vector2D position; + float strength; + int colony_id; +} Pheromone; + // Individual ant agent typedef struct { Vector2D position; float direction; int colony_id; bool has_food; - int steps_alive; // Track steps for periodic reset + int steps_alive; // Track steps for periodic reset + int steps_since_pheromone; // Track when to drop next pheromone } Ant; // Colony home base @@ -84,6 +105,7 @@ struct Client { int cell_size; int width; int height; + bool show_vision_cones; // Toggle for vision cone visualization }; // Main environment struct - FOLLOWING TARGET PATTERN @@ -93,6 +115,7 @@ struct AntsEnv { Ant* ants; // Dynamic array of ants Colony colonies[NUM_COLONIES]; FoodSource food_sources[MAX_FOOD_SOURCES]; + Pheromone pheromones[MAX_PHEROMONES]; // Required PufferLib fields float* observations; // Flattened observations @@ -105,6 +128,7 @@ struct AntsEnv { int width; // Environment width int height; // Environment height int num_food_sources; // Active food sources + int num_pheromones; // Active pheromones int tick; // Current timestep // Simple reward parameters (like target.c) @@ -139,6 +163,40 @@ static inline float clip(float val, float min, float max) { return val; } +// Check if target is within ant's vision cone +static inline bool is_in_vision(Vector2D ant_pos, float ant_dir, Vector2D target) { + float dx = target.x - ant_pos.x; + float dy = target.y - ant_pos.y; + float dist_sq = dx * dx + dy * dy; + + // Check range + if (dist_sq > ANT_VISION_RANGE * ANT_VISION_RANGE) { + return false; + } + + // Check angle + float angle_to_target = atan2(dy, dx); + float angle_diff = wrap_angle(angle_to_target - ant_dir); + + return fabs(angle_diff) <= ANT_VISION_ANGLE / 2.0f; +} + +// Add pheromone to the environment +static inline void add_pheromone(AntsEnv* env, Vector2D position, int colony_id) { + if (env->num_pheromones >= MAX_PHEROMONES) { + // Replace 
oldest pheromone (circular buffer) + for (int i = 0; i < env->num_pheromones - 1; i++) { + env->pheromones[i] = env->pheromones[i + 1]; + } + env->num_pheromones--; + } + + env->pheromones[env->num_pheromones].position = position; + env->pheromones[env->num_pheromones].strength = PHEROMONE_DEPOSIT_AMOUNT; + env->pheromones[env->num_pheromones].colony_id = colony_id; + env->num_pheromones++; +} + // Spawn a new ant at its colony void spawn_ant(AntsEnv* env, int ant_id) { Ant* ant = &env->ants[ant_id]; @@ -148,6 +206,7 @@ void spawn_ant(AntsEnv* env, int ant_id) { ant->direction = wrap_angle((rand() % 8) * (M_PI / 4)); ant->has_food = false; ant->steps_alive = 0; + ant->steps_since_pheromone = 0; } // Spawn food at a valid location @@ -188,6 +247,7 @@ void init(AntsEnv* env) { env->ants = (Ant*)calloc(env->num_ants, sizeof(Ant)); env->tick = 0; env->client = NULL; + env->num_pheromones = 0; // Initialize colonies env->colonies[0].position = (Vector2D){env->width / 4, env->height / 2}; @@ -202,7 +262,7 @@ void init(AntsEnv* env) { } } -// Compute observations for all ants - FOLLOWING TARGET PATTERN +// Compute observations for all ants - WITH VISION AND PHEROMONES void compute_observations(AntsEnv* env) { int obs_idx = 0; @@ -210,24 +270,46 @@ void compute_observations(AntsEnv* env) { Ant* ant = &env->ants[a]; Colony* colony = &env->colonies[ant->colony_id]; - // Find closest food source + // Find closest visible food source (with vision constraints) float closest_food_dist_sq = env->width * env->width + env->height * env->height; Vector2D closest_food_pos = {0, 0}; bool found_food = false; for (int i = 0; i < env->num_food_sources; i++) { if (env->food_sources[i].amount > 0) { - float dist_sq = distance_squared(ant->position, env->food_sources[i].position); - if (dist_sq < closest_food_dist_sq) { - closest_food_dist_sq = dist_sq; - closest_food_pos = env->food_sources[i].position; - found_food = true; + Vector2D food_pos = env->food_sources[i].position; + if 
(is_in_vision(ant->position, ant->direction, food_pos)) { + float dist_sq = distance_squared(ant->position, food_pos); + if (dist_sq < closest_food_dist_sq) { + closest_food_dist_sq = dist_sq; + closest_food_pos = food_pos; + found_food = true; + } } } } - // Observation: [colony_dx, colony_dy, food_dx, food_dy, has_food, heading] - // Normalized to roughly -1 to 1 range + // Find closest visible pheromone from own colony + float closest_pheromone_dist_sq = env->width * env->width + env->height * env->height; + Vector2D closest_pheromone_pos = {0, 0}; + bool found_pheromone = false; + + for (int i = 0; i < env->num_pheromones; i++) { + if (env->pheromones[i].colony_id == ant->colony_id) { + Vector2D pheromone_pos = env->pheromones[i].position; + if (is_in_vision(ant->position, ant->direction, pheromone_pos)) { + float dist_sq = distance_squared(ant->position, pheromone_pos); + if (dist_sq < closest_pheromone_dist_sq) { + closest_pheromone_dist_sq = dist_sq; + closest_pheromone_pos = pheromone_pos; + found_pheromone = true; + } + } + } + } + + // Observation: [colony_dx, colony_dy, food_dx, food_dy, pheromone_dx, pheromone_dy, has_food, heading] + // 8 values total - normalized to roughly -1 to 1 range env->observations[obs_idx++] = (colony->position.x - ant->position.x) / env->width; env->observations[obs_idx++] = (colony->position.y - ant->position.y) / env->height; @@ -239,6 +321,14 @@ void compute_observations(AntsEnv* env) { env->observations[obs_idx++] = 0.0f; } + if (found_pheromone) { + env->observations[obs_idx++] = (closest_pheromone_pos.x - ant->position.x) / env->width; + env->observations[obs_idx++] = (closest_pheromone_pos.y - ant->position.y) / env->height; + } else { + env->observations[obs_idx++] = 0.0f; + env->observations[obs_idx++] = 0.0f; + } + env->observations[obs_idx++] = ant->has_food ? 
1.0f : 0.0f; env->observations[obs_idx++] = ant->direction / (2 * M_PI); } @@ -248,6 +338,7 @@ void compute_observations(AntsEnv* env) { void c_reset(AntsEnv* env) { env->tick = 0; env->log = (Log){0}; + env->num_pheromones = 0; // Reset colonies env->colonies[0].food_collected = 0; @@ -368,6 +459,15 @@ void c_step(AntsEnv* env) { if (ant->position.y < 0) ant->position.y = env->height; if (ant->position.y > env->height) ant->position.y = 0; + // Automatic pheromone dropping when carrying food + if (ant->has_food) { + ant->steps_since_pheromone++; + if (ant->steps_since_pheromone >= PHEROMONE_DROP_INTERVAL) { + add_pheromone(env, ant->position, ant->colony_id); + ant->steps_since_pheromone = 0; + } + } + // Periodic reset like target.c (lines 158-161) if (ant->steps_alive % ANT_RESET_INTERVAL == 0) { spawn_ant(env, i); @@ -375,6 +475,17 @@ void c_step(AntsEnv* env) { } } + // Update pheromone evaporation + for (int i = 0; i < env->num_pheromones; i++) { + env->pheromones[i].strength -= PHEROMONE_EVAPORATION_RATE; + if (env->pheromones[i].strength <= 0) { + // Remove evaporated pheromone (swap with last and shrink) + env->pheromones[i] = env->pheromones[env->num_pheromones - 1]; + env->num_pheromones--; + i--; // Check this slot again + } + } + // Update food interactions update_food_interactions(env); @@ -391,6 +502,7 @@ void c_render(AntsEnv* env) { env->client->cell_size = 1; env->client->width = env->width; env->client->height = env->height; + env->client->show_vision_cones = true; // Start with vision cones on } // Standard exit key @@ -398,6 +510,11 @@ void c_render(AntsEnv* env) { exit(0); } + // Toggle vision cones with 'V' key + if (IsKeyPressed(KEY_V)) { + env->client->show_vision_cones = !env->client->show_vision_cones; + } + BeginDrawing(); ClearBackground(BACKGROUND_COLOR); @@ -408,6 +525,14 @@ void c_render(AntsEnv* env) { COLONY_SIZE, color); } + // Draw pheromones (before other objects for layering) + for (int i = 0; i < env->num_pheromones; i++) { + 
Color pheromone_color = (env->pheromones[i].colony_id == 0) ? PHEROMONE1_COLOR : PHEROMONE2_COLOR; + pheromone_color.a = (unsigned char)(100 * env->pheromones[i].strength); + DrawCircle(env->pheromones[i].position.x, env->pheromones[i].position.y, + PHEROMONE_SIZE, pheromone_color); + } + // Draw food for (int i = 0; i < env->num_food_sources; i++) { if (env->food_sources[i].amount > 0) { @@ -419,19 +544,38 @@ void c_render(AntsEnv* env) { } } - // Draw ants + // Draw ants with optional vision cones for (int i = 0; i < env->num_ants; i++) { Ant* ant = &env->ants[i]; - Color color = (ant->colony_id == 0) ? COLONY1_COLOR : COLONY2_COLOR; + Color ant_color = (ant->colony_id == 0) ? COLONY1_COLOR : COLONY2_COLOR; + + // Draw vision cone if enabled (semi-transparent) + if (env->client->show_vision_cones) { + Color vision_color = ant_color; + vision_color.a = 30; // Very transparent + + // Calculate vision cone arc + float start_angle = (ant->direction - ANT_VISION_ANGLE / 2.0f) * 180.0f / M_PI; + float end_angle = (ant->direction + ANT_VISION_ANGLE / 2.0f) * 180.0f / M_PI; + + DrawCircleSector( + (Vector2){ant->position.x, ant->position.y}, + ANT_VISION_RANGE, + start_angle, + end_angle, + 32, // segments for smooth arc + vision_color + ); + } // Change color if carrying food if (ant->has_food) { - color = FOOD_COLOR; + ant_color = FOOD_COLOR; } - DrawCircle(ant->position.x, ant->position.y, ANT_SIZE, color); + DrawCircle(ant->position.x, ant->position.y, ANT_SIZE, ant_color); - // Direction indicator + // Direction indicator (pointing forward) float dir_x = ant->position.x + (ANT_SIZE * 1.5f) * cos(ant->direction); float dir_y = ant->position.y + (ANT_SIZE * 1.5f) * sin(ant->direction); DrawLine(ant->position.x, ant->position.y, dir_x, dir_y, RAYWHITE); @@ -443,6 +587,12 @@ void c_render(AntsEnv* env) { DrawText(TextFormat("Colony 2: %d", env->colonies[1].food_collected), 20, 50, 20, COLONY2_COLOR); DrawText(TextFormat("Tick: %d", env->tick), env->width - 120, 20, 20, 
RAYWHITE); + DrawText(TextFormat("Pheromones: %d", env->num_pheromones), env->width - 180, 50, 20, RAYWHITE); + + // Controls help + const char* vision_status = env->client->show_vision_cones ? "ON" : "OFF"; + DrawText(TextFormat("[V] Vision Cones: %s", vision_status), 20, env->height - 30, 16, RAYWHITE); + DrawText("[ESC] Exit", 20, env->height - 50, 16, GRAY); EndDrawing(); } diff --git a/pufferlib/ocean/ants/ants.py b/pufferlib/ocean/ants/ants.py index 02a4d705e..341c36072 100644 --- a/pufferlib/ocean/ants/ants.py +++ b/pufferlib/ocean/ants/ants.py @@ -13,12 +13,24 @@ class AntsEnv(pufferlib.PufferEnv): Two colonies compete to collect food from the environment. Simplified architecture following the Target environment pattern. - Observations (6 per ant): + Observations (8 per ant): - colony_dx, colony_dy: Direction to home colony (normalized) - - food_dx, food_dy: Direction to nearest food (normalized) + - food_dx, food_dy: Direction to nearest VISIBLE food (normalized, with vision constraints) + - pheromone_dx, pheromone_dy: Direction to nearest VISIBLE pheromone from own colony - has_food: Binary flag (0 or 1) - heading: Ant's current direction (normalized) + Vision System: + - Ants have limited vision range (100 pixels) - very short range + - Vision cone of 30 degrees (π/6) - narrow focused beam + - Can only see food and pheromones within their vision cone + + Pheromone System: + - Ants automatically drop pheromones every 5 steps while carrying food + - Pheromones evaporate over time (rate: 0.001 per step) + - Each colony's pheromones are distinct + - Ants only observe pheromones from their own colony + Actions (Discrete 4): 0: Turn left 1: Turn right @@ -39,9 +51,9 @@ def __init__( buf=None, seed=0): - # Simple observation space: 6 values per ant + # Observation space: 8 values per ant (added pheromone observations) self.single_observation_space = gymnasium.spaces.Box( - low=-1.0, high=1.0, shape=(6,), dtype=np.float32 + low=-1.0, high=1.0, shape=(8,), 
dtype=np.float32 ) # Discrete action space: turn left, turn right, move forward, noop self.single_action_space = gymnasium.spaces.Discrete(4) From 281f9f910c2905521c0c4594855fbc1ca87b9b20 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Wed, 17 Dec 2025 15:40:08 -0500 Subject: [PATCH 18/23] reducing steps in ini --- pufferlib/config/ocean/ants.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/config/ocean/ants.ini b/pufferlib/config/ocean/ants.ini index a23a9c901..c12219bae 100644 --- a/pufferlib/config/ocean/ants.ini +++ b/pufferlib/config/ocean/ants.ini @@ -11,4 +11,4 @@ num_envs = 8 num_envs = 32 [train] -total_timesteps = 500_000_000 \ No newline at end of file +total_timesteps = 100_000_000 \ No newline at end of file From 3b70e9f465e763422d6fff6d980f4545b22538f7 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Wed, 17 Dec 2025 15:52:16 -0500 Subject: [PATCH 19/23] updated logging and improved handling --- pufferlib/ocean/ants/ants.c | 12 +++++-- pufferlib/ocean/ants/ants.h | 70 ++++++++++++++++++++++++++++++------- 2 files changed, 67 insertions(+), 15 deletions(-) diff --git a/pufferlib/ocean/ants/ants.c b/pufferlib/ocean/ants/ants.c index 29fabcee4..6e196cef5 100644 --- a/pufferlib/ocean/ants/ants.c +++ b/pufferlib/ocean/ants/ants.c @@ -104,12 +104,18 @@ int main() { // Print stats every 60 frames if (env.tick % 60 == 0) { - printf("Tick: %d | Colony 1: %d | Colony 2: %d | Episodes: %.0f | Avg Score: %.2f\n", + float success_rate = env.log.total_resets > 0 + ? (env.log.successful_trips / env.log.total_resets * 100.0f) + : 0.0f; + printf("Tick: %d | C1: %d (%.0f%%) | C2: %d (%.0f%%) | Efficiency: %.1f steps/food | Throughput: %.2f | Success: %.1f%%\n", env.tick, env.colonies[0].food_collected, + env.log.total_deliveries > 0 ? (env.log.colony1_food / env.log.total_deliveries * 100.0f) : 0.0f, env.colonies[1].food_collected, - env.log.n, - env.log.n > 0 ? env.log.score / env.log.n : 0.0f); + env.log.total_deliveries > 0 ? 
(env.log.colony2_food / env.log.total_deliveries * 100.0f) : 0.0f, + env.log.avg_delivery_steps, + env.log.score, + success_rate); } } diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index 3caf7ac98..e1b8ede24 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -24,7 +24,7 @@ #define ANT_SIZE 4 #define FOOD_SIZE 6 #define COLONY_SIZE 20 -#define TURN_ANGLE (M_PI / 4) +#define TURN_ANGLE (M_PI / 12) #define MIN_FOOD_COLONY_DISTANCE 50.0f #define ANT_RESET_INTERVAL 2048 // Reset ant every N steps (like target.c) @@ -55,11 +55,17 @@ // Required Log struct for PufferLib typedef struct { - float perf; // Performance metric (score/length) - float score; // Total score (food deliveries) - float episode_return; // Cumulative rewards - float episode_length; // Episode duration - float n; // Episode count - REQUIRED AS LAST FIELD + float perf; // Average steps per delivery (efficiency - lower is better) + float score; // Food deliveries per 1000 steps (throughput) + float episode_return; // Cumulative rewards + float episode_length; // Total steps across all ants + float avg_delivery_steps; // Average steps taken per successful delivery + float colony1_food; // Food collected by colony 1 + float colony2_food; // Food collected by colony 2 + float total_deliveries; // Total successful food deliveries + float successful_trips; // Number of ants that successfully found food + float total_resets; // Total ant resets (successful + unsuccessful) + float n; // Episode count - REQUIRED AS LAST FIELD } Log; // Forward declarations @@ -391,6 +397,9 @@ void update_food_interactions(AntsEnv* env) { // Simple reward env->rewards[a] += env->reward_food_pickup; env->log.episode_return += env->reward_food_pickup; + + // Track successful trip (ant found food) + env->log.successful_trips += 1.0f; break; } } @@ -405,12 +414,28 @@ void update_food_interactions(AntsEnv* env) { ant->has_food = false; colony->food_collected++; - // Reward and log 
update - LIKE TARGET + // Reward and log update - WITH EFFICIENCY METRICS env->rewards[a] += env->reward_delivery; - env->log.perf += 1.0f; // Performance metric (food delivered) - env->log.score += 1.0f; // Score (food delivered) env->log.episode_return += env->reward_delivery; env->log.episode_length += ant->steps_alive; + env->log.total_deliveries += 1.0f; + + // Track per-colony performance + if (ant->colony_id == 0) { + env->log.colony1_food += 1.0f; + } else { + env->log.colony2_food += 1.0f; + } + + // Update derived efficiency metrics + env->log.avg_delivery_steps = env->log.episode_length / env->log.total_deliveries; + + // Performance: Average steps per delivery (lower is better) + env->log.perf = env->log.avg_delivery_steps; + + // Score: Food deliveries per 1000 steps (higher is better) + env->log.score = (env->log.total_deliveries * 1000.0f) / env->log.episode_length; + env->log.n += 1; // Episode count (number of deliveries) // Reset ant after delivery @@ -472,6 +497,7 @@ void c_step(AntsEnv* env) { if (ant->steps_alive % ANT_RESET_INTERVAL == 0) { spawn_ant(env, i); env->terminals[i] = 1; + env->log.total_resets += 1.0f; } } @@ -581,13 +607,33 @@ void c_render(AntsEnv* env) { DrawLine(ant->position.x, ant->position.y, dir_x, dir_y, RAYWHITE); } - // Draw UI - DrawText(TextFormat("Colony 1: %d", env->colonies[0].food_collected), + // Draw UI - Colony scores + DrawText(TextFormat("Colony 1: %d (%.1f%%)", + env->colonies[0].food_collected, + env->log.total_deliveries > 0 ? (env->log.colony1_food / env->log.total_deliveries * 100.0f) : 0.0f), 20, 20, 20, COLONY1_COLOR); - DrawText(TextFormat("Colony 2: %d", env->colonies[1].food_collected), + DrawText(TextFormat("Colony 2: %d (%.1f%%)", + env->colonies[1].food_collected, + env->log.total_deliveries > 0 ? 
(env->log.colony2_food / env->log.total_deliveries * 100.0f) : 0.0f), 20, 50, 20, COLONY2_COLOR); + + // Efficiency metrics + DrawText(TextFormat("Efficiency: %.1f steps/food", env->log.avg_delivery_steps), + 20, 80, 18, YELLOW); + DrawText(TextFormat("Throughput: %.2f food/1000 steps", env->log.score), + 20, 105, 18, YELLOW); + + // Success rate + float success_rate = env->log.total_resets > 0 + ? (env->log.successful_trips / env->log.total_resets * 100.0f) + : 0.0f; + DrawText(TextFormat("Success Rate: %.1f%%", success_rate), + 20, 130, 18, GREEN); + + // Right side - System info DrawText(TextFormat("Tick: %d", env->tick), env->width - 120, 20, 20, RAYWHITE); DrawText(TextFormat("Pheromones: %d", env->num_pheromones), env->width - 180, 50, 20, RAYWHITE); + DrawText(TextFormat("Deliveries: %.0f", env->log.total_deliveries), env->width - 180, 75, 18, RAYWHITE); // Controls help const char* vision_status = env->client->show_vision_cones ? "ON" : "OFF"; From 6cf2042a38c5de55719194949290797675e4cf23 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Wed, 17 Dec 2025 23:20:57 -0500 Subject: [PATCH 20/23] added pheromone range and density --- pufferlib/ocean/ants/ants.c | 14 +++++---- pufferlib/ocean/ants/ants.h | 58 ++++++++++++++++++++++++++++++------ pufferlib/ocean/ants/ants.py | 18 +++++++---- 3 files changed, 69 insertions(+), 21 deletions(-) diff --git a/pufferlib/ocean/ants/ants.c b/pufferlib/ocean/ants/ants.c index 6e196cef5..a8ffb6639 100644 --- a/pufferlib/ocean/ants/ants.c +++ b/pufferlib/ocean/ants/ants.c @@ -10,7 +10,7 @@ int main() { int num_ants = 64; - int num_obs = 10; // Observation space with pheromones and neighbor awareness + int num_obs = 9; // Observation space: colony, food, pheromone, has_food, heading, density AntsEnv env = { .width = 1280, @@ -32,14 +32,16 @@ int main() { c_reset(&env); c_render(&env); - printf("Ant Colony Demo with Pheromones, Vision & Neighbor Awareness\n"); - 
printf("=============================================================\n"); + printf("Ant Colony Demo with Pheromones, Vision & Density Awareness\n"); + printf("============================================================\n"); printf("Controls:\n"); - printf(" [V] - Toggle vision cone visualization\n"); + printf(" [V] - Toggle vision cone visualization\n"); + printf(" [P] - Toggle pheromone range visualization\n"); printf(" [ESC] - Exit\n\n"); printf("Features:\n"); - printf(" - Very limited vision: 100px range, 30° narrow beam\n"); - printf(" - Neighbor awareness: ants can see nearby colony members\n"); + printf(" - Vision: 50px range, 30° narrow beam (for food)\n"); + printf(" - Pheromone sensing: 100px range, 360° omnidirectional\n"); + printf(" - Density awareness: count friendly ants in pheromone range\n"); printf(" - Automatic pheromone trails when carrying food\n"); printf(" - Pheromone evaporation (1000 step lifetime)\n"); printf(" - Simple heuristic AI: seek food -> return to colony\n\n"); diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index e1b8ede24..d7d42c30c 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -36,9 +36,13 @@ #define PHEROMONE_DROP_INTERVAL 5 // Drop pheromone every N steps while carrying food // Vision system constants -#define ANT_VISION_RANGE 100.0f +#define ANT_VISION_RANGE 50.0f #define ANT_VISION_ANGLE (M_PI / 6.0f) // 30 degrees (π/6) +// Pheromone sensing constants +#define ANT_PHEROMONE_RANGE 100.0f // 100px range +#define ANT_PHEROMONE_ANGLE (2 * M_PI) // 360 degrees (full circle) + // Actions #define ACTION_TURN_LEFT 0 #define ACTION_TURN_RIGHT 1 @@ -111,7 +115,8 @@ struct Client { int cell_size; int width; int height; - bool show_vision_cones; // Toggle for vision cone visualization + bool show_vision_cones; // Toggle for vision cone visualization + bool show_pheromone_range; // Toggle for pheromone range visualization }; // Main environment struct - FOLLOWING TARGET PATTERN 
@@ -187,6 +192,12 @@ static inline bool is_in_vision(Vector2D ant_pos, float ant_dir, Vector2D target return fabs(angle_diff) <= ANT_VISION_ANGLE / 2.0f; } +// Check if target is within ant's pheromone sensing range (360 degrees) +static inline bool is_in_pheromone_range(Vector2D ant_pos, Vector2D target) { + float dist_sq = distance_squared(ant_pos, target); + return dist_sq <= ANT_PHEROMONE_RANGE * ANT_PHEROMONE_RANGE; +} + // Add pheromone to the environment static inline void add_pheromone(AntsEnv* env, Vector2D position, int colony_id) { if (env->num_pheromones >= MAX_PHEROMONES) { @@ -295,7 +306,7 @@ void compute_observations(AntsEnv* env) { } } - // Find closest visible pheromone from own colony + // Find closest pheromone from own colony (using pheromone range, not vision) float closest_pheromone_dist_sq = env->width * env->width + env->height * env->height; Vector2D closest_pheromone_pos = {0, 0}; bool found_pheromone = false; @@ -303,7 +314,7 @@ void compute_observations(AntsEnv* env) { for (int i = 0; i < env->num_pheromones; i++) { if (env->pheromones[i].colony_id == ant->colony_id) { Vector2D pheromone_pos = env->pheromones[i].position; - if (is_in_vision(ant->position, ant->direction, pheromone_pos)) { + if (is_in_pheromone_range(ant->position, pheromone_pos)) { float dist_sq = distance_squared(ant->position, pheromone_pos); if (dist_sq < closest_pheromone_dist_sq) { closest_pheromone_dist_sq = dist_sq; @@ -314,8 +325,18 @@ void compute_observations(AntsEnv* env) { } } - // Observation: [colony_dx, colony_dy, food_dx, food_dy, pheromone_dx, pheromone_dy, has_food, heading] - // 8 values total - normalized to roughly -1 to 1 range + // Count friendly ants within pheromone range (density) + int friendly_ants_nearby = 0; + for (int i = 0; i < env->num_ants; i++) { + if (i != a && env->ants[i].colony_id == ant->colony_id) { + if (is_in_pheromone_range(ant->position, env->ants[i].position)) { + friendly_ants_nearby++; + } + } + } + + // Observation: 
[colony_dx, colony_dy, food_dx, food_dy, pheromone_dx, pheromone_dy, has_food, heading, density] + // 9 values total - normalized to roughly -1 to 1 range env->observations[obs_idx++] = (colony->position.x - ant->position.x) / env->width; env->observations[obs_idx++] = (colony->position.y - ant->position.y) / env->height; @@ -337,6 +358,10 @@ void compute_observations(AntsEnv* env) { env->observations[obs_idx++] = ant->has_food ? 1.0f : 0.0f; env->observations[obs_idx++] = ant->direction / (2 * M_PI); + + // Normalize density by max possible ants per colony (roughly num_ants / 2) + float max_friendly_ants = (env->num_ants / NUM_COLONIES) - 1; // -1 to exclude self + env->observations[obs_idx++] = max_friendly_ants > 0 ? (float)friendly_ants_nearby / max_friendly_ants : 0.0f; } } @@ -528,7 +553,8 @@ void c_render(AntsEnv* env) { env->client->cell_size = 1; env->client->width = env->width; env->client->height = env->height; - env->client->show_vision_cones = true; // Start with vision cones on + env->client->show_vision_cones = true; // Start with vision cones on + env->client->show_pheromone_range = false; // Start with pheromone range off } // Standard exit key @@ -541,6 +567,11 @@ void c_render(AntsEnv* env) { env->client->show_vision_cones = !env->client->show_vision_cones; } + // Toggle pheromone range with 'P' key + if (IsKeyPressed(KEY_P)) { + env->client->show_pheromone_range = !env->client->show_pheromone_range; + } + BeginDrawing(); ClearBackground(BACKGROUND_COLOR); @@ -570,11 +601,18 @@ void c_render(AntsEnv* env) { } } - // Draw ants with optional vision cones + // Draw ants with optional vision cones and pheromone range for (int i = 0; i < env->num_ants; i++) { Ant* ant = &env->ants[i]; Color ant_color = (ant->colony_id == 0) ? 
COLONY1_COLOR : COLONY2_COLOR; + // Draw pheromone range if enabled (semi-transparent circle) + if (env->client->show_pheromone_range) { + Color pheromone_range_color = ant_color; + pheromone_range_color.a = 15; // Very transparent + DrawCircle(ant->position.x, ant->position.y, ANT_PHEROMONE_RANGE, pheromone_range_color); + } + // Draw vision cone if enabled (semi-transparent) if (env->client->show_vision_cones) { Color vision_color = ant_color; @@ -637,8 +675,10 @@ void c_render(AntsEnv* env) { // Controls help const char* vision_status = env->client->show_vision_cones ? "ON" : "OFF"; + const char* pheromone_status = env->client->show_pheromone_range ? "ON" : "OFF"; DrawText(TextFormat("[V] Vision Cones: %s", vision_status), 20, env->height - 30, 16, RAYWHITE); - DrawText("[ESC] Exit", 20, env->height - 50, 16, GRAY); + DrawText(TextFormat("[P] Pheromone Range: %s", pheromone_status), 20, env->height - 50, 16, RAYWHITE); + DrawText("[ESC] Exit", 20, env->height - 70, 16, GRAY); EndDrawing(); } diff --git a/pufferlib/ocean/ants/ants.py b/pufferlib/ocean/ants/ants.py index 341c36072..aa3dafea1 100644 --- a/pufferlib/ocean/ants/ants.py +++ b/pufferlib/ocean/ants/ants.py @@ -13,17 +13,23 @@ class AntsEnv(pufferlib.PufferEnv): Two colonies compete to collect food from the environment. Simplified architecture following the Target environment pattern. 
- Observations (8 per ant): + Observations (9 per ant): - colony_dx, colony_dy: Direction to home colony (normalized) - food_dx, food_dy: Direction to nearest VISIBLE food (normalized, with vision constraints) - - pheromone_dx, pheromone_dy: Direction to nearest VISIBLE pheromone from own colony + - pheromone_dx, pheromone_dy: Direction to nearest pheromone from own colony (within pheromone range) - has_food: Binary flag (0 or 1) - heading: Ant's current direction (normalized) + - density: Number of friendly ants within pheromone range (normalized) Vision System: - - Ants have limited vision range (100 pixels) - very short range + - Ants have limited vision range (50 pixels) for seeing food - Vision cone of 30 degrees (π/6) - narrow focused beam - - Can only see food and pheromones within their vision cone + - Can only see food within their vision cone + + Pheromone Sensing: + - Separate from vision: 100 pixels range, 360 degrees (omnidirectional) + - Can sense pheromones from own colony within this range + - Also used to detect nearby friendly ants (density) Pheromone System: - Ants automatically drop pheromones every 5 steps while carrying food @@ -51,9 +57,9 @@ def __init__( buf=None, seed=0): - # Observation space: 8 values per ant (added pheromone observations) + # Observation space: 9 values per ant (colony, food, pheromone, has_food, heading, density) self.single_observation_space = gymnasium.spaces.Box( - low=-1.0, high=1.0, shape=(8,), dtype=np.float32 + low=-1.0, high=1.0, shape=(9,), dtype=np.float32 ) # Discrete action space: turn left, turn right, move forward, noop self.single_action_space = gymnasium.spaces.Discrete(4) From 55775c8ef8979acd970c71f3ffbc981057785079 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Tue, 23 Dec 2025 21:09:41 -0500 Subject: [PATCH 21/23] added pheromone direction to observations --- pufferlib/ocean/ants/ants.h | 15 +++++++++++---- pufferlib/ocean/ants/ants.py | 7 ++++--- 2 files changed, 15 insertions(+), 7 deletions(-) 
diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index d7d42c30c..1f47b6962 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -92,6 +92,7 @@ typedef struct { Vector2D position; float strength; int colony_id; + float direction; // Direction the ant was moving when placing this pheromone } Pheromone; // Individual ant agent @@ -199,7 +200,7 @@ static inline bool is_in_pheromone_range(Vector2D ant_pos, Vector2D target) { } // Add pheromone to the environment -static inline void add_pheromone(AntsEnv* env, Vector2D position, int colony_id) { +static inline void add_pheromone(AntsEnv* env, Vector2D position, int colony_id, float direction) { if (env->num_pheromones >= MAX_PHEROMONES) { // Replace oldest pheromone (circular buffer) for (int i = 0; i < env->num_pheromones - 1; i++) { @@ -211,6 +212,7 @@ static inline void add_pheromone(AntsEnv* env, Vector2D position, int colony_id) env->pheromones[env->num_pheromones].position = position; env->pheromones[env->num_pheromones].strength = PHEROMONE_DEPOSIT_AMOUNT; env->pheromones[env->num_pheromones].colony_id = colony_id; + env->pheromones[env->num_pheromones].direction = direction; env->num_pheromones++; } @@ -309,6 +311,7 @@ void compute_observations(AntsEnv* env) { // Find closest pheromone from own colony (using pheromone range, not vision) float closest_pheromone_dist_sq = env->width * env->width + env->height * env->height; Vector2D closest_pheromone_pos = {0, 0}; + float closest_pheromone_direction = 0.0f; bool found_pheromone = false; for (int i = 0; i < env->num_pheromones; i++) { @@ -319,6 +322,7 @@ void compute_observations(AntsEnv* env) { if (dist_sq < closest_pheromone_dist_sq) { closest_pheromone_dist_sq = dist_sq; closest_pheromone_pos = pheromone_pos; + closest_pheromone_direction = env->pheromones[i].direction; found_pheromone = true; } } @@ -335,8 +339,8 @@ void compute_observations(AntsEnv* env) { } } - // Observation: [colony_dx, colony_dy, food_dx, 
food_dy, pheromone_dx, pheromone_dy, has_food, heading, density] - // 9 values total - normalized to roughly -1 to 1 range + // Observation: [colony_dx, colony_dy, food_dx, food_dy, pheromone_dx, pheromone_dy, pheromone_direction, has_food, heading, density] + // 10 values total - normalized to roughly -1 to 1 range env->observations[obs_idx++] = (colony->position.x - ant->position.x) / env->width; env->observations[obs_idx++] = (colony->position.y - ant->position.y) / env->height; @@ -351,9 +355,12 @@ void compute_observations(AntsEnv* env) { if (found_pheromone) { env->observations[obs_idx++] = (closest_pheromone_pos.x - ant->position.x) / env->width; env->observations[obs_idx++] = (closest_pheromone_pos.y - ant->position.y) / env->height; + // Normalize pheromone direction to -1 to 1 range (divide by π) + env->observations[obs_idx++] = closest_pheromone_direction / M_PI; } else { env->observations[obs_idx++] = 0.0f; env->observations[obs_idx++] = 0.0f; + env->observations[obs_idx++] = 0.0f; } env->observations[obs_idx++] = ant->has_food ? 1.0f : 0.0f; @@ -513,7 +520,7 @@ void c_step(AntsEnv* env) { if (ant->has_food) { ant->steps_since_pheromone++; if (ant->steps_since_pheromone >= PHEROMONE_DROP_INTERVAL) { - add_pheromone(env, ant->position, ant->colony_id); + add_pheromone(env, ant->position, ant->colony_id, ant->direction); ant->steps_since_pheromone = 0; } } diff --git a/pufferlib/ocean/ants/ants.py b/pufferlib/ocean/ants/ants.py index aa3dafea1..f34fd7f44 100644 --- a/pufferlib/ocean/ants/ants.py +++ b/pufferlib/ocean/ants/ants.py @@ -13,10 +13,11 @@ class AntsEnv(pufferlib.PufferEnv): Two colonies compete to collect food from the environment. Simplified architecture following the Target environment pattern. 
- Observations (9 per ant): + Observations (10 per ant): - colony_dx, colony_dy: Direction to home colony (normalized) - food_dx, food_dy: Direction to nearest VISIBLE food (normalized, with vision constraints) - pheromone_dx, pheromone_dy: Direction to nearest pheromone from own colony (within pheromone range) + - pheromone_direction: Direction the pheromone was placed in (normalized angle, -1 to 1) - has_food: Binary flag (0 or 1) - heading: Ant's current direction (normalized) - density: Number of friendly ants within pheromone range (normalized) @@ -57,9 +58,9 @@ def __init__( buf=None, seed=0): - # Observation space: 9 values per ant (colony, food, pheromone, has_food, heading, density) + # Observation space: 10 values per ant (colony, food, pheromone, pheromone_direction, has_food, heading, density) self.single_observation_space = gymnasium.spaces.Box( - low=-1.0, high=1.0, shape=(9,), dtype=np.float32 + low=-1.0, high=1.0, shape=(10,), dtype=np.float32 ) # Discrete action space: turn left, turn right, move forward, noop self.single_action_space = gymnasium.spaces.Discrete(4) From 56cdb352824e542c16be26645cc10bf2cb079ea4 Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Tue, 6 Jan 2026 08:38:31 -0500 Subject: [PATCH 22/23] added pheromone evaporation and vision changes --- pufferlib/ocean/ants/ants.c | 265 +++--- pufferlib/ocean/ants/ants.h | 1459 ++++++++++++++++++---------------- pufferlib/ocean/ants/ants.py | 26 +- 3 files changed, 905 insertions(+), 845 deletions(-) diff --git a/pufferlib/ocean/ants/ants.c b/pufferlib/ocean/ants/ants.c index a8ffb6639..f53493aba 100644 --- a/pufferlib/ocean/ants/ants.c +++ b/pufferlib/ocean/ants/ants.c @@ -1,132 +1,133 @@ -/* Ants: Pure C demo file for testing the environment. - * Build it with: - * bash scripts/build_ocean.sh ants local (debug) - * bash scripts/build_ocean.sh ants fast - * - * Following the Target env pattern for consistency. 
- */ -#include -#include "ants.h" - -int main() { - int num_ants = 64; - int num_obs = 9; // Observation space: colony, food, pheromone, has_food, heading, density - - AntsEnv env = { - .width = 1280, - .height = 720, - .num_ants = num_ants, - .reward_food_pickup = 0.1f, - .reward_delivery = 10.0f - }; - - init(&env); - - // Allocate buffers manually (normally passed from Python) - env.observations = calloc(env.num_ants * num_obs, sizeof(float)); - env.actions = calloc(env.num_ants, sizeof(int)); - env.rewards = calloc(env.num_ants, sizeof(float)); - env.terminals = calloc(env.num_ants, sizeof(unsigned char)); - - // Always call reset and render first - c_reset(&env); - c_render(&env); - - printf("Ant Colony Demo with Pheromones, Vision & Density Awareness\n"); - printf("============================================================\n"); - printf("Controls:\n"); - printf(" [V] - Toggle vision cone visualization\n"); - printf(" [P] - Toggle pheromone range visualization\n"); - printf(" [ESC] - Exit\n\n"); - printf("Features:\n"); - printf(" - Vision: 50px range, 30° narrow beam (for food)\n"); - printf(" - Pheromone sensing: 100px range, 360° omnidirectional\n"); - printf(" - Density awareness: count friendly ants in pheromone range\n"); - printf(" - Automatic pheromone trails when carrying food\n"); - printf(" - Pheromone evaporation (1000 step lifetime)\n"); - printf(" - Simple heuristic AI: seek food -> return to colony\n\n"); - - // Main loop - exit with ESC or close window - while (!WindowShouldClose()) { - // Simple demo AI: seek food when empty, return when full - for (int i = 0; i < env.num_ants; i++) { - Ant* ant = &env.ants[i]; - - // Simple heuristic AI - if (ant->has_food) { - // Return to colony - Colony* colony = &env.colonies[ant->colony_id]; - float angle_to_colony = get_angle(ant->position, colony->position); - float angle_diff = wrap_angle(angle_to_colony - ant->direction); - - if (angle_diff > M_PI / 8) { - env.actions[i] = ACTION_TURN_RIGHT; - } 
else if (angle_diff < -M_PI / 8) { - env.actions[i] = ACTION_TURN_LEFT; - } else { - env.actions[i] = ACTION_MOVE_FORWARD; - } - } else { - // Seek nearest food - float closest_dist_sq = env.width * env.width; - Vector2D closest_food = {0, 0}; - bool found = false; - - for (int f = 0; f < env.num_food_sources; f++) { - if (env.food_sources[f].amount > 0) { - float dist_sq = distance_squared(ant->position, env.food_sources[f].position); - if (dist_sq < closest_dist_sq) { - closest_dist_sq = dist_sq; - closest_food = env.food_sources[f].position; - found = true; - } - } - } - - if (found) { - float angle_to_food = get_angle(ant->position, closest_food); - float angle_diff = wrap_angle(angle_to_food - ant->direction); - - if (angle_diff > M_PI / 8) { - env.actions[i] = ACTION_TURN_RIGHT; - } else if (angle_diff < -M_PI / 8) { - env.actions[i] = ACTION_TURN_LEFT; - } else { - env.actions[i] = ACTION_MOVE_FORWARD; - } - } else { - // No food visible, just move forward - env.actions[i] = ACTION_MOVE_FORWARD; - } - } - } - - c_step(&env); - c_render(&env); - - // Print stats every 60 frames - if (env.tick % 60 == 0) { - float success_rate = env.log.total_resets > 0 - ? (env.log.successful_trips / env.log.total_resets * 100.0f) - : 0.0f; - printf("Tick: %d | C1: %d (%.0f%%) | C2: %d (%.0f%%) | Efficiency: %.1f steps/food | Throughput: %.2f | Success: %.1f%%\n", - env.tick, - env.colonies[0].food_collected, - env.log.total_deliveries > 0 ? (env.log.colony1_food / env.log.total_deliveries * 100.0f) : 0.0f, - env.colonies[1].food_collected, - env.log.total_deliveries > 0 ? (env.log.colony2_food / env.log.total_deliveries * 100.0f) : 0.0f, - env.log.avg_delivery_steps, - env.log.score, - success_rate); - } - } - - // Cleanup - free(env.observations); - free(env.actions); - free(env.rewards); - free(env.terminals); - c_close(&env); - - return 0; -} +/* Ants: Pure C demo file for testing the environment. 
+ * Build it with: + * bash scripts/build_ocean.sh ants local (debug) + * bash scripts/build_ocean.sh ants fast + * + * Following the Target env pattern for consistency. + */ +#include +#include "ants.h" + +int main() { + int num_ants = 64; + int num_obs = 27; // Observation space: colony, food, 5 pheromones (dx, dy, direction, strength each), has_food, heading, density + + AntsEnv env = { + .width = 1280, + .height = 720, + .num_ants = num_ants, + .reward_food_pickup = 0.1f, + .reward_delivery = 10.0f + }; + + init(&env); + + // Allocate buffers manually (normally passed from Python) + env.observations = calloc(env.num_ants * num_obs, sizeof(float)); + env.actions = calloc(env.num_ants, sizeof(int)); + env.rewards = calloc(env.num_ants, sizeof(float)); + env.terminals = calloc(env.num_ants, sizeof(unsigned char)); + + // Always call reset and render first + c_reset(&env); + c_render(&env); + + printf("Ant Colony Demo with Pheromones, Vision & Density Awareness\n"); + printf("============================================================\n"); + printf("Controls:\n"); + printf(" [V] - Toggle vision cone visualization\n"); + printf(" [P] - Toggle pheromone range visualization\n"); + printf(" [ESC] - Exit\n\n"); + printf("Features:\n"); + printf(" - Vision: 75px range, 60° beam (for food) - improved for exploration\n"); + printf(" - Pheromone sensing: 100px range, 360° omnidirectional\n"); + printf(" - Density awareness: count friendly ants in pheromone range\n"); + printf(" - Automatic pheromone trails when carrying food\n"); + printf(" - Pheromone evaporation (500 step lifetime) - faster to break loops\n"); + printf(" - Random exploration: ants stuck >100 steps get random turns (5%% chance)\n"); + printf(" - Simple heuristic AI: seek food -> return to colony\n\n"); + + // Main loop - exit with ESC or close window + while (!WindowShouldClose()) { + // Simple demo AI: seek food when empty, return when full + for (int i = 0; i < env.num_ants; i++) { + Ant* ant = 
&env.ants[i]; + + // Simple heuristic AI + if (ant->has_food) { + // Return to colony + Colony* colony = &env.colonies[ant->colony_id]; + float angle_to_colony = get_angle(ant->position, colony->position); + float angle_diff = wrap_angle(angle_to_colony - ant->direction); + + if (angle_diff > M_PI / 8) { + env.actions[i] = ACTION_TURN_RIGHT; + } else if (angle_diff < -M_PI / 8) { + env.actions[i] = ACTION_TURN_LEFT; + } else { + env.actions[i] = ACTION_MOVE_FORWARD; + } + } else { + // Seek nearest food + float closest_dist_sq = env.width * env.width; + Vector2D closest_food = {0, 0}; + bool found = false; + + for (int f = 0; f < env.num_food_sources; f++) { + if (env.food_sources[f].amount > 0) { + float dist_sq = distance_squared(ant->position, env.food_sources[f].position); + if (dist_sq < closest_dist_sq) { + closest_dist_sq = dist_sq; + closest_food = env.food_sources[f].position; + found = true; + } + } + } + + if (found) { + float angle_to_food = get_angle(ant->position, closest_food); + float angle_diff = wrap_angle(angle_to_food - ant->direction); + + if (angle_diff > M_PI / 8) { + env.actions[i] = ACTION_TURN_RIGHT; + } else if (angle_diff < -M_PI / 8) { + env.actions[i] = ACTION_TURN_LEFT; + } else { + env.actions[i] = ACTION_MOVE_FORWARD; + } + } else { + // No food visible, just move forward + env.actions[i] = ACTION_MOVE_FORWARD; + } + } + } + + c_step(&env); + c_render(&env); + + // Print stats every 60 frames + if (env.tick % 60 == 0) { + float success_rate = env.log.total_resets > 0 + ? (env.log.successful_trips / env.log.total_resets * 100.0f) + : 0.0f; + printf("Tick: %d | C1: %d (%.0f%%) | C2: %d (%.0f%%) | Efficiency: %.1f steps/food | Throughput: %.2f | Success: %.1f%%\n", + env.tick, + env.colonies[0].food_collected, + env.log.total_deliveries > 0 ? (env.log.colony1_food / env.log.total_deliveries * 100.0f) : 0.0f, + env.colonies[1].food_collected, + env.log.total_deliveries > 0 ? 
(env.log.colony2_food / env.log.total_deliveries * 100.0f) : 0.0f, + env.log.avg_delivery_steps, + env.log.score, + success_rate); + } + } + + // Cleanup + free(env.observations); + free(env.actions); + free(env.rewards); + free(env.terminals); + c_close(&env); + + return 0; +} diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h index 1f47b6962..90e3990a2 100644 --- a/pufferlib/ocean/ants/ants.h +++ b/pufferlib/ocean/ants/ants.h @@ -1,704 +1,755 @@ -/* Ants: A multiagent foraging environment inspired by ant colonies. - * Two colonies compete to collect food from the environment. - * Follows the Target env pattern for simplicity and clarity. - */ - -#define _USE_MATH_DEFINES -#include -#include -#include -#include -#include "raylib.h" - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - -// Environment constants -#define WINDOW_WIDTH 1280 -#define WINDOW_HEIGHT 720 -#define NUM_COLONIES 2 -#define MAX_FOOD_SOURCES 20 -#define MAX_FOOD_PER_SOURCE 20 -#define ANT_SPEED 5.0f -#define ANT_SIZE 4 -#define FOOD_SIZE 6 -#define COLONY_SIZE 20 -#define TURN_ANGLE (M_PI / 12) -#define MIN_FOOD_COLONY_DISTANCE 50.0f -#define ANT_RESET_INTERVAL 2048 // Reset ant every N steps (like target.c) - -// Pheromone system constants -#define MAX_PHEROMONES 5000 -#define PHEROMONE_DEPOSIT_AMOUNT 1.0f -#define PHEROMONE_EVAPORATION_RATE 0.001f -#define PHEROMONE_SIZE 2 -#define PHEROMONE_DROP_INTERVAL 5 // Drop pheromone every N steps while carrying food - -// Vision system constants -#define ANT_VISION_RANGE 50.0f -#define ANT_VISION_ANGLE (M_PI / 6.0f) // 30 degrees (π/6) - -// Pheromone sensing constants -#define ANT_PHEROMONE_RANGE 100.0f // 100px range -#define ANT_PHEROMONE_ANGLE (2 * M_PI) // 360 degrees (full circle) - -// Actions -#define ACTION_TURN_LEFT 0 -#define ACTION_TURN_RIGHT 1 -#define ACTION_MOVE_FORWARD 2 -#define ACTION_NOOP 3 - -// Colors -#define COLONY1_COLOR (Color){220, 0, 0, 255} -#define COLONY2_COLOR (Color){0, 0, 220, 255} 
-#define PHEROMONE1_COLOR (Color){255, 200, 200, 100} -#define PHEROMONE2_COLOR (Color){200, 200, 255, 100} -#define FOOD_COLOR (Color){0, 200, 0, 255} -#define BACKGROUND_COLOR (Color){50, 50, 50, 255} - -// Required Log struct for PufferLib -typedef struct { - float perf; // Average steps per delivery (efficiency - lower is better) - float score; // Food deliveries per 1000 steps (throughput) - float episode_return; // Cumulative rewards - float episode_length; // Total steps across all ants - float avg_delivery_steps; // Average steps taken per successful delivery - float colony1_food; // Food collected by colony 1 - float colony2_food; // Food collected by colony 2 - float total_deliveries; // Total successful food deliveries - float successful_trips; // Number of ants that successfully found food - float total_resets; // Total ant resets (successful + unsuccessful) - float n; // Episode count - REQUIRED AS LAST FIELD -} Log; - -// Forward declarations -typedef struct Client Client; -typedef struct AntsEnv AntsEnv; - -// Simple 2D vector -typedef struct { - float x, y; -} Vector2D; - -// Food source in the environment -typedef struct { - Vector2D position; - int amount; -} FoodSource; - -// Pheromone trail marker -typedef struct { - Vector2D position; - float strength; - int colony_id; - float direction; // Direction the ant was moving when placing this pheromone -} Pheromone; - -// Individual ant agent -typedef struct { - Vector2D position; - float direction; - int colony_id; - bool has_food; - int steps_alive; // Track steps for periodic reset - int steps_since_pheromone; // Track when to drop next pheromone -} Ant; - -// Colony home base -typedef struct { - Vector2D position; - int food_collected; -} Colony; - -// Raylib rendering client -struct Client { - int cell_size; - int width; - int height; - bool show_vision_cones; // Toggle for vision cone visualization - bool show_pheromone_range; // Toggle for pheromone range visualization -}; - -// Main 
environment struct - FOLLOWING TARGET PATTERN -struct AntsEnv { - Log log; // Required: aggregated log for all agents - Client* client; // Rendering client - Ant* ants; // Dynamic array of ants - Colony colonies[NUM_COLONIES]; - FoodSource food_sources[MAX_FOOD_SOURCES]; - Pheromone pheromones[MAX_PHEROMONES]; - - // Required PufferLib fields - float* observations; // Flattened observations - int* actions; // Actions for all ants - float* rewards; // Rewards for all ants - unsigned char* terminals; // Terminal flags - - // Environment parameters - int num_ants; // Total number of ants - int width; // Environment width - int height; // Environment height - int num_food_sources; // Active food sources - int num_pheromones; // Active pheromones - int tick; // Current timestep - - // Simple reward parameters (like target.c) - float reward_food_pickup; // Reward for picking up food - float reward_delivery; // Reward for delivering food to colony -}; - -// Helper functions -static inline float random_float(float min, float max) { - return min + (max - min) * ((float)rand() / (float)RAND_MAX); -} - -static inline float wrap_angle(float angle) { - while (angle > M_PI) angle -= 2 * M_PI; - while (angle < -M_PI) angle += 2 * M_PI; - return angle; -} - -static inline float distance_squared(Vector2D a, Vector2D b) { - float dx = a.x - b.x; - float dy = a.y - b.y; - return (dx * dx) + (dy * dy); -} - -static inline float get_angle(Vector2D a, Vector2D b) { - return atan2(b.y - a.y, b.x - a.x); -} - -static inline float clip(float val, float min, float max) { - if (val < min) return min; - if (val > max) return max; - return val; -} - -// Check if target is within ant's vision cone -static inline bool is_in_vision(Vector2D ant_pos, float ant_dir, Vector2D target) { - float dx = target.x - ant_pos.x; - float dy = target.y - ant_pos.y; - float dist_sq = dx * dx + dy * dy; - - // Check range - if (dist_sq > ANT_VISION_RANGE * ANT_VISION_RANGE) { - return false; - } - - // Check 
angle - float angle_to_target = atan2(dy, dx); - float angle_diff = wrap_angle(angle_to_target - ant_dir); - - return fabs(angle_diff) <= ANT_VISION_ANGLE / 2.0f; -} - -// Check if target is within ant's pheromone sensing range (360 degrees) -static inline bool is_in_pheromone_range(Vector2D ant_pos, Vector2D target) { - float dist_sq = distance_squared(ant_pos, target); - return dist_sq <= ANT_PHEROMONE_RANGE * ANT_PHEROMONE_RANGE; -} - -// Add pheromone to the environment -static inline void add_pheromone(AntsEnv* env, Vector2D position, int colony_id, float direction) { - if (env->num_pheromones >= MAX_PHEROMONES) { - // Replace oldest pheromone (circular buffer) - for (int i = 0; i < env->num_pheromones - 1; i++) { - env->pheromones[i] = env->pheromones[i + 1]; - } - env->num_pheromones--; - } - - env->pheromones[env->num_pheromones].position = position; - env->pheromones[env->num_pheromones].strength = PHEROMONE_DEPOSIT_AMOUNT; - env->pheromones[env->num_pheromones].colony_id = colony_id; - env->pheromones[env->num_pheromones].direction = direction; - env->num_pheromones++; -} - -// Spawn a new ant at its colony -void spawn_ant(AntsEnv* env, int ant_id) { - Ant* ant = &env->ants[ant_id]; - Colony* colony = &env->colonies[ant->colony_id]; - - ant->position = colony->position; - ant->direction = wrap_angle((rand() % 8) * (M_PI / 4)); - ant->has_food = false; - ant->steps_alive = 0; - ant->steps_since_pheromone = 0; -} - -// Spawn food at a valid location -void spawn_food(AntsEnv* env) { - int attempts = 0; - - while (attempts < 100) { - float x = random_float(50, env->width - 50); - float y = random_float(50, env->height - 50); - - // Check distance from colonies - bool valid = true; - for (int j = 0; j < NUM_COLONIES; j++) { - float dist_sq = distance_squared((Vector2D){x, y}, env->colonies[j].position); - if (dist_sq < MIN_FOOD_COLONY_DISTANCE * MIN_FOOD_COLONY_DISTANCE) { - valid = false; - break; - } - } - - if (valid) { - // Find empty slot - for (int i = 
0; i < MAX_FOOD_SOURCES; i++) { - if (env->food_sources[i].amount == 0) { - env->food_sources[i].position.x = x; - env->food_sources[i].position.y = y; - env->food_sources[i].amount = MAX_FOOD_PER_SOURCE; - return; - } - } - } - attempts++; - } -} - -// Initialize environment memory -void init(AntsEnv* env) { - env->ants = (Ant*)calloc(env->num_ants, sizeof(Ant)); - env->tick = 0; - env->client = NULL; - env->num_pheromones = 0; - - // Initialize colonies - env->colonies[0].position = (Vector2D){env->width / 4, env->height / 2}; - env->colonies[1].position = (Vector2D){3 * env->width / 4, env->height / 2}; - env->colonies[0].food_collected = 0; - env->colonies[1].food_collected = 0; - - // Initialize food sources - env->num_food_sources = MAX_FOOD_SOURCES; - for (int i = 0; i < env->num_food_sources; i++) { - env->food_sources[i].amount = 0; - } -} - -// Compute observations for all ants - WITH VISION AND PHEROMONES -void compute_observations(AntsEnv* env) { - int obs_idx = 0; - - for (int a = 0; a < env->num_ants; a++) { - Ant* ant = &env->ants[a]; - Colony* colony = &env->colonies[ant->colony_id]; - - // Find closest visible food source (with vision constraints) - float closest_food_dist_sq = env->width * env->width + env->height * env->height; - Vector2D closest_food_pos = {0, 0}; - bool found_food = false; - - for (int i = 0; i < env->num_food_sources; i++) { - if (env->food_sources[i].amount > 0) { - Vector2D food_pos = env->food_sources[i].position; - if (is_in_vision(ant->position, ant->direction, food_pos)) { - float dist_sq = distance_squared(ant->position, food_pos); - if (dist_sq < closest_food_dist_sq) { - closest_food_dist_sq = dist_sq; - closest_food_pos = food_pos; - found_food = true; - } - } - } - } - - // Find closest pheromone from own colony (using pheromone range, not vision) - float closest_pheromone_dist_sq = env->width * env->width + env->height * env->height; - Vector2D closest_pheromone_pos = {0, 0}; - float closest_pheromone_direction = 
0.0f; - bool found_pheromone = false; - - for (int i = 0; i < env->num_pheromones; i++) { - if (env->pheromones[i].colony_id == ant->colony_id) { - Vector2D pheromone_pos = env->pheromones[i].position; - if (is_in_pheromone_range(ant->position, pheromone_pos)) { - float dist_sq = distance_squared(ant->position, pheromone_pos); - if (dist_sq < closest_pheromone_dist_sq) { - closest_pheromone_dist_sq = dist_sq; - closest_pheromone_pos = pheromone_pos; - closest_pheromone_direction = env->pheromones[i].direction; - found_pheromone = true; - } - } - } - } - - // Count friendly ants within pheromone range (density) - int friendly_ants_nearby = 0; - for (int i = 0; i < env->num_ants; i++) { - if (i != a && env->ants[i].colony_id == ant->colony_id) { - if (is_in_pheromone_range(ant->position, env->ants[i].position)) { - friendly_ants_nearby++; - } - } - } - - // Observation: [colony_dx, colony_dy, food_dx, food_dy, pheromone_dx, pheromone_dy, pheromone_direction, has_food, heading, density] - // 10 values total - normalized to roughly -1 to 1 range - env->observations[obs_idx++] = (colony->position.x - ant->position.x) / env->width; - env->observations[obs_idx++] = (colony->position.y - ant->position.y) / env->height; - - if (found_food) { - env->observations[obs_idx++] = (closest_food_pos.x - ant->position.x) / env->width; - env->observations[obs_idx++] = (closest_food_pos.y - ant->position.y) / env->height; - } else { - env->observations[obs_idx++] = 0.0f; - env->observations[obs_idx++] = 0.0f; - } - - if (found_pheromone) { - env->observations[obs_idx++] = (closest_pheromone_pos.x - ant->position.x) / env->width; - env->observations[obs_idx++] = (closest_pheromone_pos.y - ant->position.y) / env->height; - // Normalize pheromone direction to -1 to 1 range (divide by π) - env->observations[obs_idx++] = closest_pheromone_direction / M_PI; - } else { - env->observations[obs_idx++] = 0.0f; - env->observations[obs_idx++] = 0.0f; - env->observations[obs_idx++] = 0.0f; - } - - 
env->observations[obs_idx++] = ant->has_food ? 1.0f : 0.0f; - env->observations[obs_idx++] = ant->direction / (2 * M_PI); - - // Normalize density by max possible ants per colony (roughly num_ants / 2) - float max_friendly_ants = (env->num_ants / NUM_COLONIES) - 1; // -1 to exclude self - env->observations[obs_idx++] = max_friendly_ants > 0 ? (float)friendly_ants_nearby / max_friendly_ants : 0.0f; - } -} - -// Required function: reset environment -void c_reset(AntsEnv* env) { - env->tick = 0; - env->log = (Log){0}; - env->num_pheromones = 0; - - // Reset colonies - env->colonies[0].food_collected = 0; - env->colonies[1].food_collected = 0; - - // Initialize ants - int ants_per_colony = env->num_ants / NUM_COLONIES; - for (int i = 0; i < env->num_ants; i++) { - env->ants[i].colony_id = i / ants_per_colony; - if (env->ants[i].colony_id >= NUM_COLONIES) { - env->ants[i].colony_id = NUM_COLONIES - 1; - } - spawn_ant(env, i); - } - - // Clear and respawn food - for (int i = 0; i < env->num_food_sources; i++) { - env->food_sources[i].amount = 0; - } - for (int i = 0; i < env->num_food_sources; i++) { - spawn_food(env); - } - - // Clear buffers - memset(env->rewards, 0, env->num_ants * sizeof(float)); - memset(env->terminals, 0, env->num_ants * sizeof(unsigned char)); - - compute_observations(env); -} - -// Update food collection and delivery - FOLLOWING TARGET update_goals PATTERN -void update_food_interactions(AntsEnv* env) { - for (int a = 0; a < env->num_ants; a++) { - Ant* ant = &env->ants[a]; - - // Check for food pickup - if (!ant->has_food) { - for (int f = 0; f < env->num_food_sources; f++) { - if (env->food_sources[f].amount > 0) { - float dist_sq = distance_squared(ant->position, env->food_sources[f].position); - if (dist_sq < (ANT_SIZE + FOOD_SIZE) * (ANT_SIZE + FOOD_SIZE)) { - ant->has_food = true; - env->food_sources[f].amount--; - - // Respawn food if depleted - if (env->food_sources[f].amount <= 0) { - spawn_food(env); - } - - // Simple reward - 
env->rewards[a] += env->reward_food_pickup; - env->log.episode_return += env->reward_food_pickup; - - // Track successful trip (ant found food) - env->log.successful_trips += 1.0f; - break; - } - } - } - } - - // Check for food delivery - if (ant->has_food) { - Colony* colony = &env->colonies[ant->colony_id]; - float dist_sq = distance_squared(ant->position, colony->position); - if (dist_sq < (ANT_SIZE + COLONY_SIZE) * (ANT_SIZE + COLONY_SIZE)) { - ant->has_food = false; - colony->food_collected++; - - // Reward and log update - WITH EFFICIENCY METRICS - env->rewards[a] += env->reward_delivery; - env->log.episode_return += env->reward_delivery; - env->log.episode_length += ant->steps_alive; - env->log.total_deliveries += 1.0f; - - // Track per-colony performance - if (ant->colony_id == 0) { - env->log.colony1_food += 1.0f; - } else { - env->log.colony2_food += 1.0f; - } - - // Update derived efficiency metrics - env->log.avg_delivery_steps = env->log.episode_length / env->log.total_deliveries; - - // Performance: Average steps per delivery (lower is better) - env->log.perf = env->log.avg_delivery_steps; - - // Score: Food deliveries per 1000 steps (higher is better) - env->log.score = (env->log.total_deliveries * 1000.0f) / env->log.episode_length; - - env->log.n += 1; // Episode count (number of deliveries) - - // Reset ant after delivery - ant->steps_alive = 0; - } - } - } -} - -// Required function: step environment -void c_step(AntsEnv* env) { - env->tick++; - - // Clear rewards and terminals - memset(env->rewards, 0, env->num_ants * sizeof(float)); - memset(env->terminals, 0, env->num_ants * sizeof(unsigned char)); - - // Update all ants - SIMPLIFIED LIKE TARGET - for (int i = 0; i < env->num_ants; i++) { - Ant* ant = &env->ants[i]; - ant->steps_alive++; - - // Execute action - int action = env->actions[i]; - switch (action) { - case ACTION_TURN_LEFT: - ant->direction -= TURN_ANGLE; - ant->direction = wrap_angle(ant->direction); - break; - case 
ACTION_TURN_RIGHT: - ant->direction += TURN_ANGLE; - ant->direction = wrap_angle(ant->direction); - break; - case ACTION_MOVE_FORWARD: - ant->position.x += ANT_SPEED * cos(ant->direction); - ant->position.y += ANT_SPEED * sin(ant->direction); - break; - case ACTION_NOOP: - // Do nothing - break; - } - - // Wrap around edges - if (ant->position.x < 0) ant->position.x = env->width; - if (ant->position.x > env->width) ant->position.x = 0; - if (ant->position.y < 0) ant->position.y = env->height; - if (ant->position.y > env->height) ant->position.y = 0; - - // Automatic pheromone dropping when carrying food - if (ant->has_food) { - ant->steps_since_pheromone++; - if (ant->steps_since_pheromone >= PHEROMONE_DROP_INTERVAL) { - add_pheromone(env, ant->position, ant->colony_id, ant->direction); - ant->steps_since_pheromone = 0; - } - } - - // Periodic reset like target.c (lines 158-161) - if (ant->steps_alive % ANT_RESET_INTERVAL == 0) { - spawn_ant(env, i); - env->terminals[i] = 1; - env->log.total_resets += 1.0f; - } - } - - // Update pheromone evaporation - for (int i = 0; i < env->num_pheromones; i++) { - env->pheromones[i].strength -= PHEROMONE_EVAPORATION_RATE; - if (env->pheromones[i].strength <= 0) { - // Remove evaporated pheromone (swap with last and shrink) - env->pheromones[i] = env->pheromones[env->num_pheromones - 1]; - env->num_pheromones--; - i--; // Check this slot again - } - } - - // Update food interactions - update_food_interactions(env); - - // Compute new observations - compute_observations(env); -} - -// Required function: render (with lazy client initialization) -void c_render(AntsEnv* env) { - if (env->client == NULL) { - InitWindow(env->width, env->height, "PufferLib Ants"); - SetTargetFPS(60); - env->client = (Client*)calloc(1, sizeof(Client)); - env->client->cell_size = 1; - env->client->width = env->width; - env->client->height = env->height; - env->client->show_vision_cones = true; // Start with vision cones on - 
env->client->show_pheromone_range = false; // Start with pheromone range off - } - - // Standard exit key - if (IsKeyDown(KEY_ESCAPE)) { - exit(0); - } - - // Toggle vision cones with 'V' key - if (IsKeyPressed(KEY_V)) { - env->client->show_vision_cones = !env->client->show_vision_cones; - } - - // Toggle pheromone range with 'P' key - if (IsKeyPressed(KEY_P)) { - env->client->show_pheromone_range = !env->client->show_pheromone_range; - } - - BeginDrawing(); - ClearBackground(BACKGROUND_COLOR); - - // Draw colonies - for (int i = 0; i < NUM_COLONIES; i++) { - Color color = (i == 0) ? COLONY1_COLOR : COLONY2_COLOR; - DrawCircle(env->colonies[i].position.x, env->colonies[i].position.y, - COLONY_SIZE, color); - } - - // Draw pheromones (before other objects for layering) - for (int i = 0; i < env->num_pheromones; i++) { - Color pheromone_color = (env->pheromones[i].colony_id == 0) ? PHEROMONE1_COLOR : PHEROMONE2_COLOR; - pheromone_color.a = (unsigned char)(100 * env->pheromones[i].strength); - DrawCircle(env->pheromones[i].position.x, env->pheromones[i].position.y, - PHEROMONE_SIZE, pheromone_color); - } - - // Draw food - for (int i = 0; i < env->num_food_sources; i++) { - if (env->food_sources[i].amount > 0) { - DrawCircle(env->food_sources[i].position.x, env->food_sources[i].position.y, - FOOD_SIZE, FOOD_COLOR); - DrawText(TextFormat("%d", env->food_sources[i].amount), - env->food_sources[i].position.x - 5, - env->food_sources[i].position.y - 5, 10, RAYWHITE); - } - } - - // Draw ants with optional vision cones and pheromone range - for (int i = 0; i < env->num_ants; i++) { - Ant* ant = &env->ants[i]; - Color ant_color = (ant->colony_id == 0) ? 
COLONY1_COLOR : COLONY2_COLOR; - - // Draw pheromone range if enabled (semi-transparent circle) - if (env->client->show_pheromone_range) { - Color pheromone_range_color = ant_color; - pheromone_range_color.a = 15; // Very transparent - DrawCircle(ant->position.x, ant->position.y, ANT_PHEROMONE_RANGE, pheromone_range_color); - } - - // Draw vision cone if enabled (semi-transparent) - if (env->client->show_vision_cones) { - Color vision_color = ant_color; - vision_color.a = 30; // Very transparent - - // Calculate vision cone arc - float start_angle = (ant->direction - ANT_VISION_ANGLE / 2.0f) * 180.0f / M_PI; - float end_angle = (ant->direction + ANT_VISION_ANGLE / 2.0f) * 180.0f / M_PI; - - DrawCircleSector( - (Vector2){ant->position.x, ant->position.y}, - ANT_VISION_RANGE, - start_angle, - end_angle, - 32, // segments for smooth arc - vision_color - ); - } - - // Change color if carrying food - if (ant->has_food) { - ant_color = FOOD_COLOR; - } - - DrawCircle(ant->position.x, ant->position.y, ANT_SIZE, ant_color); - - // Direction indicator (pointing forward) - float dir_x = ant->position.x + (ANT_SIZE * 1.5f) * cos(ant->direction); - float dir_y = ant->position.y + (ANT_SIZE * 1.5f) * sin(ant->direction); - DrawLine(ant->position.x, ant->position.y, dir_x, dir_y, RAYWHITE); - } - - // Draw UI - Colony scores - DrawText(TextFormat("Colony 1: %d (%.1f%%)", - env->colonies[0].food_collected, - env->log.total_deliveries > 0 ? (env->log.colony1_food / env->log.total_deliveries * 100.0f) : 0.0f), - 20, 20, 20, COLONY1_COLOR); - DrawText(TextFormat("Colony 2: %d (%.1f%%)", - env->colonies[1].food_collected, - env->log.total_deliveries > 0 ? 
(env->log.colony2_food / env->log.total_deliveries * 100.0f) : 0.0f), - 20, 50, 20, COLONY2_COLOR); - - // Efficiency metrics - DrawText(TextFormat("Efficiency: %.1f steps/food", env->log.avg_delivery_steps), - 20, 80, 18, YELLOW); - DrawText(TextFormat("Throughput: %.2f food/1000 steps", env->log.score), - 20, 105, 18, YELLOW); - - // Success rate - float success_rate = env->log.total_resets > 0 - ? (env->log.successful_trips / env->log.total_resets * 100.0f) - : 0.0f; - DrawText(TextFormat("Success Rate: %.1f%%", success_rate), - 20, 130, 18, GREEN); - - // Right side - System info - DrawText(TextFormat("Tick: %d", env->tick), env->width - 120, 20, 20, RAYWHITE); - DrawText(TextFormat("Pheromones: %d", env->num_pheromones), env->width - 180, 50, 20, RAYWHITE); - DrawText(TextFormat("Deliveries: %.0f", env->log.total_deliveries), env->width - 180, 75, 18, RAYWHITE); - - // Controls help - const char* vision_status = env->client->show_vision_cones ? "ON" : "OFF"; - const char* pheromone_status = env->client->show_pheromone_range ? "ON" : "OFF"; - DrawText(TextFormat("[V] Vision Cones: %s", vision_status), 20, env->height - 30, 16, RAYWHITE); - DrawText(TextFormat("[P] Pheromone Range: %s", pheromone_status), 20, env->height - 50, 16, RAYWHITE); - DrawText("[ESC] Exit", 20, env->height - 70, 16, GRAY); - - EndDrawing(); -} - -// Required function: cleanup -void c_close(AntsEnv* env) { - if (env->ants) { - free(env->ants); - env->ants = NULL; - } - if (env->client != NULL) { - CloseWindow(); - free(env->client); - env->client = NULL; - } -} +/* Ants: A multiagent foraging environment inspired by ant colonies. + * Two colonies compete to collect food from the environment. + * Follows the Target env pattern for simplicity and clarity. 
+ */ + +#define _USE_MATH_DEFINES +#include +#include +#include +#include +#include "raylib.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +// Environment constants +#define WINDOW_WIDTH 1280 +#define WINDOW_HEIGHT 720 +#define NUM_COLONIES 2 +#define MAX_FOOD_SOURCES 20 +#define MAX_FOOD_PER_SOURCE 20 +#define ANT_SPEED 5.0f +#define ANT_SIZE 4 +#define FOOD_SIZE 6 +#define COLONY_SIZE 20 +#define TURN_ANGLE (M_PI / 12) +#define MIN_FOOD_COLONY_DISTANCE 50.0f +#define ANT_RESET_INTERVAL 2048 // Reset ant every N steps (like target.c) + +// Pheromone system constants +#define MAX_PHEROMONES 5000 +#define PHEROMONE_DEPOSIT_AMOUNT 1.0f +#define PHEROMONE_EVAPORATION_RATE 0.005f // Increased from 0.001 to break loops faster +#define PHEROMONE_SIZE 2 +#define PHEROMONE_DROP_INTERVAL 5 // Drop pheromone every N steps while carrying food + +// Vision system constants +#define ANT_VISION_RANGE 75.0f // Increased from 50 to encourage exploration +#define ANT_VISION_ANGLE (M_PI / 3.0f) // 60 degrees (π/3) - increased from 45° for better exploration + +// Pheromone sensing constants +#define ANT_PHEROMONE_RANGE 100.0f // 100px range +#define ANT_PHEROMONE_ANGLE (2 * M_PI) // 360 degrees (full circle) + +// Actions +#define ACTION_TURN_LEFT 0 +#define ACTION_TURN_RIGHT 1 +#define ACTION_MOVE_FORWARD 2 +#define ACTION_NOOP 3 + +// Colors +#define COLONY1_COLOR (Color){220, 0, 0, 255} +#define COLONY2_COLOR (Color){0, 0, 220, 255} +#define PHEROMONE1_COLOR (Color){255, 200, 200, 100} +#define PHEROMONE2_COLOR (Color){200, 200, 255, 100} +#define FOOD_COLOR (Color){0, 200, 0, 255} +#define BACKGROUND_COLOR (Color){50, 50, 50, 255} + +// Required Log struct for PufferLib +typedef struct { + float perf; // Average steps per delivery (efficiency - lower is better) + float score; // Food deliveries per 1000 steps (throughput) + float episode_return; // Cumulative rewards + float episode_length; // Total steps across all ants + float avg_delivery_steps; // Average 
steps taken per successful delivery + float colony1_food; // Food collected by colony 1 + float colony2_food; // Food collected by colony 2 + float total_deliveries; // Total successful food deliveries + float successful_trips; // Number of ants that successfully found food + float total_resets; // Total ant resets (successful + unsuccessful) + float n; // Episode count - REQUIRED AS LAST FIELD +} Log; + +// Forward declarations +typedef struct Client Client; +typedef struct AntsEnv AntsEnv; + +// Simple 2D vector +typedef struct { + float x, y; +} Vector2D; + +// Food source in the environment +typedef struct { + Vector2D position; + int amount; +} FoodSource; + +// Pheromone trail marker +typedef struct { + Vector2D position; + float strength; + int colony_id; + float direction; // Direction the ant was moving when placing this pheromone +} Pheromone; + +// Individual ant agent +typedef struct { + Vector2D position; + float direction; + int colony_id; + bool has_food; + int steps_alive; // Track steps for periodic reset + int steps_since_pheromone; // Track when to drop next pheromone + int steps_without_food; // Track steps since last food pickup (for exploration) +} Ant; + +// Colony home base +typedef struct { + Vector2D position; + int food_collected; +} Colony; + +// Raylib rendering client +struct Client { + int cell_size; + int width; + int height; + bool show_vision_cones; // Toggle for vision cone visualization + bool show_pheromone_range; // Toggle for pheromone range visualization +}; + +// Main environment struct - FOLLOWING TARGET PATTERN +struct AntsEnv { + Log log; // Required: aggregated log for all agents + Client* client; // Rendering client + Ant* ants; // Dynamic array of ants + Colony colonies[NUM_COLONIES]; + FoodSource food_sources[MAX_FOOD_SOURCES]; + Pheromone pheromones[MAX_PHEROMONES]; + + // Required PufferLib fields + float* observations; // Flattened observations + int* actions; // Actions for all ants + float* rewards; // Rewards 
for all ants + unsigned char* terminals; // Terminal flags + + // Environment parameters + int num_ants; // Total number of ants + int width; // Environment width + int height; // Environment height + int num_food_sources; // Active food sources + int num_pheromones; // Active pheromones + int tick; // Current timestep + + // Simple reward parameters (like target.c) + float reward_food_pickup; // Reward for picking up food + float reward_delivery; // Reward for delivering food to colony +}; + +// Helper functions +static inline float random_float(float min, float max) { + return min + (max - min) * ((float)rand() / (float)RAND_MAX); +} + +static inline float wrap_angle(float angle) { + while (angle > M_PI) angle -= 2 * M_PI; + while (angle < -M_PI) angle += 2 * M_PI; + return angle; +} + +static inline float distance_squared(Vector2D a, Vector2D b) { + float dx = a.x - b.x; + float dy = a.y - b.y; + return (dx * dx) + (dy * dy); +} + +static inline float get_angle(Vector2D a, Vector2D b) { + return atan2(b.y - a.y, b.x - a.x); +} + +static inline float clip(float val, float min, float max) { + if (val < min) return min; + if (val > max) return max; + return val; +} + +// Check if target is within ant's vision cone +static inline bool is_in_vision(Vector2D ant_pos, float ant_dir, Vector2D target) { + float dx = target.x - ant_pos.x; + float dy = target.y - ant_pos.y; + float dist_sq = dx * dx + dy * dy; + + // Check range + if (dist_sq > ANT_VISION_RANGE * ANT_VISION_RANGE) { + return false; + } + + // Check angle + float angle_to_target = atan2(dy, dx); + float angle_diff = wrap_angle(angle_to_target - ant_dir); + + return fabs(angle_diff) <= ANT_VISION_ANGLE / 2.0f; +} + +// Check if target is within ant's pheromone sensing range (360 degrees) +static inline bool is_in_pheromone_range(Vector2D ant_pos, Vector2D target) { + float dist_sq = distance_squared(ant_pos, target); + return dist_sq <= ANT_PHEROMONE_RANGE * ANT_PHEROMONE_RANGE; +} + +// Add pheromone to the 
environment +static inline void add_pheromone(AntsEnv* env, Vector2D position, int colony_id, float direction) { + if (env->num_pheromones >= MAX_PHEROMONES) { + // Replace oldest pheromone (circular buffer) + for (int i = 0; i < env->num_pheromones - 1; i++) { + env->pheromones[i] = env->pheromones[i + 1]; + } + env->num_pheromones--; + } + + env->pheromones[env->num_pheromones].position = position; + env->pheromones[env->num_pheromones].strength = PHEROMONE_DEPOSIT_AMOUNT; + env->pheromones[env->num_pheromones].colony_id = colony_id; + env->pheromones[env->num_pheromones].direction = direction; + env->num_pheromones++; +} + +// Spawn a new ant at its colony +void spawn_ant(AntsEnv* env, int ant_id) { + Ant* ant = &env->ants[ant_id]; + Colony* colony = &env->colonies[ant->colony_id]; + + ant->position = colony->position; + ant->direction = wrap_angle((rand() % 8) * (M_PI / 4)); + ant->has_food = false; + ant->steps_alive = 0; + ant->steps_since_pheromone = 0; + ant->steps_without_food = 0; +} + +// Spawn food at a valid location +void spawn_food(AntsEnv* env) { + int attempts = 0; + + while (attempts < 100) { + float x = random_float(50, env->width - 50); + float y = random_float(50, env->height - 50); + + // Check distance from colonies + bool valid = true; + for (int j = 0; j < NUM_COLONIES; j++) { + float dist_sq = distance_squared((Vector2D){x, y}, env->colonies[j].position); + if (dist_sq < MIN_FOOD_COLONY_DISTANCE * MIN_FOOD_COLONY_DISTANCE) { + valid = false; + break; + } + } + + if (valid) { + // Find empty slot + for (int i = 0; i < MAX_FOOD_SOURCES; i++) { + if (env->food_sources[i].amount == 0) { + env->food_sources[i].position.x = x; + env->food_sources[i].position.y = y; + env->food_sources[i].amount = MAX_FOOD_PER_SOURCE; + return; + } + } + } + attempts++; + } +} + +// Initialize environment memory +void init(AntsEnv* env) { + env->ants = (Ant*)calloc(env->num_ants, sizeof(Ant)); + env->tick = 0; + env->client = NULL; + env->num_pheromones = 0; + + 
// Initialize colonies + env->colonies[0].position = (Vector2D){env->width / 4, env->height / 2}; + env->colonies[1].position = (Vector2D){3 * env->width / 4, env->height / 2}; + env->colonies[0].food_collected = 0; + env->colonies[1].food_collected = 0; + + // Initialize food sources + env->num_food_sources = MAX_FOOD_SOURCES; + for (int i = 0; i < env->num_food_sources; i++) { + env->food_sources[i].amount = 0; + } +} + +// Compute observations for all ants - WITH VISION AND PHEROMONES +void compute_observations(AntsEnv* env) { + int obs_idx = 0; + + for (int a = 0; a < env->num_ants; a++) { + Ant* ant = &env->ants[a]; + Colony* colony = &env->colonies[ant->colony_id]; + + // Find closest visible food source (with vision constraints) + float closest_food_dist_sq = env->width * env->width + env->height * env->height; + Vector2D closest_food_pos = {0, 0}; + bool found_food = false; + + for (int i = 0; i < env->num_food_sources; i++) { + if (env->food_sources[i].amount > 0) { + Vector2D food_pos = env->food_sources[i].position; + if (is_in_vision(ant->position, ant->direction, food_pos)) { + float dist_sq = distance_squared(ant->position, food_pos); + if (dist_sq < closest_food_dist_sq) { + closest_food_dist_sq = dist_sq; + closest_food_pos = food_pos; + found_food = true; + } + } + } + } + + // Find top 5 strongest pheromones from own colony (using pheromone range, not vision) + // Store pheromones in range with their strength for sorting + typedef struct { + Vector2D position; + float strength; + float direction; + } PheromoneCandidate; + + PheromoneCandidate candidates[100]; // Max 100 candidates (should be enough) + int num_candidates = 0; + + for (int i = 0; i < env->num_pheromones; i++) { + if (env->pheromones[i].colony_id == ant->colony_id) { + Vector2D pheromone_pos = env->pheromones[i].position; + if (is_in_pheromone_range(ant->position, pheromone_pos)) { + if (num_candidates < 100) { + candidates[num_candidates].position = pheromone_pos; + 
candidates[num_candidates].strength = env->pheromones[i].strength; + candidates[num_candidates].direction = env->pheromones[i].direction; + num_candidates++; + } + } + } + } + + // Sort by strength (descending) - simple bubble sort for small arrays + for (int i = 0; i < num_candidates - 1; i++) { + for (int j = 0; j < num_candidates - i - 1; j++) { + if (candidates[j].strength < candidates[j + 1].strength) { + PheromoneCandidate temp = candidates[j]; + candidates[j] = candidates[j + 1]; + candidates[j + 1] = temp; + } + } + } + + // Take top 5 (or fewer if less available) + int top_count = num_candidates < 5 ? num_candidates : 5; + + // Count friendly ants within pheromone range (density) + int friendly_ants_nearby = 0; + for (int i = 0; i < env->num_ants; i++) { + if (i != a && env->ants[i].colony_id == ant->colony_id) { + if (is_in_pheromone_range(ant->position, env->ants[i].position)) { + friendly_ants_nearby++; + } + } + } + + // Observation: [colony_dx, colony_dy, food_dx, food_dy, + // pheromone1_dx, pheromone1_dy, pheromone1_direction, pheromone1_strength, + // pheromone2_dx, pheromone2_dy, pheromone2_direction, pheromone2_strength, + // pheromone3_dx, pheromone3_dy, pheromone3_direction, pheromone3_strength, + // pheromone4_dx, pheromone4_dy, pheromone4_direction, pheromone4_strength, + // pheromone5_dx, pheromone5_dy, pheromone5_direction, pheromone5_strength, + // has_food, heading, density] + // 27 values total - normalized to roughly -1 to 1 range + env->observations[obs_idx++] = (colony->position.x - ant->position.x) / env->width; + env->observations[obs_idx++] = (colony->position.y - ant->position.y) / env->height; + + if (found_food) { + env->observations[obs_idx++] = (closest_food_pos.x - ant->position.x) / env->width; + env->observations[obs_idx++] = (closest_food_pos.y - ant->position.y) / env->height; + } else { + env->observations[obs_idx++] = 0.0f; + env->observations[obs_idx++] = 0.0f; + } + + // Output top 5 strongest pheromones (or zeros if 
fewer available) + for (int p = 0; p < 5; p++) { + if (p < top_count) { + env->observations[obs_idx++] = (candidates[p].position.x - ant->position.x) / env->width; + env->observations[obs_idx++] = (candidates[p].position.y - ant->position.y) / env->height; + // Normalize pheromone direction to -1 to 1 range (divide by π) + env->observations[obs_idx++] = candidates[p].direction / M_PI; + // Normalize pheromone strength (typically 0.0 to 1.0, but can be higher) + // Clamp to reasonable range for normalization + env->observations[obs_idx++] = candidates[p].strength / PHEROMONE_DEPOSIT_AMOUNT; + } else { + env->observations[obs_idx++] = 0.0f; + env->observations[obs_idx++] = 0.0f; + env->observations[obs_idx++] = 0.0f; + env->observations[obs_idx++] = 0.0f; + } + } + + env->observations[obs_idx++] = ant->has_food ? 1.0f : 0.0f; + env->observations[obs_idx++] = ant->direction / (2 * M_PI); + + // Normalize density by max possible ants per colony (roughly num_ants / 2) + float max_friendly_ants = (env->num_ants / NUM_COLONIES) - 1; // -1 to exclude self + env->observations[obs_idx++] = max_friendly_ants > 0 ? 
(float)friendly_ants_nearby / max_friendly_ants : 0.0f; + } +} + +// Required function: reset environment +void c_reset(AntsEnv* env) { + env->tick = 0; + env->log = (Log){0}; + env->num_pheromones = 0; + + // Reset colonies + env->colonies[0].food_collected = 0; + env->colonies[1].food_collected = 0; + + // Initialize ants + int ants_per_colony = env->num_ants / NUM_COLONIES; + for (int i = 0; i < env->num_ants; i++) { + env->ants[i].colony_id = i / ants_per_colony; + if (env->ants[i].colony_id >= NUM_COLONIES) { + env->ants[i].colony_id = NUM_COLONIES - 1; + } + spawn_ant(env, i); + } + + // Clear and respawn food + for (int i = 0; i < env->num_food_sources; i++) { + env->food_sources[i].amount = 0; + } + for (int i = 0; i < env->num_food_sources; i++) { + spawn_food(env); + } + + // Clear buffers + memset(env->rewards, 0, env->num_ants * sizeof(float)); + memset(env->terminals, 0, env->num_ants * sizeof(unsigned char)); + + compute_observations(env); +} + +// Update food collection and delivery - FOLLOWING TARGET update_goals PATTERN +void update_food_interactions(AntsEnv* env) { + for (int a = 0; a < env->num_ants; a++) { + Ant* ant = &env->ants[a]; + + // Check for food pickup + if (!ant->has_food) { + for (int f = 0; f < env->num_food_sources; f++) { + if (env->food_sources[f].amount > 0) { + float dist_sq = distance_squared(ant->position, env->food_sources[f].position); + if (dist_sq < (ANT_SIZE + FOOD_SIZE) * (ANT_SIZE + FOOD_SIZE)) { + ant->has_food = true; + env->food_sources[f].amount--; + + // Respawn food if depleted + if (env->food_sources[f].amount <= 0) { + spawn_food(env); + } + + // Simple reward + env->rewards[a] += env->reward_food_pickup; + env->log.episode_return += env->reward_food_pickup; + + // Track successful trip (ant found food) + env->log.successful_trips += 1.0f; + break; + } + } + } + } + + // Check for food delivery + if (ant->has_food) { + Colony* colony = &env->colonies[ant->colony_id]; + float dist_sq = 
distance_squared(ant->position, colony->position); + if (dist_sq < (ANT_SIZE + COLONY_SIZE) * (ANT_SIZE + COLONY_SIZE)) { + ant->has_food = false; + colony->food_collected++; + + // Reward and log update - WITH EFFICIENCY METRICS + env->rewards[a] += env->reward_delivery; + env->log.episode_return += env->reward_delivery; + env->log.episode_length += ant->steps_alive; + env->log.total_deliveries += 1.0f; + + // Track per-colony performance + if (ant->colony_id == 0) { + env->log.colony1_food += 1.0f; + } else { + env->log.colony2_food += 1.0f; + } + + // Update derived efficiency metrics + env->log.avg_delivery_steps = env->log.episode_length / env->log.total_deliveries; + + // Performance: Average steps per delivery (lower is better) + env->log.perf = env->log.avg_delivery_steps; + + // Score: Food deliveries per 1000 steps (higher is better) + env->log.score = (env->log.total_deliveries * 1000.0f) / env->log.episode_length; + + env->log.n += 1; // Episode count (number of deliveries) + + // Reset ant after delivery + ant->steps_alive = 0; + ant->steps_without_food = 0; // Reset exploration counter after successful delivery + } + } + } +} + +// Required function: step environment +void c_step(AntsEnv* env) { + env->tick++; + + // Clear rewards and terminals + memset(env->rewards, 0, env->num_ants * sizeof(float)); + memset(env->terminals, 0, env->num_ants * sizeof(unsigned char)); + + // Update all ants - SIMPLIFIED LIKE TARGET + for (int i = 0; i < env->num_ants; i++) { + Ant* ant = &env->ants[i]; + ant->steps_alive++; + if (!ant->has_food) { + ant->steps_without_food++; + } + + // Execute action + int action = env->actions[i]; + + // Exploration: Add small random perturbations when ant hasn't found food for a while + // This helps break out of circular patterns + if (!ant->has_food && ant->steps_without_food > 100) { + // 5% chance per step to add random exploration when stuck + if ((rand() % 100) < 5) { + // Add small random turn to encourage exploration + 
float random_turn = (random_float(-1.0f, 1.0f) * TURN_ANGLE * 2.0f); + ant->direction += random_turn; + ant->direction = wrap_angle(ant->direction); + } + } + + switch (action) { + case ACTION_TURN_LEFT: + ant->direction -= TURN_ANGLE; + ant->direction = wrap_angle(ant->direction); + break; + case ACTION_TURN_RIGHT: + ant->direction += TURN_ANGLE; + ant->direction = wrap_angle(ant->direction); + break; + case ACTION_MOVE_FORWARD: + ant->position.x += ANT_SPEED * cos(ant->direction); + ant->position.y += ANT_SPEED * sin(ant->direction); + break; + case ACTION_NOOP: + // Do nothing + break; + } + + // Wrap around edges (leaving one side re-enters at the opposite side) + if (ant->position.x < 0) ant->position.x = env->width; + if (ant->position.x > env->width) ant->position.x = 0; + if (ant->position.y < 0) ant->position.y = env->height; + if (ant->position.y > env->height) ant->position.y = 0; + + // Automatic pheromone dropping when carrying food + if (ant->has_food) { + ant->steps_since_pheromone++; + if (ant->steps_since_pheromone >= PHEROMONE_DROP_INTERVAL) { + add_pheromone(env, ant->position, ant->colony_id, ant->direction); + ant->steps_since_pheromone = 0; + } + } + + // Periodic reset: every ANT_RESET_INTERVAL steps alive, respawn the ant, flag a terminal and count the reset + if (ant->steps_alive % ANT_RESET_INTERVAL == 0) { + spawn_ant(env, i); + env->terminals[i] = 1; + env->log.total_resets += 1.0f; + } + } + + // Update pheromone evaporation + for (int i = 0; i < env->num_pheromones; i++) { + env->pheromones[i].strength -= PHEROMONE_EVAPORATION_RATE; + if (env->pheromones[i].strength <= 0) { + // Remove evaporated pheromone (swap with last and shrink) + env->pheromones[i] = env->pheromones[env->num_pheromones - 1]; + env->num_pheromones--; + i--; // Check this slot again + } + } + + // Update food interactions + update_food_interactions(env); + + // Compute new observations + compute_observations(env); +} + +// Required function: render (with lazy client initialization) +void c_render(AntsEnv* env) { + if (env->client == NULL) { + InitWindow(env->width,
env->height, "PufferLib Ants"); + SetTargetFPS(60); + env->client = (Client*)calloc(1, sizeof(Client)); + env->client->cell_size = 1; + env->client->width = env->width; + env->client->height = env->height; + env->client->show_vision_cones = true; // Start with vision cones on + env->client->show_pheromone_range = false; // Start with pheromone range off + } + + // Standard exit key + if (IsKeyDown(KEY_ESCAPE)) { + exit(0); + } + + // Toggle vision cones with 'V' key + if (IsKeyPressed(KEY_V)) { + env->client->show_vision_cones = !env->client->show_vision_cones; + } + + // Toggle pheromone range with 'P' key + if (IsKeyPressed(KEY_P)) { + env->client->show_pheromone_range = !env->client->show_pheromone_range; + } + + BeginDrawing(); + ClearBackground(BACKGROUND_COLOR); + + // Draw colonies + for (int i = 0; i < NUM_COLONIES; i++) { + Color color = (i == 0) ? COLONY1_COLOR : COLONY2_COLOR; + DrawCircle(env->colonies[i].position.x, env->colonies[i].position.y, + COLONY_SIZE, color); + } + + // Draw pheromones (before food and ants so they render underneath; alpha scales with strength) + for (int i = 0; i < env->num_pheromones; i++) { + Color pheromone_color = (env->pheromones[i].colony_id == 0) ? PHEROMONE1_COLOR : PHEROMONE2_COLOR; + pheromone_color.a = (unsigned char)(100 * env->pheromones[i].strength); + DrawCircle(env->pheromones[i].position.x, env->pheromones[i].position.y, + PHEROMONE_SIZE, pheromone_color); + } + + // Draw food + for (int i = 0; i < env->num_food_sources; i++) { + if (env->food_sources[i].amount > 0) { + DrawCircle(env->food_sources[i].position.x, env->food_sources[i].position.y, + FOOD_SIZE, FOOD_COLOR); + DrawText(TextFormat("%d", env->food_sources[i].amount), + env->food_sources[i].position.x - 5, + env->food_sources[i].position.y - 5, 10, RAYWHITE); + } + } + + // Draw ants with optional vision cones and pheromone range + for (int i = 0; i < env->num_ants; i++) { + Ant* ant = &env->ants[i]; + Color ant_color = (ant->colony_id == 0) ?
COLONY1_COLOR : COLONY2_COLOR; + + // Draw pheromone range if enabled (semi-transparent circle) + if (env->client->show_pheromone_range) { + Color pheromone_range_color = ant_color; + pheromone_range_color.a = 15; // Very transparent + DrawCircle(ant->position.x, ant->position.y, ANT_PHEROMONE_RANGE, pheromone_range_color); + } + + // Draw vision cone if enabled (semi-transparent) + if (env->client->show_vision_cones) { + Color vision_color = ant_color; + vision_color.a = 30; // Very transparent + + // Calculate vision cone arc (direction is in radians; converted to degrees for DrawCircleSector) + float start_angle = (ant->direction - ANT_VISION_ANGLE / 2.0f) * 180.0f / M_PI; + float end_angle = (ant->direction + ANT_VISION_ANGLE / 2.0f) * 180.0f / M_PI; + + DrawCircleSector( + (Vector2){ant->position.x, ant->position.y}, + ANT_VISION_RANGE, + start_angle, + end_angle, + 32, // segments for smooth arc + vision_color + ); + } + + // Change color if carrying food + if (ant->has_food) { + ant_color = FOOD_COLOR; + } + + DrawCircle(ant->position.x, ant->position.y, ANT_SIZE, ant_color); + + // Direction indicator (pointing forward) + float dir_x = ant->position.x + (ANT_SIZE * 1.5f) * cos(ant->direction); + float dir_y = ant->position.y + (ANT_SIZE * 1.5f) * sin(ant->direction); + DrawLine(ant->position.x, ant->position.y, dir_x, dir_y, RAYWHITE); + } + + // Draw UI - Colony scores (share of total deliveries shown as a percentage) + DrawText(TextFormat("Colony 1: %d (%.1f%%)", + env->colonies[0].food_collected, + env->log.total_deliveries > 0 ? (env->log.colony1_food / env->log.total_deliveries * 100.0f) : 0.0f), + 20, 20, 20, COLONY1_COLOR); + DrawText(TextFormat("Colony 2: %d (%.1f%%)", + env->colonies[1].food_collected, + env->log.total_deliveries > 0 ?
(env->log.colony2_food / env->log.total_deliveries * 100.0f) : 0.0f), + 20, 50, 20, COLONY2_COLOR); + + // Efficiency metrics + DrawText(TextFormat("Efficiency: %.1f steps/food", env->log.avg_delivery_steps), + 20, 80, 18, YELLOW); + DrawText(TextFormat("Throughput: %.2f food/1000 steps", env->log.score), + 20, 105, 18, YELLOW); + + // Success rate: successful_trips per ant reset, as a percentage (0 until the first reset) + float success_rate = env->log.total_resets > 0 + ? (env->log.successful_trips / env->log.total_resets * 100.0f) + : 0.0f; + DrawText(TextFormat("Success Rate: %.1f%%", success_rate), + 20, 130, 18, GREEN); + + // Right side - System info + DrawText(TextFormat("Tick: %d", env->tick), env->width - 120, 20, 20, RAYWHITE); + DrawText(TextFormat("Pheromones: %d", env->num_pheromones), env->width - 180, 50, 20, RAYWHITE); + DrawText(TextFormat("Deliveries: %.0f", env->log.total_deliveries), env->width - 180, 75, 18, RAYWHITE); + + // Controls help + const char* vision_status = env->client->show_vision_cones ? "ON" : "OFF"; + const char* pheromone_status = env->client->show_pheromone_range ? "ON" : "OFF"; + DrawText(TextFormat("[V] Vision Cones: %s", vision_status), 20, env->height - 30, 16, RAYWHITE); + DrawText(TextFormat("[P] Pheromone Range: %s", pheromone_status), 20, env->height - 50, 16, RAYWHITE); + DrawText("[ESC] Exit", 20, env->height - 70, 16, GRAY); + + EndDrawing(); +} + +// Required function: cleanup (frees the ant array; if a render client exists, closes the window and frees the client) +void c_close(AntsEnv* env) { + if (env->ants) { + free(env->ants); + env->ants = NULL; + } + if (env->client != NULL) { + CloseWindow(); + free(env->client); + env->client = NULL; + } +} + diff --git a/pufferlib/ocean/ants/ants.py b/pufferlib/ocean/ants/ants.py index f34fd7f44..a95021335 100644 --- a/pufferlib/ocean/ants/ants.py +++ b/pufferlib/ocean/ants/ants.py @@ -13,30 +13,38 @@ class AntsEnv(pufferlib.PufferEnv): Two colonies compete to collect food from the environment. Simplified architecture following the Target environment pattern.
- Observations (10 per ant): + Observations (27 per ant): - colony_dx, colony_dy: Direction to home colony (normalized) - food_dx, food_dy: Direction to nearest VISIBLE food (normalized, with vision constraints) - - pheromone_dx, pheromone_dy: Direction to nearest pheromone from own colony (within pheromone range) - - pheromone_direction: Direction the pheromone was placed in (normalized angle, -1 to 1) + - pheromone1_dx, pheromone1_dy, pheromone1_direction, pheromone1_strength: Strongest pheromone from own colony (within pheromone range) + - pheromone2_dx, pheromone2_dy, pheromone2_direction, pheromone2_strength: 2nd strongest pheromone + - pheromone3_dx, pheromone3_dy, pheromone3_direction, pheromone3_strength: 3rd strongest pheromone + - pheromone4_dx, pheromone4_dy, pheromone4_direction, pheromone4_strength: 4th strongest pheromone + - pheromone5_dx, pheromone5_dy, pheromone5_direction, pheromone5_strength: 5th strongest pheromone - has_food: Binary flag (0 or 1) - heading: Ant's current direction (normalized) - density: Number of friendly ants within pheromone range (normalized) Vision System: - - Ants have limited vision range (50 pixels) for seeing food - - Vision cone of 30 degrees (π/6) - narrow focused beam + - Ants have limited vision range (75 pixels) for seeing food + - Vision cone of 60 degrees (π/3) - wider beam for better exploration - Can only see food within their vision cone Pheromone Sensing: - Separate from vision: 100 pixels range, 360 degrees (omnidirectional) - - Can sense pheromones from own colony within this range + - Can sense the 5 strongest pheromones from own colony within this range + - Pheromones are ranked by strength (not distance) - Also used to detect nearby friendly ants (density) Pheromone System: - Ants automatically drop pheromones every 5 steps while carrying food - - Pheromones evaporate over time (rate: 0.001 per step) + - Pheromones evaporate over time (rate: 0.002 per step) - faster evaporation to break loops - Each
colony's pheromones are distinct - Ants only observe pheromones from their own colony + + Exploration Mechanism: + - Ants that haven't found food for more than 100 steps have a 5% chance per step to take a random exploration turn + - This helps break out of circular patterns and encourages map exploration Actions (Discrete 4): 0: Turn left @@ -58,9 +66,9 @@ def __init__( buf=None, seed=0): - # Observation space: 10 values per ant (colony, food, pheromone, pheromone_direction, has_food, heading, density) + # Observation space: 27 values per ant (colony, food, 5 pheromones × 4 values each, has_food, heading, density) self.single_observation_space = gymnasium.spaces.Box( - low=-1.0, high=1.0, shape=(10,), dtype=np.float32 + low=-1.0, high=1.0, shape=(27,), dtype=np.float32 ) # Discrete action space: turn left, turn right, move forward, noop self.single_action_space = gymnasium.spaces.Discrete(4) From bf2daae53bbf87619b52975aaee85c7f2bc31e2a Mon Sep 17 00:00:00 2001 From: Matan Itah Date: Tue, 6 Jan 2026 13:04:12 -0500 Subject: [PATCH 23/23] small changes to scripts and setup.py to make PufferLib compatible with NVIDIA DGX Spark --- scripts/build_ocean.sh | 6 ++++++ setup.py | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/scripts/build_ocean.sh b/scripts/build_ocean.sh index 88909d44f..334fab9b7 100755 --- a/scripts/build_ocean.sh +++ b/scripts/build_ocean.sh @@ -5,6 +5,7 @@ ENV=$1 MODE=${2:-local} PLATFORM="$(uname -s)" +ARCH="$(uname -m)" SRC_DIR="pufferlib/ocean/$ENV" WEB_OUTPUT_DIR="build_web/$ENV" RAYLIB_NAME='raylib-5.5_macos' @@ -12,6 +13,11 @@ BOX2D_NAME='box2d-macos-arm64' if [ "$PLATFORM" = "Linux" ]; then RAYLIB_NAME='raylib-5.5_linux_amd64' BOX2D_NAME='box2d-linux-amd64' + if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then + RAYLIB_NAME='raylib-5.5_linux_aarch64' + else + RAYLIB_NAME='raylib-5.5_linux_amd64' + fi fi if [ "$MODE" = "web" ]; then RAYLIB_NAME='raylib-5.5_webassembly' diff --git a/setup.py b/setup.py index
8a8444dd6..c8f3e1468 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,15 @@ # Build raylib for your platform RAYLIB_URL = 'https://github.com/raysan5/raylib/releases/download/5.5/' -RAYLIB_NAME = 'raylib-5.5_macos' if platform.system() == "Darwin" else 'raylib-5.5_linux_amd64' +if platform.system() == "Darwin": + RAYLIB_NAME = 'raylib-5.5_macos' +else: + # Detect Linux architecture + arch = platform.machine() + if arch == 'aarch64' or arch == 'arm64': + RAYLIB_NAME = 'raylib-5.5_linux_aarch64' + else: + RAYLIB_NAME = 'raylib-5.5_linux_amd64' RLIGHTS_URL = 'https://raw.githubusercontent.com/raysan5/raylib/refs/heads/master/examples/shaders/rlights.h' def download_raylib(platform, ext): @@ -129,6 +137,11 @@ def download_box2d(platform): extra_link_args += [ '-Bsymbolic-functions', ] + arch = platform.machine() + if arch == 'aarch64' or arch == 'arm64': + download_raylib('raylib-5.5_linux_aarch64', '.tar.gz') + else: + download_raylib('raylib-5.5_linux_amd64', '.tar.gz') elif system == 'Darwin': extra_compile_args += [ '-Wno-error=int-conversion',