diff --git a/pufferlib/config/ocean/ants.ini b/pufferlib/config/ocean/ants.ini new file mode 100644 index 000000000..c12219bae --- /dev/null +++ b/pufferlib/config/ocean/ants.ini @@ -0,0 +1,14 @@ +[base] +package = ocean +env_name = puffer_ants +policy_name = Policy +rnn_name = Recurrent + +[vec] +num_envs = 8 + +[env] +num_envs = 32 + +[train] +total_timesteps = 100_000_000 \ No newline at end of file diff --git a/pufferlib/ocean/ants/README.md b/pufferlib/ocean/ants/README.md new file mode 100644 index 000000000..5b8a6c603 --- /dev/null +++ b/pufferlib/ocean/ants/README.md @@ -0,0 +1,15 @@ +``` +puffer train puffer_ants --train.device cpu --train.optimizer adam --neptune --neptune-name "matanitah" --neptune-project "ant-sim" +``` + +``` +puffer eval puffer_ants --load-model-path experiments/ANTS-XXX.pt --train.device cpu --train.optimizer adam --neptune +``` + +``` +scripts/build_ocean.sh ants +``` + +``` +python setup.py build_ext --inplace +``` \ No newline at end of file diff --git a/pufferlib/ocean/ants/__init__.py b/pufferlib/ocean/ants/__init__.py new file mode 100644 index 000000000..55804d502 --- /dev/null +++ b/pufferlib/ocean/ants/__init__.py @@ -0,0 +1,3 @@ +from pufferlib.ocean.ants.ants import AntsEnv + +__all__ = ['AntsEnv'] diff --git a/pufferlib/ocean/ants/ants.c b/pufferlib/ocean/ants/ants.c new file mode 100644 index 000000000..f53493aba --- /dev/null +++ b/pufferlib/ocean/ants/ants.c @@ -0,0 +1,133 @@ +/* Ants: Pure C demo file for testing the environment. + * Build it with: + * bash scripts/build_ocean.sh ants local (debug) + * bash scripts/build_ocean.sh ants fast + * + * Following the Target env pattern for consistency. 
+ */ +#include +#include "ants.h" + +int main() { + int num_ants = 64; + int num_obs = 27; // Observation space: colony, food, 5 pheromones (dx, dy, direction, strength each), has_food, heading, density + + AntsEnv env = { + .width = 1280, + .height = 720, + .num_ants = num_ants, + .reward_food_pickup = 0.1f, + .reward_delivery = 10.0f + }; + + init(&env); + + // Allocate buffers manually (normally passed from Python) + env.observations = calloc(env.num_ants * num_obs, sizeof(float)); + env.actions = calloc(env.num_ants, sizeof(int)); + env.rewards = calloc(env.num_ants, sizeof(float)); + env.terminals = calloc(env.num_ants, sizeof(unsigned char)); + + // Always call reset and render first + c_reset(&env); + c_render(&env); + + printf("Ant Colony Demo with Pheromones, Vision & Density Awareness\n"); + printf("============================================================\n"); + printf("Controls:\n"); + printf(" [V] - Toggle vision cone visualization\n"); + printf(" [P] - Toggle pheromone range visualization\n"); + printf(" [ESC] - Exit\n\n"); + printf("Features:\n"); + printf(" - Vision: 75px range, 60° beam (for food) - improved for exploration\n"); + printf(" - Pheromone sensing: 100px range, 360° omnidirectional\n"); + printf(" - Density awareness: count friendly ants in pheromone range\n"); + printf(" - Automatic pheromone trails when carrying food\n"); + printf(" - Pheromone evaporation (500 step lifetime) - faster to break loops\n"); + printf(" - Random exploration: ants stuck >100 steps get random turns (5%% chance)\n"); + printf(" - Simple heuristic AI: seek food -> return to colony\n\n"); + + // Main loop - exit with ESC or close window + while (!WindowShouldClose()) { + // Simple demo AI: seek food when empty, return when full + for (int i = 0; i < env.num_ants; i++) { + Ant* ant = &env.ants[i]; + + // Simple heuristic AI + if (ant->has_food) { + // Return to colony + Colony* colony = &env.colonies[ant->colony_id]; + float angle_to_colony = 
get_angle(ant->position, colony->position); + float angle_diff = wrap_angle(angle_to_colony - ant->direction); + + if (angle_diff > M_PI / 8) { + env.actions[i] = ACTION_TURN_RIGHT; + } else if (angle_diff < -M_PI / 8) { + env.actions[i] = ACTION_TURN_LEFT; + } else { + env.actions[i] = ACTION_MOVE_FORWARD; + } + } else { + // Seek nearest food + float closest_dist_sq = env.width * env.width; + Vector2D closest_food = {0, 0}; + bool found = false; + + for (int f = 0; f < env.num_food_sources; f++) { + if (env.food_sources[f].amount > 0) { + float dist_sq = distance_squared(ant->position, env.food_sources[f].position); + if (dist_sq < closest_dist_sq) { + closest_dist_sq = dist_sq; + closest_food = env.food_sources[f].position; + found = true; + } + } + } + + if (found) { + float angle_to_food = get_angle(ant->position, closest_food); + float angle_diff = wrap_angle(angle_to_food - ant->direction); + + if (angle_diff > M_PI / 8) { + env.actions[i] = ACTION_TURN_RIGHT; + } else if (angle_diff < -M_PI / 8) { + env.actions[i] = ACTION_TURN_LEFT; + } else { + env.actions[i] = ACTION_MOVE_FORWARD; + } + } else { + // No food visible, just move forward + env.actions[i] = ACTION_MOVE_FORWARD; + } + } + } + + c_step(&env); + c_render(&env); + + // Print stats every 60 frames + if (env.tick % 60 == 0) { + float success_rate = env.log.total_resets > 0 + ? (env.log.successful_trips / env.log.total_resets * 100.0f) + : 0.0f; + printf("Tick: %d | C1: %d (%.0f%%) | C2: %d (%.0f%%) | Efficiency: %.1f steps/food | Throughput: %.2f | Success: %.1f%%\n", + env.tick, + env.colonies[0].food_collected, + env.log.total_deliveries > 0 ? (env.log.colony1_food / env.log.total_deliveries * 100.0f) : 0.0f, + env.colonies[1].food_collected, + env.log.total_deliveries > 0 ? 
(env.log.colony2_food / env.log.total_deliveries * 100.0f) : 0.0f, + env.log.avg_delivery_steps, + env.log.score, + success_rate); + } + } + + // Cleanup + free(env.observations); + free(env.actions); + free(env.rewards); + free(env.terminals); + c_close(&env); + + return 0; +} diff --git a/pufferlib/ocean/ants/ants.h b/pufferlib/ocean/ants/ants.h new file mode 100644 index 000000000..90e3990a2 --- /dev/null +++ b/pufferlib/ocean/ants/ants.h @@ -0,0 +1,755 @@ +/* Ants: A multiagent foraging environment inspired by ant colonies. + * Two colonies compete to collect food from the environment. + * Follows the Target env pattern for simplicity and clarity. + */ + +#define _USE_MATH_DEFINES +#include +#include +#include +#include +#include "raylib.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +// Environment constants +#define WINDOW_WIDTH 1280 +#define WINDOW_HEIGHT 720 +#define NUM_COLONIES 2 +#define MAX_FOOD_SOURCES 20 +#define MAX_FOOD_PER_SOURCE 20 +#define ANT_SPEED 5.0f +#define ANT_SIZE 4 +#define FOOD_SIZE 6 +#define COLONY_SIZE 20 +#define TURN_ANGLE (M_PI / 12) +#define MIN_FOOD_COLONY_DISTANCE 50.0f +#define ANT_RESET_INTERVAL 2048 // Reset ant every N steps (like target.c) + +// Pheromone system constants +#define MAX_PHEROMONES 5000 +#define PHEROMONE_DEPOSIT_AMOUNT 1.0f +#define PHEROMONE_EVAPORATION_RATE 0.005f // Increased from 0.001 to break loops faster +#define PHEROMONE_SIZE 2 +#define PHEROMONE_DROP_INTERVAL 5 // Drop pheromone every N steps while carrying food + +// Vision system constants +#define ANT_VISION_RANGE 75.0f // Increased from 50 to encourage exploration +#define ANT_VISION_ANGLE (M_PI / 3.0f) // 60 degrees (π/3) - increased from 45° for better exploration + +// Pheromone sensing constants +#define ANT_PHEROMONE_RANGE 100.0f // 100px range +#define ANT_PHEROMONE_ANGLE (2 * M_PI) // 360 degrees (full circle) + +// Actions +#define ACTION_TURN_LEFT 0 +#define ACTION_TURN_RIGHT 1 +#define ACTION_MOVE_FORWARD 2 +#define 
// Actions: the values read from AntsEnv.actions and dispatched in c_step
#define ACTION_TURN_LEFT 0
#define ACTION_TURN_RIGHT 1
#define ACTION_MOVE_FORWARD 2
#define ACTION_NOOP 3

// Colors (RGBA). The pheromone alpha here is overwritten per-marker in
// c_render, which scales it by the marker's remaining strength.
#define COLONY1_COLOR (Color){220, 0, 0, 255}
#define COLONY2_COLOR (Color){0, 0, 220, 255}
#define PHEROMONE1_COLOR (Color){255, 200, 200, 100}
#define PHEROMONE2_COLOR (Color){200, 200, 255, 100}
#define FOOD_COLOR (Color){0, 200, 0, 255}
#define BACKGROUND_COLOR (Color){50, 50, 50, 255}

// Required Log struct for PufferLib.
// All fields are floats; they are zeroed in c_reset and updated in
// update_food_interactions (deliveries/pickups) and c_step (resets).
typedef struct {
    float perf; // Mirrors avg_delivery_steps (lower is better)
    float score; // total_deliveries * 1000 / episode_length (throughput)
    float episode_return; // Sum of all pickup and delivery rewards handed out
    float episode_length; // Sum of steps_alive of each ant at the moment it delivered
    float avg_delivery_steps; // episode_length / total_deliveries
    float colony1_food; // Deliveries made by colony 0
    float colony2_food; // Deliveries made by colony 1
    float total_deliveries; // Total successful food deliveries
    float successful_trips; // Incremented on every food pickup
    float total_resets; // Incremented on every periodic ant reset in c_step
    float n; // Incremented once per delivery - REQUIRED AS LAST FIELD
} Log;

// Forward declarations
typedef struct Client Client;
typedef struct AntsEnv AntsEnv;

// Simple 2D vector (pixel coordinates in the environment)
typedef struct {
    float x, y;
} Vector2D;

// Food source in the environment. A slot with amount == 0 is treated as
// empty and may be reused by spawn_food.
typedef struct {
    Vector2D position;
    int amount;
} FoodSource;

// Pheromone trail marker
typedef struct {
    Vector2D position;
    float strength; // Starts at PHEROMONE_DEPOSIT_AMOUNT, decays each step in c_step
    int colony_id;
    float direction; // Direction the ant was moving when placing this pheromone
} Pheromone;

// Individual ant agent
typedef struct {
    Vector2D position;
    float direction; // Heading in radians, kept wrapped by wrap_angle
    int colony_id; // Index into AntsEnv.colonies; must be set before spawn_ant
    bool has_food;
    int steps_alive; // Steps since spawn or last delivery; drives the periodic reset
    int steps_since_pheromone; // Steps carried since the last pheromone drop
    int steps_without_food; // Steps spent empty-handed since spawn or last delivery (for exploration)
} Ant;

// Colony home base
typedef struct {
    Vector2D position;
    int food_collected;
} Colony;

// Raylib rendering client, created lazily by c_render
struct Client {
    int cell_size;
    int width;
    int height;
    bool show_vision_cones; // Toggle for vision cone visualization
    bool show_pheromone_range; // Toggle for pheromone range visualization
};

// Main environment struct - FOLLOWING TARGET PATTERN
struct AntsEnv {
    Log log; // Required: aggregated log for all agents
    Client* client; // Rendering client (NULL until the first c_render)
    Ant* ants; // Dynamic array of num_ants ants, allocated in init
    Colony colonies[NUM_COLONIES];
    FoodSource food_sources[MAX_FOOD_SOURCES];
    Pheromone pheromones[MAX_PHEROMONES]; // Only the first num_pheromones entries are live

    // Required PufferLib fields (buffers are owned by the caller, not by init)
    float* observations; // Flattened observations, 27 floats per ant
    int* actions; // Actions for all ants
    float* rewards; // Rewards for all ants
    unsigned char* terminals; // Terminal flags

    // Environment parameters
    int num_ants; // Total number of ants
    int width; // Environment width
    int height; // Environment height
    int num_food_sources; // Number of food slots scanned (set to MAX_FOOD_SOURCES in init)
    int num_pheromones; // Active pheromones
    int tick; // Current timestep

    // Simple reward parameters (like target.c)
    float reward_food_pickup; // Reward for picking up food
    float reward_delivery; // Reward for delivering food to colony
};

// Helper functions

// Uniform-ish random float in [min, max], driven by rand().
static inline float random_float(float min, float max) {
    return min + (max - min) * ((float)rand() / (float)RAND_MAX);
}

// Wrap an angle in radians into [-pi, pi] by repeatedly adding/subtracting 2*pi.
static inline float wrap_angle(float angle) {
    while (angle > M_PI) angle -= 2 * M_PI;
    while (angle < -M_PI) angle += 2 * M_PI;
    return angle;
}

// Squared Euclidean distance between a and b (avoids the sqrt).
static inline float distance_squared(Vector2D a, Vector2D b) {
    float dx = a.x - b.x;
    float dy = a.y - b.y;
    return (dx * dx) + (dy * dy);
}

// Bearing in radians of the vector pointing from a to b.
static inline float get_angle(Vector2D a, Vector2D b) {
    return atan2(b.y - a.y, b.x - a.x);
}

// Clamp val into [min, max].
static inline float clip(float val, float min, float max) {
    if (val < min) return min;
    if (val > max) return max;
    return val;
}

// Check if target is within ant's vision cone: at most ANT_VISION_RANGE away
// and within ANT_VISION_ANGLE / 2 on either side of the heading ant_dir.
static inline bool is_in_vision(Vector2D ant_pos, float ant_dir, Vector2D target) {
    float dx = target.x - ant_pos.x;
    float dy = target.y - ant_pos.y;
    float dist_sq = dx * dx + dy * dy;

    // Check range
    if (dist_sq > ANT_VISION_RANGE * ANT_VISION_RANGE) {
        return false;
    }

    // Check angle (wrapped so the comparison works across the +/-pi seam)
    float angle_to_target = atan2(dy, dx);
    float angle_diff = wrap_angle(angle_to_target - ant_dir);

    return fabs(angle_diff) <= ANT_VISION_ANGLE / 2.0f;
}
- ant_pos.x; + float dy = target.y - ant_pos.y; + float dist_sq = dx * dx + dy * dy; + + // Check range + if (dist_sq > ANT_VISION_RANGE * ANT_VISION_RANGE) { + return false; + } + + // Check angle + float angle_to_target = atan2(dy, dx); + float angle_diff = wrap_angle(angle_to_target - ant_dir); + + return fabs(angle_diff) <= ANT_VISION_ANGLE / 2.0f; +} + +// Check if target is within ant's pheromone sensing range (360 degrees) +static inline bool is_in_pheromone_range(Vector2D ant_pos, Vector2D target) { + float dist_sq = distance_squared(ant_pos, target); + return dist_sq <= ANT_PHEROMONE_RANGE * ANT_PHEROMONE_RANGE; +} + +// Add pheromone to the environment +static inline void add_pheromone(AntsEnv* env, Vector2D position, int colony_id, float direction) { + if (env->num_pheromones >= MAX_PHEROMONES) { + // Replace oldest pheromone (circular buffer) + for (int i = 0; i < env->num_pheromones - 1; i++) { + env->pheromones[i] = env->pheromones[i + 1]; + } + env->num_pheromones--; + } + + env->pheromones[env->num_pheromones].position = position; + env->pheromones[env->num_pheromones].strength = PHEROMONE_DEPOSIT_AMOUNT; + env->pheromones[env->num_pheromones].colony_id = colony_id; + env->pheromones[env->num_pheromones].direction = direction; + env->num_pheromones++; +} + +// Spawn a new ant at its colony +void spawn_ant(AntsEnv* env, int ant_id) { + Ant* ant = &env->ants[ant_id]; + Colony* colony = &env->colonies[ant->colony_id]; + + ant->position = colony->position; + ant->direction = wrap_angle((rand() % 8) * (M_PI / 4)); + ant->has_food = false; + ant->steps_alive = 0; + ant->steps_since_pheromone = 0; + ant->steps_without_food = 0; +} + +// Spawn food at a valid location +void spawn_food(AntsEnv* env) { + int attempts = 0; + + while (attempts < 100) { + float x = random_float(50, env->width - 50); + float y = random_float(50, env->height - 50); + + // Check distance from colonies + bool valid = true; + for (int j = 0; j < NUM_COLONIES; j++) { + float dist_sq = 
distance_squared((Vector2D){x, y}, env->colonies[j].position); + if (dist_sq < MIN_FOOD_COLONY_DISTANCE * MIN_FOOD_COLONY_DISTANCE) { + valid = false; + break; + } + } + + if (valid) { + // Find empty slot + for (int i = 0; i < MAX_FOOD_SOURCES; i++) { + if (env->food_sources[i].amount == 0) { + env->food_sources[i].position.x = x; + env->food_sources[i].position.y = y; + env->food_sources[i].amount = MAX_FOOD_PER_SOURCE; + return; + } + } + } + attempts++; + } +} + +// Initialize environment memory +void init(AntsEnv* env) { + env->ants = (Ant*)calloc(env->num_ants, sizeof(Ant)); + env->tick = 0; + env->client = NULL; + env->num_pheromones = 0; + + // Initialize colonies + env->colonies[0].position = (Vector2D){env->width / 4, env->height / 2}; + env->colonies[1].position = (Vector2D){3 * env->width / 4, env->height / 2}; + env->colonies[0].food_collected = 0; + env->colonies[1].food_collected = 0; + + // Initialize food sources + env->num_food_sources = MAX_FOOD_SOURCES; + for (int i = 0; i < env->num_food_sources; i++) { + env->food_sources[i].amount = 0; + } +} + +// Compute observations for all ants - WITH VISION AND PHEROMONES +void compute_observations(AntsEnv* env) { + int obs_idx = 0; + + for (int a = 0; a < env->num_ants; a++) { + Ant* ant = &env->ants[a]; + Colony* colony = &env->colonies[ant->colony_id]; + + // Find closest visible food source (with vision constraints) + float closest_food_dist_sq = env->width * env->width + env->height * env->height; + Vector2D closest_food_pos = {0, 0}; + bool found_food = false; + + for (int i = 0; i < env->num_food_sources; i++) { + if (env->food_sources[i].amount > 0) { + Vector2D food_pos = env->food_sources[i].position; + if (is_in_vision(ant->position, ant->direction, food_pos)) { + float dist_sq = distance_squared(ant->position, food_pos); + if (dist_sq < closest_food_dist_sq) { + closest_food_dist_sq = dist_sq; + closest_food_pos = food_pos; + found_food = true; + } + } + } + } + + // Find top 5 strongest 
pheromones from own colony (using pheromone range, not vision) + // Store pheromones in range with their strength for sorting + typedef struct { + Vector2D position; + float strength; + float direction; + } PheromoneCandidate; + + PheromoneCandidate candidates[100]; // Max 100 candidates (should be enough) + int num_candidates = 0; + + for (int i = 0; i < env->num_pheromones; i++) { + if (env->pheromones[i].colony_id == ant->colony_id) { + Vector2D pheromone_pos = env->pheromones[i].position; + if (is_in_pheromone_range(ant->position, pheromone_pos)) { + if (num_candidates < 100) { + candidates[num_candidates].position = pheromone_pos; + candidates[num_candidates].strength = env->pheromones[i].strength; + candidates[num_candidates].direction = env->pheromones[i].direction; + num_candidates++; + } + } + } + } + + // Sort by strength (descending) - simple bubble sort for small arrays + for (int i = 0; i < num_candidates - 1; i++) { + for (int j = 0; j < num_candidates - i - 1; j++) { + if (candidates[j].strength < candidates[j + 1].strength) { + PheromoneCandidate temp = candidates[j]; + candidates[j] = candidates[j + 1]; + candidates[j + 1] = temp; + } + } + } + + // Take top 5 (or fewer if less available) + int top_count = num_candidates < 5 ? 
num_candidates : 5; + + // Count friendly ants within pheromone range (density) + int friendly_ants_nearby = 0; + for (int i = 0; i < env->num_ants; i++) { + if (i != a && env->ants[i].colony_id == ant->colony_id) { + if (is_in_pheromone_range(ant->position, env->ants[i].position)) { + friendly_ants_nearby++; + } + } + } + + // Observation: [colony_dx, colony_dy, food_dx, food_dy, + // pheromone1_dx, pheromone1_dy, pheromone1_direction, pheromone1_strength, + // pheromone2_dx, pheromone2_dy, pheromone2_direction, pheromone2_strength, + // pheromone3_dx, pheromone3_dy, pheromone3_direction, pheromone3_strength, + // pheromone4_dx, pheromone4_dy, pheromone4_direction, pheromone4_strength, + // pheromone5_dx, pheromone5_dy, pheromone5_direction, pheromone5_strength, + // has_food, heading, density] + // 27 values total - normalized to roughly -1 to 1 range + env->observations[obs_idx++] = (colony->position.x - ant->position.x) / env->width; + env->observations[obs_idx++] = (colony->position.y - ant->position.y) / env->height; + + if (found_food) { + env->observations[obs_idx++] = (closest_food_pos.x - ant->position.x) / env->width; + env->observations[obs_idx++] = (closest_food_pos.y - ant->position.y) / env->height; + } else { + env->observations[obs_idx++] = 0.0f; + env->observations[obs_idx++] = 0.0f; + } + + // Output top 5 strongest pheromones (or zeros if fewer available) + for (int p = 0; p < 5; p++) { + if (p < top_count) { + env->observations[obs_idx++] = (candidates[p].position.x - ant->position.x) / env->width; + env->observations[obs_idx++] = (candidates[p].position.y - ant->position.y) / env->height; + // Normalize pheromone direction to -1 to 1 range (divide by π) + env->observations[obs_idx++] = candidates[p].direction / M_PI; + // Normalize pheromone strength (typically 0.0 to 1.0, but can be higher) + // Clamp to reasonable range for normalization + env->observations[obs_idx++] = candidates[p].strength / PHEROMONE_DEPOSIT_AMOUNT; + } else { + 
env->observations[obs_idx++] = 0.0f; + env->observations[obs_idx++] = 0.0f; + env->observations[obs_idx++] = 0.0f; + env->observations[obs_idx++] = 0.0f; + } + } + + env->observations[obs_idx++] = ant->has_food ? 1.0f : 0.0f; + env->observations[obs_idx++] = ant->direction / (2 * M_PI); + + // Normalize density by max possible ants per colony (roughly num_ants / 2) + float max_friendly_ants = (env->num_ants / NUM_COLONIES) - 1; // -1 to exclude self + env->observations[obs_idx++] = max_friendly_ants > 0 ? (float)friendly_ants_nearby / max_friendly_ants : 0.0f; + } +} + +// Required function: reset environment +void c_reset(AntsEnv* env) { + env->tick = 0; + env->log = (Log){0}; + env->num_pheromones = 0; + + // Reset colonies + env->colonies[0].food_collected = 0; + env->colonies[1].food_collected = 0; + + // Initialize ants + int ants_per_colony = env->num_ants / NUM_COLONIES; + for (int i = 0; i < env->num_ants; i++) { + env->ants[i].colony_id = i / ants_per_colony; + if (env->ants[i].colony_id >= NUM_COLONIES) { + env->ants[i].colony_id = NUM_COLONIES - 1; + } + spawn_ant(env, i); + } + + // Clear and respawn food + for (int i = 0; i < env->num_food_sources; i++) { + env->food_sources[i].amount = 0; + } + for (int i = 0; i < env->num_food_sources; i++) { + spawn_food(env); + } + + // Clear buffers + memset(env->rewards, 0, env->num_ants * sizeof(float)); + memset(env->terminals, 0, env->num_ants * sizeof(unsigned char)); + + compute_observations(env); +} + +// Update food collection and delivery - FOLLOWING TARGET update_goals PATTERN +void update_food_interactions(AntsEnv* env) { + for (int a = 0; a < env->num_ants; a++) { + Ant* ant = &env->ants[a]; + + // Check for food pickup + if (!ant->has_food) { + for (int f = 0; f < env->num_food_sources; f++) { + if (env->food_sources[f].amount > 0) { + float dist_sq = distance_squared(ant->position, env->food_sources[f].position); + if (dist_sq < (ANT_SIZE + FOOD_SIZE) * (ANT_SIZE + FOOD_SIZE)) { + ant->has_food = 
true; + env->food_sources[f].amount--; + + // Respawn food if depleted + if (env->food_sources[f].amount <= 0) { + spawn_food(env); + } + + // Simple reward + env->rewards[a] += env->reward_food_pickup; + env->log.episode_return += env->reward_food_pickup; + + // Track successful trip (ant found food) + env->log.successful_trips += 1.0f; + break; + } + } + } + } + + // Check for food delivery + if (ant->has_food) { + Colony* colony = &env->colonies[ant->colony_id]; + float dist_sq = distance_squared(ant->position, colony->position); + if (dist_sq < (ANT_SIZE + COLONY_SIZE) * (ANT_SIZE + COLONY_SIZE)) { + ant->has_food = false; + colony->food_collected++; + + // Reward and log update - WITH EFFICIENCY METRICS + env->rewards[a] += env->reward_delivery; + env->log.episode_return += env->reward_delivery; + env->log.episode_length += ant->steps_alive; + env->log.total_deliveries += 1.0f; + + // Track per-colony performance + if (ant->colony_id == 0) { + env->log.colony1_food += 1.0f; + } else { + env->log.colony2_food += 1.0f; + } + + // Update derived efficiency metrics + env->log.avg_delivery_steps = env->log.episode_length / env->log.total_deliveries; + + // Performance: Average steps per delivery (lower is better) + env->log.perf = env->log.avg_delivery_steps; + + // Score: Food deliveries per 1000 steps (higher is better) + env->log.score = (env->log.total_deliveries * 1000.0f) / env->log.episode_length; + + env->log.n += 1; // Episode count (number of deliveries) + + // Reset ant after delivery + ant->steps_alive = 0; + ant->steps_without_food = 0; // Reset exploration counter after successful delivery + } + } + } +} + +// Required function: step environment +void c_step(AntsEnv* env) { + env->tick++; + + // Clear rewards and terminals + memset(env->rewards, 0, env->num_ants * sizeof(float)); + memset(env->terminals, 0, env->num_ants * sizeof(unsigned char)); + + // Update all ants - SIMPLIFIED LIKE TARGET + for (int i = 0; i < env->num_ants; i++) { + Ant* ant = 
&env->ants[i]; + ant->steps_alive++; + if (!ant->has_food) { + ant->steps_without_food++; + } + + // Execute action + int action = env->actions[i]; + + // Exploration: Add small random perturbations when ant hasn't found food for a while + // This helps break out of circular patterns + if (!ant->has_food && ant->steps_without_food > 100) { + // 5% chance per step to add random exploration when stuck + if ((rand() % 100) < 5) { + // Add small random turn to encourage exploration + float random_turn = (random_float(-1.0f, 1.0f) * TURN_ANGLE * 2.0f); + ant->direction += random_turn; + ant->direction = wrap_angle(ant->direction); + } + } + + switch (action) { + case ACTION_TURN_LEFT: + ant->direction -= TURN_ANGLE; + ant->direction = wrap_angle(ant->direction); + break; + case ACTION_TURN_RIGHT: + ant->direction += TURN_ANGLE; + ant->direction = wrap_angle(ant->direction); + break; + case ACTION_MOVE_FORWARD: + ant->position.x += ANT_SPEED * cos(ant->direction); + ant->position.y += ANT_SPEED * sin(ant->direction); + break; + case ACTION_NOOP: + // Do nothing + break; + } + + // Wrap around edges + if (ant->position.x < 0) ant->position.x = env->width; + if (ant->position.x > env->width) ant->position.x = 0; + if (ant->position.y < 0) ant->position.y = env->height; + if (ant->position.y > env->height) ant->position.y = 0; + + // Automatic pheromone dropping when carrying food + if (ant->has_food) { + ant->steps_since_pheromone++; + if (ant->steps_since_pheromone >= PHEROMONE_DROP_INTERVAL) { + add_pheromone(env, ant->position, ant->colony_id, ant->direction); + ant->steps_since_pheromone = 0; + } + } + + // Periodic reset like target.c (lines 158-161) + if (ant->steps_alive % ANT_RESET_INTERVAL == 0) { + spawn_ant(env, i); + env->terminals[i] = 1; + env->log.total_resets += 1.0f; + } + } + + // Update pheromone evaporation + for (int i = 0; i < env->num_pheromones; i++) { + env->pheromones[i].strength -= PHEROMONE_EVAPORATION_RATE; + if (env->pheromones[i].strength <= 
0) { + // Remove evaporated pheromone (swap with last and shrink) + env->pheromones[i] = env->pheromones[env->num_pheromones - 1]; + env->num_pheromones--; + i--; // Check this slot again + } + } + + // Update food interactions + update_food_interactions(env); + + // Compute new observations + compute_observations(env); +} + +// Required function: render (with lazy client initialization) +void c_render(AntsEnv* env) { + if (env->client == NULL) { + InitWindow(env->width, env->height, "PufferLib Ants"); + SetTargetFPS(60); + env->client = (Client*)calloc(1, sizeof(Client)); + env->client->cell_size = 1; + env->client->width = env->width; + env->client->height = env->height; + env->client->show_vision_cones = true; // Start with vision cones on + env->client->show_pheromone_range = false; // Start with pheromone range off + } + + // Standard exit key + if (IsKeyDown(KEY_ESCAPE)) { + exit(0); + } + + // Toggle vision cones with 'V' key + if (IsKeyPressed(KEY_V)) { + env->client->show_vision_cones = !env->client->show_vision_cones; + } + + // Toggle pheromone range with 'P' key + if (IsKeyPressed(KEY_P)) { + env->client->show_pheromone_range = !env->client->show_pheromone_range; + } + + BeginDrawing(); + ClearBackground(BACKGROUND_COLOR); + + // Draw colonies + for (int i = 0; i < NUM_COLONIES; i++) { + Color color = (i == 0) ? COLONY1_COLOR : COLONY2_COLOR; + DrawCircle(env->colonies[i].position.x, env->colonies[i].position.y, + COLONY_SIZE, color); + } + + // Draw pheromones (before other objects for layering) + for (int i = 0; i < env->num_pheromones; i++) { + Color pheromone_color = (env->pheromones[i].colony_id == 0) ? 
// Required function: render (with lazy client initialization).
//
// The first call opens a raylib window sized to the environment and
// allocates the Client. Every call then handles the keyboard toggles and
// draws one frame. Draw order fixes the layering: colonies, pheromones,
// food, ants (with optional range/vision overlays), then the text overlay.
// Pressing ESC terminates the whole process via exit(0).
void c_render(AntsEnv* env) {
    if (env->client == NULL) {
        InitWindow(env->width, env->height, "PufferLib Ants");
        SetTargetFPS(60);
        env->client = (Client*)calloc(1, sizeof(Client));
        env->client->cell_size = 1;
        env->client->width = env->width;
        env->client->height = env->height;
        env->client->show_vision_cones = true; // Start with vision cones on
        env->client->show_pheromone_range = false; // Start with pheromone range off
    }

    // Standard exit key
    if (IsKeyDown(KEY_ESCAPE)) {
        exit(0);
    }

    // Toggle vision cones with 'V' key
    if (IsKeyPressed(KEY_V)) {
        env->client->show_vision_cones = !env->client->show_vision_cones;
    }

    // Toggle pheromone range with 'P' key
    if (IsKeyPressed(KEY_P)) {
        env->client->show_pheromone_range = !env->client->show_pheromone_range;
    }

    BeginDrawing();
    ClearBackground(BACKGROUND_COLOR);

    // Draw colonies
    for (int i = 0; i < NUM_COLONIES; i++) {
        Color color = (i == 0) ? COLONY1_COLOR : COLONY2_COLOR;
        DrawCircle(env->colonies[i].position.x, env->colonies[i].position.y,
                   COLONY_SIZE, color);
    }

    // Draw pheromones (before other objects for layering)
    for (int i = 0; i < env->num_pheromones; i++) {
        Color pheromone_color = (env->pheromones[i].colony_id == 0) ? PHEROMONE1_COLOR : PHEROMONE2_COLOR;
        // Fade the marker out as it evaporates: alpha scales with strength
        pheromone_color.a = (unsigned char)(100 * env->pheromones[i].strength);
        DrawCircle(env->pheromones[i].position.x, env->pheromones[i].position.y,
                   PHEROMONE_SIZE, pheromone_color);
    }

    // Draw food (only non-empty slots), labelled with the remaining amount
    for (int i = 0; i < env->num_food_sources; i++) {
        if (env->food_sources[i].amount > 0) {
            DrawCircle(env->food_sources[i].position.x, env->food_sources[i].position.y,
                       FOOD_SIZE, FOOD_COLOR);
            DrawText(TextFormat("%d", env->food_sources[i].amount),
                     env->food_sources[i].position.x - 5,
                     env->food_sources[i].position.y - 5, 10, RAYWHITE);
        }
    }

    // Draw ants with optional vision cones and pheromone range
    for (int i = 0; i < env->num_ants; i++) {
        Ant* ant = &env->ants[i];
        Color ant_color = (ant->colony_id == 0) ? COLONY1_COLOR : COLONY2_COLOR;

        // Draw pheromone range if enabled (semi-transparent circle)
        if (env->client->show_pheromone_range) {
            Color pheromone_range_color = ant_color;
            pheromone_range_color.a = 15; // Very transparent
            DrawCircle(ant->position.x, ant->position.y, ANT_PHEROMONE_RANGE, pheromone_range_color);
        }

        // Draw vision cone if enabled (semi-transparent)
        if (env->client->show_vision_cones) {
            Color vision_color = ant_color;
            vision_color.a = 30; // Very transparent

            // Calculate vision cone arc (heading +/- half the vision angle,
            // converted from radians to degrees)
            float start_angle = (ant->direction - ANT_VISION_ANGLE / 2.0f) * 180.0f / M_PI;
            float end_angle = (ant->direction + ANT_VISION_ANGLE / 2.0f) * 180.0f / M_PI;

            DrawCircleSector(
                (Vector2){ant->position.x, ant->position.y},
                ANT_VISION_RANGE,
                start_angle,
                end_angle,
                32, // segments for smooth arc
                vision_color
            );
        }

        // Change color if carrying food (after the overlays, which keep the
        // colony color)
        if (ant->has_food) {
            ant_color = FOOD_COLOR;
        }

        DrawCircle(ant->position.x, ant->position.y, ANT_SIZE, ant_color);

        // Direction indicator (pointing forward)
        float dir_x = ant->position.x + (ANT_SIZE * 1.5f) * cos(ant->direction);
        float dir_y = ant->position.y + (ANT_SIZE * 1.5f) * sin(ant->direction);
        DrawLine(ant->position.x, ant->position.y, dir_x, dir_y, RAYWHITE);
    }

    // Draw UI - Colony scores (count and share of all deliveries)
    DrawText(TextFormat("Colony 1: %d (%.1f%%)",
                        env->colonies[0].food_collected,
                        env->log.total_deliveries > 0 ? (env->log.colony1_food / env->log.total_deliveries * 100.0f) : 0.0f),
             20, 20, 20, COLONY1_COLOR);
    DrawText(TextFormat("Colony 2: %d (%.1f%%)",
                        env->colonies[1].food_collected,
                        env->log.total_deliveries > 0 ? (env->log.colony2_food / env->log.total_deliveries * 100.0f) : 0.0f),
             20, 50, 20, COLONY2_COLOR);

    // Efficiency metrics
    DrawText(TextFormat("Efficiency: %.1f steps/food", env->log.avg_delivery_steps),
             20, 80, 18, YELLOW);
    DrawText(TextFormat("Throughput: %.2f food/1000 steps", env->log.score),
             20, 105, 18, YELLOW);

    // Success rate: food pickups per periodic ant reset, as a percentage
    float success_rate = env->log.total_resets > 0
        ? (env->log.successful_trips / env->log.total_resets * 100.0f)
        : 0.0f;
    DrawText(TextFormat("Success Rate: %.1f%%", success_rate),
             20, 130, 18, GREEN);

    // Right side - System info
    DrawText(TextFormat("Tick: %d", env->tick), env->width - 120, 20, 20, RAYWHITE);
    DrawText(TextFormat("Pheromones: %d", env->num_pheromones), env->width - 180, 50, 20, RAYWHITE);
    DrawText(TextFormat("Deliveries: %.0f", env->log.total_deliveries), env->width - 180, 75, 18, RAYWHITE);

    // Controls help
    const char* vision_status = env->client->show_vision_cones ? "ON" : "OFF";
    const char* pheromone_status = env->client->show_pheromone_range ? "ON" : "OFF";
    DrawText(TextFormat("[V] Vision Cones: %s", vision_status), 20, env->height - 30, 16, RAYWHITE);
    DrawText(TextFormat("[P] Pheromone Range: %s", pheromone_status), 20, env->height - 50, 16, RAYWHITE);
    DrawText("[ESC] Exit", 20, env->height - 70, 16, GRAY);

    EndDrawing();
}

// Required function: cleanup.
//
// Frees the ant array and, if rendering was ever started, closes the window
// and frees the client. Pointers are reset to NULL so a second call is a
// no-op. The observation/action/reward/terminal buffers are not freed here.
void c_close(AntsEnv* env) {
    if (env->ants) {
        free(env->ants);
        env->ants = NULL;
    }
    if (env->client != NULL) {
        CloseWindow();
        free(env->client);
        env->client = NULL;
    }
}
class AntsEnv(pufferlib.PufferEnv):
    """
    Ant Colony Simulation Environment

    Two colonies compete to collect food from the environment.
    Simplified architecture following the Target environment pattern.

    Observations (27 per ant):
    - colony_dx, colony_dy: Direction to home colony (normalized)
    - food_dx, food_dy: Direction to nearest VISIBLE food (normalized, with vision constraints)
    - pheromone1_dx, pheromone1_dy, pheromone1_direction, pheromone1_strength: Top strongest pheromone from own colony (within pheromone range)
    - pheromone2_dx, pheromone2_dy, pheromone2_direction, pheromone2_strength: 2nd strongest pheromone
    - pheromone3_dx, pheromone3_dy, pheromone3_direction, pheromone3_strength: 3rd strongest pheromone
    - pheromone4_dx, pheromone4_dy, pheromone4_direction, pheromone4_strength: 4th strongest pheromone
    - pheromone5_dx, pheromone5_dy, pheromone5_direction, pheromone5_strength: 5th strongest pheromone
    - has_food: Binary flag (0 or 1)
    - heading: Ant's current direction (normalized)
    - density: Number of friendly ants within pheromone range (normalized)

    Vision System:
    - Ants have limited vision range (75 pixels) for seeing food
    - Vision cone of 60 degrees (π/3) - wider beam for better exploration
    - Can only see food within their vision cone

    Pheromone Sensing:
    - Separate from vision: 100 pixels range, 360 degrees (omnidirectional)
    - Can sense top 5 strongest pheromones from own colony within this range
    - Pheromones are ranked by strength (not distance)
    - Also used to detect nearby friendly ants (density)

    Pheromone System:
    - Ants automatically drop pheromones every 5 steps while carrying food
    - Pheromones evaporate over time (rate: 0.005 per step, i.e. a marker
      deposited at strength 1.0 lasts 200 steps) - faster evaporation to break loops
    - Each colony's pheromones are distinct
    - Ants only observe pheromones from their own colony

    Exploration Mechanism:
    - Ants that haven't found food for 100+ steps have a 5% chance per step to add random exploration turns
    - This helps break out of circular patterns and encourages map exploration

    Actions (Discrete 4):
        0: Turn left
        1: Turn right
        2: Move forward
        3: No-op
    """

    def __init__(
            self,
            num_envs=1,
            width=1280,
            height=720,
            num_ants=32,
            reward_food_pickup=0.1,
            reward_delivery=10.0,
            render_mode=None,
            log_interval=128,
            buf=None,
            seed=0):
        """Create ``num_envs`` C environments with ``num_ants`` ants each.

        Args:
            num_envs: Number of independent C environments to vectorize.
            width: Environment width in pixels.
            height: Environment height in pixels.
            num_ants: Ants per environment (each ant is one agent).
            reward_food_pickup: Reward for picking up food.
            reward_delivery: Reward for delivering food to the colony.
            render_mode: Stored on the instance; not otherwise used here.
            log_interval: ``step`` polls the C log every this many steps.
            buf: Passed through to ``pufferlib.PufferEnv.__init__``.
            seed: Base seed; environment ``i`` is created with ``seed + i``.
        """

        # Observation space: 27 values per ant (colony, food, 5 pheromones × 4 values each, has_food, heading, density)
        self.single_observation_space = gymnasium.spaces.Box(
            low=-1.0, high=1.0, shape=(27,), dtype=np.float32
        )
        # Discrete action space: turn left, turn right, move forward, noop
        self.single_action_space = gymnasium.spaces.Discrete(4)

        self.render_mode = render_mode
        self.num_agents = num_envs * num_ants
        self.log_interval = log_interval

        super().__init__(buf)

        # Start the step counter here as well as in reset(), so that calling
        # step() before reset() cannot fail on a missing attribute.
        self.tick = 0

        # Each C env gets its own contiguous slice of the shared buffers.
        c_envs = []
        for i in range(num_envs):
            agents = slice(i * num_ants, (i + 1) * num_ants)
            c_env = binding.env_init(
                self.observations[agents],
                self.actions[agents],
                self.rewards[agents],
                self.terminals[agents],
                self.truncations[agents],
                seed + i,  # Unique seed per env
                width=width,
                height=height,
                num_ants=num_ants,
                reward_food_pickup=reward_food_pickup,
                reward_delivery=reward_delivery
            )
            c_envs.append(c_env)

        self.c_envs = binding.vectorize(*c_envs)

    def reset(self, seed=0):
        """Reset all environments and return ``(observations, [])``."""
        binding.vec_reset(self.c_envs, seed)
        self.tick = 0
        return self.observations, []

    def step(self, actions):
        """Execute one step for all agents.

        Returns ``(observations, rewards, terminals, truncations, info)``.
        ``info`` holds the aggregated C log on every ``log_interval``-th step
        when the log is non-empty, and is an empty list otherwise.
        """
        self.tick += 1
        self.actions[:] = actions
        binding.vec_step(self.c_envs)

        info = []
        if self.tick % self.log_interval == 0:
            log = binding.vec_log(self.c_envs)
            if log:
                info.append(log)

        return (self.observations, self.rewards,
                self.terminals, self.truncations, info)

    def render(self):
        """Render the first environment"""
        binding.vec_render(self.c_envs, 0)

    def close(self):
        """Clean up resources"""
        binding.vec_close(self.c_envs)
+ steps = 0 + + CACHE = 1024 + actions = np.random.randint(4, size=(CACHE, N)) + + i = 0 + import time + start = time.time() + while time.time() - start < 10: + env.step(actions[i % CACHE]) + steps += env.num_agents + i += 1 + + print('Ants SPS:', int(steps / (time.time() - start))) diff --git a/pufferlib/ocean/ants/binding.c b/pufferlib/ocean/ants/binding.c new file mode 100644 index 000000000..835cbab9e --- /dev/null +++ b/pufferlib/ocean/ants/binding.c @@ -0,0 +1,24 @@ +#include "ants.h" + +#define Env AntsEnv +#include "../env_binding.h" + +static int my_init(Env* env, PyObject* args, PyObject* kwargs) { + env->width = unpack(kwargs, "width"); + env->height = unpack(kwargs, "height"); + env->num_ants = unpack(kwargs, "num_ants"); + env->reward_food_pickup = unpack(kwargs, "reward_food_pickup"); + env->reward_delivery = unpack(kwargs, "reward_delivery"); + + init(env); + return 0; +} + +static int my_log(PyObject* dict, Log* log) { + assign_to_dict(dict, "perf", log->perf); + assign_to_dict(dict, "score", log->score); + assign_to_dict(dict, "episode_return", log->episode_return); + assign_to_dict(dict, "episode_length", log->episode_length); + assign_to_dict(dict, "n", log->n); + return 0; +} diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py index 6c56a4ea2..bd92f5a5d 100644 --- a/pufferlib/ocean/environment.py +++ b/pufferlib/ocean/environment.py @@ -117,6 +117,9 @@ def make_multiagent(buf=None, **kwargs): return pufferlib.emulation.PettingZooPufferEnv(env=env, buf=buf) MAKE_FUNCTIONS = { + 'ants': 'AntsEnv', + 'breakout': 'Breakout', + 'blastar': 'Blastar', 'battle': 'Battle', 'breakout': 'Breakout', 'blastar': 'Blastar', diff --git a/save_net_flat.py b/save_net_flat.py new file mode 100644 index 000000000..327cde38e --- /dev/null +++ b/save_net_flat.py @@ -0,0 +1,125 @@ +import torch +from torch.nn import functional as F +import numpy as np +import collections + +def save_model_weights(model, filename): + weights = [] + for name, 
def test_lstm():
    """Check a hand-written single LSTM step against ``torch.nn.LSTM``.

    Inputs, initial state, weights and biases are deterministic ramps
    (``arange / 100000``). The same step is computed twice: once by a
    one-layer ``torch.nn.LSTM`` loaded with those weights, and once by
    explicit matrix products followed by the i/f/g/o gate non-linearities.
    The two hidden states are then compared.

    Prints the reference output and the largest absolute deviation.

    Raises:
        AssertionError: If the manual hidden state does not match the
            ``torch.nn.LSTM`` output within tolerance.
    """
    batch_size = 16
    input_size = 128
    hidden_size = 128

    def ramp(*shape):
        # 0, 1, 2, ... laid out in `shape` and scaled down to keep gates small.
        return torch.arange(torch.Size(shape).numel()).reshape(*shape).float() / 100000

    x = ramp(1, batch_size, input_size)
    state = (
        ramp(1, batch_size, hidden_size),
        ramp(1, batch_size, hidden_size),
    )
    weights_input = ramp(4*hidden_size, input_size)
    weights_state = ramp(4*hidden_size, hidden_size)
    bias_input = ramp(4*hidden_size)
    bias_state = ramp(4*hidden_size)

    lstm = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=1)
    with torch.no_grad():
        lstm.weight_ih_l0.copy_(weights_input)
        lstm.weight_hh_l0.copy_(weights_state)
        lstm.bias_ih_l0.copy_(bias_input)
        lstm.bias_hh_l0.copy_(bias_state)
        output, new_state = lstm(x, state)

    # Manual step on 2-D (batch, features) tensors: drop the sequence/layer axis.
    step_input = x.squeeze(0)
    h, c = (tensor.squeeze(0) for tensor in state)

    gates = (
        torch.matmul(step_input, weights_input.T) + bias_input
        + torch.matmul(h, weights_state.T) + bias_state
    )

    # The stacked weights are split in the i, f, g, o gate order.
    i, f, g, o = torch.split(gates, hidden_size, dim=1)

    i = torch.sigmoid(i)
    f = torch.sigmoid(f)
    g = torch.tanh(g)
    o = torch.sigmoid(o)

    c = f*c + i*g
    h = o*torch.tanh(c)

    print('Output:', output)

    reference = output.squeeze(0)
    max_error = (reference - h).abs().max().item()
    print('Max abs error vs manual LSTM:', max_error)
    if not torch.allclose(reference, h, rtol=1e-4, atol=1e-4):
        raise AssertionError(
            f'manual LSTM step deviates from torch.nn.LSTM (max abs error {max_error})'
        )
b/scripts/build_ocean.sh index 88909d44f..334fab9b7 100755 --- a/scripts/build_ocean.sh +++ b/scripts/build_ocean.sh @@ -5,6 +5,7 @@ ENV=$1 MODE=${2:-local} PLATFORM="$(uname -s)" +ARCH="$(uname -m)" SRC_DIR="pufferlib/ocean/$ENV" WEB_OUTPUT_DIR="build_web/$ENV" RAYLIB_NAME='raylib-5.5_macos' @@ -12,6 +13,11 @@ BOX2D_NAME='box2d-macos-arm64' if [ "$PLATFORM" = "Linux" ]; then RAYLIB_NAME='raylib-5.5_linux_amd64' BOX2D_NAME='box2d-linux-amd64' + if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then + RAYLIB_NAME='raylib-5.5_linux_aarch64' + else + RAYLIB_NAME='raylib-5.5_linux_amd64' + fi fi if [ "$MODE" = "web" ]; then RAYLIB_NAME='raylib-5.5_webassembly' diff --git a/setup.py b/setup.py index 8a8444dd6..c8f3e1468 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,15 @@ # Build raylib for your platform RAYLIB_URL = 'https://github.com/raysan5/raylib/releases/download/5.5/' -RAYLIB_NAME = 'raylib-5.5_macos' if platform.system() == "Darwin" else 'raylib-5.5_linux_amd64' +if platform.system() == "Darwin": + RAYLIB_NAME = 'raylib-5.5_macos' +else: + # Detect Linux architecture + arch = platform.machine() + if arch == 'aarch64' or arch == 'arm64': + RAYLIB_NAME = 'raylib-5.5_linux_aarch64' + else: + RAYLIB_NAME = 'raylib-5.5_linux_amd64' RLIGHTS_URL = 'https://raw.githubusercontent.com/raysan5/raylib/refs/heads/master/examples/shaders/rlights.h' def download_raylib(platform, ext): @@ -129,6 +137,11 @@ def download_box2d(platform): extra_link_args += [ '-Bsymbolic-functions', ] + arch = platform.machine() + if arch == 'aarch64' or arch == 'arm64': + download_raylib('raylib-5.5_linux_aarch64', '.tar.gz') + else: + download_raylib('raylib-5.5_linux_amd64', '.tar.gz') elif system == 'Darwin': extra_compile_args += [ '-Wno-error=int-conversion',