Zapper9982 · Zapper9982 · Jun 13, 2025
diff --git a/.github/workflows/coverage_check.yml b/.github/workflows/coverage_check.yml
@@ -0,0 +1,82 @@
+name: Java Test Coverage Check
+
+on:
+  push:
+    branches: [ main, master ] # Branches whose pushes trigger this workflow; adjust as needed
+  pull_request:
+    branches: [ main, master ] # Target branches whose pull requests trigger this workflow; adjust as needed
+
+jobs:
+  test-coverage-check:
+    runs-on: ubuntu-latest
+
+    env:
+      # SPRING_BOOT_PROJECT_ROOT: ${{ github.workspace }}/your-spring-boot-project-dir # Use this form instead if the project is in a subdirectory
+      SPRING_BOOT_PROJECT_ROOT: ${{ github.workspace }} # Assumes the Spring Boot project is at the root of the repo
+      # GOOGLE_API_KEY (required by TestCaseGenerator) is injected at step level below, from the repository secret
+      # MAX_ITERATIONS: 3 # Optional: override the default max iterations
+      # TARGET_COVERAGE: 0.9 # Optional: override the default target coverage
+      # BUILD_TOOL: "maven" # Optional: maven or gradle (scripts default to maven); also gates the setup steps below
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Set up JDK
+      uses: actions/setup-java@v4
+      with:
+        java-version: '17' # Java version required by the Spring Boot project
+        distribution: 'temurin' # Any distribution supported by setup-java can be used instead
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.10' # Python version used to run the test generator scripts (quoted so it is not read as 3.1)
+
+    - name: Install Python dependencies
+      run: |
+        python -m pip install --upgrade pip
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+
+    - name: Set up Maven (if using Maven) # Runs only when the `if` condition below holds
+      if: env.BUILD_TOOL == 'maven' || env.BUILD_TOOL == '' # An unset BUILD_TOOL evaluates to '' here, so Maven is the default
+      run: |
+        # Maven is usually pre-installed on ubuntu-latest runners. This step can ensure specific version or settings if needed.
+        mvn --version
+
+    - name: Set up Gradle (if using Gradle) # Runs only when the `if` condition below holds
+      if: env.BUILD_TOOL == 'gradle'
+      run: |
+        # Gradle might need to be set up or ensured it's available
+        # Ensure gradlew is executable if it exists
+        if [ -f "./gradlew" ]; then chmod +x ./gradlew; fi
+        ./gradlew --version # Check if gradlew wrapper is present and executable
+
+    - name: Run Test Generation and Coverage Check Pipeline
+      run: |
+        # Ensure SPRING_BOOT_PROJECT_ROOT is correctly set if it's a subdirectory
+        # export SPRING_BOOT_PROJECT_ROOT=${{ github.workspace }}/path-to-your-java-project
+        # Ensure GOOGLE_API_KEY is available if your LLM calls are not mocked
+        if [ -z "$GOOGLE_API_KEY" ]; then
+          echo "Warning: GOOGLE_API_KEY is not set. LLM calls might fail if not mocked."
+          # You might want to fail here if the key is essential:
+          # echo "Error: GOOGLE_API_KEY is required."
+          # exit 1
+        fi
+
+        echo "Running main pipeline from src/main.py..."
+        python src/main.py
+      env:
+        # Step-scoped: the secret is exposed as an environment variable to this step's script only
+        GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+
+    # Publishes whichever JaCoCo XML report the build produced as a workflow artifact
+    - name: Upload JaCoCo Report
+      if: always() # Upload even when an earlier step fails, so the last report is kept
+      uses: actions/upload-artifact@v4
+      with:
+        name: jacoco-report
+        path: | # Maven report location first, Gradle second; adjust to the actual build output
+          ${{ env.SPRING_BOOT_PROJECT_ROOT }}/target/site/jacoco/jacoco.xml
+          ${{ env.SPRING_BOOT_PROJECT_ROOT }}/build/reports/jacoco/test/jacocoTestReport.xml
+        if-no-files-found: ignore # A missing path is not an error (e.g. Maven vs Gradle)
diff --git a/README.md b/README.md
@@ -4,46 +4,136 @@
 
 ## 🚀 Project Overview
 
-Welcome to the **Java Test Generation Suite**! This project aims to automate the creation of comprehensive **JUnit 5** test cases for Java Spring Boot services and controllers, leveraging **Mockito** for mocking and **MockMvc** for integration testing. Our goal is to significantly reduce the manual effort involved in writing tests, thereby accelerating development cycles and improving code quality.
+Welcome to the **Java Test Generation Suite**! This project automates the creation of JUnit 5 test cases for Java Spring Boot applications. It leverages Large Language Models (LLMs) to generate tests and iteratively improves test coverage based on feedback from JaCoCo code coverage reports. The core idea is to intelligently identify and target under-tested areas of the codebase, dynamically adjusting prompts to the LLM to generate more effective tests over multiple iterations.
 
 ---
 
-## 💡 The Vision: Automated Test Generation
+## 📈 Key Features
 
-Our suite follows a structured approach to dynamically generate relevant test cases. Here's a high-level look at the project's flow:
+*   **Automated Test Generation**: Utilizes LLMs (specifically Gemini 1.5 Flash via Google API) to generate JUnit 5 test cases for Spring Boot services and controllers.
+*   **Iterative Coverage Improvement**: Employs an iterative process where JaCoCo code coverage reports are analyzed after each round of test generation and execution.
+*   **Targeted Prompt Engineering**: Dynamically adjusts prompts to the LLM to focus on methods and classes that have low coverage, aiming to improve specific areas.
+*   **Build Tool Integration**: Supports both Maven and Gradle based Spring Boot projects for building the project and generating JaCoCo reports.
+*   **Vector Database for Context**: Uses ChromaDB with Hugging Face embeddings (BAAI/bge-small-en-v1.5) to store and retrieve relevant code chunks, providing context to the LLM.
+*   **Configurable Pipeline**: Key parameters like maximum iterations, target coverage percentage, and build tool can be configured via environment variables.
+*   **GitHub Actions Workflow**: Includes a CI workflow (`.github/workflows/coverage_check.yml`) to automate the test generation and coverage checking process, suitable for integration into development pipelines.
+*   **Modular Design**: The system is broken down into distinct Python scripts/modules for pre-processing, code analysis, chunking, embedding, LLM interaction, and test execution.
 
-![Project Flow](https://github.com/user-attachments/assets/c51a2223-bcdc-45ea-a8fc-11449e504b86)
+---
+
+## 🌊 Project Flow
+
+The system operates in an iterative loop:
+
+1.  **Pre-processing**: Java source files are processed to remove comments, empty lines, etc.
+2.  **Code Analysis**: The Spring Boot application is analyzed to identify target classes (services, controllers) for test generation.
+3.  **Chunking & Embedding**: The relevant Java source code (target classes and their dependencies) is split into semantic chunks and stored in a ChromaDB vector database.
+4.  **Test Case Generation**: For each target class (or focused methods within a class):
+    *   Relevant code chunks are retrieved from ChromaDB.
+    *   An LLM (Gemini 1.5 Flash) is prompted with the code context and specific instructions (including focusing on low-coverage methods in later iterations) to generate a JUnit 5 test class.
+    *   The generated test is saved to the appropriate test directory in the target Spring Boot project.
+5.  **Test Execution & Coverage Analysis**:
+    *   The target Spring Boot project is built using the configured build tool (Maven/Gradle).
+    *   Tests (including newly generated ones) are executed.
+    *   A JaCoCo code coverage report (XML) is generated.
+6.  **Coverage Evaluation & Iteration**:
+    *   The JaCoCo report is parsed to determine overall line coverage and method-level coverage.
+    *   If the overall coverage meets `TARGET_COVERAGE`, or `MAX_ITERATIONS` is reached, the process stops.
+    *   Otherwise, methods with coverage below the target are identified.
+    *   The list of these low-coverage methods is fed back into the Test Case Generation step for the next iteration, guiding the LLM to focus on these areas.
+
+*(Note: A visual diagram illustrating this iterative flow would be beneficial here but is out of scope for this text-based update.)*
 
 ---
 
-## 📈 Current Progress
+## 🛠️ How to Run
+
+Follow these steps to set up and run the Java Test Generation Suite:
+
+**1. Prerequisites:**
+
+*   **Python**: Version 3.9 or higher.
+*   **Java Development Kit (JDK)**: Version 11, 17, or as required by your Spring Boot project.
+*   **Build Tool**: Apache Maven or Gradle installed and configured for your Spring Boot project.
+*   **Git**: For cloning the repository.
+*   **Python Dependencies**: Install using `pip install -r requirements.txt` (ensure this file is present and up-to-date in the repository).
+
+**2. Environment Variables (Crucial):**
+
+You **must** set the following environment variables:
+
+*   `SPRING_BOOT_PROJECT_ROOT`: The absolute path to the root directory of your target Java Spring Boot project.
+    *   Example: `export SPRING_BOOT_PROJECT_ROOT="/home/user/dev/my-spring-app"`
+*   `GOOGLE_API_KEY`: Your Google API key for accessing the Gemini LLM.
+    *   Example: `export GOOGLE_API_KEY="AIzaSy..."`
+
+Optionally, you can also set these to override defaults:
+
+*   `MAX_ITERATIONS`: Maximum number of test generation iterations. Defaults to `5` (as set in `src/main.py`).
+    *   Example: `export MAX_ITERATIONS=3`
+*   `TARGET_COVERAGE`: Desired overall line coverage, expressed as a fraction between 0.0 and 1.0 (not a percentage). Defaults to `0.9` (i.e., 90%) (as set in `src/main.py`).
+    *   Example: `export TARGET_COVERAGE=0.85`
+*   `BUILD_TOOL`: Specify "maven" or "gradle". Defaults to "maven" if not set (this default is handled by `src/test_runner/java_test_runner.py` and `src/main.py`).
+    *   Example: `export BUILD_TOOL="gradle"`
 
-I have laid a strong foundation for the core functionalities of the test generation suite:
+**3. Execution:**
 
-1.  **Semantic Chunker Developed:** Successfully built a robust chunker that creates **semantic chunks** from source code. These chunks are enriched with ample metadata, crucial for the LLM's understanding and context retention.
-2.  **Embedder Functionality Implemented:** Created a function to embed these semantic chunks into our vector database, enabling efficient similarity searches.
-3.  **Retrieval QA Chain Initiated:** Set up the initial **LangChain QA retrieval chain**. This foundational step allows us to fetch relevant documents (currently hardcoded for proof of concept) based on queries.
+It is recommended to use the provided shell script to run the pipeline:
+
+*   **Using `run.sh` (Recommended):**
+    1.  Make the script executable: `chmod +x run.sh`
+    2.  **Important**: Edit `run.sh` to set your `SPRING_BOOT_PROJECT_ROOT` and `GOOGLE_API_KEY` values. You can also uncomment and set optional variables.
+    3.  Execute the script: `./run.sh`
+
+    The `run.sh` script includes sanity checks for the required environment variables.
+
+*   **Directly using `python src/main.py`:**
+    1.  Ensure all required environment variables are exported in your current shell session.
+    2.  Run the main script: `python3 src/main.py` (or `python src/main.py` depending on your Python installation).
+
+**4. Output:**
+
+*   The pipeline will log its progress to the console.
+*   Generated test files will be saved directly into the `src/test/java/...` directory of your `SPRING_BOOT_PROJECT_ROOT`.
+*   JaCoCo reports will be generated in the standard output directories of your build tool (e.g., `target/site/jacoco/jacoco.xml` for Maven).
 
 ---
 
-## 🎯 Next Week's Goals
+## ⚙️ GitHub Workflow for CI
 
- immediate focus is on bringing dynamic intelligence and robustness to the system:
+This project includes a GitHub Actions workflow defined in `.github/workflows/coverage_check.yml`. This workflow automates the execution of the test generation and coverage analysis pipeline on pushes and pull requests to the `main` or `master` branches.
 
-1.  **Complete Dynamic Retrieval QA Chain:** Fully implement the dynamic retrieval process to intelligently fetch context for test case generation.
-2.  **Develop CodeAnalyser Function:** Build out the `CodeAnalyser` function. This crucial component will parse Java source files and leverage **JaCoCo reports** to dynamically adjust prompts, ensuring the generated test cases are highly relevant and target areas needing coverage.
-3.  **Optimize LLM API Usage:** Devise strategies to run the entire chain efficiently, specifically addressing and mitigating **rate limiting** issues with the Gemini or OpenAI API keys.
+Key aspects of the workflow:
+*   Sets up Java and Python environments.
+*   Installs Python dependencies.
+*   Runs `src/main.py`.
+*   Requires `GOOGLE_API_KEY` to be set as a repository secret in GitHub Actions settings.
+*   `SPRING_BOOT_PROJECT_ROOT` is assumed to be the root of the checked-out repository by default but can be configured.
+*   The workflow's success or failure is determined by the exit code of `src/main.py` (i.e., whether the target coverage was achieved).
+*   It uploads the JaCoCo XML reports as build artifacts when they exist, even if the pipeline fails.
 
 ---
 
-## 🛠️ Tech Stack
+## 💻 Tech Stack
 
-This project is built using a powerful combination of modern technologies:
+*   **Orchestration & Logic**: Python 3
+*   **LLM Interaction**: LangChain, Google Generative AI (for Gemini 1.5 Flash)
+*   **Vector Database**: ChromaDB
+*   **Embeddings**: Hugging Face BAAI/bge-small-en-v1.5
+*   **Java Build & Coverage**: Maven or Gradle, JaCoCo
+*   **CI/CD**: GitHub Actions
 
-* **Python:** The primary programming language orchestrating the entire suite.
-* **LangChain:** Utilized for advanced capabilities like semantic chunking and constructing the QA retrieval chain.
-* **ChromaDB:** Our chosen Vector Database for storing and retrieving embedded code chunks efficiently.
-* **Hugging Face (BAAI/bge-small-en-v1.5):** The embedding model used for converting code chunks into high-quality vector representations.
-* **LLM Model - Groq Llama (llama3-8b-8192):** Currently leveraging this fast and efficient LLM for test case generation (exploring alternatives for scalability).
+---
+
+## ☁️ Future Enhancements / TODO
+
+*   Refine prompt engineering for even more precise test generation.
+*   Optimize context retrieval from ChromaDB.
+*   Allow selection of specific classes/packages to target from `code_analyzer.py` output.
+*   More sophisticated error handling and recovery within the pipeline.
+*   UI for easier configuration and monitoring (potentially).
+*   Support for other types of tests (e.g., performance, security) if feasible.
+
+---
 
-* GOOD NEWS -  INTEGRATION WITH Gemini-1.5-flash was sucessfull and now will be used to generate testcases . :))
+*This README provides a guide to understanding, running, and contributing to the Java Test Generation Suite.*
diff --git a/run.sh b/run.sh
@@ -1,24 +1,49 @@
 #!/usr/bin/env bash
-set -e
-
-echo "[Pipeline] Starting preprocessing..."
-
-python3 pre-processing/processing.py
-
-echo "[Pipeline] Starting Spring Boot application analysis..."
-
-python3 src/analyzer/code_analyzer.py
-
-echo "[Pipeline] Starting code chunking..."
-
-python3 scripts/chunker.py
-
-echo "[Pipeline] Starting embedding chunks into ChromaDB..."
-
-python3 scripts/embed_chunks.py
-
-echo "[Pipeline] Starting test case generation..."
-
-python3 src/llm/test_case_generator.py
-
-echo "[Pipeline] All steps completed. Test generation pipeline finished."
+set -e # Exit immediately if a command exits with a non-zero status.
+
+# --- Configuration ---
+# REQUIRED: Set the absolute path to your Spring Boot project
+export SPRING_BOOT_PROJECT_ROOT="/path/to/your/spring-boot-project"
+
+# REQUIRED: Set your Google API Key for the LLM
+export GOOGLE_API_KEY="YOUR_GOOGLE_API_KEY"
+
+# OPTIONAL: Override default settings from src/main.py
+# export MAX_ITERATIONS=3       # Default is 5
+# export TARGET_COVERAGE=0.85   # Default is 0.9 (90%)
+# export BUILD_TOOL="gradle"    # Default is "maven"
+
+# --- Sanity Checks ---
+if [ -z "$SPRING_BOOT_PROJECT_ROOT" ] || [ "$SPRING_BOOT_PROJECT_ROOT" == "/path/to/your/spring-boot-project" ]; then
+  echo "ERROR: SPRING_BOOT_PROJECT_ROOT is not set or is still the placeholder value."
+  echo "Please edit run.sh and set it to the absolute path of your Spring Boot project."
+  exit 1
+fi
+
+if [ -z "$GOOGLE_API_KEY" ] || [ "$GOOGLE_API_KEY" == "YOUR_GOOGLE_API_KEY" ]; then
+  echo "ERROR: GOOGLE_API_KEY is not set or is still the placeholder value."
+  echo "Please edit run.sh and set your Google API key."
+  exit 1
+fi
+
+if [ ! -d "$SPRING_BOOT_PROJECT_ROOT" ]; then
+  echo "ERROR: SPRING_BOOT_PROJECT_ROOT directory does not exist: $SPRING_BOOT_PROJECT_ROOT"
+  exit 1
+fi
+
+# --- Run the main pipeline ---
+echo "[Pipeline] Starting Java Test Generation Suite via src/main.py..."
+echo "Spring Boot Project: $SPRING_BOOT_PROJECT_ROOT"
+echo "Build Tool: ${BUILD_TOOL:-maven}" # Print default if not set
+echo "Max Iterations: ${MAX_ITERATIONS:-5}"
+echo "Target Coverage: ${TARGET_COVERAGE:-0.9}"
+
+# Ensure src/main.py is executable or called with python
+if [ -f "src/main.py" ]; then
+  python3 src/main.py
+else
+  echo "ERROR: src/main.py not found!"
+  exit 1
+fi
+
+echo "[Pipeline] Execution finished."
diff --git a/src/analyzer/code_analyzer.py b/src/analyzer/code_analyzer.py
@@ -5,7 +5,11 @@
 
 #imports
 TESTGEN_AUTOMATION_ROOT = Path(__file__).parent.parent.parent 
-SPRING_BOOT_PROJECT_ROOT = Path("/Users/tanmay/Desktop/AMRIT/BeneficiaryID-Generation-API")
+import os
+SPRING_BOOT_PROJECT_ROOT_STR = os.getenv("SPRING_BOOT_PROJECT_ROOT")
+if not SPRING_BOOT_PROJECT_ROOT_STR:
+    raise ValueError("Environment variable SPRING_BOOT_PROJECT_ROOT not set.")
+SPRING_BOOT_PROJECT_ROOT = Path(SPRING_BOOT_PROJECT_ROOT_STR)
 SPRING_BOOT_MAIN_JAVA_DIR = SPRING_BOOT_PROJECT_ROOT / "src" / "main" / "java"
 PROCESSED_OUTPUT_ROOT = TESTGEN_AUTOMATION_ROOT / "processed_output"
 TESTGEN_AUTOMATION_SRC_DIR = TESTGEN_AUTOMATION_ROOT / "src"