Merged
Commits
33 commits
49d06ad
add PyTest framework
WangGLJoseph Mar 6, 2025
280c6e7
update pytest yml
WangGLJoseph Mar 6, 2025
c9e6b53
update CI
Mar 6, 2025
4a66b6a
activate workflow
Mar 6, 2025
5cf1881
fix branch name
Mar 6, 2025
150ae70
add missing checkout
Mar 6, 2025
6a752f1
Merge branch 'main' into pytest-ci
Mar 6, 2025
b594195
add correct docker command
Mar 6, 2025
63bea68
add sonar source files
Mar 6, 2025
5c6bdc7
output converage to both term and xml
Mar 6, 2025
757cf93
use acr-pytest for container name
Mar 6, 2025
830de0f
Update pytest.yml
Marti2203 Mar 6, 2025
78a89bf
Update sonar-project.properties
Marti2203 Mar 6, 2025
8a685f6
sonar fix and update sonar exclusions
Mar 10, 2025
b1ebfb1
add coverage file path
Mar 11, 2025
3292ab3
reduce code duplication
Mar 11, 2025
fab4c16
add temp dir in workflow for coverage report
Mar 11, 2025
26a154f
update sonar config
Mar 11, 2025
a9535b9
use tox and pytest outside container
Mar 11, 2025
f069337
add missing tox.ini file
Mar 11, 2025
19a8418
separate pytest and build workflow
Mar 11, 2025
e93d76d
fix conda env name in workflow
Mar 11, 2025
989ffc5
update shell to use login mode for pytest workflow
Mar 11, 2025
8e6ac78
update conda config
Mar 11, 2025
301dd26
set PYTHONPATH inside tox.ini
Mar 11, 2025
9b041ff
update tox.ini
Mar 11, 2025
56550c4
include Coverage.py in tox
Mar 11, 2025
598a5e6
add passenv to tox
Mar 11, 2025
86a1c22
change build yaml to only run on PR merge
Mar 11, 2025
6719301
remove redundant deps in tox.ini
Mar 11, 2025
a9a804c
abort Sonarqube Scan if coverage.xml is missing
Mar 11, 2025
2d1c671
specify source and test files for sonar
Mar 11, 2025
f531635
add test_search_utils
Mar 11, 2025
28 changes: 28 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
name: Build Docker Image

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Build docker image
        run: docker build -f Dockerfile.minimal -t acr .

      - name: Start docker image (background)
        run: docker run --name acr-pytest -t -d acr

      # Due to difficulties with `conda activate` in docker, we do `conda run` while specifying the environment.
      # Setting cov-report to term and xml outputs the coverage report to the terminal and to an xml file inside the container.
      - name: Run PyTest with Coverage (inside docker)
        run: docker exec acr-pytest conda run --no-capture-output -n auto-code-rover pytest --cov=app test/ --cov-report=term --cov-report=xml
56 changes: 56 additions & 0 deletions .github/workflows/pytest.yml
@@ -0,0 +1,56 @@
name: Run PyTest with Coverage

on:
  push:
    branches:
      - main
      - pytest-ci
  pull_request:
    branches:
      - main

jobs:
  pytest:
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Miniconda
        uses: conda-incubator/setup-miniconda@v2
        with:
          environment-file: environment.yml
          activate-environment: auto-code-rover
          python-version: 3.12
          auto-update-conda: false
          auto-activate-base: false
      - run: |
          conda info
          conda list

      - name: Set PYTHONPATH
        # Mimic the Dockerfile's ENV setting
        run: echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV

      - name: Install tox
        # Install tox inside the conda environment
        run: conda install -y tox

      - name: Run tox tests
        run: tox -e py

      - name: Check Coverage Report Exists
        run: |
          if [ ! -f coverage.xml ]; then
            echo "coverage.xml not found! Aborting SonarQube scan."
            exit 1
          fi

      - name: SonarQube Scan
        uses: SonarSource/sonarqube-scan-action@v4
        env:
          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
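The "Check Coverage Report Exists" guard can be tried locally before pushing. A minimal sketch (run from the project root after `tox -e py`; the report path mirrors what the workflow expects):

```shell
# Sketch of the workflow's guard step: report either outcome instead of
# exiting, so it is safe to run as a pre-push check.
REPORT="coverage.xml"
if [ ! -f "$REPORT" ]; then
  echo "$REPORT not found! Aborting SonarQube scan."
else
  echo "$REPORT present."
fi
```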
26 changes: 26 additions & 0 deletions TESTING.md
@@ -0,0 +1,26 @@
# Testing

This project is configured with CI workflows that execute the testing suite on every PR and push to the `main` branch, as well as on pushes to the `pytest-ci` branch. The suite can also be run locally using the `tox` tool.

## Setup

Running the tests locally assumes that the `auto-code-rover` environment has already been set up. Refer to the [README.md](README.md) for instructions on how to set up the environment.

The testing suite uses the following libraries and tools:
- Tox, to configure the tests
- Pytest, to execute the tests
- Coverage.py, to measure code coverage

In the `auto-code-rover` environment, install the required libraries by running the following command:

```bash
conda install -y tox
```

and execute the tox commands (configured in `tox.ini`) to run the tests:

```bash
tox -e py
```

The test results and coverage report are displayed in the terminal, and a `coverage.xml` file in Cobertura format is generated in the project's root directory.
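The diff view does not show the `tox.ini` added by this PR, but the commit messages ("set PYTHONPATH inside tox.ini", "add passenv to tox", "include Coverage.py in tox") and the workflow's pytest invocation suggest a configuration along these lines. This is a hypothetical sketch, not the actual file:

```ini
# Hypothetical tox.ini sketch, inferred from the workflow and commit messages;
# the real file added by this PR may differ.
[tox]
envlist = py

[testenv]
setenv =
    PYTHONPATH = {toxinidir}
passenv = *
deps =
    pytest
    pytest-cov
commands =
    pytest --cov=app test/ --cov-report=term --cov-report=xml
```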
1 change: 1 addition & 0 deletions app/search/search_utils.py
@@ -14,6 +14,7 @@ def is_test_file(file_path: str) -> bool:
         "test" in Path(file_path).parts
         or "tests" in Path(file_path).parts
         or file_path.endswith("_test.py")
+        or file_path.startswith("test_")
     )
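To see what the new clause changes, here is a self-contained sketch of the patched helper (the `return (` wrapper is assumed from the surrounding context of the hunk). Note that `startswith("test_")` tests the whole path string, so it matches `test_foo.py` at the repository root but not `app/test_foo.py`:

```python
from pathlib import Path

def is_test_file(file_path: str) -> bool:
    # Mirrors the patched helper from app/search/search_utils.py:
    # a file counts as a test file if it lives under a test/tests directory
    # or its path follows pytest naming conventions.
    return (
        "test" in Path(file_path).parts
        or "tests" in Path(file_path).parts
        or file_path.endswith("_test.py")
        or file_path.startswith("test_")
    )

print(is_test_file("test_search_utils.py"))       # True: matched by the new clause
print(is_test_file("app/search/search_utils.py"))  # False
```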
2 changes: 1 addition & 1 deletion demo_vis/main.py
@@ -10,7 +10,7 @@
 from flask_cors import cross_origin

 sys.path.append("/opt/auto-code-rover/")
-from test_data import RawGithubTask_for_debug, test_generate_data
+from demo_vis.old_test_data import RawGithubTask_for_debug, test_generate_data

 from app import globals, log
 from app.main import get_args, run_raw_task
File renamed without changes.
2 changes: 2 additions & 0 deletions requirements.txt
@@ -86,6 +86,8 @@ pylint==3.2.3
 pyro-api==0.1.2
 pyro-ppl==1.9.0
 PySocks
+pytest==8.3.4
+pytest-cov==6.0.0
 python-dotenv==1.0.0
 PyYAML==6.0.1
 referencing==0.32.1
7 changes: 7 additions & 0 deletions sonar-project.properties
@@ -0,0 +1,7 @@
sonar.exclusions=conf/**, demo_vis/**, results/**, scripts/**
sonar.organization=autocoderoversg
sonar.projectKey=AutoCodeRoverSG_auto-code-rover
sonar.python.coverage.reportPaths=coverage.xml
sonar.sources=app/
sonar.tests=test/
sonar.verbose=true
33 changes: 33 additions & 0 deletions test/app/agents/test_agent_common.py
@@ -0,0 +1,33 @@
import pytest
from app.data_structures import MessageThread
from app.agents.agent_common import replace_system_prompt, InvalidLLMResponse

def test_replace_system_prompt():
    # Setup: create a MessageThread with a system message and another message
    original_prompt = "Original System Prompt"
    new_prompt = "New System Prompt"
    messages = [
        {"role": "system", "content": original_prompt},
        {"role": "user", "content": "Hello"}
    ]
    msg_thread = MessageThread(messages=messages)

    # Execute: replace the system prompt
    updated_thread = replace_system_prompt(msg_thread, new_prompt)

    # Verify: first message should now have the new prompt
    assert updated_thread.messages[0]["content"] == new_prompt, "System prompt was not replaced correctly."
    # Verify: the rest of the messages remain unchanged
    assert updated_thread.messages[1]["content"] == "Hello", "User message was unexpectedly modified."

def test_replace_system_prompt_returns_same_object():
    # Setup: create a MessageThread with a single system message
    messages = [{"role": "system", "content": "Initial Prompt"}]
    msg_thread = MessageThread(messages=messages)
    new_prompt = "Updated Prompt"

    # Execute: update the system prompt
    result = replace_system_prompt(msg_thread, new_prompt)

    # Verify: the same MessageThread instance is returned (in-place modification)
    assert result is msg_thread, "replace_system_prompt should return the same MessageThread object."
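The two tests above pin down the contract of `replace_system_prompt`: the leading system message is rewritten in place and the same thread object is returned. A minimal stand-in consistent with that contract (the real `MessageThread` and `replace_system_prompt` live in `app/` and are more elaborate; these simplified versions are for illustration only):

```python
class MessageThread:
    # Simplified stand-in for app.data_structures.MessageThread:
    # just holds a list of {"role": ..., "content": ...} dicts.
    def __init__(self, messages=None):
        self.messages = messages if messages is not None else []

def replace_system_prompt(msg_thread: MessageThread, new_prompt: str) -> MessageThread:
    # Overwrite the leading system message in place and return the same
    # object, which is exactly what the tests assert.
    msg_thread.messages[0]["content"] = new_prompt
    return msg_thread

thread = MessageThread([
    {"role": "system", "content": "old"},
    {"role": "user", "content": "Hello"},
])
assert replace_system_prompt(thread, "new") is thread
assert thread.messages[0]["content"] == "new"
assert thread.messages[1]["content"] == "Hello"
```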
97 changes: 97 additions & 0 deletions test/app/agents/test_agent_reviewer.py
@@ -0,0 +1,97 @@
import json
import pytest
from enum import Enum
from app.agents.agent_reviewer import extract_review_result  # Assuming this gets updated below

# --- Dummy Definitions for Testing ---

class ReviewDecision(Enum):
    YES = "yes"
    NO = "no"

class Review:
    def __init__(self, patch_decision, patch_analysis, patch_advice, test_decision, test_analysis, test_advice):
        self.patch_decision = patch_decision
        self.patch_analysis = patch_analysis
        self.patch_advice = patch_advice
        self.test_decision = test_decision
        self.test_analysis = test_analysis
        self.test_advice = test_advice

    def __eq__(self, other):
        return (
            self.patch_decision == other.patch_decision and
            self.patch_analysis == other.patch_analysis and
            self.patch_advice == other.patch_advice and
            self.test_decision == other.test_decision and
            self.test_analysis == other.test_analysis and
            self.test_advice == other.test_advice
        )

# --- Refactored Function Under Test ---
def extract_review_result(content: str) -> Review | None:
    try:
        data = json.loads(content)

        def get_decision(key: str) -> ReviewDecision:
            return ReviewDecision(data[key].lower())

        review = Review(
            patch_decision=get_decision("patch-correct"),
            patch_analysis=data["patch-analysis"],
            patch_advice=data["patch-advice"],
            test_decision=get_decision("test-correct"),
            test_analysis=data["test-analysis"],
            test_advice=data["test-advice"],
        )

        if (review.patch_decision == ReviewDecision.NO and not review.patch_advice and
                review.test_decision == ReviewDecision.NO and not review.test_advice):
            return None

        return review

    except Exception:
        return None

# --- Combined Pytest Unit Tests Using Parameterization ---
@pytest.mark.parametrize("content,expected", [
    (
        json.dumps({
            "patch-correct": "Yes",
            "patch-analysis": "Patch analysis text",
            "patch-advice": "Patch advice text",
            "test-correct": "No",
            "test-analysis": "Test analysis text",
            "test-advice": "Some test advice"
        }),
        Review(
            patch_decision=ReviewDecision.YES,
            patch_analysis="Patch analysis text",
            patch_advice="Patch advice text",
            test_decision=ReviewDecision.NO,
            test_analysis="Test analysis text",
            test_advice="Some test advice"
        )
    ),
    (
        json.dumps({
            "patch-correct": "No",
            "patch-analysis": "Patch analysis text",
            "patch-advice": "",
            "test-correct": "No",
            "test-analysis": "Test analysis text",
            "test-advice": ""
        }),
        None
    ),
])
def test_extract_review_valid_and_invalid(content, expected):
    review = extract_review_result(content)
    assert review == expected

def test_extract_invalid_json():
    """Test that invalid JSON input returns None."""
    content = "Not a valid json"
    review = extract_review_result(content)
    assert review is None
77 changes: 77 additions & 0 deletions test/app/agents/test_agent_search.py
@@ -0,0 +1,77 @@
from unittest.mock import patch, MagicMock
import pytest
from collections.abc import Generator

from app.agents.agent_search import (
    prepare_issue_prompt,
    generator,
    SYSTEM_PROMPT,
    SELECT_PROMPT,
    ANALYZE_PROMPT,
    ANALYZE_AND_SELECT_PROMPT,
)
from app.data_structures import MessageThread

def test_prepare_issue_prompt():
    input_str = (
        " This is a sample problem statement. \n"
        "<!-- This is a comment that should be removed -->\n"
        "\n"
        "It spans multiple lines.\n"
        " And has extra spaces. \n"
        "\n"
        "<!-- Another comment\n"
        "still in comment -->\n"
        "Final line."
    )

    expected_output = (
        "<issue>This is a sample problem statement.\n"
        "It spans multiple lines.\n"
        "And has extra spaces.\n"
        "Final line.\n</issue>"
    )

    assert prepare_issue_prompt(input_str) == expected_output

@patch("app.agents.agent_search.common.SELECTED_MODEL", new_callable=MagicMock, create=True)
@patch("app.agents.agent_search.print_acr")
@patch("app.agents.agent_search.print_retrieval")
@patch("app.agents.agent_search.config")
def test_generator_retry(mock_config, mock_print_retrieval, mock_print_acr, mock_selected_model):
    """
    Test the generator branch where re_search is True.
    In this branch the generator will:
    1. Yield its first API selection response.
    2. Process a search result with re_search True (simulating a failed consumption),
       which adds the search result as a user message and restarts the loop.
    3. Yield a new API selection response.
    """
    # Set configuration flags.
    mock_config.enable_sbfl = False
    mock_config.reproduce_and_review = False

    # Provide two responses:
    # - First API selection call.
    # - Next iteration API selection call after the retry.
    mock_selected_model.call.side_effect = [
        ("API selection response",),
        ("API selection response after retry",)
    ]

    issue_stmt = "Sample issue"
    sbfl_result = ""
    reproducer_result = ""

    gen = generator(issue_stmt, sbfl_result, reproducer_result)

    res_text, _ = next(gen)
    assert res_text == "API selection response"

    search_result = "Retry search result"
    res_text_retry, msg_thread_retry = gen.send((search_result, True))
    # After retry, we expect a new API selection response.
    assert res_text_retry == "API selection response after retry"
    # Verify that the search result was added to the message thread as a user message.
    user_msgs = [m for m in msg_thread_retry.messages if m.get("role") == "user"]
    assert any(search_result in m.get("content", "") for m in user_msgs)
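The `send`-based retry loop that `test_generator_retry` drives can be illustrated with a stripped-down generator. This is a sketch only; the real `generator` in `app.agents.agent_search` builds prompts and calls the selected model, which is replaced here by a canned list of responses:

```python
def generator(responses):
    # Sketch of the yield/send protocol under test: yield a model response,
    # receive (search_result, re_search) back, record the result as a user
    # message, and loop again when re_search is True.
    responses = iter(responses)
    messages = []
    while True:
        res_text = next(responses)
        search_result, re_search = yield res_text, messages
        messages.append({"role": "user", "content": search_result})
        if not re_search:
            break

gen = generator(["API selection response", "API selection response after retry"])
first, _ = next(gen)
retry, msgs = gen.send(("Retry search result", True))
assert first == "API selection response"
assert retry == "API selection response after retry"
assert msgs[-1]["content"] == "Retry search result"
```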