Skip to content

Commit 3b67ece

Browse files
authored
Merge pull request #7 from metauto-ai/feat/add_scripts
upload the demo scripts and guidelines
2 parents 612b23a + 821ba24 commit 3b67ece

File tree

4 files changed

+432
-0
lines changed

4 files changed

+432
-0
lines changed

scripts/README.md

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
## Demo scripts
2+
3+
### Ask Anything
4+
5+
1. Ask any question about the given workspace
6+
7+
```bash
8+
9+
PYTHONPATH=. python scripts/run_ask.py \
10+
--workspace $(pwd)/benchmark/workspaces/OpenHands/39_Drug_Response_Prediction_SVM_GDSC_ML \
11+
--question "What does this workspace contain?"
12+
```
13+
14+
### Agent-as-a-Judge
15+
16+
2. Using the collected trajectories or development logs (gray-box setting)
17+
18+
```bash
19+
PYTHONPATH=. python scripts/run_aaaj.py \
20+
--developer_agent "OpenHands" \
21+
--setting "gray_box" \
22+
--planning "comprehensive (no planning)" \
23+
--benchmark_dir $(pwd)/benchmark
24+
```
25+
26+
3. Without trajectories or development logs (black-box setting)
27+
28+
```bash
29+
PYTHONPATH=. python scripts/run_aaaj.py \
30+
--developer_agent "OpenHands" \
31+
--setting "black_box" \
32+
--planning "efficient (no planning)" \
33+
--benchmark_dir $(pwd)/benchmark
34+
```
35+
36+
4. Without trajectories or development logs, using planning to decide the actions of Agent-as-a-Judge (black-box setting)
37+
38+
```bash
39+
PYTHONPATH=. python scripts/run_aaaj.py \
40+
--developer_agent "OpenHands" \
41+
--setting "black_box" \
42+
--planning "planning" \
43+
--benchmark_dir $(pwd)/benchmark
44+
```
45+
46+
### Statistics
47+
48+
5. Get the statistics of the projects
49+
50+
```bash
51+
PYTHONPATH=. python scripts/run_statistics.py \
52+
--benchmark_dir $(pwd)/benchmark \
53+
--developer_agent OpenHands
54+
```

scripts/run_aaaj.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
import re
2+
import argparse
3+
import logging
4+
from pathlib import Path
5+
from dotenv import load_dotenv
6+
7+
from agent_as_a_judge.agent import JudgeAgent
8+
from agent_as_a_judge.config import AgentConfig
9+
10+
11+
def main(agent_config: AgentConfig, logger: logging.Logger):
    """Judge every benchmark instance that does not have a judgment yet.

    Instance files (``*.json`` inside ``agent_config.instance_dir``) are
    processed in ascending order of the first number found in their file
    stem; files without any number come last, and ties are broken by file
    name so the order is deterministic.

    For each instance:

    * if ``agent_config.judge_dir`` already holds a file with the same name,
      the instance is skipped;
    * otherwise a ``JudgeAgent`` is created for the workspace
      ``agent_config.workspace_dir / <instance stem>`` and
      ``judge_anything()`` is called on it.

    When ``agent_config.trajectory_file`` is set it is treated as a
    directory and ``<instance stem>.json`` inside it is handed to the agent
    if that file exists.

    Args:
        agent_config: Configuration carrying ``instance_dir``, ``judge_dir``,
            ``workspace_dir`` and ``trajectory_file``; it is also passed
            through to ``JudgeAgent`` unchanged.
        logger: Logger that receives progress messages.
    """

    def instance_order(path: Path) -> tuple:
        # Numeric sort on the first number in the stem ("2_x" before "10_y").
        # Stems without a number sort last (infinity); the file name breaks
        # ties so the result does not depend on directory listing order.
        found = re.search(r"(\d+)", path.stem)
        number = int(found.group(1)) if found else float("inf")
        return (number, path.name)

    instance_files = sorted(
        agent_config.instance_dir.glob("*.json"), key=instance_order
    )
    logger.info("Total instances found: %d", len(instance_files))

    for instance_file in instance_files:
        instance_name = instance_file.stem

        judgment_file = agent_config.judge_dir / instance_file.name
        if judgment_file.exists():
            logger.info(
                "Judgment for instance '%s' already exists. Skipping...",
                instance_name,
            )
            continue

        trajectory_file = None
        if agent_config.trajectory_file:
            candidate = agent_config.trajectory_file / f"{instance_name}.json"
            if candidate.exists():
                trajectory_file = candidate
                logger.info(
                    "Processing instance: %s with trajectory: %s",
                    instance_file,
                    trajectory_file,
                )
            else:
                # A trajectory directory was configured but this instance
                # has no file in it: worth a warning.
                logger.warning(
                    "Trajectory file not found for instance: %s, processing without it",
                    instance_file,
                )
        else:
            # No trajectory directory configured at all: this is expected,
            # so it is reported at INFO level rather than as a warning.
            logger.info(
                "Processing instance: %s without a trajectory", instance_file
            )

        judge_agent = JudgeAgent(
            workspace=agent_config.workspace_dir / instance_name,
            instance=instance_file,
            judge_dir=agent_config.judge_dir,
            trajectory_file=trajectory_file,
            config=agent_config,
        )
        judge_agent.judge_anything()
59+
60+
61+
def parse_arguments():
    """Parse the command line of the Agent-as-a-Judge batch script.

    Required options are ``--developer_agent``, ``--setting``, ``--planning``
    (one of three fixed choices) and ``--benchmark_dir``.  The search filters
    ``--include_dirs``, ``--exclude_dirs`` and ``--exclude_files`` accept one
    or more values and fall back to built-in defaults; ``--trajectory_file``
    is optional.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    # (flag, keyword arguments) pairs, registered in this order so the
    # generated --help output keeps the same option ordering.
    option_table = [
        (
            "--developer_agent",
            dict(type=str, required=True, help="Name of the developer agent"),
        ),
        (
            "--setting",
            dict(
                type=str,
                required=True,
                help="Setting for the JudgeAgent (e.g., gray_box, black_box)",
            ),
        ),
        (
            "--planning",
            dict(
                type=str,
                required=True,
                choices=[
                    "planning",
                    "comprehensive (no planning)",
                    "efficient (no planning)",
                ],
                help="Module to run",
            ),
        ),
        (
            "--benchmark_dir",
            dict(
                type=str,
                required=True,
                help="Base directory for the DevAI benchmark",
            ),
        ),
        (
            "--include_dirs",
            dict(
                nargs="+",
                default=["src", "results", "models", "data"],
                help="Directories to include in search",
            ),
        ),
        (
            "--exclude_dirs",
            dict(
                nargs="+",
                default=[
                    "__pycache__",
                    "env",
                    ".git",
                    "venv",
                    "logs",
                    "output",
                    "tmp",
                    "temp",
                    "cache",
                    "data",
                ],
                help="Directories to exclude in search",
            ),
        ),
        (
            "--exclude_files",
            dict(
                nargs="+",
                default=[".DS_Store"],
                help="Files to exclude in search",
            ),
        ),
        (
            "--trajectory_file",
            dict(
                type=str,
                help="Path to the trajectory directory, if available",
            ),
        ),
    ]

    cli = argparse.ArgumentParser()
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
122+
123+
124+
if __name__ == "__main__":
    # Script entry point: read the environment, parse the CLI, derive the
    # benchmark sub-directories and run the judging loop.
    load_dotenv()

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)
    args = parse_arguments()

    # Layout below --benchmark_dir used by this script.
    benchmark_dir = Path(args.benchmark_dir)
    instance_dir = benchmark_dir / "devai/instances"
    workspace_dir = benchmark_dir / f"workspaces/{args.developer_agent}"
    judge_dir = (
        benchmark_dir
        / f"judgment/{args.developer_agent}/agent_as_a_judge/{args.setting}"
    )

    # Honour an explicit --trajectory_file (previously parsed but ignored);
    # without it, fall back to the per-agent directory in the benchmark tree.
    if args.trajectory_file:
        trajectory_file = Path(args.trajectory_file)
    else:
        trajectory_file = benchmark_dir / f"trajectories/{args.developer_agent}"

    agent_config = AgentConfig(
        include_dirs=args.include_dirs,
        exclude_dirs=args.exclude_dirs,
        exclude_files=args.exclude_files,
        setting=args.setting,
        planning=args.planning,
        judge_dir=judge_dir,
        workspace_dir=workspace_dir,
        instance_dir=instance_dir,
        trajectory_file=trajectory_file,
    )

    main(
        agent_config=agent_config,
        logger=logger,
    )

scripts/run_ask.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
import argparse
2+
import logging
3+
from pathlib import Path
4+
from dotenv import load_dotenv
5+
from rich.console import Console
6+
from rich.panel import Panel
7+
from rich.text import Text
8+
from rich.markdown import Markdown
9+
from rich.emoji import Emoji
10+
import io
11+
12+
13+
from agent_as_a_judge.agent import JudgeAgent
14+
from agent_as_a_judge.config import AgentConfig
15+
16+
# Root logging: INFO and above, each line prefixed with timestamp and level.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-wide rich console; display_qa prints its panels to it.
console = Console()
20+
21+
22+
def main(agent_config: AgentConfig, initial_question: str, logger: logging.Logger):
    """Answer ``initial_question`` about a workspace, then take follow-ups.

    A single ``JudgeAgent`` is created for ``agent_config.workspace_dir``
    (no instance, no trajectory) and reused for every question.  After the
    initial question the user is prompted on stdin until they type ``no``
    (case-insensitive), close stdin, or press Ctrl-C at the prompt.

    Args:
        agent_config: Configuration providing ``workspace_dir`` and
            ``judge_dir``; also passed to ``JudgeAgent`` as ``config``.
        initial_question: First question to ask.
        logger: Logger forwarded to ``handle_question``.
    """
    judge_agent = JudgeAgent(
        workspace=agent_config.workspace_dir,
        instance=None,
        judge_dir=agent_config.judge_dir,
        trajectory_file=None,
        config=agent_config,
    )

    handle_question(judge_agent, initial_question, logger)
    while True:
        try:
            next_question = input(
                "\nDo you have another question? (Enter question or type 'no' to exit): "
            ).strip()
        except (EOFError, KeyboardInterrupt):
            # stdin was closed (Ctrl-D, piped input exhausted) or the user
            # hit Ctrl-C at the prompt: end the session without a traceback.
            break
        if next_question.lower() == "no":
            break
        if not next_question:
            # A blank line is not a question; prompt again instead of
            # sending an empty string to the agent.
            continue
        handle_question(judge_agent, next_question, logger)
40+
41+
42+
def handle_question(judge_agent: JudgeAgent, question: str, logger: logging.Logger):
    """Put one question to the agent and display the exchange.

    Args:
        judge_agent: Agent whose ``ask_anything`` produces the answer.
        question: Question text passed to the agent.
        logger: Logger forwarded to ``display_qa``.
    """
    display_qa(question, judge_agent.ask_anything(question), logger)
46+
47+
48+
def display_qa(question: str, response: str, logger: logging.Logger):
    """Print a question and its response as one bordered Markdown panel.

    The question and the response are rendered as two Markdown sections
    separated by a horizontal rule, wrapped in a titled panel and printed to
    the module-level ``console``.

    Args:
        question: Question text shown in the first section.
        response: Response text shown in the second section.
        logger: Not used here; kept so existing callers keep working.
    """
    question_markdown = f"{Emoji('question')} **Question**\n{question}"
    response_markdown = f"{Emoji('speech_balloon')} **Response**\n{response}"

    panel = Panel(
        Markdown(f"{question_markdown}\n\n---\n\n{response_markdown}"),
        title="[bold magenta]🔍 Question and Response[/bold magenta]",
        border_style="bold cyan",
        title_align="center",
        padding=(1, 2),
    )

    # The panel used to be rendered a second time into an in-memory console
    # whose output was never read; that dead work has been dropped.
    console.print(panel)
67+
68+
69+
def parse_arguments():
    """Parse the command line of the interactive ask script.

    ``--workspace`` and ``--question`` are required.  ``--include_dirs``
    defaults to ``None``; ``--exclude_dirs`` and ``--exclude_files`` accept
    one or more values and fall back to built-in defaults.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    skipped_dirs = [
        "__pycache__",
        "env",
        ".git",
        "venv",
        "logs",
        "output",
        "tmp",
        "temp",
        "cache",
        "data",
    ]
    skipped_files = [".DS_Store"]

    cli = argparse.ArgumentParser()

    # Required inputs.
    cli.add_argument(
        "--workspace",
        required=True,
        type=str,
        help="Path to the workspace directory",
    )
    cli.add_argument(
        "--question",
        required=True,
        type=str,
        help="Initial question to ask the agent",
    )

    # Optional search filters.
    cli.add_argument(
        "--include_dirs",
        default=None,
        nargs="+",
        help="Directories to include in search",
    )
    cli.add_argument(
        "--exclude_dirs",
        default=skipped_dirs,
        nargs="+",
        help="Directories to exclude in search",
    )
    cli.add_argument(
        "--exclude_files",
        default=skipped_files,
        nargs="+",
        help="Files to exclude in search",
    )

    return cli.parse_args()
108+
109+
110+
if __name__ == "__main__":
    # Script entry point: read the environment, parse the CLI and start the
    # interactive question loop for a single workspace.
    load_dotenv()

    # Logging is already configured by the module-level basicConfig call; a
    # second basicConfig here would be silently ignored, so it is omitted.
    logger = logging.getLogger(__name__)
    args = parse_arguments()

    workspace_dir = Path(args.workspace)
    if not workspace_dir.is_dir():
        # Fail fast with a clear message instead of handing the agent a
        # path that does not point to a directory.
        raise SystemExit(f"Workspace directory not found: {workspace_dir}")
    judge_dir = workspace_dir / "judge"

    agent_config = AgentConfig(
        include_dirs=args.include_dirs,
        exclude_dirs=args.exclude_dirs,
        exclude_files=args.exclude_files,
        setting="black_box",
        planning="comprehensive (no planning)",
        judge_dir=judge_dir,
        workspace_dir=workspace_dir,
        instance_dir=None,
        trajectory_file=None,
    )

    main(
        agent_config=agent_config,
        initial_question=args.question,
        logger=logger,
    )

0 commit comments

Comments
 (0)