diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..9b865a5 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,96 @@ +name: CI + +on: + pull_request: + types: [opened, synchronize] + push: + branches: [main] + workflow_dispatch: + +concurrency: + cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.ref }} + +jobs: + ci: + name: CI (Python ${{ matrix.python-version }}) + if: ${{ !cancelled() && !failure() }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11"] + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + cache-dependency-path: requirements.txt + + - name: Show environment + run: env | sort + + - name: Upgrade pip tooling + run: | + python -m pip install --upgrade pip setuptools wheel + + - name: Install dependencies + run: | + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + + - name: Sanity build (byte-compile sources) + run: | + python -m compileall -q src || (echo "Byte-compilation failed" && exit 1) + + - name: Verify dependency resolution + run: | + python -m pip check || true + + dependabot: + name: Dependabot validation + if: ${{ github.actor == 'dependabot[bot]' && startsWith(github.head_ref, 'dependabot/pip/') }} + permissions: + contents: write + runs-on: ubuntu-latest + steps: + - name: Checkout PR branch + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.head_ref }} + + - name: Set up Git + run: | + git config --global user.name github-actions + git config --global user.email github-actions@github.com + + - name: Validate requirements on Python 3.10 and 3.11 + run: | + set -euo pipefail + for PYV in 3.10 3.11; do + echo "Setting up Python ${PYV}..." 
+ echo "python-version: ${PYV}" + echo "Installing with Python ${PYV}..." + pyenv global ${PYV} || true + python -V + python -m pip install --upgrade pip setuptools wheel + if [ -f requirements.txt ]; then + pip install -r requirements.txt + pip check || true + fi + done + + - name: Push changes if applicable + run: | + if [[ -n $(git status --porcelain) ]]; then + git commit -a -m "build: Apply automated updates for dependabot." + git push + fi + + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..d1eb0f6 --- /dev/null +++ b/Makefile @@ -0,0 +1,224 @@ +.PHONY: clean deepclean install dev constraints black isort mypy ruff toml-sort lint pre-commit \ + auto-black auto-isort auto-toml-sort auto-lint \ + test-run test-run-offline test test-offline \ + build upload docs-autobuild changelog docs-gen docs-mypy docs-coverage docs + +SHELL := /bin/bash + +######################################################################################## +# Variables +######################################################################################## + +# Use pipenv when not in CI and available +PIPRUN := $(shell [ "$$CI" != "true" ] && command -v pipenv > /dev/null 2>&1 && echo "pipenv run") + +# Python version (major.minor) +PYTHON_VERSION := $(shell echo $${PYTHON_VERSION:-$$(python -V 2>&1 | cut -d ' ' -f 2)} | cut -d '.' 
-f 1,2) + +# Constraints file by Python version +CONSTRAINTS_DIR := constraints +CONSTRAINTS_FILE := $(CONSTRAINTS_DIR)/$(PYTHON_VERSION).txt + +# Public docs dir (compatible with ReadTheDocs) +PUBLIC_DIR := $(shell [ "$$READTHEDOCS" = "True" ] && echo "$$READTHEDOCS_OUTPUT/html" || echo "public") + +# Changelog (optional) +CHANGELOG_URL := $(shell echo $${CI_PAGES_URL:-https://example.com/yourproject}/_sources/changelog.md.txt) +CHANGELOG_PATH := docs/changelog.md + +######################################################################################## +# Development Environment Management +######################################################################################## + +clean: + -rm -rf \ + $(PUBLIC_DIR) \ + .coverage \ + .mypy_cache \ + .pytest_cache \ + .ruff_cache \ + Pipfile* \ + coverage.xml \ + dist \ + release-notes.md + find . -name '*.egg-info' -print0 | xargs -0 rm -rf + find . -name '*.pyc' -print0 | xargs -0 rm -f + find . -name '*.swp' -print0 | xargs -0 rm -f + find . -name '.DS_Store' -print0 | xargs -0 rm -f + find . -name '__pycache__' -print0 | xargs -0 rm -rf + +deepclean: clean + if command -v pre-commit > /dev/null 2>&1; then pre-commit uninstall --hook-type pre-push; fi + if command -v pipenv >/dev/null 2>&1 && pipenv --venv >/dev/null 2>&1; then pipenv --rm; fi + +# Editable install (if this is a package). For this repo we default to requirements.txt +install: + @if [ -f setup.py ] || [ -f pyproject.toml ]; then \ + $(PIPRUN) pip install -e . -c $(CONSTRAINTS_FILE) || $(PIPRUN) pip install -e . 
; \ + else \ + $(PIPRUN) pip install -r requirements.txt ; \ + fi + +# Developer setup: project deps + common dev tooling +dev: + @if [ -f requirements.txt ]; then $(PIPRUN) pip install -r requirements.txt ; fi + # Common dev tools (safe if already installed) + $(PIPRUN) pip install -U \ + black isort ruff mypy \ + pytest coverage build twine \ + sphinx sphinx-autobuild git-changelog toml-sort + @if [ "$(CI)" != "true" ] && command -v pre-commit > /dev/null 2>&1; then pre-commit install --hook-type pre-push; fi + +# Generate constraints for current Python version +constraints: deepclean + @mkdir -p $(CONSTRAINTS_DIR) + @if [ -f setup.py ] || [ -f pyproject.toml ]; then \ + $(PIPRUN) --python $(PYTHON_VERSION) pip install --upgrade -e . ; \ + fi + @if [ -f requirements.txt ]; then $(PIPRUN) pip install -r requirements.txt ; fi + $(PIPRUN) pip freeze --exclude-editable > $(CONSTRAINTS_FILE) + +######################################################################################## +# Lint and pre-commit +######################################################################################## + +black: + @command -v black >/dev/null 2>&1 || { echo "black not installed. Run 'make dev' first."; exit 1; } + $(PIPRUN) python -m black --check --diff . -l 120 + +isort: + @command -v isort >/dev/null 2>&1 || { echo "isort not installed. Run 'make dev' first."; exit 1; } + $(PIPRUN) python -m isort --check . + +mypy: + @command -v mypy >/dev/null 2>&1 || { echo "mypy not installed. Run 'make dev' first."; exit 1; } + # Narrow the scope if needed + $(PIPRUN) python -m mypy src || true + +ruff: + @command -v ruff >/dev/null 2>&1 || { echo "ruff not installed. Run 'make dev' first."; exit 1; } + $(PIPRUN) ruff check src || true + +toml-sort: + @command -v toml-sort >/dev/null 2>&1 || { echo "toml-sort not installed. 
Run 'make dev' first."; exit 1; } + $(PIPRUN) toml-sort --check pyproject.toml || true + +# Prioritize isort before black to avoid style conflicts +lint: mypy ruff isort black toml-sort + +pre-commit: + pre-commit run --all-files + +######################################################################################## +# Auto Lint +######################################################################################## + +auto-black: + @command -v black >/dev/null 2>&1 || { echo "black not installed. Run 'make dev' first."; exit 1; } + $(PIPRUN) python -m black . -l 120 + +auto-isort: + @command -v isort >/dev/null 2>&1 || { echo "isort not installed. Run 'make dev' first."; exit 1; } + $(PIPRUN) python -m isort . + +auto-toml-sort: + @command -v toml-sort >/dev/null 2>&1 || { echo "toml-sort not installed. Run 'make dev' first."; exit 1; } + $(PIPRUN) toml-sort --in-place pyproject.toml >/dev/null 2>&1 || true + +auto-lint: auto-isort auto-black auto-toml-sort + +######################################################################################## +# Test +######################################################################################## + +test-run: + @command -v coverage >/dev/null 2>&1 || { echo "coverage not installed. Run 'make dev' first."; exit 1; } + @if command -v pytest >/dev/null 2>&1; then \ + $(PIPRUN) python -m coverage erase; \ + $(PIPRUN) python -m coverage run --concurrency=multiprocessing -m pytest || true; \ + $(PIPRUN) python -m coverage combine; \ + else \ + echo "pytest not installed or no tests; skipping test-run."; \ + fi + +test-run-offline: + @command -v coverage >/dev/null 2>&1 || { echo "coverage not installed. 
Run 'make dev' first."; exit 1; } + @if command -v pytest >/dev/null 2>&1; then \ + $(PIPRUN) python -m coverage erase; \ + $(PIPRUN) python -m coverage run --concurrency=multiprocessing -m pytest -m "offline" || true; \ + $(PIPRUN) python -m coverage combine; \ + else \ + echo "pytest not installed or no tests; skipping test-run-offline."; \ + fi + +test: test-run + $(PIPRUN) python -m coverage report --fail-under 20 || true + $(PIPRUN) python -m coverage xml --fail-under 20 || true + +test-offline: test-run-offline + $(PIPRUN) python -m coverage report --fail-under 20 || true + $(PIPRUN) python -m coverage xml --fail-under 20 || true + +######################################################################################## +# Package +######################################################################################## + +build: + @command -v python >/dev/null 2>&1 || { echo "python not found"; exit 1; } + $(PIPRUN) python -m build + +upload: + $(PIPRUN) python -m twine upload dist/* + +######################################################################################## +# Documentation (optional, only if docs/ exists) +######################################################################################## + +docs-autobuild: + @if [ -d docs ]; then \ + $(PIPRUN) python -m sphinx_autobuild docs $(PUBLIC_DIR); \ + else \ + echo "No docs directory; skipping docs-autobuild."; \ + fi + +changelog: + @if wget -q --spider $(CHANGELOG_URL); then \ + echo "Existing Changelog found at '$(CHANGELOG_URL)', download for incremental generation."; \ + wget -q -O $(CHANGELOG_PATH) $(CHANGELOG_URL); \ + fi + @command -v git-changelog >/dev/null 2>&1 || { echo "git-changelog not installed. 
Run 'make dev' first."; exit 1; } + $(PIPRUN) LATEST_TAG=$$(git tag --sort=-creatordate | head -n 1); \ + git-changelog --bump $$LATEST_TAG -Tio docs/changelog.md -c conventional -s build,chore,ci,deps,doc,docs,feat,fix,perf,ref,refactor,revert,style,test,tests || true + +release-notes: + @command -v git-changelog >/dev/null 2>&1 || { echo "git-changelog not installed. Run 'make dev' first."; exit 1; } + @$(PIPRUN) git-changelog --input $(CHANGELOG_PATH) --release-notes || true + +docs-gen: + @if [ -d docs ]; then \ + $(PIPRUN) python -m sphinx.cmd.build -W docs $(PUBLIC_DIR); \ + else \ + echo "No docs directory; skipping docs-gen."; \ + fi + +docs-mypy: docs-gen + @if [ -d docs ]; then \ + $(PIPRUN) python -m mypy src --html-report $(PUBLIC_DIR)/reports/mypy || true; \ + else \ + echo "No docs directory; skipping docs-mypy."; \ + fi + +docs-coverage: test-run docs-gen + @if [ -d docs ]; then \ + $(PIPRUN) python -m coverage html -d $(PUBLIC_DIR)/reports/coverage --fail-under 20 || true; \ + else \ + echo "No docs directory; skipping docs-coverage."; \ + fi + +docs: changelog docs-gen docs-mypy docs-coverage + +######################################################################################## +# End +######################################################################################## + + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..66e7b09 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,55 @@ +[build-system] +requires = ["hatchling>=1.24.2"] +build-backend = "hatchling.build" + +[project] +name = "debate" +version = "0.0.0" +description = "Debate project" +readme = "README.md" +requires-python = ">=3.10" +license = {text = "MIT"} +authors = [{name = "LeiLi Lab"}] +dependencies = [] + +[tool.black] +line-length = 120 +target-version = ["py310", "py311"] + +[tool.coverage.report] +exclude_lines = [ + "if __name__ == \"__main__\":" +] + +[tool.coverage.run] +source = ["src"] +branch = true + +[tool.isort] +profile = "black" 
+line_length = 120 +skip = ["dataset"] + +[tool.mypy] +python_version = "3.10" +ignore_missing_imports = true +warn_unused_ignores = true +warn_redundant_casts = true +warn_unreachable = true +strict_optional = false +show_error_codes = true +pretty = true +files = ["src"] + +[tool.pytest.ini_options] +addopts = "-ra" +testpaths = ["tests", "test"] + +[tool.ruff] +line-length = 120 +target-version = "py310" +src = ["src"] + +[tool.ruff.lint] +select = ["E", "F", "W"] +ignore = [] diff --git a/src/agents.py b/src/agents.py index 50fc029..8dee25c 100644 --- a/src/agents.py +++ b/src/agents.py @@ -1,23 +1,24 @@ -import os -import re +import copy import json import math -from openai import OpenAI -from functools import partial -import traceback, random -from dataclasses import dataclass -import requests -import copy +import os +import random +import re import time +import traceback +from dataclasses import dataclass +from functools import partial import litellm +import requests +from openai import OpenAI +from evaluator import eval_surprise, extract_claims, extract_obj_aspect +from tts import convert_text_to_speech, trim_audio_by_sentences +from utils.constants import CLOSING_TIME, OPENING_TIME, REBUTTAL_TIME, WORDRATIO, deepseek_api_key from utils.model import HelperClient, safety_setting -from utils.tool import logger, log_file_path from utils.prompts import * -from utils.constants import WORDRATIO, OPENING_TIME, REBUTTAL_TIME, CLOSING_TIME, deepseek_api_key -from tts import convert_text_to_speech, trim_audio_by_sentences -from evaluator import extract_claims, eval_surprise, extract_obj_aspect +from utils.tool import log_file_path, logger @dataclass @@ -27,6 +28,7 @@ class AgentConfig: max_tokens: int = 4096 system_prompt: str = "" + @dataclass class DebaterConfig(AgentConfig): side: str = "for" @@ -39,11 +41,13 @@ class DebaterConfig(AgentConfig): use_debate_flow_tree: bool = True url: str = "http://127.0.0.1:8081/" + @dataclass class JudgeConfig(AgentConfig): 
system_prompt: str = judge_system_prompt temperature: float = 0.0 - + + @dataclass class AudienceConfig(AgentConfig): pre_prompt: str = audience_system_prompt_pre @@ -58,38 +62,48 @@ def __init__(self, config) -> None: self.system_prompt = config.system_prompt print(f"[Agent Init] Model: {self.config.model}") if self.config.model.startswith("gpt") or self.config.model.startswith("o1"): - self.client = partial(litellm.completion, - model=self.config.model, - temperature=self.config.temperature, - max_tokens=self.config.max_tokens) + self.client = partial( + litellm.completion, + model=self.config.model, + temperature=self.config.temperature, + max_tokens=self.config.max_tokens, + ) elif self.config.model.startswith("gemini"): - self.client = partial(litellm.completion, - model="gemini/" + self.config.model, - api_key=os.environ["GOOGLE_API_KEY"], - temperature=self.config.temperature, - max_tokens=self.config.max_tokens, - safety_settings=safety_setting) + self.client = partial( + litellm.completion, + model="gemini/" + self.config.model, + api_key=os.environ["GOOGLE_API_KEY"], + temperature=self.config.temperature, + max_tokens=self.config.max_tokens, + safety_settings=safety_setting, + ) elif "llama" in self.config.model.lower(): - self.client = partial(litellm.completion, - model="together_ai/" + self.config.model, - temperature=self.config.temperature, - max_tokens=self.config.max_tokens) + self.client = partial( + litellm.completion, + model="together_ai/" + self.config.model, + temperature=self.config.temperature, + max_tokens=self.config.max_tokens, + ) elif "deepseek" in self.config.model.lower(): - self.client = partial(litellm.completion, - model="deepseek/" + self.config.model, - api_key=deepseek_api_key, - temperature=self.config.temperature, - max_tokens=self.config.max_tokens) + self.client = partial( + litellm.completion, + model="deepseek/" + self.config.model, + api_key=deepseek_api_key, + temperature=self.config.temperature, + 
max_tokens=self.config.max_tokens, + ) elif "moonshot" in self.config.model.lower(): - self.client = partial(litellm.completion, - model="moonshot/" + self.config.model, - api_base="https://api.moonshot.cn/v1", - api_key=os.environ["MOONSHOT_API_KEY"], - temperature=self.config.temperature, - max_tokens=self.config.max_tokens) + self.client = partial( + litellm.completion, + model="moonshot/" + self.config.model, + api_base="https://api.moonshot.cn/v1", + api_key=os.environ["MOONSHOT_API_KEY"], + temperature=self.config.temperature, + max_tokens=self.config.max_tokens, + ) else: raise ValueError(f"Invalid model: {self.config.model}") - + self.conversation = [] if self.config.system_prompt != "": self._add_message("system", self.config.system_prompt) @@ -99,21 +113,21 @@ def __init__(self, config) -> None: def speak(self, prompt, **kwargs): self._add_message("user", prompt) logger.debug(f"[Conversation-History] {json.dumps(self.conversation)}") - logger.debug("[Prompt] " + prompt.strip().replace('\n',' ||| ')) + logger.debug("[Prompt] " + prompt.strip().replace("\n", " ||| ")) response = self._get_response(self.conversation, **kwargs) if kwargs.get("n", 1) > 1: logger.info(f"[Response] {response}") return response - logger.debug("[Response-Before-Post-Process] " + response.strip().replace('\n',' ||| ')) + logger.debug("[Response-Before-Post-Process] " + response.strip().replace("\n", " ||| ")) response = self.post_process(response, **kwargs) - logger.debug("[Response-After-Post-Process] " + response.strip().replace('\n',' ||| ')) + logger.debug("[Response-After-Post-Process] " + response.strip().replace("\n", " ||| ")) return response def post_process(self, statement, **kwargs): self._add_message("assistant", f"{statement}") - logger.info("[Response] " + statement.strip().replace('\n',' ||| ')) + logger.info("[Response] " + statement.strip().replace("\n", " ||| ")) return statement - + def _get_response(self, messages, **kwargs): kwargs.pop("max_time", None) 
kwargs.pop("history", None) @@ -139,10 +153,10 @@ def _get_response(self, messages, **kwargs): else: return response return "" - + def _add_message(self, role, content): if isinstance(content, list): - content = '\n'.join(content) + content = "\n".join(content) self.conversation.append({"role": role, "content": content}) def reset(self): @@ -153,6 +167,7 @@ def reset(self): ##################### Debater ##################### + class Debater(Agent): def __init__(self, config, motion) -> None: super().__init__(config) @@ -172,7 +187,7 @@ def opening_generation(self, history, **kwargs): self.listen(history) prompt = default_opening_prompt.format(motion=self.motion, act=self.act) prompt = prompt.replace("{n_words}", str(math.ceil(kwargs.get("max_time", OPENING_TIME) / WORDRATIO["time"]))) - response = self.speak(prompt, **kwargs) + response = self.speak(prompt, **kwargs) return response def rebuttal_generation(self, history, **kwargs): @@ -181,16 +196,16 @@ def rebuttal_generation(self, history, **kwargs): opponent = history[-1]["content"] prompt = default_rebuttal_prompt.format(counter_act=self.counter_act, opponent=opponent, act=self.act) prompt = prompt.replace("{n_words}", str(math.ceil(kwargs.get("max_time", REBUTTAL_TIME) / WORDRATIO["time"]))) - response = self.speak(prompt, **kwargs) + response = self.speak(prompt, **kwargs) return response - + def closing_generation(self, history, **kwargs): self.status = "closing" self.listen(history) opponent = history[-1]["content"] prompt = default_closing_prompt.format(counter_act=self.counter_act, opponent=opponent, act=self.act) prompt = prompt.replace("{n_words}", str(math.ceil(kwargs.get("max_time", CLOSING_TIME) / WORDRATIO["time"]))) - response = self.speak(prompt, **kwargs) + response = self.speak(prompt, **kwargs) return response def post_process(self, statement, max_time=-1, time_control=False, **kwargs): @@ -204,7 +219,7 @@ def post_process(self, statement, max_time=-1, time_control=False, **kwargs): 
self._add_message("assistant", f"") logger.warning("[Response] Statement is None. Return empty string.") return "" - + # TODO: 这里没有设置固定去提取LLM回答的函数嘛? start_idx = statement.find("```") end_idx = statement.find("```", start_idx + 1) @@ -215,7 +230,7 @@ def post_process(self, statement, max_time=-1, time_control=False, **kwargs): length = len(format) else: length = 3 - statement = statement[start_idx+length:end_idx].strip() + statement = statement[start_idx + length : end_idx].strip() else: format = re.search(r"```(.*)\n", statement) if format is not None: @@ -224,16 +239,15 @@ def post_process(self, statement, max_time=-1, time_control=False, **kwargs): if len(trunc_statement) > 0: statement = trunc_statement - if max_time <= 0 or not time_control: self._add_message("assistant", f"{statement}") - logger.info("[Response] " + statement.strip().replace('\n',' ||| ')) + logger.info("[Response] " + statement.strip().replace("\n", " ||| ")) return statement - #NOTE the below part is time-consuming, can comment them and add "new_statement = statement" when developing + # NOTE the below part is time-consuming, can comment them and add "new_statement = statement" when developing prefix = log_file_path.replace(".log", "") audio_file = f"{prefix}_{self.config.type}_{self.status}_{self.side}.mp3" - logger.debug("[Time-Control] Statement: " + statement.replace('\n', ' ||| ')) + logger.debug("[Time-Control] Statement: " + statement.replace("\n", " ||| ")) content, reference, duration = convert_text_to_speech(statement, audio_file) logger.debug(f"[Time-Control] Save Audio: {audio_file}") logger.debug(f"[Time-Control] Original Time: {duration:0.2f}") @@ -248,23 +262,23 @@ def post_process(self, statement, max_time=-1, time_control=False, **kwargs): # TODO: 这里可以优化? 
duration, new_sentences = trim_audio_by_sentences(audio_file, save_file, max_duration=max_time * 1000) last_sentence = new_sentences[-1] - idx = content.lower().find(last_sentence[:-1].lower()) # remove the punctuation + idx = content.lower().find(last_sentence[:-1].lower()) # remove the punctuation if idx == -1: print(f"Last sentence not found in content") new_content = " ".join(new_sentences) else: - new_content = content[:idx+len(last_sentence)] - + new_content = content[: idx + len(last_sentence)] + logger.debug(f"[Time-Control] Final Time: {duration:0.2f}") - + if new_content is None or len(new_content) == 0: logger.warning(f"[Time-Control] Trimmed Content is None. Use the original content as the transcript.") new_content = content - - new_statement = new_content + "\n\n**Reference**\n"+ reference + + new_statement = new_content + "\n\n**Reference**\n" + reference # new_statement = statement self._add_message("assistant", f"{new_statement}") - logger.info("[Response] " + new_statement.strip().replace('\n',' ||| ')) + logger.info("[Response] " + new_statement.strip().replace("\n", " ||| ")) return new_statement @@ -288,26 +302,29 @@ def next_oppo_status(self): else: return "finished" + class HumanDebater(Debater): def __init__(self, config, motion) -> None: super().__init__(config, motion) def get_multiline_input(self, instruction): - print('\n[User Input]' + instruction) + print("\n[User Input]" + instruction) print("Please input your response in the command. End with 'END'.") lines = [] while True: line = input() - if line == 'END': + if line == "END": break lines.append(line) - return '\n'.join(lines) + return "\n".join(lines) def opening_generation(self, **kwargs): self.status = "opening" max_time = kwargs.get("max_time", OPENING_TIME) max_words = math.ceil(max_time / WORDRATIO["time"]) - response = self.get_multiline_input(f"Please give an opening statement using three claims with {max_words} words, do not output other things. 
Please input the response in the command.") + response = self.get_multiline_input( + f"Please give an opening statement using three claims with {max_words} words, do not output other things. Please input the response in the command." + ) response = self.post_process(response, **kwargs) return response @@ -315,18 +332,23 @@ def rebuttal_generation(self, history, **kwargs): self.status = "rebuttal" max_time = kwargs.get("max_time", REBUTTAL_TIME) max_words = math.ceil(max_time / WORDRATIO["time"]) - response = self.get_multiline_input(f"Now it comes the rebuttal phase, where you respond to your opponent. You should stand firm on your position and attack the opponent's weak points. Give your response within {max_words} words and do not output other things than our response. Please input the response in the command.") + response = self.get_multiline_input( + f"Now it comes the rebuttal phase, where you respond to your opponent. You should stand firm on your position and attack the opponent's weak points. Give your response within {max_words} words and do not output other things than our response. Please input the response in the command." + ) response = self.post_process(response, **kwargs) return response - + def closing_generation(self, history, **kwargs): self.status = "closing" max_time = kwargs.get("max_time", CLOSING_TIME) max_words = math.ceil(max_time / WORDRATIO["time"]) - response = self.get_multiline_input(f"Now it comes the closing statement, where you summarize your key points and reaffirm your position. Give your response within {max_words} words and do not output other things than our response. Please input the response in the command.") + response = self.get_multiline_input( + f"Now it comes the closing statement, where you summarize your key points and reaffirm your position. Give your response within {max_words} words and do not output other things than our response. Please input the response in the command." 
+ ) response = self.post_process(response, **kwargs) return response + class BaselineDebater(Debater): def __init__(self, config, motion, port=8081) -> None: super().__init__(config, motion) @@ -363,14 +385,14 @@ def opening_generation(self, history, **kwargs): if len(history) > 0: self.input["PositiveArgument"] = history[0]["content"] assert len(history) == 1 and self.oppo_side == "for" - + opening_response = self._make_request(self.BASE_URL + "v1/argument", self.input) opening = opening_response["Result"] self.input["Reference"] = opening_response["Reference"] - logger.debug("[Baseline-opening-input] " + str(self.input).replace('\n',' ||| ')) - logger.debug("[Baseline-opening-before] " + opening.strip().replace('\n',' ||| ')) + logger.debug("[Baseline-opening-input] " + str(self.input).replace("\n", " ||| ")) + logger.debug("[Baseline-opening-before] " + opening.strip().replace("\n", " ||| ")) opening = self.post_process(opening, **kwargs) - logger.debug("[Baseline-opening-after] " + opening.strip().replace('\n',' ||| ')) + logger.debug("[Baseline-opening-after] " + opening.strip().replace("\n", " ||| ")) return opening def rebuttal_generation(self, history, **kwargs): @@ -387,12 +409,12 @@ def rebuttal_generation(self, history, **kwargs): rebuttal_response = self._make_request(self.BASE_URL + "v1/rebuttal", self.input) rebuttal = rebuttal_response["Result"] self.input["Reference"] = rebuttal_response["Reference"] - logger.debug("[Baseline-rebuttal-input] " + str(self.input).replace('\n',' ||| ')) - logger.debug("[Baseline-rebuttal-before] " + rebuttal.strip().replace('\n',' ||| ')) + logger.debug("[Baseline-rebuttal-input] " + str(self.input).replace("\n", " ||| ")) + logger.debug("[Baseline-rebuttal-before] " + rebuttal.strip().replace("\n", " ||| ")) rebuttal = self.post_process(rebuttal, **kwargs) - logger.debug("[Baseline-rebuttal-after] " + rebuttal.strip().replace('\n',' ||| ')) + logger.debug("[Baseline-rebuttal-after] " + rebuttal.strip().replace("\n", " 
||| ")) return rebuttal - + def closing_generation(self, history, **kwargs): self.status = "closing" self.input.update( @@ -408,10 +430,10 @@ def closing_generation(self, history, **kwargs): summary_response = self._make_request(self.BASE_URL + "v1/summary", self.input) summary = summary_response["Result"] self.input["Reference"] = summary_response["Reference"] - logger.debug("[Baseline-summary-input] " + str(self.input).replace('\n',' ||| ')) - logger.debug("[Baseline-summary-before] " + summary.strip().replace('\n',' ||| ')) + logger.debug("[Baseline-summary-input] " + str(self.input).replace("\n", " ||| ")) + logger.debug("[Baseline-summary-before] " + summary.strip().replace("\n", " ||| ")) summary = self.post_process(summary, **kwargs) - logger.debug("[Baseline-summary-after] " + summary.strip().replace('\n',' ||| ')) + logger.debug("[Baseline-summary-after] " + summary.strip().replace("\n", " ||| ")) return summary def reset_stage(self, stage, side, new_content): @@ -428,32 +450,40 @@ def reset_stage(self, stage, side, new_content): self.input["NegativeRebuttal"] = new_content elif stage == "closing": return - + ##################### Judge ##################### + class Judge(Agent): def __init__(self, config) -> None: super().__init__(config) - self.helper_client = partial(HelperClient, model=self.config.model, temperature=0, max_tokens=config.max_tokens, n=1) + self.helper_client = partial( + HelperClient, model=self.config.model, temperature=0, max_tokens=config.max_tokens, n=1 + ) def eval(self, motion, debate_process, **kwargs): - prompt = f"The debate topic is {motion}. The for side is to support this motion while the against side is to oppose it. The debate process is as follows: \n" + json.dumps(debate_process, indent=2) - prompt += "By adhering to these principles and criteria, you will provide an impartial and comprehensive evaluation of each side's performance, ensuring a fair and constructive outcome for the debate. 
Do determine the winner even if you find the two sides perform similarly. Please output your final judgment in the format: \"The winning side is [For/Against] due to [reasons].\"" + prompt = ( + f"The debate topic is {motion}. The for side is to support this motion while the against side is to oppose it. The debate process is as follows: \n" + + json.dumps(debate_process, indent=2) + ) + prompt += 'By adhering to these principles and criteria, you will provide an impartial and comprehensive evaluation of each side\'s performance, ensuring a fair and constructive outcome for the debate. Do determine the winner even if you find the two sides perform similarly. Please output your final judgment in the format: "The winning side is [For/Against] due to [reasons]."' response = self.speak(prompt, **kwargs) winner = self.extract_winner(response) return winner, response - + def comparison(self, motion, context, side, a, b, **kwargs): self.reset() - prompt = (f"The debate topic is {motion}. The for side is to support this motion while the against side is to oppose it. " - f"The debate process is as follows: \n{json.dumps(context, indent=2)}\n\n" - f"Here are the two versions of the {side} side's response based on the debate process: \n\n" - f"=========Version A Start======== \n{a}\n=========Version A End========\n\n" - f"=========Version B Start======== \n{b}\n=========Version B End========\n\n" - f"By adhering to these principles and criteria, you will provide an impartial and comprehensive evaluation of each version's performance, ensuring a fair and constructive outcome for the debate. Do determine the version even if you find the two sides perform similarly. " - "Please output your final judgment in the format: \"The better version is Version [A/B] due to [reasons].\"") + prompt = ( + f"The debate topic is {motion}. The for side is to support this motion while the against side is to oppose it. 
" + f"The debate process is as follows: \n{json.dumps(context, indent=2)}\n\n" + f"Here are the two versions of the {side} side's response based on the debate process: \n\n" + f"=========Version A Start======== \n{a}\n=========Version A End========\n\n" + f"=========Version B Start======== \n{b}\n=========Version B End========\n\n" + f"By adhering to these principles and criteria, you will provide an impartial and comprehensive evaluation of each version's performance, ensuring a fair and constructive outcome for the debate. Do determine the version even if you find the two sides perform similarly. " + 'Please output your final judgment in the format: "The better version is Version [A/B] due to [reasons]."' + ) response = self.speak(prompt, **kwargs) winner = self.extract_version(response) self.reset() @@ -461,29 +491,31 @@ def comparison(self, motion, context, side, a, b, **kwargs): def extract_winner(self, comments): pos = comments.find("The winning side is") - if "For" in comments[pos+20:pos+33]: + if "For" in comments[pos + 20 : pos + 33]: return "For wins" - elif "Against" in comments[pos+20:pos+33]: + elif "Against" in comments[pos + 20 : pos + 33]: return "Against wins" else: return "[GGG judge not detected]" - + def extract_version(self, comments): pos = comments.find("better version is") - if "A" in comments[pos+18:pos+36]: + if "A" in comments[pos + 18 : pos + 36]: return "A" - elif "B" in comments[pos+18:pos+36]: + elif "B" in comments[pos + 18 : pos + 36]: return "B" else: return "[GGG judge not detected]" - + def finegrained_check(self, motion, side_info, side): oppo = "against" if side == "for" else "for" claims = extract_claims(self.helper_client, motion, side, side_info[side]["content"]) side_info[side]["claims"] = claims try: - obj_scores, obj_scores_explanation = extract_obj_aspect(self.helper_client, motion, side, side_info[side]["content"], side_info[oppo]["claims"]) + obj_scores, obj_scores_explanation = extract_obj_aspect( + 
self.helper_client, motion, side, side_info[side]["content"], side_info[oppo]["claims"] + ) except: traceback.print_exc() exit(0) @@ -495,7 +527,9 @@ def __init__(self, config) -> None: super().__init__(config) self.n = config.n - self.helper_client = partial(HelperClient, model=self.config.model, temperature=0, max_tokens=config.max_tokens, n=1) + self.helper_client = partial( + HelperClient, model=self.config.model, temperature=0, max_tokens=config.max_tokens, n=1 + ) def vote(self, process, motion): prompt = f"{self.config.pre_prompt}\n\n The for side is to support the motion of {motion}. The against side is to oppose the motion. " @@ -504,20 +538,22 @@ def vote(self, process, motion): post_vote = self.speak(prompt) return self.extract_winner(pre_vote) + " -> " + self.extract_winner(post_vote) - + def comparison(self, motion, context, side, a, b, **kwargs): self.reset() - prompt = (f"The debate topic is {motion}. The for side is to support this motion while the against side is to oppose it. " - f"The debate process is as follows: \n{json.dumps(context, indent=2)}\n\n" - f"Here are the two versions of the {side} side's response based on the debate process: \n\n" - f"=========Version A Start======== \n{a}\n=========Version A End========\n\n" - f"=========Version B Start======== \n{b}\n=========Version B End========\n\n" - "Now that you've heard arguments from both sides, it's time to cast your final vote. Consider the following factors: \n" - "Strength and clarity of each team's arguments \n" - "Evidence and reasoning used to support their claims \n" - "Effectiveness in addressing and countering the opposing team's points \n" - "Overall persuasiveness and impact of each team's case \n" - "Please output your final judgment in the format: \"The better version is Version [A/B] due to [reasons].\"") + prompt = ( + f"The debate topic is {motion}. The for side is to support this motion while the against side is to oppose it. 
" + f"The debate process is as follows: \n{json.dumps(context, indent=2)}\n\n" + f"Here are the two versions of the {side} side's response based on the debate process: \n\n" + f"=========Version A Start======== \n{a}\n=========Version A End========\n\n" + f"=========Version B Start======== \n{b}\n=========Version B End========\n\n" + "Now that you've heard arguments from both sides, it's time to cast your final vote. Consider the following factors: \n" + "Strength and clarity of each team's arguments \n" + "Evidence and reasoning used to support their claims \n" + "Effectiveness in addressing and countering the opposing team's points \n" + "Overall persuasiveness and impact of each team's case \n" + 'Please output your final judgment in the format: "The better version is Version [A/B] due to [reasons]."' + ) response = self.speak(prompt, n=self.n, **kwargs) if self.n > 1: winner = [self.extract_version(r) for r in response] @@ -528,27 +564,26 @@ def comparison(self, motion, context, side, a, b, **kwargs): def extract_winner(self, response): pos = response.find("vote is") - if "For" in response[pos+8:pos+21]: + if "For" in response[pos + 8 : pos + 21]: return "For" - elif "Against" in response[pos+8:pos+21]: + elif "Against" in response[pos + 8 : pos + 21]: return "Against" else: return "[GGG]" - + def extract_version(self, response): pos = response.find("better version is") - if "A" in response[pos+18:pos+36]: + if "A" in response[pos + 18 : pos + 36]: return "A" - elif "B" in response[pos+18:pos+36]: + elif "B" in response[pos + 18 : pos + 36]: return "B" else: return "[GGG]" - + def surprise(self, motion, side, claims): scores, explanations = eval_surprise(self.helper_client, motion, side, claims, n=1) return scores, explanations - + def feedback(self, prompt, **kwargs): response = self.speak(prompt, **kwargs) return response - diff --git a/src/compare_env.py b/src/compare_env.py index 2c9aad1..ed482b0 100644 --- a/src/compare_env.py +++ b/src/compare_env.py @@ 
-1,13 +1,15 @@ -import os, json -import yaml import argparse import copy -from env import Env, EnvConfig, extract_overall_score -from utils.tool import logger -from utils.constants import OPENING_TIME, REBUTTAL_TIME, CLOSING_TIME +import json +import os -from agents import BaselineDebater, DebaterConfig, JudgeConfig, AudienceConfig +import yaml + +from agents import AudienceConfig, BaselineDebater, DebaterConfig, JudgeConfig +from env import Env, EnvConfig, extract_overall_score from ouragents import TreeDebater +from utils.constants import CLOSING_TIME, OPENING_TIME, REBUTTAL_TIME +from utils.tool import logger def get_debater_class(type): @@ -19,7 +21,7 @@ def get_debater_class(type): raise ValueError(f"Type {type} is not supported.") -class CompareEnv(): +class CompareEnv: def __init__(self, config, debug, baseline_type="baseline", test_type="treedebater", port=8081) -> None: self.config = config self.motion = config.motion @@ -32,18 +34,21 @@ def __init__(self, config, debug, baseline_type="baseline", test_type="treedebat self.stance_dict = {"for": None, "against": None} assert self.baseline_type == "baseline", "Baseline type must be baseline" - - + # init players self.test_debaters = {} self.baseline_debaters = {} config_for_type = {conf.type: conf for conf in config.debater_config} - self.debater_type_for_side = {conf.side: "baseline" if conf.type == self.baseline_type else "test" for conf in config.debater_config} + self.debater_type_for_side = { + conf.side: "baseline" if conf.type == self.baseline_type else "test" for conf in config.debater_config + } if self.baseline_type not in config_for_type: - logger.warning(f"Baseline type {self.baseline_type} not found in config_for_type, using {self.test_type} config") - baseline_conf = copy.deepcopy(config_for_type[self.test_type]) + logger.warning( + f"Baseline type {self.baseline_type} not found in config_for_type, using {self.test_type} config" + ) + baseline_conf = 
copy.deepcopy(config_for_type[self.test_type]) baseline_conf.type = self.baseline_type config_for_type[self.baseline_type] = baseline_conf @@ -56,8 +61,9 @@ def __init__(self, config, debug, baseline_type="baseline", test_type="treedebat self.test_debaters[side] = get_debater_class(self.test_type)(cur_config, motion=self.motion) cur_config = config_for_type[self.baseline_type] cur_config.side = side - self.baseline_debaters[side] = get_debater_class(self.baseline_type)(cur_config, motion=self.motion, port=port) - + self.baseline_debaters[side] = get_debater_class(self.baseline_type)( + cur_config, motion=self.motion, port=port + ) def step_play(self, side, stage, history, max_time): @@ -73,18 +79,10 @@ def step_play(self, side, stage, history, max_time): "closing": self.test_debaters[side].closing_generation, } - base_response = baseline_call[stage]( - history=history, - max_time=max_time, - time_control=self.time_control - ) + base_response = baseline_call[stage](history=history, max_time=max_time, time_control=self.time_control) # Generate test response using reference history - test_response = test_call[stage]( - history=history, - max_time=max_time, - time_control=self.time_control - ) + test_response = test_call[stage](history=history, max_time=max_time, time_control=self.time_control) return base_response, test_response def compare_play(self): @@ -93,17 +91,16 @@ def compare_play(self): debate_settings = { "stage": "settings", "motion": self.motion, - "debaters": {x.side: x for x in self.config.debater_config} + "debaters": {x.side: x for x in self.config.debater_config}, } debate_process.append(debate_settings) - comparison_results = {} - + # Run through each stage for stage in ["preparation", "opening", "rebuttal", "closing"]: logger.info(f"[{stage}] Start Comparison") - + if stage == "preparation": # Generate claims for both reference and test debaters # let the TreeDebater-type debaters generate a set of candidate claims as the knowledge or material for 
the subsequent发言 @@ -117,11 +114,17 @@ def compare_play(self): # Generate reference response if stage == "opening": - base_response, test_response = self.step_play(side, stage, history=debate_process[1:], max_time=OPENING_TIME) + base_response, test_response = self.step_play( + side, stage, history=debate_process[1:], max_time=OPENING_TIME + ) elif stage == "rebuttal": - base_response, test_response = self.step_play(side, stage, history=debate_process[1:], max_time=REBUTTAL_TIME) + base_response, test_response = self.step_play( + side, stage, history=debate_process[1:], max_time=REBUTTAL_TIME + ) elif stage == "closing": - base_response, test_response = self.step_play(side, stage, history=debate_process[1:], max_time=CLOSING_TIME) + base_response, test_response = self.step_play( + side, stage, history=debate_process[1:], max_time=CLOSING_TIME + ) # according to the type of the debater, decide which response to use as the material for the subsequent发言 # if the debater type is baseline, use base_response as the material for the subsequent发言 @@ -129,32 +132,29 @@ def compare_play(self): if keep_response_for_side == "baseline": # when test is TreeDebater self.test_debaters[side].reset_stage(stage, side, base_response, history=debate_process[1:]) keep_response = base_response - else: # keep_response_for_side == "test" + else: # keep_response_for_side == "test" self.test_debaters[side].reset_stage(stage, side, test_response, history=debate_process[1:]) self.baseline_debaters[side].reset_stage(stage, side, test_response) keep_response = test_response - debate_process.append({ - "stage": stage, - "side": side, - "content": keep_response - }) - + debate_process.append({"stage": stage, "side": side, "content": keep_response}) + comparison_results[f"{stage}_{side}"] = { "baseline_response": base_response, "test_response": test_response, - "keep_response": keep_response + "keep_response": keep_response, } - + logger.info(f"[{stage}] Comparison Complete") if self.debug: 
response = input("Press N to stop: ") if response.lower() == "n": break - + return comparison_results, debate_process + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--config", type=str, default="compare.yml") @@ -167,44 +167,60 @@ def compare_play(self): log_file = logger.handlers[0].baseFilename save_file = log_file.replace(".log", "_test_comparison.json") logger.info(f"Saving to {save_file}") - + with open(f"configs/{args.config}", "r") as f: config = yaml.load(f, Loader=yaml.FullLoader) logger.info(f"Config: {config}") env_config = EnvConfig( - debater_config=[DebaterConfig(**config) for config in config["debater"]], - judge_config=JudgeConfig(**config["judge"]), - audience_config=AudienceConfig(**config["audience"]), - **config["env"] + debater_config=[DebaterConfig(**config) for config in config["debater"]], + judge_config=JudgeConfig(**config["judge"]), + audience_config=AudienceConfig(**config["audience"]), + **config["env"], ) - + compare_env = CompareEnv(env_config, args.debug, args.baseline_type, args.test_type, port=args.port) comparison_results, debate_process = compare_env.compare_play() test_record = { - "motion": compare_env.motion, - "config": config, - "debate_thoughts": { - "for": compare_env.test_debaters["for"].debate_thoughts, - "against": compare_env.test_debaters["against"].debate_thoughts - }, - "debate_tree": { - "for": [ - compare_env.test_debaters["for"].debate_tree.get_tree_info() if compare_env.test_debaters["for"].type in ["treedebater"] else {}, - compare_env.test_debaters["for"].oppo_debate_tree.get_tree_info() if compare_env.test_debaters["for"].type in ["treedebater"] else {}, - ], - "against": [ - compare_env.test_debaters["against"].debate_tree.get_tree_info() if compare_env.test_debaters["against"].type in ["treedebater"] else {}, - compare_env.test_debaters["against"].oppo_debate_tree.get_tree_info() if compare_env.test_debaters["against"].type in ["treedebater"] else {}, - ] - }, - 
"conversation": { - "for": compare_env.test_debaters["for"].conversation, - "against": compare_env.test_debaters["against"].conversation - } - } - + "motion": compare_env.motion, + "config": config, + "debate_thoughts": { + "for": compare_env.test_debaters["for"].debate_thoughts, + "against": compare_env.test_debaters["against"].debate_thoughts, + }, + "debate_tree": { + "for": [ + ( + compare_env.test_debaters["for"].debate_tree.get_tree_info() + if compare_env.test_debaters["for"].type in ["treedebater"] + else {} + ), + ( + compare_env.test_debaters["for"].oppo_debate_tree.get_tree_info() + if compare_env.test_debaters["for"].type in ["treedebater"] + else {} + ), + ], + "against": [ + ( + compare_env.test_debaters["against"].debate_tree.get_tree_info() + if compare_env.test_debaters["against"].type in ["treedebater"] + else {} + ), + ( + compare_env.test_debaters["against"].oppo_debate_tree.get_tree_info() + if compare_env.test_debaters["against"].type in ["treedebater"] + else {} + ), + ], + }, + "conversation": { + "for": compare_env.test_debaters["for"].conversation, + "against": compare_env.test_debaters["against"].conversation, + }, + } + # Save results with open(save_file, "w") as f: full_results = { @@ -213,13 +229,13 @@ def compare_play(self): "test_type": args.test_type, "comparison": comparison_results, "debate_process": debate_process[1:], - "test_record": test_record + "test_record": test_record, } json.dump(full_results, f, indent=2) - - logger.info(f"Comparison results saved to {save_file}") + + logger.info(f"Comparison results saved to {save_file}") total_usage = 0 for side in ["for", "against"]: logger.info(f"Usage for Agent {side} " + str(compare_env.test_debaters[side].client_cost)) - total_usage += compare_env.test_debaters[side].client_cost \ No newline at end of file + total_usage += compare_env.test_debaters[side].client_cost diff --git a/src/configs/create_config.py b/src/configs/create_config.py index 849b1a3..5d1676a 100644 --- 
a/src/configs/create_config.py +++ b/src/configs/create_config.py @@ -1,9 +1,9 @@ -import os -import yaml import argparse +import os +from collections import OrderedDict import yaml -from collections import OrderedDict + def dict_order_preserving_yaml_dump(data, file_path=None, **kwargs): class OrderPreservingDumper(yaml.SafeDumper): @@ -16,8 +16,8 @@ def dict_representer(dumper, data): OrderPreservingDumper.add_representer(dict, dict_representer) if file_path: - with open(file_path, 'w', encoding='utf-8') as file: - yaml.dump(data, file, Dumper=OrderPreservingDumper, width=float("inf"),**kwargs) + with open(file_path, "w", encoding="utf-8") as file: + yaml.dump(data, file, Dumper=OrderPreservingDumper, width=float("inf"), **kwargs) print(f"YAML content has been written to {file_path}") else: return yaml.dump(data, Dumper=OrderPreservingDumper, width=float("inf"), **kwargs) @@ -26,9 +26,9 @@ def dict_representer(dumper, data): def get_options(): parser = argparse.ArgumentParser() parser.add_argument("--template", type=str, default="compare.yml") - parser.add_argument("--motion", type=str, nargs='+', default=["AI will lead to the decline of human creative arts"]) + parser.add_argument("--motion", type=str, nargs="+", default=["AI will lead to the decline of human creative arts"]) parser.add_argument("--motion_file", type=str, default=None) - parser.add_argument("--model", type=str, nargs='+', default=["gemini-2.0-flash"]) + parser.add_argument("--model", type=str, nargs="+", default=["gemini-2.0-flash"]) parser.add_argument("--baseline", type=str, default="baseline", choices=["treedebater", "baseline"]) parser.add_argument("--pool_version", type=str, default="1228") parser.add_argument("--pool_size", type=int, default=50) @@ -36,8 +36,9 @@ def get_options(): args = parser.parse_args() return args + # python create_config.py --model deepseek/deepseek-chat -# python create_config.py --model meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo gemini-1.5-pro +# python 
create_config.py --model meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo gemini-1.5-pro # python create_config.py --model gemini-2.0-flash --motion "We should abolish the debt ceiling" --save_dir 0410 --pool_version 0410 # python create_config.py --model deepseek-chat --motion "AI will lead to the decline of human creative arts" --save_dir testw --pool_version 10 --template compare.yml # python create_config.py --model deepseek-chat --motion "AI will lead to the decline of human creative arts" --save_dir test3 --pool_version 10 --template compare.yml @@ -50,7 +51,7 @@ def get_options(): args = get_options() with open(args.template, "r") as f: configs = yaml.load(f, Loader=yaml.FullLoader) - + if args.motion_file: with open(args.motion_file, "r") as f: motions = [x.strip() for x in f.readlines()] @@ -59,34 +60,34 @@ def get_options(): print(motions) configs["env"]["claim_pool_size"] = args.pool_size - + assert len(args.model) <= 2 model1, model2 = args.model[0], args.model[-1] - template_name = args.template.split('.yml')[0] - + template_name = args.template.split(".yml")[0] + for i, motion in enumerate(motions): save_path = f"{args.save_dir}/case{i+1}" if not os.path.exists(save_path): os.makedirs(save_path) - model_name1 = model1.split('/')[-1] - model_name2 = model2.split('/')[-1] - motion_name = motion.replace(' ', '_').lower() + model_name1 = model1.split("/")[-1] + model_name2 = model2.split("/")[-1] + motion_name = motion.replace(" ", "_").lower() configs["env"]["motion"] = motion configs["debater"][0] = { - "side": "for", - "model": model1, - "type": "treedebater", - "temperature": 1.0, - "use_retrieval": True, - "pool_file": f"../results{args.pool_version}/{model_name1}/{motion_name}_pool_for.json" - } + "side": "for", + "model": model1, + "type": "treedebater", + "temperature": 1.0, + "use_retrieval": True, + "pool_file": f"../results{args.pool_version}/{model_name1}/{motion_name}_pool_for.json", + } configs["debater"][1] = { - "side": "against", - "model": 
model2, - "type": "baseline", - "temperature": 1.0, - } + "side": "against", + "model": model2, + "type": "baseline", + "temperature": 1.0, + } if model_name1 == model_name2: save_file = f"{save_path}/{template_name}_{model_name1}.yml" else: @@ -94,25 +95,22 @@ def get_options(): dict_order_preserving_yaml_dump(configs, save_file) configs["debater"][0] = { - "side": "for", - "model": model2, - "type": "baseline", - } + "side": "for", + "model": model2, + "type": "baseline", + } configs["debater"][1] = { - "side": "against", - "model": model1, - "type": "treedebater", - "temperature": 1.0, - "use_retrieval": True, - "add_retrieval_feedback": True, - "pool_file": f"../results{args.pool_version}/{model_name2}/{motion_name}_pool_against.json" - } - + "side": "against", + "model": model1, + "type": "treedebater", + "temperature": 1.0, + "use_retrieval": True, + "add_retrieval_feedback": True, + "pool_file": f"../results{args.pool_version}/{model_name2}/{motion_name}_pool_against.json", + } if model_name1 == model_name2: save_file = f"{save_path}/{template_name}_{model_name1}_re.yml" else: save_file = f"{save_path}/{template_name}_{model_name1}_{model_name2}_re.yml" dict_order_preserving_yaml_dump(configs, save_file) - - diff --git a/src/debate_tree.py b/src/debate_tree.py index 6a53030..e79b702 100644 --- a/src/debate_tree.py +++ b/src/debate_tree.py @@ -1,25 +1,23 @@ +import argparse +import json import os import sys -import json -import argparse -import nltk -import torch import time -import numpy as np -import pandas as pd +from functools import partial from typing import List + import google.generativeai as genai -from functools import partial -from utils.tool import logger +import nltk +import numpy as np +import pandas as pd +import torch +from sentence_transformers.util import dot_score, semantic_search -from utils.model import HelperClient, reward_model -from utils.tool import logger, get_response_with_retry +from evaluator import evaluate_defense_strength, 
evaluate_support_strength from utils.constants import EMBEDDING_MODEL +from utils.model import HelperClient, reward_model +from utils.tool import get_response_with_retry, logger -from evaluator import evaluate_support_strength, evaluate_defense_strength - -from sentence_transformers.util import (semantic_search, - dot_score) def propose_new_claims(proposer, motion, side, history, n): # generate the next layer of rebuttal claims @@ -37,7 +35,6 @@ def propose_new_claims(proposer, motion, side, history, n): "## Task: Generate Strategic Counter-Arguments\n" f"You are participating in a formal debate on the motion: {motion}\n" f"Your position: {act} the motion\n\n" - "## Your Objective\n" f"Generate {num} persuasive counter-arguments that:\n" "1. Focus on countering the opponent's last argument\n" @@ -45,25 +42,21 @@ def propose_new_claims(proposer, motion, side, history, n): "3. Strengthen your stance on the motion\n" "4. Cover distinct aspects without overlap\n" "5. Maintain consistency with your previous statements\n\n" - "### Counter-Argument Techniques\n" "- **Logical fallacies:** Identify errors in reasoning (cause-effect reversal, equivocation, straw man arguments, circular reasoning, tautology)\n" - " - **Example:** \"You've committed a false dilemma fallacy by suggesting only two possible outcomes when multiple alternatives exist.\"\n" + ' - **Example:** "You\'ve committed a false dilemma fallacy by suggesting only two possible outcomes when multiple alternatives exist."\n' "- **Factual errors:** Highlight inaccuracies in the opponent's factual statements\n" - " - **Example:** \"Your argument relies on statistics from 2010, but more recent data from 2023 shows the opposite trend.\"\n" + ' - **Example:** "Your argument relies on statistics from 2010, but more recent data from 2023 shows the opposite trend."\n' "- **Logic errors:** Identify flawed underlying logic\n" - " - **Example 1:** \"Your conclusion is based on premise A, but A is not always true. 
For example, ... Therefore, your conclusion is flawed.\"\n" - " - **Example 2:** \"Your conclusion relies on premises A and B, but B is not always true. For example, ... Therefore, your conclusion is not always correct.\"\n" - " - **Example 3:** \"You claim A and B lead to C, but that is not always the case. For example, ... Therefore, your conclusion is questionable.\"\n" + ' - **Example 1:** "Your conclusion is based on premise A, but A is not always true. For example, ... Therefore, your conclusion is flawed."\n' + ' - **Example 2:** "Your conclusion relies on premises A and B, but B is not always true. For example, ... Therefore, your conclusion is not always correct."\n' + ' - **Example 3:** "You claim A and B lead to C, but that is not always the case. For example, ... Therefore, your conclusion is questionable."\n' "- **Leveling the playing field:** Neutralize the opponent's advantage by showing both sides share the same issue or benefit\n" - " - **Example 1:** \"You claim A, but B also has this problem. Therefore, both sides are equal in this regard.\"\n" - " - **Example 2:** \"You mention the benefits of A, but B offers the same benefits. So, both sides are equally advantageous.\"\n" - - + ' - **Example 1:** "You claim A, but B also has this problem. Therefore, both sides are equal in this regard."\n' + ' - **Example 2:** "You mention the benefits of A, but B offers the same benefits. 
So, both sides are equally advantageous."\n' "## Context\n" "Previous debate exchanges:\n" f"{history_str}\n\n" - "## Response Format\n" "Provide your response in JSON format:\n" "{{\n" @@ -76,11 +69,12 @@ def propose_new_claims(proposer, motion, side, history, n): " ]\n" "}}\n" ) - logger.debug("[Proposer-Tree-Helper-Prompt] " + prompt.strip().replace('\n',' ||| ')) + logger.debug("[Proposer-Tree-Helper-Prompt] " + prompt.strip().replace("\n", " ||| ")) content, response = get_response_with_retry(proposer, prompt, "statements", temperature=1) - logger.debug("[Proposer-Tree-Helper-Response] " + response.strip().replace('\n',' ||| ')) + logger.debug("[Proposer-Tree-Helper-Response] " + response.strip().replace("\n", " ||| ")) return content + def update_eval_score(node, scorer): if node.children: for child in node.children: @@ -115,18 +109,24 @@ def eval_score(self, scorer): while cur_node: history.append(cur_node.data) cur_node = cur_node.parent - history = history[::-1] #NOTE: history is from root to this node + history = history[::-1] # NOTE: history is from root to this node defense, support = 0, 0 if self.parent: defense = evaluate_defense_strength(scorer, self.motion, self.data, self.parent.data, history=history) if self.parent.parent: - support = evaluate_support_strength(scorer, self.motion, self.data, self.parent.parent.data, history=history) + support = evaluate_support_strength( + scorer, self.motion, self.data, self.parent.parent.data, history=history + ) else: if self.side == "for": - support = evaluate_support_strength(scorer, self.motion, self.data, self.motion, history=[self.motion, "", self.data]) + support = evaluate_support_strength( + scorer, self.motion, self.data, self.motion, history=[self.motion, "", self.data] + ) else: - support = evaluate_defense_strength(scorer, self.motion, self.data, self.motion, history=[self.motion, self.data]) + support = evaluate_defense_strength( + scorer, self.motion, self.data, self.motion, history=[self.motion, 
self.data] + ) self.scores = {"defense": defense, "support": support} def get_minimax_score(self, max_depth=2, level_decoy=0.8, support_weight=0.5, root_type="support"): @@ -153,12 +153,15 @@ def get_minimax_score(self, max_depth=2, level_decoy=0.8, support_weight=0.5, ro # self is player 1, child is player 2, child.children is player 1 for child in self.children: # player 1 choose the best rebuttal to player 2, maximize player 1's utility (defense + support) - child_scores = [(1-support_weight) * c.scores["defense"] + support_weight * c.scores["support"] for c in child.children] + child_scores = [ + (1 - support_weight) * c.scores["defense"] + support_weight * c.scores["support"] + for c in child.children + ] best_idx = np.argmax(child_scores).item() best_child = child.children[best_idx] # player 2's utility (towards player 1) - utility = - child.scores["defense"] + level_decoy * child_scores[best_idx] + utility = -child.scores["defense"] + level_decoy * child_scores[best_idx] chosen_child_idx.append(best_idx) chosen_child.append(best_child) child_utility.append(utility) @@ -175,7 +178,9 @@ def get_minimax_score(self, max_depth=2, level_decoy=0.8, support_weight=0.5, ro # self is player 1, child is player 2 for child in self.children: # child_score is the score towards player 2 - child_score_idx, child_path, child_score = child.get_minimax_score(max_depth=max_depth-1, level_decoy=level_decoy, support_weight=support_weight) + child_score_idx, child_path, child_score = child.get_minimax_score( + max_depth=max_depth - 1, level_decoy=level_decoy, support_weight=support_weight + ) child_utility.append(-child_score) chosen_child_idx.append(child_score_idx) chosen_child.append(child_path) @@ -186,11 +191,11 @@ def get_minimax_score(self, max_depth=2, level_decoy=0.8, support_weight=0.5, ro return path_idx, path, root_score + level_decoy * child_utility[idx] else: raise ValueError("Currently only support depth 3") - + def get_strength(self, max_depth=1, level_decoy=0.8, 
support_weight=0.5): strength = 0 if self.scores["defense"] != 0 and self.scores["support"] != 0: - strength = support_weight * self.scores["defense"] + (1-support_weight) * self.scores["support"] + strength = support_weight * self.scores["defense"] + (1 - support_weight) * self.scores["support"] elif self.scores["defense"] != 0: strength = self.scores["defense"] elif self.scores["support"] != 0: @@ -199,9 +204,12 @@ def get_strength(self, max_depth=1, level_decoy=0.8, support_weight=0.5): if max_depth == 0 or self.is_terminal(): return strength else: - children_strength = [c.get_strength(max_depth=max_depth-1, level_decoy=level_decoy, support_weight=support_weight) for c in self.children] + children_strength = [ + c.get_strength(max_depth=max_depth - 1, level_decoy=level_decoy, support_weight=support_weight) + for c in self.children + ] return strength - level_decoy * max(children_strength) - + def add_node(self, data=None, new_claim=None, new_argument=None, side=None): if side is None: new_side = "against" if self.side == "for" else "for" @@ -231,9 +239,11 @@ def expand(self, proposer, scorer, branch=3): while cur_node: history.append(cur_node.data) cur_node = cur_node.parent - history = history[::-1] #NOTE: history is from root to this node + history = history[::-1] # NOTE: history is from root to this node - new_data = propose_new_claims(proposer, self.motion, "against" if self.side=="for" else "for", history, n=branch) + new_data = propose_new_claims( + proposer, self.motion, "against" if self.side == "for" else "for", history, n=branch + ) for data in new_data: child_node = self.add_node(data) child_node.eval_score(scorer) @@ -248,27 +258,27 @@ def get_node_info(self): "status": self.status, "visit_count": self.visit_count, "scores": self.scores, - "children": [] + "children": [], } for child in self.children: child_info = child.get_node_info() info["children"].append(child_info) return info - + def update_status(self, status, keep_visit=False): self.status = 
status if status != "waiting" or not keep_visit: self.visit_count += 1 - + def is_terminal(self): return len(self.children) == 0 - + def update_evidence(self, new_evidence): if isinstance(new_evidence, list): self.evidence.extend(new_evidence) else: self.evidence.append(new_evidence) - + @property def data(self): info = { @@ -276,7 +286,7 @@ def data(self): "argument": self.argument, } return json.dumps(info) - + @property def statement(self): info = { @@ -284,7 +294,7 @@ def statement(self): "argument": self.argument, } return json.dumps(info) - + @staticmethod def from_json(motion, side, parent, json_info): node = Node(motion, side, parent) @@ -299,6 +309,7 @@ def from_json(motion, side, parent, json_info): node.children.append(child_node) return node + class Tree: def __init__(self, motion, side): self.motion = motion @@ -308,7 +319,7 @@ def __init__(self, motion, side): def get_all_nodes(self): return self.get_all_nodes_recursive(self.root) - + def get_all_nodes_recursive(self, node): all_nodes = [] if node is None: @@ -324,12 +335,12 @@ def get_nodes_by_level(self, level): if node.level == level: nodes.append(node) return nodes - + def get_node_by_side(self, side): if side is None: side = self.side return self.get_node_by_side_recursive(self.root, side) - + def get_node_by_side_recursive(self, node, side): side_nodes = [] if node is None: @@ -349,7 +360,7 @@ def get_node_by_claim_recursive(self, node, claim, side=None): if node.claim == claim and (side is None or node.side == side): return node for child in node.children: - match = self.get_node_by_claim_recursive(child, claim, side=side) + match = self.get_node_by_claim_recursive(child, claim, side=side) if match: return match return None @@ -363,7 +374,7 @@ def get_node_by_status(self, status, side=None): status_nodes.extend(self.get_node_by_status_recursive(self.root, s, side=side)) return status_nodes return self.get_node_by_status_recursive(self.root, status, side=side) - + def 
get_node_by_status_recursive(self, node, status, side=None): status_nodes = [] if node is None: @@ -373,7 +384,7 @@ def get_node_by_status_recursive(self, node, status, side=None): for child in node.children: status_nodes.extend(self.search_status(child, status, side=side)) return status_nodes - + def print_tree_recursive(self, node, level=0, prefix="", include_status=False, max_print_level=None): if node is not None: if max_print_level is not None and level > max_print_level: @@ -386,20 +397,27 @@ def print_tree_recursive(self, node, level=0, prefix="", include_status=False, m elif k == "support" and v != 0: score_str += f"Support Score: {v:.1f}, " score_str = score_str.strip(", ") - prefix += ' ' * level * 4 + f"Level-{level} Data (Visit: {node.visit_count}, Status: {node.status}): {node.data}, Scores: {score_str}\n" + prefix += ( + " " * level * 4 + + f"Level-{level} Data (Visit: {node.visit_count}, Status: {node.status}): {node.data}, Scores: {score_str}\n" + ) for child in node.children: - prefix += self.print_tree_recursive(child, level + 1, include_status=False, max_print_level=max_print_level) + prefix += self.print_tree_recursive( + child, level + 1, include_status=False, max_print_level=max_print_level + ) return prefix def print_tree(self, prefix="", include_status=False, max_print_level=None): - return self.print_tree_recursive(self.root, level=0, prefix=prefix, include_status=include_status, max_print_level=max_print_level) + return self.print_tree_recursive( + self.root, level=0, prefix=prefix, include_status=include_status, max_print_level=max_print_level + ) def get_tree_info(self): info = { "motion": self.motion, "root": self.root.data, "side": self.side, - "structure": self.root.get_node_info() + "structure": self.root.get_node_info(), } return info @@ -419,7 +437,7 @@ def max_level(self): if node.level > max_level: max_level = node.level return max_level - + def get_embedding_from_cache(self, contents: List[str]): if isinstance(contents, str): 
contents = [contents] @@ -431,10 +449,10 @@ def get_embedding_from_cache(self, contents: List[str]): embeddings[idx] = self.embedding_cache[content] else: new_content_idx.append(idx) - + if len(new_content_idx) == 0: return embeddings - + new_contents = [contents[i] for i in new_content_idx] max_retry = 3 retry = 0 @@ -455,41 +473,46 @@ def get_embedding_from_cache(self, contents: List[str]): def get_most_similar_node(self, query, query_embedding=None, side=None, level=None, top_k=1, threshold=0.5): """Returns the most similar node and its similarity score given a query. - + Args: query (str): The query text to compare against side (str, optional): Filter nodes by side ("for" or "against"). If None, search all nodes. - + Returns: tuple: (most_similar_node, similarity_score) """ - + nodes = self.get_node_by_side(side) if side else self.get_all_nodes() if level is not None: nodes = [node for node in nodes if node.level == level] nodes = [node for node in nodes if node.claim != ""] node_embedding = self.get_embedding_from_cache([node.claim for node in nodes]) - + if not nodes: return None, 0.0 - + query_embedding = self.get_embedding_from_cache(query) if query_embedding is None else query_embedding - hits = semantic_search(torch.tensor(query_embedding), torch.tensor(node_embedding), score_function=dot_score, top_k=top_k)[0] + hits = semantic_search( + torch.tensor(query_embedding), torch.tensor(node_embedding), score_function=dot_score, top_k=top_k + )[0] retrieval_idx = [x["corpus_id"] for x in hits if x["score"] >= threshold] retrieval_node = [nodes[idx] for idx in retrieval_idx] retrieval_similarity = [x["score"] for x in hits] - + if len(retrieval_idx) == 0: highest_score = hits[0]["score"] highest_score_idx = hits[0]["corpus_id"] highest_score_claim = nodes[highest_score_idx].claim - logger.warning(f"No retrieval node for query: {query} (threshold: {threshold}). 
The highest score is {highest_score} for [{highest_score_claim}].") + logger.warning( + f"No retrieval node for query: {query} (threshold: {threshold}). The highest score is {highest_score} for [{highest_score_claim}]." + ) return None, 0.0 elif len(retrieval_idx) == 1: return retrieval_node[0], retrieval_similarity[0] else: return retrieval_node, retrieval_similarity - + + class PrepareTree(Tree): def __init__(self, root_data, motion, side, proposer, scorer, root_argument=None): super().__init__(motion, side) @@ -523,7 +546,7 @@ def print_tree_recursive(self, node, level=0, prefix="", include_status=False, m if node is not None: if max_print_level is not None and level > max_print_level: return "" - + score_str = "" if node.scores is not None: for k, v in node.scores.items(): @@ -534,13 +557,15 @@ def print_tree_recursive(self, node, level=0, prefix="", include_status=False, m elif k == "minimax_strength" and v != 0: score_str += f"Strength: {v:0.1f}, " score_str = score_str.strip(", ") - + if include_status: - prefix += ' ' * level * 4 + f"Level-{level} {position}: {node.data}, Scores: {score_str}\n" + prefix += " " * level * 4 + f"Level-{level} {position}: {node.data}, Scores: {score_str}\n" else: - prefix += ' ' * level * 4 + f"Level-{level} {position}: {node.data}\n" + prefix += " " * level * 4 + f"Level-{level} {position}: {node.data}\n" for child in node.children: - prefix += self.print_tree_recursive(child, level + 1, include_status=include_status, max_print_level=max_print_level) + prefix += self.print_tree_recursive( + child, level + 1, include_status=include_status, max_print_level=max_print_level + ) return prefix def backward(self, level_decoy=0.8, support_weight=0.5): @@ -563,8 +588,10 @@ def backward_recursive(self, node, level_decoy=0.8, support_weight=0.5): self.backward_recursive(child, level_decoy, support_weight) child_minimax_strength = [child.scores["minimax_strength"] for child in node.children] node.scores["minimax_strength"] = strength - 
level_decoy * max(child_minimax_strength) - logger.debug(f"Child Minimax Strength: {child_minimax_strength}, Max: {max(child_minimax_strength)}, Strength: {strength}, Level Decoy: {level_decoy}, Minimax Strength: {node.scores['minimax_strength']}") - + logger.debug( + f"Child Minimax Strength: {child_minimax_strength}, Max: {max(child_minimax_strength)}, Strength: {strength}, Level Decoy: {level_decoy}, Minimax Strength: {node.scores['minimax_strength']}" + ) + @staticmethod def from_json(json_info): motion = json_info["motion"] @@ -574,7 +601,7 @@ def from_json(json_info): tree = PrepareTree(root_data, motion, side, proposer=None, scorer=None) tree.root = Node.from_json(motion, side, None, root_json_info) return tree - + @property def max_level(self): max_level = 0 @@ -582,7 +609,7 @@ def max_level(self): if node.level > max_level: max_level = node.level return max_level - + class DebateTree(Tree): def __init__(self, motion, side): @@ -606,18 +633,30 @@ def print_tree_recursive(self, node, level=0, prefix="", include_status=False, m if max_print_level is not None and level > max_print_level: return "" if level == 0: - prefix += ' ' * level * 4 + f"Level-{level} Motion: {self.motion}, Side: {self.side}\n" + prefix += " " * level * 4 + f"Level-{level} Motion: {self.motion}, Side: {self.side}\n" else: if include_status: - prefix += ' ' * level * 4 + f"Level-{level} {position} (Visit: {node.visit_count}, Status: {node.status}): {node.data}\n" + prefix += ( + " " * level * 4 + + f"Level-{level} {position} (Visit: {node.visit_count}, Status: {node.status}): {node.data}\n" + ) else: - prefix += ' ' * level * 4 + f"Level-{level} {position}: {node.data}\n" + prefix += " " * level * 4 + f"Level-{level} {position}: {node.data}\n" for child in node.children: - prefix += self.print_tree_recursive(child, level + 1, include_status=include_status, max_print_level=max_print_level, reverse=reverse) + prefix += self.print_tree_recursive( + child, level + 1, 
include_status=include_status, max_print_level=max_print_level, reverse=reverse + ) return prefix def print_tree(self, prefix="", include_status=False, max_print_level=None, meta_info=True, reverse=False): - info_str = self.print_tree_recursive(self.root, level=0, prefix=prefix, include_status=include_status, max_print_level=max_print_level, reverse=reverse) + info_str = self.print_tree_recursive( + self.root, + level=0, + prefix=prefix, + include_status=include_status, + max_print_level=max_print_level, + reverse=reverse, + ) if meta_info: if len(self.meta_attack_list) > 0: info_str += f"Meta Attack to this debate tree: {self.meta_attack_list}\n" @@ -625,7 +664,6 @@ def print_tree(self, prefix="", include_status=False, max_print_level=None, meta info_str += f"Meta Rebuttal to the attacks on this debate tree: {self.meta_rebuttal_list}" return info_str - def update_node(self, action, new_claim=None, new_argument=None, target=None): if len(new_claim) == 0: logger.warning(f"Empty claim: [{new_claim}]") @@ -638,26 +676,28 @@ def update_node(self, action, new_claim=None, new_argument=None, target=None): self.meta_attack_list.append(new_claim) elif action == "rebut": self.meta_rebuttal_list.append(new_claim) - + return - + if action == "propose": new_node = self.root.add_node(new_claim=new_claim, new_argument=new_argument, side=self.root.side) new_node.update_status("proposed") return - + if action == "rebut": target_node_side = "against" if self.root.side == "for" else "for" - else: # propose or reinforce or attack, the target is the same side + else: # propose or reinforce or attack, the target is the same side target_node_side = self.root.side - + match_node = self.get_node_by_claim(target, side=target_node_side) if match_node is None: match_node, similarity = self.get_most_similar_node(target, side=target_node_side, top_k=1, threshold=0.8) - + # try to find the rebut node in the same side if match_node is None and action == "rebut": - logger.info(f"Cannot find the 
matched node for: action: {action}, target: {target}, try to find the reinforce node in the same side") + logger.info( + f"Cannot find the matched node for: action: {action}, target: {target}, try to find the reinforce node in the same side" + ) action = "reinforce" match_node = self.get_node_by_claim(target, side=self.root.side) if match_node is None: @@ -666,7 +706,7 @@ def update_node(self, action, new_claim=None, new_argument=None, target=None): if match_node is None: logger.warning(f"Cannot find the matched node for action: {action}, target: {target}") return - + if action == "reinforce": match_node.argument.extend(new_argument) match_node.update_status(match_node.status) @@ -685,7 +725,7 @@ def get_tree_info(self): "side": self.side, "structure": self.root.get_node_info(), "meta_attack_list": self.meta_attack_list, - "meta_rebuttal_list": self.meta_rebuttal_list + "meta_rebuttal_list": self.meta_rebuttal_list, } return info @@ -701,16 +741,21 @@ def from_json(json_info): tree.meta_attack_list = meta_attack_list tree.meta_rebuttal_list = meta_rebuttal_list return tree - - - + + # python debate_tree.py --mode update --save_suffix RMH --load_from ../results1217_3/gemini-1.5-pro/if_health_care_is_a_scarce_resource,_government_should_step_in_to_ration_care,_deciding_whose_life_is_worth_saving_pool_against.json --use_reward_model --soft_logits if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--mode", type=str, default="prepare", choices=["prepare", "update", "backward"]) - parser.add_argument("--motion", type=str, default="Artists should be free to borrow from cultures other than their own") - parser.add_argument("--root", type=str, default="Restricting cultural borrowing would undermine the universality of art and limit its ability to transcend cultural differences.") + parser.add_argument( + "--motion", type=str, default="Artists should be free to borrow from cultures other than their own" + ) + parser.add_argument( + "--root", + 
type=str, + default="Restricting cultural borrowing would undermine the universality of art and limit its ability to transcend cultural differences.", + ) parser.add_argument("--side", type=str, default="for") parser.add_argument("--level", type=int, default=3) parser.add_argument("--branch", type=int, default=2) @@ -720,7 +765,6 @@ def from_json(json_info): parser.add_argument("--save_suffix", type=str, default="gemini") args = parser.parse_args() - if args.mode == "prepare": logger.info("Prepare Debate Tree for\nMotion: {}\nRoot: {}\nSide: {}".format(args.motion, args.root, args.side)) motion = args.motion @@ -729,12 +773,12 @@ def from_json(json_info): use_reward_model = args.use_reward_model proposer = partial(HelperClient, model="gemini-2.0-flash", max_tokens=2048, temperature=1) - '''use (scorer.func.__name__ == 'reward_model') to differentiate''' + """use (scorer.func.__name__ == 'reward_model') to differentiate""" if use_reward_model: scorer = partial(reward_model, soft=args.soft_logits) else: scorer = partial(HelperClient, model="gemini-2.0-flash", max_tokens=2048, temperature=0) - + # proposer = partial(gpt, model="gpt-3.5-turbo", max_tokens=2048) # scorer = partial(gpt, model="gpt-3.5-turbo", max_tokens=2048) tree = PrepareTree(root, motion, side, proposer, scorer) @@ -742,28 +786,33 @@ def from_json(json_info): logger.info(tree.print_tree(prefix="\n")) logger.info(tree.get_tree_info()) - best_path_idx, best_path, best_score = tree.root.get_minimax_score(max_depth=args.level-1, support_weight=0.5, level_decoy=0.8) + best_path_idx, best_path, best_score = tree.root.get_minimax_score( + max_depth=args.level - 1, support_weight=0.5, level_decoy=0.8 + ) best_path_str = "\n=> ".join([node.data for node in best_path]) logger.info(f"Path IDX: {best_path_idx}") logger.info(f"Path: {best_path_str}") logger.info(f"Score: {best_score}") - + elif args.mode == "update": - logger.info("Update Debate Tree for\nLoad From: {}\nNew Scorer: {}\nSoft Mode: 
{}".format(args.load_from, - "gemini-2.0-flash" if not args.use_reward_model else "reward_model", - args.soft_logits)) + logger.info( + "Update Debate Tree for\nLoad From: {}\nNew Scorer: {}\nSoft Mode: {}".format( + args.load_from, "gemini-2.0-flash" if not args.use_reward_model else "reward_model", args.soft_logits + ) + ) data = json.load(open(args.load_from)) if args.use_reward_model: scorer = partial(reward_model, soft=args.soft_logits) else: scorer = partial(gemini, model="gemini-2.0-flash", max_tokens=2048, temperature=0) - for group in data: for item in group[:1]: tree = PrepareTree.from_json(item["tree_structure"]) update_eval_score(tree.root, scorer) - best_path_idx, best_path, best_score = tree.root.get_minimax_score(max_depth=2, support_weight=0.5, level_decoy=0.8) + best_path_idx, best_path, best_score = tree.root.get_minimax_score( + max_depth=2, support_weight=0.5, level_decoy=0.8 + ) best_path_str = "\n=> ".join([node.data for node in best_path]) logger.debug(f"Path IDX: {best_path_idx}") logger.debug(f"Path: {best_path_str}") @@ -779,7 +828,7 @@ def from_json(json_info): elif args.mode == "backward": logger.info("backward Debate Tree for Minimax Strength") data = json.load(open(args.load_from)) - + for group in data: for item in group[:1]: tree = PrepareTree.from_json(item["tree_structure"]) diff --git a/src/env.py b/src/env.py index 1ea3b80..b55feec 100644 --- a/src/env.py +++ b/src/env.py @@ -1,21 +1,19 @@ -import os, json -import yaml import argparse +import json +import os import time -from functools import partial from dataclasses import dataclass +from functools import partial from typing import List +import yaml +from agents import Audience, AudienceConfig, BaselineDebater, Debater, DebaterConfig, HumanDebater, Judge, JudgeConfig +from ouragents import TreeDebater +from utils.constants import CLOSING_TIME, OPENING_TIME, REBUTTAL_TIME from utils.model import HelperClient -from utils.constants import OPENING_TIME, REBUTTAL_TIME, CLOSING_TIME 
from utils.tool import logger -from agents import ( - Debater, HumanDebater, BaselineDebater, - Judge, Audience, - DebaterConfig, JudgeConfig, AudienceConfig -) -from ouragents import TreeDebater + @dataclass class EnvConfig: @@ -30,15 +28,18 @@ class EnvConfig: time_control: bool = True -def extract_overall_score(obj_scores): # larger is better - return -obj_scores["Logical Inconsistencies"] \ - - obj_scores["Unsupported Assertions"] \ - + obj_scores["Inferences"] \ - + obj_scores["Statistics"] \ - + obj_scores["Case Studies"] \ +def extract_overall_score(obj_scores): # larger is better + return ( + -obj_scores["Logical Inconsistencies"] + - obj_scores["Unsupported Assertions"] + + obj_scores["Inferences"] + + obj_scores["Statistics"] + + obj_scores["Case Studies"] - obj_scores["Unanswered Arguments"] + ) + -class Env(): +class Env: def __init__(self, config, debug) -> None: self.config = config self.motion = config.motion @@ -46,7 +47,7 @@ def __init__(self, config, debug) -> None: self.reverse = config.reverse self.time_control = config.time_control self.debug = debug - + # init players self.debaters = {} for conf in config.debater_config: @@ -57,10 +58,9 @@ def __init__(self, config, debug) -> None: elif conf.type == "baseline": self.debaters[conf.side] = BaselineDebater(conf, motion=self.motion) elif conf.type == "treedebater": - self.debaters[conf.side] = TreeDebater(conf, motion=self.motion) # prompt-based with expert prompt + self.debaters[conf.side] = TreeDebater(conf, motion=self.motion) # prompt-based with expert prompt else: raise ValueError(f"Type {conf.type} is not supported.") - # init judge if config.judge_num > 1: @@ -71,15 +71,15 @@ def __init__(self, config, debug) -> None: self.audiences = [Audience(config.audience_config) for _ in range(config.audience_num)] self.debate_process = [] - self.debate_process.append({ - "stage": "settings", - "motion": self.motion, - "debaters": { - side: debater.config for side, debater in self.debaters.items() - 
}, - "judges": self.judge.config, - "audiences": [audience.config for audience in self.audiences], - }) + self.debate_process.append( + { + "stage": "settings", + "motion": self.motion, + "debaters": {side: debater.config for side, debater in self.debaters.items()}, + "judges": self.judge.config, + "audiences": [audience.config for audience in self.audiences], + } + ) def play(self, pre_only=False): order = ["for", "against"] if not self.reverse else ["against", "for"] @@ -93,37 +93,30 @@ def play(self, pre_only=False): elif stage == "opening": for side in order: player = self.debaters[side] - response = player.opening_generation(history=self.debate_process[1:], max_time=OPENING_TIME, time_control=self.time_control) - self.debate_process.append({ - "stage": stage, - "side": side, - "content": response - }) + response = player.opening_generation( + history=self.debate_process[1:], max_time=OPENING_TIME, time_control=self.time_control + ) + self.debate_process.append({"stage": stage, "side": side, "content": response}) elif stage == "rebuttal": for side in order: player = self.debaters[side] - response = player.rebuttal_generation(history=self.debate_process[1:], max_time=REBUTTAL_TIME, time_control=self.time_control) - self.debate_process.append({ - "stage": stage, - "side": side, - "content": response - }) + response = player.rebuttal_generation( + history=self.debate_process[1:], max_time=REBUTTAL_TIME, time_control=self.time_control + ) + self.debate_process.append({"stage": stage, "side": side, "content": response}) elif stage == "closing": - for side in order: # reverse to make compatible with agent4debate + for side in order: # reverse to make compatible with agent4debate player = self.debaters[side] - response = player.closing_generation(history=self.debate_process[1:], max_time=CLOSING_TIME, time_control=self.time_control) - self.debate_process.append({ - "stage": stage, - "side": side, - "content": response - }) + response = player.closing_generation( + 
history=self.debate_process[1:], max_time=CLOSING_TIME, time_control=self.time_control + ) + self.debate_process.append({"stage": stage, "side": side, "content": response}) logger.info(f"[{stage}] Done") if self.debug: response = input("Press N to stop: ") if response.lower() == "n": break - def eval(self, process=None): logger.info("[Evaluation] Start") output = {} @@ -137,14 +130,17 @@ def eval(self, process=None): side_info = { "for": {"content": [p["content"] for p in process if p["side"] == "for"], "claims": [], "surprises": []}, - "against": {"content": [p["content"] for p in process if p["side"] == "against"], "claims": [], "surprises": []} + "against": { + "content": [p["content"] for p in process if p["side"] == "against"], + "claims": [], + "surprises": [], + }, } for side in ["for", "against"]: obj_scores, obj_scores_explanation = self.judge.finegrained_check(self.motion, side_info, side) output[f"{side}_objective_scores"] = extract_overall_score(obj_scores) output[f"{side}_objective_scores_explanation"] = obj_scores_explanation - # audience output["audience_votes"] = [] output["for_surprise"], output["against_surprise"] = [], [] @@ -155,7 +151,9 @@ def eval(self, process=None): for side in ["for", "against"]: surprise_scores, surprise_explanation = audience.surprise(self.motion, side, side_info[side]["claims"]) side_info[side]["surprises"] = surprise_scores[0] - output[f"{side}_surprise"].append(sum(side_info[side]["surprises"].values()) / len(side_info[side]["surprises"])) + output[f"{side}_surprise"].append( + sum(side_info[side]["surprises"].values()) / len(side_info[side]["surprises"]) + ) output[f"{side}_surprise_explanation"].append(surprise_explanation[0]) logger.info("[Evaluation] Done") @@ -176,7 +174,9 @@ def compare_debate(self, comparison_process, order_reverse=False): versionb = comparison_process[phase][order[1]].split("**Reference**")[0] # judge - winner, comments = self.judge.comparison(motion=self.motion, context=context, side=side, 
a=versiona, b=versionb, max_tokens=200) + winner, comments = self.judge.comparison( + motion=self.motion, context=context, side=side, a=versiona, b=versionb, max_tokens=200 + ) output[phase]["judge_version"] = winner output[phase]["judge_version_comment"] = comments @@ -184,7 +184,9 @@ def compare_debate(self, comparison_process, order_reverse=False): output[phase]["audience_version"] = [] output[phase]["audience_version_comment"] = [] for audience in self.audiences: - winner, comments = audience.comparison(motion=self.motion, context=context, side=side, a=versiona, b=versionb, max_tokens=200) + winner, comments = audience.comparison( + motion=self.motion, context=context, side=side, a=versiona, b=versionb, max_tokens=200 + ) output[phase]["audience_version"].append(winner) output[phase]["audience_version_comment"].append(comments) @@ -196,7 +198,7 @@ def compare_debate(self, comparison_process, order_reverse=False): { "stage": stage, "side": side, - "content": comparison_process[phase]["keep_response"].split("**Reference**")[0] + "content": comparison_process[phase]["keep_response"].split("**Reference**")[0], } ) @@ -231,23 +233,23 @@ def compare_debate(self, comparison_process, order_reverse=False): # Set use_rehearsal_tree and use_debate_flow_tree based on command line arguments # use_rehearsal_tree = not args.no_rehearsal_tree # use_debate_flow_tree = not args.no_debate_flow_tree - + # if "env" not in config: # config["env"] = {} # config["env"]["use_rehearsal_tree"] = use_rehearsal_tree # config["env"]["use_debate_flow_tree"] = use_debate_flow_tree - + # use_rehearsal_tree = config["env"]["use_rehearsal_tree"] # use_debate_flow_tree = config["env"]["use_debate_flow_tree"] # logger.info(f"Use rehearsal tree: {use_rehearsal_tree}") # logger.info(f"Use debate flow tree: {use_debate_flow_tree}") env_config = EnvConfig( - debater_config=[DebaterConfig(**config) for config in config["debater"]], - judge_config=JudgeConfig(**config["judge"]), - 
audience_config=AudienceConfig(**config["audience"]), - **config["env"] - ) + debater_config=[DebaterConfig(**config) for config in config["debater"]], + judge_config=JudgeConfig(**config["judge"]), + audience_config=AudienceConfig(**config["audience"]), + **config["env"], + ) env = Env(env_config, args.debug) if args.pre_only: @@ -279,31 +281,40 @@ def compare_debate(self, comparison_process, order_reverse=False): "debate_process": env.debate_process[1:], "debate_thoughts": { "for": env.debaters["for"].debate_thoughts, - "against": env.debaters["against"].debate_thoughts + "against": env.debaters["against"].debate_thoughts, }, "debate_tree": { "for": [ - env.debaters["for"].debate_tree.get_tree_info() if env.debaters["for"].type in ["treedebater"] else {}, - env.debaters["for"].oppo_debate_tree.get_tree_info() if env.debaters["for"].type in ["treedebater"] else {}, + ( + env.debaters["for"].debate_tree.get_tree_info() + if env.debaters["for"].type in ["treedebater"] + else {} + ), + ( + env.debaters["for"].oppo_debate_tree.get_tree_info() + if env.debaters["for"].type in ["treedebater"] + else {} + ), ], "against": [ - env.debaters["against"].debate_tree.get_tree_info() if env.debaters["against"].type in ["treedebater"] else {}, - env.debaters["against"].oppo_debate_tree.get_tree_info() if env.debaters["against"].type in ["treedebater"] else {}, - ] + ( + env.debaters["against"].debate_tree.get_tree_info() + if env.debaters["against"].type in ["treedebater"] + else {} + ), + ( + env.debaters["against"].oppo_debate_tree.get_tree_info() + if env.debaters["against"].type in ["treedebater"] + else {} + ), + ], }, - "conversation": { - "for": env.debaters["for"].conversation, - "against": env.debaters["against"].conversation - } + "conversation": {"for": env.debaters["for"].conversation, "against": env.debaters["against"].conversation}, } json.dump(record, open(save_file, "w"), indent=2) if not args.debug: evaluation, side_into = env.eval() logger.info(f"Result: 
{evaluation}") - record.update({ - "evaluation": evaluation, - "eval_side_info": side_into - }) + record.update({"evaluation": evaluation, "eval_side_info": side_into}) json.dump(record, open(save_file, "w"), indent=2) - diff --git a/src/evaluator.py b/src/evaluator.py index c441a3a..694e2c5 100644 --- a/src/evaluator.py +++ b/src/evaluator.py @@ -1,17 +1,16 @@ +import argparse +import json import os import sys -import json -import argparse +from functools import partial import numpy as np import pandas as pd - from tqdm import tqdm -from functools import partial -sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) -from src.utils.tool import find_json, logger, extract_numbers +from src.utils.tool import extract_numbers, find_json, logger def extract_claims(llm, title, side, content, verbose=False): @@ -47,7 +46,6 @@ def extract_claims(llm, title, side, content, verbose=False): Return the main claims and supporting statements in a Json format. The key is the main claim and the value is the support materials. """ - prompt = TEMPLATE.format(title=title, myside=side, content=content) if verbose: print(prompt) @@ -125,9 +123,10 @@ def extract_obj_aspect(llm, title, side, content, claim_against=None): except: scores = {} print("Error in extracting scores") - + return scores, response[0] + def eval_surprise(llm, title, side, claims, n=3, reduction=False, verbose=False): system_prompt = """You are an advanced debate analysis tool designed to assess the surprise factor of claims and arguments presented in debates. Your task is to evaluate how unexpected, novel, or counterintuitive the claims are within the context of the debate topic. Follow these guidelines in your assessment: 1. 
Novelty of ideas: @@ -194,8 +193,10 @@ def eval_surprise(llm, title, side, claims, n=3, reduction=False, verbose=False) print("Error in extracting surprise scores") surprise = {} continue - - non_number_score = [v for v in surprise if ("surprise_score" not in v) or not isinstance(v["surprise_score"], (int, float))] + + non_number_score = [ + v for v in surprise if ("surprise_score" not in v) or not isinstance(v["surprise_score"], (int, float)) + ] if len(non_number_score) > 0: print("There is non-number values in the surprise scores") # print(non_number_score) @@ -205,7 +206,7 @@ def eval_surprise(llm, title, side, claims, n=3, reduction=False, verbose=False) scores[v["claim"]] = v["surprise_score"] surprises.append(surprise) score_list.append(scores) - + if reduction: result = {} for key in score_list[0].keys(): @@ -216,40 +217,46 @@ def eval_surprise(llm, title, side, claims, n=3, reduction=False, verbose=False) return score_list, surprises + def evaluate_support_strength(llm, motion, argument1, argument2, history=None): - if llm.func.__name__ == 'reward_model': # trained reward model, should be partial() type - relation_ship = 'supporting' + if llm.func.__name__ == "reward_model": # trained reward model, should be partial() type + relation_ship = "supporting" assert history[-1] == argument1 and history[-3] == argument2 prompt = f"You are given a chain of arguments, each one supporting or attacking the previous one. The first argument is: {history[0]} The second last one is: {history[-3]} The last one is: {history[-1]} Now you need to determine the impact of the last one to the second last one, given their relationship {relation_ship}. Output only a number among 0, 1, or 2 in your response. 0 means not impactful; 1 means medium impactful; 2 means impactful." 
return llm(prompt) - - prompt = (f"""There is a debate with a title \"{motion}\" Please evaluate the support strength of the first argument to the second argument.\n""" - f"Argument 1: {argument1}\n" - f"Argument 2: {argument2}\n" - f"""The two arguments are from the same side in a debate, and the support strength refers to how well the first argument adds to the second argument. Each score ranges from 1 to 3, with 1 being the lowest and 3 being the highest. Provide your evaluation as a single number in the format "Score: [score]". You can additionally provide a brief explanation of your evaluation.""") - logger.debug("[Support-Strength-Prompt] {}".format(prompt.strip().replace('\n',' ||| '))) + + prompt = ( + f"""There is a debate with a title \"{motion}\" Please evaluate the support strength of the first argument to the second argument.\n""" + f"Argument 1: {argument1}\n" + f"Argument 2: {argument2}\n" + f"""The two arguments are from the same side in a debate, and the support strength refers to how well the first argument adds to the second argument. Each score ranges from 1 to 3, with 1 being the lowest and 3 being the highest. Provide your evaluation as a single number in the format "Score: [score]". 
You can additionally provide a brief explanation of your evaluation.""" + ) + logger.debug("[Support-Strength-Prompt] {}".format(prompt.strip().replace("\n", " ||| "))) response = llm(prompt=prompt, temperature=0)[0] - logger.debug("[Support-Strength-Response] {}".format(response.strip().replace('\n',' ||| '))) - response = response.replace('*','') + logger.debug("[Support-Strength-Response] {}".format(response.strip().replace("\n", " ||| "))) + response = response.replace("*", "") pos = response.find("Score: ") - numbers = extract_numbers(response[pos:pos+15]) + numbers = extract_numbers(response[pos : pos + 15]) return numbers[0] + def evaluate_defense_strength(llm, motion, argument1, argument2, history=None): - if llm.func.__name__ == 'reward_model': # trained reward model, should be partial() type - relation_ship = 'attacking' + if llm.func.__name__ == "reward_model": # trained reward model, should be partial() type + relation_ship = "attacking" # print('[DEBUG]', argument1, argument2, history) assert history[-1] == argument1 and history[-2] == argument2 prompt = f"You are given a chain of arguments, each one supporting or attacking the previous one. The first argument is: {history[0]} The second last one is: {history[-2]} The last one is: {history[-1]} Now you need to determine the impact of the last one to the second last one, given their relationship {relation_ship}. Output only a number among 0, 1, or 2 in your response. 0 means not impactful; 1 means medium impactful; 2 means impactful." return llm(prompt) - - prompt = (f"""There is a debate with a title \"{motion}\" Please evaluate the rebuttal strength of the first argument to the second argument.\n""" - f"Argument 1: {argument1}\n" - f"Argument 2: {argument2}\n" - """The two arguments are from the different sides in a debate, and the rebuttal strength refers to how well the first argument undermines the second argument. Each score ranges from 1 to 3, with 1 being the lowest and 3 being the highest. 
Provide your evaluation as a single number in the format "Score: [score]". You can additionally provide a brief explanation of your evaluation.""") - logger.debug("[Support-Defense-Prompt] {}".format(prompt.strip().replace('\n',' ||| '))) + + prompt = ( + f"""There is a debate with a title \"{motion}\" Please evaluate the rebuttal strength of the first argument to the second argument.\n""" + f"Argument 1: {argument1}\n" + f"Argument 2: {argument2}\n" + """The two arguments are from the different sides in a debate, and the rebuttal strength refers to how well the first argument undermines the second argument. Each score ranges from 1 to 3, with 1 being the lowest and 3 being the highest. Provide your evaluation as a single number in the format "Score: [score]". You can additionally provide a brief explanation of your evaluation.""" + ) + logger.debug("[Support-Defense-Prompt] {}".format(prompt.strip().replace("\n", " ||| "))) response = llm(prompt=prompt, temperature=0)[0] - logger.debug("[Support-Defense-Response] {}".format(response.strip().replace('\n',' ||| '))) + logger.debug("[Support-Defense-Response] {}".format(response.strip().replace("\n", " ||| "))) pos = response.find("Score: ") - numbers = extract_numbers(response[pos:pos+15]) + numbers = extract_numbers(response[pos : pos + 15]) return numbers[0] diff --git a/src/ouragents.py b/src/ouragents.py index 3a79c2d..516a2dc 100644 --- a/src/ouragents.py +++ b/src/ouragents.py @@ -1,35 +1,39 @@ -import os -import re +import copy import json import math -import torch -from openai import OpenAI -from functools import partial -import traceback, random -from dataclasses import dataclass -import requests -import copy +import os +import random +import re import time -from tavily import TavilyClient - -import litellm +import traceback +from dataclasses import dataclass +from functools import partial import google.generativeai as genai -from sentence_transformers.util import (semantic_search, - dot_score, - 
normalize_embeddings) +import litellm +import requests +import torch +from openai import OpenAI +from sentence_transformers.util import dot_score, normalize_embeddings, semantic_search +from tavily import TavilyClient +from agents import Audience, AudienceConfig, Debater +from debate_tree import DebateTree, PrepareTree +from prepare import ClaimPool +from utils.constants import EMBEDDING_MODEL, REMAINING_ROUND_NUM, TIME_MODE_FOR_STATEMENT, TIME_TOLERANCE, WORDRATIO +from utils.helper import ( + TimeAdjuster, + build_logic_claims, + extract_statement, + get_actions_from_tree, + get_battlefields_from_actions, + get_retrieval_from_rehearsal_tree, + rank_evidence, +) from utils.model import HelperClient -from utils.tool import logger, get_response_with_retry, sort_by_importance, sort_by_action from utils.prompts import * from utils.time_estimator import LengthEstimator -from utils.helper import TimeAdjuster, rank_evidence, build_logic_claims, get_actions_from_tree, extract_statement, get_battlefields_from_actions, get_retrieval_from_rehearsal_tree -from utils.constants import TIME_MODE_FOR_STATEMENT, WORDRATIO, TIME_TOLERANCE, EMBEDDING_MODEL, REMAINING_ROUND_NUM -from prepare import ClaimPool -from debate_tree import DebateTree, PrepareTree - -from agents import Debater, Audience, AudienceConfig - +from utils.tool import get_response_with_retry, logger, sort_by_action, sort_by_importance class TreeDebater(Debater): @@ -37,24 +41,29 @@ def __init__(self, config, motion): super().__init__(config, motion) self.definition = None self.evidence_pool = [] - self.high_quality_evidence_pool = [] # this instead of self.evidence_pool is later used + self.high_quality_evidence_pool = [] # this instead of self.evidence_pool is later used self.pool_file = config.pool_file self.add_retrieval_feedback = config.add_retrieval_feedback # Add new flags for controlling the use of rehearsal tree and debate flow tree self.use_rehearsal_tree = config.use_rehearsal_tree 
self.use_debate_flow_tree = config.use_debate_flow_tree - logger.debug("[TreeDebater] " + f"use_rehearsal_tree: {self.use_rehearsal_tree}, use_debate_flow_tree: {self.use_debate_flow_tree}") + logger.debug( + "[TreeDebater] " + + f"use_rehearsal_tree: {self.use_rehearsal_tree}, use_debate_flow_tree: {self.use_debate_flow_tree}" + ) - self.helper_client = partial(HelperClient, model=self.config.model, temperature=0, max_tokens=config.max_tokens, n=1) - self.simulated_audience = [Audience(AudienceConfig(model=self.config.model, temperature=1)) for _ in range(1)] + self.helper_client = partial( + HelperClient, model=self.config.model, temperature=0, max_tokens=config.max_tokens, n=1 + ) + self.simulated_audience = [Audience(AudienceConfig(model=self.config.model, temperature=1)) for _ in range(1)] # Initialize debate trees only if they are enabled if self.use_debate_flow_tree: self.debate_tree = DebateTree(motion=motion, side=self.side) self.oppo_debate_tree = DebateTree(motion=motion, side=self.oppo_side) else: - # also create a dummy debate tree, otherwise in `_get_retrieval_debate_tree` will have error + # also create a dummy debate tree, otherwise in `_get_retrieval_debate_tree` will have error self.debate_tree = DebateTree(motion=motion, side=self.side) self.oppo_debate_tree = DebateTree(motion=motion, side=self.oppo_side) @@ -69,12 +78,16 @@ def __init__(self, config, motion): data["pro_debate_tree_obj"] = DebateTree.from_json(data["pro_debate_tree"]) data["con_debate_tree_obj"] = DebateTree.from_json(data["con_debate_tree"]) self.data_list = data_list - self.pro_embeddings = [torch.tensor([x["pro_embedding_level_1"] for x in data_list]), - torch.tensor([x["pro_embedding_level_2"] for x in data_list]), - torch.tensor([x["pro_embedding_level_3"] for x in data_list])] - self.con_embeddings = [torch.tensor([x["con_embedding_level_1"] for x in data_list]), - torch.tensor([x["con_embedding_level_2"] for x in data_list]), - torch.tensor([x["con_embedding_level_3"] for 
x in data_list])] + self.pro_embeddings = [ + torch.tensor([x["pro_embedding_level_1"] for x in data_list]), + torch.tensor([x["pro_embedding_level_2"] for x in data_list]), + torch.tensor([x["pro_embedding_level_3"] for x in data_list]), + ] + self.con_embeddings = [ + torch.tensor([x["con_embedding_level_1"] for x in data_list]), + torch.tensor([x["con_embedding_level_2"] for x in data_list]), + torch.tensor([x["con_embedding_level_3"] for x in data_list]), + ] self.used_evidence = set() @@ -87,7 +100,7 @@ def _get_evidence(self, claim): e["content"] = e["content"].replace("\n", " ") if "raw_content" in e and e["raw_content"] is not None: e["raw_content"] = e["raw_content"].replace("\n", " ")[:2048] - e["raw_content"] = re.sub(r'https?://\S+', '', e["raw_content"]) + e["raw_content"] = re.sub(r"https?://\S+", "", e["raw_content"]) if "url" in e: e.pop("url") else: @@ -99,10 +112,10 @@ def claim_generation(self, pool_size, definition=None, **kwargs): Generate the claim pool for the debater """ if self.pool_file is not None and os.path.exists(self.pool_file): - with open(self.pool_file, 'r') as file: + with open(self.pool_file, "r") as file: self.claim_pool = json.load(file)[:8] oppo_pool_file = self.pool_file.replace(f"pool_{self.side}", f"pool_{self.oppo_side}") - with open(oppo_pool_file, 'r') as file: + with open(oppo_pool_file, "r") as file: self.oppo_claim_pool = json.load(file)[:8] self.definition = self.claim_pool[0][0].get("definition", None) else: @@ -112,37 +125,45 @@ def claim_generation(self, pool_size, definition=None, **kwargs): motion = motion + "\nYour definition is: " + definition for side in ["for", "against"]: - claim_workspace = ClaimPool(motion=motion, side=side, model=self.config.model, pool_size=pool_size, **kwargs) + claim_workspace = ClaimPool( + motion=motion, side=side, model=self.config.model, pool_size=pool_size, **kwargs + ) claim_pool = claim_workspace.create_claim(need_score=True, need_evidence=(side == self.side)) # print(pool) 
logger.info(f"Claim Pool Size: {len(self.claim_pool)}") - save_file_name = f'{self.motion}_pool_{side}.json'.replace(' ', '_').lower() - with open(save_file_name, 'w') as file: + save_file_name = f"{self.motion}_pool_{side}.json".replace(" ", "_").lower() + with open(save_file_name, "w") as file: json.dump(self.claim_pool, file, indent=2) if side == self.side: self.claim_pool = claim_pool else: self.oppo_claim_pool = claim_pool - + prompt = propose_definition_prompt.format(motion=self.motion, act=self.act) - logger.debug("[Definition-Helper-Prompt] " + prompt.strip().replace('\n',' ||| ')) + logger.debug("[Definition-Helper-Prompt] " + prompt.strip().replace("\n", " ||| ")) response = self.helper_client(prompt=prompt)[0] - logger.debug("[Definition-Helper-Response] " + response.strip().replace('\n',' ||| ')) + logger.debug("[Definition-Helper-Response] " + response.strip().replace("\n", " ||| ")) if "None" in response: self.definition = None else: self.definition = response.strip() - - - def claim_selection(self, history=None): + def claim_selection(self, history=None): # NOTE: claim selection by overall framework, not sure if it is good if history and len(history) > 0: context = history[-1]["content"] else: context = "" - main_claims, group_idx, thoughts = build_logic_claims(self.helper_client, self.motion, self.side, self.claim_pool, context=context, definition=self.definition, use_rehearsal_tree=self.use_rehearsal_tree) + main_claims, group_idx, thoughts = build_logic_claims( + self.helper_client, + self.motion, + self.side, + self.claim_pool, + context=context, + definition=self.definition, + use_rehearsal_tree=self.use_rehearsal_tree, + ) # main_claims, group_idx, thoughts = build_cot_claims(self.helper_client, self.motion, self.side, self.claim_pool) self.main_claims = [self.claim_pool[idx][0] for idx in group_idx] @@ -151,7 +172,7 @@ def claim_selection(self, history=None): self.build_evidence_pool() self.debate_thoughts.append(thoughts) - + # Only build 
prepared tree list if rehearsal tree is enabled if self.use_rehearsal_tree: self.prepared_tree_list = self._get_prepared_tree(self.side) @@ -169,7 +190,6 @@ def build_evidence_pool(self): self.evidence_pool = high_quality_evidence_pool[:10] self.high_quality_evidence_pool = high_quality_evidence_pool - def _add_additional_info(self, prompt, history, **kwargs): tips = "" @@ -178,9 +198,21 @@ def _add_additional_info(self, prompt, history, **kwargs): actions = get_actions_from_tree(self.main_claims_content, self.debate_tree, self.oppo_debate_tree) action_str = "" for action in actions: - action["prepared_materials"] = self._retrieve_on_prepared_tree(action).strip() - battlefields = get_battlefields_from_actions(self.helper_client, self.motion, self.side, self.main_claims_content, actions, self.debate_tree, self.oppo_debate_tree) - battlefields = sorted(battlefields, key=lambda x: (sort_by_importance(x["battlefield_importance"]), len(x["actions"])), reverse=True) + action["prepared_materials"] = self._retrieve_on_prepared_tree(action).strip() + battlefields = get_battlefields_from_actions( + self.helper_client, + self.motion, + self.side, + self.main_claims_content, + actions, + self.debate_tree, + self.oppo_debate_tree, + ) + battlefields = sorted( + battlefields, + key=lambda x: (sort_by_importance(x["battlefield_importance"]), len(x["actions"])), + reverse=True, + ) battlefield_str = "Allocate time to the most important battlefields first. Present each battlefield as a complete unit. 
\n\n" used_actions = set() @@ -196,40 +228,60 @@ def _add_additional_info(self, prompt, history, **kwargs): for action in actions: action_type = action["action"] target_claim = action["target_claim"] - target_argument = action["target_argument"] if action_type != "propose" else action["prepared_materials"] - action_str += "\n\t" + f"*{action_type}* the claim: \"{target_claim}\" and the argument: \"{target_argument}\"" - battlefield_str += (f"**Battlefield Importance**: {battlefield['battlefield_importance']}\n" - f"**Battlefield**: {battlefield['battlefield']}\n" - f"**Battlefield Rationale**: {battlefield['battlefield_argument']}\n" - f"**Actions**:{action_str}\n" - ) + target_argument = ( + action["target_argument"] if action_type != "propose" else action["prepared_materials"] + ) + action_str += ( + "\n\t" + f'*{action_type}* the claim: "{target_claim}" and the argument: "{target_argument}"' + ) + battlefield_str += ( + f"**Battlefield Importance**: {battlefield['battlefield_importance']}\n" + f"**Battlefield**: {battlefield['battlefield']}\n" + f"**Battlefield Rationale**: {battlefield['battlefield_argument']}\n" + f"**Actions**:{action_str}\n" + ) battlefield_str += "\n" tips += "\n\n" + battlefield_str - + prompt = prompt.replace("{tips}", tips + "\n\n") return prompt - def opening_generation(self, history, max_time, time_control=False, **kwargs): self.status = "opening" self.listen(history) max_words = math.ceil(max_time / WORDRATIO["time"]) self.claim_selection(history) - + opening_thoughts = [x for x in self.debate_thoughts if x["mode"] == "choose_main_claims"] - framework, explanation = opening_thoughts[-1]["framework"], opening_thoughts[-1]["explanation"] if opening_thoughts else ("", "") - + framework, explanation = opening_thoughts[-1]["framework"], ( + opening_thoughts[-1]["explanation"] if opening_thoughts else ("", "") + ) + # Only include tree information if debate flow tree is enabled if self.use_debate_flow_tree: tree = 
self.debate_tree.print_tree(include_status=True) oppo_tree = self.oppo_debate_tree.print_tree(include_status=True, reverse=True) - prompt = expert_opening_prompt_2.format(motion=self.motion, act=self.act, claims="* "+"\n* ".join(self.main_claims_content), - tree=tree, oppo_tree=oppo_tree, framework=framework, explanation=explanation) + prompt = expert_opening_prompt_2.format( + motion=self.motion, + act=self.act, + claims="* " + "\n* ".join(self.main_claims_content), + tree=tree, + oppo_tree=oppo_tree, + framework=framework, + explanation=explanation, + ) else: # Use a simplified prompt without tree information - prompt = expert_opening_prompt_2.format(motion=self.motion, act=self.act, claims="* "+"\n* ".join(self.main_claims_content), - tree="", oppo_tree="", framework=framework, explanation=explanation) + prompt = expert_opening_prompt_2.format( + motion=self.motion, + act=self.act, + claims="* " + "\n* ".join(self.main_claims_content), + tree="", + oppo_tree="", + framework=framework, + explanation=explanation, + ) prompt = prompt.replace("{n_words}", str(max_words)) @@ -240,7 +292,7 @@ def opening_generation(self, history, max_time, time_control=False, **kwargs): prompt = self._add_additional_info(prompt, history, **kwargs) - response = self.speak(prompt, max_time=max_time, time_control=time_control, history=history, **kwargs) + response = self.speak(prompt, max_time=max_time, time_control=time_control, history=history, **kwargs) if self.use_debate_flow_tree: self._analyze_statement(response, self.side) return response @@ -249,48 +301,50 @@ def rebuttal_generation(self, history, max_time, time_control=False, **kwargs): self.status = "rebuttal" self.listen(history) max_words = math.ceil(max_time / WORDRATIO["time"]) - + # Only include tree information if debate flow tree is enabled if self.use_debate_flow_tree: your_tree = self.debate_tree.print_tree(include_status=True) oppo_tree = self.oppo_debate_tree.print_tree(include_status=True, reverse=True) - prompt = 
expert_rebuttal_prompt_2.format(motion=self.motion, act=self.act, counter_act=self.counter_act, - tree=your_tree, oppo_tree=oppo_tree) + prompt = expert_rebuttal_prompt_2.format( + motion=self.motion, act=self.act, counter_act=self.counter_act, tree=your_tree, oppo_tree=oppo_tree + ) else: # Use a simplified prompt without tree information - prompt = expert_rebuttal_prompt_2.format(motion=self.motion, act=self.act, counter_act=self.counter_act, - tree="", oppo_tree="") - + prompt = expert_rebuttal_prompt_2.format( + motion=self.motion, act=self.act, counter_act=self.counter_act, tree="", oppo_tree="" + ) + prompt = prompt.replace("{n_words}", str(max_words)) prompt = self._add_additional_info(prompt, history, **kwargs) - response = self.speak(prompt, max_time=max_time, time_control=time_control, history=history, **kwargs) + response = self.speak(prompt, max_time=max_time, time_control=time_control, history=history, **kwargs) if self.use_debate_flow_tree: self._analyze_statement(response, self.side) return response - + def closing_generation(self, history, max_time, time_control=False, **kwargs): self.status = "closing" self.listen(history) max_words = math.ceil(max_time / WORDRATIO["time"]) - + # Only include tree information if debate flow tree is enabled if self.use_debate_flow_tree: your_tree = self.debate_tree.print_tree(include_status=True) oppo_tree = self.oppo_debate_tree.print_tree(include_status=True, reverse=True) - prompt = expert_closing_prompt_2.format(act=self.act, counter_act=self.counter_act, - tree=your_tree, oppo_tree=oppo_tree) + prompt = expert_closing_prompt_2.format( + act=self.act, counter_act=self.counter_act, tree=your_tree, oppo_tree=oppo_tree + ) else: # Use a simplified prompt without tree information - prompt = expert_closing_prompt_2.format(act=self.act, counter_act=self.counter_act, - tree="", oppo_tree="") - + prompt = expert_closing_prompt_2.format(act=self.act, counter_act=self.counter_act, tree="", oppo_tree="") + prompt = 
prompt.replace("{n_words}", str(max_words)) prompt = self._add_additional_info(prompt, history, **kwargs) - response = self.speak(prompt, max_time=max_time, time_control=time_control, history=history, **kwargs) + response = self.speak(prompt, max_time=max_time, time_control=time_control, history=history, **kwargs) response = response.split("**Reference**")[0].strip() if self.use_debate_flow_tree: self._analyze_statement(response, self.side) @@ -299,25 +353,34 @@ def closing_generation(self, history, max_time, time_control=False, **kwargs): def speak(self, prompt, max_time, time_control=False, history=None, **kwargs): self._add_message("user", prompt) logger.debug(f"[Conversation-History] {json.dumps(self.conversation)}") - logger.debug("[Prompt] " + prompt.strip().replace('\n',' ||| ')) - + logger.debug("[Prompt] " + prompt.strip().replace("\n", " ||| ")) # add evidence based on audience feedback response = self._get_response(self.conversation, **kwargs) - logger.debug("[Response-Before-Post-Process] " + response.strip().replace('\n',' ||| ')) - feedback_for_revision, new_evidence, allocation_plan, ori_statement = self._get_revision_suggestion(statement=response, history=history, add_evidence=True, **kwargs) - response = self._length_adjust(ori_statement, feedback_for_revision, new_evidence, allocation_plan, max_time, max_retry=1, **kwargs) - + logger.debug("[Response-Before-Post-Process] " + response.strip().replace("\n", " ||| ")) + feedback_for_revision, new_evidence, allocation_plan, ori_statement = self._get_revision_suggestion( + statement=response, history=history, add_evidence=True, **kwargs + ) + response = self._length_adjust( + ori_statement, feedback_for_revision, new_evidence, allocation_plan, max_time, max_retry=1, **kwargs + ) + # check audience feedback again - feedback_for_revision, new_evidence, _, _ = self._get_revision_suggestion(statement=response, history=history, add_evidence=False, **kwargs) + feedback_for_revision, new_evidence, _, _ = 
self._get_revision_suggestion( + statement=response, history=history, add_evidence=False, **kwargs + ) if not time_control: - response = self._length_adjust(response, feedback_for_revision, new_evidence, allocation_plan, max_time, max_retry=1, **kwargs) + response = self._length_adjust( + response, feedback_for_revision, new_evidence, allocation_plan, max_time, max_retry=1, **kwargs + ) else: - response = self._length_adjust(response, feedback_for_revision, new_evidence, allocation_plan, max_time, max_retry=10, **kwargs) - + response = self._length_adjust( + response, feedback_for_revision, new_evidence, allocation_plan, max_time, max_retry=10, **kwargs + ) + return super().post_process(response, max_time, time_control, **kwargs) - + def listen(self, history): if len(history) == 0: return @@ -325,7 +388,7 @@ def listen(self, history): content = f"**Opponent's {history[-1]['stage'].title()} Statement**\n" + history[-1]["content"] self._add_message("user", content) - + # Only analyze statement if debate flow tree is enabled if self.use_debate_flow_tree: self._analyze_statement(history[-1]["content"], self.oppo_side) @@ -342,30 +405,41 @@ def _get_feedback_from_audience(self, statement, history, **kwargs): retrieval, retrieval_feedback = self._get_retrieval_debate_tree(include_points=False) if retrieval is not None: extra_tree_info += "\n\n" + retrieval_feedback - + history_str = "" for h in history: side = f"Opponent ({self.oppo_side})" if h["side"] == self.oppo_side else f"You ({self.side})" history_str += f"*{side}'s {h['stage'].title()} Statement*\t" + h["content"].replace("\n", " ") + "\n\n" - prompt=audience_feedback_prompt.format(motion=self.motion, side=self.side, stage=self.status.title(), statement=statement, retrieval=extra_tree_info, history=history_str) - logger.debug("[Audience-Feedback-Prompt] " + prompt.strip().replace('\n',' ||| ')) + prompt = audience_feedback_prompt.format( + motion=self.motion, + side=self.side, + stage=self.status.title(), + 
statement=statement, + retrieval=extra_tree_info, + history=history_str, + ) + logger.debug("[Audience-Feedback-Prompt] " + prompt.strip().replace("\n", " ||| ")) audience_feedback = [] flat_audience_feedback = "" for i, au in enumerate(self.simulated_audience): feedback = au.feedback(prompt) audience_feedback.append(feedback) - key_feedback = "Critical Issues and Minimal Revision Suggestions" + feedback.split("Critical Issues and Minimal Revision Suggestions")[-1] + key_feedback = ( + "Critical Issues and Minimal Revision Suggestions" + + feedback.split("Critical Issues and Minimal Revision Suggestions")[-1] + ) flat_audience_feedback += f"\n\n\nAudience {i+1} Feedback:\n" + key_feedback - logger.debug("[Audience-Feedback-Response] " + flat_audience_feedback.strip().replace('\n',' ||| ')) + logger.debug("[Audience-Feedback-Response] " + flat_audience_feedback.strip().replace("\n", " ||| ")) return flat_audience_feedback, audience_feedback - - + def _get_retrieval_debate_tree(self, **kwargs): if self.debate_tree.get_all_nodes() == []: current_tree_info = self.motion else: current_tree_info = self.debate_tree.print_tree(include_status=False, meta_info=False) - logger.debug(f"[Retrieval-Debate-Tree] Search for {self.side} side: " + current_tree_info.strip().replace('\\n',' ||| ')) + logger.debug( + f"[Retrieval-Debate-Tree] Search for {self.side} side: " + current_tree_info.strip().replace("\\n", " ||| ") + ) current_tree_embedding = self._get_embedding_from_cache(current_tree_info) memory_tree_embedding = self.pro_embeddings if self.side == "for" else self.con_embeddings if self.status == "opening": @@ -374,27 +448,46 @@ def _get_retrieval_debate_tree(self, **kwargs): memory_tree_embedding = memory_tree_embedding[1] elif self.status == "closing": memory_tree_embedding = memory_tree_embedding[2] - - hits = semantic_search(torch.tensor([current_tree_embedding]), torch.tensor(memory_tree_embedding), score_function=dot_score, top_k=1)[0] + + hits = semantic_search( + 
torch.tensor([current_tree_embedding]), + torch.tensor(memory_tree_embedding), + score_function=dot_score, + top_k=1, + )[0] retrieval_idx = [x["corpus_id"] for x in hits] retrieval_data = [self.data_list[idx] for idx in retrieval_idx] retrieval_motion = [data["motion"] for data in retrieval_data] retrieval_similarity = [x["score"] for x in hits] - retrieval_tree = [data["pro_debate_tree_obj"] if self.side == "for" else data["con_debate_tree_obj"] for data in retrieval_data] + retrieval_tree = [ + data["pro_debate_tree_obj"] if self.side == "for" else data["con_debate_tree_obj"] + for data in retrieval_data + ] retrieval_tree_info = [tree.print_tree(include_status=False) for tree in retrieval_tree] - retrieval_stage_statement = [x for data in retrieval_data for x in data["structured_arguments"] if x["stage"] == self.status and x["side"] == self.side] - logger.debug(f"[Retrieval-Debate-Tree] Retrieval Index: {retrieval_idx}, Retrieval Similarity: {retrieval_similarity}, Retrieval Motion: {retrieval_motion}") + retrieval_stage_statement = [ + x + for data in retrieval_data + for x in data["structured_arguments"] + if x["stage"] == self.status and x["side"] == self.side + ] + logger.debug( + f"[Retrieval-Debate-Tree] Retrieval Index: {retrieval_idx}, Retrieval Similarity: {retrieval_similarity}, Retrieval Motion: {retrieval_motion}" + ) logger.debug(f"[Retrieval-Debate-Tree] Retrieval Tree Info: {retrieval_tree_info}") - retrieval = [{ - "idx": idx, - "motion": motion, - "side": self.side, - "similarity": score, - "tree_info": tree_info, - "stage_statement": stage_statement, - } for idx, score, motion, tree_info, stage_statement in zip(retrieval_idx, retrieval_similarity, retrieval_motion, retrieval_tree_info, retrieval_stage_statement)] - + retrieval = [ + { + "idx": idx, + "motion": motion, + "side": self.side, + "similarity": score, + "tree_info": tree_info, + "stage_statement": stage_statement, + } + for idx, score, motion, tree_info, stage_statement in zip( + 
retrieval_idx, retrieval_similarity, retrieval_motion, retrieval_tree_info, retrieval_stage_statement + ) + ] retrieval_feedback = "" for ex in retrieval: @@ -426,9 +519,9 @@ def _get_retrieval_debate_tree(self, **kwargs): "retrieval_feedback": retrieval_feedback, } self.debate_thoughts.append(thoughts) - + return retrieval, retrieval_feedback - + def _get_prepared_tree(self, side): prepared_tree = [] if side == self.side: @@ -444,16 +537,20 @@ def _get_prepared_tree(self, side): data = x[0]["tree_structure"] tree = PrepareTree.from_json(data) root_claim = tree.root.claim - match_node, similarity = self.oppo_debate_tree.get_most_similar_node(root_claim, side=side, level=1, top_k=1, threshold=0.8) + match_node, similarity = self.oppo_debate_tree.get_most_similar_node( + root_claim, side=side, level=1, top_k=1, threshold=0.8 + ) if match_node is not None: match_trees.append((tree, similarity, match_node.claim)) - + sorted_match_trees = sorted(match_trees, key=lambda x: x[1], reverse=True) for i in range(min(len(sorted_match_trees), 3)): prepared_tree.append(sorted_match_trees[i][0]) similarity = sorted_match_trees[i][1] query_claim = sorted_match_trees[i][2] - logger.debug(f"[Get-Prepared-Tree] Opponent's Tree (similarity: {similarity:0.2f}) for claim: {query_claim}\n{tree.print_tree(include_status=True)}") + logger.debug( + f"[Get-Prepared-Tree] Opponent's Tree (similarity: {similarity:0.2f}) for claim: {query_claim}\n{tree.print_tree(include_status=True)}" + ) thoughts = { "stage": self.status, @@ -464,19 +561,28 @@ def _get_prepared_tree(self, side): self.debate_thoughts.append(thoughts) return prepared_tree - + def _retrieve_on_prepared_tree(self, action): # Skip retrieval if rehearsal tree is disabled if not self.use_rehearsal_tree: return "" - + # retrieve similar action from the prepared tree target_claim = action["target_claim"] action_type = action["action"] look_ahead_num = REMAINING_ROUND_NUM[f"{self.status}_{self.side}"] query_embedding = 
self._get_embedding_from_cache(target_claim) - additional_info, retrieval_nodes = get_retrieval_from_rehearsal_tree(action_type, target_claim, self.side, self.oppo_side, self.prepared_tree_list, self.prepared_oppo_tree_list, look_ahead_num, query_embedding) + additional_info, retrieval_nodes = get_retrieval_from_rehearsal_tree( + action_type, + target_claim, + self.side, + self.oppo_side, + self.prepared_tree_list, + self.prepared_oppo_tree_list, + look_ahead_num, + query_embedding, + ) thoughts = { "stage": self.status, @@ -488,9 +594,9 @@ def _retrieve_on_prepared_tree(self, action): "additional_info": additional_info, } self.debate_thoughts.append(thoughts) - + return "\n".join(additional_info) - + def _get_revision_suggestion(self, statement, history, add_evidence=True, **kwargs): statement = statement.replace("**Statement:**", "**Statement**").replace("**Statement**:", "**Statement**") parts = statement.split("**Statement**") @@ -503,9 +609,9 @@ def _get_revision_suggestion(self, statement, history, add_evidence=True, **kwar if self.status == "closing": return "", "", allocation_plan, statement - + feedback_from_audience, audience_feedback = self._get_feedback_from_audience(statement, history, **kwargs) - feedback_for_revision = (f"Revision Guidance:\n{feedback_from_audience}") + feedback_for_revision = f"Revision Guidance:\n{feedback_from_audience}" new_evidence = [] selected_ids = [] @@ -515,33 +621,50 @@ def _get_revision_suggestion(self, statement, history, add_evidence=True, **kwar new_evidence = [x for x in self.high_quality_evidence_pool if x["id"] not in self.used_evidence] selected_ids = [x["id"] for x in new_evidence] if len(new_evidence) > 10: - evidence_str = json.dumps([{k:v for k,v in x.items() if k != "raw_content"} for x in new_evidence]) - prompt = evidence_selection_prompt.format(motion=self.motion, side=self.side, stage=self.status, evidence=evidence_str, statement=statement, feedback=feedback_for_revision) - 
logger.debug("[Evidence-Selection-Prompt] " + prompt.strip().replace('\n',' ||| ')) + evidence_str = json.dumps([{k: v for k, v in x.items() if k != "raw_content"} for x in new_evidence]) + prompt = evidence_selection_prompt.format( + motion=self.motion, + side=self.side, + stage=self.status, + evidence=evidence_str, + statement=statement, + feedback=feedback_for_revision, + ) + logger.debug("[Evidence-Selection-Prompt] " + prompt.strip().replace("\n", " ||| ")) selected_ids, response = get_response_with_retry(self.helper_client, prompt, "selected_ids") - logger.debug("[Evidence-Selection-Response] " + response.strip().replace('\n',' ||| ')) - new_evidence = [e for e in new_evidence if e["id"] in selected_ids and e["id"] not in self.used_evidence] + logger.debug("[Evidence-Selection-Response] " + response.strip().replace("\n", " ||| ")) + new_evidence = [ + e for e in new_evidence if e["id"] in selected_ids and e["id"] not in self.used_evidence + ] if len(new_evidence) != len(selected_ids): - logger.warning(f"[Get-Expert-Audience-Revision-Evidence-Selection] Select {selected_ids}, finally {len(new_evidence)}") - logger.debug(f"[Get-Expert-Audience-Revision-Evidence-Selection] From {len(self.high_quality_evidence_pool)} evidence select {len(selected_ids)} evidence: {selected_ids}") + logger.warning( + f"[Get-Expert-Audience-Revision-Evidence-Selection] Select {selected_ids}, finally {len(new_evidence)}" + ) + logger.debug( + f"[Get-Expert-Audience-Revision-Evidence-Selection] From {len(self.high_quality_evidence_pool)} evidence select {len(selected_ids)} evidence: {selected_ids}" + ) self.used_evidence.update(selected_ids) logger.debug(f"[Used-Evidence] {self.used_evidence}") - self.debate_thoughts.append({ - "stage": self.status, - "side": self.side, - "mode": "revision", - "original_statement": statement, - "allocation_plan": allocation_plan, - "simulated_audience_feedback": audience_feedback, - "feedback_for_revision": feedback_for_revision, - 
"selected_evidence_id": selected_ids, - }) - - return feedback_for_revision, new_evidence, allocation_plan, statement - - def _length_adjust(self, statement, feedback_for_revision, new_evidence, allocation_plan, max_time, max_retry=10, **kwargs): + self.debate_thoughts.append( + { + "stage": self.status, + "side": self.side, + "mode": "revision", + "original_statement": statement, + "allocation_plan": allocation_plan, + "simulated_audience_feedback": audience_feedback, + "feedback_for_revision": feedback_for_revision, + "selected_evidence_id": selected_ids, + } + ) + + return feedback_for_revision, new_evidence, allocation_plan, statement + + def _length_adjust( + self, statement, feedback_for_revision, new_evidence, allocation_plan, max_time, max_retry=10, **kwargs + ): budget, threshold = max_time, TIME_TOLERANCE time_adjuster = TimeAdjuster() estimator = LengthEstimator(mode=TIME_MODE_FOR_STATEMENT) @@ -552,22 +675,30 @@ def _length_adjust(self, statement, feedback_for_revision, new_evidence, allocat retry = 0 response_list = [] while not flag and retry < max_retry: - evidence_str = json.dumps([{k:v for k,v in x.items() if k != "raw_content"} for x in new_evidence]) - prompt = post_process_prompt.format(motion=self.motion, side=self.side, stage=self.status, - evidence=evidence_str, statement=statement, feedback=feedback_for_revision, - max_words=n_words, - allocation_plan=allocation_plan) - - logger.debug("[Get-Expert-Audience-Revision-Prompt] " + prompt.strip().replace('\n',' ||| ')) + evidence_str = json.dumps([{k: v for k, v in x.items() if k != "raw_content"} for x in new_evidence]) + prompt = post_process_prompt.format( + motion=self.motion, + side=self.side, + stage=self.status, + evidence=evidence_str, + statement=statement, + feedback=feedback_for_revision, + max_words=n_words, + allocation_plan=allocation_plan, + ) + + logger.debug("[Get-Expert-Audience-Revision-Prompt] " + prompt.strip().replace("\n", " ||| ")) revision = 
self.helper_client(prompt=prompt)[0] - logger.debug("[Get-Expert-Audience-Revision-Response] " + revision.strip().replace('\n',' ||| ')) + logger.debug("[Get-Expert-Audience-Revision-Response] " + revision.strip().replace("\n", " ||| ")) new_statement = revision.replace("Revised Statement:\n", "") new_statement = new_statement.replace("et al.,", "") new_statement = new_statement.replace("[X]", "") - response = re.sub(r' [X-Z][ \%]', '', new_statement) - - logger.debug("[Response-After-Post-Process] " + response.strip().replace('\n',' ||| ')) - current_cost, n_words, flag = time_adjuster.revise_helper(response, n_words, budget, threshold=threshold, ratio=ratio, estimator=estimator) + response = re.sub(r" [X-Z][ \%]", "", new_statement) + + logger.debug("[Response-After-Post-Process] " + response.strip().replace("\n", " ||| ")) + current_cost, n_words, flag = time_adjuster.revise_helper( + response, n_words, budget, threshold=threshold, ratio=ratio, estimator=estimator + ) response_list.append([response, current_cost]) retry += 1 if not flag and max_retry > 1: @@ -605,7 +736,7 @@ def _length_adjust(self, statement, feedback_for_revision, new_evidence, allocat def _get_embedding_from_cache(self, content: str): if content in self.embedding_cache: return self.embedding_cache[content] - + max_retry = 3 retry = 0 while retry < max_retry: @@ -616,7 +747,7 @@ def _get_embedding_from_cache(self, content: str): logger.error(f"[Get-Embedding-From-Cache] Error: {e}. 
Sleep 30 seconds and retry.") time.sleep(30) retry += 1 - + self.embedding_cache[content] = embedding return embedding @@ -630,12 +761,19 @@ def _analyze_statement(self, statements, statement_side): # Skip analysis if debate flow tree is disabled if not self.use_debate_flow_tree: return [] - + if statement_side == self.side: tree, oppo_tree = self.debate_tree, self.oppo_debate_tree else: tree, oppo_tree = self.oppo_debate_tree, self.debate_tree - claims = extract_statement(self.helper_client, self.motion, statements, tree=[tree.print_tree(include_status=True), oppo_tree.print_tree(include_status=True, reverse=True)], side=statement_side, stage=self.status) + claims = extract_statement( + self.helper_client, + self.motion, + statements, + tree=[tree.print_tree(include_status=True), oppo_tree.print_tree(include_status=True, reverse=True)], + side=statement_side, + stage=self.status, + ) for x in claims: for p in x["purpose"]: @@ -667,7 +805,6 @@ def _analyze_statement(self, statements, statement_side): self.debate_thoughts.append(thoughts) return claims - def reset_stage(self, stage, side, new_content, history): conversation = [x for x in self.conversation] @@ -682,8 +819,8 @@ def reset_stage(self, stage, side, new_content, history): self.conversation.append(x) assert self.conversation[-1]["role"] == "assistant", "The last message should be an assistant message" - self.conversation[-1]["content"] = new_content # update the last assistant message - + self.conversation[-1]["content"] = new_content # update the last assistant message + # reset the debate flow tree self.debate_tree = DebateTree(motion=self.motion, side=self.side) self.oppo_debate_tree = DebateTree(motion=self.motion, side=self.oppo_side) @@ -692,4 +829,4 @@ def reset_stage(self, stage, side, new_content, history): self._analyze_statement(x["content"], x["side"]) self._analyze_statement(new_content, side) - return \ No newline at end of file + return diff --git a/src/prepare.py b/src/prepare.py index 
9678d7f..d5b819f 100644 --- a/src/prepare.py +++ b/src/prepare.py @@ -1,28 +1,36 @@ +import argparse +import json import os import re -import json from functools import partial -import numpy as np -import argparse import google.generativeai as genai +import numpy as np +from sentence_transformers.util import cos_sim from tavily import TavilyClient -from searcher import get_search_query, get_search_result, update_search_query, MAX_QUERY, get_source_info - -from utils.model import HelperClient, reward_model -from utils.tool import logger, get_response_with_retry -from utils.prompts import propose_definition_prompt, claim_propose_prompt -from utils.constants import EMBEDDING_MODEL, google_api_key - from debate_tree import PrepareTree -from sentence_transformers.util import cos_sim +from searcher import MAX_QUERY, get_search_query, get_search_result, get_source_info, update_search_query +from utils.constants import EMBEDDING_MODEL, google_api_key +from utils.model import HelperClient, reward_model +from utils.prompts import claim_propose_prompt, propose_definition_prompt +from utils.tool import get_response_with_retry, logger genai.configure(api_key=google_api_key) -class ClaimPool(): - def __init__(self, motion, side, model="gpt-4o", pool_size = 50, max_search_depth = 3, max_search_branch=3, use_rm_model=True, **kwargs) -> None: +class ClaimPool: + def __init__( + self, + motion, + side, + model="gpt-4o", + pool_size=50, + max_search_depth=3, + max_search_branch=3, + use_rm_model=True, + **kwargs, + ) -> None: self.motion = motion self.side = side self.pool_size = pool_size @@ -51,22 +59,21 @@ def __init__(self, motion, side, model="gpt-4o", pool_size = 50, max_search_dept self.reward_model = self.client self.tavily_client = TavilyClient(api_key=os.environ["TVLY_API_KEY"]) - def create_claim(self, need_score=True, need_evidence=True, max_search_depth=2, max_search_branch=3): prompt = propose_definition_prompt.format(motion=self.motion, act=self.act) - 
logger.debug("[Definition-Helper-Prompt] " + prompt.strip().replace('\n',' ||| ')) + logger.debug("[Definition-Helper-Prompt] " + prompt.strip().replace("\n", " ||| ")) response = self.client(prompt=prompt)[0] - logger.debug("[Definition-Helper-Response] " + response.strip().replace('\n',' ||| ')) - if 'None' in response: + logger.debug("[Definition-Helper-Response] " + response.strip().replace("\n", " ||| ")) + if "None" in response: self.definition = "" else: self.definition = response.replace("**Definition**: ", "").strip() prompt = claim_propose_prompt.format(motion=self.motion, act=self.act, size=self.pool_size) - logger.debug("[Claim-Propose-Prompt] " + prompt.replace('\n',' ||| ')) + logger.debug("[Claim-Propose-Prompt] " + prompt.replace("\n", " ||| ")) results, response = get_response_with_retry(self.client, prompt, "results", temperature=1.0) - logger.debug("[Claim-Propose-Response] " + json.dumps(response, indent=2).replace('\n',' ||| ')) + logger.debug("[Claim-Propose-Response] " + json.dumps(response, indent=2).replace("\n", " ||| ")) for item in results: strength = item["strength"] @@ -75,9 +82,16 @@ def create_claim(self, need_score=True, need_evidence=True, max_search_depth=2, new_claim = item["claim"] perspective = item["perspective"] explanation = item["explanation"] - self.pool.append({"definition": self.definition, "claim": new_claim, "perspective": perspective, "explanation": explanation, "strength": strength}) - - + self.pool.append( + { + "definition": self.definition, + "claim": new_claim, + "perspective": perspective, + "explanation": explanation, + "strength": strength, + } + ) + # self.grouped_pool = [[x] for x in self.pool] clusters = self.cluster_claims(self.pool) self.grouped_pool = [] @@ -91,9 +105,16 @@ def create_claim(self, need_score=True, need_evidence=True, max_search_depth=2, if need_score: for group in self.grouped_pool: - #TODO: only consider the first claim in each group + # TODO: only consider the first claim in each group 
for claim in group[:1]: - tree_structure, score = self.minimax_search(claim["claim"], motion=self.motion, side=self.side, root_argument=claim["explanation"], max_depth=max_search_depth, max_branch=max_search_branch) + tree_structure, score = self.minimax_search( + claim["claim"], + motion=self.motion, + side=self.side, + root_argument=claim["explanation"], + max_depth=max_search_depth, + max_branch=max_search_branch, + ) claim["minimax_search_score"] = score claim["tree_structure"] = tree_structure logger.debug(f'[Minimax Score] {claim}: {claim["minimax_search_score"]}') @@ -103,7 +124,6 @@ def create_claim(self, need_score=True, need_evidence=True, max_search_depth=2, return self.grouped_pool - def cluster_claims(self, pool): claim_embeddings = genai.embed_content(model=EMBEDDING_MODEL, content=[x["claim"] for x in pool])["embedding"] claim_cross_sim = cos_sim(claim_embeddings, claim_embeddings) @@ -125,24 +145,31 @@ def cluster_claims(self, pool): clusters.append(cluster) visited.add(i) if len(clusters) > 10: - logger.debug(f"Clustered {len(pool)} claims into {len(clusters)} groups (threshold: {threhold}), continue clustering with lower threshold ...") + logger.debug( + f"Clustered {len(pool)} claims into {len(clusters)} groups (threshold: {threhold}), continue clustering with lower threshold ..." + ) threhold -= 0.025 max_iter -= 1 elif len(clusters) < 5: - logger.debug(f"Clustered {len(pool)} claims into {len(clusters)} groups (threshold: {threhold}), continue clustering with higher threshold ...") + logger.debug( + f"Clustered {len(pool)} claims into {len(clusters)} groups (threshold: {threhold}), continue clustering with higher threshold ..." + ) threhold += 0.025 max_iter -= 1 else: stop = True - logger.debug(f"Clustered {len(pool)} claims into {len(clusters)} groups (threshold: {threhold}), stop clustering.") + logger.debug( + f"Clustered {len(pool)} claims into {len(clusters)} groups (threshold: {threhold}), stop clustering." 
+ ) if max_iter <= 0: - logger.debug(f"Clustered {len(pool)} claims into {len(clusters)} groups (threshold: {threhold}), stop clustering.") + logger.debug( + f"Clustered {len(pool)} claims into {len(clusters)} groups (threshold: {threhold}), stop clustering." + ) break logger.debug(f"Clustered {len(pool)} claims into {len(clusters)} groups (threshold: {threhold}): {clusters}") return clusters - - + def minimax_search(self, root_claim, motion, side, root_argument=None, max_depth=2, max_branch=3): tree = PrepareTree(root_claim, motion, side, self.client, self.reward_model, root_argument) tree.expand_tree(tree.root, max_level=max_depth, max_branch=max_branch) @@ -150,13 +177,15 @@ def minimax_search(self, root_claim, motion, side, root_argument=None, max_depth logger.debug(tree.print_tree(prefix="\n", include_status=True)) tree_structure = tree.get_tree_info() - best_path_idx, best_path, best_score = tree.root.get_minimax_score(max_depth=max_depth-1, level_decoy=0.8, support_weight=0.5) + best_path_idx, best_path, best_score = tree.root.get_minimax_score( + max_depth=max_depth - 1, level_decoy=0.8, support_weight=0.5 + ) best_path_str = "\n=> ".join([node.data for node in best_path]) logger.debug(f"Path IDX: {best_path_idx}") logger.debug(f"Path: {best_path_str}") logger.debug(f"Score: {best_score}") return tree_structure, best_score - + def get_evidence_pool(self): for k, g in enumerate(self.grouped_pool): claim = g[0]["claim"] @@ -174,25 +203,24 @@ def get_evidence_pool(self): "* Level-0: The main claim \n" "* Level-1: Your opponent's rebuttal to the main claim\n" "* Level-2: Your defense against the opponent's rebuttal\n\n" - "Prepare your search queries based on the simulated debate flow tree structure. 
" "The search queries should support the arguments in your root claim (Level-0) and defense (Level-2), while rebut the opponent's rebuttal (Level-1).\n" - "**Simulated Debate Flow Tree Structure for your main claim**\n" f"{tree_info}\n\n" - ) - search_queries = get_search_query(self.client, self.motion, self.act, claim, extra_prompt=minimax_simulated_tree_feedback) + search_queries = get_search_query( + self.client, self.motion, self.act, claim, extra_prompt=minimax_simulated_tree_feedback + ) if len(search_queries) > MAX_QUERY: search_queries = search_queries[:MAX_QUERY] - + search_results = get_search_result(self.tavily_client, search_queries) # Step 3. Update search queries new_search_queries = update_search_query(self.client, self.motion, self.act, claim, search_results) if len(new_search_queries) + len(search_queries) > MAX_QUERY: - new_search_queries = new_search_queries[:MAX_QUERY - len(search_queries)] + new_search_queries = new_search_queries[: MAX_QUERY - len(search_queries)] new_search_results = get_search_result(self.tavily_client, new_search_queries) # Step 4. Save the results @@ -201,11 +229,13 @@ def get_evidence_pool(self): # all_results = summarize_search_result(self.client, claim, all_results) for i, res in enumerate(all_results): - res["id"] = f"{k}_{i}" + res["id"] = f"{k}_{i}" all_results_with_source = get_source_info(self.client, all_results) if len(all_results_with_source) > 0: - logger.debug(f"[Evidence-Pool-Helper] Retrieved {len(all_results_with_source)} evidences for {claim} . Example e: {all_results_with_source[0]}") + logger.debug( + f"[Evidence-Pool-Helper] Retrieved {len(all_results_with_source)} evidences for {claim} . 
Example e: {all_results_with_source[0]}" + ) else: logger.debug(f"[Evidence-Pool-Helper] No evidences retrieved for {claim}") @@ -213,6 +243,7 @@ def get_evidence_pool(self): g[0]["retrieved_evidence"] = all_results return self.grouped_pool + # python3 prepare.py # python3 prepare.py --motion_file ../data/motion_list.txt --model gpt-4o # python3 prepare.py --motion_file ../data/motion_list.txt --model meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo @@ -222,7 +253,7 @@ def get_evidence_pool(self): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--motion_file", type=str, default=None) # ../data/motion_list.txt + parser.add_argument("--motion_file", type=str, default=None) # ../data/motion_list.txt parser.add_argument("--motion", type=str, default="Fast fashion (cheap, trend-driven clothing) should be banned") parser.add_argument("--model", type=str, default="gemini-2.0-flash") parser.add_argument("--no_evidence", action="store_true", default=False) @@ -239,14 +270,14 @@ def get_evidence_pool(self): with open(args.motion_file, "r") as f: motions = [x.strip() for x in f.readlines()] if args.max_n > 0: - motions = motions[:args.max_n] + motions = motions[: args.max_n] else: motions = [args.motion.strip()] for motion in motions: model = args.model model_name = args.model.split("/")[-1] - motion_name = motion.replace(' ', '_').lower() + motion_name = motion.replace(" ", "_").lower() pool_size = args.pool_size save_dir = f"{args.save_dir}/{model_name}" @@ -254,17 +285,23 @@ def get_evidence_pool(self): os.makedirs(save_dir) for side in ["for", "against"]: - save_file_name = f'{save_dir}/{motion_name}_pool_{side}.json' + save_file_name = f"{save_dir}/{motion_name}_pool_{side}.json" if os.path.exists(save_file_name): logger.info(f"Skip motion: {motion}") else: logger.info(f"Create motion for {save_file_name}...") - claim_workspace = ClaimPool(motion=motion, side=side, model=model, pool_size=pool_size, use_rm_model=not args.ban_rm_model) - 
claim_pool = claim_workspace.create_claim(need_score=not args.no_score, need_evidence=not args.no_evidence, max_search_depth=args.max_search_depth, max_search_branch=args.max_search_branch) + claim_workspace = ClaimPool( + motion=motion, side=side, model=model, pool_size=pool_size, use_rm_model=not args.ban_rm_model + ) + claim_pool = claim_workspace.create_claim( + need_score=not args.no_score, + need_evidence=not args.no_evidence, + max_search_depth=args.max_search_depth, + max_search_branch=args.max_search_branch, + ) logger.info(f"Claim Pool Size: {len(claim_pool)}") if len(claim_pool) > 0: - with open(save_file_name, 'w') as file: + with open(save_file_name, "w") as file: json.dump(claim_pool, file, indent=2) logger.info(f"Saved to {save_file_name}") - \ No newline at end of file diff --git a/src/scripts/debate_tree_extract.py b/src/scripts/debate_tree_extract.py index 2e20e58..4e92c5f 100644 --- a/src/scripts/debate_tree_extract.py +++ b/src/scripts/debate_tree_extract.py @@ -1,16 +1,18 @@ -import os -import json -from pathlib import Path -from tqdm import tqdm -from functools import partial import argparse +import json +import os import time +from functools import partial +from pathlib import Path + +from tqdm import tqdm from debate_tree import DebateTree from utils.helper import extract_statement from utils.model import HelperClient from utils.tool import logger + def analyze_statement(llm, motion, status, statements, side, pro_debate_tree, con_debate_tree): """ Analyze the statements: @@ -18,11 +20,18 @@ def analyze_statement(llm, motion, status, statements, side, pro_debate_tree, co 2. Match the opponent's claims with the debater's claims 3. 
Update the claim status """ - if side == "for": # statement is from the for side + if side == "for": # statement is from the for side tree, oppo_tree = pro_debate_tree, con_debate_tree else: tree, oppo_tree = con_debate_tree, pro_debate_tree - claims = extract_statement(llm, motion, statements, tree=[tree.print_tree(include_status=True), oppo_tree.print_tree(include_status=True)], side=side, stage=status) + claims = extract_statement( + llm, + motion, + statements, + tree=[tree.print_tree(include_status=True), oppo_tree.print_tree(include_status=True)], + side=side, + stage=status, + ) for x in claims: for p in x["purpose"]: @@ -47,7 +56,9 @@ def analyze_statement(llm, motion, status, statements, side, pro_debate_tree, co target_tree = p["targeted_debate_tree"] if action == "propose" or action == "reinforce" or action == "rebut": if target_tree != "you": - logger.warning(f"Propose or reinforce action is not allowed for the opponent's debate tree: {target}") + logger.warning( + f"Propose or reinforce action is not allowed for the opponent's debate tree: {target}" + ) continue elif action == "attack": if target_tree == "you": @@ -56,7 +67,7 @@ def analyze_statement(llm, motion, status, statements, side, pro_debate_tree, co else: logger.warning(f"Unknown action: {action}") continue - + target_tree = tree if target_tree == "you" else oppo_tree target_tree.update_node(action, new_claim=claim, new_argument=arguments, target=target) @@ -66,40 +77,39 @@ def analyze_statement(llm, motion, status, statements, side, pro_debate_tree, co def process_debate_file(input_path: str, output_path: str) -> None: """ Process a single debate JSON file and extract structured arguments. 
- + Args: input_path: Path to input JSON file output_path: Path to save processed JSON file api_key: Google API key """ - with open(input_path, 'r') as f: + with open(input_path, "r") as f: debate_data = json.load(f) if len(debate_data["debate_process"]) < 6: logger.info(f"Skipping debate {input_path} because it has less than 3 stages") return - + motion = debate_data["motion"] if "motion" in debate_data else debate_data["config"]["env"]["motion"] - processed_debate = { - "motion": motion, - "structured_arguments": [] - } + processed_debate = {"motion": motion, "structured_arguments": []} llm = partial(HelperClient, model="gemini-2.0-flash", temperature=0, max_tokens=8192, n=1) pro_debate_tree = DebateTree(motion, "for") con_debate_tree = DebateTree(motion, "against") - + # Create progress bar for arguments within the file - with tqdm(total=len(debate_data["debate_process"]), desc=f"Processing arguments in {Path(input_path).name}", leave=False) as pbar: + with tqdm( + total=len(debate_data["debate_process"]), desc=f"Processing arguments in {Path(input_path).name}", leave=False + ) as pbar: for stage in debate_data["debate_process"]: - claims = analyze_statement(llm, motion, stage["stage"], stage["content"], stage["side"], pro_debate_tree, con_debate_tree) - - processed_debate["structured_arguments"].append({ - "stage": stage["stage"], - "side": stage["side"], - "claims": claims - }) + claims = analyze_statement( + llm, motion, stage["stage"], stage["content"], stage["side"], pro_debate_tree, con_debate_tree + ) + + processed_debate["structured_arguments"].append( + {"stage": stage["stage"], "side": stage["side"], "claims": claims} + ) pbar.update(1) processed_debate["pro_debate_tree"] = pro_debate_tree.get_tree_info() @@ -108,14 +118,14 @@ def process_debate_file(input_path: str, output_path: str) -> None: logger.info(f"Pro_debate_tree: {pro_debate_tree.print_tree(include_status=True)}") logger.info(f"Con_debate_tree: 
{con_debate_tree.print_tree(include_status=True)}") - with open(output_path, 'w') as f: - json.dump(processed_debate, f, indent=2) + with open(output_path, "w") as f: + json.dump(processed_debate, f, indent=2) def process_input(input_path: str, output_path: str) -> None: """ Process either a single file or directory of JSON files. - + Args: input_path: Path to input file or directory output_path: Path to output file or directory @@ -123,7 +133,7 @@ def process_input(input_path: str, output_path: str) -> None: """ input_path = Path(input_path) output_path = Path(output_path) - + if input_path.is_file(): # Process single file if not output_path.parent.exists(): @@ -131,16 +141,20 @@ def process_input(input_path: str, output_path: str) -> None: print(f"\nProcessing file: {input_path.name}") process_debate_file(str(input_path), str(output_path)) print(f"✓ Saved processed file to {output_path}") - + elif input_path.is_dir(): # Process directory if not output_path.exists(): os.makedirs(output_path, exist_ok=True) - + # Get list of JSON files - json_files = [f for f in input_path.glob("*.json") if (not (output_path / f"processed_{f.name}").exists()) and (not f.name.startswith("processed_"))] + json_files = [ + f + for f in input_path.glob("*.json") + if (not (output_path / f"processed_{f.name}").exists()) and (not f.name.startswith("processed_")) + ] print(json_files) - + # Create progress bar for files with tqdm(total=len(json_files), desc="Processing files", unit="file") as pbar: for json_file in json_files: @@ -150,12 +164,12 @@ def process_input(input_path: str, output_path: str) -> None: def main(): - parser = argparse.ArgumentParser(description='Process debate JSON files to extract structured arguments') - parser.add_argument('input', help='Input JSON file or directory containing JSON files') - parser.add_argument('output', help='Output file path (if input is file) or directory (if input is directory)') - + parser = argparse.ArgumentParser(description="Process 
debate JSON files to extract structured arguments") + parser.add_argument("input", help="Input JSON file or directory containing JSON files") + parser.add_argument("output", help="Output file path (if input is file) or directory (if input is directory)") + args = parser.parse_args() - + try: process_input(args.input, args.output) print("\n✓ Processing completed successfully!") @@ -163,5 +177,6 @@ def main(): print(f"Error in debate_tree_extract.py: {e}") return + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/src/scripts/train_reward_model_llama.py b/src/scripts/train_reward_model_llama.py index a074088..a55c3c0 100644 --- a/src/scripts/train_reward_model_llama.py +++ b/src/scripts/train_reward_model_llama.py @@ -1,209 +1,216 @@ -import torch, json, os, wandb -from sklearn.metrics import accuracy_score, classification_report +import json +import os +from collections import Counter + import numpy as np +import torch +import wandb +from datasets import Dataset, concatenate_datasets, load_dataset -from transformers import LlamaForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments -from transformers import EvalPrediction -from datasets import load_dataset, Dataset -from datasets import concatenate_datasets -from collections import Counter # from datasets import load_metric -from sklearn.metrics import cohen_kappa_score, f1_score, accuracy_score +from sklearn.metrics import accuracy_score, classification_report, cohen_kappa_score, f1_score +from transformers import AutoTokenizer, EvalPrediction, LlamaForSequenceClassification, Trainer, TrainingArguments first_call_preprocess_function = True + # 1. 
Load and process the local dataset -def load_and_process_dataset(file_path, model_name, max_length=512, type='pro', version='version_b'): # 'pro' or 'con'; 'version_a' or 'version_b' - assert type == 'pro' or type == 'con', "Type must be 'pro' or 'con'" - +def load_and_process_dataset( + file_path, model_name, max_length=512, type="pro", version="version_b" +): # 'pro' or 'con'; 'version_a' or 'version_b' + assert type == "pro" or type == "con", "Type must be 'pro' or 'con'" + # Load dataset - dataset = load_dataset('json', data_files=file_path) - + dataset = load_dataset("json", data_files=file_path) + # Filter dataset based on 'impact_stance' - dataset = dataset.filter(lambda example: example['impact_stance'] == type) - + dataset = dataset.filter(lambda example: example["impact_stance"] == type) + # Initialize tokenizer tokenizer = AutoTokenizer.from_pretrained(model_name) - + # Set pad_token to eos_token tokenizer.pad_token = tokenizer.eos_token - + # Define preprocessing function def preprocess_function(examples): # Concatenate list of paths into a single string - relation_ship = 'supporting' if type == 'pro' else 'attacking' - - if version == 'version_a': - '''Version A implementation''' - texts = [f"You are given a chain of arguments, each one supporting or attacking the previous one. The first argument is: {path[-1]} The second last one is: {path[1]} The last one is: {path[0]} Now you need to determine the impact of the last one to the second last one, given their relationship {relation_ship}. Output only a number among 0, 1, or 2 in your response. 0 means not impactful; 1 means medium impactful; 2 means impactful." for path in examples['path']] - elif version == 'version_b': - '''Version B implementation''' + relation_ship = "supporting" if type == "pro" else "attacking" + + if version == "version_a": + """Version A implementation""" + texts = [ + f"You are given a chain of arguments, each one supporting or attacking the previous one. 
The first argument is: {path[-1]} The second last one is: {path[1]} The last one is: {path[0]} Now you need to determine the impact of the last one to the second last one, given their relationship {relation_ship}. Output only a number among 0, 1, or 2 in your response. 0 means not impactful; 1 means medium impactful; 2 means impactful." + for path in examples["path"] + ] + elif version == "version_b": + """Version B implementation""" texts = [] - for path, path_labels in zip(examples['path'], examples['path_labels']): + for path, path_labels in zip(examples["path"], examples["path_labels"]): # context = ' '.join(f"The {i}th claim is: {item} " for i, item in enumerate(list(reversed(path)))) - context = '' + context = "" for i, item in enumerate(list(reversed(path))): context += f"The {i+1}th claim is: {item} " - if i > 0 and path_labels[-i] == 'pro': + if i > 0 and path_labels[-i] == "pro": context += "This claim is supporting the previous claim.\n" - elif i > 0 and path_labels[-i] == 'con': + elif i > 0 and path_labels[-i] == "con": context += "This claim is supporting the previous claim.\n" else: context += "\n" - texts.append(f"You are given a chain of arguments, each one supporting or attacking the previous one. {context}. Now you need to determine the impact of the last claim to the second last one. Output only a number among 0, 1, or 2 in your response. 0 means not impactful; 1 means medium impactful; 2 means impactful.") + texts.append( + f"You are given a chain of arguments, each one supporting or attacking the previous one. {context}. Now you need to determine the impact of the last claim to the second last one. Output only a number among 0, 1, or 2 in your response. 0 means not impactful; 1 means medium impactful; 2 means impactful." 
+ ) else: assert False, "Invalid version" global first_call_preprocess_function if first_call_preprocess_function: - print('[prompt]', texts[0]) + print("[prompt]", texts[0]) first_call_preprocess_function = False - + # Tokenize texts - tokenized = tokenizer(texts, truncation=True, padding='max_length', max_length=max_length) - + tokenized = tokenizer(texts, truncation=True, padding="max_length", max_length=max_length) + # Add labels label_map = {"NOT IMPACTFUL": 0, "MEDIUM IMPACT": 1, "IMPACTFUL": 2} - tokenized['labels'] = [label_map[label] for label in examples['impact_label']] - + tokenized["labels"] = [label_map[label] for label in examples["impact_label"]] + return tokenized - + # Apply preprocessing - processed_dataset = dataset.map(preprocess_function, batched=True, remove_columns=dataset['train'].column_names) + processed_dataset = dataset.map(preprocess_function, batched=True, remove_columns=dataset["train"].column_names) # Implement resampling def resample_dataset(dataset): # Convert labels to tensor and count samples for each class - labels_tensor = torch.tensor(dataset['labels']) + labels_tensor = torch.tensor(dataset["labels"]) label_counts = labels_tensor.bincount() max_count = label_counts.max().item() - + resampled_datasets = [] for label in range(len(label_counts)): - class_dataset = dataset.filter(lambda example: example['labels'] == label) - + class_dataset = dataset.filter(lambda example: example["labels"] == label) + # Calculate how many times to repeat the class dataset - repeat_factor = min(max_count // len(class_dataset), 3) #NOTE: <=3 times for each sample + repeat_factor = min(max_count // len(class_dataset), 3) # NOTE: <=3 times for each sample remainder = max_count % len(class_dataset) - + # Manually repeat the dataset repeated_datasets = [class_dataset] * repeat_factor resampled_class = concatenate_datasets(repeated_datasets) - + # Add additional samples if needed # if remainder > 0: # additional_samples = 
class_dataset.shuffle().select(range(remainder)) # resampled_class = concatenate_datasets([resampled_class, additional_samples]) - + resampled_datasets.append(resampled_class) - print('[resample_dataset]', label, len(class_dataset), len(resampled_class)) - + print("[resample_dataset]", label, len(class_dataset), len(resampled_class)) + # Combine all resampled datasets return concatenate_datasets(resampled_datasets).shuffle(seed=42) # Apply resampling to the train split - if 'train' in file_path: - processed_dataset['train'] = resample_dataset(processed_dataset['train']) + if "train" in file_path: + processed_dataset["train"] = resample_dataset(processed_dataset["train"]) # Set format for PyTorch - processed_dataset.set_format('torch') - + processed_dataset.set_format("torch") + return processed_dataset + # 2. Load LLaMA model and Tokenizer with corrected settings def load_model_and_tokenizer(model_name): tokenizer = AutoTokenizer.from_pretrained(model_name) - + # Set pad_token to eos_token tokenizer.pad_token = tokenizer.eos_token - + # Load the model with the correct number of labels model = LlamaForSequenceClassification.from_pretrained( - model_name, - num_labels=3, - torch_dtype=torch.bfloat16, - device_map="auto" + model_name, num_labels=3, torch_dtype=torch.bfloat16, device_map="auto" ) # Adjusted to 3 - + # Set the pad_token_id in the model's configuration model.config.pad_token_id = tokenizer.pad_token_id - + return model, tokenizer + # 2.5 metrics def compute_metrics(p: EvalPrediction): predictions = p.predictions label_ids = p.label_ids - + # If predictions are logits (usually a 2D array), we need to take argmax if len(predictions.shape) == 2: preds = np.argmax(predictions, axis=-1) else: preds = predictions # If already prediction categories, use directly - - print('[compute_metrics]', preds, label_ids) - + + print("[compute_metrics]", preds, label_ids) + # Calculate accuracy accuracy = accuracy_score(label_ids, preds) - + # Calculate multi-class F1 
score - f1 = f1_score(label_ids, preds, average='macro') - + f1 = f1_score(label_ids, preds, average="macro") + # Calculate Kappa score kappa = cohen_kappa_score(label_ids, preds) - + # Combine all metrics - results = { - 'accuracy': accuracy, - 'f1': f1, - 'kappa': kappa - } - + results = {"accuracy": accuracy, "f1": f1, "kappa": kappa} + return results + def compute_confusion_matrix(p: EvalPrediction): - '''reference vs. prediction''' + """reference vs. prediction""" predictions = p.predictions label_ids = p.label_ids - + if len(predictions.shape) == 2: preds = np.argmax(predictions, axis=-1) else: - preds = predictions - - print('[compute_metrics]', preds, label_ids) + preds = predictions + + print("[compute_metrics]", preds, label_ids) cnt = np.zeros((3, 3)) for pred, label in zip(preds, label_ids): cnt[label][pred] += 1 - + return cnt + # 3. Fine-tune the model def fine_tune_model(model, tokenizer, train_dataset, val_dataset, run_name): # Set training arguments os.environ["WANDB_PROJECT"] = "debating" # name your W&B project from datetime import datetime + current_time = datetime.now().strftime("%Y%m%d_%H%M%S") - output_dir = os.path.join("./results", run_name + '_' + current_time) - + output_dir = os.path.join("./results", run_name + "_" + current_time) + training_args = TrainingArguments( output_dir=output_dir, evaluation_strategy="steps", - eval_steps=500, + eval_steps=500, save_strategy="steps", save_steps=500, save_total_limit=1, metric_for_best_model="f1", greater_is_better=True, load_best_model_at_end=True, - save_only_model=True, + save_only_model=True, per_device_train_batch_size=8, # Adjust based on GPU memory per_device_eval_batch_size=8, num_train_epochs=3, # Adjust as needed learning_rate=1e-5, - logging_dir='./logs', + logging_dir="./logs", logging_steps=100, run_name=run_name, fp16=False, @@ -211,9 +218,9 @@ def fine_tune_model(model, tokenizer, train_dataset, val_dataset, run_name): tf32=False, ) - model.use_weighted_loss = False #NOTE: try the 
regression idea + model.use_weighted_loss = False # NOTE: try the regression idea if model.use_weighted_loss: - print('[NOTE] Using weighted loss') + print("[NOTE] Using weighted loss") # Initialize Trainer trainer = Trainer( @@ -224,32 +231,37 @@ def fine_tune_model(model, tokenizer, train_dataset, val_dataset, run_name): tokenizer=tokenizer, compute_metrics=compute_metrics, ) - + # Start training trainer.train() predictions = trainer.predict(val_dataset) metrics = compute_metrics(predictions) - metrics = {f'final/{k}': v for k, v in metrics.items()} + metrics = {f"final/{k}": v for k, v in metrics.items()} print(metrics) wandb.log(metrics) + # 4. Main function def main(): # Load and process the dataset # type = 'pro' - type = 'con' - version = 'version_a' + type = "con" + version = "version_a" model_name = "meta-llama/Llama-3.2-3B-Instruct" - train_dataset = load_and_process_dataset('dataset/kialo/kialo/kialo_path.train.jsonl', model_name, type=type, version=version)['train'] - val_dataset = load_and_process_dataset('dataset/kialo/kialo/kialo_path.valid.jsonl', model_name, type=type, version=version)['train'] + train_dataset = load_and_process_dataset( + "dataset/kialo/kialo/kialo_path.train.jsonl", model_name, type=type, version=version + )["train"] + val_dataset = load_and_process_dataset( + "dataset/kialo/kialo/kialo_path.valid.jsonl", model_name, type=type, version=version + )["train"] print(train_dataset) print(val_dataset) - + # Load model and tokenizer model, tokenizer = load_model_and_tokenizer(model_name) - + # Fine-tune the model run_name = f"llama_{type}_{version}_resampling" fine_tune_model(model, tokenizer, train_dataset, val_dataset, run_name) @@ -260,28 +272,30 @@ def __init__(self, model_name): self.tokenizer = AutoTokenizer.from_pretrained(model_name) self.tokenizer.pad_token = self.tokenizer.eos_token self.model = LlamaForSequenceClassification.from_pretrained( - model_name, - num_labels=3, - torch_dtype=torch.bfloat16, - device_map="auto" + 
model_name, num_labels=3, torch_dtype=torch.bfloat16, device_map="auto" ) self.model.config.pad_token_id = self.tokenizer.pad_token_id def __call__(self, prompt): - inputs = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512).to(self.model.device) + inputs = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512).to( + self.model.device + ) with torch.no_grad(): outputs = self.model(**inputs) return torch.argmax(outputs.logits, dim=-1).item() + def evaluate_support_strength(model, motion, argument1, argument2, history): - relation_ship = 'supporting' + relation_ship = "supporting" prompt = f"You are given a chain of arguments, each one supporting or attacking the previous one. The first argument is: {history[0]} The second last one is: {history[-3]} The last one is: {history[-1]} Now you need to determine the impact of the last one to the second last one, given their relationship {relation_ship}. Output only a number among 0, 1, or 2 in your response. 0 means not impactful; 1 means medium impactful; 2 means impactful." return model(prompt) + def evaluate_defense_strength(model, motion, argument1, argument2, history): - relation_ship = 'attacking' + relation_ship = "attacking" prompt = f"You are given a chain of arguments, each one supporting or attacking the previous one. The first argument is: {history[0]} The second last one is: {history[-2]} The last one is: {history[-1]} Now you need to determine the impact of the last one to the second last one, given their relationship {relation_ship}. Output only a number among 0, 1, or 2 in your response. 0 means not impactful; 1 means medium impactful; 2 means impactful." 
return model(prompt) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/src/searcher.py b/src/searcher.py index 25c61f8..ae5f0e3 100644 --- a/src/searcher.py +++ b/src/searcher.py @@ -1,15 +1,15 @@ -import re -import os -import json import argparse +import json +import os +import re from functools import partial from tavily import TavilyClient -from utils.prompts import search_prompt, iterative_search_prompt, summarize_result_prompt +from utils.db import get_cached_answer, save_query from utils.model import HelperClient +from utils.prompts import iterative_search_prompt, search_prompt, summarize_result_prompt from utils.tool import logger -from utils.db import save_query, get_cached_answer MAX_QUERY = 10 @@ -25,7 +25,7 @@ def find_tavily(text): ridx = text.find("```", lidx + 9) if ridx == -1: break - tavily_block = text[lidx + 9:ridx].strip().split("\n") + tavily_block = text[lidx + 9 : ridx].strip().split("\n") tavily_blocks.extend(tavily_block) start_idx = ridx + 3 @@ -35,11 +35,11 @@ def find_tavily(text): def clean_raw_content(raw_content): if raw_content is None: return "" - pattern = re.compile(r' +') + pattern = re.compile(r" +") raw_content = raw_content.replace("\n", " ") raw_content = raw_content.replace("\\n", " ") raw_content = raw_content.replace("\t", " ") - raw_content = pattern.sub(' ', raw_content) + raw_content = pattern.sub(" ", raw_content) return raw_content @@ -52,16 +52,16 @@ def get_search_result(tavily_client, query_list): logger.debug(f"[Search-Helper] Searching Hit in Cache") response_list.extend(json.loads(cached_answer[0])) continue - + max_retry = 3 while max_retry > 0: try: response = tavily_client.search( query=query, search_depth="advanced", - max_results = 5, + max_results=5, include_raw_content=True, - exclude_domains=["arxiv.org"] + exclude_domains=["arxiv.org"], ) break except Exception as e: @@ -98,47 +98,51 @@ def get_search_result(tavily_client, query_list): response_list.extend(results) 
return response_list + def get_search_query(llm_client, motion, stance, claim=None, extra_prompt=None): prompt = search_prompt - prompt += ( - f"**Topic**: {motion}\n\n" - f"**Stance**: {stance}\n\n" - ) + prompt += f"**Topic**: {motion}\n\n" f"**Stance**: {stance}\n\n" if claim is not None: prompt += "\n\n**Claim**: {claim}\n\n".format(claim=claim) if extra_prompt is not None: prompt += "\n\n" + extra_prompt - logger.debug("[Search-Helper-Prompt] " + prompt.strip().replace('\n',' ||| ')) + logger.debug("[Search-Helper-Prompt] " + prompt.strip().replace("\n", " ||| ")) response = llm_client(prompt=prompt)[0] - logger.debug("[Search-Helper-Response] " + response.strip().replace('\n',' ||| ')) + logger.debug("[Search-Helper-Response] " + response.strip().replace("\n", " ||| ")) queries = find_tavily(response) - queries = [q.replace("\"", "") for q in queries] + queries = [q.replace('"', "") for q in queries] logger.debug("[Search-Helper-Queries] " + " ||| ".join(queries)) return queries def update_search_query(llm_client, motion, stance, claim, results): - simple_results = [{"query": r["query"], "title": r["title"], "url": r["url"], "content": r["content"]} for r in results] - prompt = iterative_search_prompt.format(motion=motion, stance=stance, claim=claim, results=json.dumps(simple_results, indent=2)) - logger.debug("[Search-Helper-Update-Prompt] " + prompt.strip().replace('\n',' ||| ')) + simple_results = [ + {"query": r["query"], "title": r["title"], "url": r["url"], "content": r["content"]} for r in results + ] + prompt = iterative_search_prompt.format( + motion=motion, stance=stance, claim=claim, results=json.dumps(simple_results, indent=2) + ) + logger.debug("[Search-Helper-Update-Prompt] " + prompt.strip().replace("\n", " ||| ")) response = llm_client(prompt=prompt)[0] - logger.debug("[Search-Helper-Update-Response] " + response.strip().replace('\n',' ||| ')) + logger.debug("[Search-Helper-Update-Response] " + response.strip().replace("\n", " ||| ")) queries 
= find_tavily(response) logger.debug("[Search-Helper-Queries] " + " ||| ".join(queries)) return queries + def summarize_search_result(llm_client, claim, search_results): for r in search_results: query = r["query"] content = {"title": r["title"], "url": r["url"], "content": r["content"]} prompt = summarize_result_prompt.format(claim=claim, query=query, results=json.dumps(content, indent=2)) - logger.debug("[Search-Summarize-Prompt] " + prompt.strip().replace('\n',' ||| ')) + logger.debug("[Search-Summarize-Prompt] " + prompt.strip().replace("\n", " ||| ")) response = llm_client(prompt=prompt)[0] - logger.debug("[Search-Summarize-Response] " + response.strip().replace('\n',' ||| ')) + logger.debug("[Search-Summarize-Response] " + response.strip().replace("\n", " ||| ")) r["argument"] = response return search_results - + + def get_source_info(llm, evidence): for e in evidence: e["raw_content"] = " ".join(e["raw_content"].split()[:256]) @@ -155,16 +159,25 @@ def get_source_info(llm, evidence): reliability += 1 if "publication" in e and e["publication"] != "": reliability += 1 - if "arxiv" in e.get("url", "").lower() or "arxiv" in e.get("source", "").lower() or "arxiv" in e.get("publication", "").lower(): + if ( + "arxiv" in e.get("url", "").lower() + or "arxiv" in e.get("source", "").lower() + or "arxiv" in e.get("publication", "").lower() + ): reliability = 0 e["reliability"] = reliability - + return evidence + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--model", type=str, default="gemini-1.5-pro") - parser.add_argument("--claim", type=str, default="Fat taxes disproportionately burden low-income households, exacerbating existing inequalities.") + parser.add_argument( + "--claim", + type=str, + default="Fat taxes disproportionately burden low-income households, exacerbating existing inequalities.", + ) parser.add_argument("--motion", type=str, default="we should use fat tax") parser.add_argument("--stance", type=str, 
default="support") args = parser.parse_args() @@ -182,7 +195,7 @@ def get_source_info(llm, evidence): # Step 3. Update search queries new_search_queries = update_search_query(llm_client, args.claim, search_results) if len(new_search_queries) + len(search_queries) > MAX_QUERY: - new_search_queries = new_search_queries[:MAX_QUERY - len(search_queries)] + new_search_queries = new_search_queries[: MAX_QUERY - len(search_queries)] new_search_results = get_search_result(tavily_client, new_search_queries) # Step 4. Save the results @@ -190,5 +203,3 @@ def get_source_info(llm, evidence): all_results = summarize_search_result(llm_client, args.claim, all_results) json.dump(all_results, open(f"{args.claim}_search_results.json", "w"), indent=2) print(f"Saved to {args.claim}_search_results.json") - - diff --git a/src/tts.py b/src/tts.py index 64c8b7e..93cb4bb 100644 --- a/src/tts.py +++ b/src/tts.py @@ -1,58 +1,57 @@ -import json import argparse +import json import os -import shutil import re -import torch -from mutagen.mp3 import MP3 +import shutil from io import BytesIO from pathlib import Path + +import speech_recognition as sr +import torch +from mutagen.mp3 import MP3 from openai import OpenAI +from pydub import AudioSegment +from utils.constants import openai_api_key from utils.time_estimator import LengthEstimator from utils.tool import remove_citation, remove_subtitles -from utils.constants import openai_api_key - -import speech_recognition as sr -from pydub import AudioSegment # python tts.py -i ../log_files/1.json -o ../results/audio -name case1_1 + def get_options(): - parser = argparse.ArgumentParser(description='Text-to-Speech options') - parser.add_argument('-i', type=str, help='input file path') - parser.add_argument('-o', type=str, default="../results/audio",help='output directory path') - parser.add_argument('-name', type=str, default="case1_1",help='output name') + parser = argparse.ArgumentParser(description="Text-to-Speech options") + 
parser.add_argument("-i", type=str, help="input file path") + parser.add_argument("-o", type=str, default="../results/audio", help="output directory path") + parser.add_argument("-name", type=str, default="case1_1", help="output name") return parser.parse_args() + # I find different voices have very similar speech duration, so just use one voice for estimation def query_time(content): client = OpenAI() - response = client.audio.speech.create( - model="tts-1", - voice="echo", - input=content[:4096] - ) + response = client.audio.speech.create(model="tts-1", voice="echo", input=content[:4096]) audio_bytes = BytesIO(response.content) - + return MP3(audio_bytes).info.length # speech_file_path = "temp.mp3" # response.stream_to_file(speech_file_path) # return MP3(speech_file_path).info.length + def convert_text_to_speech(content, output_path, voice="echo"): client = OpenAI() audio_content, reference = remove_citation(content) audio_content = remove_subtitles(audio_content) response = client.audio.speech.create( - model="tts-1", - voice=voice, - input=audio_content[:4096], # 4096 is the max limit - # instructions=instructions, - response_format="mp3", - ) + model="tts-1", + voice=voice, + input=audio_content[:4096], # 4096 is the max limit + # instructions=instructions, + response_format="mp3", + ) response.stream_to_file(output_path) @@ -67,47 +66,55 @@ def convert_text_to_speech(content, output_path, voice="echo"): def convert_debate_to_speech(input_file, output_dir, name): with open(input_file, "r") as file: data = json.load(file) - + config = data["config"] # title = config["env"]["motion"].replace(" ", "_").lower().replace("'", "").replace(",", "").replace(".", "") output_path = f"{output_dir}/{name}" os.makedirs(output_path, exist_ok=True) shutil.copy(input_file, f"{output_path}/{name}.json") - for stage in ["opening", "rebuttal", "closing"]: - # for stage in ["closing"]: + # for stage in ["closing"]: for side in ["for", "against"]: - # for side in ["for"]: + # for 
side in ["for"]: fname = f"{stage}_{side}" speech_file_path = f"{output_path}/{fname}.mp3" content = [x for x in data["debate_process"] if x["side"] == side and x["stage"] == stage] content = " ".join([x["content"] for x in content]) print(f"Generating speech to {speech_file_path}") convert_text_to_speech(content, speech_file_path) - print('word count & syllable count & speech length:', LengthEstimator(mode="words").query_time(content), LengthEstimator(mode="syllables").query_time(content), MP3(speech_file_path).info.length) - print('word count per sec & syllable count per sec:', LengthEstimator(mode="words").query_time(content)/MP3(speech_file_path).info.length, LengthEstimator(mode="syllables").query_time(content)/MP3(speech_file_path).info.length) + print( + "word count & syllable count & speech length:", + LengthEstimator(mode="words").query_time(content), + LengthEstimator(mode="syllables").query_time(content), + MP3(speech_file_path).info.length, + ) + print( + "word count per sec & syllable count per sec:", + LengthEstimator(mode="words").query_time(content) / MP3(speech_file_path).info.length, + LengthEstimator(mode="syllables").query_time(content) / MP3(speech_file_path).info.length, + ) - -from pydub import AudioSegment import nltk +from pydub import AudioSegment from pydub.silence import split_on_silence + def trim_audio_by_sentences(input_file, output_file, max_duration=240000): # 240000 ms = 4 minutes """ Trim an MP3 file to contain complete sentences within the max duration. 
- + Args: input_file (str): Path to input MP3 file max_duration (int): Maximum duration in milliseconds (default: 4 minutes) - + Returns: str: Path to the trimmed output file """ # Load the MP3 file audio = AudioSegment.from_mp3(input_file) - + # Initialize speech recognizer recognizer = sr.Recognizer() @@ -115,27 +122,27 @@ def trim_audio_by_sentences(input_file, output_file, max_duration=240000): # 24 recognizer.energy_threshold = 300 # Increase sensitivity recognizer.dynamic_energy_threshold = True recognizer.pause_threshold = 0.8 # Shorter pause threshold for better sentence detection - + # Split audio on silence to get rough chunks chunks = split_on_silence( audio, min_silence_len=500, # minimum silence length (ms) - silence_thresh=-40, # silence threshold (dB) - keep_silence=500 # keep some silence between chunks + silence_thresh=-40, # silence threshold (dB) + keep_silence=500, # keep some silence between chunks ) - + # Process chunks and combine them within time limit trimmed_audio = AudioSegment.empty() trimmed_sentences = [] - + for chunk in chunks: # Check if adding this chunk would exceed the time limit if len(trimmed_audio) + len(chunk) > max_duration: break - + # Convert chunk to wav for speech recognition chunk_wav = chunk.export(format="wav") - + try: # Perform speech recognition with sr.AudioFile(chunk_wav) as source: @@ -143,21 +150,21 @@ def trim_audio_by_sentences(input_file, output_file, max_duration=240000): # 24 text = recognizer.recognize_google( audio_data, language="en-US", # Specify language for better results - show_all=True # Get detailed results + show_all=True, # Get detailed results ) # Extract the most confident result - if isinstance(text, dict) and 'alternative' in text: - text = text['alternative'][0]['transcript'] + if isinstance(text, dict) and "alternative" in text: + text = text["alternative"][0]["transcript"] else: continue - + # Add basic punctuation based on pauses and speech patterns text = add_basic_punctuation(text) - + 
# Split text into sentences sentences = nltk.sent_tokenize(text, language="english") - + # If there's only one sentence in the chunk if len(sentences) == 1: if len(trimmed_audio) + len(chunk) <= max_duration: @@ -167,33 +174,33 @@ def trim_audio_by_sentences(input_file, output_file, max_duration=240000): # 24 # Split chunk proportionally by sentence length total_chars = len(text) current_pos = 0 - + for sentence in sentences: sentence_ratio = len(sentence) / total_chars sentence_duration = int(len(chunk) * sentence_ratio) - sentence_audio = chunk[current_pos:current_pos + sentence_duration] - + sentence_audio = chunk[current_pos : current_pos + sentence_duration] + if len(trimmed_audio) + len(sentence_audio) <= max_duration: trimmed_audio += sentence_audio trimmed_sentences.append(sentence) else: # Stop if we can't add more sentences break - + current_pos += sentence_duration - + except sr.UnknownValueError: # If speech recognition fails, treat chunk as a single unit if len(trimmed_audio) + len(chunk) <= max_duration: trimmed_audio += chunk - + # Export the trimmed audio trimmed_audio.export(output_file, format="mp3", bitrate="192k") - + # Print the duration of the trimmed audio duration_seconds = len(trimmed_audio) / 1000 print(f"Trimmed audio duration: {duration_seconds:.2f} seconds") - + return duration_seconds, trimmed_sentences @@ -202,41 +209,42 @@ def add_basic_punctuation(text): Add basic punctuation based on common speech patterns and word cues. """ # Common question words - question_words = {'what', 'when', 'where', 'who', 'why', 'how', 'which', 'whose', 'whom'} - + question_words = {"what", "when", "where", "who", "why", "how", "which", "whose", "whom"} + # Split into word sequences words = text.split() result = [] - + for i, word in enumerate(words): word = word.lower() next_word = words[i + 1].lower() if i + 1 < len(words) else "" - + # Add question marks if word in question_words and i == 0: words[-1] = words[-1] + "?" 
- + # Add periods for common sentence endings if i > 0 and i < len(words) - 1: prev_word = words[i - 1].lower() - if prev_word in {'so', 'then', 'therefore', 'thus', 'hence', 'consequently'}: + if prev_word in {"so", "then", "therefore", "thus", "hence", "consequently"}: words[i - 2] = words[i - 2] + "." - + # Capitalize first word of apparent sentences - if i == 0 or words[i - 1].endswith(('.', '?', '!')): + if i == 0 or words[i - 1].endswith((".", "?", "!")): word = word.capitalize() - + result.append(word) - + # Add final period if missing text = " ".join(result) - if not text[-1] in {'.', '?', '!'}: + if not text[-1] in {".", "?", "!"}: text += "." - + return text + if __name__ == "__main__": input_file = "../log_files/1.json" output_dir = "../results/audio" name = "case1_1" - convert_debate_to_speech(input_file, output_dir, name) \ No newline at end of file + convert_debate_to_speech(input_file, output_dir, name) diff --git a/src/utils/constants.py b/src/utils/constants.py index e664531..88b3a4f 100644 --- a/src/utils/constants.py +++ b/src/utils/constants.py @@ -1,5 +1,5 @@ -import os import json +import os WORKSPACE_DIR = os.path.dirname(os.path.abspath(__file__)) + "/../.." 
@@ -12,7 +12,7 @@ ####################### API Keys ####################### -KEY_FILE = os.path.join(WORKSPACE_DIR, 'src/configs', 'api_key.json') +KEY_FILE = os.path.join(WORKSPACE_DIR, "src/configs", "api_key.json") if os.path.exists(KEY_FILE): print(f"Loading API keys from {KEY_FILE}") @@ -36,22 +36,15 @@ ####################### Time Estimation ####################### WORD_BUDGET_FOR_DRAFT = 500 -LENGTH_MODE_FOR_DRAFT = "phonemes" # rough estimatation, phonemes/words/syllables -TIME_MODE_FOR_STATEMENT = "fastspeech" # precise estimatation, fastspeech/openai +LENGTH_MODE_FOR_DRAFT = "phonemes" # rough estimatation, phonemes/words/syllables +TIME_MODE_FOR_STATEMENT = "fastspeech" # precise estimatation, fastspeech/openai TIME_TOLERANCE = 15 # seconds OPENING_TIME = REBUTTAL_TIME = 240 CLOSING_TIME = 120 DEFAULT_MAX_WORDS = 520 -WORDRATIO = { - "phonemes": 4.5, - "words": 1, - "syllables": 1.75, - "fastspeech": 0.46, - "openai": 0.46, - "time": 0.46 -} +WORDRATIO = {"phonemes": 4.5, "words": 1, "syllables": 1.75, "fastspeech": 0.46, "openai": 0.46, "time": 0.46} REMAINING_ROUND_NUM = { "opening_for": 3, @@ -59,6 +52,5 @@ "rebuttal_for": 1, "rebuttal_against": 0, "closing_for": 0, - "closing_against": 0 + "closing_against": 0, } - diff --git a/src/utils/db.py b/src/utils/db.py index d3ef799..4f806dd 100644 --- a/src/utils/db.py +++ b/src/utils/db.py @@ -1,77 +1,84 @@ import os import sqlite3 -from typing import Optional from datetime import datetime +from typing import Optional CACHE_DIR = "../.cache" + def init_db(force: bool = False): db_name = f"{CACHE_DIR}/search.db" if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR) - + if force and os.path.exists(".cache/search.db"): os.remove(".cache/search.db") conn = sqlite3.connect(db_name) - + c = conn.cursor() - c.execute('''CREATE TABLE IF NOT EXISTS queries + c.execute( + """CREATE TABLE IF NOT EXISTS queries (query TEXT PRIMARY KEY, answer TEXT, created_at TIMESTAMP, - updated_at TIMESTAMP)''') + updated_at 
TIMESTAMP)""" + ) if force: current_time = datetime.now().isoformat() - c.execute("INSERT OR REPLACE INTO queries VALUES (?, ?, ?, ?)", - ("What is the purpose of this database?", - "To cache query results for faster retrieval.", - current_time, - current_time)) + c.execute( + "INSERT OR REPLACE INTO queries VALUES (?, ?, ?, ?)", + ( + "What is the purpose of this database?", + "To cache query results for faster retrieval.", + current_time, + current_time, + ), + ) print("Database has been initialized with default data.") - + print("Database has been initialized.") conn.commit() conn.close() + def save_query(query: str, answer: str): - conn = sqlite3.connect(f'{CACHE_DIR}/search.db') + conn = sqlite3.connect(f"{CACHE_DIR}/search.db") c = conn.cursor() current_time = datetime.now().isoformat() - + # Check if the query already exists c.execute("SELECT created_at FROM queries WHERE query = ?", (query,)) existing = c.fetchone() - + if existing: # Update existing query - c.execute("UPDATE queries SET answer = ?, updated_at = ? WHERE query = ?", - (answer, current_time, query)) + c.execute("UPDATE queries SET answer = ?, updated_at = ? 
WHERE query = ?", (answer, current_time, query)) else: # Insert new query - c.execute("INSERT INTO queries VALUES (?, ?, ?, ?)", - (query, answer, current_time, current_time)) - + c.execute("INSERT INTO queries VALUES (?, ?, ?, ?)", (query, answer, current_time, current_time)) + conn.commit() conn.close() - + def get_cached_answer(query: str) -> Optional[tuple]: - conn = sqlite3.connect(f'{CACHE_DIR}/search.db') + conn = sqlite3.connect(f"{CACHE_DIR}/search.db") c = conn.cursor() c.execute("SELECT answer, created_at, updated_at FROM queries WHERE query = ?", (query,)) result = c.fetchone() conn.close() return result if result else None + def remove_query(query: str) -> bool: - conn = sqlite3.connect('cache/search.db') + conn = sqlite3.connect("cache/search.db") c = conn.cursor() - + try: c.execute("DELETE FROM queries WHERE query = ?", (query,)) conn.commit() - + if c.rowcount > 0: print(f"Query '{query}' has been removed from the database.") return True @@ -82,4 +89,4 @@ def remove_query(query: str) -> bool: print(f"An error occurred: {e}") return False finally: - conn.close() \ No newline at end of file + conn.close() diff --git a/src/utils/fs_wrapper.py b/src/utils/fs_wrapper.py index 3c4ad08..99f55c0 100644 --- a/src/utils/fs_wrapper.py +++ b/src/utils/fs_wrapper.py @@ -1,25 +1,23 @@ +import os +import sys + import numpy as np -import yaml import torch +import yaml -import sys -import os - -fastspeech_path = os.path.join(os.path.dirname(__file__), '..', '..','dependencies') +fastspeech_path = os.path.join(os.path.dirname(__file__), "..", "..", "dependencies") sys.path.append(fastspeech_path) from fastspeech2.synthesize import preprocess_english -from fastspeech2.utils.model import get_vocoder, get_model_2 -from fastspeech2.utils.tools import to_device, synth_samples_for_length, synth_samples - +from fastspeech2.utils.model import get_model_2, get_vocoder +from fastspeech2.utils.tools import synth_samples, synth_samples_for_length, to_device root = 
f"{fastspeech_path}/fastspeech2/" + def pad_1D(inputs, PAD=0): def pad_data(x, length, PAD): - x_padded = np.pad( - x, (0, length - x.shape[0]), mode="constant", constant_values=PAD - ) + x_padded = np.pad(x, (0, length - x.shape[0]), mode="constant", constant_values=PAD) return x_padded max_len = max((len(x) for x in inputs)) @@ -27,17 +25,12 @@ def pad_data(x, length, PAD): return padded + class FastSpeechWrapper: def __init__(self, batch_size=8): - preprocess_config = yaml.load( - open(f"{root}/config/LJSpeech/preprocess.yaml", "r"), Loader=yaml.FullLoader - ) - model_config = yaml.load( - open(f"{root}/config/LJSpeech/model.yaml", "r"), Loader=yaml.FullLoader - ) - train_config = yaml.load( - open(f"{root}/config/LJSpeech/train.yaml", "r"), Loader=yaml.FullLoader - ) + preprocess_config = yaml.load(open(f"{root}/config/LJSpeech/preprocess.yaml", "r"), Loader=yaml.FullLoader) + model_config = yaml.load(open(f"{root}/config/LJSpeech/model.yaml", "r"), Loader=yaml.FullLoader) + train_config = yaml.load(open(f"{root}/config/LJSpeech/train.yaml", "r"), Loader=yaml.FullLoader) self.configs = (preprocess_config, model_config, train_config) pitch_control, energy_control, duration_control = 1.0, 1.0, 1.0 @@ -72,7 +65,7 @@ def synthesize(self, batchs): pitch_control, energy_control, duration_control = self.control_values lengths = [] - batchs = [batchs[i:i + self.batch_size] for i in range(0, len(batchs), self.batch_size)] + batchs = [batchs[i : i + self.batch_size] for i in range(0, len(batchs), self.batch_size)] for batch in batchs: ids = [d[0] for d in batch] speakers = np.array([d[2] for d in batch]) @@ -81,15 +74,12 @@ def synthesize(self, batchs): text_lens = np.array([d[4] for d in batch]) texts = pad_1D(texts) data = (ids, raw_texts, speakers, texts, text_lens, max(text_lens)) - + data = to_device(data, self.device) with torch.no_grad(): # Forward output = self.model( - *(data[2:]), - p_control=pitch_control, - e_control=energy_control, - 
d_control=duration_control + *(data[2:]), p_control=pitch_control, e_control=energy_control, d_control=duration_control ) length = synth_samples_for_length( data, @@ -103,7 +93,6 @@ def synthesize(self, batchs): return lengths - if __name__ == "__main__": texts = [ "The quick brown fox jumps over the lazy dog.", @@ -125,9 +114,9 @@ def synthesize(self, batchs): We believe the answer lies in carefully designed government oversight. Think of it like traffic lights – they're there to ensure everyone gets through the intersection safely and fairly, not just the fastest cars. Government frameworks, with clear rules and open communication, bring that same fairness to healthcare. Research, like the 2022 study by Leider et al. published in *[Journal Name]*, shows that transparency builds public trust, especially during tough times. Remember the early days of the pandemic? Clear government guidance was crucial then, unlike the often confusing messages from private systems. Now, our opponents talk about market-based solutions and charity. But even in well-off countries like Switzerland, as the 2023 King's Fund report shows, not everyone gets equal access. And charity, while kind, is like patching potholes – it helps a few, but it doesn't fix the whole road. It can't guarantee everyone gets the care they deserve. Our opponents also claim government slows innovation. But look at the pandemic! The rapid development of vaccines and telehealth happened *because* of government funding and coordination. So, when healthcare is limited, we must prioritize fairness. Government rationing, with clear rules and public input, isn't perfect, but it's the most just way to ensure everyone, regardless of their income, has a fair chance. It's about ensuring everyone has access to the care they need, not just those who can afford it. We urge you to support this vital measure.""", - """Thank you very much. So I think that if you want to invest in tires, you should invest in tires. 
I think that there is income inequality happening in the United States. There is education inequality. There is a planet which is slowly becoming uninhabitable if you look at the Flint water crisis. If you look at droughts that happen in California all the time and if you want to help, these are real problems that exist that we need to help people who are currently not having all of their basic human rights fulfilled. These are things that the government should be investing money in and should probably be investing more money in because we see them being problems in our society that are hurting people. What I’m going to do in this speech is I’m going to continue talking about these criteria, continue talking about why we're not meeting basic needs and why also the market itself is probably solving this problem already. Before that, two points of rebuttal to what we just heard from Project Debater. So firstly, we heard that this is technology that would end up benefiting society but we're not sure we haven't yet heard evidence that shows us why it would benefit all of society, perhaps some parts of society, maybe upper middle class or upper class citizens could benefit from these inspiring research, could benefit from the technological innovations. But most of society, people who are currently in the United States have resource scarcity, people who are hungry, people who do not have access to good education, aren't really helped by this. So we think it is like that, a government subsidy should go to something that helps everyone particularly weaker classes in society. Second point is this idea of an exploding industry which creates jobs and international cooperation. So firstly, we've heard evidence that this already exists, right? We've heard evidence that companies are investing in this as is. And secondly, we think that international cooperation or the specific things have alternatives. We can cooperate over other types of economic trade deals. 
We can cooperate in other ways with different countries. It's not necessary to very specifically fund space 98 exploration to get these benefits. So as we remember, there are two criteria that I believe the government needs to meet before subsidizing something. It being a basic human need, we don't see space exploration meeting that and B, that this is something that can't otherwise exist, right? So we've already heard from Project Debater how huge this industry is, right? How much investment there's already going on in the private sector and we think this is because there's lots of curiosity especially among wealthy people who maybe want to get to space for personal use or who want to build a colony on Mars and then rent out the rooms there. We know that Elon Musk is doing this already. We know that other people are doing it and we think they're spending money and willing to spend even more money because of the competition between them. So Project Debater should know better than all of us how competitions often bear extremely impressive fruit, right? We think that when wealthy philanthropist or people who are willing to fund research on their own race each other to be the first to achieve new heights in terms of space exploration, that brings us to great achievements already and we think that the private market is doing this well enough already. Considering that we already have movement in that direction, again we see Elon Musk's company, we see all of these companies working already. We think that it's not that the government money won't help out if it were to be given, we just think it doesn't meet the criteria in comparison to other things, right? So given the fact that the market already has a lot of money invested in this, already has movement in those research directions, and given the fact that we still don't think this is a good enough plan to prioritize over other basic needs that the government should be providing people. 
We think that at the end of the day, given the fact that there are also alternatives to getting all of these benefits of international cooperation, it simply doesn't justify specifically the government allocating its funds for this purpose when it should be allocating them towards other needs of other people.""" - ] - + """Thank you very much. So I think that if you want to invest in tires, you should invest in tires. I think that there is income inequality happening in the United States. There is education inequality. There is a planet which is slowly becoming uninhabitable if you look at the Flint water crisis. If you look at droughts that happen in California all the time and if you want to help, these are real problems that exist that we need to help people who are currently not having all of their basic human rights fulfilled. These are things that the government should be investing money in and should probably be investing more money in because we see them being problems in our society that are hurting people. What I’m going to do in this speech is I’m going to continue talking about these criteria, continue talking about why we're not meeting basic needs and why also the market itself is probably solving this problem already. Before that, two points of rebuttal to what we just heard from Project Debater. So firstly, we heard that this is technology that would end up benefiting society but we're not sure we haven't yet heard evidence that shows us why it would benefit all of society, perhaps some parts of society, maybe upper middle class or upper class citizens could benefit from these inspiring research, could benefit from the technological innovations. But most of society, people who are currently in the United States have resource scarcity, people who are hungry, people who do not have access to good education, aren't really helped by this. 
So we think it is like that, a government subsidy should go to something that helps everyone particularly weaker classes in society. Second point is this idea of an exploding industry which creates jobs and international cooperation. So firstly, we've heard evidence that this already exists, right? We've heard evidence that companies are investing in this as is. And secondly, we think that international cooperation or the specific things have alternatives. We can cooperate over other types of economic trade deals. We can cooperate in other ways with different countries. It's not necessary to very specifically fund space 98 exploration to get these benefits. So as we remember, there are two criteria that I believe the government needs to meet before subsidizing something. It being a basic human need, we don't see space exploration meeting that and B, that this is something that can't otherwise exist, right? So we've already heard from Project Debater how huge this industry is, right? How much investment there's already going on in the private sector and we think this is because there's lots of curiosity especially among wealthy people who maybe want to get to space for personal use or who want to build a colony on Mars and then rent out the rooms there. We know that Elon Musk is doing this already. We know that other people are doing it and we think they're spending money and willing to spend even more money because of the competition between them. So Project Debater should know better than all of us how competitions often bear extremely impressive fruit, right? We think that when wealthy philanthropist or people who are willing to fund research on their own race each other to be the first to achieve new heights in terms of space exploration, that brings us to great achievements already and we think that the private market is doing this well enough already. 
Considering that we already have movement in that direction, again we see Elon Musk's company, we see all of these companies working already. We think that it's not that the government money won't help out if it were to be given, we just think it doesn't meet the criteria in comparison to other things, right? So given the fact that the market already has a lot of money invested in this, already has movement in those research directions, and given the fact that we still don't think this is a good enough plan to prioritize over other basic needs that the government should be providing people. We think that at the end of the day, given the fact that there are also alternatives to getting all of these benefits of international cooperation, it simply doesn't justify specifically the government allocating its funds for this purpose when it should be allocating them towards other needs of other people.""", + ] + fs = FastSpeechWrapper(batch_size=2) lengths = fs.query_time(texts) - print(lengths) \ No newline at end of file + print(lengths) diff --git a/src/utils/helper.py b/src/utils/helper.py index df07622..86fc85c 100644 --- a/src/utils/helper.py +++ b/src/utils/helper.py @@ -1,11 +1,13 @@ import json + import pandas as pd -from .tool import logger, get_response_with_retry -from .time_estimator import LengthEstimator -from .prompts import * -from .tool import identify_number_in_text, sort_by_action + from debate_tree import PrepareTree +from .prompts import * +from .time_estimator import LengthEstimator +from .tool import get_response_with_retry, identify_number_in_text, logger, sort_by_action + ##################### Evidence ##################### @@ -14,17 +16,20 @@ def select_query(llm, motion, stance, claim, action, candidate_queries): Select the retrieval query for the claim return: query, a list of queries, ["query1", "query2", ...] 
""" - prompt = select_query_prompt.format(claim=claim, motion=motion, stance=stance, action=action, candidate_queries=candidate_queries) - logger.debug("[Query-Helper-Prompt] " + prompt.strip().replace('\n',' ||| ')) + prompt = select_query_prompt.format( + claim=claim, motion=motion, stance=stance, action=action, candidate_queries=candidate_queries + ) + logger.debug("[Query-Helper-Prompt] " + prompt.strip().replace("\n", " ||| ")) query, response = get_response_with_retry(llm, prompt, "query") - logger.debug("[Query-Helper-Response] " + response.strip().replace('\n',' ||| ')) + logger.debug("[Query-Helper-Response] " + response.strip().replace("\n", " ||| ")) return query + def rank_evidence(candidate_evidence, selected_queries=None): if selected_queries is not None: selected_evidence = [x for x in candidate_evidence if x["query"] in selected_queries] else: - selected_evidence = candidate_evidence + selected_evidence = candidate_evidence titles, uniq_evidence = [], [] for e in selected_evidence: @@ -38,18 +43,30 @@ def rank_evidence(candidate_evidence, selected_queries=None): uniq_evidence.append(e) titles.append(e["title"]) - #NOTE: the database can have some evidences + # NOTE: the database can have some evidences for e in uniq_evidence: - if "arxiv" in e.get("source","").lower() or "arxiv" in e.get("publication","").lower(): + if "arxiv" in e.get("source", "").lower() or "arxiv" in e.get("publication", "").lower(): e["reliability"] = -1 - - sorted_evidence = [x for _, _, x in sorted(zip([x.get("n_numbers", 0) for x in uniq_evidence], [x.get("reliability", 0) for x in uniq_evidence], uniq_evidence), key=lambda pair: (pair[0], pair[1]), reverse=True)] + + sorted_evidence = [ + x + for _, _, x in sorted( + zip( + [x.get("n_numbers", 0) for x in uniq_evidence], + [x.get("reliability", 0) for x in uniq_evidence], + uniq_evidence, + ), + key=lambda pair: (pair[0], pair[1]), + reverse=True, + ) + ] return sorted_evidence ##################### Opening 
##################### + def build_cot_claims(llm, motion, side, claim_pool): # if "perspective" in claim_pool[0][0]: # claims = [{"claim": x[0]["claim"], "perspective": x[0]["perspective"], "concept": x[0]["concept"], "explanation": x[0]["explanation"]} for x in claim_pool] @@ -66,9 +83,9 @@ def build_cot_claims(llm, motion, side, claim_pool): "Use Json format with one key of **selection**. The value is a list of selected claims (string) that can be used in this debate.\n" ) - logger.debug("[CoT-Claims-Prompt] " + prompt.strip().replace('\n',' ||| ')) + logger.debug("[CoT-Claims-Prompt] " + prompt.strip().replace("\n", " ||| ")) selected_claims, response = get_response_with_retry(llm, prompt, "selection") - logger.debug("[CoT-Claims-Response] " + response.strip().replace('\n',' ||| ')) + logger.debug("[CoT-Claims-Response] " + response.strip().replace("\n", " ||| ")) # selected_claims = [x if x.endswith(".") else x + '.' for x in selected_claims] claim_content = [x[0]["claim"] for x in claim_pool] @@ -81,7 +98,7 @@ def build_cot_claims(llm, motion, side, claim_pool): "mode": "choose_main_claims", "all_claims": claims, "selected_claims": selected_claims, - "selected_idx": selected_idx + "selected_idx": selected_idx, } return selected_claims, selected_idx, thoughts @@ -90,15 +107,17 @@ def build_cot_claims(llm, motion, side, claim_pool): def build_logic_claims(llm, motion, side, claim_pool, context="", definition="", use_rehearsal_tree=True, top_k=None): """ Choose the main claims from the sorted claims based on the logic chain - # """ + #""" # Step 1. Sort claim groups by their highest minimax_search_score - sorted_idx = sorted(range(len(claim_pool)), key=lambda i: claim_pool[i][0]["minimax_search_score"], reverse=True) # sort the groups by the highest value - + sorted_idx = sorted( + range(len(claim_pool)), key=lambda i: claim_pool[i][0]["minimax_search_score"], reverse=True + ) # sort the groups by the highest value + # Step 2. 
Keep only top-k if specified if top_k is not None: sorted_idx = sorted_idx[:top_k] - + ranked_claims = [claim_pool[i][0]["claim"] for i in sorted_idx] # Step 3. Gather original claims & tree info @@ -116,19 +135,21 @@ def build_logic_claims(llm, motion, side, claim_pool, context="", definition="", tree_info = "" # Step 4. Construct prompt - prompt = main_claim_selection.format(motion=motion, side=side, tree=tree_info, claims="\n".join(ori_claims), context=context, definition=definition) - logger.debug("[Logic-Claims-Prompt] " + prompt.strip().replace('\n',' ||| ')) + prompt = main_claim_selection.format( + motion=motion, side=side, tree=tree_info, claims="\n".join(ori_claims), context=context, definition=definition + ) + logger.debug("[Logic-Claims-Prompt] " + prompt.strip().replace("\n", " ||| ")) content, response = get_response_with_retry(llm, prompt, "selection") - logger.debug("[Logic-Claims-Response] " + response.strip().replace('\n',' ||| ')) + logger.debug("[Logic-Claims-Response] " + response.strip().replace("\n", " ||| ")) # Step 5. Parse model outputs selected_claims = content["claims"] framework = content["framework"] explanation = content["explanation"] - selected_claims = [x if x.endswith(".") else x + '.' for x in selected_claims] + selected_claims = [x if x.endswith(".") else x + "." for x in selected_claims] selected_idx = [ori_claims.index(x) for x in selected_claims] - + # Step 6. 
Record reasoning info thoughts = { "stage": "preparation", @@ -143,6 +164,7 @@ def build_logic_claims(llm, motion, side, claim_pool, context="", definition="", return selected_claims, selected_idx, thoughts + ##################### Debate Flow Tree ##################### @@ -151,14 +173,16 @@ def get_actions_from_tree(claims, tree, oppo_tree): if tree.max_level == 0: for claim in claims: - actions.append({ - "idx": len(actions), - "action": "propose", - "target_claim": claim, - "target_argument": "", - "importance": "high", - "targeted_debate_tree": "you" - }) + actions.append( + { + "idx": len(actions), + "action": "propose", + "target_claim": claim, + "target_argument": "", + "importance": "high", + "targeted_debate_tree": "you", + } + ) else: for level in range(tree.max_level): nodes = tree.get_nodes_by_level(level + 1) @@ -167,14 +191,16 @@ def get_actions_from_tree(claims, tree, oppo_tree): else: action = "reinforce" if (level + 1) % 2 == 1 else "rebut" for node in nodes: - actions.append({ - "idx": len(actions), - "action": action, - "target_claim": node.claim, - "target_argument": "".join(node.argument), - "targeted_debate_tree": "you", - }) - + actions.append( + { + "idx": len(actions), + "action": action, + "target_claim": node.claim, + "target_argument": "".join(node.argument), + "targeted_debate_tree": "you", + } + ) + if oppo_tree.max_level != 0: for level in range(oppo_tree.max_level): nodes = oppo_tree.get_nodes_by_level(level + 1) @@ -183,14 +209,16 @@ def get_actions_from_tree(claims, tree, oppo_tree): else: action = "attack" if (level + 1) % 2 == 1 else "reinforce" for node in nodes: - actions.append({ - "idx": len(actions), - "action": action, - "target_claim": node.claim, - "target_argument": "".join(node.argument), - "targeted_debate_tree": "opponent", - }) - + actions.append( + { + "idx": len(actions), + "action": action, + "target_claim": node.claim, + "target_argument": "".join(node.argument), + "targeted_debate_tree": "opponent", + } + ) + 
logger.debug(f"[Debate-Flow-Tree-Action] {actions}") df = pd.DataFrame(actions) @@ -201,10 +229,17 @@ def get_actions_from_tree(claims, tree, oppo_tree): def get_battlefields_from_actions(llm, motion, side, claims, actions, tree, oppo_tree): - prompt = debate_flow_tree_action_eval_prompt.format(motion=motion, side=side, claims=claims, actions=json.dumps(actions, indent=2), tree=tree.print_tree(include_status=True), oppo_tree=oppo_tree.print_tree(include_status=True)) - logger.debug("[Debate-Flow-Tree-Action-Eval-Prompt] " + prompt.strip().replace('\n',' ||| ')) + prompt = debate_flow_tree_action_eval_prompt.format( + motion=motion, + side=side, + claims=claims, + actions=json.dumps(actions, indent=2), + tree=tree.print_tree(include_status=True), + oppo_tree=oppo_tree.print_tree(include_status=True), + ) + logger.debug("[Debate-Flow-Tree-Action-Eval-Prompt] " + prompt.strip().replace("\n", " ||| ")) eval_results, response = get_response_with_retry(llm, prompt, "response") - logger.debug("[Debate-Flow-Tree-Action-Eval-Response] " + response.strip().replace('\n',' ||| ')) + logger.debug("[Debate-Flow-Tree-Action-Eval-Response] " + response.strip().replace("\n", " ||| ")) battlefields = [] for eval_result in eval_results: @@ -221,54 +256,76 @@ def get_battlefields_from_actions(llm, motion, side, claims, actions, tree, oppo } battlefields.append(battlefield) - return battlefields -def get_retrieval_from_rehearsal_tree(action_type, target_claim, side, oppo_side, prepared_tree_list, prepared_oppo_tree_list, look_ahead_num, query_embedding): +def get_retrieval_from_rehearsal_tree( + action_type, + target_claim, + side, + oppo_side, + prepared_tree_list, + prepared_oppo_tree_list, + look_ahead_num, + query_embedding, +): additional_info = [] retrieval_nodes = [] if prepared_tree_list is None: return additional_info, retrieval_nodes - for tree in prepared_tree_list: if action_type == "propose" or action_type == "reinforce": if action_type == "propose": match_node = 
tree.get_node_by_claim(target_claim, side=side) similarity = 1.0 if match_node is not None else 0.0 else: - match_node, similarity = tree.get_most_similar_node(target_claim, query_embedding=query_embedding, side=side, top_k=1, threshold=0.8) - + match_node, similarity = tree.get_most_similar_node( + target_claim, query_embedding=query_embedding, side=side, top_k=1, threshold=0.8 + ) + if match_node is not None: - logger.debug(f"[Prepared-Tree-Retrieval] {action_type} Hit: [{target_claim}] with [{match_node.claim}], Similarity: {similarity:0.2f}") + logger.debug( + f"[Prepared-Tree-Retrieval] {action_type} Hit: [{target_claim}] with [{match_node.claim}], Similarity: {similarity:0.2f}" + ) score = match_node.get_strength(max_depth=look_ahead_num) - match_node.argument = [match_node.argument] if isinstance(match_node.argument, str) else match_node.argument + match_node.argument = ( + [match_node.argument] if isinstance(match_node.argument, str) else match_node.argument + ) if len(match_node.argument) > 0: node_info = " ".join(match_node.argument) + f"(Strength: {score:.1f})\n\t" else: node_info = f"(Strength: {score:.1f})\n\t" additional_info.append(node_info) - retrieval_nodes.append(["Prepared-Tree-Retrieval", action_type, target_claim, match_node.claim, similarity, node_info]) + retrieval_nodes.append( + ["Prepared-Tree-Retrieval", action_type, target_claim, match_node.claim, similarity, node_info] + ) if action_type == "propose": break elif action_type == "attack" or action_type == "rebut": - match_node, similarity = tree.get_most_similar_node(target_claim, query_embedding=query_embedding, side=oppo_side, top_k=1, threshold=0.8) + match_node, similarity = tree.get_most_similar_node( + target_claim, query_embedding=query_embedding, side=oppo_side, top_k=1, threshold=0.8 + ) if match_node is not None: - logger.debug(f"[Prepared-Tree-Retrieval] {action_type} Hit: [{target_claim}] with [{match_node.claim}], Similarity: {similarity:0.2f}") + logger.debug( + 
f"[Prepared-Tree-Retrieval] {action_type} Hit: [{target_claim}] with [{match_node.claim}], Similarity: {similarity:0.2f}" + ) node_info = "" for c in match_node.children: score = c.get_strength(max_depth=look_ahead_num) node_info += f"{c.claim} (Strength: {score:.1f})\n\t" additional_info.append(node_info) - retrieval_nodes.append(["Prepared-Tree-Retrieval", action_type, target_claim, match_node.claim, similarity, node_info]) + retrieval_nodes.append( + ["Prepared-Tree-Retrieval", action_type, target_claim, match_node.claim, similarity, node_info] + ) else: raise ValueError(f"Invalid action: {action_type}") - if additional_info == [] and match_node is None: - logger.debug(f"[Prepared-Tree-Retrieval-Summary] {action_type} Miss. No additional info found for [{target_claim}]") + logger.debug( + f"[Prepared-Tree-Retrieval-Summary] {action_type} Miss. No additional info found for [{target_claim}]" + ) else: logger.debug(f"[Prepared-Tree-Retrieval-Summary] {action_type} Hit. Additional info: {additional_info}") @@ -276,34 +333,66 @@ def get_retrieval_from_rehearsal_tree(action_type, target_claim, side, oppo_side if prepared_oppo_tree_list is not None: for tree in prepared_oppo_tree_list: if action_type == "attack" or action_type == "rebut": - match_node, similarity = tree.get_most_similar_node(target_claim, query_embedding=query_embedding, side=oppo_side, top_k=1, threshold=0.8) + match_node, similarity = tree.get_most_similar_node( + target_claim, query_embedding=query_embedding, side=oppo_side, top_k=1, threshold=0.8 + ) if match_node is not None: - logger.debug(f"[Prepared-Opponent-Tree-Retrieval] {action_type} Hit: [{target_claim}] with [{match_node.claim}], Similarity: {similarity:0.2f}") + logger.debug( + f"[Prepared-Opponent-Tree-Retrieval] {action_type} Hit: [{target_claim}] with [{match_node.claim}], Similarity: {similarity:0.2f}" + ) node_info = "" for c in match_node.children: score = c.get_strength(max_depth=look_ahead_num) node_info += f"{c.claim} (Strength: 
{score:.1f})\n\t" additional_info_from_oppo_tree.append(node_info) - retrieval_nodes.append(["Prepared-Opponent-Tree-Retrieval", action_type, target_claim, match_node.claim, similarity, node_info]) + retrieval_nodes.append( + [ + "Prepared-Opponent-Tree-Retrieval", + action_type, + target_claim, + match_node.claim, + similarity, + node_info, + ] + ) elif action_type == "propose" or action_type == "reinforce": - match_node, similarity = tree.get_most_similar_node(target_claim, query_embedding=query_embedding, side=side, top_k=1, threshold=0.8) + match_node, similarity = tree.get_most_similar_node( + target_claim, query_embedding=query_embedding, side=side, top_k=1, threshold=0.8 + ) if match_node is not None: - logger.debug(f"[Prepared-Opponent-Tree-Retrieval] {action_type} Hit: [{target_claim}] with [{match_node.claim}], Similarity: {similarity:0.2f}") + logger.debug( + f"[Prepared-Opponent-Tree-Retrieval] {action_type} Hit: [{target_claim}] with [{match_node.claim}], Similarity: {similarity:0.2f}" + ) score = match_node.get_strength(max_depth=look_ahead_num) - match_node.argument = [match_node.argument] if isinstance(match_node.argument, str) else match_node.argument + match_node.argument = ( + [match_node.argument] if isinstance(match_node.argument, str) else match_node.argument + ) if len(match_node.argument) > 0: node_info = " ".join(match_node.argument) + f"(Strength: {score:.1f})\n\t" else: node_info = f"(Strength: {score:.1f})\n\t" additional_info_from_oppo_tree.append(node_info) - retrieval_nodes.append(["Prepared-Opponent-Tree-Retrieval", action_type, target_claim, match_node.claim, similarity, node_info]) + retrieval_nodes.append( + [ + "Prepared-Opponent-Tree-Retrieval", + action_type, + target_claim, + match_node.claim, + similarity, + node_info, + ] + ) else: raise ValueError(f"Invalid action: {action_type}") - + if additional_info_from_oppo_tree == [] and match_node is None: - logger.debug(f"[Prepared-Opponent-Tree-Retrieval-Summary] {action_type} 
Miss. No additional info found for [{target_claim}]") + logger.debug( + f"[Prepared-Opponent-Tree-Retrieval-Summary] {action_type} Miss. No additional info found for [{target_claim}]" + ) else: - logger.debug(f"[Prepared-Opponent-Tree-Retrieval-Summary] {action_type} Hit. Additional info: {additional_info_from_oppo_tree}") + logger.debug( + f"[Prepared-Opponent-Tree-Retrieval-Summary] {action_type} Hit. Additional info: {additional_info_from_oppo_tree}" + ) additional_info = additional_info + additional_info_from_oppo_tree return additional_info, retrieval_nodes @@ -311,6 +400,7 @@ def get_retrieval_from_rehearsal_tree(action_type, target_claim, side, oppo_side ##################### Time-Adjuster ##################### + class TimeAdjuster: def __init__(self): self.L = None @@ -319,27 +409,30 @@ def __init__(self): def revise_helper(self, statement, n_words, budget, threshold=5, ratio=0.46, estimator=None): current_cost = estimator.query_time(statement) words_count = LengthEstimator(mode="words").query_time(statement) - logger.debug("[Efficient-Fit-Length] " + f"use {n_words} words in the prompt, real words: {words_count}, real cost: {current_cost:0.2f}, target interval: [{budget-threshold}, {budget}]") + logger.debug( + "[Efficient-Fit-Length] " + + f"use {n_words} words in the prompt, real words: {words_count}, real cost: {current_cost:0.2f}, target interval: [{budget-threshold}, {budget}]" + ) - if budget <= 0 or current_cost >= budget-threshold and current_cost <= budget + 1: + if budget <= 0 or current_cost >= budget - threshold and current_cost <= budget + 1: return current_cost, n_words, True # step1. 
determine the first endpoint if self.L is None and self.R is None: - if current_cost < budget-threshold: + if current_cost < budget - threshold: self.L = n_words return current_cost, n_words + int((budget - current_cost) / ratio), False else: self.R = n_words - return current_cost, n_words - int((current_cost - (budget-threshold)) / ratio), False + return current_cost, n_words - int((current_cost - (budget - threshold)) / ratio), False # step2. determine the second endpoint if self.L is None: - if current_cost < budget-threshold: + if current_cost < budget - threshold: self.L = n_words return current_cost, (self.L + self.R) // 2, False else: - return current_cost, n_words - int((current_cost - (budget-threshold)) / ratio), False + return current_cost, n_words - int((current_cost - (budget - threshold)) / ratio), False if self.R is None: if current_cost > budget: self.R = n_words @@ -348,26 +441,33 @@ def revise_helper(self, statement, n_words, budget, threshold=5, ratio=0.46, est return current_cost, n_words + int((budget - current_cost) / ratio), False # step3. binary search in [L, R], always terminate and w.h.p. 
can terminate when R-L > 1 - if current_cost < budget-threshold: + if current_cost < budget - threshold: self.L = n_words else: self.R = n_words return current_cost, (self.L + self.R) // 2, False - + ##################### Anaylsis ##################### + def extract_statement(llm, motion, statement, claims=None, tree=None, side=None, stage=None): if claims is not None: prompt = extract_statment_by_claim_prompt.format(motion=motion, statement=statement, claim=json.dumps(claims)) elif tree is not None: - prompt = extract_statment_with_tree_prompt.format(motion=motion, statement=statement, claim=json.dumps(claims), tree=tree[0], oppo_tree=tree[1], side=side, stage=stage) + prompt = extract_statment_with_tree_prompt.format( + motion=motion, + statement=statement, + claim=json.dumps(claims), + tree=tree[0], + oppo_tree=tree[1], + side=side, + stage=stage, + ) else: prompt = extract_statment_prompt.format(motion=motion, statement=statement) - - - logger.debug("[Analyze-Helper-Prompt] " + prompt.strip().replace('\n',' ||| ')) + + logger.debug("[Analyze-Helper-Prompt] " + prompt.strip().replace("\n", " ||| ")) claims, response = get_response_with_retry(llm, prompt, "statements") - logger.debug("[Analyze-Helper-Response] " + response.strip().replace('\n',' ||| ')) + logger.debug("[Analyze-Helper-Response] " + response.strip().replace("\n", " ||| ")) return claims - diff --git a/src/utils/model.py b/src/utils/model.py index 109d9f9..923820d 100644 --- a/src/utils/model.py +++ b/src/utils/model.py @@ -1,11 +1,10 @@ -import numpy as np import litellm - +import numpy as np import torch -from transformers import LlamaForSequenceClassification, AutoTokenizer +from transformers import AutoTokenizer, LlamaForSequenceClassification +from utils.constants import ATTACK_RM_PATH, SUPPORT_RM_PATH, google_api_key from utils.tool import logger -from utils.constants import google_api_key, ATTACK_RM_PATH, SUPPORT_RM_PATH safety_setting = [ { @@ -27,7 +26,15 @@ ] -def HelperClient(prompt, 
model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", temperature=0.7, max_tokens=1000, n=1, stop=None, sys=None) -> list: +def HelperClient( + prompt, + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + temperature=0.7, + max_tokens=1000, + n=1, + stop=None, + sys=None, +) -> list: if sys is not None: messages = [{"role": "system", "content": sys}] else: @@ -40,22 +47,17 @@ def HelperClient(prompt, model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", te model_name = f"deepseek/{model}" elif "gemini" in model.lower(): model_name = f"gemini/{model}" - kwargs = { - "api_key": google_api_key, - "safety_settings": safety_setting - } + kwargs = {"api_key": google_api_key, "safety_settings": safety_setting} elif "gpt" in model.lower() or "o1" in model.lower(): model_name = model elif "moonshot" in model.lower() or "kimi" in model.lower(): # Kimi/Moonshot API support import os + model_name = f"moonshot/{model}" # Reduce max_tokens for moonshot models to avoid exceeding limits max_tokens = min(max_tokens, 4096) - kwargs = { - "api_key": os.environ.get("MOONSHOT_API_KEY", ""), - "api_base": "https://api.moonshot.cn/v1" - } + kwargs = {"api_key": os.environ.get("MOONSHOT_API_KEY", ""), "api_base": "https://api.moonshot.cn/v1"} # print(f"[HelperClient] Using Moonshot model: {model_name}") # print(f"Moonshot API Key: {kwargs['api_key'][:5]}****") else: @@ -66,79 +68,72 @@ def HelperClient(prompt, model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", te for _ in range(n): # # Check if we need JSON response format and if the model supports it # use_json_format = ("json" in prompt.lower() or (sys is not None and "json" in sys.lower())) - + # # Only use response_format for models that support it # if use_json_format and ("gpt-4o" in model_name.lower() or "gpt-4-turbo" in model_name.lower() or "gpt-3.5-turbo" in model_name.lower()): if "json" in prompt.lower() or (sys is not None and "json" in sys.lower()): - response = litellm.completion(model=model_name, - response_format={ 
"type": "json_object" }, - messages=messages, - temperature=temperature, - max_tokens=max_tokens, - stop=stop, - **kwargs) + response = litellm.completion( + model=model_name, + response_format={"type": "json_object"}, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + stop=stop, + **kwargs, + ) else: - response = litellm.completion(model=model_name, - messages=messages, - temperature=temperature, - max_tokens=max_tokens, - stop=stop, - **kwargs) + response = litellm.completion( + model=model_name, messages=messages, temperature=temperature, max_tokens=max_tokens, stop=stop, **kwargs + ) responses.append(response.choices[0].message.content) return responses - - - - models_loaded = False pro_model = None con_model = None tokenizer = None + class RM: def __init__(self, model_name): # Check if the model path exists locally import os + if os.path.exists(model_name): # Load from local path self.tokenizer = AutoTokenizer.from_pretrained(model_name, local_files_only=True) self.tokenizer.pad_token = self.tokenizer.eos_token self.model = LlamaForSequenceClassification.from_pretrained( - model_name, - num_labels=3, - torch_dtype=torch.bfloat16, - device_map="auto", - local_files_only=True + model_name, num_labels=3, torch_dtype=torch.bfloat16, device_map="auto", local_files_only=True ) else: # Try to load from Hugging Face Hub self.tokenizer = AutoTokenizer.from_pretrained(model_name) self.tokenizer.pad_token = self.tokenizer.eos_token self.model = LlamaForSequenceClassification.from_pretrained( - model_name, - num_labels=3, - torch_dtype=torch.bfloat16, - device_map="auto" + model_name, num_labels=3, torch_dtype=torch.bfloat16, device_map="auto" ) self.model.config.pad_token_id = self.tokenizer.pad_token_id - def __call__(self, prompt: str, soft=False, temperature=0.7, max_tokens=1000, n=1) -> float: - inputs = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512).to(self.model.device) + def __call__(self, prompt: 
str, soft=False, temperature=0.7, max_tokens=1000, n=1) -> float: + inputs = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512).to( + self.model.device + ) with torch.no_grad(): outputs = self.model(**inputs) if soft: # 平滑计分 p = torch.softmax(outputs.logits, dim=-1).tolist() p = np.array(p[0]) - score = p * np.array([0,1,2]) + score = p * np.array([0, 1, 2]) score = score.sum(axis=-1) return score.item() else: print(torch.argmax(outputs.logits, dim=-1).item()) return torch.argmax(outputs.logits, dim=-1).item() -def reward_model(prompt, type="pro", temperature=0.7, max_tokens=1000, n=1, soft=False): # type is "pro" / "con" + +def reward_model(prompt, type="pro", temperature=0.7, max_tokens=1000, n=1, soft=False): # type is "pro" / "con" global pro_model, con_model, models_loaded if not models_loaded: logger.info("Logging reward model ...") diff --git a/src/utils/prompts/__init__.py b/src/utils/prompts/__init__.py index f24ed0b..9fca128 100644 --- a/src/utils/prompts/__init__.py +++ b/src/utils/prompts/__init__.py @@ -1,49 +1,34 @@ -import os import glob +import os -from .opening import ( - propose_definition_prompt, - claim_propose_prompt, - main_claim_selection, - default_opening_prompt, - expert_opening_prompt_2, - ) -from .rebuttal import ( - default_rebuttal_prompt, - expert_rebuttal_prompt_2 - ) -from .closing import ( - default_closing_prompt, - expert_closing_prompt_2 - ) - +from .closing import default_closing_prompt, expert_closing_prompt_2 from .evaluation import ( - judge_system_prompt, - audience_system_prompt_pre, - audience_system_prompt_post, - Impactful_finegrained, - tree_data_list - ) - - + Impactful_finegrained, + audience_system_prompt_post, + audience_system_prompt_pre, + judge_system_prompt, + tree_data_list, +) +from .opening import ( + claim_propose_prompt, + default_opening_prompt, + expert_opening_prompt_2, + main_claim_selection, + propose_definition_prompt, +) from .others import ( - 
debater_system_prompt, - expert_debater_system_prompt, - extract_statment_prompt, - extract_statment_by_claim_prompt, - audience_feedback_prompt, - post_process_prompt, - evidence_selection_prompt, - select_query_prompt, - extract_statment_with_tree_prompt, - rhetorical_techniques_prompt, - debate_flow_tree_action_prompt, - debate_flow_tree_action_eval_prompt - ) - -from .search import ( - search_prompt, - iterative_search_prompt, - summarize_result_prompt, - extract_author_prompt -) \ No newline at end of file + audience_feedback_prompt, + debate_flow_tree_action_eval_prompt, + debate_flow_tree_action_prompt, + debater_system_prompt, + evidence_selection_prompt, + expert_debater_system_prompt, + extract_statment_by_claim_prompt, + extract_statment_prompt, + extract_statment_with_tree_prompt, + post_process_prompt, + rhetorical_techniques_prompt, + select_query_prompt, +) +from .rebuttal import default_rebuttal_prompt, expert_rebuttal_prompt_2 +from .search import extract_author_prompt, iterative_search_prompt, search_prompt, summarize_result_prompt diff --git a/src/utils/prompts/closing.py b/src/utils/prompts/closing.py index 9a0c488..93565be 100644 --- a/src/utils/prompts/closing.py +++ b/src/utils/prompts/closing.py @@ -1,105 +1,92 @@ context = ( - "Now it comes the closing statement, where you summarize your key points and reaffirm your position ({act} the topic) .\n" - "Your position is to {act} the topic. The opponent is to {counter_act} the topic.\n\n" + "Now it comes the closing statement, where you summarize your key points and reaffirm your position ({act} the topic) .\n" + "Your position is to {act} the topic. 
The opponent is to {counter_act} the topic.\n\n" ) default_closing_prompt = context + ( - "Your response should be about {{n_words}} words and do not output other things than our response.\n" - ) + "Your response should be about {{n_words}} words and do not output other things than our response.\n" +) expert_closing_prompt = context + ( - "## Primary Objectives of a Closing Statement\n" - "- Convince the judges that your team won more battlegrounds.\n" - "- Demonstrate your team's strengths and the opponent's weaknesses within each battleground based on the clash outcomes.\n" - "\n" - "## Rules \n" - "- Ensure your writing is fluent, natural, and indistinguishable from human writing.\n" - "- Avoid empty appeals to values. Remember, value appeals should connect back to the topic and your stance.\n" - "- When citing data or theories, provide sources. Do not introduce new information or data in the closing statement.\n" - "- Avoid repeating arguments from previous speeches. Instead of mechanically listing points, focus on deepening your arguments and ensuring logical coherence.\n" - "- This is a closing statement, not the time for new arguments. Prioritize depth over breadth.\n" - "- Base your arguments on the identified battlegrounds and clashes.\n" - "\n" - ) + "## Primary Objectives of a Closing Statement\n" + "- Convince the judges that your team won more battlegrounds.\n" + "- Demonstrate your team's strengths and the opponent's weaknesses within each battleground based on the clash outcomes.\n" + "\n" + "## Rules \n" + "- Ensure your writing is fluent, natural, and indistinguishable from human writing.\n" + "- Avoid empty appeals to values. Remember, value appeals should connect back to the topic and your stance.\n" + "- When citing data or theories, provide sources. Do not introduce new information or data in the closing statement.\n" + "- Avoid repeating arguments from previous speeches. 
Instead of mechanically listing points, focus on deepening your arguments and ensuring logical coherence.\n" + "- This is a closing statement, not the time for new arguments. Prioritize depth over breadth.\n" + "- Base your arguments on the identified battlegrounds and clashes.\n" + "\n" +) expert_closing_prompt_2 = expert_closing_prompt + ( - "## Your Task \n" - "Deliver a compelling closing statement of {{n_words}} words that:\n\n" - "1. **Frames the Debate by Battlefield:** Clearly outline the core conflict between the two sides, highlighting the key issues at stake. \n" - "2. **Summarize Your Victory by Impact Analysis:** \n" - "- Show how your arguments achieved better outcomes for key stakeholders\n" - "- Demonstrate superior cost-benefit ratio with specific metrics\n" - "- Link outcomes back to judging criteria established in opening\n" - "3. **Expose Critical Flaws in Opposition:**\n" - "- Highlight unfulfilled burdens of proof\n" - "- Show major assumptions that were successfully challenged\n" - "- Demonstrate patterns of failed responses to your key arguments\n" - "4. **Delivers a Persuasive Conclusion:** Leave a lasting impression by reinforcing your key points and appealing to the audience's values. \n\n" - - "## Workflow \n" - "1. Analyze the debate flow trees to strategically allocate your word budget based on argument importance and status. Structure your response according to the above points.\n" - "2. Generate conversational yet language suitable arguments following the allocated word budget. Write as you would speak, Write as you would speak, with VARIED sentence lengths. Short sentences are preferred. Use ORATORICAL language while keeping the original argument sequence. \n" - "3. Use clear verbal signposting. Use clear transitions, including listener-oriented phrases: 'As you'll see ', 'Consider this '. Signal key points: 'The crucial issue here is ', 'What's particularly important '. \n" - "4. 
You SHOULD connect with your audience by:\n" - "- Reframe scenarios to directly involve the audience by using 'you' as the protagonist and describing how the situation would impact their lives, making abstract concepts personally relevant and emotionally resonant." - "- Use analogies, metaphors, or vivid imagery to make complex ideas more accessible and memorable." - "5. Deliver a {{n_words}}-word closing statement. The response should be written in clear, flowing transcript for the competitive debate game of prose without any bullet points, asterisks, or numbered lists. Present only the final text. \n\n" - "Note that it's possible that the debate flow tree is not provided, in this case, you can just generate a closing statement without following the debate flow tree.\n" - - "## Tree Structures\n" - "Two debate flow trees track the exchange of arguments. Each node contains:\n" - "* Data: The specific claims and arguments\n" - "* Visit Count: Number of times addressed in debate\n" - "* Status: 'proposed' (new), 'attacked' (challenged), or 'solved' (resolved)\n\n" - - "Your Tree:\n" - "* Level-1: Your main claims and arguments\n" - "* Level-2: Opponent's rebuttals on your main claims and arguments\n" - "* Level-3: Your defenses on the rebuttals\n" - "Opponent's Tree:\n" - "* Level-1: Opponent's main claims and arguments\n" - "* Level-2: Your rebuttals on the opponent's main claims and arguments\n" - "* Level-3: Opponent's defenses on your rebuttals\n\n" - - "## Time Allocation Guidelines\n" - "1. Prioritize nodes with:\n" - " * High visit counts which means the main conflict and battlefield\n" - " * 'Attacked' status in your Level-2 which means the opponent's main claims that requires defense\n" - " * 'Proposed' status in your opponent's Level-1 which means the opponent's main claims that you can attack\n" - "2. 
Balance time between:\n" - " * Attacking the opponent's main claims\n" - " * Defending your attacked claims\n" - " * Reinforcing your successful defenses or unattacked claims\n" - " * Pressing advantages in successful attacks\n" - " * Countering the opponent's attacks\n" - - "## Key Considerations \n" - "* **Track Record on Key Claims**: \n" - "- Which of our claims stood up to opposition challenges?\n" - "- Which opposition claims did we successfully refute?\n" - "- What assumptions did we successfully challenge?\n" - "* **Impact Comparison**:\n" - "- Compare concrete outcomes of both approaches\n" - "- Show superior achievement of debate objectives\n" - "- Link to established judging criteria\n" - - - "## Tips\n" - "{{tips}}\n\n" - - "## Input Information\n" - "Debate flow trees with node data:\n" - "Your Tree: \n{tree}\n\n" - "Opponent's Tree: \n{oppo_tree}\n\n" - - "## Important Notes\n" - "1. Revisit scenarios or examples from your opening and rebuttal statements to reinforce key points\n" - "2. Use contrasts to highlight the strengths of your position. When opponents emphasize their strengths or your weaknesses, demonstrate how your position offers greater advantages or fewer drawbacks through direct comparison\n\n" - "3. Do not use new evidences or arguments in the closing statement. Focus on summarizing and reinforcing your existing points. Do not need to include the reference section in the closing statement.\n\n" - - - "## Output with the format (two parts start with **Closing Plan** and **Statement**):\n" - "**Closing Plan**: allocate your word budget based on the debate flow trees and explain your rationale. Make sure the total words is {{n_words}}.\n" - "**Statement**: after the closing plan, generate a closing statement of {{n_words}} words in total, do not include any other text\n" - - + "## Your Task \n" + "Deliver a compelling closing statement of {{n_words}} words that:\n\n" + "1. 
**Frames the Debate by Battlefield:** Clearly outline the core conflict between the two sides, highlighting the key issues at stake. \n" + "2. **Summarize Your Victory by Impact Analysis:** \n" + "- Show how your arguments achieved better outcomes for key stakeholders\n" + "- Demonstrate superior cost-benefit ratio with specific metrics\n" + "- Link outcomes back to judging criteria established in opening\n" + "3. **Expose Critical Flaws in Opposition:**\n" + "- Highlight unfulfilled burdens of proof\n" + "- Show major assumptions that were successfully challenged\n" + "- Demonstrate patterns of failed responses to your key arguments\n" + "4. **Delivers a Persuasive Conclusion:** Leave a lasting impression by reinforcing your key points and appealing to the audience's values. \n\n" + "## Workflow \n" + "1. Analyze the debate flow trees to strategically allocate your word budget based on argument importance and status. Structure your response according to the above points.\n" + "2. Generate conversational yet language suitable arguments following the allocated word budget. Write as you would speak, with VARIED sentence lengths. Short sentences are preferred. Use ORATORICAL language while keeping the original argument sequence. \n" + "3. Use clear verbal signposting. Use clear transitions, including listener-oriented phrases: 'As you'll see ', 'Consider this '. Signal key points: 'The crucial issue here is ', 'What's particularly important '. \n" + "4. You SHOULD connect with your audience by:\n" + "- Reframe scenarios to directly involve the audience by using 'you' as the protagonist and describing how the situation would impact their lives, making abstract concepts personally relevant and emotionally resonant." + "- Use analogies, metaphors, or vivid imagery to make complex ideas more accessible and memorable." + "5. Deliver a {{n_words}}-word closing statement. 
The response should be written in clear, flowing transcript for the competitive debate game of prose without any bullet points, asterisks, or numbered lists. Present only the final text. \n\n" + "Note that it's possible that the debate flow tree is not provided, in this case, you can just generate a closing statement without following the debate flow tree.\n" + "## Tree Structures\n" + "Two debate flow trees track the exchange of arguments. Each node contains:\n" + "* Data: The specific claims and arguments\n" + "* Visit Count: Number of times addressed in debate\n" + "* Status: 'proposed' (new), 'attacked' (challenged), or 'solved' (resolved)\n\n" + "Your Tree:\n" + "* Level-1: Your main claims and arguments\n" + "* Level-2: Opponent's rebuttals on your main claims and arguments\n" + "* Level-3: Your defenses on the rebuttals\n" + "Opponent's Tree:\n" + "* Level-1: Opponent's main claims and arguments\n" + "* Level-2: Your rebuttals on the opponent's main claims and arguments\n" + "* Level-3: Opponent's defenses on your rebuttals\n\n" + "## Time Allocation Guidelines\n" + "1. Prioritize nodes with:\n" + " * High visit counts which means the main conflict and battlefield\n" + " * 'Attacked' status in your Level-2 which means the opponent's main claims that requires defense\n" + " * 'Proposed' status in your opponent's Level-1 which means the opponent's main claims that you can attack\n" + "2. 
Balance time between:\n" + " * Attacking the opponent's main claims\n" + " * Defending your attacked claims\n" + " * Reinforcing your successful defenses or unattacked claims\n" + " * Pressing advantages in successful attacks\n" + " * Countering the opponent's attacks\n" + "## Key Considerations \n" + "* **Track Record on Key Claims**: \n" + "- Which of our claims stood up to opposition challenges?\n" + "- Which opposition claims did we successfully refute?\n" + "- What assumptions did we successfully challenge?\n" + "* **Impact Comparison**:\n" + "- Compare concrete outcomes of both approaches\n" + "- Show superior achievement of debate objectives\n" + "- Link to established judging criteria\n" + "## Tips\n" + "{{tips}}\n\n" + "## Input Information\n" + "Debate flow trees with node data:\n" + "Your Tree: \n{tree}\n\n" + "Opponent's Tree: \n{oppo_tree}\n\n" + "## Important Notes\n" + "1. Revisit scenarios or examples from your opening and rebuttal statements to reinforce key points\n" + "2. Use contrasts to highlight the strengths of your position. When opponents emphasize their strengths or your weaknesses, demonstrate how your position offers greater advantages or fewer drawbacks through direct comparison\n\n" + "3. Do not use new evidences or arguments in the closing statement. Focus on summarizing and reinforcing your existing points. Do not need to include the reference section in the closing statement.\n\n" + "## Output with the format (two parts start with **Closing Plan** and **Statement**):\n" + "**Closing Plan**: allocate your word budget based on the debate flow trees and explain your rationale. 
Make sure the total words is {{n_words}}.\n" + "**Statement**: after the closing plan, generate a closing statement of {{n_words}} words in total, do not include any other text\n" ) diff --git a/src/utils/prompts/evaluation.py b/src/utils/prompts/evaluation.py index b7d6f51..054a72c 100644 --- a/src/utils/prompts/evaluation.py +++ b/src/utils/prompts/evaluation.py @@ -28,7 +28,7 @@ Effectiveness in addressing and countering the opposing team's points Overall persuasiveness and impact of each team's case -Your final vote should reflect your honest opinion of which team presented the more convincing argument, taking into account your initial leaning. Give your vote in the format: "My vote is [For/Against]." """ +Your final vote should reflect your honest opinion of which team presented the more convincing argument, taking into account your initial leaning. Give your vote in the format: "My vote is [For/Against]." """ Impactful_finegrained = """Topic: {root} @@ -51,11 +51,11 @@ }} Here, score is 0, 1 or 2, where 0 is not impactful, 1 is medium impactful, and 2 is impactful.""" -import os import json +import os file_path = os.path.join(os.path.dirname(__file__), "tree_list.json") if os.path.exists(file_path): tree_data_list = json.load(open(file_path, "r")) else: - tree_data_list = [] \ No newline at end of file + tree_data_list = [] diff --git a/src/utils/prompts/opening.py b/src/utils/prompts/opening.py index b80e227..6668261 100644 --- a/src/utils/prompts/opening.py +++ b/src/utils/prompts/opening.py @@ -5,8 +5,8 @@ ) default_opening_prompt = context + ( - "Please give an opening statement using three claims with {{n_words}} words, do not output other things.\n" - ) + "Please give an opening statement using three claims with {{n_words}} words, do not output other things.\n" +) expert_opening_prompt = context + ( "\n## Rules\n" @@ -22,23 +22,21 @@ expert_opening_prompt_2 = context + ( "\n\n## Workflow \n" "1. 
Create a opening plan based on the current debate flow trees. It should include the definition, judging criteria, and the battlefields to discuss. \n" - "- If the position is to support the topic, discuss the given definition with necessary details and address uncertainties. " - "- If the position is to oppose the topic, discuss the opponent's EXISTING definition in their debate flow tree if unclear or problematic. Make only necessary clarifications. \n" - "- If the definition concerns a specific subset of a broader category, clearly distinguish it by highlighting unique characteristics, mechanisms, or impacts. " + "- If the position is to support the topic, discuss the given definition with necessary details and address uncertainties. " + "- If the position is to oppose the topic, discuss the opponent's EXISTING definition in their debate flow tree if unclear or problematic. Make only necessary clarifications. \n" + "- If the definition concerns a specific subset of a broader category, clearly distinguish it by highlighting unique characteristics, mechanisms, or impacts. " "2. Select among the definition, judging criteria, and battlefields and allocate your word budget based on the importance of each point. You should allocate more words to the more important points and can ignore the less important points. \n" - "\t- If the definition is selected to discuss, discuss it first. \n" - "\t- If the judging criteria is selected to discuss, discuss it next. \n" - "\t- Among the selected battlefields, discuss the battlefield in the order of importance. \n" + "\t- If the definition is selected to discuss, discuss it first. \n" + "\t- If the judging criteria is selected to discuss, discuss it next. \n" + "\t- Among the selected battlefields, discuss the battlefield in the order of importance. \n" "3. Follow the opening plan to generate conversational arguments. Write as you would speak, with VARIED sentence lengths. 
Use short and simple words and sentences that are easy to understand.\n" "4. Deliver a {{n_words}}-word opening statement. Present only the final text in clear, flowing prose without bullet points, asterisks, or numbered lists. \n" "Note that it's possible that the debate flow tree is not provided, in this case, you can just generate a opening statement without following the debate flow tree.\n" - "## Debate Flow Tree Structure\n" "You are given two debate trees that model the back-and-forth between you and your opponent. Each node contains:\n" "* Data: The specific claims and arguments\n" "* Visit Count: Number of times addressed in debate\n" "* Status: 'proposed' (new), 'attacked' (challenged), or 'solved' (resolved)\n\n" - "Your Debate Tree:\n" "* Level-1 Nodes: Your main claims and arguments\n" "* Level-2 Nodes: Opponent's attacks on your claims and arguments\n" @@ -47,30 +45,24 @@ "* Level-1 Nodes: Opponent's main claims and arguments\n" "* Level-2 Nodes: Your attacks on their claims and arguments\n" "* Level-3 Nodes: Opponent's rebuttal on your attacks\n\n" - - - "## Input Information \n" + "## Input Information \n" "Debate flow trees with node data:\n" "**Your Tree**: \n{tree}\n\n" "**Opponent's Tree**: \n{oppo_tree}\n\n" "**Your Main Claims**: \n{claims}\n\n" "{{definition}}\n\n" - "## Battlefields\n" "{{tips}}\n\n" - - "## Important Notes\n" "1. Organize your points logically with clear purpose statements. \n" - " - Clearly mention the actions you will take in each point. For example, 'we will address [X] concerns mentioned by the opponent, which are ' for attack / rebut actions or 'after that, we will propose our claims that' for propose action.\n" - " - Use clear indicators like first, second, third, etc. to organize your points.\n" + " - Clearly mention the actions you will take in each point. 
For example, 'we will address [X] concerns mentioned by the opponent, which are ' for attack / rebut actions or 'after that, we will propose our claims that' for propose action.\n" + " - Use clear indicators like first, second, third, etc. to organize your points.\n" "2. Avoid restating or simply repeating the same evidence or arguments across different points.\n" "3. Avoid repeating the similar arguments in your previous statement. Use the phrase 'as we have discussed' to refer to the previous statement.\n" "4. Only use facts that are generally accepted and don't require specific citation. Don't hallucinate any particular experimental results, statistical findings from named studies, or quotes from specific researchers until the evidence pool is provided\n" "5. When presenting alternatives or counterarguments, offer specific, implementable solutions rather than just criticism.\n" "6. Address both systemic and individual factors when relevant, showing how they can complement rather than contradict each other.\n" "\n" - "## Output with the format (two parts, start with **Opening Plan** and then **Statement**):\n" "**Opening Plan**: Allocate your word budget and explain your rationale. Briefly mention one or two rhetorical techniques and logical fallacies to discuss. Ensure the total is {{n_words}} words. \n" "**Statement**: Generate an opening statement of {{n_words}} words in total, with no additional text\n" @@ -78,51 +70,44 @@ propose_definition_prompt = ( - "The debate topic is: {motion}. You side is to {act} this topic .\n" - "Please clarify the topic to address any:\n" - "\t- Target groups, such as age, gender, income, etc.\n" - "\t- Ambiguous or technical terms, such as 'ration'.\n" - "\t- Important time, such as 'still', 'current', 'recent'.\n" - "\t- Difference between two similar terms, such as 'matter' and 'crucial', 'writer' and 'writing'.\n" - "\t- Multiple interpretations of key phrases\n" - "\t- Scope-related uncertainties\n" - "3. 
If there are some similar existing policies, you can refer to them for inspiration.\n\n" - "Provide your response in the format: '**Definition**: [your one or two-sentence definition]'\n" - "Don't output anything else." - ) + "The debate topic is: {motion}. Your side is to {act} this topic.\n" + "Please clarify the topic to address any:\n" + "\t- Target groups, such as age, gender, income, etc.\n" + "\t- Ambiguous or technical terms, such as 'ration'.\n" + "\t- Important time, such as 'still', 'current', 'recent'.\n" + "\t- Difference between two similar terms, such as 'matter' and 'crucial', 'writer' and 'writing'.\n" + "\t- Multiple interpretations of key phrases\n" + "\t- Scope-related uncertainties\n" + "3. If there are some similar existing policies, you can refer to them for inspiration.\n\n" + "Provide your response in the format: '**Definition**: [your one or two-sentence definition]'\n" + "Don't output anything else." +) main_claim_selection = ( "## Task: Select Persuasive Claims for Debate\n" "You are participating in a formal debate on the topic: {motion}. Your position is {side}.\n" "Select most persuasive claims from the provided options, using the debate tree information.\n\n" "Note that it's possible that the debate tree is not provided, in this case, you can select claims without considering the debate tree.\n\n" - "## Simulated Debate Flow Tree Structure\n" "Each claim has a simulated debate flow tree that simluate the potential back-and-forth between you and your opponent under this claim:\n" "* Level-0: The root claim (potential main claim for selection)\n" "* Level-1: Your opponent's rebuttal to the root claim\n" "* Level-2: Your defense against the opponent's rebuttal\n\n" - "## Selection Criteria\n" "1. Diversity and Contrastive: Selected claims should cover different perspectives of the topic and be contrastive to each other without overlap\n" "2. 
Comprehensiveness: Claims should form a logical framework that addresses the most important aspects of the topic and distinguishes the specific subject from its broader category\n" "3. Consistency: Claims must be logically consistent with each other and with their Level-2 defenses\n" "4. Defensibility: Claims should have strong defenses at Level-2 against opponent rebuttals\n" "5. Context-aware: Consider the opponent's opening statement when selecting claims if it is provided\n\n" - "## Input\n" "**Definition of the debate topic**:\n" "{definition}\n\n" - "**Simulated Debate Flow Tree for each claim**:\n" "{tree}\n\n" - "**Opponent's opening statement**:\n" "{context}\n\n" - "**Claims to select from (All Level-0 claims)**:\n" "{claims}\n\n" - "## Output\n" "Provide results in JSON format with three fields under the key of *selection*:\n" "* claims: a list of your selected claims. Each claim is a string. It usually contains 3 *very different claims* from non-overlapping perspectives.\n" diff --git a/src/utils/prompts/others.py b/src/utils/prompts/others.py index 69fe375..383acf8 100644 --- a/src/utils/prompts/others.py +++ b/src/utils/prompts/others.py @@ -29,55 +29,58 @@ Await the specific debate topic and your assigned stance before beginning.\n\n""" - - -expert_debater_system_prompt = debater_system_prompt + ( - "\n\n" - "## Knowledge\n" - "### Constructive Arguments:\n" - "- A strong constructive argument defines all key terms within the topic, establishing the boundaries for the debate.\n" - "- It presents a clear judging standard (framework) that clarifies how your arguments support your stance.\n" - "- It includes well-structured arguments (contentions) that support your stance. These arguments should be logically connected and each contain at least one of the following: data, theory, or examples. 
Each argument should link back to the judging standard.\n" - "\n" - "### Theories, Data, and Examples:\n" - "- **Theory** refers to established principles or models developed by experts in relevant fields (e.g., scientists, sociologists) to explain observed phenomena.\n" - "- **Data** refers to specific, quantifiable information obtained through research methods like surveys or statistical analysis.\n" - "- **Examples** illustrate arguments through real-world instances, events, or narratives involving individuals or groups.\n" - "Using a combination of theories, data, and examples strengthens the persuasiveness of your arguments.\n" - "\n" -) + ( - "### Definition Debate\n" - "- A definition debate arises when both sides disagree on the core concept of the topic, vying for the right to define it. Definition forms the cornerstone of argumentation.\n" - "- Common methods of contesting definitions:\n" - " - **Appeal to Authority:** Citing authoritative sources to substantiate the definition.\n" - " - **Appeal to Common Sense (Context):** Utilizing relatable scenarios/examples to evoke common understanding and validate the definition.\n" - " - **Appeal to Absurdity:** Demonstrating that the opponent's definition is overly broad/unreasonable, rendering the topic self-evident and leaving no room for debate.\n" - " - **Appeal to Logic:** Employing counterexamples or logical reasoning to expose flaws in the opponent's definition and reinforce the validity of one's own definition (common rebuttal techniques outlined below can also be applied).\n" - "\nCombining multiple methods of contesting definitions can yield more effective results.\n" - "\n" - "### Framework Debate\n" - "- A framework debate arises when both sides disagree on the criteria for evaluating the topic, vying for the right to set the standard of judgment. 
The framework serves as the yardstick for assessing arguments.\n" - "- Common methods of contesting frameworks are similar to those used in definition debates.\n" -) + ( - "\n" - "### Battleground\n" - "\"Battleground\" is a debate term referring to the **core** issues contested by both sides. A battleground might involve:\n" - "- Disputing the definition of a word or concept.\n" - "- Contesting the criteria for judgment.\n" - "- Debating the interpretation of data or theories.\n" - "- Arguing over the meaning of values.\n" - "\n" - "The team winning more battlegrounds generally wins the debate. Not every issue raised qualifies as a battleground. Identifying and analyzing these battlegrounds is crucial for determining the debate's trajectory and outcome.\n" - "\n" - "### Clash\n" - "\"Clash\" refers to the direct confrontation of arguments and evidence within a **specific battleground**. The outcome of a clash determines which side wins that particular battleground.\n" - "### Criteria Debate\n" - "- A criteria debate arises when both sides disagree on the standards for evaluating the topic, vying for the right to establish these criteria. Criteria serve as the yardstick for assessing arguments.\n" - "- Strategies for winning a criteria debate are similar to those used in definition debates∏o'p\n" - "\n" - "### Values\n" - "- Incorporating a profound discussion of values can enhance your closing statement. However, remember that values should serve your **stance** and ultimately contribute to winning the debate. They must be grounded in well-developed arguments.\n" - "- Avoid vague appeals to values. Instead, derive values directly from the topic and your team's stance, potentially delving into the essence of the social issue at hand. 
Connect these values back to the topic and your stance, using them to further substantiate your position.\n" +expert_debater_system_prompt = ( + debater_system_prompt + + ( + "\n\n" + "## Knowledge\n" + "### Constructive Arguments:\n" + "- A strong constructive argument defines all key terms within the topic, establishing the boundaries for the debate.\n" + "- It presents a clear judging standard (framework) that clarifies how your arguments support your stance.\n" + "- It includes well-structured arguments (contentions) that support your stance. These arguments should be logically connected and each contain at least one of the following: data, theory, or examples. Each argument should link back to the judging standard.\n" + "\n" + "### Theories, Data, and Examples:\n" + "- **Theory** refers to established principles or models developed by experts in relevant fields (e.g., scientists, sociologists) to explain observed phenomena.\n" + "- **Data** refers to specific, quantifiable information obtained through research methods like surveys or statistical analysis.\n" + "- **Examples** illustrate arguments through real-world instances, events, or narratives involving individuals or groups.\n" + "Using a combination of theories, data, and examples strengthens the persuasiveness of your arguments.\n" + "\n" + ) + + ( + "### Definition Debate\n" + "- A definition debate arises when both sides disagree on the core concept of the topic, vying for the right to define it. 
Definition forms the cornerstone of argumentation.\n" + "- Common methods of contesting definitions:\n" + " - **Appeal to Authority:** Citing authoritative sources to substantiate the definition.\n" + " - **Appeal to Common Sense (Context):** Utilizing relatable scenarios/examples to evoke common understanding and validate the definition.\n" + " - **Appeal to Absurdity:** Demonstrating that the opponent's definition is overly broad/unreasonable, rendering the topic self-evident and leaving no room for debate.\n" + " - **Appeal to Logic:** Employing counterexamples or logical reasoning to expose flaws in the opponent's definition and reinforce the validity of one's own definition (common rebuttal techniques outlined below can also be applied).\n" + "\nCombining multiple methods of contesting definitions can yield more effective results.\n" + "\n" + "### Framework Debate\n" + "- A framework debate arises when both sides disagree on the criteria for evaluating the topic, vying for the right to set the standard of judgment. The framework serves as the yardstick for assessing arguments.\n" + "- Common methods of contesting frameworks are similar to those used in definition debates.\n" + ) + + ( + "\n" + "### Battleground\n" + '"Battleground" is a debate term referring to the **core** issues contested by both sides. A battleground might involve:\n' + "- Disputing the definition of a word or concept.\n" + "- Contesting the criteria for judgment.\n" + "- Debating the interpretation of data or theories.\n" + "- Arguing over the meaning of values.\n" + "\n" + "The team winning more battlegrounds generally wins the debate. Not every issue raised qualifies as a battleground. Identifying and analyzing these battlegrounds is crucial for determining the debate's trajectory and outcome.\n" + "\n" + "### Clash\n" + '"Clash" refers to the direct confrontation of arguments and evidence within a **specific battleground**. 
The outcome of a clash determines which side wins that particular battleground.\n' + "### Criteria Debate\n" + "- A criteria debate arises when both sides disagree on the standards for evaluating the topic, vying for the right to establish these criteria. Criteria serve as the yardstick for assessing arguments.\n" + "- Strategies for winning a criteria debate are similar to those used in definition debates.\n" + "\n" + "### Values\n" + "- Incorporating a profound discussion of values can enhance your closing statement. However, remember that values should serve your **stance** and ultimately contribute to winning the debate. They must be grounded in well-developed arguments.\n" + "- Avoid vague appeals to values. Instead, derive values directly from the topic and your team's stance, potentially delving into the essence of the social issue at hand. Connect these values back to the topic and your stance, using them to further substantiate your position.\n" + ) ) rhetorical_techniques_prompt = """ @@ -156,146 +159,134 @@ By skillfully employing these rhetorical techniques, your debate performance will have both intellectual depth and emotional resonance, effectively persuading your audience. """ -extract_statment_prompt = \ - ( - "## Task: Analyze the statements\n" - "You are now a skilled debater participating in a formal debate competition. Your task is to analyze the statements and identify the key claims presented in the statement and evidence or reasoning to support the claims.\n" - "1. These claims are used to support the debate topic. Therefore, do not include the debate topic as the claim.\n" - "2. Identify the key claims presented in the statement and evidence or reasoning to support the claims.\n" - "3. For each claim, provide a brief summary of the claim and the evidence or reasoning used to support it.\n" - "4. It should be at least 3 claims in the statement.\n\n" - "## Input Information\n" - "**Debate Topic**: \n{motion}. 
\n\n" - "**Statement**: \n{statement}. \n\n" - "##Response Format\n" - "Provide your response in JSON format with one key of **statements**. The value of this key is a list of claims and their arguments (evidence or reasoning). \n" - "The keys of each element of the list are **claim** and **arguments**. The value of **claim** is the main claim. The value of **arguments** is a list of reasoning and evidence used to support the claim." - ) +extract_statment_prompt = ( + "## Task: Analyze the statements\n" + "You are now a skilled debater participating in a formal debate competition. Your task is to analyze the statements and identify the key claims presented in the statement and evidence or reasoning to support the claims.\n" + "1. These claims are used to support the debate topic. Therefore, do not include the debate topic as the claim.\n" + "2. Identify the key claims presented in the statement and evidence or reasoning to support the claims.\n" + "3. For each claim, provide a brief summary of the claim and the evidence or reasoning used to support it.\n" + "4. It should be at least 3 claims in the statement.\n\n" + "## Input Information\n" + "**Debate Topic**: \n{motion}. \n\n" + "**Statement**: \n{statement}. \n\n" + "##Response Format\n" + "Provide your response in JSON format with one key of **statements**. The value of this key is a list of claims and their arguments (evidence or reasoning). \n" + "The keys of each element of the list are **claim** and **arguments**. The value of **claim** is the main claim. The value of **arguments** is a list of reasoning and evidence used to support the claim." +) -extract_statment_with_tree_prompt = \ - ( - "## Task: Analyze the statements\n" - "Your task is to analyze the statements and identify the key claims presented in the statement and evidence or reasoning to support the claims.\n" - "1. These claims are used to support your stance on the debate topic. 
Therefore, do not include the debate topic as the claim.\n" - "2. Identify the key claims presented in the statement and evidence or reasoning to support the claims.\n" - "3. For each claim, put the original statement for this claim in **content** and summarize the evidence or reasoning in the statement in **arguments**.\n" - "4. The type of the claim can be **common**, **definition**, **criteria**. **definition** and **criteria** only appear in the opening stage to clarify the definition of the debate topic and the criteria for judging the debate topic.\n" - "5. You are given two debate trees that models the back-and-forth between you and your opponent. Your extracted claims can be used to: \n" - "\t- propose the main claims under Level-0 of your debate tree (only if there is no Level-1 in your debate tree) \n" - "\t- rebut the opponent's attacks in Level-2 of your debate tree. The extracted claim should be the counter-claim to the opponent's attack in Level-2 of your debate tree\n" - "\t- reinforce the main claims in Level-1 of your debate tree. Only use this action if this claim is not designed to rebut the opponent's attack in Level-2 of your debate tree\n" - "\t- attack the opponent's proposed claims in Level-1 of your opponent's debate tree. The extracted claim should be the counter-claim to the opponent's proposed claim in Level-1 of your opponent's debate tree\n" - "The purpose of the claim should be consistent with the debate tree. \n" - "Each claim should be used for one of the above purposes or the combination of them. For example, if the node in Level-2 of your debate tree is the same with the node in Level-1 of your opponent's debate tree, the purpose of the claim will be **rebut** and **attack**.\n" - "Please provide all the possible purposes for each claim. 
The purpose includes a list of dictionaries with the following three keys: " - "\n- **action**: propose, reinforce, rebut or attack " - "\n- **targeted_debate_tree**: you or opponent" - "\n- **target**: the *claim* value of the node in the debate tree. return `N/A` if there is no target" - "\n - For propose: the target is the proposed claim to be added in Level-0 of your debate tree. It should be the same as the claim" - "\n - For rebut: the target should be the claim of the Level-2 nodes in your debate tree" - "\n - For attack: the target should be the claim of the Level-1 nodes in your opponent's debate tree" - "\n - For reinforce: the target should be the claim of the Level-1 nodes in your debate tree, or the claim of the Level-2 nodes in your opponent's debate tree" - "6. It should be at least 3 claims in the statement.\n\n" - - "## Tree Structure\n" - "You are given a debate tree that models the back-and-forth between you and your opponent:\n" - "Your debate tree: \n" - "* Level-1: The main claims proposed by you\n" - "* Level-2: Your opponent's attacks on your claims\n" - "* Level-3: Your rebuttal on the attacks\n\n" - "Opponent's debate tree: \n" - "* Level-1: The main claims proposed by your opponent\n" - "* Level-2: Your attacks on the opponent's claims\n" - "* Level-3: The opponent's rebuttal on your attacks\n\n" - - "## Input Information\n" - "**Debate Topic**: {motion} \n\n" - "**Your Stance**: {side} \n\n" - "**Current Stage**: {stage} \n\n" - "**Statement**: {statement} \n\n" - "**Your Debate Tree**: \n{tree} \n\n" - "**Opponent's Debate Tree**: \n{oppo_tree} \n\n" - - "##Response Format\n" - "Provide your response in JSON format with one key of **statements**. The value of this key is a list of claims and their arguments (evidence or reasoning). \n" - "The keys of each element of the list are **claim**, **content**, **type**, **arguments**, **purpose**.\n" - "- The value of **claim** is the main claim. 
\n" - "- The value of **content** is the original part of the statement for the claim, including the claim and the evidence or reasoning used to support the claim. \n" - "- The value of **type** is the type of the claim, it can be **common**, **definition**, **criteria**. \n" - "- The value of **arguments** is a list of summarized reasoning and evidence used to support the claim. \n" - "- The value of **purpose** is a list of all possible purposes of the claim. \n\n" - ) +extract_statment_with_tree_prompt = ( + "## Task: Analyze the statements\n" + "Your task is to analyze the statements and identify the key claims presented in the statement and evidence or reasoning to support the claims.\n" + "1. These claims are used to support your stance on the debate topic. Therefore, do not include the debate topic as the claim.\n" + "2. Identify the key claims presented in the statement and evidence or reasoning to support the claims.\n" + "3. For each claim, put the original statement for this claim in **content** and summarize the evidence or reasoning in the statement in **arguments**.\n" + "4. The type of the claim can be **common**, **definition**, **criteria**. **definition** and **criteria** only appear in the opening stage to clarify the definition of the debate topic and the criteria for judging the debate topic.\n" + "5. You are given two debate trees that models the back-and-forth between you and your opponent. Your extracted claims can be used to: \n" + "\t- propose the main claims under Level-0 of your debate tree (only if there is no Level-1 in your debate tree) \n" + "\t- rebut the opponent's attacks in Level-2 of your debate tree. The extracted claim should be the counter-claim to the opponent's attack in Level-2 of your debate tree\n" + "\t- reinforce the main claims in Level-1 of your debate tree. 
Only use this action if this claim is not designed to rebut the opponent's attack in Level-2 of your debate tree\n" + "\t- attack the opponent's proposed claims in Level-1 of your opponent's debate tree. The extracted claim should be the counter-claim to the opponent's proposed claim in Level-1 of your opponent's debate tree\n" + "The purpose of the claim should be consistent with the debate tree. \n" + "Each claim should be used for one of the above purposes or the combination of them. For example, if the node in Level-2 of your debate tree is the same with the node in Level-1 of your opponent's debate tree, the purpose of the claim will be **rebut** and **attack**.\n" + "Please provide all the possible purposes for each claim. The purpose includes a list of dictionaries with the following three keys: " + "\n- **action**: propose, reinforce, rebut or attack " + "\n- **targeted_debate_tree**: you or opponent" + "\n- **target**: the *claim* value of the node in the debate tree. return `N/A` if there is no target" + "\n - For propose: the target is the proposed claim to be added in Level-0 of your debate tree. It should be the same as the claim" + "\n - For rebut: the target should be the claim of the Level-2 nodes in your debate tree" + "\n - For attack: the target should be the claim of the Level-1 nodes in your opponent's debate tree" + "\n - For reinforce: the target should be the claim of the Level-1 nodes in your debate tree, or the claim of the Level-2 nodes in your opponent's debate tree" + "6. 
It should be at least 3 claims in the statement.\n\n" + "## Tree Structure\n" + "You are given a debate tree that models the back-and-forth between you and your opponent:\n" + "Your debate tree: \n" + "* Level-1: The main claims proposed by you\n" + "* Level-2: Your opponent's attacks on your claims\n" + "* Level-3: Your rebuttal on the attacks\n\n" + "Opponent's debate tree: \n" + "* Level-1: The main claims proposed by your opponent\n" + "* Level-2: Your attacks on the opponent's claims\n" + "* Level-3: The opponent's rebuttal on your attacks\n\n" + "## Input Information\n" + "**Debate Topic**: {motion} \n\n" + "**Your Stance**: {side} \n\n" + "**Current Stage**: {stage} \n\n" + "**Statement**: {statement} \n\n" + "**Your Debate Tree**: \n{tree} \n\n" + "**Opponent's Debate Tree**: \n{oppo_tree} \n\n" + "##Response Format\n" + "Provide your response in JSON format with one key of **statements**. The value of this key is a list of claims and their arguments (evidence or reasoning). \n" + "The keys of each element of the list are **claim**, **content**, **type**, **arguments**, **purpose**.\n" + "- The value of **claim** is the main claim. \n" + "- The value of **content** is the original part of the statement for the claim, including the claim and the evidence or reasoning used to support the claim. \n" + "- The value of **type** is the type of the claim, it can be **common**, **definition**, **criteria**. \n" + "- The value of **arguments** is a list of summarized reasoning and evidence used to support the claim. \n" + "- The value of **purpose** is a list of all possible purposes of the claim. \n\n" +) -extract_statment_by_claim_prompt = \ - ( - "## Task: Analyze the statements\n" - "You are now a skilled debater participating in a formal debate competition. Your task is to analyze the statements and identify the evidence or reasoning to support the given claims.\n" - "1. These claims are used to support the debate topic. 
Therefore, do not include the debate topic as the claim.\n" - "2. Identify the evidence or reasoning to support the given claims.\n" - "3. For each given claim, provide a brief summary of the claim and the evidence or reasoning used to support it.\n" - "4. It should be at least 3 claims in the statement.\n\n" - "## Input Information\n" - "**Debate Topic**: \n{motion}. \n\n" - "**Statements**: \n{statement}. \n\n" - "**Claim**: \n{claim}. \n\n" - "##Response Format\n" - "Provide your response in JSON format with one key of **statements**. The value of this key is a list of claims and their arguments (evidence or reasoning). \n" - "The keys of each element of the list are **claim** and **arguments**. The value of **claim** MUST come from the given **Claim**. The value of **arguments** is a list of reasoning and evidence used to support the claim." - ) +extract_statment_by_claim_prompt = ( + "## Task: Analyze the statements\n" + "You are now a skilled debater participating in a formal debate competition. Your task is to analyze the statements and identify the evidence or reasoning to support the given claims.\n" + "1. These claims are used to support the debate topic. Therefore, do not include the debate topic as the claim.\n" + "2. Identify the evidence or reasoning to support the given claims.\n" + "3. For each given claim, provide a brief summary of the claim and the evidence or reasoning used to support it.\n" + "4. It should be at least 3 claims in the statement.\n\n" + "## Input Information\n" + "**Debate Topic**: \n{motion}. \n\n" + "**Statements**: \n{statement}. \n\n" + "**Claim**: \n{claim}. \n\n" + "##Response Format\n" + "Provide your response in JSON format with one key of **statements**. The value of this key is a list of claims and their arguments (evidence or reasoning). \n" + "The keys of each element of the list are **claim** and **arguments**. The value of **claim** MUST come from the given **Claim**. 
The value of **arguments** is a list of reasoning and evidence used to support the claim." +) -select_query_prompt = \ - ( - "## Query Selection Task\n\n" - "You are now a skilled debater participating in a formal debate competition. Your task is to select the most effective search query/queries that will help you {action} the following claim. You should keep the debate topic and your stance in mind. \n\n" - "### Input Information\n" - "**Debate Topic**:\n" - "{motion}\n" - "**Stance**:\n" - "{stance}\n" - "**Target Claim**:\n" - "{claim}\n\n" - "**Available Query Options**:\n" - "{candidate_queries}\n\n" - "### Selection Criteria\n" - "1. Select queries that are most likely to return evidence directly relevant to {action} the claim or the logic chain. The query should also be directly related to your stance on the debate topic.\n" - "2. If you need to attack the target claim, you should create counter-claims to the target claim and select the queries to support the counter-claims. \n" - "3. If you need to attack the target logic chain, you should find evidence to If you need to attack the target logic chain, you should find evidence to demonstrate that the connection between two adjacent claims is flawed, meaning one claim does not necessarily lead to the next. \n" - "4. Prioritize specific queries over general ones\n" - "5. The selected queries MUST come from **Available Query Options**\n\n" - "### Response Format\n" - "Respond with a JSON object containing:\n" - "```json\n" - "{{\n" - ' "query": ["selected_query_1", "selected_query_2"]\n' - "}}\n" - "```\n" - "Note: Include only the most relevant queries that will help {action} the claim or the logic chain." - - ) +select_query_prompt = ( + "## Query Selection Task\n\n" + "You are now a skilled debater participating in a formal debate competition. Your task is to select the most effective search query/queries that will help you {action} the following claim. 
You should keep the debate topic and your stance in mind. \n\n" + "### Input Information\n" + "**Debate Topic**:\n" + "{motion}\n" + "**Stance**:\n" + "{stance}\n" + "**Target Claim**:\n" + "{claim}\n\n" + "**Available Query Options**:\n" + "{candidate_queries}\n\n" + "### Selection Criteria\n" + "1. Select queries that are most likely to return evidence directly relevant to {action} the claim or the logic chain. The query should also be directly related to your stance on the debate topic.\n" + "2. If you need to attack the target claim, you should create counter-claims to the target claim and select the queries to support the counter-claims. \n" + "3. If you need to attack the target logic chain, you should find evidence to demonstrate that the connection between two adjacent claims is flawed, meaning one claim does not necessarily lead to the next. \n" + "4. Prioritize specific queries over general ones\n" + "5. The selected queries MUST come from **Available Query Options**\n\n" + "### Response Format\n" + "Respond with a JSON object containing:\n" + "```json\n" + "{{\n" + ' "query": ["selected_query_1", "selected_query_2"]\n' + "}}\n" + "```\n" + "Note: Include only the most relevant queries that will help {action} the claim or the logic chain." +) audience_feedback_prompt = ( "## Your Task\n" "You are a panel of debate audience members to provide comprehensive feedback on how the statement impacts and persuades a general audience.\n\n" - "### Audience Panel Composition\n" "- General public with varied educational backgrounds\n" "- Students and educators from different fields\n" "- Professionals interested in policy and social issues\n\n" - "### Evaluation Dimensions\n" "1. **Core Message Clarity**\n" " - Accessibility of main arguments\n" " - Understanding of key evidence\n" " - Clarity of logical flow\n" " - Technical term explanation\n\n" - "2. 
**Engagement Impact**\n" " - Effectiveness of examples and analogies\n" " - Connection with audience interests\n" " - Memorability of key points\n" " - Attention maintenance\n\n" - "3. **Evidence Presentation**\n" " - Evidence is relevant and supports the argument\n" " - Data clarity and visualization\n" @@ -303,29 +294,24 @@ " - Case study effectiveness\n" " - Avoid evidence title\n" " - Statistics interpretation\n\n" - "4. **Persuasive Elements**\n" " - Emotional resonance\n" " - Practical relevance\n" " - Solution feasibility\n" " - Call to action clarity\n\n" - "### Guidelines\n" "- Evaluate all dimensions thoroughly\n" "- Identify the most significant barriers to audience understanding in the {stage} statement\n" "- Consider which issues could be addressed with minimal revisions on the {stage} statement\n" "- Focus on high-impact, low-disruption improvements\n\n" - "### Tips\n" "- The opening statement should focus on the main claims and their supporting evidences. \n" "- The rebuttal statement should focus on the logic chain and the counter-claims. \n" "- The closing statement should focus on more on the emotional appeal and the call to action instead of the evidence. \n\n" - "## Retrieval Information\n" "Here are debate flow trees and action allocations from human debates. " "Use the structure and allocation strategy to provide better feedback.\n\n" "{retrieval}\n\n" - "### Input Information\n" "**Debate Topic**:\n" "{motion}\n\n" @@ -333,14 +319,12 @@ "{history}\n\n" "**Current {side}'s {stage} Statement to be evaluated**:\n" "{statement}\n\n" - "### Output Format\n" "[Comprehensive Analysis]\n" "Core Message Clarity:\n" "Engagement Impact:\n" "Evidence Presentation:\n" "Persuasive Elements:\n" - "[Critical Issues and Minimal Revision Suggestions]\n" "1. 
Issue:\n" " Impact on Audience:\n" @@ -353,14 +337,12 @@ evidence_selection_prompt = ( "From the provided list of evidence dictionaries, select the 10 most useful pieces that would best support a debate argument.\n\n" - "## Context Information:\n" "**Debate Topic**: {motion}\n" "**Your Stance**: {side}\n" "**Current Stage**: {stage}\n" "**Current Statement**: {statement}\n\n" "**Revision Guidance for the current statement**: {feedback}\n\n" - "## Selection Criteria:\n" "1. Primary criteria:\n" " - Contains specific numerical data and statistics that DIRECTLY SUPPORT your stance and statement\n" @@ -370,106 +352,88 @@ " - Comes from credible academic sources (peer-reviewed journals, established institutions)\n" " - Directly relates to the debate topic and would be helpful and effective\n" " - Can help address the feedback points\n\n" - "2. Alignment verification:\n" " - For each piece of evidence, explicitly confirm that it supports rather than contradicts your position\n" " - If a source partially supports and partially contradicts your stance, only use it if you can accurately represent the supportive parts without mischaracterizing the source\n" " - Avoid evidence that requires significant interpretation or context-shifting to fit your argument\n\n" - "3. Diversity requirements:\n" " - Selected evidence should cover different aspects of the argument\n" " - Avoid selecting multiple pieces that make the same point\n" " - Try to select from different sources/query batches (e.g. don't only select from 3_x series)\n" " - Balance between theoretical research and practical applications\n\n" - "4. Source quality hierarchy:\n" " - Peer-reviewed academic papers\n" " - Research from established institutions\n" " - Articles from reputable media outlets\n" " - Industry reports and whitepapers\n\n" - "5. 
Evidence assessment questions (answer these for each piece):\n" " - Does this evidence directly support my stance or does it require significant reframing?\n" " - Does this evidence contrast how the issue manifests in the specific subject versus the broader category?\n" " - Does the evidence contain any statements that could be used more effectively by the opposing side?\n" " - Is the source credible and recent enough to be persuasive?\n" " - Does this evidence provide unique information not covered by other selected pieces?\n\n" - "Evidence list:\n" "{evidence}\n\n" - "## Workflow\n" - "- First, analyze each evidence piece for alignment with your position, your current statement and the revision guidance.\n" + "- First, analyze each evidence piece for alignment with your position, your current statement and the revision guidance.\n" "- For each candidate evidence, write a brief note about whether it truly supports your stance. \n" "- Select your final choices based on the selection criteria.\n" "- Be careful and strict to select the evidence. Only return the evidence that is helpful and effective. If there is no such evidence, return an empty analysis object and an empty list.\n\n" - "## Output Format\n" "Return a JSON object with two fields:\n" "1. 'analysis': A brief analysis of why each selected piece supports your position (limit to 1-2 sentences per piece)\n" "2. 'selected_ids': An array of at most 10 evidence IDs (the 'id' field from each selected dictionary)\n\n" - "Format:\n" - '{{\n' + "{{\n" ' "analysis": {{\n' ' "id1": "This evidence supports my stance because...",\n' ' "id2": "This evidence aligns with my position by..."\n' - ' }},\n' + " }},\n" ' "selected_ids": ["id1", "id2", "id3", ..., "id10"]\n' - '}}' - + "}}" ) post_process_prompt = ( "## Your Task\n" "Revise your current {stage} statement based on the feedback from the experts and audience. 
Transform the statement into a more natural and persuasive spoken argument while maintaining academic credibility. The new statement should be around {max_words} words and support your stance of the topic. \n\n" - "### Workflow\n" "1. Based on **Feedback to consider** to fix the critical issues mentioned by experts and audience with minimal revision. \n" - " - You should try your best to fill in the [X] in the *Minimal Revision Suggestion* of the feedback, and use the suggested words to revise the original statements, remember to stand firm on your stance {side}. \n" - " - If you cannot fill in the [X], you should ignore this point. \n" + " - You should try your best to fill in the [X] in the *Minimal Revision Suggestion* of the feedback, and use the suggested words to revise the original statements, remember to stand firm on your stance {side}. \n" + " - If you cannot fill in the [X], you should ignore this point. \n" "2. Follow the minimal revision suggestions to revise the original statements, remember to stand firm on your stance {side}. \n" "3. For each point, find the most relevant evidence that can support the WHOLE LOGIC of the point, instead of partially support some arguments in this point. If you cannot find such evidence, keep the point as it is. If you find the evidence, explicitly cite the evidence following the evidence guidelines. \n" "4. During the revision, DO NOT change the factual information of the original statement. \n" "5. Be confident and assertive in your statement. DO NOT use words like 'may', 'possible', 'likely', 'might', etc. to express your uncertainty. \n" "6. If there is no overview in the original statement, you should add one. If there is no indication of the order of the points (such as first, second, finally, etc.) in the original statement, you should add them. \n" "7. 
The new statement should also follow the allocation plan and be around {max_words} words and support your stance of the topic.\n\n" - "## Evidence Guidelines\n" "CRITICAL REQUIREMENT: The statement is a spoken transcript. Therefore, you MUST mention the source of the evidence in the statement instead of just citing the evidence with a number because the audience does not have access to the reference list when listening to the statement. Failing to properly attribute sources verbally will significantly undermine both your credibility and the persuasive impact of your entire presentation.\n" "- Only use evidence that directly supports your complete argument, rather than evidence that only partially supports certain aspects. \n" "- ALWAYS mention the source of the evidence. Never just cite evidence like 'A study shows that...' without specifying the source. \n" "- Integrate evidence naturally into your argument flow and clearly connect each citation to specific claims. Include the time period of the evidence for better understanding. 
For example:\n" - " - \"Research from *PMC in 2023* has demonstrated that couples who perceive more financial difficulties generally report more conflicts and lower relationship satisfaction [1].\"\n" - " - \"According to *a 2023 study in Journal of Social and Personal Relationships*, almost every participant could recall a recent financial disagreement with their partner when prompted, highlighting how pervasive these tensions can be [2].\"\n" - " - \"*ABC News reporting in 2023* featured financial advisor Kate McCallum's preference for focusing on fairness over strict equity, as it creates a more holistic approach to relationship finances [3].\"\n" + ' - "Research from *PMC in 2023* has demonstrated that couples who perceive more financial difficulties generally report more conflicts and lower relationship satisfaction [1]."\n' + ' - "According to *a 2023 study in Journal of Social and Personal Relationships*, almost every participant could recall a recent financial disagreement with their partner when prompted, highlighting how pervasive these tensions can be [2]."\n' + ' - "*ABC News reporting in 2023* featured financial advisor Kate McCallum\'s preference for focusing on fairness over strict equity, as it creates a more holistic approach to relationship finances [3]."\n' "- When citing evidence, weave in the source credentials (background or profile of the source) to build authority:\n" - " - \"According to Benjamin Karney, a social psychology professor at UCLA, whose extensive research published in the Annual Review of Psychology reveals...\"\n" - " - \"Financial experts at *American business magazine Forbes* have found that couples with large income gaps face unique challenges that can't be solved with simplistic approaches...\"\n" - " - \"*Stanford psychology scholars* have demonstrated through controlled studies that...\"\n" + ' - "According to Benjamin Karney, a social psychology professor at UCLA, whose extensive research published in the Annual Review 
of Psychology reveals..."\n' + ' - "Financial experts at *American business magazine Forbes* have found that couples with large income gaps face unique challenges that can\'t be solved with simplistic approaches..."\n' + ' - "*Stanford psychology scholars* have demonstrated through controlled studies that..."\n' "- Ensure each citation clearly connects to the source in your reference list and includes sufficient publication details:\n" " - List ALL your references in a standard Chicago format in the **Reference** section. Make sure each source has a clear number such as [1], [2], etc. The section should come after the statement. Include full publication information (author, title, publication, date) for each source. Do not include web links or URL information.\n" " - Use correct formatting: [1], [2], etc. immediately after the claim being cited\n" " - In the reference section, provide complete source information including author name(s), full title, publication name in italics, and publication date\n" "- Develop a robust evidence foundation with multiple sources rather than relying on just one or two studies. Be prepared to cite sources if asked." - "### Input Information\n" - "### Feedback to Consider\n" - "{feedback}\n\n" - + "{feedback}\n\n" "**Evidence to select (Do not use non-existing evidence)**:\n" "{evidence}\n\n" - "**Debate Topic**:\n" "{motion}\n\n" - "**Stand Firm on Your Stance**:\n" "{side} side\n\n" - "**Your current {stage} Statement**:\n" "{statement}\n\n" - "**Allocation Plan**:\n" "{allocation_plan}\n\n" "### Output Format: Generate ONLY the revised statement text below in around {max_words} words. IMPORTANT: Make sure that you are following the constraint of the number of words, the above feedback for consideration, and the allocation plan. The output must NOT begin with any title, heading, or introductory phrase like '**Opening Statement: ...**' or similar. Start directly with the first sentence of the statement. 
No additional explanations.\n" @@ -480,13 +444,11 @@ debate_flow_tree_action_prompt = ( "## Get Strategic Actions from Debate Flow Tree\n" "You are given two debate trees that model the back-and-forth between you and your opponent. You should create a list of actions based on the debate flow tree. \n" - "## Debate Flow Tree Structure\n" "Each node contains:\n" "* Data: The specific claims and arguments\n" "* Visit Count: Number of times addressed in debate\n" "* Status: 'proposed' (new), 'attacked' (challenged)\n\n" - "Your Debate Tree:\n" "* Level-1 Nodes: Your main claims\n" "* Level-2 Nodes: Opponent's attacks on your claims\n" @@ -495,25 +457,22 @@ "* Level-1 Nodes: Opponent's main claims\n" "* Level-2 Nodes: Your attacks on their claims\n" "* Level-3 Nodes: Opponent's rebuttal on your attacks\n\n" - "Based on these tree structures, you can take the following actions: \n" "\t- Propose main claims as the Level-1 nodes of your debate tree \n" "\t- Attack the opponent's main claims in Level-1 nodes of the opponent's debate tree\n" "\t- Rebut the opponent's attacks in Level-2 nodes of your debate tree. \n" "\t- Reinforce the main claims in Level-1 nodes of your debate tree. 
Only use this action if this claim is not designed to rebut the opponent's attack in Level-2 of your debate tree\n\n" - "## Techniques to get the counter-argument or construct the rebuttal\n" "To rebut or attack a argument node, you can use the following techniques to get the counter-argument or construct the rebuttal:\n" "- **Pointing out logical fallacies:** Identify errors in the opponent's reasoning, such as reversing cause and effect, equivocation (shifting the meaning of a key term), straw man arguments, circular reasoning, or tautology (repeating the same idea in different words).\n" "- **Pointing out factual errors:** Highlight inaccuracies or weaknesses in the opponent's evidence, such as insufficient data, incorrect facts, or biased sources.\n" "- **Pointing out error logic:** Identify flawed logic underlying opponent's framework.\n" - " - **Example 1:** \"You conclusion is based on the premise of A, but A is not always true. For example, .... Therefore, your conclusion is not always correct.\"\n" - " - **Example 2:** \"You conclusion relies on the premise of A and the hidden premise of B, but B is not always true. For example, .... Therefore, your conclusion is not always correct.\"\n" - " - **Example 3:** \"You claim A and B can lead to C, but that is not always the case. For example, .... Therefore, your conclusion is not always correct.\"\n" + ' - **Example 1:** "You conclusion is based on the premise of A, but A is not always true. For example, .... Therefore, your conclusion is not always correct."\n' + ' - **Example 2:** "You conclusion relies on the premise of A and the hidden premise of B, but B is not always true. For example, .... Therefore, your conclusion is not always correct."\n' + ' - **Example 3:** "You claim A and B can lead to C, but that is not always the case. For example, .... 
Therefore, your conclusion is not always correct."\n' "- **Leveling the playing field:** This technique aims to neutralize the opponent's advantage or minimize the perceived harm of your side's position by demonstrating that both sides share the same issue or benefit.\n" - " - **Example 1:** \"You claim A, but B also has this problem. Therefore, both sides are equal in this regard, both having the issue.\"\n" - " - **Example 2:** \"You mention the benefits of A, but B also offers the same benefits. So, both sides are equal in this aspect, both being advantageous.\"\n\n" - + ' - **Example 1:** "You claim A, but B also has this problem. Therefore, both sides are equal in this regard, both having the issue."\n' + ' - **Example 2:** "You mention the benefits of A, but B also offers the same benefits. So, both sides are equal in this aspect, both being advantageous."\n\n' "## Retrieval Human Debate Information\n" "Here are debate flow trees and action allocations from human debates. DO NOT copy the exemplar motion or statement content. Instead, use the structure and allocation strategy to improve your approach.\n" "1. Study the speaking style (short sentences, varied lengths, clear transitions)\n" @@ -524,19 +483,14 @@ "6. Balance depth versus breadth as demonstrated in the exemplar\n" "7. Check for potential blind spots in your argument preparation\n" "8. Apply the exemplar's strategic sequencing to your own arguments\n\n" - "{retrieval}\n\n" - - - "## Input Information \n" + "## Input Information \n" "Debate Topic: {motion}\n\n" "Your Stance: {side}\n\n" "Main Claims: {claims}\n\n" - "Debate flow trees with node data:\n" "**Your Tree**: \n{tree}\n\n" "**Opponent's Tree**: \n{oppo_tree}\n\n" - "## Output in JSON format with one key of **response**, and the value is a list of all possible actions. 
Each action is a dictionary with the following keys:\n" "- **action**: The action to take, such as 'propose', 'attack', 'rebut', 'reinforce'\n" "- **target_claim**: The claim to propose, attack, rebut or reinforce. For propose action, it is the claim to be proposed as the Level-1 node of your debate tree. For attack action, it should be the claim of the opponent's Level-1 nodes. For rebut action, it should be the claim of your Level-2 nodes. For reinforce action, it should be the claim of your Level-1 nodes.\n" @@ -552,16 +506,14 @@ "1. When given debate points based on the flow tree, you'll group them into strategic *battlefields* based on shared purposes and underlying arguments. \n" "2. After grouping the debate points into battlefields, you should evaluate the importance of each battlefield based on the debate flow tree. \n" "3. Finally, you should use the techniques to generate the argument for each battlefield.\n" - "\t- Be specific and concise to the target claim and target argument. You should always try to discuss the evidence. \n" - "\t- Do not repeat the similar arguments in different points. \n" - "\t- For each claim about a specific subject, contrast how the issue manifests in the specific subject versus the broader category. Explain why the proposed action specifically targeting the subject is justified based on these unique characteristics. " - + "\t- Be specific and concise to the target claim and target argument. You should always try to discuss the evidence. \n" + "\t- Do not repeat the similar arguments in different points. \n" + "\t- For each claim about a specific subject, contrast how the issue manifests in the specific subject versus the broader category. Explain why the proposed action specifically targeting the subject is justified based on these unique characteristics. 
" "### Debate Flow Tree Structure\n" "Each node contains:\n" "* Data: The specific claims and arguments\n" "* Visit Count: Number of times addressed in debate\n" "* Status: 'proposed' (new), 'attacked' (challenged)\n\n" - "Your Debate Tree:\n" "* Level-1 Nodes: Your main claims\n" "* Level-2 Nodes: Opponent's attacks on your claims\n" @@ -570,14 +522,12 @@ "* Level-1 Nodes: Opponent's main claims\n" "* Level-2 Nodes: Your attacks on their claims\n" "* Level-3 Nodes: Opponent's rebuttal on your attacks\n\n" - "### Debate Point Structure:\n" "Each debate point contains:\n" "- **Action**: Your specific debate move (attack, defend, propose, reinforce, etc.)\n" "- **Target Claim**: The specific claim you're addressing\n" "- **Target Argument**: The specific arguments have been discussed in this debate\n" "- **Prepared Materials**: Pre-prepared evidence supporting your position. It may not be mentioned in this debate process.\n\n" - "### Grouping Debate Points into Battlefields\n" "Group related actions into strategic 'battlefields' when they share common underlying arguments or evidence:\n" "1. Same Argument, Different Actions:\n" @@ -586,39 +536,33 @@ " - Example: When rebutting an opponent's claim A AND attacking your opponent's claim B rely on the similar reasoning or related evidence\n" " - ... \n" "2. Same Evidence, Multiple Targets:\n" - " - Example: When a single piece of evidence can counter multiple opponent claims\n" + " - Example: When a single piece of evidence can counter multiple opponent claims\n" "By organizing debate points into these logical groupings, you'll create a more cohesive and efficient debate strategy.RetryClaude can make mistakes. Please double-check responses.\n\n" - "## Techniques to get the counter-argument or construct the rebuttal for each battlefield\n" "To rebut or attack a argument node, you can use following techniques to get the counter-argument or construct the rebuttal. They should be presented in this order:\n" "1. 
**Pointing out logical fallacies:** Identify errors in the opponent's reasoning, such as reversing cause and effect, equivocation (shifting the meaning of a key term), straw man arguments, circular reasoning, or tautology (repeating the same idea in different words).\n" " - You can use the prepared materials for this technique. \n" "2. **Pointing out error logic:** Identify flawed logic underlying opponent's framework.\n" " - You can use the prepared materials for this technique. \n" - " - **Example 1:** \"You conclusion is based on the premise of A, but A is not always true. For example, .... Therefore, your conclusion is not always correct.\"\n" - " - **Example 2:** \"You conclusion relies on the premise of A and the hidden premise of B, but B is not always true. For example, .... Therefore, your conclusion is not always correct.\"\n" - " - **Example 3:** \"You claim A and B can lead to C, but that is not always the case. For example, .... Therefore, your conclusion is not always correct.\"\n" + ' - **Example 1:** "You conclusion is based on the premise of A, but A is not always true. For example, .... Therefore, your conclusion is not always correct."\n' + ' - **Example 2:** "You conclusion relies on the premise of A and the hidden premise of B, but B is not always true. For example, .... Therefore, your conclusion is not always correct."\n' + ' - **Example 3:** "You claim A and B can lead to C, but that is not always the case. For example, .... Therefore, your conclusion is not always correct."\n' "3. **Pointing out factual errors:** Highlight inaccuracies or weaknesses in the opponent's evidence, such as insufficient data, incorrect facts, or biased sources.\n" " - You should refer to the evidence in the debate flow tree to find the evidence that can be used to point out the factual errors. \n" - " - Example 1: \"Your argument relies on a survey with only 50 participants from a single geographic region. 
This sample size is too small and geographically limited to support such broad conclusions about national attitudes. Without more representative data, your claim lacks sufficient factual support.\"" - " - Example 2: \"You cite a study published in a workshop without peer review. This selective use of potentially biased sources weakens the credibility of your evidence.\"" + ' - Example 1: "Your argument relies on a survey with only 50 participants from a single geographic region. This sample size is too small and geographically limited to support such broad conclusions about national attitudes. Without more representative data, your claim lacks sufficient factual support."' + ' - Example 2: "You cite a study published in a workshop without peer review. This selective use of potentially biased sources weakens the credibility of your evidence."' "4. **Leveling the playing field:** This technique aims to neutralize the opponent's advantage or minimize the perceived harm of your side's position by demonstrating that both sides share the same issue or benefit.\n" - " - **Example 1:** \"You claim A, but B also has this problem. Therefore, both sides are equal in this regard, both having the issue.\"\n" - " - **Example 2:** \"You mention the benefits of A, but B also offers the same benefits. So, both sides are equal in this aspect, both being advantageous.\"\n\n" - " - **Example 3:** \"Instead of focusing of claim A, we can use alternative method such as B to solve the problem. \"\n" - - - "## Input Information \n" + ' - **Example 1:** "You claim A, but B also has this problem. Therefore, both sides are equal in this regard, both having the issue."\n' + ' - **Example 2:** "You mention the benefits of A, but B also offers the same benefits. So, both sides are equal in this aspect, both being advantageous."\n\n' + ' - **Example 3:** "Instead of focusing of claim A, we can use alternative method such as B to solve the problem. 
"\n' + "## Input Information \n" "Debate Topic: {motion}\n\n" "Your Stance: {side}\n\n" - "### Debate flow trees with node data:\n" "**Your Tree**: \n{tree}\n\n" "**Opponent's Tree**: \n{oppo_tree}\n\n" - "### Debate points: \n" "{actions}\n\n" - "## Output in JSON format with one key of **response**, and the value is a list of the arguments and the importance of the battlefield. Each battlefield is a dictionary with the following keys:\n" "- **battlefield**: the description of the battlefield. It should be a sentence that summarizes shared underlying arguments or evidence in the battlefield. \n" "- **idx_list**: the list of 'idx' of the debate points in the given debate points. These data points are grouped into the same battlefield. \n" @@ -626,7 +570,7 @@ " - Note: One battlefield CANNOT include multiple 'propose' actions with different target claims.\n" "- **unified_argument**: the argument for the battlefield. You should use the techniques above in order. \n" "- **importance**: The importance of the battlefield. It indicates the priority of the battlefield. It should be one of the following: 'high', 'medium', 'low'.\n" - "\t- If this battlefield has been fully discussed your or opponent's debate flow tree, the importance should be 'low'. \n" - "\t- If this battlefield include a new claim to be proposed, the importance should be 'high'. \n" - "\t- In other cases, based on the structure of the current debate flow tree and human debate examples, assign the corresponding importance to different actions. \n" -) \ No newline at end of file + "\t- If this battlefield has been fully discussed your or opponent's debate flow tree, the importance should be 'low'. \n" + "\t- If this battlefield include a new claim to be proposed, the importance should be 'high'. \n" + "\t- In other cases, based on the structure of the current debate flow tree and human debate examples, assign the corresponding importance to different actions. 
\n" +) diff --git a/src/utils/prompts/rebuttal.py b/src/utils/prompts/rebuttal.py index 96c20eb..3c2a200 100644 --- a/src/utils/prompts/rebuttal.py +++ b/src/utils/prompts/rebuttal.py @@ -8,36 +8,36 @@ default_rebuttal_prompt = context + ( - "Your response should be about {{n_words}} words and do not output other things than our response. When attacking, be aggresive and focus on a certain point that favors your side.\n" - ) + "Your response should be about {{n_words}} words and do not output other things than our response. When attacking, be aggresive and focus on a certain point that favors your side.\n" +) expert_rebuttal_prompt = context + ( - "\n## Knowledge\n" - "### Structure of a Rebuttal\n" - "A complete rebuttal should consist of multiple points, with each point containing four parts:\n" - "- **Lead-in:** Introduce the opponent's argument, evidence, reasoning, etc., that you will be refuting.\n" - "- **Explanation:** Briefly explain the opponent's argument to ensure clarity.\n" - "- **Rebuttal:** This is the core of your point. Utilize rebuttal techniques to directly challenge the opponent's claim.\n" - "- **Impact:** Concisely summarize the impact of your rebuttal and how it benefits your side.\n" - "\n" - "Note: Typically, the lead-in and explanation are combined into one sentence. The rebuttal is the most crucial part, and the impact summarizes its effect.\n" - "\n" - "### Rebuttal Techniques\n" - "- **Pointing out logical fallacies:** Identify errors in the opponent's reasoning, such as reversing cause and effect, equivocation (shifting the meaning of a key term), straw man arguments, circular reasoning, or tautology (repeating the same idea in different words).\n" - "- **Pointing out factual errors:** Highlight inaccuracies or weaknesses in the opponent's evidence, such as insufficient data, incorrect facts, or biased sources.\n" - " - **Example 1:** \"The evidence you provided is not enough to support your claim. For example, .... 
Therefore, your claim is not always correct.\"\n" - " - **Example 2:** \"The evidence you provided is from a biased source. For example, .... Therefore, your claim is not always correct.\"\n" - "- **Pointing out error logic:** Identify flawed logic underlying opponent's framework.\n" - " - **Example 1:** \"You conclusion is based on the premise of A, but A is not always true. For example, .... Therefore, your conclusion is not always correct.\"\n" - " - **Example 2:** \"You conclusion relies on the premise of A and the hidden premise of B, but B is not always true. For example, .... Therefore, your conclusion is not always correct.\"\n" - " - **Example 3:** \"You claim A and B can lead to C, but that is not always the case. For example, .... Therefore, your conclusion is not always correct.\"\n" - "- **Leveling the playing field:** This technique aims to neutralize the opponent's advantage or minimize the perceived harm of your side's position by demonstrating that both sides share the same issue or benefit.\n" - " - **Example 1:** \"You claim A, but B also has this problem. Therefore, both sides are equal in this regard, both having the issue.\"\n" - " - **Example 2:** \"You mention the benefits of A, but B also offers the same benefits. 
So, both sides are equal in this aspect, both being advantageous.\"\n" - "- **Acknowledging and countering:** Start by acknowledging a valid point made by your opponent before explaining why your position still offers a better solution.\n" - " - **Example:** \"While I acknowledge that [opponent's valid point], this concern is outweighed by [your counter-argument] because [evidence/reasoning].\"\n" - "\n" - ) + "\n## Knowledge\n" + "### Structure of a Rebuttal\n" + "A complete rebuttal should consist of multiple points, with each point containing four parts:\n" + "- **Lead-in:** Introduce the opponent's argument, evidence, reasoning, etc., that you will be refuting.\n" + "- **Explanation:** Briefly explain the opponent's argument to ensure clarity.\n" + "- **Rebuttal:** This is the core of your point. Utilize rebuttal techniques to directly challenge the opponent's claim.\n" + "- **Impact:** Concisely summarize the impact of your rebuttal and how it benefits your side.\n" + "\n" + "Note: Typically, the lead-in and explanation are combined into one sentence. The rebuttal is the most crucial part, and the impact summarizes its effect.\n" + "\n" + "### Rebuttal Techniques\n" + "- **Pointing out logical fallacies:** Identify errors in the opponent's reasoning, such as reversing cause and effect, equivocation (shifting the meaning of a key term), straw man arguments, circular reasoning, or tautology (repeating the same idea in different words).\n" + "- **Pointing out factual errors:** Highlight inaccuracies or weaknesses in the opponent's evidence, such as insufficient data, incorrect facts, or biased sources.\n" + ' - **Example 1:** "The evidence you provided is not enough to support your claim. For example, .... Therefore, your claim is not always correct."\n' + ' - **Example 2:** "The evidence you provided is from a biased source. For example, .... 
Therefore, your claim is not always correct."\n' + "- **Pointing out error logic:** Identify flawed logic underlying opponent's framework.\n" + ' - **Example 1:** "You conclusion is based on the premise of A, but A is not always true. For example, .... Therefore, your conclusion is not always correct."\n' + ' - **Example 2:** "You conclusion relies on the premise of A and the hidden premise of B, but B is not always true. For example, .... Therefore, your conclusion is not always correct."\n' + ' - **Example 3:** "You claim A and B can lead to C, but that is not always the case. For example, .... Therefore, your conclusion is not always correct."\n' + "- **Leveling the playing field:** This technique aims to neutralize the opponent's advantage or minimize the perceived harm of your side's position by demonstrating that both sides share the same issue or benefit.\n" + ' - **Example 1:** "You claim A, but B also has this problem. Therefore, both sides are equal in this regard, both having the issue."\n' + ' - **Example 2:** "You mention the benefits of A, but B also offers the same benefits. So, both sides are equal in this aspect, both being advantageous."\n' + "- **Acknowledging and countering:** Start by acknowledging a valid point made by your opponent before explaining why your position still offers a better solution.\n" + ' - **Example:** "While I acknowledge that [opponent\'s valid point], this concern is outweighed by [your counter-argument] because [evidence/reasoning]."\n' + "\n" +) expert_rebuttal_prompt_2 = expert_rebuttal_prompt + ( "## Workflow: Generate a spoken transcript for the rebuttal statement under the word budget ({{n_words}} words). \n" @@ -47,13 +47,11 @@ "4. Deliver a rebuttal statement with {{n_words}} words in total. 
Present only the final text.\n" "Note that it's possible that the debate flow tree is not provided, in this case, you can just generate a rebuttal statement without following the debate flow tree.\n" "\n" - "## Debate Flow Tree Structure\n" "You are given two debate trees that model the back-and-forth between you and your opponent. Each node contains:\n" "* Data: The specific claims and arguments\n" "* Visit Count: Number of times addressed in debate\n" "* Status: 'proposed' (new), 'attacked' (challenged), or 'solved' (resolved)\n\n" - "Your Debate Tree:\n" "* Level-1 Nodes: Your main claims and arguments\n" "* Level-2 Nodes: Opponent's attacks on your claims and arguments\n" @@ -62,31 +60,25 @@ "* Level-1 Nodes: Opponent's main claims and arguments\n" "* Level-2 Nodes: Your attacks on their claims and arguments\n" "* Level-3 Nodes: Opponent's rebuttal on your attacks\n\n" - "## Input Information\n" "Debate flow trees with node data:\n" "**Your Tree**: \n{tree}\n\n" "**Opponent's Tree**: \n{oppo_tree}\n\n" - "## Battlefields\n" "{{tips}}\n\n" - - "## Important Notes\n" "1. Organize your points logically with clear purpose statements. \n" - " - Clearly mention the actions you will take in each point. For example, 'we will address [X] concerns mentioned by the opponent, which are ' for attack / rebut actions or 'after that,we will reinforce our claims that' for reinforce action.\n" - " - Use clear indicators like first, second, third, etc. to organize your points.\n" - # " - Among the selected battlefields, discuss the one with attack / rebut actions first. \n" - # " - If the opponent's argument is not clear, you can ask the opponent to clarify it first.\n" + " - Clearly mention the actions you will take in each point. For example, 'we will address [X] concerns mentioned by the opponent, which are ' for attack / rebut actions or 'after that,we will reinforce our claims that' for reinforce action.\n" + " - Use clear indicators like first, second, third, etc. 
to organize your points.\n" + # " - Among the selected battlefields, discuss the one with attack / rebut actions first. \n" + # " - If the opponent's argument is not clear, you can ask the opponent to clarify it first.\n" "2. Avoid restating or simply repeating the same evidence or arguments across different points.\n" "3. Avoid repeating the similar arguments in your previous statement. Use the phrase 'as we have discussed' to refer to the previous statement.\n" "4. Only use facts that are generally accepted and don't require specific citation. Don't hallucinate any particular experimental results, statistical findings from named studies, or quotes from specific researchers until the evidence pool is provided\n" "5. When presenting alternatives or counterarguments, offer specific, implementable solutions rather than just criticism.\n" "6. Address both systemic and individual factors when relevant, showing how they can complement rather than contradict each other.\n" "\n" - "## Output with the format (two parts, start with **Rebuttal Plan** and then **Statement**):\n" "**Rebuttal Plan**: First, allocate words for the overview of the rebuttal. Then, allocate the rest of the word budget among the battlefields. Explain your rationale. Briefly mention one or two rhetorical techniques to use and logical fallacies to discuss. Make sure the total words is {{n_words}}.\n" "**Statement**: After the rebuttal plan, generate a rebuttal statement of {{n_words}} words in total, do not include any other text\n\n" - ) diff --git a/src/utils/prompts/search.py b/src/utils/prompts/search.py index bf099d2..3b68b58 100644 --- a/src/utils/prompts/search.py +++ b/src/utils/prompts/search.py @@ -46,7 +46,8 @@ "If you think current information is enough, return [Finish] to end the search process." 
) -summarize_result_prompt = ("## Task: Organize retrieved information into an argument (within 100 words) to answer the query and support the claim\n" +summarize_result_prompt = ( + "## Task: Organize retrieved information into an argument (within 100 words) to answer the query and support the claim\n" "- When citing data and academic research, provide sources within the context and avoid using information not present in the provided materials. Ensure your arguments are supported by data and academic evidence.\n" "- When citing data, **using specific figures** instead of just descriptive language will make your argument more persuasive.\n" "- When citing data and academic research, **don't just** list the information, **but also explain** how it supports your point.\n" @@ -57,10 +58,10 @@ extract_author_prompt = ( "## Extract the author of the article from the given content.\n" - "1. Identify the author of the article based on the **raw_content** of the evidence. If the information of the author is not available, just output \"Unknown\" for **author**.\n" - "2. Provide the information of the author, such as the affiliation, the position, and the expertise. If the information of the author is not available, just output \"Unknown\" for **author_info**.\n" - "3. Extract the publication information, such as the journal or conference name and year. If the information of the author is not available, just output \"Unknown\" for **publication**.\n" - "4. Make sure the author's expertise is related to the evidence, the source and the publication. If the author is an expert in medicine while the article is about economic, output \"Unknown\" for and **author** and **author_info** for this mismatch.\n" + '1. Identify the author of the article based on the **raw_content** of the evidence. If the information of the author is not available, just output "Unknown" for **author**.\n' + '2. 
Provide the information of the author, such as the affiliation, the position, and the expertise. If the information of the author is not available, just output "Unknown" for **author_info**.\n' + '3. Extract the publication information, such as the journal or conference name and year. If the information of the author is not available, just output "Unknown" for **publication**.\n' + '4. Make sure the author\'s expertise is related to the evidence, the source and the publication. If the author is an expert in medicine while the article is about economic, output "Unknown" for and **author** and **author_info** for this mismatch.\n' "### Input Information\n" "**Evidence**: \n{evidence}. \n\n" "### Response Format\n" @@ -70,4 +71,4 @@ ' "authors": [{{"id": "", "author": "", "author_info": "", "publication": ""}}]\n' "}}\n" "```\n" -) \ No newline at end of file +) diff --git a/src/utils/time_estimator.py b/src/utils/time_estimator.py index 6a05afc..2570158 100644 --- a/src/utils/time_estimator.py +++ b/src/utils/time_estimator.py @@ -1,16 +1,19 @@ +import json import os import re -import json +from io import BytesIO +from typing import Dict, List + import syllables from g2p_en import G2p -from typing import List, Dict -from openai import OpenAI from mutagen.mp3 import MP3 -from io import BytesIO -from .fs_wrapper import FastSpeechWrapper +from openai import OpenAI + from .constants import openai_api_key +from .fs_wrapper import FastSpeechWrapper from .tool import remove_citation, remove_subtitles + class LengthEstimator: def __init__(self, mode): self.mode = mode @@ -25,7 +28,7 @@ def query_time(self, content: List[str], mode=None) -> List[float]: self.client = FastSpeechWrapper(batch_size=2) elif self.mode == "openai": self.client = OpenAI(api_key=openai_api_key) - + if isinstance(content, str): content = [content] clean_content = [remove_citation(c)[0] for c in content] @@ -38,18 +41,13 @@ def query_time(self, content: List[str], mode=None) -> List[float]: length = 
[LengthEstimator.count_phonemes(c) for c in clean_content] elif self.mode == "fastspeech": length = self.client.query_time(clean_content) - length = [l * 1.11 - 7 if l > 100 else l for l in length] # fit openai speed + length = [l * 1.11 - 7 if l > 100 else l for l in length] # fit openai speed elif self.mode == "openai": length = [] for c in clean_content: - response = self.client.audio.speech.create( - model="tts-1", - voice="echo", - input=c[:4096] - ) + response = self.client.audio.speech.create(model="tts-1", voice="echo", input=c[:4096]) audio_bytes = BytesIO(response.content) - length.append(MP3(audio_bytes).info.length) else: @@ -57,19 +55,18 @@ def query_time(self, content: List[str], mode=None) -> List[float]: if len(length) == 1: return length[0] return length - @staticmethod def count_words(text): """ Count the number of words in a text string. - + Args: text (str): The input text to count words from - + Returns: int: Number of words in the text - + Features: - Handles multiple spaces/newlines - Considers hyphenated words as single words @@ -79,68 +76,68 @@ def count_words(text): """ if not isinstance(text, str): raise TypeError("Input must be a string") - + if not text.strip(): return 0 - + # Replace multiple spaces/newlines with single space - text = ' '.join(text.split()) - + text = " ".join(text.split()) + # Handle special cases def is_word(token): # Check if token contains at least one letter or number return any(c.isalnum() for c in token) - + # Split on spaces and filter out non-words words = [word for word in text.split() if is_word(word)] - + return len(words) @staticmethod def count_syllables(text): if not isinstance(text, str): raise TypeError("Input must be a string") - + if not text.strip(): return 0 - + # Replace multiple spaces/newlines with single space - text = ' '.join(text.split()) - + text = " ".join(text.split()) + # Handle special cases def is_word(token): # Check if token contains at least one letter or number return 
any(c.isalnum() for c in token) - + # Split on spaces and filter out non-words words = [word for word in text.split() if is_word(word)] n_count = syllables.estimate(" ".join(words)) - + return n_count @staticmethod def count_phonemes(text): if not isinstance(text, str): raise TypeError("Input must be a string") - + if not text.strip(): return 0 - + # Replace multiple spaces/newlines with single space - text = ' '.join(text.split()) - + text = " ".join(text.split()) + # Handle special cases def is_word(token): # Check if token contains at least one letter or number return any(c.isalnum() for c in token) - + # Split on spaces and filter out non-words words = [word for word in text.split() if is_word(word)] g2p = G2p() phonemes = [x for x in g2p(" ".join(words)) if x != " "] n_count = len(phonemes) - + return n_count @@ -150,4 +147,4 @@ def is_word(token): content = [ """Thank you very much. So I think that if you want to invest in tires, you should invest in tires. I think that there is income inequality happening in the United States. There is education inequality. There is a planet which is slowly becoming uninhabitable if you look at the Flint water crisis. If you look at droughts that happen in California all the time and if you want to help, these are real problems that exist that we need to help people who are currently not having all of their basic human rights fulfilled. These are things that the government should be investing money in and should probably be investing more money in because we see them being problems in our society that are hurting people. What I’m going to do in this speech is I’m going to continue talking about these criteria, continue talking about why we're not meeting basic needs and why also the market itself is probably solving this problem already. Before that, two points of rebuttal to what we just heard from Project Debater. 
So firstly, we heard that this is technology that would end up benefiting society but we're not sure we haven't yet heard evidence that shows us why it would benefit all of society, perhaps some parts of society, maybe upper middle class or upper class citizens could benefit from these inspiring research, could benefit from the technological innovations. But most of society, people who are currently in the United States have resource scarcity, people who are hungry, people who do not have access to good education, aren't really helped by this. So we think it is like that, a government subsidy should go to something that helps everyone particularly weaker classes in society. Second point is this idea of an exploding industry which creates jobs and international cooperation. So firstly, we've heard evidence that this already exists, right? We've heard evidence that companies are investing in this as is. And secondly, we think that international cooperation or the specific things have alternatives. We can cooperate over other types of economic trade deals. We can cooperate in other ways with different countries. It's not necessary to very specifically fund space 98 exploration to get these benefits. So as we remember, there are two criteria that I believe the government needs to meet before subsidizing something. It being a basic human need, we don't see space exploration meeting that and B, that this is something that can't otherwise exist, right? So we've already heard from Project Debater how huge this industry is, right? How much investment there's already going on in the private sector and we think this is because there's lots of curiosity especially among wealthy people who maybe want to get to space for personal use or who want to build a colony on Mars and then rent out the rooms there. We know that Elon Musk is doing this already. 
We know that other people are doing it and we think they're spending money and willing to spend even more money because of the competition between them. So Project Debater should know better than all of us how competitions often bear extremely impressive fruit, right? We think that when wealthy philanthropist or people who are willing to fund research on their own race each other to be the first to achieve new heights in terms of space exploration, that brings us to great achievements already and we think that the private market is doing this well enough already. Considering that we already have movement in that direction, again we see Elon Musk's company, we see all of these companies working already. We think that it's not that the government money won't help out if it were to be given, we just think it doesn't meet the criteria in comparison to other things, right? So given the fact that the market already has a lot of money invested in this, already has movement in those research directions, and given the fact that we still don't think this is a good enough plan to prioritize over other basic needs that the government should be providing people. 
We think that at the end of the day, given the fact that there are also alternatives to getting all of these benefits of international cooperation, it simply doesn't justify specifically the government allocating its funds for this purpose when it should be allocating them towards other needs of other people.""" ] - print(estimator.query_time(content)) \ No newline at end of file + print(estimator.query_time(content)) diff --git a/src/utils/tool.py b/src/utils/tool.py index d570068..d8a18a6 100644 --- a/src/utils/tool.py +++ b/src/utils/tool.py @@ -1,47 +1,52 @@ -import re -import os import logging +import os +import re +import time from typing import List + from pulp import LpMaximize, LpProblem, LpVariable -import time from .constants import MAX_TRY_NUM from .prompts import debater_system_prompt log_file_path = "" + def get_output_path(base_dir="../log_files/", suffix="log"): global log_file_path if not os.path.exists(base_dir): os.makedirs(base_dir) - log_files = [f for f in os.listdir(base_dir) if f.endswith('.log')] + log_files = [f for f in os.listdir(base_dir) if f.endswith(".log")] if log_files: - max_num = max(int(f.split('.')[0]) for f in log_files) + max_num = max(int(f.split(".")[0]) for f in log_files) new_log_file = f"{max_num + 1}.{suffix}" else: new_log_file = f"1.{suffix}" log_file_path = os.path.join(base_dir, new_log_file) return log_file_path + def create_log(log_file=None): - log = logging.getLogger('debate_logger') + log = logging.getLogger("debate_logger") if not log.hasHandlers(): # Check if the logger already has handlers log.setLevel(logging.DEBUG) # Set the logger level to DEBUG if log_file is None: log_file = get_output_path() print(f"Log file: {log_file}") - + # File handler for logging to a file with DEBUG level file_handler = logging.FileHandler(log_file) file_handler.setLevel(logging.DEBUG) - file_formatter = logging.Formatter('%(asctime)s %(levelname)s %(module)s - %(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + 
file_formatter = logging.Formatter( + "%(asctime)s %(levelname)s %(module)s - %(funcName)s: %(message)s", datefmt="%Y-%m-%d %H:%M:%S" + ) file_handler.setFormatter(file_formatter) # Stream handler for logging to console with INFO level stream_handler = logging.StreamHandler() stream_handler.setLevel(logging.INFO) - stream_formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + stream_formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s", datefmt="%Y-%m-%d %H:%M:%S") stream_handler.setFormatter(stream_formatter) # Add handlers to the logger @@ -49,26 +54,30 @@ def create_log(log_file=None): log.addHandler(stream_handler) return log -logger = create_log() + +logger = create_log() + def find_json(x): - idx = x.find('{') - ridx = x.rfind('}') + idx = x.find("{") + ridx = x.rfind("}") if idx == -1 or ridx == -1: - return '' - return x[idx:ridx+1] + return "" + return x[idx : ridx + 1] + def extract_numbers(s): - numbers = [float(n) for n in re.findall(r'[-+]?[0-9]*\.?[0-9]+', s)] + numbers = [float(n) for n in re.findall(r"[-+]?[0-9]*\.?[0-9]+", s)] return numbers + def remove_citation(text: str, keep_main=False) -> str: """ Remove citations in formats like (1), (1,2), [1], [1,2,3] from text. 
- + Args: text (str): Input text containing citations - + Returns: str: Text with citations removed """ @@ -77,35 +86,36 @@ def remove_citation(text: str, keep_main=False) -> str: statement = statement.replace("## Reference", "Reference") statement = statement.replace("Reference", "**Reference**") - if "**Reference**" in statement: content, reference = statement.split("**Reference**")[:2] else: - content, reference = statement, '' + content, reference = statement, "" if not keep_main: - pattern = r'\([^0-9_\s\W]*\)' - content = re.sub(pattern, '', content) - + pattern = r"\([^0-9_\s\W]*\)" + content = re.sub(pattern, "", content) + # Remove citations with square brackets [] - pattern = r'\[[^0-9_\s\W]*\]' - content = re.sub(pattern, '', content) - + pattern = r"\[[^0-9_\s\W]*\]" + content = re.sub(pattern, "", content) + # Remove extra whitespace that might be left # content = re.sub(r'\s+', ' ', content) - + return content.strip(), reference.strip() + def remove_subtitles(content): - pattern = r'^\*\*.*\*\*$' + pattern = r"^\*\*.*\*\*$" # Remove lines matching the pattern - cleaned_text = re.sub(pattern, '', content, flags=re.MULTILINE) + cleaned_text = re.sub(pattern, "", content, flags=re.MULTILINE) # Remove any leading/trailing whitespace or empty lines - cleaned_text = '\n'.join([line for line in cleaned_text.split('\n')]) + cleaned_text = "\n".join([line for line in cleaned_text.split("\n")]) return cleaned_text + def find_max_three_indices(list_of_lists): max_values = [] for sublist in list_of_lists: @@ -121,33 +131,33 @@ def find_max_three_indices(list_of_lists): return result_indices + def extract_list_from_response(response): - ''' + """ response format: 1. [...] 2. [...] 
- ''' + """ # Split the response into lines - lines = response.strip().split('\n') - + lines = response.strip().split("\n") + # Initialize an empty list to store the extracted items extracted_list = [] - + # Iterate through each line for line in lines: # Remove leading/trailing whitespace line = line.strip() - + # Check if the line starts with a number followed by a period - if line and line[0].isdigit() and '. ' in line: + if line and line[0].isdigit() and ". " in line: # Split the line at the first occurrence of '. ' - _, item = line.split('. ', 1) - + _, item = line.split(". ", 1) + # Add the item to the extracted list extracted_list.append(item) - - return extracted_list + return extracted_list def lp_optimize(actions: List[str], rewards: List[float], costs: List[float], budget: float): @@ -157,7 +167,7 @@ def lp_optimize(actions: List[str], rewards: List[float], costs: List[float], bu # Define the problem problem = LpProblem("Maximize_Rewards_With_Constraints", LpMaximize) # Define binary decision variables - x = {a: LpVariable(a, cat='Binary') for a in actions} + x = {a: LpVariable(a, cat="Binary") for a in actions} # Objective function problem += sum(rewards[i] * x[actions[i]] for i in range(len(actions))), "Total_Reward" # Budget constraint @@ -165,7 +175,7 @@ def lp_optimize(actions: List[str], rewards: List[float], costs: List[float], bu # Add "no three actions" constraints (e.g., a1, a2, a3 cannot all be selected) for i in range(0, len(actions), 3): - problem += x[actions[i]] + x[actions[i+1]] + x[actions[i+2]] <= 1, f"No_Three_Actions_{i}_{i+1}_{i+2}" + problem += x[actions[i]] + x[actions[i + 1]] + x[actions[i + 2]] <= 1, f"No_Three_Actions_{i}_{i+1}_{i+2}" # Solve the problem problem.solve() @@ -197,32 +207,33 @@ def get_response_with_retry(llm, prompt, required_key, **kwargs): time.sleep(30) return content, response + def convert_messages_to_prompt(messages): prompt = [] for message in messages: - role = message.get('role', '') - content = 
message.get('content', '') - if role == 'system': + role = message.get("role", "") + content = message.get("content", "") + if role == "system": prompt.append(f"System: {content}") - elif role == 'user': + elif role == "user": prompt.append(f"Human: {content}") - elif role == 'assistant': + elif role == "assistant": prompt.append(f"Assistant: {content}") return "\n\n".join(prompt) def identify_number_in_text(text): - pattern = r' [-+]?[0-9]*\.?[0-9]+(?:%|\b) ' + pattern = r" [-+]?[0-9]*\.?[0-9]+(?:%|\b) " numbers = re.findall(pattern, text) numbers = [x.strip() for x in numbers] return numbers - def sort_by_importance(importance): importance_order = {"high": 3, "medium": 2, "low": 1} return importance_order.get(importance, 0) + def sort_by_action(action): action_order = {"attack": 3, "reinforce": 2, "propose": 1} - return action_order.get(action, 0) \ No newline at end of file + return action_order.get(action, 0)