From 2c1ecc53d9bb6405a4dc700417ce031ed8ffc08f Mon Sep 17 00:00:00 2001 From: Gaudy Blanco Date: Wed, 6 May 2026 00:32:21 -0600 Subject: [PATCH 1/8] storage options added to the init command --- benchmark_qed/autoe/cli.py | 64 +++++ benchmark_qed/autoe/config.py | 4 +- benchmark_qed/autoq/cli.py | 19 ++ benchmark_qed/autoq/config.py | 4 +- benchmark_qed/cli/config_resolver.py | 195 +++++++++++++ benchmark_qed/cli/init_config.py | 409 ++++++++++++++++++++------- benchmark_qed/data/cli.py | 56 +--- docs/cli/autoe.md | 58 +++- docs/cli/autoq.md | 61 +++- docs/cli/config_init.md | 127 +++++++++ docs/cli/data.md | 23 +- docs/cli/retrieval_scores.md | 32 +++ docs/datasets.md | 17 +- docs/developing.md | 12 +- docs/notebooks/autoe.ipynb | 4 +- mkdocs.yaml | 3 + tests/test_config_resolver.py | 125 ++++++++ tests/test_init_config_cli.py | 91 ++++++ 18 files changed, 1114 insertions(+), 190 deletions(-) create mode 100644 benchmark_qed/cli/config_resolver.py create mode 100644 tests/test_config_resolver.py create mode 100644 tests/test_init_config_cli.py diff --git a/benchmark_qed/autoe/cli.py b/benchmark_qed/autoe/cli.py index eb6ff8f..7b60274 100644 --- a/benchmark_qed/autoe/cli.py +++ b/benchmark_qed/autoe/cli.py @@ -41,6 +41,11 @@ ) from benchmark_qed.autoe.pairwise import analyze_criteria, get_pairwise_scores from benchmark_qed.autoe.reference import get_reference_scores +from benchmark_qed.cli.config_resolver import ( + AccountUrlOption, + ConnectionStringOption, + resolve_config_path, +) from benchmark_qed.cli.utils import print_df from benchmark_qed.llm.factory import ModelFactory @@ -148,10 +153,17 @@ def pairwise_scores( help="The key in the JSON file that contains the question ID. This is used to match questions across different conditions." 
), ] = "question_id", + account_url: AccountUrlOption = None, + connection_string: ConnectionStringOption = None, ) -> None: """Generate scores for the different conditions provided in the JSON file.""" if exclude_criteria is None: exclude_criteria = [] + comparison_spec = resolve_config_path( + comparison_spec, + account_url=account_url, + connection_string=connection_string, + ) config = load_config(PairwiseConfig, comparison_spec) config.criteria = [ @@ -282,10 +294,17 @@ def reference_scores( help="The key in the JSON file that contains the question ID. This is used to match questions across different conditions." ), ] = "question_id", + account_url: AccountUrlOption = None, + connection_string: ConnectionStringOption = None, ) -> None: """Generate scores for the generated answers provided in the JSON file.""" if exclude_criteria is None: exclude_criteria = [] + comparison_spec = resolve_config_path( + comparison_spec, + account_url=account_url, + connection_string=connection_string, + ) config = load_config(ReferenceConfig, comparison_spec) config.criteria = [ @@ -397,6 +416,8 @@ def assertion_scores( str, typer.Option(help="Assertions key in JSON (single-RAG mode only)."), ] = "assertions", + account_url: AccountUrlOption = None, + connection_string: ConnectionStringOption = None, ) -> None: """Score assertions for RAG method(s). @@ -425,6 +446,12 @@ def assertion_scores( """ import yaml + config_path = resolve_config_path( + config_path, + account_url=account_url, + connection_string=connection_string, + ) + # Load raw YAML to detect format with Path(config_path).open(encoding="utf-8") as f: raw_config = yaml.safe_load(f) @@ -672,6 +699,8 @@ def hierarchical_assertion_scores( help="The key in assertions that contains the supporting assertions list." ), ] = "supporting_assertions", + account_url: AccountUrlOption = None, + connection_string: ConnectionStringOption = None, ) -> None: """Score hierarchical assertions with supporting assertions. 
@@ -708,6 +737,11 @@ def hierarchical_assertion_scores( """ import yaml + config_path = resolve_config_path( + config_path, + account_url=account_url, + connection_string=connection_string, + ) # Load raw YAML to detect format with Path(config_path).open(encoding="utf-8") as f: raw_config = yaml.safe_load(f) @@ -933,6 +967,9 @@ def assertion_significance( help="Path to the assertion significance configuration YAML file." ), ], + *, + account_url: AccountUrlOption = None, + connection_string: ConnectionStringOption = None, ) -> None: """Run statistical significance tests on standard assertion scores. @@ -954,6 +991,11 @@ def assertion_significance( """ from benchmark_qed.autoe.assertion import compare_assertion_scores_significance + config_path = resolve_config_path( + config_path, + account_url=account_url, + connection_string=connection_string, + ) config = load_config(AssertionSignificanceConfig, config_path) rich_print("[bold]Running assertion significance tests[/bold]") @@ -996,6 +1038,9 @@ def hierarchical_assertion_significance( help="Path to the hierarchical assertion significance config YAML." ), ], + *, + account_url: AccountUrlOption = None, + connection_string: ConnectionStringOption = None, ) -> None: """Run statistical significance tests on hierarchical assertion scores. 
@@ -1018,6 +1063,11 @@ def hierarchical_assertion_significance( compare_hierarchical_assertion_scores_significance, ) + config_path = resolve_config_path( + config_path, + account_url=account_url, + connection_string=connection_string, + ) config = load_config(HierarchicalAssertionSignificanceConfig, config_path) rich_print("[bold]Running hierarchical assertion significance tests[/bold]") @@ -1073,6 +1123,8 @@ def generate_retrieval_reference( bool, typer.Option(help="Whether to print the model usage statistics."), ] = False, + account_url: AccountUrlOption = None, + connection_string: ConnectionStringOption = None, ) -> None: """Generate retrieval reference data (cluster relevance) for a question set. @@ -1097,6 +1149,11 @@ def generate_retrieval_reference( Otherwise, text units will be loaded from text_units_path and clustered. """ # Run all async work in a single event loop + config_path = resolve_config_path( + config_path, + account_url=account_url, + connection_string=connection_string, + ) asyncio.run(_generate_retrieval_reference_async(config_path, print_model_usage)) @@ -1410,6 +1467,8 @@ def retrieval_scores( int, typer.Option(help="Maximum concurrent relevance assessments."), ] = 8, + account_url: AccountUrlOption = None, + connection_string: ConnectionStringOption = None, ) -> None: """Evaluate retrieval metrics (precision, recall, fidelity) for RAG methods. 
@@ -1428,6 +1487,11 @@ def retrieval_scores( RationaleRelevanceRater, ) + config_path = resolve_config_path( + config_path, + account_url=account_url, + connection_string=connection_string, + ) config = load_config(RetrievalScoresConfig, config_path) config.output_dir.mkdir(parents=True, exist_ok=True) diff --git a/benchmark_qed/autoe/config.py b/benchmark_qed/autoe/config.py index 0707003..ef3298a 100644 --- a/benchmark_qed/autoe/config.py +++ b/benchmark_qed/autoe/config.py @@ -87,12 +87,12 @@ class BaseAutoEConfig(BaseModel): input_storage: StorageConfig | None = Field( default=None, - description="Optional storage configuration for reading input from blob/cosmos. When omitted, reads from local filesystem paths.", + description="Optional storage configuration for reading input from Azure Blob Storage. When omitted, reads from local filesystem paths.", ) output_storage: StorageConfig | None = Field( default=None, - description="Optional storage configuration for writing output to blob/cosmos. When omitted, writes to the local filesystem path specified in the CLI.", + description="Optional storage configuration for writing output to Azure Blob Storage. 
When omitted, writes to the local filesystem path specified in the CLI.", ) @model_validator(mode="after") diff --git a/benchmark_qed/autoq/cli.py b/benchmark_qed/autoq/cli.py index 11d88fc..1de1f77 100644 --- a/benchmark_qed/autoq/cli.py +++ b/benchmark_qed/autoq/cli.py @@ -49,6 +49,11 @@ from benchmark_qed.autoq.question_gen.data_questions.local_question_gen import ( DataLocalQuestionGen, ) +from benchmark_qed.cli.config_resolver import ( + AccountUrlOption, + ConnectionStringOption, + resolve_config_path, +) from benchmark_qed.llm.factory import ModelFactory from benchmark_qed.llm.type.base import ChatModel @@ -519,8 +524,15 @@ def autoq( bool, typer.Option(help="Whether to print the model usage statistics after scoring."), ] = False, + account_url: AccountUrlOption = None, + connection_string: ConnectionStringOption = None, ) -> None: """Generate questions from the input data.""" + configuration_path = resolve_config_path( + configuration_path, + account_url=account_url, + connection_string=connection_string, + ) config = load_config(QuestionGenerationConfig, configuration_path) if generation_types is None: @@ -958,6 +970,8 @@ def generate_assertions( bool, typer.Option(help="Whether to print the model usage statistics."), ] = False, + account_url: AccountUrlOption = None, + connection_string: ConnectionStringOption = None, ) -> None: r"""Generate assertions for existing questions. 
@@ -981,6 +995,11 @@ def generate_assertions( output/data_linked_questions/candidate_questions.json \ output/data_linked_questions/ --type linked """ + configuration_path = resolve_config_path( + configuration_path, + account_url=account_url, + connection_string=connection_string, + ) config = load_config(QuestionGenerationConfig, configuration_path) loop = asyncio.get_event_loop() diff --git a/benchmark_qed/autoq/config.py b/benchmark_qed/autoq/config.py index 61df046..aec8e51 100644 --- a/benchmark_qed/autoq/config.py +++ b/benchmark_qed/autoq/config.py @@ -68,7 +68,7 @@ class InputConfig(BaseModel): ) storage: StorageConfig | None = Field( default=None, - description="Optional storage configuration for reading input from blob/cosmos. When omitted, reads from the local filesystem.", + description="Optional storage configuration for reading input from Azure Blob Storage. When omitted, reads from the local filesystem.", ) @@ -689,7 +689,7 @@ class QuestionGenerationConfig(BaseModel): output_storage: StorageConfig | None = Field( default=None, - description="Optional storage configuration for writing output to blob/cosmos. When omitted, writes to the local filesystem path specified in the CLI.", + description="Optional storage configuration for writing output to Azure Blob Storage. When omitted, writes to the local filesystem path specified in the CLI.", ) diff --git a/benchmark_qed/cli/config_resolver.py b/benchmark_qed/cli/config_resolver.py new file mode 100644 index 0000000..000386b --- /dev/null +++ b/benchmark_qed/cli/config_resolver.py @@ -0,0 +1,195 @@ +# Copyright (c) 2025 Microsoft Corporation. +"""Resolve CLI config-file arguments that may live in Azure Blob Storage. + +CLI commands such as ``benchmark-qed autoq `` accept a +local filesystem path for the configuration file. 
To allow the configuration to +also live in Azure Blob Storage, this module exposes :func:`resolve_config_path` +which detects ``blob://`` URIs, downloads the configuration *and every sibling +file under the same prefix* (so prompt templates resolve correctly) into a +temporary directory, and returns the local path to the downloaded settings +file. + +Authentication may be provided either as direct arguments to +:func:`resolve_config_path` (the CLI surfaces ``--account-url`` / +``--connection-string`` options) or via the environment variables +``AZURE_STORAGE_CONNECTION_STRING`` / ``AZURE_STORAGE_ACCOUNT_URL`` as a +fallback. + +URI format: ``blob://<container>/<key>``. ``pathlib`` collapses ``//`` to ``/`` +when Typer coerces the argument, so ``blob:/<container>/<key>`` is also +accepted. +""" + +from __future__ import annotations + +import asyncio +import os +import re +import tempfile +from pathlib import Path +from typing import TYPE_CHECKING, Annotated + +import typer +from graphrag_storage.storage_config import StorageConfig +from graphrag_storage.storage_factory import create_storage + +if TYPE_CHECKING: + from graphrag_storage import Storage + +_BLOB_SCHEMES = ("blob://", "blob:/") + + +# Reusable Annotated type aliases for CLI commands that accept a config path +# which may be a blob:// URI. Adding these as keyword options on a command +# lets users pass credentials inline (no env vars required). +AccountUrlOption = Annotated[ + str | None, + typer.Option( + "--account-url", + help=( + "Azure Blob Storage account URL for managed-identity auth, used " + "when the config path is a blob:// URI. Falls back to " + "$AZURE_STORAGE_ACCOUNT_URL." + ), + ), +] + +ConnectionStringOption = Annotated[ + str | None, + typer.Option( + "--connection-string", + help=( + "Azure Blob Storage connection string, used when the config path " + "is a blob:// URI. Falls back to " + "$AZURE_STORAGE_CONNECTION_STRING." 
+ ), + ), +] + + +def is_blob_uri(path: Path | str) -> bool: + """Return True when ``path`` looks like a ``blob://`` URI.""" + return str(path).startswith(_BLOB_SCHEMES) + + +def parse_blob_uri(uri: str) -> tuple[str, str]: + """Parse ``blob://<container>/<key>`` into ``(container, key)``. + + Also accepts the single-slash form produced when the URI is round-tripped + through :class:`pathlib.Path` (e.g. ``blob:/<container>/<key>``). + """ + rest: str | None = None + if uri.startswith("blob://"): + rest = uri[len("blob://") :] + elif uri.startswith("blob:/"): + rest = uri[len("blob:/") :] + if rest is None or "/" not in rest: + msg = f"Invalid blob URI {uri!r}: expected blob:///" + raise typer.BadParameter(msg) + container, _, key = rest.partition("/") + if not container or not key: + msg = f"Invalid blob URI {uri!r}: expected blob:///" + raise typer.BadParameter(msg) + return container, key + + +def _create_blob_storage( + container: str, + *, + base_dir: str | None = None, + account_url: str | None = None, + connection_string: str | None = None, +) -> Storage: + """Create a blob :class:`Storage` from explicit args or environment vars.""" + connection_string = connection_string or os.getenv( + "AZURE_STORAGE_CONNECTION_STRING" + ) + account_url = account_url or os.getenv("AZURE_STORAGE_ACCOUNT_URL") + if not connection_string and not account_url: + msg = ( + "Cannot read blob:// config: pass --account-url or " + "--connection-string, or set AZURE_STORAGE_ACCOUNT_URL / " + "AZURE_STORAGE_CONNECTION_STRING in the environment." + ) + raise typer.BadParameter(msg) + config = StorageConfig( + type="blob", + container_name=container, + connection_string=connection_string, + account_url=account_url, + base_dir=base_dir, + ) + return create_storage(config) + + +async def _download_tree(storage: Storage, dest: Path) -> int: + """Download every blob in ``storage`` (already scoped to a base_dir) into ``dest``. + + Returns the number of files downloaded. 
+ """ + keys = list(storage.find(re.compile(r".*"))) + count = 0 + for key in keys: + if not key: + continue + local_path = dest / key + local_path.parent.mkdir(parents=True, exist_ok=True) + data = await storage.get(key, as_bytes=True) + if data is None: + continue + local_path.write_bytes(data if isinstance(data, bytes) else str(data).encode()) + count += 1 + return count + + +def resolve_config_path( + path: Path | str, + *, + account_url: str | None = None, + connection_string: str | None = None, +) -> Path: + """Resolve a CLI config-file argument that may be a ``blob://`` URI. + + For local paths this returns ``Path(path)`` unchanged. For ``blob://`` URIs + it downloads the configuration file *and every sibling file under the same + prefix* (so referenced prompt templates resolve via relative paths) into a + fresh temporary directory and returns the local path to the downloaded + config file. + + ``account_url`` / ``connection_string`` take precedence over the + corresponding ``AZURE_STORAGE_*`` environment variables. + """ + if not is_blob_uri(path): + return Path(path) + + container, key = parse_blob_uri(str(path)) + parent_key, _, filename = key.rpartition("/") + if not filename: + msg = f"Invalid blob URI {path!r}: missing config filename." + raise typer.BadParameter(msg) + + storage = _create_blob_storage( + container, + base_dir=parent_key or None, + account_url=account_url, + connection_string=connection_string, + ) + + tmp_root = Path(tempfile.mkdtemp(prefix="benchmark-qed-blob-")) + typer.echo( + f"Downloading config from blob://{container}/" + f"{parent_key + '/' if parent_key else ''}* to {tmp_root}" + ) + downloaded = asyncio.get_event_loop().run_until_complete( + _download_tree(storage, tmp_root) + ) + + local_settings = tmp_root / filename + if not local_settings.exists(): + msg = ( + f"Config file not found in blob storage: {path} " + f"(downloaded {downloaded} file(s) under prefix " + f"{parent_key!r}; check the URI and credentials)." 
+ ) + raise typer.BadParameter(msg) + return local_settings diff --git a/benchmark_qed/cli/init_config.py b/benchmark_qed/cli/init_config.py index 009ee2c..f6722ed 100644 --- a/benchmark_qed/cli/init_config.py +++ b/benchmark_qed/cli/init_config.py @@ -1,11 +1,14 @@ # Copyright (c) 2025 Microsoft Corporation. """Autoq CLI for generating questions.""" +import asyncio from enum import StrEnum from pathlib import Path from typing import Annotated import typer +from graphrag_storage.storage_config import StorageConfig +from graphrag_storage.storage_factory import create_storage from benchmark_qed.autod.prompts import summarization from benchmark_qed.autoe.prompts import assertion as assertion_prompts @@ -96,23 +99,9 @@ class ConfigType(StrEnum): text_column: body_nitf # The column in the dataset that contains the text to be processed. Modify this based on your dataset. metadata_columns: [headline, firstcreated] # Additional metadata columns to include in the input. Modify this based on your dataset. file_encoding: utf-8-sig - # storage: # Optional: read input from Azure Blob Storage instead of local filesystem. - # type: blob - # container_name: my-datasets # The blob container name (acts as the root folder). - # connection_string: $${{AZURE_STORAGE_CONNECTION_STRING}} # Auth option 1: connection string. - # # account_url: https://.blob.core.windows.net # Auth option 2: managed identity (use instead of connection_string). - # # base_dir: path/within/container # Optional prefix path. dataset_path is resolved relative to this. - +{{INPUT_STORAGE}} ## Output Storage Configuration -# output_storage: # Optional: write output to Azure Blob Storage instead of local filesystem. -# type: blob -# container_name: my-output # The blob container name (acts as the root folder). -# connection_string: $${{AZURE_STORAGE_CONNECTION_STRING}} # Auth option 1: connection string. 
-# # account_url: https://.blob.core.windows.net # Auth option 2: managed identity (use instead of connection_string). -# # base_dir: path/within/container # Optional prefix path. The CLI output argument is resolved relative to this. -# # Example: with container_name=my-output, base_dir=experiments/run1, and CLI arg "output", -# # files are written to: my-output/experiments/run1/output/ - +{{OUTPUT_STORAGE}} ## Encoder configuration encoding: model_name: o200k_base @@ -257,16 +246,7 @@ class ConfigType(StrEnum): """ AUTOE_ASSERTION_CONTENT = f"""## Storage Configuration -# input_storage: # Optional: read input answers/assertions from Azure Blob Storage. -# type: blob -# container_name: my-datasets # The blob container name (acts as the root folder). -# account_url: https://.blob.core.windows.net # Auth via managed identity. -# # connection_string: $${{AZURE_STORAGE_CONNECTION_STRING}} # Or use connection string instead. -# output_storage: # Optional: write output scores to Azure Blob Storage. -# type: blob -# container_name: my-output -# account_url: https://.blob.core.windows.net - +{{STORAGE}} ## Input Configuration generated: name: vector_rag @@ -287,16 +267,7 @@ class ConfigType(StrEnum): prompt: prompts/assertion_system_prompt.txt""" AUTOE_PAIRWISE_CONTENT = f"""## Storage Configuration -# input_storage: # Optional: read input answers from Azure Blob Storage. -# type: blob -# container_name: my-datasets # The blob container name (acts as the root folder). -# account_url: https://.blob.core.windows.net # Auth via managed identity. -# # connection_string: $${{AZURE_STORAGE_CONNECTION_STRING}} # Or use connection string instead. -# output_storage: # Optional: write output scores to Azure Blob Storage. 
-# type: blob -# container_name: my-output -# account_url: https://.blob.core.windows.net - +{{STORAGE}} ## Input Configuration base: name: vector_rag @@ -327,16 +298,7 @@ class ConfigType(StrEnum): AUTOE_REFERENCE_CONTENT = f"""## Storage Configuration -# input_storage: # Optional: read input answers from Azure Blob Storage. -# type: blob -# container_name: my-datasets # The blob container name (acts as the root folder). -# account_url: https://.blob.core.windows.net # Auth via managed identity. -# # connection_string: $${{AZURE_STORAGE_CONNECTION_STRING}} # Or use connection string instead. -# output_storage: # Optional: write output scores to Azure Blob Storage. -# type: blob -# container_name: my-output -# account_url: https://.blob.core.windows.net - +{{STORAGE}} ## Input Configuration reference: name: lazygraphrag @@ -364,8 +326,147 @@ class ConfigType(StrEnum): prompt: prompts/reference_system_prompt.txt""" +AUTOQ_INPUT_STORAGE_SNIPPET = """ storage: + type: blob + container_name: my-datasets # The blob container name (acts as the root folder). + connection_string: ${AZURE_STORAGE_CONNECTION_STRING} # Auth option 1: connection string. + # account_url: https://.blob.core.windows.net # Auth option 2: managed identity (use instead of connection_string). + # base_dir: path/within/container # Optional prefix path. dataset_path is resolved relative to this.""" + + +AUTOQ_OUTPUT_STORAGE_SNIPPET = """output_storage: + type: blob + container_name: my-output # The blob container name (acts as the root folder). + connection_string: ${AZURE_STORAGE_CONNECTION_STRING} # Auth option 1: connection string. + # account_url: https://.blob.core.windows.net # Auth option 2: managed identity (use instead of connection_string). + # base_dir: path/within/container # Optional prefix path. 
The CLI output argument is resolved relative to this.""" + + +AUTOE_STORAGE_SNIPPET = """input_storage: + type: blob + container_name: my-datasets # The blob container name (acts as the root folder). + connection_string: ${AZURE_STORAGE_CONNECTION_STRING} # Auth option 1: connection string. + # account_url: https://.blob.core.windows.net # Auth option 2: managed identity (use instead of connection_string). +output_storage: + type: blob + container_name: my-output + connection_string: ${AZURE_STORAGE_CONNECTION_STRING} + # account_url: https://.blob.core.windows.net""" + + +def _commentify(active: str, indent: int = 0) -> str: + """Comment out a YAML block by inserting `# ` after the given leading indent on each line. + + Also escapes ``$`` as ``$$`` so that ``${VAR}`` placeholders inside the commented + block are not picked up by ``graphrag_common``'s ``string.Template`` env-var + substitution (which scans the whole file regardless of YAML comments). + """ + prefix = " " * indent + out_lines = [] + for line in active.splitlines(): + escaped = line.replace("$", "$$") + if not escaped.strip(): + out_lines.append(escaped) + elif escaped.startswith(prefix): + out_lines.append(prefix + "# " + escaped[indent:]) + else: + out_lines.append("# " + escaped) + return "\n".join(out_lines) + + +def _get_content(config_type: ConfigType) -> str: + """Get the base template content for a config type.""" + match config_type: + case ConfigType.autoq: + return AUTOQ_CONTENT + case ConfigType.autoe_pairwise: + return AUTOE_PAIRWISE_CONTENT + case ConfigType.autoe_reference: + return AUTOE_REFERENCE_CONTENT + case ConfigType.autoe_assertion: + return AUTOE_ASSERTION_CONTENT + + +def _render_content( + config_type: ConfigType, + storage_type: str, + *, + container_name: str | None = None, + account_url: str | None = None, + connection_string: str | None = None, + base_dir: str | None = None, +) -> str: + """Render template content with the storage section in either commented or active 
form. + + Args: + config_type: The type of configuration to generate. + storage_type: Either 'local' (storage config commented out as documentation) + or 'blob' (active Azure Blob storage config injected). + container_name: Optional container name to pre-fill in storage config. + account_url: Optional account URL to pre-fill in storage config. + connection_string: Optional connection string to pre-fill in storage config. + base_dir: Optional base directory to pre-fill in storage config. + """ + template = _get_content(config_type) + active = storage_type == "blob" + + if config_type == ConfigType.autoq: + input_block = ( + AUTOQ_INPUT_STORAGE_SNIPPET + if active + else _commentify(AUTOQ_INPUT_STORAGE_SNIPPET, indent=2) + ) + output_block = ( + AUTOQ_OUTPUT_STORAGE_SNIPPET + if active + else _commentify(AUTOQ_OUTPUT_STORAGE_SNIPPET, indent=0) + ) + content = template.replace("{INPUT_STORAGE}", input_block).replace( + "{OUTPUT_STORAGE}", output_block + ) + else: + storage_block = ( + AUTOE_STORAGE_SNIPPET + if active + else _commentify(AUTOE_STORAGE_SNIPPET, indent=0) + ) + content = template.replace("{STORAGE}", storage_block) + + if not active: + return content + + # Substitute user-provided values into the active blob storage section. + # Auth methods are mutually exclusive: pick exactly one active line. + if container_name: + content = content.replace("my-datasets", container_name) + content = content.replace("my-output", container_name) + if connection_string: + # connection_string is the active default; just substitute the value. + content = content.replace( + "connection_string: ${AZURE_STORAGE_CONNECTION_STRING}", + f"connection_string: {connection_string}", + ) + elif account_url: + # Activate account_url and comment out the default connection_string. 
+ content = content.replace( + "# account_url: https://.blob.core.windows.net", + f"account_url: {account_url}", + ) + content = content.replace( + "connection_string: ${AZURE_STORAGE_CONNECTION_STRING}", + "# connection_string: $${AZURE_STORAGE_CONNECTION_STRING}", + ) + if base_dir: + content = content.replace( + "# base_dir: path/within/container", + f"base_dir: {base_dir}", + ) + + return content + + def __copy_prompts(prompts_path: Path, output_path: Path) -> None: - """Copy prompts from the prompts directory to the output directory.""" + """Copy prompts from the prompts directory to the local output directory.""" if not output_path.exists(): output_path.mkdir(parents=True, exist_ok=True) for prompt_file in prompts_path.iterdir(): @@ -376,6 +477,69 @@ def __copy_prompts(prompts_path: Path, output_path: Path) -> None: ) +def __get_prompt_files(prompts_path: Path) -> dict[str, str]: + """Get prompt file contents as a dict of {filename: content}.""" + result = {} + for prompt_file in prompts_path.iterdir(): + if prompt_file.is_file() and prompt_file.suffix == ".txt": + result[prompt_file.name] = prompt_file.read_text(encoding="utf-8") + return result + + +def _write_to_local( + root: Path, + settings_content: str, + prompt_mapping: dict[str, dict[str, str]], +) -> None: + """Write settings and prompts to local filesystem.""" + input_folder = root / "input" + if not input_folder.exists(): + input_folder.mkdir(parents=True, exist_ok=True) + typer.echo(f"Input folder created at {input_folder}") + typer.echo( + "Please place your input files in the 'input' folder before running, " + "or modify the settings.yaml to point to your input files." 
+ ) + + settings = root / "settings.yaml" + settings.write_text(settings_content, encoding="utf-8") + + for folder_path, files in prompt_mapping.items(): + output_path = root / folder_path + output_path.mkdir(parents=True, exist_ok=True) + for filename, file_content in files.items(): + (output_path / filename).write_text(file_content, encoding="utf-8") + + +def _write_to_blob( + settings_content: str, + prompt_mapping: dict[str, dict[str, str]], + *, + container_name: str | None = None, + account_url: str | None = None, + connection_string: str | None = None, + base_dir: str | None = None, +) -> None: + """Write settings and prompts to Azure Blob Storage.""" + config = StorageConfig( + type="blob", + container_name=container_name, + account_url=account_url, + connection_string=connection_string, + base_dir=base_dir, + ) + storage = create_storage(config) + + async def _upload() -> None: + await storage.set("settings.yaml", settings_content) + await storage.set(".env", "OPENAI_API_KEY=") + for folder_path, files in prompt_mapping.items(): + for filename, file_content in files.items(): + await storage.set(f"{folder_path}/{filename}", file_content) + + asyncio.get_event_loop().run_until_complete(_upload()) + + @app.command() def init( config_type: Annotated[ @@ -387,72 +551,121 @@ def init( root: Annotated[ Path, typer.Argument(help="The path to root directory with the input folder.") ], + storage_type: Annotated[ + str, + typer.Option( + "--storage-type", + "-s", + help="Storage setup mode for generated settings. 
Use 'blob' to scaffold active Azure Blob storage sections, or 'local' (default) for commented-out storage config.", + ), + ] = "local", + container_name: Annotated[ + str | None, + typer.Option( + "--container-name", + help="The blob container name to pre-fill in the generated storage config.", + ), + ] = None, + account_url: Annotated[ + str | None, + typer.Option( + "--account-url", + help="The storage account URL to pre-fill (uses managed identity for auth).", + ), + ] = None, + connection_string: Annotated[ + str | None, + typer.Option( + "--connection-string", + help="The storage connection string to pre-fill (alternative to --account-url).", + ), + ] = None, + base_dir: Annotated[ + str | None, + typer.Option( + "--base-dir", + help="Base prefix path within the container to pre-fill in storage config.", + ), + ] = None, ) -> None: """Generate settings file.""" - input_folder = root / "input" - if not input_folder.exists(): - input_folder.mkdir(parents=True, exist_ok=True) - typer.echo(f"Input folder created at {input_folder}") - typer.echo( - "Please place your input files in the 'input' folder before running, or modify the settings.yaml to point to your input files." 
- ) + settings_content = _render_content( + config_type, + storage_type, + container_name=container_name, + account_url=account_url, + connection_string=connection_string, + base_dir=base_dir, + ) - settings = root / "settings.yaml" - prompts_folder = root / "prompts" + # Collect prompt files based on config type + prompt_mapping: dict[str, dict[str, str]] = {} match config_type: case ConfigType.autoq: - settings.write_text(AUTOQ_CONTENT, encoding="utf-8") - __copy_prompts( - Path(summarization.__file__).parent, - prompts_folder / "summarization", + prompt_mapping["prompts/summarization"] = __get_prompt_files( + Path(summarization.__file__).parent ) - __copy_prompts( - Path(activity_context_prompts.__file__).parent, - prompts_folder / "activity_questions" / "activity_context", + prompt_mapping["prompts/activity_questions/activity_context"] = ( + __get_prompt_files(Path(activity_context_prompts.__file__).parent) ) - __copy_prompts( - Path(activity_global_prompts.__file__).parent, - prompts_folder / "activity_questions" / "activity_global", + prompt_mapping["prompts/activity_questions/activity_global"] = ( + __get_prompt_files(Path(activity_global_prompts.__file__).parent) ) - __copy_prompts( - Path(activity_local_prompts.__file__).parent, - prompts_folder / "activity_questions" / "activity_local", + prompt_mapping["prompts/activity_questions/activity_local"] = ( + __get_prompt_files(Path(activity_local_prompts.__file__).parent) ) - __copy_prompts( - Path(data_global_prompts.__file__).parent, - prompts_folder / "data_questions" / "data_global", + prompt_mapping["prompts/data_questions/data_global"] = __get_prompt_files( + Path(data_global_prompts.__file__).parent ) - __copy_prompts( - Path(data_local_prompts.__file__).parent, - prompts_folder / "data_questions" / "data_local", + prompt_mapping["prompts/data_questions/data_local"] = __get_prompt_files( + Path(data_local_prompts.__file__).parent ) - __copy_prompts( - Path(data_linked_prompts.__file__).parent, - 
prompts_folder / "data_questions" / "data_linked", + prompt_mapping["prompts/data_questions/data_linked"] = __get_prompt_files( + Path(data_linked_prompts.__file__).parent ) - __copy_prompts( - Path(data_questions_prompts.__file__).parent, - prompts_folder / "data_questions", + prompt_mapping["prompts/data_questions"] = __get_prompt_files( + Path(data_questions_prompts.__file__).parent ) - __copy_prompts( - Path(autoq_assertion_prompts.__file__).parent, - prompts_folder / "data_questions" / "assertions", + prompt_mapping["prompts/data_questions/assertions"] = __get_prompt_files( + Path(autoq_assertion_prompts.__file__).parent ) case ConfigType.autoe_pairwise: - settings.write_text(AUTOE_PAIRWISE_CONTENT, encoding="utf-8") - __copy_prompts(Path(pairwise_prompts.__file__).parent, prompts_folder) + prompt_mapping["prompts"] = __get_prompt_files( + Path(pairwise_prompts.__file__).parent + ) case ConfigType.autoe_reference: - settings.write_text(AUTOE_REFERENCE_CONTENT, encoding="utf-8") - __copy_prompts(Path(reference_prompts.__file__).parent, prompts_folder) + prompt_mapping["prompts"] = __get_prompt_files( + Path(reference_prompts.__file__).parent + ) case ConfigType.autoe_assertion: - settings.write_text(AUTOE_ASSERTION_CONTENT, encoding="utf-8") - __copy_prompts(Path(assertion_prompts.__file__).parent, prompts_folder) - - typer.echo(f"Configuration file created at {settings}") + prompt_mapping["prompts"] = __get_prompt_files( + Path(assertion_prompts.__file__).parent + ) - env_file = root / ".env" - if not env_file.exists(): - env_file.write_text("OPENAI_API_KEY=", encoding="utf-8") - typer.echo( - f"Change the OPENAI_API_KEY placeholder at {env_file} with your actual OPENAI_API_KEY." 
- ) + # Write to blob storage or local filesystem + if storage_type == "blob" and (account_url or connection_string): + _write_to_blob( + settings_content=settings_content, + prompt_mapping=prompt_mapping, + container_name=container_name, + account_url=account_url, + connection_string=connection_string, + base_dir=base_dir, + ) + target = f"blob://{container_name or 'container'}" + if base_dir: + target += f"/{base_dir}" + typer.echo(f"Configuration files uploaded to {target}") + else: + _write_to_local( + root=root, + settings_content=settings_content, + prompt_mapping=prompt_mapping, + ) + typer.echo(f"Configuration file created at {root / 'settings.yaml'}") + env_file = root / ".env" + if not env_file.exists(): + env_file.write_text("OPENAI_API_KEY=", encoding="utf-8") + typer.echo( + f"Change the OPENAI_API_KEY placeholder at {env_file} with your actual OPENAI_API_KEY." + ) diff --git a/benchmark_qed/data/cli.py b/benchmark_qed/data/cli.py index e653059..14b3edc 100644 --- a/benchmark_qed/data/cli.py +++ b/benchmark_qed/data/cli.py @@ -2,7 +2,6 @@ """Data downloader CLI.""" import asyncio -import base64 import zipfile from enum import StrEnum from io import BytesIO @@ -33,36 +32,14 @@ def _get_dataset_url(dataset: Dataset) -> str: return f"https://raw.githubusercontent.com/microsoft/benchmark-qed/refs/heads/main/datasets/{dataset}/raw_data.zip" -async def _upload_zip_to_storage( - zip_bytes: bytes, storage: Storage, storage_type: str = "blob" -) -> None: - """Extract a zip archive and upload its contents to storage. - - Args: - zip_bytes: The zip file contents as bytes. - storage: The Storage backend to upload to. - storage_type: The storage type ('blob', 'cosmosdb', or 'file'). - Used to determine path sanitization rules. 
- """ +async def _upload_zip_to_storage(zip_bytes: bytes, storage: Storage) -> None: + """Extract a zip archive and upload its contents to blob storage.""" with zipfile.ZipFile(BytesIO(zip_bytes), "r") as zip_ref: for name in zip_ref.namelist(): if name.endswith("/"): continue data = zip_ref.read(name) - - # Cosmos DB requires special handling: can't have slashes in IDs - if storage_type == "cosmosdb": - # Replace path separators with underscores for Cosmos DB ID compatibility - safe_key = name.replace("/", "_") - # Try to decode as UTF-8; fall back to base64 for binary files - try: - content = data.decode("utf-8-sig") - except UnicodeDecodeError: - content = base64.b64encode(data).decode("ascii") - await storage.set(safe_key, content) - else: - # Blob storage can handle raw bytes directly - await storage.set(name, data) + await storage.set(name, data) @app.command() @@ -78,12 +55,12 @@ def download( storage_type: Annotated[ str | None, typer.Option( - help="Storage type: 'blob' for Azure Blob Storage, 'cosmosdb' for Azure Cosmos DB. Omit for local filesystem." + help="Storage type: 'blob' for Azure Blob Storage. Omit for local filesystem." ), ] = None, container_name: Annotated[ str | None, - typer.Option(help="The blob container or Cosmos DB container name."), + typer.Option(help="The blob container name."), ] = None, account_url: Annotated[ str | None, @@ -95,14 +72,10 @@ def download( help="The storage connection string (alternative to account_url)." ), ] = None, - database_name: Annotated[ - str | None, - typer.Option(help="The Cosmos DB database name (required for cosmosdb type)."), - ] = None, base_dir: Annotated[ str | None, typer.Option( - help="Base prefix in cloud storage. Cloud files will be stored as: base_dir/output_dir/. If omitted, files are stored under output_dir/ only." + help="Base prefix in blob storage. Files will be stored as: base_dir/output_dir/. If omitted, files are stored under output_dir/ only." 
), ] = None, ) -> None: @@ -110,11 +83,11 @@ def download( For local filesystem, the dataset is extracted to `output_dir`. - For cloud storage (blob/Cosmos DB): + For Azure Blob Storage: - Files are stored under `{base_dir}/{output_dir}` if `base_dir` is provided. - Files are stored under `{output_dir}` if `base_dir` is omitted. - This ensures cloud storage mirrors the local directory structure. + This ensures blob storage mirrors the local directory structure. """ typer.echo( "By downloading this dataset, you agree to the terms of use described here: https://github.com/microsoft/benchmark-qed/blob/main/datasets/LICENSE." @@ -124,12 +97,16 @@ def download( abort=True, ) + if storage_type and storage_type != "blob": + msg = f"Unsupported storage type: {storage_type!r}. Only 'blob' is supported." + raise typer.BadParameter(msg) + api_url = _get_dataset_url(dataset) response = requests.get(api_url, timeout=60) response.raise_for_status() - # Determine base directory for cloud storage - # Include output_dir in the path so cloud storage mirrors local directory structure + # Determine base directory for blob storage + # Include output_dir in the path so blob storage mirrors local directory structure output_path = output_dir.as_posix().strip("./") storage_base_dir = f"{base_dir}/{output_path}" if base_dir else output_path or None @@ -139,14 +116,11 @@ def download( container_name=container_name, account_url=account_url, connection_string=connection_string, - database_name=database_name, base_dir=storage_base_dir, ) storage = create_storage(config) loop = asyncio.get_event_loop() - loop.run_until_complete( - _upload_zip_to_storage(response.content, storage, storage_type) - ) + loop.run_until_complete(_upload_zip_to_storage(response.content, storage)) else: output_dir.mkdir(parents=True, exist_ok=True) output_file = output_dir / f"{dataset}.zip" diff --git a/docs/cli/autoe.md b/docs/cli/autoe.md index ea96809..199fa34 100644 --- a/docs/cli/autoe.md +++ b/docs/cli/autoe.md 
@@ -5,19 +5,55 @@ This section describes the configuration schema for performing relative comparis To create a template configuration file, run: ```sh -benchmark-qed config init autoe_pairwise local/pairwise_test/settings.yaml +benchmark-qed config init autoe_pairwise ./local/pairwise_test +``` + +To create a template with active Azure Blob Storage configuration: + +```sh +benchmark-qed config init autoe_pairwise ./local/pairwise_test --storage-type blob ``` To perform pairwise scoring with your configuration file, use: ```sh -benchmark-qed autoe pairwise-scores local/pairwise_test/settings.yaml local/pairwise_test/output +benchmark-qed autoe pairwise-scores ./local/pairwise_test/settings.yaml ./local/pairwise_test/output ``` +If your config lives in Azure Blob Storage, pass a `blob://` URI and supply credentials inline with `--account-url` (managed identity) or `--connection-string`: + +```sh +benchmark-qed autoe pairwise-scores blob://my-container/pairwise_test/settings.yaml ./local/output \ + --account-url https://<account>.blob.core.windows.net +``` + +The same `--account-url` / `--connection-string` options are available on every `autoe` subcommand: `pairwise-scores`, `reference-scores`, `assertion-scores`, `hierarchical-assertion-scores`, `assertion-significance`, `hierarchical-assertion-significance`, `generate-retrieval-reference`, and `retrieval-scores`. If neither is supplied, auth falls back to `$AZURE_STORAGE_ACCOUNT_URL` / `$AZURE_STORAGE_CONNECTION_STRING`. + For information about the `config init` command, refer to: [Config Init CLI](config_init.md) --- +### Storage Configuration + +AutoE supports reading input answers from and writing output scores to Azure Blob Storage. When using `--storage-type blob` during `config init`, the generated settings file includes active storage sections.
+ +```yaml +## Storage Configuration +input_storage: + type: blob + container_name: my-datasets + account_url: https://<account>.blob.core.windows.net # Managed identity + # connection_string: ${AZURE_STORAGE_CONNECTION_STRING} # Alternative: connection string +output_storage: + type: blob + container_name: my-output + account_url: https://<account>.blob.core.windows.net +``` + +When storage is configured, all `answer_base_path` values are resolved relative to the blob container (and optional `base_dir`). Without storage configuration, paths are resolved from the local filesystem. + +--- + ### Classes and Fields #### `Condition` @@ -140,13 +176,19 @@ This section explains how to configure reference-based scoring, where generated To create a template configuration file, run: ```sh -benchmark-qed config init autoe_reference local/reference_test/settings.yaml +benchmark-qed config init autoe_reference ./local/reference_test +``` + +To create a template with active Azure Blob Storage configuration: + +```sh +benchmark-qed config init autoe_reference ./local/reference_test --storage-type blob ``` To perform reference-based scoring with your configuration file, run: ```sh -benchmark-qed autoe reference-scores local/reference_test/settings.yaml local/reference_test/output +benchmark-qed autoe reference-scores ./local/reference_test/settings.yaml ./local/reference_test/output ``` For information about the `config init` command, see: [Config Init CLI](config_init.md) @@ -259,7 +301,13 @@ The command auto-detects the config format: To create a template configuration file, run: ```sh -benchmark-qed config init autoe_assertion local/assertion_test/settings.yaml +benchmark-qed config init autoe_assertion ./local/assertion_test +``` + +To create a template with active Azure Blob Storage configuration: + +```sh +benchmark-qed config init autoe_assertion ./local/assertion_test --storage-type blob ``` For information about the `config init` command, refer to: [Config Init CLI](config_init.md) diff
--git a/docs/cli/autoq.md b/docs/cli/autoq.md index 02611b0..54b3796 100644 --- a/docs/cli/autoq.md +++ b/docs/cli/autoq.md @@ -5,19 +5,76 @@ This section provides an overview of the configuration schema for the question g To create a template configuration file, run: ```sh -benchmark-qed config init autoq local/autoq_test/settings.yaml +benchmark-qed config init autoq ./local/autoq_test +``` + +To create a template with active Azure Blob Storage configuration: + +```sh +benchmark-qed config init autoq ./local/autoq_test --storage-type blob ``` To generate synthetic queries using your configuration file, run: ```sh -benchmark-qed autoq local/autoq_test/settings.yaml local/autoq_test/output +benchmark-qed autoq ./local/autoq_test/settings.yaml ./local/autoq_test/output +``` + +If your config lives in Azure Blob Storage, pass a `blob://` URI and supply credentials inline with `--account-url` (managed identity) or `--connection-string`: + +```sh +benchmark-qed autoq blob://my-container/autoq_test/settings.yaml ./local/output \ + --account-url https://<account>.blob.core.windows.net ``` +```sh +benchmark-qed autoq blob://my-container/autoq_test/settings.yaml ./local/output \ + --connection-string "<connection-string>" +``` + +The same `--account-url` / `--connection-string` options are available on `autoq generate-assertions`. If neither is supplied, auth falls back to `$AZURE_STORAGE_ACCOUNT_URL` / `$AZURE_STORAGE_CONNECTION_STRING`. + For more information about the `config init` command, see: [Config Init CLI](config_init.md) --- +### Storage Configuration + +AutoQ supports reading input data from and writing output to Azure Blob Storage. When using `--storage-type blob` during `config init`, the generated settings file includes active storage sections.
+ +#### Input Storage + +Configured under the `input:` section in `settings.yaml`: + +```yaml +input: + dataset_path: ./input + input_type: json + text_column: body_nitf + file_encoding: utf-8-sig + storage: + type: blob + container_name: my-datasets + connection_string: ${AZURE_STORAGE_CONNECTION_STRING} + # account_url: https://<account>.blob.core.windows.net # Alternative: managed identity + # base_dir: path/within/container # Optional prefix path +``` + +#### Output Storage + +Configured at the top level in `settings.yaml`: + +```yaml +output_storage: + type: blob + container_name: my-output + connection_string: ${AZURE_STORAGE_CONNECTION_STRING} + # account_url: https://<account>.blob.core.windows.net + # base_dir: path/within/container +``` + +--- + ### Classes and Fields #### `InputConfig` diff --git a/docs/cli/config_init.md b/docs/cli/config_init.md index 43730e2..70b0e58 100644 --- a/docs/cli/config_init.md +++ b/docs/cli/config_init.md @@ -2,6 +2,133 @@ This page documents the command-line interface of the benchmark-qed configuration package. +### Init Command + +The `config init` command generates a starter `settings.yaml` file, prompt templates, and an `.env` file for the specified workflow. + +#### Arguments + +| Argument | Description | +|---|---| +| `config_type` | The type of configuration to generate. One of: `autoq`, `autoe_pairwise`, `autoe_reference`, `autoe_assertion`. | +| `root` | The path to the root directory where the configuration will be created. | + +#### Options + +| Option | Short | Type | Default | Description | +|---|---|---|---|---| +| `--storage-type` | `-s` | `str` | `local` | Storage setup mode. Use `blob` to scaffold active Azure Blob Storage sections in the generated settings file. Default (`local`) keeps storage config commented out as examples. | +| `--container-name` | | `str` | `None` | The blob container name to pre-fill in the generated storage config.
| +| `--account-url` | | `str` | `None` | The storage account URL to pre-fill (uses managed identity for auth). | +| `--connection-string` | | `str` | `None` | The storage connection string to pre-fill (alternative to `--account-url`). | +| `--base-dir` | | `str` | `None` | Base prefix path within the container to pre-fill in storage config. | + +#### Local Filesystem (Default) + +```bash +benchmark-qed config init autoq ./my_project +``` + +This creates `./my_project/settings.yaml` with blob storage sections **commented out** as documentation examples. Input is read from local files. + +#### Azure Blob Storage + +```bash +benchmark-qed config init autoq ./my_project --storage-type blob +``` + +This creates `./my_project/settings.yaml` with **active** (uncommented) blob storage sections for both input and output. You then fill in your container name and credentials. + +You can also pre-fill your storage credentials directly. When credentials (`--account-url` or `--connection-string`) are supplied, the generated `settings.yaml`, `.env`, and `prompts/` are uploaded **directly to your blob container** — no local files are created: + +```bash +benchmark-qed config init autoq ./my_project \ + --storage-type blob \ + --container-name my-datasets \ + --account-url https://myaccount.blob.core.windows.net \ + --base-dir experiments/run1 +``` + +Or using a connection string: + +```bash +benchmark-qed config init autoq ./my_project \ + --storage-type blob \ + --container-name my-datasets \ + --connection-string "$AZURE_STORAGE_CONNECTION_STRING" +``` + +> **Note:** When `--storage-type blob` is used **without** `--account-url` or `--connection-string`, files are written locally with active blob sections in `settings.yaml` for you to fill in manually. 
+ +#### Examples + +```bash +# Generate AutoQ config with local filesystem (default) +benchmark-qed config init autoq ./autoq_project + +# Generate AutoQ config with active blob storage scaffolding +benchmark-qed config init autoq ./autoq_project -s blob + +# Generate AutoE pairwise config with blob storage +benchmark-qed config init autoe_pairwise ./pairwise_project --storage-type blob + +# Generate AutoE reference config with blob storage +benchmark-qed config init autoe_reference ./reference_project -s blob + +# Generate AutoE assertion config with blob storage +benchmark-qed config init autoe_assertion ./assertion_project -s blob +``` + +#### Generated Files + +The command creates the following structure in the `root` directory (when writing locally): + +``` +root/ +├── settings.yaml # Main configuration file +├── .env # Environment variables (e.g., OPENAI_API_KEY) +├── input/ # Placeholder for input data +└── prompts/ # Prompt template files (copied from package defaults) +``` + +When `--storage-type blob` is used together with `--account-url` or `--connection-string`, the same `settings.yaml`, `.env`, and `prompts/` files are written **directly to the configured blob container** (under `--base-dir` if provided) instead of locally. + +#### Running CLI commands against a blob-stored config + +Once the `settings.yaml` and `prompts/` tree live in blob storage, the `autoq` and `autoe` CLI commands accept a `blob://` URI in place of a local path. Pass your blob credentials inline via `--account-url` (managed identity) or `--connection-string`: + +```bash +benchmark-qed autoq blob://my-container/experiments/run1/settings.yaml output \ + --account-url https://myaccount.blob.core.windows.net +``` + +Or with a connection string: + +```bash +benchmark-qed autoq blob://my-container/experiments/run1/settings.yaml output \ + --connection-string "<connection-string>" +``` + +The CLI streams `settings.yaml` and **every sibling file under the same prefix** (e.g.
`prompts/**/*.txt`, `.env`) into a temporary local directory before loading the config. If neither option is supplied, auth falls back to the `AZURE_STORAGE_CONNECTION_STRING` or `AZURE_STORAGE_ACCOUNT_URL` environment variables. Input data and output destinations still flow through the `input:` / `output_storage:` blocks declared inside `settings.yaml`. + +Every command that accepts a config path also accepts `--account-url` and `--connection-string`: `autoq`, `autoq generate-assertions`, `autoe pairwise-scores`, `autoe reference-scores`, `autoe assertion-scores`, `autoe hierarchical-assertion-scores`, `autoe assertion-significance`, `autoe hierarchical-assertion-significance`, `autoe generate-retrieval-reference`, and `autoe retrieval-scores`. + +#### Storage Configuration in Generated Settings + +When `--storage-type blob` is used: + +**For AutoQ (`autoq`):** + +- An **input storage** block is added under the `input:` section with `type: blob`, `container_name`, and auth options. +- An **output storage** block is added at the top level (`output_storage:`) for writing results to blob. + +**For AutoE (`autoe_pairwise`, `autoe_reference`, `autoe_assertion`):** + +- An **input_storage** block is added for reading answers/assertions from blob. +- An **output_storage** block is added for writing scores to blob. + +Both blocks include placeholders for `container_name`, `connection_string`, and `account_url` (managed identity). + ::: mkdocs-typer2 :module: benchmark_qed.cli.init_config :name: config \ No newline at end of file diff --git a/docs/cli/data.md b/docs/cli/data.md index 8e887c9..33be49d 100644 --- a/docs/cli/data.md +++ b/docs/cli/data.md @@ -4,25 +4,24 @@ This page documents the command-line interface of the benchmark-qed data downloa ### Download Command -The `data download` command downloads datasets from GitHub and optionally uploads them to cloud storage. 
+The `data download` command downloads datasets from GitHub and optionally uploads them to Azure Blob Storage. #### Arguments | Argument | Description | |---|---| | `dataset` | The dataset to download. One of: `AP_news`, `podcast`, `example_answers`. | -| `output_dir` | The directory (local) or path prefix (cloud) to save the downloaded dataset. | +| `output_dir` | The directory (local) or path prefix (blob) to save the downloaded dataset. | #### Options | Option | Type | Default | Description | |---|---|---|---| -| `--storage-type` | `str` | `None` | Storage backend: `blob` for Azure Blob Storage, `cosmosdb` for Azure Cosmos DB. Omit for local filesystem. | -| `--container-name` | `str` | `None` | The blob container or Cosmos DB container name. Required when `--storage-type` is set. | +| `--storage-type` | `str` | `None` | Storage backend: `blob` for Azure Blob Storage. Omit for local filesystem. | +| `--container-name` | `str` | `None` | The blob container name. Required when `--storage-type` is set. | | `--account-url` | `str` | `None` | The storage account URL. Uses managed identity (DefaultAzureCredential) for authentication. | | `--connection-string` | `str` | `None` | The storage connection string. Alternative to `--account-url` for authentication. | -| `--database-name` | `str` | `None` | The Cosmos DB database name. Required when `--storage-type` is `cosmosdb`. | -| `--base-dir` | `str` | `None` | Base prefix in cloud storage. Files are stored under `{base_dir}/{output_dir}/`. If omitted, files are stored under `{output_dir}/` only. | +| `--base-dir` | `str` | `None` | Base prefix in blob storage. Files are stored under `{base_dir}/{output_dir}/`. If omitted, files are stored under `{output_dir}/` only. 
| #### Local Filesystem @@ -51,16 +50,6 @@ benchmark-qed data download AP_news input \ --connection-string "$AZURE_STORAGE_CONNECTION_STRING" ``` -#### Azure Cosmos DB - -```bash -benchmark-qed data download AP_news input \ - --storage-type cosmosdb \ - --container-name my-container \ - --database-name my-database \ - --account-url https://myaccount.documents.azure.com -``` - #### Path Structure - **With `--base-dir`**: Files are stored as `{base_dir}/{output_dir}/{file_path}` @@ -68,7 +57,7 @@ benchmark-qed data download AP_news input \ - **Without `--base-dir`**: Files are stored as `{output_dir}/{file_path}` - Example: `input/2023/11/22/file.json` -This ensures cloud storage mirrors your local directory structure. +This ensures blob storage mirrors your local directory structure. ::: mkdocs-typer2 :module: benchmark_qed.data.cli diff --git a/docs/cli/retrieval_scores.md b/docs/cli/retrieval_scores.md index 561e723..99f755c 100644 --- a/docs/cli/retrieval_scores.md +++ b/docs/cli/retrieval_scores.md @@ -121,8 +121,26 @@ python -m benchmark_qed autoe generate-retrieval-reference \ python -m benchmark_qed autoe generate-retrieval-reference \ retrieval_reference_config.yaml \ --print-model-usage + +# Reading the config from Azure Blob Storage (managed identity) +benchmark-qed autoe generate-retrieval-reference \ + blob://my-container/retrieval/retrieval_reference_config.yaml \ + --account-url https://<account>.blob.core.windows.net + +# Or with a connection string +benchmark-qed autoe generate-retrieval-reference \ + blob://my-container/retrieval/retrieval_reference_config.yaml \ + --connection-string "<connection-string>" ``` +#### Options + +| Option | Type | Default | Description | +|---|---|---|---| +| `--print-model-usage` | `bool` | `False` | Print LLM usage statistics after the run. | +| `--account-url` | `str` | `None` | Azure Blob Storage account URL (managed identity), used when `config_path` is a `blob://` URI. Falls back to `$AZURE_STORAGE_ACCOUNT_URL`.
| +| `--connection-string` | `str` | `None` | Azure Blob Storage connection string, used when `config_path` is a `blob://` URI. Falls back to `$AZURE_STORAGE_CONNECTION_STRING`. | + ### Output Structure ``` @@ -241,8 +259,22 @@ python -m benchmark_qed autoe retrieval-scores \ python -m benchmark_qed autoe retrieval-scores \ retrieval_scores_config.yaml \ --print-model-usage + +# Reading the config from Azure Blob Storage (managed identity) +benchmark-qed autoe retrieval-scores \ + blob://my-container/retrieval/retrieval_scores_config.yaml \ + --account-url https://<account>.blob.core.windows.net ``` +#### Options + +| Option | Type | Default | Description | +|---|---|---|---| +| `--print-model-usage` | `bool` | `False` | Print LLM usage statistics after the run. | +| `--max-concurrent` | `int` | `8` | Maximum concurrent relevance assessments. | +| `--account-url` | `str` | `None` | Azure Blob Storage account URL (managed identity), used when `config_path` is a `blob://` URI. Falls back to `$AZURE_STORAGE_ACCOUNT_URL`. | +| `--connection-string` | `str` | `None` | Azure Blob Storage connection string, used when `config_path` is a `blob://` URI. Falls back to `$AZURE_STORAGE_CONNECTION_STRING`. | + ### Output Files ``` diff --git a/docs/datasets.md b/docs/datasets.md index 0fc90d6..5f23462 100644 --- a/docs/datasets.md +++ b/docs/datasets.md @@ -42,26 +42,13 @@ You can download datasets directly into Azure Blob Storage by providing storage The `OUTPUT_DIR` argument (e.g., `input`) becomes the prefix path within the blob container. The dataset zip is downloaded from GitHub, extracted in memory, and each file is uploaded directly to the storage backend.
-## Downloading to Azure Cosmos DB - -You can also download datasets into Azure Cosmos DB: - -```sh -benchmark-qed data download AP_news input \ - --storage-type cosmosdb \ - --container-name my-container \ - --database-name my-database \ - --account-url https://.documents.azure.com -``` - ## Storage Options Reference | Option | Description | |---|---| -| `--storage-type` | Storage backend: `blob` for Azure Blob Storage, `cosmosdb` for Azure Cosmos DB. Omit for local filesystem. | -| `--container-name` | The blob container or Cosmos DB container name. | +| `--storage-type` | Storage backend: `blob` for Azure Blob Storage. Omit for local filesystem. | +| `--container-name` | The blob container name. | | `--account-url` | The storage account URL (uses managed identity for authentication). | | `--connection-string` | The storage connection string (alternative to `--account-url`). | -| `--database-name` | The Cosmos DB database name (required for `cosmosdb` type). | You can also find these datasets in the [datasets directory](https://github.com/microsoft/benchmark-qed/tree/main/datasets). \ No newline at end of file diff --git a/docs/developing.md b/docs/developing.md index 9d95b21..ee1b970 100644 --- a/docs/developing.md +++ b/docs/developing.md @@ -32,7 +32,7 @@ Follow these steps to generate synthetic queries using AutoQ: ```sh uv run benchmark-qed data download AP_news input ``` - You can also download directly to Azure Blob Storage or Cosmos DB. See the [Datasets documentation](datasets.md) for storage options. + You can also download directly to Azure Blob Storage. See the [Datasets documentation](datasets.md) for storage options. 3. **Initialize the configuration:** ```sh @@ -42,7 +42,7 @@ Follow these steps to generate synthetic queries using AutoQ: - `.env`: Stores environment variables for the AutoQ pipeline. Open this file and replace `` with your OpenAI or Azure API key. - `settings.yaml`: Contains pipeline settings. 
Edit this file as needed for your use case. - The generated `settings.yaml` includes commented-out sections for configuring Azure Blob Storage or Cosmos DB as input and output backends. Uncomment and fill in the `storage` section under `input` to read data from blob storage, or the `output_storage` section to write results to blob storage instead of the local filesystem. + The generated `settings.yaml` includes commented-out sections for configuring Azure Blob Storage as input and output backends. Uncomment and fill in the `storage` section under `input` to read data from blob storage, or the `output_storage` section to write results to blob storage instead of the local filesystem. 4. **Generate synthetic queries:** ```sh @@ -75,7 +75,7 @@ Follow these steps to compare RAG answer pairs using the pairwise scoring pipeli ```sh uv run benchmark-qed data download example_answers input ``` - You can also download directly to Azure Blob Storage or Cosmos DB. See the [Datasets documentation](datasets.md) for storage options. + You can also download directly to Azure Blob Storage. See the [Datasets documentation](datasets.md) for storage options. 3. **Create a configuration file for pairwise comparison:** ```sh @@ -85,7 +85,7 @@ Follow these steps to compare RAG answer pairs using the pairwise scoring pipeli - `.env`: Contains environment variables for the pairwise comparison tests. Open this file and replace `` with your OpenAI or Azure API key. - `settings.yaml`: Contains pipeline settings, which you can modify as needed. - The generated `settings.yaml` includes commented-out `input_storage` and `output_storage` sections for configuring Azure Blob Storage or Cosmos DB backends. + The generated `settings.yaml` includes commented-out `input_storage` and `output_storage` sections for configuring Azure Blob Storage backends. 4. 
**Run the pairwise comparison:** ```sh @@ -110,7 +110,7 @@ Follow these steps to score RAG answers against reference answers using example ```sh uv run benchmark-qed data download example_answers input ``` - You can also download directly to Azure Blob Storage or Cosmos DB. See the [Datasets documentation](datasets.md) for storage options. + You can also download directly to Azure Blob Storage. See the [Datasets documentation](datasets.md) for storage options. 3. **Create a configuration file for reference scoring:** ```sh @@ -120,7 +120,7 @@ Follow these steps to score RAG answers against reference answers using example - `.env`: Contains environment variables for the reference scoring pipeline. Open this file and replace `` with your OpenAI or Azure API key. - `settings.yaml`: Contains pipeline settings, which you can modify as needed. - The generated `settings.yaml` includes commented-out `input_storage` and `output_storage` sections for configuring Azure Blob Storage or Cosmos DB backends. + The generated `settings.yaml` includes commented-out `input_storage` and `output_storage` sections for configuring Azure Blob Storage backends. 4. 
**Run the reference scoring:** ```sh diff --git a/docs/notebooks/autoe.ipynb b/docs/notebooks/autoe.ipynb index 31a2d05..b7db73c 100644 --- a/docs/notebooks/autoe.ipynb +++ b/docs/notebooks/autoe.ipynb @@ -877,7 +877,7 @@ " top_k_assertions=None, # Use all assertions\n", " pass_threshold=pass_threshold,\n", " # Assertions are in input_dir (not in RAG subdirs)\n", - " assertions_filename_template=\"{question_set}_assertions.json\", # noqa: RUF027\n", + " assertions_filename_template=f\"{question_set}_assertions.json\",\n", " # Answers are in RAG subdirs with question_set name\n", " answers_path_template=\"{input_dir}/{generated_rag}/{question_set}_answers.json\",\n", " run_significance_test=True, # Run Friedman/Wilcoxon tests\n", @@ -1255,4 +1255,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/mkdocs.yaml b/mkdocs.yaml index a49ef52..0112ac5 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -38,6 +38,7 @@ nav: - CLI: - AutoQ: cli/autoq.md - AutoE: cli/autoe.md + - Retrieval Metrics: cli/retrieval_scores.md - LLM Configuration: cli/llm_config.md - Config Init: cli/config_init.md - Dataset Download: cli/data.md @@ -45,6 +46,8 @@ nav: - AutoQ: notebooks/autoq.ipynb - AutoE: notebooks/autoe.ipynb - AutoD: notebooks/autod.ipynb + - Assertion Generation: notebooks/assertion_gen.ipynb + - Retrieval Metrics: notebooks/retrieval_metrics.ipynb - Datasets: datasets.md repo_url: https://github.com/microsoft/benchmark-qed diff --git a/tests/test_config_resolver.py b/tests/test_config_resolver.py new file mode 100644 index 0000000..330e10c --- /dev/null +++ b/tests/test_config_resolver.py @@ -0,0 +1,125 @@ +# Copyright (c) 2025 Microsoft Corporation. 
+"""Tests for the blob:// config-path resolver.""" + +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +import typer + +from benchmark_qed.cli.config_resolver import ( + is_blob_uri, + parse_blob_uri, + resolve_config_path, +) + + +def test_is_blob_uri_detects_double_slash() -> None: + assert is_blob_uri("blob://my-container/path/to/settings.yaml") + + +def test_is_blob_uri_detects_single_slash_path_form() -> None: + # pathlib.Path("blob://x/y") collapses to "blob:/x/y" on POSIX. + assert is_blob_uri("blob:/my-container/path/to/settings.yaml") + + +def test_is_blob_uri_rejects_local_path() -> None: + assert not is_blob_uri("/tmp/settings.yaml") + assert not is_blob_uri("./settings.yaml") + assert not is_blob_uri(Path("settings.yaml")) + + +def test_parse_blob_uri_double_slash() -> None: + container, key = parse_blob_uri("blob://my-container/dir/settings.yaml") + assert container == "my-container" + assert key == "dir/settings.yaml" + + +def test_parse_blob_uri_single_slash() -> None: + container, key = parse_blob_uri("blob:/my-container/dir/settings.yaml") + assert container == "my-container" + assert key == "dir/settings.yaml" + + +def test_parse_blob_uri_invalid_raises() -> None: + with pytest.raises(typer.BadParameter): + parse_blob_uri("blob://only-container") + with pytest.raises(typer.BadParameter): + parse_blob_uri("not-a-blob-uri") + + +def test_resolve_config_path_local_passthrough(tmp_path: Path) -> None: + local = tmp_path / "settings.yaml" + local.write_text("input: {}", encoding="utf-8") + result = resolve_config_path(local) + assert result == local + + +def test_resolve_config_path_blob_downloads_tree(monkeypatch) -> None: + """resolve_config_path downloads the settings file and sibling files.""" + monkeypatch.setenv( + "AZURE_STORAGE_ACCOUNT_URL", "https://acct.blob.core.windows.net" + ) + monkeypatch.delenv("AZURE_STORAGE_CONNECTION_STRING", raising=False) + + fake_storage = MagicMock() + # Storage is 
created with base_dir=parent_key, so find() returns paths + # already relative to that base_dir (matching the real AzureBlobStorage + # behavior of stripping base_dir from blob names). + fake_storage.find.return_value = iter([ + "settings.yaml", + ".env", + "prompts/foo.txt", + "prompts/sub/bar.txt", + ]) + + async def fake_get(key: str, as_bytes: bool | None = False) -> bytes: # noqa: RUF029 + return f"content of {key}".encode() + + fake_storage.get = AsyncMock(side_effect=fake_get) + + with patch( + "benchmark_qed.cli.config_resolver.create_storage", + return_value=fake_storage, + ): + result = resolve_config_path("blob://my-container/autoq_test/settings.yaml") + + assert result.name == "settings.yaml" + assert result.exists() + assert result.read_text(encoding="utf-8") == "content of settings.yaml" + + root = result.parent + assert (root / ".env").read_text(encoding="utf-8") == "content of .env" + assert (root / "prompts/foo.txt").read_text(encoding="utf-8") == ( + "content of prompts/foo.txt" + ) + assert (root / "prompts/sub/bar.txt").read_text(encoding="utf-8") == ( + "content of prompts/sub/bar.txt" + ) + + +def test_resolve_config_path_blob_missing_credentials(monkeypatch) -> None: + monkeypatch.delenv("AZURE_STORAGE_CONNECTION_STRING", raising=False) + monkeypatch.delenv("AZURE_STORAGE_ACCOUNT_URL", raising=False) + with pytest.raises(typer.BadParameter, match="AZURE_STORAGE"): + resolve_config_path("blob://my-container/autoq/settings.yaml") + + +def test_resolve_config_path_blob_missing_settings_file(monkeypatch) -> None: + monkeypatch.setenv( + "AZURE_STORAGE_ACCOUNT_URL", "https://acct.blob.core.windows.net" + ) + monkeypatch.delenv("AZURE_STORAGE_CONNECTION_STRING", raising=False) + + fake_storage = MagicMock() + fake_storage.find.return_value = iter(["prompts/foo.txt"]) + fake_storage.get = AsyncMock(return_value=b"x") + + with ( + patch( + "benchmark_qed.cli.config_resolver.create_storage", + return_value=fake_storage, + ), + 
pytest.raises(typer.BadParameter, match="not found"), + ): + resolve_config_path("blob://my-container/autoq/settings.yaml") diff --git a/tests/test_init_config_cli.py b/tests/test_init_config_cli.py new file mode 100644 index 0000000..0adf47a --- /dev/null +++ b/tests/test_init_config_cli.py @@ -0,0 +1,91 @@ +# Copyright (c) 2025 Microsoft Corporation. +"""Tests for config init scaffolding behavior.""" + +from pathlib import Path +from unittest.mock import patch + +from benchmark_qed.cli.init_config import ConfigType, init + + +def test_init_autoq_default_uses_local_storage_template(tmp_path: Path) -> None: + """Default init keeps blob storage examples commented out.""" + init(ConfigType.autoq, tmp_path) + + settings = (tmp_path / "settings.yaml").read_text(encoding="utf-8") + + assert " # storage:\n" in settings + assert "# output_storage:\n" in settings + + +def test_init_autoq_blob_scaffolds_active_storage_sections(tmp_path: Path) -> None: + """Blob mode scaffolds active input/output storage sections for AutoQ.""" + init(ConfigType.autoq, tmp_path, storage_type="blob") + + settings = (tmp_path / "settings.yaml").read_text(encoding="utf-8") + + assert " storage:\n type: blob\n container_name: my-datasets" in settings + assert "output_storage:\n type: blob\n container_name: my-output" in settings + + +def test_init_autoe_blob_scaffolds_active_storage_sections(tmp_path: Path) -> None: + """Blob mode scaffolds active input/output storage sections for AutoE configs.""" + init(ConfigType.autoe_reference, tmp_path, storage_type="blob") + + settings = (tmp_path / "settings.yaml").read_text(encoding="utf-8") + + assert "input_storage:\n type: blob\n container_name: my-datasets" in settings + assert "output_storage:\n type: blob\n container_name: my-output" in settings + + +def test_init_autoq_blob_with_custom_values(tmp_path: Path) -> None: + """Blob mode with custom values uploads settings with pre-filled values to blob storage.""" + with 
patch("benchmark_qed.cli.init_config._write_to_blob") as mock_write_blob: + init( + ConfigType.autoq, + tmp_path, + storage_type="blob", + container_name="my-container", + account_url="https://myaccount.blob.core.windows.net", + base_dir="data/project1", + ) + + mock_write_blob.assert_called_once() + kwargs = mock_write_blob.call_args.kwargs + settings = kwargs["settings_content"] + + assert kwargs["container_name"] == "my-container" + assert kwargs["account_url"] == "https://myaccount.blob.core.windows.net" + assert kwargs["base_dir"] == "data/project1" + assert "container_name: my-container" in settings + assert "account_url: https://myaccount.blob.core.windows.net" in settings + assert "base_dir: data/project1" in settings + # No local files should be created + assert not (tmp_path / "settings.yaml").exists() + assert not (tmp_path / "input").exists() + + +def test_init_autoe_blob_with_connection_string(tmp_path: Path) -> None: + """Blob mode with connection string uploads settings with the value to blob storage.""" + with patch("benchmark_qed.cli.init_config._write_to_blob") as mock_write_blob: + init( + ConfigType.autoe_pairwise, + tmp_path, + storage_type="blob", + container_name="scoring-data", + connection_string="DefaultEndpointsProtocol=https;AccountName=test", + ) + + mock_write_blob.assert_called_once() + kwargs = mock_write_blob.call_args.kwargs + settings = kwargs["settings_content"] + + assert kwargs["container_name"] == "scoring-data" + assert ( + kwargs["connection_string"] == "DefaultEndpointsProtocol=https;AccountName=test" + ) + assert "container_name: scoring-data" in settings + assert ( + "connection_string: DefaultEndpointsProtocol=https;AccountName=test" in settings + ) + # No local files should be created + assert not (tmp_path / "settings.yaml").exists() From 28394365988f8b0aaef31026dfec34065ca7cdd5 Mon Sep 17 00:00:00 2001 From: Gaudy Blanco Date: Wed, 6 May 2026 00:32:39 -0600 Subject: [PATCH 2/8] storage options added to the init 
command --- docs/cli/config_init.md | 4 ++++ docs/cli/data.md | 4 ++++ docs/datasets.md | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/docs/cli/config_init.md b/docs/cli/config_init.md index 70b0e58..aafe1ab 100644 --- a/docs/cli/config_init.md +++ b/docs/cli/config_init.md @@ -33,6 +33,10 @@ This creates `./my_project/settings.yaml` with blob storage sections **commented #### Azure Blob Storage +!!! note "Supported cloud backends" + Only **Azure Blob Storage** (`--storage-type blob`) is currently supported. + Azure Cosmos DB and other backends are **not supported**. + ```bash benchmark-qed config init autoq ./my_project --storage-type blob ``` diff --git a/docs/cli/data.md b/docs/cli/data.md index 33be49d..acec4c1 100644 --- a/docs/cli/data.md +++ b/docs/cli/data.md @@ -6,6 +6,10 @@ This page documents the command-line interface of the benchmark-qed data downloa The `data download` command downloads datasets from GitHub and optionally uploads them to Azure Blob Storage. +!!! note "Supported cloud backends" + Only **Azure Blob Storage** (`--storage-type blob`) is currently supported. + Azure Cosmos DB and other backends are **not supported**. + #### Arguments | Argument | Description | diff --git a/docs/datasets.md b/docs/datasets.md index 5f23462..b775bfa 100644 --- a/docs/datasets.md +++ b/docs/datasets.md @@ -44,6 +44,10 @@ The `OUTPUT_DIR` argument (e.g., `input`) becomes the prefix path within the blo ## Storage Options Reference +!!! note "Supported cloud backends" + Only **Azure Blob Storage** (`--storage-type blob`) is currently supported. + Azure Cosmos DB and other backends are **not supported**. + | Option | Description | |---|---| | `--storage-type` | Storage backend: `blob` for Azure Blob Storage. Omit for local filesystem. 
| From 09e385829419efb22f38849b4f0cd20402948f80 Mon Sep 17 00:00:00 2001 From: Gaudy Blanco Date: Wed, 6 May 2026 00:34:48 -0600 Subject: [PATCH 3/8] version added --- .semversioner/next-release/minor-20260506063439936521.json | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .semversioner/next-release/minor-20260506063439936521.json diff --git a/.semversioner/next-release/minor-20260506063439936521.json b/.semversioner/next-release/minor-20260506063439936521.json new file mode 100644 index 0000000..ab8838c --- /dev/null +++ b/.semversioner/next-release/minor-20260506063439936521.json @@ -0,0 +1,4 @@ +{ + "type": "minor", + "description": "storage improvements" +} From c2e8e5bb9341331b6e595bc14aafb9d42085d9a4 Mon Sep 17 00:00:00 2001 From: Gaudy Blanco Date: Wed, 6 May 2026 14:58:00 -0600 Subject: [PATCH 4/8] check fix --- docs/notebooks/autoe.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/notebooks/autoe.ipynb b/docs/notebooks/autoe.ipynb index 500eb2e..7b45cd4 100644 --- a/docs/notebooks/autoe.ipynb +++ b/docs/notebooks/autoe.ipynb @@ -394,7 +394,7 @@ " top_k_assertions=None, # Use all assertions\n", " pass_threshold=pass_threshold,\n", " # Assertions are in input_dir (not in RAG subdirs)\n", - " assertions_filename_template=f\"{question_set}_assertions.json\",\n", + " assertions_filename_template=\"{question_set}_assertions.json\",\n", " # Answers are in RAG subdirs with question_set name\n", " answers_path_template=\"{input_dir}/{generated_rag}/{question_set}_answers.json\",\n", " run_significance_test=True, # Run Friedman/Wilcoxon tests\n", @@ -404,7 +404,7 @@ " answer_text_key=\"answer\",\n", ")\n", "\n", - "print_df(results_df, \"Assertion Scoring Results Summary\")" + "print_df(results_df, \"Assertion Scoring Results Summary\")\n" ] }, { @@ -581,4 +581,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} From edb90b964567d6f7094bad65bf9243e4df46ee98 Mon Sep 17 00:00:00 2001 From: Gaudy 
Blanco Date: Wed, 6 May 2026 14:59:57 -0600 Subject: [PATCH 5/8] check fix --- docs/notebooks/autoe.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/notebooks/autoe.ipynb b/docs/notebooks/autoe.ipynb index 7b45cd4..823515f 100644 --- a/docs/notebooks/autoe.ipynb +++ b/docs/notebooks/autoe.ipynb @@ -394,7 +394,7 @@ " top_k_assertions=None, # Use all assertions\n", " pass_threshold=pass_threshold,\n", " # Assertions are in input_dir (not in RAG subdirs)\n", - " assertions_filename_template=\"{question_set}_assertions.json\",\n", + " assertions_filename_template=\"{question_set}_assertions.json\", # noqa: RUF027\n", " # Answers are in RAG subdirs with question_set name\n", " answers_path_template=\"{input_dir}/{generated_rag}/{question_set}_answers.json\",\n", " run_significance_test=True, # Run Friedman/Wilcoxon tests\n", @@ -404,7 +404,7 @@ " answer_text_key=\"answer\",\n", ")\n", "\n", - "print_df(results_df, \"Assertion Scoring Results Summary\")\n" + "print_df(results_df, \"Assertion Scoring Results Summary\")" ] }, { From c52351fa281b250c44eb8ce933c39f33c5897861 Mon Sep 17 00:00:00 2001 From: Gaudy Blanco Date: Fri, 8 May 2026 10:17:39 -0600 Subject: [PATCH 6/8] Update skills with Azure Blob Storage configuration options --- .apm/skills/benchmark-qed-autoe/SKILL.md | 37 ++++++++++++++++ .apm/skills/benchmark-qed-autoq/SKILL.md | 44 +++++++++++++++++++ .apm/skills/benchmark-qed-setup/SKILL.md | 56 ++++++++++++++++++++++++ 3 files changed, 137 insertions(+) diff --git a/.apm/skills/benchmark-qed-autoe/SKILL.md b/.apm/skills/benchmark-qed-autoe/SKILL.md index 432cf67..4075e87 100644 --- a/.apm/skills/benchmark-qed-autoe/SKILL.md +++ b/.apm/skills/benchmark-qed-autoe/SKILL.md @@ -54,6 +54,8 @@ uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-qed autoe | `--alpha` | `0.05` | P-value threshold for significance | | `--exclude-criteria` | `[]` | Criteria to exclude (repeatable) | | `--print-model-usage` | `false` | 
Print LLM token usage | +| `--account-url` | `null` | Azure Blob Storage account URL (managed-identity auth). Use when the config path is a `blob://` URI. | +| `--connection-string` | `null` | Azure Blob Storage connection string. Use when the config path is a `blob://` URI. | **Config requires**: `base` (reference method), `others` (methods to compare), `question_sets`, `criteria`, `trials` (must be even), `llm_config`, `prompt_config` @@ -88,6 +90,8 @@ uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-qed autoe |--------|---------|-------------| | `--alpha` | `0.05` | Significance threshold (multi-RAG) | | `--print-model-usage` | `false` | Print LLM token usage | +| `--account-url` | `null` | Azure Blob Storage account URL (managed-identity auth). Use when the config path is a `blob://` URI. | +| `--connection-string` | `null` | Azure Blob Storage connection string. Use when the config path is a `blob://` URI. | **Auto-detection**: If the YAML contains a `rag_methods` key, it runs in multi-RAG mode with automated significance testing. Otherwise, single-RAG mode. @@ -178,3 +182,36 @@ For comparing multiple RAG methods, use multi-RAG config format (include `rag_me - **Long-running**: Evaluation with many questions and trials can take hours. Use background execution. - **No `config init` for hierarchical/retrieval**: The `benchmark-qed-setup` skill only supports `autoe_assertion`, `autoe_pairwise`, and `autoe_reference`. For hierarchical, multi-RAG, and retrieval configs, create YAML manually. - **Advanced config types**: Use the `benchmark-qed-setup` skill for configuration guidance on advanced config types. 
+ +## Azure Blob Storage + +All `autoe` commands support reading config files from Azure Blob Storage using `blob://` URIs: + +```bash +# Config file in blob storage +uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-qed autoe assertion-scores \ + blob://my-container/eval/settings.yaml ./eval_output \ + --account-url https://myaccount.blob.core.windows.net +``` + +In addition, `settings.yaml` supports `input_storage` and `output_storage` blocks so the evaluation pipeline can read answers/assertions from and write results to Azure Blob Storage: + +```yaml +# Read answers and assertions from blob storage +input_storage: + type: blob + container_name: my-datasets + connection_string: ${AZURE_STORAGE_CONNECTION_STRING} + # Or use managed identity: + # account_url: https://myaccount.blob.core.windows.net + +# Write evaluation output to blob storage +output_storage: + type: blob + container_name: my-output + connection_string: ${AZURE_STORAGE_CONNECTION_STRING} +``` + +When using storage blocks, `answer_base_path` and `assertions_path` in the config are resolved relative to the storage container (not the local filesystem). + +See [references/config-reference.md](../benchmark-qed-setup/references/config-reference.md) for full `StorageConfig` fields. diff --git a/.apm/skills/benchmark-qed-autoq/SKILL.md b/.apm/skills/benchmark-qed-autoq/SKILL.md index 4443213..72b2ed8 100644 --- a/.apm/skills/benchmark-qed-autoq/SKILL.md +++ b/.apm/skills/benchmark-qed-autoq/SKILL.md @@ -41,6 +41,8 @@ uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-qed autoq |--------|-------------| | `--generation-types` | Specific types to generate (repeatable). CLI default: all except `data_linked`, but this skill always includes `data_linked` | | `--print-model-usage` | Print LLM token usage stats | +| `--account-url` | Azure Blob Storage account URL (managed-identity auth). Use when the config path is a `blob://` URI. 
Falls back to `$AZURE_STORAGE_ACCOUNT_URL`. | +| `--connection-string` | Azure Blob Storage connection string. Use when the config path is a `blob://` URI. Falls back to `$AZURE_STORAGE_CONNECTION_STRING`. | **Generation types and dependencies:** @@ -73,6 +75,12 @@ uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-qed autoq # Generate local + linked questions uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-qed autoq settings.yaml ./output \ --generation-types data_local --generation-types data_linked + +# Use a config stored in Azure Blob Storage +uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-qed autoq \ + blob://my-container/configs/settings.yaml ./output \ + --account-url https://myaccount.blob.core.windows.net \ + --generation-types data_local ``` **Output structure:** @@ -105,6 +113,8 @@ uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-qed genera |--------|-------------| | `--type` / `-t` | Assertion type: `local`, `global`, or `linked` (default: `local`) | | `--print-model-usage` | Print LLM token usage stats | +| `--account-url` | Azure Blob Storage account URL (managed-identity auth). Use when the config path is a `blob://` URI. | +| `--connection-string` | Azure Blob Storage connection string. Use when the config path is a `blob://` URI. 
| **Examples:** ```bash @@ -155,6 +165,39 @@ uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-qed assert - [ ] Step 4: (Optional) Generate additional assertions — use `generate-assertions` - [ ] Step 5: (Optional) Check assertion quality — use `assertion-stats` +## Azure Blob Storage + +All `autoq` and `generate-assertions` commands support reading config files from Azure Blob Storage using `blob://` URIs: + +```bash +# Config file in blob storage — the CLI downloads the config (and sibling prompt files) to a temp directory +uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-qed autoq \ + blob://my-container/project/settings.yaml ./output \ + --connection-string "$AZURE_STORAGE_CONNECTION_STRING" +``` + +In addition, `settings.yaml` supports `input.storage` and `output_storage` blocks to read/write data from Azure Blob Storage: + +```yaml +# Read input data from blob storage +input: + dataset_path: data/input.csv + storage: + type: blob + container_name: my-datasets + connection_string: ${AZURE_STORAGE_CONNECTION_STRING} + # Or use managed identity: + # account_url: https://myaccount.blob.core.windows.net + +# Write output to blob storage +output_storage: + type: blob + container_name: my-output + connection_string: ${AZURE_STORAGE_CONNECTION_STRING} +``` + +See [references/config-reference.md](../benchmark-qed-setup/references/config-reference.md) for full `StorageConfig` fields. + ## Gotchas - **Path resolution**: The `autoq` and `generate-assertions` commands resolve `output_dir` (and other relative paths) **relative to the settings.yaml file's directory**, not the current working directory. Always `cd` into the workspace directory first, or use absolute paths. For example, running `benchmark-qed autoq workspace/settings.yaml workspace/output` from the repo root creates output at `workspace/workspace/output/` (not `workspace/output/`). 
@@ -163,3 +206,4 @@ uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-qed assert - **Output is in files, not stdout**: All results are written to JSON/CSV/Parquet files. Parse the output files, not CLI stdout. - **Generation ordering**: `data_global` and `data_linked` depend on `data_local`. `activity_global` depends on `activity_local`. Running dependent types without their prerequisites produces silent empty results. - **`data_linked` CLI opt-in**: The CLI excludes `data_linked` by default, but this skill always includes it. If running the CLI manually outside this skill, add `--generation-types data_linked`. +- **Blob URI format**: Use `blob://<container>/<path>` for config paths. The CLI downloads the config and all sibling files (prompt templates) to a temp directory so relative paths resolve correctly. diff --git a/.apm/skills/benchmark-qed-setup/SKILL.md b/.apm/skills/benchmark-qed-setup/SKILL.md index b1b8443..c58e7db 100644 --- a/.apm/skills/benchmark-qed-setup/SKILL.md +++ b/.apm/skills/benchmark-qed-setup/SKILL.md @@ -70,6 +70,26 @@ Example: uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-qed config init autoq ./my_workspace ``` +**Storage options** for `config init`: +| Option | Description | +|--------|-------------| +| `--storage-type` / `-s` | `local` (default) or `blob`. When `blob`, storage config sections are scaffolded as active YAML (not commented out). | +| `--container-name` | Pre-fill the blob container name in generated storage config. | +| `--account-url` | Pre-fill the account URL (managed-identity auth) in generated storage config. | +| `--connection-string` | Pre-fill the connection string in generated storage config. | +| `--base-dir` | Pre-fill a base prefix path within the container. | + +When `--storage-type blob` is combined with `--account-url` or `--connection-string`, the generated config and prompt files are also uploaded directly to the blob container. 
+ +Example (blob): +```bash +uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-qed config init autoq ./my_workspace \ + --storage-type blob \ + --container-name my-datasets \ + --account-url https://myaccount.blob.core.windows.net \ + --base-dir experiments/run1 +``` + This creates: ``` root/ @@ -95,6 +115,23 @@ echo y | uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-q **Available datasets**: `AP_news`, `podcast`, `example_answers` +**Storage options** for `data download`: +| Option | Description | +|--------|-------------| +| `--storage-type` | Set to `blob` to upload the dataset to Azure Blob Storage instead of extracting locally. | +| `--container-name` | The blob container name. | +| `--account-url` | Azure storage account URL (managed-identity auth). | +| `--connection-string` | Azure storage connection string (alternative to `--account-url`). | +| `--base-dir` | Base prefix in blob storage. Files are stored as `{base_dir}/{output_dir}/`. | + +Example (download to blob): +```bash +echo y | uvx --from "git+https://github.com/microsoft/benchmark-qed" benchmark-qed data download AP_news datasets \ + --storage-type blob \ + --container-name my-datasets \ + --account-url https://myaccount.blob.core.windows.net +``` + ### Step 3 — Gather Configuration Choices from the User Before writing any values into `settings.yaml`, **prompt the user with `ask_user`** to collect the LLM / auth / endpoint settings. Do not guess — these decisions are environment-specific and getting them wrong wastes downstream LLM calls. Use enum/boolean fields whenever possible so the user picks from a known set rather than typing free-form text. @@ -138,6 +175,22 @@ Only ask the questions relevant to the chosen `config_type`: - `autoe_reference`: `reference.name` + `reference.answer_base_path`, list of `generated`, and `question_sets`. 
- `autoe_assertion`: in single-RAG mode, `generated.name` + `generated.answer_base_path` and `assertions.assertions_path`. In multi-RAG mode (`rag_methods` provided), ask for `input_dir`, `output_dir`, `rag_methods` list, and `question_sets`. +#### Storage fields (all config types, optional) + +Ask the user if they want to use Azure Blob Storage for input/output. If yes, collect: + +| Field | Type | Notes | +|-------|------|-------| +| `use_blob_storage` | boolean | Whether to configure cloud storage. | +| `storage_container_name` | string | Azure Blob container name (e.g. `my-datasets`). | +| `storage_auth_method` | enum (`connection_string`, `managed_identity`) | How to authenticate to Azure. | +| `storage_connection_string_env_var` | string | Env var name for connection string (default: `AZURE_STORAGE_CONNECTION_STRING`). Only when `storage_auth_method=connection_string`. | +| `storage_account_url` | string (uri) | Storage account URL. Only when `storage_auth_method=managed_identity`. | +| `storage_base_dir` | string | Optional prefix path within the container. | +| `separate_output_container` | boolean | Whether output uses a different container than input. | + +If storage is enabled, write the appropriate `input.storage`, `input_storage`, and/or `output_storage` blocks into `settings.yaml`. + If the user declines a field, fall back to the documented default and call out the assumption in your response. ### Step 4 — Apply the Answers @@ -212,3 +265,6 @@ Key highlights: - Config types `autoe_pairwise`, `autoe_reference`, and `autoe_assertion` generate different settings.yaml templates — use the correct type for your evaluation method. - Prompts are copied as `.txt` files using Python `string.Template` syntax (`$variable` or `${variable}`). - **`prompt_config` key**: The runtime expects `prompt_config` (singular) for all autoe config types. Both `benchmark-qed init` and `config init` now generate the correct key. 
If you hand-edit YAML, ensure you use `prompt_config`, not `prompts_config`. +- **`config init --storage-type blob`**: When combined with `--account-url` or `--connection-string`, the command uploads the generated `settings.yaml` and prompt files directly to blob storage. Without those auth options, it only scaffolds the storage YAML sections locally. +- **Blob URI format**: CLI commands accept `blob://<container>/<path>` for config paths. The CLI downloads the config and all sibling files (prompt templates) to a temp directory so relative paths resolve correctly. Credentials can be passed via `--account-url`/`--connection-string` or the environment variables `AZURE_STORAGE_ACCOUNT_URL`/`AZURE_STORAGE_CONNECTION_STRING`. +- **Storage config in YAML**: AutoQ uses `input.storage` (nested under `input`) and `output_storage` (top-level). AutoE uses `input_storage` and `output_storage` (both top-level). When storage is omitted, local filesystem is used. From 24cdb2d5981413d66a833343fe8e245edca99d11 Mon Sep 17 00:00:00 2001 From: Gaudy Blanco Date: Fri, 8 May 2026 10:19:37 -0600 Subject: [PATCH 7/8] Refactor parse_blob_uri to use Path.parts --- benchmark_qed/cli/config_resolver.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/benchmark_qed/cli/config_resolver.py b/benchmark_qed/cli/config_resolver.py index 000386b..942846b 100644 --- a/benchmark_qed/cli/config_resolver.py +++ b/benchmark_qed/cli/config_resolver.py @@ -78,18 +78,12 @@ def parse_blob_uri(uri: str) -> tuple[str, str]: Also accepts the single-slash form produced when the URI is round-tripped through :class:`pathlib.Path` (e.g. ``blob:/<container>/<path>``). 
""" - rest: str | None = None - if uri.startswith("blob://"): - rest = uri[len("blob://") :] - elif uri.startswith("blob:/"): - rest = uri[len("blob:/") :] - if rest is None or "/" not in rest: - msg = f"Invalid blob URI {uri!r}: expected blob:///" - raise typer.BadParameter(msg) - container, _, key = rest.partition("/") - if not container or not key: + parts = Path(uri).parts + if len(parts) < 3 or parts[0] != "blob:": msg = f"Invalid blob URI {uri!r}: expected blob:///" raise typer.BadParameter(msg) + container = parts[1] + key = "/".join(parts[2:]) return container, key From 264f6c4861c1311d6dc93643ef999d0fdd6067e0 Mon Sep 17 00:00:00 2001 From: Gaudy Blanco Date: Fri, 8 May 2026 10:24:02 -0600 Subject: [PATCH 8/8] Fix: only create input folder in local mode, not blob mode --- benchmark_qed/cli/init_config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/benchmark_qed/cli/init_config.py b/benchmark_qed/cli/init_config.py index af7a702..7bbefaa 100644 --- a/benchmark_qed/cli/init_config.py +++ b/benchmark_qed/cli/init_config.py @@ -595,8 +595,6 @@ def init( base_dir=base_dir, ) - ensure_input_folder(root) - # Collect prompt files based on config type prompt_mapping: dict[str, dict[str, str]] = {} match config_type: @@ -656,6 +654,7 @@ def init( target += f"/{base_dir}" typer.echo(f"Configuration files uploaded to {target}") else: + ensure_input_folder(root) _write_to_local( root=root, settings_content=settings_content,