diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..c893d390 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,4 @@ +.venv +.git +__pycache__/ +datasets/ \ No newline at end of file diff --git a/.env.template b/.env.template index 0a54b30d..a9686e62 100644 --- a/.env.template +++ b/.env.template @@ -1,2 +1,13 @@ -HF_TOKEN= -HF_ORGANIZATION= \ No newline at end of file +OPENROUTER_API_KEY= + +BENCHMARK_NAME="test" +BENCHMARK_SYSTEM_PROMPT="test prompt" +INPUT_S3_BUCKET="layerlens-private-test-organization" +INPUT_S3_KEY="benchmarks/test-project/benchmark-name/data.zip" +OUTPUT_S3_BUCKET="layerlens-private-test-organization" +OUTPUT_S3_KEY="benchmarks/test-project/benchmark-name/" + +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= + +HF_HUB_OFFLINE=1 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 00000000..205c9e96 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,47 @@ +name: YourBench CI + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.12] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install uv + run: pip install uv + + - name: Create virtual environment + run: uv venv + + - name: Install dependencies + run: | + . .venv/bin/activate + uv pip install -e . + uv pip install pytest pytest-cov + + - name: Run tests + run: | + . 
.venv/bin/activate + python -m pytest tests/ --cov=yourbench --cov-report=xml + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 + with: + file: ./coverage.xml + fail_ci_if_error: false diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 747e15bf..dca9d628 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -1,5 +1,8 @@ name: Quality +permissions: + contents: read + on: push: branches: diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..dd39ca16 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,43 @@ +FROM python:3.12-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + curl \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Copy all yourbench files +COPY . . + +# Install dependencies and yourbench in editable mode +RUN pip install --upgrade pip && \ + pip install boto3 pyyaml awscli && \ + pip install -e . 
+ +# Verify installation +RUN yourbench --version || echo "Yourbench installation verification failed but continuing build" + +# Environment variables (will be overridden at runtime) +ENV BENCHMARK_NAME="" +ENV BENCHMARK_SYSTEM_PROMPT="" +ENV INPUT_S3_BUCKET="" +ENV INPUT_S3_KEY="" +ENV OUTPUT_S3_BUCKET="" +ENV OUTPUT_S3_KEY="" +ENV OPENROUTER_API_KEY="" +ENV AWS_ACCESS_KEY_ID="" +ENV AWS_SECRET_ACCESS_KEY="" +ENV AWS_DEFAULT_REGION="us-east-1" +ENV WORKDIR="/app" + +# Create a startup script to run the processing workflow +RUN printf '#!/bin/bash\n\ + echo "Running yourbench workflow..."\n\ + exec python run_yourbench.py\n' > /app/entrypoint.sh && \ + chmod +x /app/entrypoint.sh + +# Use the startup script as entry point +ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/README.docker.md b/README.docker.md new file mode 100644 index 00000000..953df1b0 --- /dev/null +++ b/README.docker.md @@ -0,0 +1,67 @@ +# YourbenchProcessor Docker Container + +This Docker container automates the process of: +1. Downloading data from AWS S3 +2. Processing with yourbench +3. Uploading results back to AWS S3 + +## Required Environment Variables + +The container requires the following environment variables: + +- `INPUT_S3_BUCKET`: S3 bucket name for input data +- `INPUT_S3_KEY`: S3 object key for input data (ZIP file) +- `OUTPUT_S3_BUCKET`: S3 bucket name for output results +- `OUTPUT_S3_KEY`: S3 object key for output results +- `OPENROUTER_API_KEY`: API key for OpenRouter +- `AWS_ACCESS_KEY_ID`: AWS access key with S3 permissions +- `AWS_SECRET_ACCESS_KEY`: AWS secret key with S3 permissions +- `AWS_DEFAULT_REGION`: AWS region (default: us-east-1) + +## Building the Docker Image + +```bash +docker build -t yourbench-processor . 
+``` + +## Running the Container + +```bash +docker run -e INPUT_S3_BUCKET=your-input-bucket \ + -e INPUT_S3_KEY=input/data.zip \ + -e OUTPUT_S3_BUCKET=your-output-bucket \ + -e OUTPUT_S3_KEY=output/results.zip \ + -e OPENROUTER_API_KEY=your-openrouter-key \ + -e AWS_ACCESS_KEY_ID=your-aws-key-id \ + -e AWS_SECRET_ACCESS_KEY=your-aws-secret \ + -e AWS_DEFAULT_REGION=us-east-1 \ + yourbench-processor +``` + +## Process Flow + +1. Downloads the specified zip file from S3 +2. Extracts contents to `task/data/raw` directory +3. Creates a `config.yaml` file in `task/dataset` directory +4. Runs yourbench with the created config +5. Zips the `task/dataset` directory +6. Uploads the zipped results back to S3 + +## Local Testing + +For local testing without Docker: + +```bash +# Set environment variables +export INPUT_S3_BUCKET=your-input-bucket +export INPUT_S3_KEY=input/data.zip +export OUTPUT_S3_BUCKET=your-output-bucket +export OUTPUT_S3_KEY=output/results.zip +export OPENROUTER_API_KEY=your-openrouter-key +export AWS_ACCESS_KEY_ID=your-aws-key-id +export AWS_SECRET_ACCESS_KEY=your-aws-secret +export AWS_DEFAULT_REGION=us-east-1 + +# Run the script +python run_yourbench.py +``` diff --git a/README.md b/README.md index 1f961225..696c8684 100644 --- a/README.md +++ b/README.md @@ -1,198 +1,176 @@ - -
- - - YourBench Logo + YourBench Logo

YourBench: A Dynamic Benchmark Generation Framework

-

- - [GitHub] - · - [Dataset] - · - [Documentation] - · - [Paper] - -

- - GitHub Repo stars -

- - YourBench Demo Video -
- Watch Demo on YouTube -
- Watch our 3-minute demo of the YourBench pipeline -
+

+ + [GitHub] · + [Dataset] · + [Documentation] · + [Paper] +

--- -> **YourBench** is an open-source framework for generating domain-specific benchmarks in a zero-shot manner. It aims to keep your large language models on their toes—even as new data sources, domains, and knowledge demands evolve. +> **YourBench** is an open-source framework for generating domain-specific benchmarks in a zero-shot manner. It aims to keep your large language models on their toes – even as new data sources, domains, and knowledge demands evolve. -**Highlights**: -- **Dynamic Benchmark Generation**: Produce diverse, up-to-date questions from real-world source documents (PDF, Word, HTML, even multimedia). -- **Scalable & Structured**: Seamlessly handles ingestion, summarization, and multi-hop chunking for large or specialized datasets. -- **Zero-Shot Focus**: Emulates real-world usage scenarios by creating fresh tasks that guard against memorized knowledge. -- **Extensible**: Out-of-the-box pipeline stages (ingestion, summarization, question generation), plus an easy plugin mechanism to accommodate custom models or domain constraints. +YourBench tackles a critical evaluation gap for LLMs. Traditional static benchmarks are quickly **saturated** or contaminated by training data, making it hard to assess models on new knowledge. Domain-specific or up-to-date evaluation is often costly and slow with human annotation. **YourBench addresses this by enabling dynamic, automated generation of reliable, domain-tailored benchmarks directly from your data, without manual labeling**. In a recent study, YourBench replicated several subsets of a popular benchmark (MMLU) using minimal source text for **under \$15** in total cost, while preserving the original ranking of model performance (Spearman ρ = 1). By grounding questions in user-provided documents, YourBench ensures evaluations stay relevant and **truly test a model’s knowledge on content it hasn’t seen before**. ---- - -## Quick Start (Alpha) +## Installation -```bash -# 1. 
Clone the repo -git clone https://github.com/huggingface/yourbench.git -cd yourbench - -# Use uv to install the dependencies -# pip install uv # if you do not have uv already -uv venv -source .venv/bin/activate -uv sync -uv pip install -e . - -# 3. Get a key from https://openrouter.ai/ and add it to the .env file (or make your own config with a different model!) -touch .env -echo "HF_TOKEN=" >> .env -echo "HF_ORGANIZATION=" >> .env - -# 4. Run the pipeline with an example config -yourbench run --config example/configs/example.yaml -``` +YourBench is available on PyPI and requires **Python 3.12** (the package declares `requires-python >=3.12, <3.13`). You can install it as follows: -**Note**: The above instructions are a work-in-progress, and more comprehensive usage info will be provided soon. +* **Install via PyPI (stable release):** + ```bash + # uv (recommended; get it here: https://docs.astral.sh/uv/getting-started/installation/) + uv pip install yourbench -# Process Flow + # pip (standard support) + pip install yourbench + ``` -![Process Flow](docs/assets/process-figure.png) + This will install the latest published version (e.g. `0.3.1`). +* **Install from source (development version):** -## Key Features + ```bash + git clone https://github.com/huggingface/yourbench.git + cd yourbench + + # uv, recommended + uv venv + source .venv/bin/activate + uv pip install -e . -- **Automated Benchmark Generation** - Generate question-answer pairs that test LLMs on specific domains or knowledge slices, derived directly from your raw documents. + # pip + pip install -e . + ``` -- **Flexible Pipeline** - Each stage (ingestion, summarization, chunking, multi-/single-hop QG, deduplication) can be enabled or disabled via YAML config. Fine-grained control allows minimal or comprehensive runs. + Installing from source is recommended if you want the latest updates or to run the included example configuration. 
+ -- **Robust Config System** - A single YAML config controls model roles, data paths, chunking parameters, question generation instructions, deduplication thresholds, etc. +> **Note:** If you plan to use models that require API access (e.g. OpenAI GPT-4o or Hugging Face Inference API), make sure to have the appropriate credentials. You’ll also need a Hugging Face token (to optionally upload results). See below for how to configure these before running YourBench. -- **Multi-Model Ensemble Support** - Use different LLMs for ingestion, summarization, question generation, or answering. This fosters broader coverage and question style diversity. +## Quickstart Usage -- **Deduplication & Quality Filtering** - Automatic grouping of near-duplicate questions to prune and keep a curated set. +Once installed, YourBench can be run from the command line to generate a custom evaluation set. Here’s a quick example: -- **Extensive Logging & Analysis** - Built-in modules measure dataset coverage, question distribution, difficulty metrics, and more. +```bash +# 1. (Optional) If not done already, install YourBench +pip install yourbench -- **Public or Private** - Optionally push ingested or generated data to the Hugging Face Hub or keep it local. +# 2. Prepare your API credentials (for model inference and Hub access) +# For example, create a .env file with required keys: +# echo "OPENROUTER_API_KEY=" >> .env # Example +echo "HF_TOKEN=" >> .env # Hugging Face token (for Hub datasets & inference) +echo "HF_ORGANIZATION=" >> .env # (Optional) Organization name for dataset pushing -- **Extensible** - Each pipeline step is modular. Easily add custom question-generation prompts, chunking logic, or domain-specific expansions. +# 3. Run the pipeline on the provided example config (uses sample docs and models) +yourbench run --config example/configs/simple_example.yaml ---- +# 4. 
(Optional) Run the pipeline on your own documents: +yourbench run --config my_custom_config.yaml +``` -## Core Concepts & Workflow +The **example configuration** `example/configs/simple_example.yaml` (included in the repository) demonstrates a basic setup. It specifies sample documents and default models for each stage of the pipeline. In step 3 above, YourBench will automatically ingest the example documents, generate a set of Q\&A pairs, and output a Hugging Face Dataset containing the evaluation questions and answers. -YourBench follows a multi-stage approach: +For your own data, you can create a YAML config pointing to your documents and preferred models. For instance, you might specify a folder of PDFs or text files under a `documents` field, and choose which LLM to use for question generation. **YourBench is fully configurable** – you can easily **toggle stages** on or off and swap in different models. *For example: you could disable the summarization stage for very short texts, or use a powerful, large, API model for question generation while using a faster local model for summarization.* The possibilities are endless! Simply adjust the YAML, and the pipeline will accommodate it. (See the [usage example](https://github.com/huggingface/yourbench/blob/main/example/configs/advanced_example.yaml) for all available options!) -1. **Document Ingestion** - Convert PDFs, HTML, Word, or text into a standardized Markdown format. +## Process Flow -2. **Summarization** - Generate a concise "global summary" for each document, using a designated summarization LLM. +![YourBench pipeline process flow diagram – from document ingestion to evaluation](docs/assets/yourbench_pipeline.png) -3. **Chunking** - Split or chunk documents (and optionally combine multiple smaller segments) based on text similarity or length constraints. +Under the hood, YourBench follows a multi-stage pipeline to turn raw documents into a ready-to-use benchmark dataset: -4. 
**Question Generation** - - **Single-Shot**: Create straightforward, single-chunk questions. - - **Multi-Hop**: Combine multiple chunks to produce more complex, integrative questions. +1. **Document Ingestion** – Convert PDFs, HTML, Word docs, or raw text files into a standardized format (Markdown) for downstream processing. +2. **Summarization** – Generate a concise *global summary* of each document using a designated summarization model. This helps distill key points and limit the scope for question generation. +3. **Chunking** – Split documents into smaller chunks (and optionally merge small pieces) based on semantic similarity or length constraints. This ensures long or complex documents are broken into manageable sections for Q\&A generation. +4. **Question Generation** – For each chunk (or combination of chunks), generate questions: -5. **Deduplication** - Remove or group near-duplicate questions across your dataset using embedding-based similarity. + * *Single-Hop:* Create straightforward questions answerable from a single chunk. + * *Multi-Hop:* Combine multiple chunks to produce more complex questions that require integrating information from different parts of the content. +5. **Deduplication** – Remove or group together near-duplicate questions using embedding-based similarity, to avoid redundant entries in your benchmark. +6. **Analysis** – Evaluate the question set for coverage and difficulty. YourBench provides logging and analysis tools to measure how well the questions cover the source content, the distribution of topics, estimated difficulty levels, etc., and can run custom analysis modules. +7. **Export** – Finally, output the generated Q\&A benchmark. The results can be saved as a local dataset (using the Hugging Face `datasets` format) or even uploaded to the Hugging Face Hub for sharing. This makes it easy to evaluate models on the new benchmark or even set up a public leaderboard. -6. 
**Analysis** - Evaluate question distribution, difficulty, coverage, or run custom analyses. +Throughout this process, **YourBench ensures the questions are grounded in your provided documents**, rather than what an LLM might already know. By using documents (and even an optional fresh document dataset like *Tempora-0325* for time-sensitive topics), the pipeline minimizes reliance on a model’s parametric memory, yielding more truthful and up-to-date evaluation queries. -7. **Export** - The resulting question sets can be stored locally or uploaded as a new dataset on the Hugging Face Hub. +## Try it Online (Hugging Face Spaces) ---- +You can **try YourBench right away in your browser** – no installation needed: -## 🧰 Development +* **[YourBench Demo Space](https://huggingface.co/spaces/yourbench/demo)** – Use our ready-to-go web demo to upload a document (or paste text) and generate a custom evaluation set with **one click**, complete with an instant model leaderboard. **(This free demo will use a default set of models to answer the questions and show how different models perform.)** +* **[YourBench Advanced Space](https://huggingface.co/spaces/yourbench/advanced)** – For power users, the advanced demo lets you provide a custom YAML config and plug in your own models or API endpoints. This gives you full control over the pipeline (choose specific models, adjust chunking parameters, etc.) via a convenient UI, right from the browser. -We use: -- [Ruff](https://github.com/astral-sh/ruff) for code formatting and linting -- [pytest](https://docs.pytest.org/) for testing +👉 Both hosted apps are available on Hugging Face Spaces under the **[yourbench](https://huggingface.co/yourbench)** organization. Give them a try to see how YourBench can generate benchmarks tailored to your use-case in minutes. +## Contributing -## 🚀 Try YourBench on Hugging Face +Contributions are welcome! 
If you’d like to improve YourBench or add new features, please follow these steps: -To test YourBench on your own documents: +1. **Fork** the repository (on GitHub). +2. **Create a branch** for your feature (`git checkout -b feature/amazing-feature`). +3. **Install dev dependencies** (e.g. `pip install -r requirements.txt` or use `poetry/uv` if available) and set up the project for development. +4. **Make your changes**, adding new tests if applicable. +5. **Run tests** (`pytest`) and ensure code style compliance with `make style` and `make quality` (we use [Ruff](https://github.com/astral-sh/ruff) for linting). +6. **Commit** your changes (`git commit -m 'Add amazing feature'`). +7. **Push** to your branch (`git push origin feature/amazing-feature`). +8. Open a **Pull Request** on the main repository. -- Use the [Demo Space](https://huggingface.co/spaces/yourbench/demo) to generate a dataset and leaderboard in one click – entirely free -- Use the [Advanced Space](https://huggingface.co/spaces/yourbench/advanced) for full control over the pipeline, with custom configs and your own inference +We actively review PRs and welcome improvements or fixes from the community. For major changes, feel free to open an issue first to discuss the idea. +## Highlights -## 🤝 Contributing -1. Fork the repository -2. Create your feature branch (`git checkout -b feature/amazing-feature`) -3. Install development dependencies -4. Make your changes -5. Run tests and ensure code style compliance -6. Commit your changes (`git commit -m 'Add amazing feature'`) -7. Push to the branch (`git push origin feature/amazing-feature`) -8. Open a Pull Request +* **Dynamic Benchmark Generation** – Produce diverse, up-to-date question-answer pairs derived from real-world source documents (PDF, Word, HTML, even multimedia). +* **Scalable & Structured** – Seamlessly handle ingestion, summarization, and multi-hop chunking for large or specialized datasets. 
+* **Extensible Pipeline** – Use out-of-the-box stages (ingestion, summarization, question generation) or plug in custom models and logic to accommodate domain-specific needs. +* **Robust Configuration** – Control the entire pipeline via a single YAML config (model choices, data paths, chunking parameters, generation prompts, deduplication thresholds, etc.). +* **Multi-Model Support** – Assign different LLMs for each stage (ingestion, summarization, QG, answering), fostering broader coverage and question-style diversity. +* **Deduplication & Quality Filtering** – Automatically group near-duplicates to prune questions and retain a curated set of high-quality queries. +* **Logging & Analysis** – Built-in metrics evaluate dataset coverage, question distribution, difficulty, and more. +* **Flexible Output** – Save generated benchmarks locally or push them to the Hugging Face Hub for sharing or public leaderboards. -## 📄 License +
-This project is licensed under the Apache-2.0 License - see the [LICENSE](LICENSE) file for details. + + YourBench Demo Video +
+ Watch Demo on YouTube +
+ Watch our 3-minute demo of the YourBench pipeline +
+
-## 🙏 Acknowledgments +## License -- [Sentence Transformers](https://www.sbert.net/) for semantic embeddings -- [Hugging Face](https://huggingface.co/) for dataset infrastructure +This project is licensed under the Apache 2.0 License – see the [LICENSE](LICENSE) file for details. You are free to use, modify, and distribute YourBench in either commercial or academic projects under the terms of this license. ## Citation -If YourBench is helpful to you, please cite!: +If you use **YourBench** in your research or applications, please consider citing our paper: -``` +```bibtex @misc{shashidhar2025yourbencheasycustomevaluation, - title={YourBench: Easy Custom Evaluation Sets for Everyone}, + title={YourBench: Easy Custom Evaluation Sets for Everyone}, author={Sumuk Shashidhar and Clémentine Fourrier and Alina Lozovskia and Thomas Wolf and Gokhan Tur and Dilek Hakkani-Tür}, year={2025}, eprint={2504.01833}, archivePrefix={arXiv}, primaryClass={cs.CL}, - url={https://arxiv.org/abs/2504.01833}, + url={https://arxiv.org/abs/2504.01833} } ``` diff --git a/docs/assets/process-figure.png b/docs/assets/process-figure.png deleted file mode 100644 index 849fc798..00000000 Binary files a/docs/assets/process-figure.png and /dev/null differ diff --git a/docs/assets/yourbench_pipeline.png b/docs/assets/yourbench_pipeline.png new file mode 100644 index 00000000..417abffa Binary files /dev/null and b/docs/assets/yourbench_pipeline.png differ diff --git a/docs/yourbench/analysis/view_sample_questions.md b/docs/yourbench/analysis/view_sample_questions.md new file mode 100644 index 00000000..2ebe4e5e --- /dev/null +++ b/docs/yourbench/analysis/view_sample_questions.md @@ -0,0 +1,33 @@ +## Viewing Sample Questions + +Once you have run the pipeline and generated the `single_shot_questions` and +`multi_hop_questions` subsets, you can quickly preview a handful of them from +the command line. 
+ +```bash +# syntax: yourbench analyze view_sample_questions CONFIG_PATH [SAMPLE_SIZE] + +yourbench analyze view_sample_questions example/configs/simple_example.yaml 5 +```` + +* **`CONFIG_PATH`** – path to the YAML/JSON config you used for the pipeline. +* **`SAMPLE_SIZE`** – *(optional, default = 5)* number of random questions to + display from each subset. + +The command prints two Rich tables: + +| Column | Description | +| -------------- | ----------------------------------------------------- | +| **Q #** | Running index of the sampled question | +| **Q Type** | Model-reported category (factual, conceptual, etc.) | +| **Question** | Full text of the question | +| **Answer** | Correct answer (or correct option letter) | +| **Choices** | Multiple-choice options if present; “N/A” otherwise | +| **Difficulty** | Estimated difficulty 1-10 (as generated by the model) | + +This quick preview is handy for: + +* sanity-checking that generation looks reasonable before large runs; +* spot-checking difficulty levels or citation formatting; +* debugging prompt or parsing issues without opening full datasets. + diff --git a/example/configs/advanced_example.yaml b/example/configs/advanced_example.yaml index a79d89d6..88e4802a 100644 --- a/example/configs/advanced_example.yaml +++ b/example/configs/advanced_example.yaml @@ -180,7 +180,8 @@ pipeline: chunking: run: true chunking_configuration: - chunking_mode: fast_chunking # "fast_chunking" or "semantic_chunking" + # chunking_mode: fast_chunking # "fast_chunking" or "semantic_chunking" + # WARNING: SEMANTIC CHUNKING IS TEMPORARILY REMOVED AFTER v0.3.1. l_max_tokens: 128 # Each chunk’s maximum token length token_overlap: 0 # Overlap between chunks in token-based splitting encoding_name: cl100k_base # Tokenizer name for measuring token lengths @@ -208,7 +209,7 @@ pipeline: # additional_instructions: "Generate factual, short-answer questions at a college level." 
# Control the format of generated questions: - # question_type: "open-ended" # "open-ended" (default): model generates the answer to the question + # question_mode: "open-ended" # "open-ended" (default): model generates the answer to the question # # "multi-choice": model creates options (A), (B), (C), (D) and selects the correct one # If your documents are large, you can randomly sample chunks to reduce cost: @@ -222,7 +223,7 @@ pipeline: # additional_instructions: "Try to integrate multiple pieces of evidence across chunks to create deeper questions." # Control the format of generated questions: - # question_type: "open-ended" # "open-ended" (default): model generates the answer to the question + # question_mode: "open-ended" # "open-ended" (default): model generates the answer to the question # # "multi-choice": model creates options (A), (B), (C), (D) and selects the correct one # Similarly, you can sample the multi-hop chunks to cut down on inference: @@ -237,6 +238,7 @@ pipeline: # quick or “lightweight” downstream evaluations. lighteval: run: true + include_document_text: false # Set to false to exclude full document text from the dataset (saves memory) # 8) CITATION_SCORE_FILTERING # Finally, runs fuzzy-match scoring on each question’s “citations” vs. 
the diff --git a/pyproject.toml b/pyproject.toml index 54ddf83f..0425c300 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "yourbench" -version = "0.3.0" +version = "0.3.1" authors = [ { name = "Sumuk Shashidhar", email = "sumuks2@illinois.edu" }, { name = "Alina Lozovskaia", email = "alina.lozovskaia@huggingface.co" }, @@ -12,36 +12,22 @@ readme = "README.md" requires-python = ">=3.12, <3.13" dependencies = [ "asyncio>=3.4.3", - "black>=25.1.0", "click>=8.1.7", "datasets>=3.3.0", "hf-transfer>=0.1.9", - "huggingface-hub[inference,hf_xet]>=0.30.2", + "huggingface-hub[inference,hf_xet]==0.30.2", "loguru>=0.7.3", "markitdown[all]>=0.0.2", - "matplotlib>=3.10.0", "python-dotenv>=1.0.1", "rich>=13.7.0", "ruff>=0.11.1", - "scikit-learn>=1.6.1", - "seaborn>=0.13.2", "thefuzz>=0.22.1", "tiktoken>=0.9.0", "tqdm>=4.67.1", + "trafilatura>=2.0.0", "typer>=0.15.2", ] -[project.optional-dependencies] -semantic = [ - "torch>=2.6.0", - "transformers>=4.48.3", - "bert-score>=0.3.13", - "rouge-score>=0.1.2", -] -all = [ - "yourbench[semantic]", -] - [project.scripts] yourbench = "yourbench.main:main" diff --git a/run_docker.sh b/run_docker.sh new file mode 100755 index 00000000..e2e4306f --- /dev/null +++ b/run_docker.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# Script to build and run the yourbench Docker container +set -e + +# Load environment variables from .env if present +if [ -f .env ]; then + set -a + . .env + set +a +fi + +# Build the Docker image +echo "Building Docker image..." +docker build -t yourbench-processor . + +# Check if environment variables are set +if [ -z "$INPUT_S3_BUCKET" ] || [ -z "$INPUT_S3_KEY" ] || [ -z "$OUTPUT_S3_BUCKET" ] || [ -z "$OUTPUT_S3_KEY" ] || [ -z "$OPENROUTER_API_KEY" ]; then + echo "Error: Required environment variables are not set." 
+ echo "Please set these variables before running:" + echo " - BENCHMARK_NAME: benchmark name" + echo " - BENCHMARK_SYSTEM_PROMPT: benchmark system prompt" + echo " - INPUT_S3_BUCKET: S3 bucket containing input data" + echo " - INPUT_S3_KEY: S3 key for input data zip file" + echo " - OUTPUT_S3_BUCKET: S3 bucket for output data" + echo " - OUTPUT_S3_KEY: S3 key for output data" + echo " - OPENROUTER_API_KEY: API key for OpenRouter" + echo "" + echo "Example:" + echo " export BENCHMARK_NAME=benchmark-name" + echo " export BENCHMARK_SYSTEM_PROMPT=benchmark-system-prompt" + echo " export INPUT_S3_BUCKET=my-input-bucket" + echo " export INPUT_S3_KEY=input/data.zip" + echo " export OUTPUT_S3_BUCKET=my-output-bucket" + echo " export OUTPUT_S3_KEY=output/results.zip" + echo " export OPENROUTER_API_KEY=your-api-key" + exit 1 +fi + +# Run the Docker container +echo "Running yourbench processor Docker container..." +docker run --rm \ + -e BENCHMARK_NAME="$BENCHMARK_NAME" \ + -e BENCHMARK_SYSTEM_PROMPT="$BENCHMARK_SYSTEM_PROMPT" \ + -e INPUT_S3_BUCKET="$INPUT_S3_BUCKET" \ + -e INPUT_S3_KEY="$INPUT_S3_KEY" \ + -e OUTPUT_S3_BUCKET="$OUTPUT_S3_BUCKET" \ + -e OUTPUT_S3_KEY="$OUTPUT_S3_KEY" \ + -e OPENROUTER_API_KEY="$OPENROUTER_API_KEY" \ + -e AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \ + -e AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \ + -e AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-east-1}" \ + yourbench-processor + +echo "Yourbench processing complete!" 
diff --git a/run_yourbench.py b/run_yourbench.py new file mode 100644 index 00000000..1585c5b0 --- /dev/null +++ b/run_yourbench.py @@ -0,0 +1,222 @@ +import os +import boto3 +import zipfile +import yaml +import logging +from pathlib import Path + +from yourbench.utils.convert_to_excel_module import convert_datasets_to_excel +from yourbench.utils.convert_to_atlas_module import convert_dataset + +# Setup logging +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +# Default instructions injected into BOTH question-generation stages +# (single-shot + multi-hop) as `additional_instructions`. yourbench's default +# is "Generate questions to test an undergraduate student", which yields +# generic, self-contained comprehension questions that frontier models can +# answer WITHOUT the source — useless for measuring document-specific +# knowledge. This forces closed-book, fact-anchored questions instead. +# Override per-deployment with the QUESTION_GENERATION_INSTRUCTIONS env var. +DEFAULT_CLOSED_BOOK_INSTRUCTIONS = ( + "Generate CLOSED-BOOK, factual-recall questions that test whether a model has the " + "specific knowledge contained in this document. Every question MUST hinge on a concrete, " + "document-specific detail stated in the source — an exact name, number, value, configuration, " + "limit, default, sequence of steps, or described behavior. A knowledgeable expert who has NOT " + "read this document must be UNABLE to answer it from general domain knowledge or from the wording " + "of the question itself. Do NOT generate generic questions such as 'what are the benefits of X', " + "'how does X enhance/improve Y', or 'why is X important' — those are answerable by reasoning alone. " + "Keep each answer short, concrete, and verifiable against the source (a fact, value, name, or brief " + "enumeration), not an open-ended explanation. 
Prefer questions a generic model would plausibly get " + "wrong without this document." +) + + +def download_from_s3(bucket_name, object_key, local_path): + """Download file from S3 bucket""" + logger.info(f"Downloading {object_key} from bucket {bucket_name} to {local_path}") + s3_client = boto3.client("s3") + s3_client.download_file(bucket_name, object_key, local_path) + logger.info("Download completed") + + +def unzip_file(zip_path, extract_dir): + """Unzip file to specified directory""" + logger.info(f"Extracting {zip_path} to {extract_dir}") + os.makedirs(extract_dir, exist_ok=True) + with zipfile.ZipFile(zip_path, "r") as zip_ref: + zip_ref.extractall(extract_dir) + logger.info("Extraction completed") + + +def create_config_file(config_content, config_path): + """Create config.yaml file""" + logger.info(f"Creating config file at {config_path}") + os.makedirs(os.path.dirname(config_path), exist_ok=True) + with open(config_path, "w") as f: + yaml.dump(yaml.safe_load(config_content), f) + logger.info("Config file created") + + +def run_yourbench(config_path): + """Run yourbench with the provided config using direct Python API call.""" + logger.info(f"Running yourbench with config {config_path}") + try: + from yourbench.main import main as yourbench_main + import sys + + # Simulate CLI arguments for Typer + sys_argv_backup = sys.argv.copy() + sys.argv = ["yourbench", "run", "--config", str(config_path)] + try: + yourbench_main() + except SystemExit as e: + logger.info( + f"yourbench exited with code {e.code} (caught SystemExit, continuing)" + ) + finally: + sys.argv = sys_argv_backup + logger.info("yourbench execution completed successfully") + return "Execution completed successfully" + except Exception as e: + logger.error(f"Error during yourbench execution: {str(e)}") + raise + + +def upload_to_s3(local_path, bucket_name, object_key): + logger.info(f"Uploading {local_path} to bucket {bucket_name} as {object_key}") + s3_client = boto3.client("s3") + 
s3_client.upload_file(local_path, bucket_name, object_key) + logger.info("Upload completed") + + +def upload_directory_to_s3(directory_path, bucket_name, s3_prefix=""): + for filename in os.listdir(directory_path): + local_path = os.path.join(directory_path, filename) + if os.path.isfile(local_path): + object_key = os.path.join(s3_prefix, filename) if s3_prefix else filename + upload_to_s3(local_path, bucket_name, object_key) + + +def main(): + # Get environment variables + benchmark_name = os.environ.get("BENCHMARK_NAME") + benchmark_system_prompt = os.environ.get("BENCHMARK_SYSTEM_PROMPT") + input_bucket = os.environ.get("INPUT_S3_BUCKET") + input_key = os.environ.get("INPUT_S3_KEY") + output_bucket = os.environ.get("OUTPUT_S3_BUCKET") + output_key = os.environ.get("OUTPUT_S3_KEY") + + if not all([input_bucket, input_key, output_bucket, output_key]): + logger.error("Missing required environment variables") + raise ValueError("Required environment variables are missing") + + # Define local paths + base_dir = Path(os.environ.get("WORKDIR", "/app")) + download_path = base_dir / "input.zip" + raw_data_dir = base_dir / "task/data/raw" + dataset_dir = base_dir / "task/dataset" + config_path = dataset_dir / "config.yaml" + excel_dir = base_dir / "task/excel" + + # Create required directories + os.makedirs(raw_data_dir, exist_ok=True) + os.makedirs(dataset_dir, exist_ok=True) + + # Step 1: Download file from S3 + download_from_s3(input_bucket, input_key, download_path) + + # Step 2: Unzip file to raw data directory + unzip_file(download_path, raw_data_dir) + + # Step 3: Create config.yaml + config_content = """ +hf_configuration: + hf_dataset_name: task + +local_dataset_dir: task/dataset + +model_list: + - model_name: openai/gpt-4o + provider: null + base_url: "https://openrouter.ai/api/v1" + api_key: $OPENROUTER_API_KEY + max_concurrent_requests: 10 + +pipeline: + ingestion: + source_documents_dir: task/data/raw + output_dir: task/data/processed + upload_ingest_to_hub: 
+ summarization: + chunking: + single_shot_question_generation: + chunk_sampling: + mode: "count" + value: 50 + multi_hop_question_generation: + chunk_sampling: + mode: "count" + value: 50 + lighteval: + run: true + include_document_text: false # Set to false to exclude full document text from the dataset (saves memory) + citation_score_filtering: +""" + # Inject closed-book question-generation instructions into both generation + # stages. Without this, yourbench uses its generic default and produces + # questions answerable without the source document. + question_instructions = os.environ.get( + "QUESTION_GENERATION_INSTRUCTIONS", DEFAULT_CLOSED_BOOK_INSTRUCTIONS + ).strip() + config_dict = yaml.safe_load(config_content) + pipeline_cfg = config_dict.setdefault("pipeline", {}) + for stage in ("single_shot_question_generation", "multi_hop_question_generation"): + stage_cfg = pipeline_cfg.get(stage) or {} + stage_cfg["additional_instructions"] = question_instructions + pipeline_cfg[stage] = stage_cfg + logger.info(f"Creating config file at {config_path} (closed-book question instructions applied)") + os.makedirs(os.path.dirname(config_path), exist_ok=True) + with open(config_path, "w") as f: + yaml.dump(config_dict, f) + + # Step 4: Run yourbench + run_yourbench(config_path) + + # Step 5: Convert datasets to Excel + convert_datasets_to_excel(str(dataset_dir), str(excel_dir), logger=logger) + + # Step 6: Convert to Atlas format + try: + lighteval_path = dataset_dir / "lighteval" + if lighteval_path.exists(): + logger.info(f"Converting lighteval dataset to Atlas format") + convert_dataset( + hf_path=str(lighteval_path), + name=benchmark_name, + system_prompt=benchmark_system_prompt, + full_description="Dataset for evaluating built-in knowledge", + short_description="Fact-based knowledge", + category="YourBench", + output_dir=str(base_dir / "task"), # creates task/atlas_dataset/ + ) + logger.info(f"Atlas conversion completed.") + else: + logger.warning( + f"Lighteval 
dataset not found at {lighteval_path}, skipping Atlas conversion" + ) + except Exception as e: + logger.error(f"Atlas conversion failed: {e}") + logger.warning("Continuing with the rest of the pipeline") + + upload_directory_to_s3( + base_dir / "task" / benchmark_name, output_bucket, output_key + ) + logger.info("All tasks completed successfully") + + +if __name__ == "__main__": + main() diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py new file mode 100644 index 00000000..d4c47df0 --- /dev/null +++ b/tests/integration/test_pipeline.py @@ -0,0 +1,446 @@ +import os +import shutil +import tempfile +from unittest.mock import MagicMock, patch + +import pytest + +from datasets import Dataset + + +# Fixture for temporary directory +@pytest.fixture +def temp_dir(): + dir_path = tempfile.mkdtemp() + yield dir_path + shutil.rmtree(dir_path) + + +# Fixture for mock configuration +@pytest.fixture +def mock_config(temp_dir): + return { + "settings": {"debug": False}, + "hf_configuration": { + "token": "fake_token", + "hf_organization": "fake_org", + "private": True, + "hf_dataset_name": "fake_dataset", + "concat_if_exist": False, + }, + "local_dataset_dir": temp_dir, + "model_list": [ + { + "model_name": "fake_model", + "provider": None, + "api_key": "fake_key", + "base_url": "http://localhost:8000/v1", + "max_concurrent_requests": 1, + } + ], + "model_roles": { + "ingestion": ["fake_model"], + "summarization": ["fake_model"], + "chunking": ["fake_model"], + "single_shot_question_generation": ["fake_model"], + "multi_hop_question_generation": ["fake_model"], + }, + "pipeline": { + "ingestion": { + "run": True, + "source_documents_dir": os.path.join(temp_dir, "raw"), + "output_dir": os.path.join(temp_dir, "processed"), + }, + "upload_ingest_to_hub": { + "run": False, + "source_documents_dir": os.path.join(temp_dir, "processed"), + }, + "summarization": {"run": True}, + "chunking": { + "run": True, + "chunking_configuration": { + 
"l_max_tokens": 128, # Only max_tokens is used now + "h_min": 2, + "h_max": 5, + "num_multihops_factor": 2, + }, + }, + "single_shot_question_generation": { + "run": True, + "question_mode": "open-ended", + "additional_instructions": "Generate questions to test a curious adult", + "chunk_sampling": { + "mode": "count", + "value": 1, + "random_seed": 123, + }, + }, + "multi_hop_question_generation": { + "run": True, + "question_mode": "multi-choice", + "additional_instructions": "Generate multi-choice questions to test a curious adult", + "chunk_sampling": { + "mode": "count", + "value": 1, + "random_seed": 42, + }, + }, + "lighteval": {"run": True}, + }, + } + + +# Test for ingestion stage with mocked components +@pytest.mark.parametrize("mock_no_docs", [False, True]) +def test_ingestion_stage(mock_config, temp_dir, mock_no_docs): + """ + Test the ingestion stage of the YourBench pipeline. + + Verifies that the ingestion stage correctly processes source documents. + """ + # Create test document structure + raw_dir = mock_config["pipeline"]["ingestion"]["source_documents_dir"] + output_dir = mock_config["pipeline"]["ingestion"]["output_dir"] + os.makedirs(raw_dir, exist_ok=True) + os.makedirs(output_dir, exist_ok=True) + + # Create a test document only if not testing the no-docs case + if not mock_no_docs: + with open(os.path.join(raw_dir, "test_doc.txt"), "w") as f: + f.write("This is a test document for ingestion.") + + # Mock the core functionality instead of just the MarkItDown class + with ( + patch("yourbench.pipeline.ingestion.MarkItDown") as mock_markitdown, + patch("yourbench.pipeline.ingestion._convert_document_to_markdown") as mock_convert, + ): + # Configure mocks + mock_markitdown_instance = MagicMock() + mock_markitdown.return_value = mock_markitdown_instance + mock_convert.return_value = True + + # Import the run function after mocking + from yourbench.pipeline.ingestion import run + + # Run the ingestion stage + run(mock_config) + + # Verify behavior 
+ if mock_no_docs: + mock_convert.assert_not_called() + else: + mock_convert.assert_called() + + +# Test for summarization stage +def test_summarization_stage(mock_config): + """ + Test the summarization stage of the YourBench pipeline. + + Verifies that summarization correctly calls inference and processes the results. + """ + # Mock Dataset loading and saving + mock_dataset = Dataset.from_dict({ + "document_id": ["doc1", "doc2"], + "document_text": ["This is document 1", "This is document 2"], + "document_filename": ["doc1.md", "doc2.md"], + }) + + # Setup mocks + with ( + patch("yourbench.pipeline.summarization.custom_load_dataset", return_value=mock_dataset) as mock_load, + patch("yourbench.pipeline.summarization.custom_save_dataset") as mock_save, + patch("yourbench.pipeline.summarization.run_inference") as mock_run_inference, + patch("yourbench.pipeline.summarization.extract_content_from_xml_tags") as mock_extract, + ): + # Configure mocks + mock_run_inference.return_value = { + "fake_model": [ + "Summary for doc1", + "Summary for doc2", + ] + } + mock_extract.side_effect = ( + lambda text, tag: f"Summary for doc{text.split('doc')[1].split('<')[0]}" + if tag == "final_summary" + else None + ) + + # Import the summarization run function + from yourbench.pipeline.summarization import run + + # Run the summarization stage + run(mock_config) + + # Verify the summarization stage ran as expected + mock_load.assert_called_once() + assert mock_run_inference.call_count == 1 + mock_save.assert_called_once() + + +# Test for chunking stage +def test_chunking_stage(mock_config): + """ + Test the chunking stage of the YourBench pipeline. + + Verifies that documents are properly chunked according to the configuration. + """ + # Mock Dataset loading and saving + mock_dataset = Dataset.from_dict({ + "document_id": ["doc1", "doc2"], + "document_text": [ + "This is document 1 with enough text to be chunked properly. 
" * 10, + "This is document 2 which also has sufficient text for chunking. " * 10, + ], + "document_summary": ["Summary 1", "Summary 2"], + }) + + # Mock functions and dependencies + with ( + patch("yourbench.utils.dataset_engine.custom_load_dataset", return_value=mock_dataset) as mock_load, + patch("yourbench.utils.dataset_engine.custom_save_dataset") as mock_save, + patch("yourbench.utils.chunking_utils.split_into_token_chunks") as mock_split, + ): + # Configure mock returns + mock_split.return_value = ["Chunk 1", "Chunk 2"] + + # Import the chunking run function + from yourbench.pipeline.chunking import run + + # Run the chunking stage + run(mock_config) + + # Verify the chunking stage behavior + mock_load.assert_called_once() + assert mock_split.call_count == 2 # Called once for each document + mock_save.assert_called_once() + + # Verify that the dataset was saved with the right subset + saved_args = mock_save.call_args + assert saved_args[1]["subset"] == "chunked" + + +# Test for single-shot question generation stage +def test_single_shot_question_generation_stage(mock_config): + """ + Test the single-shot question generation stage of the YourBench pipeline. + + Verifies that questions are generated for single chunks of text. 
+ """ + # Mock dataset with chunks + chunks = [{"chunk_id": "chunk1", "chunk_text": "This is chunk 1"}] + mock_dataset = Dataset.from_dict({ + "document_id": ["doc1"], + "document_summary": ["Document 1 summary"], + "document_filename": ["doc1.md"], + "chunks": [chunks], + }) + + # Setup mocks + with ( + patch("yourbench.utils.dataset_engine.custom_load_dataset", return_value=mock_dataset) as mock_load, + patch("yourbench.utils.dataset_engine.custom_save_dataset") as mock_save, + patch("yourbench.utils.inference.inference_core.run_inference") as mock_run_inference, + patch("yourbench.utils.parsing_engine.parse_qa_pairs_from_response") as mock_parse, + ): + # Configure mocks + mock_run_inference.return_value = {"fake_model": ["Question generation response"]} + mock_parse.return_value = [ + { + "question": "Test question?", + "answer": "Test answer", + "estimated_difficulty": 5, + "question_type": "factual", + "question_mode": "open-ended", + "thought_process": "Reasoning", + "citations": ["citation"], + } + ] + + # Import run function + from yourbench.pipeline.question_generation import run_single_shot as run + + # Run the stage + run(mock_config) + + # Verify behavior + mock_load.assert_called_once() + mock_run_inference.assert_called_once() + mock_parse.assert_called_once() + mock_save.assert_called_once() + + +# Test for multi-hop question generation stage +def test_multi_hop_question_generation_stage(mock_config): + """ + Test the multi-hop question generation stage of the YourBench pipeline. + + Verifies that questions are generated requiring reasoning across multiple chunks. 
+ """ + from datasets import Dataset + from yourbench.utils.inference.inference_core import InferenceCall + + # Mock dataset with valid multihop_chunks and corresponding chunks + mock_dataset = Dataset.from_list([ + { + "document_id": "doc1", + "document_summary": "Document 1 summary", + "chunks": [ + {"chunk_id": "chunk1", "chunk_text": "This is chunk 1"}, + {"chunk_id": "chunk2", "chunk_text": "This is chunk 2"}, + ], + "multihop_chunks": [ + { + "chunk_ids": ["chunk1", "chunk2"], + "chunks_text": ["This is chunk 1", "This is chunk 2"], + } + ], + } + ]) + + # Setup mocks + with ( + patch("yourbench.pipeline.question_generation.custom_load_dataset", return_value=mock_dataset) as mock_load, + patch("yourbench.pipeline.question_generation.custom_save_dataset") as mock_save, + patch("yourbench.pipeline.question_generation.run_inference") as mock_run_inference, + patch("yourbench.pipeline.question_generation.parse_multi_hop_responses") as mock_parse, + patch("yourbench.pipeline.question_generation.build_multi_hop_inference_calls") as mock_builder, + ): + # Configure mocks + mock_run_inference.return_value = {"fake_model": ["Multi-hop question generation response"]} + mock_parse.return_value = [ + { + "question": "Multi-hop test question?", + "answer": "Multi-hop test answer", + "estimated_difficulty": 7, + "question_type": "reasoning", + "thought_process": "Complex reasoning", + "citations": ["citation1", "citation2"], + } + ] + mock_builder.return_value = ( + [InferenceCall(messages=[{"role": "user", "content": "Explain chunk1 and chunk2"}])], + [(0, "doc1", ["chunk1", "chunk2"])], + ) + + # Import run function + from yourbench.pipeline.question_generation import run_multi_hop as run + + # Run the stage + run(mock_config) + + # Verify behavior + mock_load.assert_called_once() + mock_run_inference.assert_called_once() + mock_parse.assert_called_once() + mock_save.assert_called_once() + + +# Test for lighteval stage +def test_lighteval_stage(mock_config): + """ + Test 
the lighteval stage of the YourBench pipeline. + + Verifies that the stage combines questions into a unified dataset for evaluation. + """ + # Mock single-shot and multi-hop datasets + single_shot_ds = Dataset.from_dict({ + "document_id": ["doc1"], + "chunk_id": ["chunk1"], + "question": ["Single-shot question?"], + "self_answer": ["Single-shot answer"], + "estimated_difficulty": [5], + "self_assessed_question_type": ["factual"], + "question_mode": ["open-ended"], + "generating_model": ["fake_model"], + "additional_instructions": ["Generate questions"], + }) + + multi_hop_ds = Dataset.from_dict({ + "document_id": ["doc1"], + "source_chunk_ids": [["chunk1", "chunk2"]], + "question": ["Multi-hop question?"], + "self_answer": ["Multi-hop answer"], + "estimated_difficulty": [7], + "self_assessed_question_type": ["reasoning"], + "question_mode": ["multi-choice"], + "generating_model": ["fake_model"], + "additional_instructions": ["Generate questions"], + }) + + chunked_ds = Dataset.from_dict({ + "document_id": ["doc1"], + "document_text": ["Full document text"], + "chunks": [[{"chunk_id": "chunk1", "chunk_text": "Chunk 1 text"}]], + }) + + summarized_ds = Dataset.from_dict({"document_id": ["doc1"], "document_summary": ["Document 1 summary"]}) + + # Setup mocks + with ( + patch("yourbench.utils.dataset_engine.custom_load_dataset") as mock_load, + patch("yourbench.utils.dataset_engine.custom_save_dataset") as mock_save, + ): + # Configure mock to return different datasets based on the subset parameter + def load_dataset_side_effect(config, subset): + if subset == "single_shot_questions": + return single_shot_ds + elif subset == "multi_hop_questions": + return multi_hop_ds + elif subset == "chunked": + return chunked_ds + elif subset == "summarized": + return summarized_ds + return Dataset.from_dict({}) + + mock_load.side_effect = load_dataset_side_effect + + # Import run function + from yourbench.pipeline.lighteval import run + + # Run the stage + run(mock_config) + + # 
Verify behavior + assert mock_load.call_count == 4 + mock_save.assert_called_once() + + +def test_stage_function_overrides(monkeypatch, tmp_path): + """ + Test that STAGE_FUNCTION_OVERRIDES are honored and used instead of dynamic imports + """ + from yourbench.pipeline import handler + + # Track calls to override functions + called_stages = [] + + def mock_run_single_shot(config): + called_stages.append("single_shot_question_generation") + + def mock_run_multi_hop(config): + called_stages.append("multi_hop_question_generation") + + # Patch the override map to use mocks + monkeypatch.setitem(handler.STAGE_FUNCTION_OVERRIDES, "single_shot_question_generation", mock_run_single_shot) + monkeypatch.setitem(handler.STAGE_FUNCTION_OVERRIDES, "multi_hop_question_generation", mock_run_multi_hop) + + # Patch load_config to avoid reading real file + def mock_load_config(path): + return { + "pipeline": { + "single_shot_question_generation": {"run": True}, + "multi_hop_question_generation": {"run": True}, + } + } + + monkeypatch.setattr(handler, "load_config", mock_load_config) + + # Run pipeline + config_path = tmp_path / "fake_config.yaml" + config_path.write_text("fake: config") + handler.run_pipeline(str(config_path)) + + # Assert overrides were called + assert "single_shot_question_generation" in called_stages + assert "multi_hop_question_generation" in called_stages diff --git a/uv.lock b/uv.lock index 34b10b24..61f4ea9d 100644 --- a/uv.lock +++ b/uv.lock @@ -1,23 +1,14 @@ version = 1 -revision = 2 +revision = 1 requires-python = "==3.12.*" -[[package]] -name = "absl-py" -version = "2.2.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b5/f0/e6342091061ed3a46aadc116b13edd7bb5249c3ab1b3ef07f24b0c248fc3/absl_py-2.2.2.tar.gz", hash = "sha256:bf25b2c2eed013ca456918c453d687eab4e8309fba81ee2f4c1a6aa2494175eb", size = 119982, upload-time = "2025-04-03T12:41:04.55Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/f6/d4/349f7f4bd5ea92dab34f5bb0fe31775ef6c311427a14d5a5b31ecb442341/absl_py-2.2.2-py3-none-any.whl", hash = "sha256:e5797bc6abe45f64fd95dc06394ca3f2bedf3b5d895e9da691c9ee3397d70092", size = 135565, upload-time = "2025-04-03T12:41:03.172Z" }, -] - [[package]] name = "aiohappyeyeballs" version = "2.6.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/26/30/f84a107a9c4331c14b2b586036f40965c128aa4fee4dda5d3d51cb14ad54/aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558", size = 22760, upload-time = "2025-03-12T01:42:48.764Z" } +sdist = { url = "https://files.pythonhosted.org/packages/26/30/f84a107a9c4331c14b2b586036f40965c128aa4fee4dda5d3d51cb14ad54/aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558", size = 22760 } wheels = [ - { url = "https://files.pythonhosted.org/packages/0f/15/5bf3b99495fb160b63f95972b81750f18f7f4e02ad051373b669d17d44f2/aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8", size = 15265, upload-time = "2025-03-12T01:42:47.083Z" }, + { url = "https://files.pythonhosted.org/packages/0f/15/5bf3b99495fb160b63f95972b81750f18f7f4e02ad051373b669d17d44f2/aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8", size = 15265 }, ] [[package]] @@ -33,24 +24,24 @@ dependencies = [ { name = "propcache" }, { name = "yarl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/63/e7/fa1a8c00e2c54b05dc8cb5d1439f627f7c267874e3f7bb047146116020f9/aiohttp-3.11.18.tar.gz", hash = "sha256:ae856e1138612b7e412db63b7708735cff4d38d0399f6a5435d3dac2669f558a", size = 7678653, upload-time = "2025-04-21T09:43:09.191Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/63/e7/fa1a8c00e2c54b05dc8cb5d1439f627f7c267874e3f7bb047146116020f9/aiohttp-3.11.18.tar.gz", hash = "sha256:ae856e1138612b7e412db63b7708735cff4d38d0399f6a5435d3dac2669f558a", size = 7678653 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/d2/5bc436f42bf4745c55f33e1e6a2d69e77075d3e768e3d1a34f96ee5298aa/aiohttp-3.11.18-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:63d71eceb9cad35d47d71f78edac41fcd01ff10cacaa64e473d1aec13fa02df2", size = 706671, upload-time = "2025-04-21T09:41:28.021Z" }, - { url = "https://files.pythonhosted.org/packages/fe/d0/2dbabecc4e078c0474abb40536bbde717fb2e39962f41c5fc7a216b18ea7/aiohttp-3.11.18-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d1929da615840969929e8878d7951b31afe0bac883d84418f92e5755d7b49508", size = 466169, upload-time = "2025-04-21T09:41:29.783Z" }, - { url = "https://files.pythonhosted.org/packages/70/84/19edcf0b22933932faa6e0be0d933a27bd173da02dc125b7354dff4d8da4/aiohttp-3.11.18-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d0aebeb2392f19b184e3fdd9e651b0e39cd0f195cdb93328bd124a1d455cd0e", size = 457554, upload-time = "2025-04-21T09:41:31.327Z" }, - { url = "https://files.pythonhosted.org/packages/32/d0/e8d1f034ae5624a0f21e4fb3feff79342ce631f3a4d26bd3e58b31ef033b/aiohttp-3.11.18-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3849ead845e8444f7331c284132ab314b4dac43bfae1e3cf350906d4fff4620f", size = 1690154, upload-time = "2025-04-21T09:41:33.541Z" }, - { url = "https://files.pythonhosted.org/packages/16/de/2f9dbe2ac6f38f8495562077131888e0d2897e3798a0ff3adda766b04a34/aiohttp-3.11.18-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5e8452ad6b2863709f8b3d615955aa0807bc093c34b8e25b3b52097fe421cb7f", size = 1733402, upload-time = "2025-04-21T09:41:35.634Z" }, - { url = 
"https://files.pythonhosted.org/packages/e0/04/bd2870e1e9aef990d14b6df2a695f17807baf5c85a4c187a492bda569571/aiohttp-3.11.18-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b8d2b42073611c860a37f718b3d61ae8b4c2b124b2e776e2c10619d920350ec", size = 1783958, upload-time = "2025-04-21T09:41:37.456Z" }, - { url = "https://files.pythonhosted.org/packages/23/06/4203ffa2beb5bedb07f0da0f79b7d9039d1c33f522e0d1a2d5b6218e6f2e/aiohttp-3.11.18-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40fbf91f6a0ac317c0a07eb328a1384941872f6761f2e6f7208b63c4cc0a7ff6", size = 1695288, upload-time = "2025-04-21T09:41:39.756Z" }, - { url = "https://files.pythonhosted.org/packages/30/b2/e2285dda065d9f29ab4b23d8bcc81eb881db512afb38a3f5247b191be36c/aiohttp-3.11.18-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ff5625413fec55216da5eaa011cf6b0a2ed67a565914a212a51aa3755b0009", size = 1618871, upload-time = "2025-04-21T09:41:41.972Z" }, - { url = "https://files.pythonhosted.org/packages/57/e0/88f2987885d4b646de2036f7296ebea9268fdbf27476da551c1a7c158bc0/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7f33a92a2fde08e8c6b0c61815521324fc1612f397abf96eed86b8e31618fdb4", size = 1646262, upload-time = "2025-04-21T09:41:44.192Z" }, - { url = "https://files.pythonhosted.org/packages/e0/19/4d2da508b4c587e7472a032290b2981f7caeca82b4354e19ab3df2f51d56/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:11d5391946605f445ddafda5eab11caf310f90cdda1fd99865564e3164f5cff9", size = 1677431, upload-time = "2025-04-21T09:41:46.049Z" }, - { url = "https://files.pythonhosted.org/packages/eb/ae/047473ea50150a41440f3265f53db1738870b5a1e5406ece561ca61a3bf4/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3cc314245deb311364884e44242e00c18b5896e4fe6d5f942e7ad7e4cb640adb", size = 1637430, upload-time = "2025-04-21T09:41:47.973Z" }, - { url = 
"https://files.pythonhosted.org/packages/11/32/c6d1e3748077ce7ee13745fae33e5cb1dac3e3b8f8787bf738a93c94a7d2/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0f421843b0f70740772228b9e8093289924359d306530bcd3926f39acbe1adda", size = 1703342, upload-time = "2025-04-21T09:41:50.323Z" }, - { url = "https://files.pythonhosted.org/packages/c5/1d/a3b57bfdbe285f0d45572d6d8f534fd58761da3e9cbc3098372565005606/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e220e7562467dc8d589e31c1acd13438d82c03d7f385c9cd41a3f6d1d15807c1", size = 1740600, upload-time = "2025-04-21T09:41:52.111Z" }, - { url = "https://files.pythonhosted.org/packages/a5/71/f9cd2fed33fa2b7ce4d412fb7876547abb821d5b5520787d159d0748321d/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ab2ef72f8605046115bc9aa8e9d14fd49086d405855f40b79ed9e5c1f9f4faea", size = 1695131, upload-time = "2025-04-21T09:41:53.94Z" }, - { url = "https://files.pythonhosted.org/packages/97/97/d1248cd6d02b9de6aa514793d0dcb20099f0ec47ae71a933290116c070c5/aiohttp-3.11.18-cp312-cp312-win32.whl", hash = "sha256:12a62691eb5aac58d65200c7ae94d73e8a65c331c3a86a2e9670927e94339ee8", size = 412442, upload-time = "2025-04-21T09:41:55.689Z" }, - { url = "https://files.pythonhosted.org/packages/33/9a/e34e65506e06427b111e19218a99abf627638a9703f4b8bcc3e3021277ed/aiohttp-3.11.18-cp312-cp312-win_amd64.whl", hash = "sha256:364329f319c499128fd5cd2d1c31c44f234c58f9b96cc57f743d16ec4f3238c8", size = 439444, upload-time = "2025-04-21T09:41:57.977Z" }, + { url = "https://files.pythonhosted.org/packages/b5/d2/5bc436f42bf4745c55f33e1e6a2d69e77075d3e768e3d1a34f96ee5298aa/aiohttp-3.11.18-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:63d71eceb9cad35d47d71f78edac41fcd01ff10cacaa64e473d1aec13fa02df2", size = 706671 }, + { url = "https://files.pythonhosted.org/packages/fe/d0/2dbabecc4e078c0474abb40536bbde717fb2e39962f41c5fc7a216b18ea7/aiohttp-3.11.18-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:d1929da615840969929e8878d7951b31afe0bac883d84418f92e5755d7b49508", size = 466169 }, + { url = "https://files.pythonhosted.org/packages/70/84/19edcf0b22933932faa6e0be0d933a27bd173da02dc125b7354dff4d8da4/aiohttp-3.11.18-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d0aebeb2392f19b184e3fdd9e651b0e39cd0f195cdb93328bd124a1d455cd0e", size = 457554 }, + { url = "https://files.pythonhosted.org/packages/32/d0/e8d1f034ae5624a0f21e4fb3feff79342ce631f3a4d26bd3e58b31ef033b/aiohttp-3.11.18-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3849ead845e8444f7331c284132ab314b4dac43bfae1e3cf350906d4fff4620f", size = 1690154 }, + { url = "https://files.pythonhosted.org/packages/16/de/2f9dbe2ac6f38f8495562077131888e0d2897e3798a0ff3adda766b04a34/aiohttp-3.11.18-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5e8452ad6b2863709f8b3d615955aa0807bc093c34b8e25b3b52097fe421cb7f", size = 1733402 }, + { url = "https://files.pythonhosted.org/packages/e0/04/bd2870e1e9aef990d14b6df2a695f17807baf5c85a4c187a492bda569571/aiohttp-3.11.18-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b8d2b42073611c860a37f718b3d61ae8b4c2b124b2e776e2c10619d920350ec", size = 1783958 }, + { url = "https://files.pythonhosted.org/packages/23/06/4203ffa2beb5bedb07f0da0f79b7d9039d1c33f522e0d1a2d5b6218e6f2e/aiohttp-3.11.18-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40fbf91f6a0ac317c0a07eb328a1384941872f6761f2e6f7208b63c4cc0a7ff6", size = 1695288 }, + { url = "https://files.pythonhosted.org/packages/30/b2/e2285dda065d9f29ab4b23d8bcc81eb881db512afb38a3f5247b191be36c/aiohttp-3.11.18-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ff5625413fec55216da5eaa011cf6b0a2ed67a565914a212a51aa3755b0009", size = 1618871 }, + { url = 
"https://files.pythonhosted.org/packages/57/e0/88f2987885d4b646de2036f7296ebea9268fdbf27476da551c1a7c158bc0/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7f33a92a2fde08e8c6b0c61815521324fc1612f397abf96eed86b8e31618fdb4", size = 1646262 }, + { url = "https://files.pythonhosted.org/packages/e0/19/4d2da508b4c587e7472a032290b2981f7caeca82b4354e19ab3df2f51d56/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:11d5391946605f445ddafda5eab11caf310f90cdda1fd99865564e3164f5cff9", size = 1677431 }, + { url = "https://files.pythonhosted.org/packages/eb/ae/047473ea50150a41440f3265f53db1738870b5a1e5406ece561ca61a3bf4/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3cc314245deb311364884e44242e00c18b5896e4fe6d5f942e7ad7e4cb640adb", size = 1637430 }, + { url = "https://files.pythonhosted.org/packages/11/32/c6d1e3748077ce7ee13745fae33e5cb1dac3e3b8f8787bf738a93c94a7d2/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0f421843b0f70740772228b9e8093289924359d306530bcd3926f39acbe1adda", size = 1703342 }, + { url = "https://files.pythonhosted.org/packages/c5/1d/a3b57bfdbe285f0d45572d6d8f534fd58761da3e9cbc3098372565005606/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e220e7562467dc8d589e31c1acd13438d82c03d7f385c9cd41a3f6d1d15807c1", size = 1740600 }, + { url = "https://files.pythonhosted.org/packages/a5/71/f9cd2fed33fa2b7ce4d412fb7876547abb821d5b5520787d159d0748321d/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ab2ef72f8605046115bc9aa8e9d14fd49086d405855f40b79ed9e5c1f9f4faea", size = 1695131 }, + { url = "https://files.pythonhosted.org/packages/97/97/d1248cd6d02b9de6aa514793d0dcb20099f0ec47ae71a933290116c070c5/aiohttp-3.11.18-cp312-cp312-win32.whl", hash = "sha256:12a62691eb5aac58d65200c7ae94d73e8a65c331c3a86a2e9670927e94339ee8", size = 412442 }, + { url = 
"https://files.pythonhosted.org/packages/33/9a/e34e65506e06427b111e19218a99abf627638a9703f4b8bcc3e3021277ed/aiohttp-3.11.18-cp312-cp312-win_amd64.whl", hash = "sha256:364329f319c499128fd5cd2d1c31c44f234c58f9b96cc57f743d16ec4f3238c8", size = 439444 }, ] [[package]] @@ -60,27 +51,27 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "frozenlist" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ba/b5/6d55e80f6d8a08ce22b982eafa278d823b541c925f11ee774b0b9c43473d/aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54", size = 19424, upload-time = "2024-12-13T17:10:40.86Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/b5/6d55e80f6d8a08ce22b982eafa278d823b541c925f11ee774b0b9c43473d/aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54", size = 19424 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/6a/bc7e17a3e87a2985d3e8f4da4cd0f481060eb78fb08596c42be62c90a4d9/aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5", size = 7597, upload-time = "2024-12-13T17:10:38.469Z" }, + { url = "https://files.pythonhosted.org/packages/ec/6a/bc7e17a3e87a2985d3e8f4da4cd0f481060eb78fb08596c42be62c90a4d9/aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5", size = 7597 }, ] [[package]] name = "asyncio" version = "3.4.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/da/54/054bafaf2c0fb8473d423743e191fcdf49b2c1fd5e9af3524efbe097bafd/asyncio-3.4.3.tar.gz", hash = "sha256:83360ff8bc97980e4ff25c964c7bd3923d333d177aa4f7fb736b019f26c7cb41", size = 204411, upload-time = "2015-03-10T14:11:26.494Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/da/54/054bafaf2c0fb8473d423743e191fcdf49b2c1fd5e9af3524efbe097bafd/asyncio-3.4.3.tar.gz", hash = "sha256:83360ff8bc97980e4ff25c964c7bd3923d333d177aa4f7fb736b019f26c7cb41", size = 204411 } wheels = [ - { url = "https://files.pythonhosted.org/packages/22/74/07679c5b9f98a7cb0fc147b1ef1cc1853bc07a4eb9cb5731e24732c5f773/asyncio-3.4.3-py3-none-any.whl", hash = "sha256:c4d18b22701821de07bd6aea8b53d21449ec0ec5680645e5317062ea21817d2d", size = 101767, upload-time = "2015-03-10T14:05:10.959Z" }, + { url = "https://files.pythonhosted.org/packages/22/74/07679c5b9f98a7cb0fc147b1ef1cc1853bc07a4eb9cb5731e24732c5f773/asyncio-3.4.3-py3-none-any.whl", hash = "sha256:c4d18b22701821de07bd6aea8b53d21449ec0ec5680645e5317062ea21817d2d", size = 101767 }, ] [[package]] name = "attrs" version = "25.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032, upload-time = "2025-03-13T11:10:22.779Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032 } wheels = [ - { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" }, + { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815 }, ] [[package]] @@ -92,9 +83,9 @@ 
dependencies = [ { name = "isodate" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/44/7b/8115cd713e2caa5e44def85f2b7ebd02a74ae74d7113ba20bdd41fd6dd80/azure_ai_documentintelligence-1.0.2.tar.gz", hash = "sha256:4d75a2513f2839365ebabc0e0e1772f5601b3a8c9a71e75da12440da13b63484", size = 170940, upload-time = "2025-03-27T02:46:20.606Z" } +sdist = { url = "https://files.pythonhosted.org/packages/44/7b/8115cd713e2caa5e44def85f2b7ebd02a74ae74d7113ba20bdd41fd6dd80/azure_ai_documentintelligence-1.0.2.tar.gz", hash = "sha256:4d75a2513f2839365ebabc0e0e1772f5601b3a8c9a71e75da12440da13b63484", size = 170940 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/75/c9ec040f23082f54ffb1977ff8f364c2d21c79a640a13d1c1809e7fd6b1a/azure_ai_documentintelligence-1.0.2-py3-none-any.whl", hash = "sha256:e1fb446abbdeccc9759d897898a0fe13141ed29f9ad11fc705f951925822ed59", size = 106005, upload-time = "2025-03-27T02:46:22.356Z" }, + { url = "https://files.pythonhosted.org/packages/d9/75/c9ec040f23082f54ffb1977ff8f364c2d21c79a640a13d1c1809e7fd6b1a/azure_ai_documentintelligence-1.0.2-py3-none-any.whl", hash = "sha256:e1fb446abbdeccc9759d897898a0fe13141ed29f9ad11fc705f951925822ed59", size = 106005 }, ] [[package]] @@ -106,9 +97,9 @@ dependencies = [ { name = "six" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c9/29/ff7a519a315e41c85bab92a7478c6acd1cf0b14353139a08caee4c691f77/azure_core-1.34.0.tar.gz", hash = "sha256:bdb544989f246a0ad1c85d72eeb45f2f835afdcbc5b45e43f0dbde7461c81ece", size = 297999, upload-time = "2025-05-01T23:17:27.59Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c9/29/ff7a519a315e41c85bab92a7478c6acd1cf0b14353139a08caee4c691f77/azure_core-1.34.0.tar.gz", hash = "sha256:bdb544989f246a0ad1c85d72eeb45f2f835afdcbc5b45e43f0dbde7461c81ece", size = 297999 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/84/9e/5c87b49f65bb16571599bc789857d0ded2f53014d3392bc88a5d1f3ad779/azure_core-1.34.0-py3-none-any.whl", hash = "sha256:0615d3b756beccdb6624d1c0ae97284f38b78fb59a2a9839bf927c66fbbdddd6", size = 207409, upload-time = "2025-05-01T23:17:29.818Z" }, + { url = "https://files.pythonhosted.org/packages/84/9e/5c87b49f65bb16571599bc789857d0ded2f53014d3392bc88a5d1f3ad779/azure_core-1.34.0-py3-none-any.whl", hash = "sha256:0615d3b756beccdb6624d1c0ae97284f38b78fb59a2a9839bf927c66fbbdddd6", size = 207409 }, ] [[package]] @@ -122,9 +113,18 @@ dependencies = [ { name = "msal-extensions" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b5/a1/f1a683672e7a88ea0e3119f57b6c7843ed52650fdcac8bfa66ed84e86e40/azure_identity-1.21.0.tar.gz", hash = "sha256:ea22ce6e6b0f429bc1b8d9212d5b9f9877bd4c82f1724bfa910760612c07a9a6", size = 266445, upload-time = "2025-03-11T20:53:07.463Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/a1/f1a683672e7a88ea0e3119f57b6c7843ed52650fdcac8bfa66ed84e86e40/azure_identity-1.21.0.tar.gz", hash = "sha256:ea22ce6e6b0f429bc1b8d9212d5b9f9877bd4c82f1724bfa910760612c07a9a6", size = 266445 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/9f/1f9f3ef4f49729ee207a712a5971a9ca747f2ca47d9cbf13cf6953e3478a/azure_identity-1.21.0-py3-none-any.whl", hash = "sha256:258ea6325537352440f71b35c3dffe9d240eae4a5126c1b7ce5efd5766bd9fd9", size = 189190 }, +] + +[[package]] +name = "babel" +version = "2.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/6b/d52e42361e1aa00709585ecc30b3f9684b3ab62530771402248b1b1d6240/babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d", size = 9951852 } wheels = [ - { url = "https://files.pythonhosted.org/packages/3d/9f/1f9f3ef4f49729ee207a712a5971a9ca747f2ca47d9cbf13cf6953e3478a/azure_identity-1.21.0-py3-none-any.whl", 
hash = "sha256:258ea6325537352440f71b35c3dffe9d240eae4a5126c1b7ce5efd5766bd9fd9", size = 189190, upload-time = "2025-03-11T20:53:09.197Z" }, + { url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537 }, ] [[package]] @@ -135,57 +135,46 @@ dependencies = [ { name = "soupsieve" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d8/e4/0c4c39e18fd76d6a628d4dd8da40543d136ce2d1752bd6eeeab0791f4d6b/beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195", size = 621067, upload-time = "2025-04-15T17:05:13.836Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/e4/0c4c39e18fd76d6a628d4dd8da40543d136ce2d1752bd6eeeab0791f4d6b/beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195", size = 621067 } wheels = [ - { url = "https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285, upload-time = "2025-04-15T17:05:12.221Z" }, + { url = "https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285 }, ] [[package]] -name = "bert-score" -version = "0.3.13" +name = "boto3" +version = "1.38.32" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "matplotlib" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "pandas" }, - { name = "requests" }, - { name = "torch" }, - { name = "tqdm" }, - { name = "transformers" }, + { name = "botocore" 
}, + { name = "jmespath" }, + { name = "s3transfer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1c/93/2c97a85cbb66a8a256a13176e11c9c4508074e2341299fe75ee955c81eff/bert_score-0.3.13.tar.gz", hash = "sha256:8ffe5838eac8cdd988b8b1a896af7f49071188c8c011a1ed160d71a9899a2ba4", size = 48621, upload-time = "2023-02-20T21:07:29.477Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c8/85/abba429fe0fc0b87db20b7b311deec062c613c5e74cfcaab2ad34e864bbf/boto3-1.38.32.tar.gz", hash = "sha256:3faa2c328a61745f3215a63039606a6fcf55d9afe1cc76e3a5e27b9db58cdbf6", size = 111874 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/8c/bc5457de4c004b1a623b31f7bc8d0375fb699b7d67df11879098b4b7b7c8/bert_score-0.3.13-py3-none-any.whl", hash = "sha256:bbbb4c7fcdaa46d7681aff49f37f96faa09ed74e1b150e659bdc6b58a66989b9", size = 61135, upload-time = "2023-02-20T21:07:27.226Z" }, + { url = "https://files.pythonhosted.org/packages/59/1a/2be51f4ac8592c2ccf699a17be7bb92c0aff8ce89fe2ffd657948b32bfeb/boto3-1.38.32-py3-none-any.whl", hash = "sha256:b998edac72f6740bd5d9d585cf3880f2dfeb4842e626b34430fd0e9623378011", size = 139940 }, ] [[package]] -name = "black" -version = "25.1.0" +name = "botocore" +version = "1.38.32" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "click" }, - { name = "mypy-extensions" }, - { name = "packaging" }, - { name = "pathspec" }, - { name = "platformdirs" }, + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/94/49/26a7b0f3f35da4b5a65f081943b7bcd22d7002f5f0fb8098ec1ff21cb6ef/black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666", size = 649449, upload-time = "2025-01-29T04:15:40.373Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9b/c2/c4c23c7ad746aba6edfa93ec9e6e14195efcf786425486f6a1b442734a8d/botocore-1.38.32.tar.gz", hash = 
"sha256:0899a090e352cb5eeaae2c7bb52a987b469d23912c7ece86664dfb5c2e074978", size = 13948764 } wheels = [ - { url = "https://files.pythonhosted.org/packages/83/71/3fe4741df7adf015ad8dfa082dd36c94ca86bb21f25608eb247b4afb15b2/black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b", size = 1650988, upload-time = "2025-01-29T05:37:16.707Z" }, - { url = "https://files.pythonhosted.org/packages/13/f3/89aac8a83d73937ccd39bbe8fc6ac8860c11cfa0af5b1c96d081facac844/black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc", size = 1453985, upload-time = "2025-01-29T05:37:18.273Z" }, - { url = "https://files.pythonhosted.org/packages/6f/22/b99efca33f1f3a1d2552c714b1e1b5ae92efac6c43e790ad539a163d1754/black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f", size = 1783816, upload-time = "2025-01-29T04:18:33.823Z" }, - { url = "https://files.pythonhosted.org/packages/18/7e/a27c3ad3822b6f2e0e00d63d58ff6299a99a5b3aee69fa77cd4b0076b261/black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba", size = 1440860, upload-time = "2025-01-29T04:19:12.944Z" }, - { url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646, upload-time = "2025-01-29T04:15:38.082Z" }, + { url = "https://files.pythonhosted.org/packages/48/6e/104f66963c9d2dc8d5ba4675535aca5ba5277eb3535047e004275329fc87/botocore-1.38.32-py3-none-any.whl", hash = "sha256:64ab919a5d8b74dd73eaac1f978d0e674d11ff3bbe8815c3d2982477be9a082c", size = 13608384 }, ] [[package]] name = "certifi" version = "2025.4.26" source = { registry 
= "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e8/9e/c05b3920a3b7d20d3d3310465f50348e5b3694f4f88c6daf736eef3024c4/certifi-2025.4.26.tar.gz", hash = "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6", size = 160705, upload-time = "2025-04-26T02:12:29.51Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/9e/c05b3920a3b7d20d3d3310465f50348e5b3694f4f88c6daf736eef3024c4/certifi-2025.4.26.tar.gz", hash = "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6", size = 160705 } wheels = [ - { url = "https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3", size = 159618, upload-time = "2025-04-26T02:12:27.662Z" }, + { url = "https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3", size = 159618 }, ] [[package]] @@ -195,41 +184,41 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pycparser" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621, upload-time = "2024-09-04T20:45:21.852Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621 } wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/84/e94227139ee5fb4d600a7a4927f322e1d4aea6fdc50bd3fca8493caba23f/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", size = 183178, upload-time = "2024-09-04T20:44:12.232Z" }, - { url = "https://files.pythonhosted.org/packages/da/ee/fb72c2b48656111c4ef27f0f91da355e130a923473bf5ee75c5643d00cca/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", size = 178840, upload-time = "2024-09-04T20:44:13.739Z" }, - { url = "https://files.pythonhosted.org/packages/cc/b6/db007700f67d151abadf508cbfd6a1884f57eab90b1bb985c4c8c02b0f28/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", size = 454803, upload-time = "2024-09-04T20:44:15.231Z" }, - { url = "https://files.pythonhosted.org/packages/1a/df/f8d151540d8c200eb1c6fba8cd0dfd40904f1b0682ea705c36e6c2e97ab3/cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", size = 478850, upload-time = "2024-09-04T20:44:17.188Z" }, - { url = "https://files.pythonhosted.org/packages/28/c0/b31116332a547fd2677ae5b78a2ef662dfc8023d67f41b2a83f7c2aa78b1/cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", size = 485729, upload-time = "2024-09-04T20:44:18.688Z" }, - { url = "https://files.pythonhosted.org/packages/91/2b/9a1ddfa5c7f13cab007a2c9cc295b70fbbda7cb10a286aa6810338e60ea1/cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", size = 471256, upload-time = "2024-09-04T20:44:20.248Z" }, - { url = "https://files.pythonhosted.org/packages/b2/d5/da47df7004cb17e4955df6a43d14b3b4ae77737dff8bf7f8f333196717bf/cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", size = 479424, upload-time = "2024-09-04T20:44:21.673Z" }, - { url = "https://files.pythonhosted.org/packages/0b/ac/2a28bcf513e93a219c8a4e8e125534f4f6db03e3179ba1c45e949b76212c/cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", size = 484568, upload-time = "2024-09-04T20:44:23.245Z" }, - { url = "https://files.pythonhosted.org/packages/d4/38/ca8a4f639065f14ae0f1d9751e70447a261f1a30fa7547a828ae08142465/cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", size = 488736, upload-time = "2024-09-04T20:44:24.757Z" }, - { url = "https://files.pythonhosted.org/packages/86/c5/28b2d6f799ec0bdecf44dced2ec5ed43e0eb63097b0f58c293583b406582/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", size = 172448, upload-time = "2024-09-04T20:44:26.208Z" }, - { url = "https://files.pythonhosted.org/packages/50/b9/db34c4755a7bd1cb2d1603ac3863f22bcecbd1ba29e5ee841a4bc510b294/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", size = 181976, upload-time = "2024-09-04T20:44:27.578Z" }, + { url = "https://files.pythonhosted.org/packages/5a/84/e94227139ee5fb4d600a7a4927f322e1d4aea6fdc50bd3fca8493caba23f/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", size = 183178 }, + { url = "https://files.pythonhosted.org/packages/da/ee/fb72c2b48656111c4ef27f0f91da355e130a923473bf5ee75c5643d00cca/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", size = 178840 }, + { url = 
"https://files.pythonhosted.org/packages/cc/b6/db007700f67d151abadf508cbfd6a1884f57eab90b1bb985c4c8c02b0f28/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", size = 454803 }, + { url = "https://files.pythonhosted.org/packages/1a/df/f8d151540d8c200eb1c6fba8cd0dfd40904f1b0682ea705c36e6c2e97ab3/cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", size = 478850 }, + { url = "https://files.pythonhosted.org/packages/28/c0/b31116332a547fd2677ae5b78a2ef662dfc8023d67f41b2a83f7c2aa78b1/cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", size = 485729 }, + { url = "https://files.pythonhosted.org/packages/91/2b/9a1ddfa5c7f13cab007a2c9cc295b70fbbda7cb10a286aa6810338e60ea1/cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", size = 471256 }, + { url = "https://files.pythonhosted.org/packages/b2/d5/da47df7004cb17e4955df6a43d14b3b4ae77737dff8bf7f8f333196717bf/cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", size = 479424 }, + { url = "https://files.pythonhosted.org/packages/0b/ac/2a28bcf513e93a219c8a4e8e125534f4f6db03e3179ba1c45e949b76212c/cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", size = 484568 }, + { url = "https://files.pythonhosted.org/packages/d4/38/ca8a4f639065f14ae0f1d9751e70447a261f1a30fa7547a828ae08142465/cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", 
size = 488736 }, + { url = "https://files.pythonhosted.org/packages/86/c5/28b2d6f799ec0bdecf44dced2ec5ed43e0eb63097b0f58c293583b406582/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", size = 172448 }, + { url = "https://files.pythonhosted.org/packages/50/b9/db34c4755a7bd1cb2d1603ac3863f22bcecbd1ba29e5ee841a4bc510b294/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", size = 181976 }, ] [[package]] name = "charset-normalizer" version = "3.4.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e4/33/89c2ced2b67d1c2a61c19c6751aa8902d46ce3dacb23600a283619f5a12d/charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63", size = 126367, upload-time = "2025-05-02T08:34:42.01Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/33/89c2ced2b67d1c2a61c19c6751aa8902d46ce3dacb23600a283619f5a12d/charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63", size = 126367 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/a4/37f4d6035c89cac7930395a35cc0f1b872e652eaafb76a6075943754f095/charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7", size = 199936, upload-time = "2025-05-02T08:32:33.712Z" }, - { url = "https://files.pythonhosted.org/packages/ee/8a/1a5e33b73e0d9287274f899d967907cd0bf9c343e651755d9307e0dbf2b3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3", size = 143790, upload-time = "2025-05-02T08:32:35.768Z" }, - { url = 
"https://files.pythonhosted.org/packages/66/52/59521f1d8e6ab1482164fa21409c5ef44da3e9f653c13ba71becdd98dec3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a", size = 153924, upload-time = "2025-05-02T08:32:37.284Z" }, - { url = "https://files.pythonhosted.org/packages/86/2d/fb55fdf41964ec782febbf33cb64be480a6b8f16ded2dbe8db27a405c09f/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214", size = 146626, upload-time = "2025-05-02T08:32:38.803Z" }, - { url = "https://files.pythonhosted.org/packages/8c/73/6ede2ec59bce19b3edf4209d70004253ec5f4e319f9a2e3f2f15601ed5f7/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a", size = 148567, upload-time = "2025-05-02T08:32:40.251Z" }, - { url = "https://files.pythonhosted.org/packages/09/14/957d03c6dc343c04904530b6bef4e5efae5ec7d7990a7cbb868e4595ee30/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd", size = 150957, upload-time = "2025-05-02T08:32:41.705Z" }, - { url = "https://files.pythonhosted.org/packages/0d/c8/8174d0e5c10ccebdcb1b53cc959591c4c722a3ad92461a273e86b9f5a302/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981", size = 145408, upload-time = "2025-05-02T08:32:43.709Z" }, - { url = "https://files.pythonhosted.org/packages/58/aa/8904b84bc8084ac19dc52feb4f5952c6df03ffb460a887b42615ee1382e8/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c", size = 
153399, upload-time = "2025-05-02T08:32:46.197Z" }, - { url = "https://files.pythonhosted.org/packages/c2/26/89ee1f0e264d201cb65cf054aca6038c03b1a0c6b4ae998070392a3ce605/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b", size = 156815, upload-time = "2025-05-02T08:32:48.105Z" }, - { url = "https://files.pythonhosted.org/packages/fd/07/68e95b4b345bad3dbbd3a8681737b4338ff2c9df29856a6d6d23ac4c73cb/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d", size = 154537, upload-time = "2025-05-02T08:32:49.719Z" }, - { url = "https://files.pythonhosted.org/packages/77/1a/5eefc0ce04affb98af07bc05f3bac9094513c0e23b0562d64af46a06aae4/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f", size = 149565, upload-time = "2025-05-02T08:32:51.404Z" }, - { url = "https://files.pythonhosted.org/packages/37/a0/2410e5e6032a174c95e0806b1a6585eb21e12f445ebe239fac441995226a/charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c", size = 98357, upload-time = "2025-05-02T08:32:53.079Z" }, - { url = "https://files.pythonhosted.org/packages/6c/4f/c02d5c493967af3eda9c771ad4d2bbc8df6f99ddbeb37ceea6e8716a32bc/charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e", size = 105776, upload-time = "2025-05-02T08:32:54.573Z" }, - { url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626, upload-time = "2025-05-02T08:34:40.053Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/a4/37f4d6035c89cac7930395a35cc0f1b872e652eaafb76a6075943754f095/charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7", size = 199936 }, + { url = "https://files.pythonhosted.org/packages/ee/8a/1a5e33b73e0d9287274f899d967907cd0bf9c343e651755d9307e0dbf2b3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3", size = 143790 }, + { url = "https://files.pythonhosted.org/packages/66/52/59521f1d8e6ab1482164fa21409c5ef44da3e9f653c13ba71becdd98dec3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a", size = 153924 }, + { url = "https://files.pythonhosted.org/packages/86/2d/fb55fdf41964ec782febbf33cb64be480a6b8f16ded2dbe8db27a405c09f/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214", size = 146626 }, + { url = "https://files.pythonhosted.org/packages/8c/73/6ede2ec59bce19b3edf4209d70004253ec5f4e319f9a2e3f2f15601ed5f7/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a", size = 148567 }, + { url = "https://files.pythonhosted.org/packages/09/14/957d03c6dc343c04904530b6bef4e5efae5ec7d7990a7cbb868e4595ee30/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd", size = 150957 }, + { url = 
"https://files.pythonhosted.org/packages/0d/c8/8174d0e5c10ccebdcb1b53cc959591c4c722a3ad92461a273e86b9f5a302/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981", size = 145408 }, + { url = "https://files.pythonhosted.org/packages/58/aa/8904b84bc8084ac19dc52feb4f5952c6df03ffb460a887b42615ee1382e8/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c", size = 153399 }, + { url = "https://files.pythonhosted.org/packages/c2/26/89ee1f0e264d201cb65cf054aca6038c03b1a0c6b4ae998070392a3ce605/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b", size = 156815 }, + { url = "https://files.pythonhosted.org/packages/fd/07/68e95b4b345bad3dbbd3a8681737b4338ff2c9df29856a6d6d23ac4c73cb/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d", size = 154537 }, + { url = "https://files.pythonhosted.org/packages/77/1a/5eefc0ce04affb98af07bc05f3bac9094513c0e23b0562d64af46a06aae4/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f", size = 149565 }, + { url = "https://files.pythonhosted.org/packages/37/a0/2410e5e6032a174c95e0806b1a6585eb21e12f445ebe239fac441995226a/charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c", size = 98357 }, + { url = "https://files.pythonhosted.org/packages/6c/4f/c02d5c493967af3eda9c771ad4d2bbc8df6f99ddbeb37ceea6e8716a32bc/charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e", size = 105776 }, + { url = 
"https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626 }, ] [[package]] @@ -239,27 +228,27 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593, upload-time = "2024-12-21T18:38:44.339Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 } wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188, upload-time = "2024-12-21T18:38:41.666Z" }, + { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188 }, ] [[package]] name = "cobble" version = "0.1.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/54/7a/a507c709be2c96e1bb6102eb7b7f4026c5e5e223ef7d745a17d239e9d844/cobble-0.1.4.tar.gz", hash = "sha256:de38be1539992c8a06e569630717c485a5f91be2192c461ea2b220607dfa78aa", size = 3805, upload-time = "2024-06-01T18:11:09.528Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/54/7a/a507c709be2c96e1bb6102eb7b7f4026c5e5e223ef7d745a17d239e9d844/cobble-0.1.4.tar.gz", hash = "sha256:de38be1539992c8a06e569630717c485a5f91be2192c461ea2b220607dfa78aa", size = 3805 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/e1/3714a2f371985215c219c2a70953d38e3eed81ef165aed061d21de0e998b/cobble-0.1.4-py3-none-any.whl", hash = "sha256:36c91b1655e599fd428e2b95fdd5f0da1ca2e9f1abb0bc871dec21a0e78a2b44", size = 3984, upload-time = "2024-06-01T18:11:07.911Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e1/3714a2f371985215c219c2a70953d38e3eed81ef165aed061d21de0e998b/cobble-0.1.4-py3-none-any.whl", hash = "sha256:36c91b1655e599fd428e2b95fdd5f0da1ca2e9f1abb0bc871dec21a0e78a2b44", size = 3984 }, ] [[package]] name = "colorama" version = "0.4.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, ] [[package]] @@ -269,30 
+258,23 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "humanfriendly" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/eed8f27100517e8c0e6b923d5f0845d0cb99763da6fdee00478f91db7325/coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0", size = 278520, upload-time = "2021-06-11T10:22:45.202Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/eed8f27100517e8c0e6b923d5f0845d0cb99763da6fdee00478f91db7325/coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0", size = 278520 } wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", size = 46018, upload-time = "2021-06-11T10:22:42.561Z" }, + { url = "https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", size = 46018 }, ] [[package]] -name = "contourpy" +name = "courlan" version = "1.3.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "babel" }, + { name = "tld" }, + { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/66/54/eb9bfc647b19f2009dd5c7f5ec51c4e6ca831725f1aea7a993034f483147/contourpy-1.3.2.tar.gz", hash = "sha256:b6945942715a034c671b7fc54f9588126b0b8bf23db2696e3ca8328f3ff0ab54", size = 13466130, upload-time = "2025-04-15T17:47:53.79Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/54/6d6ceeff4bed42e7a10d6064d35ee43a810e7b3e8beb4abeae8cff4713ae/courlan-1.3.2.tar.gz", hash = "sha256:0b66f4db3a9c39a6e22dd247c72cfaa57d68ea660e94bb2c84ec7db8712af190", size = 206382 } wheels = [ - { 
url = "https://files.pythonhosted.org/packages/34/f7/44785876384eff370c251d58fd65f6ad7f39adce4a093c934d4a67a7c6b6/contourpy-1.3.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4caf2bcd2969402bf77edc4cb6034c7dd7c0803213b3523f111eb7460a51b8d2", size = 271580, upload-time = "2025-04-15T17:37:03.105Z" }, - { url = "https://files.pythonhosted.org/packages/93/3b/0004767622a9826ea3d95f0e9d98cd8729015768075d61f9fea8eeca42a8/contourpy-1.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:82199cb78276249796419fe36b7386bd8d2cc3f28b3bc19fe2454fe2e26c4c15", size = 255530, upload-time = "2025-04-15T17:37:07.026Z" }, - { url = "https://files.pythonhosted.org/packages/e7/bb/7bd49e1f4fa805772d9fd130e0d375554ebc771ed7172f48dfcd4ca61549/contourpy-1.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:106fab697af11456fcba3e352ad50effe493a90f893fca6c2ca5c033820cea92", size = 307688, upload-time = "2025-04-15T17:37:11.481Z" }, - { url = "https://files.pythonhosted.org/packages/fc/97/e1d5dbbfa170725ef78357a9a0edc996b09ae4af170927ba8ce977e60a5f/contourpy-1.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d14f12932a8d620e307f715857107b1d1845cc44fdb5da2bc8e850f5ceba9f87", size = 347331, upload-time = "2025-04-15T17:37:18.212Z" }, - { url = "https://files.pythonhosted.org/packages/6f/66/e69e6e904f5ecf6901be3dd16e7e54d41b6ec6ae3405a535286d4418ffb4/contourpy-1.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:532fd26e715560721bb0d5fc7610fce279b3699b018600ab999d1be895b09415", size = 318963, upload-time = "2025-04-15T17:37:22.76Z" }, - { url = "https://files.pythonhosted.org/packages/a8/32/b8a1c8965e4f72482ff2d1ac2cd670ce0b542f203c8e1d34e7c3e6925da7/contourpy-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b383144cf2d2c29f01a1e8170f50dacf0eac02d64139dcd709a8ac4eb3cfe", size = 323681, upload-time = "2025-04-15T17:37:33.001Z" }, - { url = 
"https://files.pythonhosted.org/packages/30/c6/12a7e6811d08757c7162a541ca4c5c6a34c0f4e98ef2b338791093518e40/contourpy-1.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c49f73e61f1f774650a55d221803b101d966ca0c5a2d6d5e4320ec3997489441", size = 1308674, upload-time = "2025-04-15T17:37:48.64Z" }, - { url = "https://files.pythonhosted.org/packages/2a/8a/bebe5a3f68b484d3a2b8ffaf84704b3e343ef1addea528132ef148e22b3b/contourpy-1.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3d80b2c0300583228ac98d0a927a1ba6a2ba6b8a742463c564f1d419ee5b211e", size = 1380480, upload-time = "2025-04-15T17:38:06.7Z" }, - { url = "https://files.pythonhosted.org/packages/34/db/fcd325f19b5978fb509a7d55e06d99f5f856294c1991097534360b307cf1/contourpy-1.3.2-cp312-cp312-win32.whl", hash = "sha256:90df94c89a91b7362e1142cbee7568f86514412ab8a2c0d0fca72d7e91b62912", size = 178489, upload-time = "2025-04-15T17:38:10.338Z" }, - { url = "https://files.pythonhosted.org/packages/01/c8/fadd0b92ffa7b5eb5949bf340a63a4a496a6930a6c37a7ba0f12acb076d6/contourpy-1.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:8c942a01d9163e2e5cfb05cb66110121b8d07ad438a17f9e766317bcb62abf73", size = 223042, upload-time = "2025-04-15T17:38:14.239Z" }, + { url = "https://files.pythonhosted.org/packages/8e/ca/6a667ccbe649856dcd3458bab80b016681b274399d6211187c6ab969fc50/courlan-1.3.2-py3-none-any.whl", hash = "sha256:d0dab52cf5b5b1000ee2839fbc2837e93b2514d3cb5bb61ae158a55b7a04c6be", size = 33848 }, ] [[package]] @@ -302,41 +284,32 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/53/d6/1411ab4d6108ab167d06254c5be517681f1e331f90edf1379895bcb87020/cryptography-44.0.3.tar.gz", hash = "sha256:fe19d8bc5536a91a24a8133328880a41831b6c5df54599a8417b62fe015d3053", size = 711096, upload-time = "2025-05-02T19:36:04.667Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/08/53/c776d80e9d26441bb3868457909b4e74dd9ccabd182e10b2b0ae7a07e265/cryptography-44.0.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:962bc30480a08d133e631e8dfd4783ab71cc9e33d5d7c1e192f0b7c06397bb88", size = 6670281, upload-time = "2025-05-02T19:34:50.665Z" }, - { url = "https://files.pythonhosted.org/packages/6a/06/af2cf8d56ef87c77319e9086601bef621bedf40f6f59069e1b6d1ec498c5/cryptography-44.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ffc61e8f3bf5b60346d89cd3d37231019c17a081208dfbbd6e1605ba03fa137", size = 3959305, upload-time = "2025-05-02T19:34:53.042Z" }, - { url = "https://files.pythonhosted.org/packages/ae/01/80de3bec64627207d030f47bf3536889efee8913cd363e78ca9a09b13c8e/cryptography-44.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58968d331425a6f9eedcee087f77fd3c927c88f55368f43ff7e0a19891f2642c", size = 4171040, upload-time = "2025-05-02T19:34:54.675Z" }, - { url = "https://files.pythonhosted.org/packages/bd/48/bb16b7541d207a19d9ae8b541c70037a05e473ddc72ccb1386524d4f023c/cryptography-44.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:e28d62e59a4dbd1d22e747f57d4f00c459af22181f0b2f787ea83f5a876d7c76", size = 3963411, upload-time = "2025-05-02T19:34:56.61Z" }, - { url = "https://files.pythonhosted.org/packages/42/b2/7d31f2af5591d217d71d37d044ef5412945a8a8e98d5a2a8ae4fd9cd4489/cryptography-44.0.3-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:af653022a0c25ef2e3ffb2c673a50e5a0d02fecc41608f4954176f1933b12359", size = 3689263, upload-time = "2025-05-02T19:34:58.591Z" }, - { url = "https://files.pythonhosted.org/packages/25/50/c0dfb9d87ae88ccc01aad8eb93e23cfbcea6a6a106a9b63a7b14c1f93c75/cryptography-44.0.3-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:157f1f3b8d941c2bd8f3ffee0af9b049c9665c39d3da9db2dc338feca5e98a43", size = 4196198, upload-time = "2025-05-02T19:35:00.988Z" }, - { url = 
"https://files.pythonhosted.org/packages/66/c9/55c6b8794a74da652690c898cb43906310a3e4e4f6ee0b5f8b3b3e70c441/cryptography-44.0.3-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:c6cd67722619e4d55fdb42ead64ed8843d64638e9c07f4011163e46bc512cf01", size = 3966502, upload-time = "2025-05-02T19:35:03.091Z" }, - { url = "https://files.pythonhosted.org/packages/b6/f7/7cb5488c682ca59a02a32ec5f975074084db4c983f849d47b7b67cc8697a/cryptography-44.0.3-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:b424563394c369a804ecbee9b06dfb34997f19d00b3518e39f83a5642618397d", size = 4196173, upload-time = "2025-05-02T19:35:05.018Z" }, - { url = "https://files.pythonhosted.org/packages/d2/0b/2f789a8403ae089b0b121f8f54f4a3e5228df756e2146efdf4a09a3d5083/cryptography-44.0.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c91fc8e8fd78af553f98bc7f2a1d8db977334e4eea302a4bfd75b9461c2d8904", size = 4087713, upload-time = "2025-05-02T19:35:07.187Z" }, - { url = "https://files.pythonhosted.org/packages/1d/aa/330c13655f1af398fc154089295cf259252f0ba5df93b4bc9d9c7d7f843e/cryptography-44.0.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:25cd194c39fa5a0aa4169125ee27d1172097857b27109a45fadc59653ec06f44", size = 4299064, upload-time = "2025-05-02T19:35:08.879Z" }, - { url = "https://files.pythonhosted.org/packages/10/a8/8c540a421b44fd267a7d58a1fd5f072a552d72204a3f08194f98889de76d/cryptography-44.0.3-cp37-abi3-win32.whl", hash = "sha256:3be3f649d91cb182c3a6bd336de8b61a0a71965bd13d1a04a0e15b39c3d5809d", size = 2773887, upload-time = "2025-05-02T19:35:10.41Z" }, - { url = "https://files.pythonhosted.org/packages/b9/0d/c4b1657c39ead18d76bbd122da86bd95bdc4095413460d09544000a17d56/cryptography-44.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:3883076d5c4cc56dbef0b898a74eb6992fdac29a7b9013870b34efe4ddb39a0d", size = 3209737, upload-time = "2025-05-02T19:35:12.12Z" }, - { url = 
"https://files.pythonhosted.org/packages/34/a3/ad08e0bcc34ad436013458d7528e83ac29910943cea42ad7dd4141a27bbb/cryptography-44.0.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:5639c2b16764c6f76eedf722dbad9a0914960d3489c0cc38694ddf9464f1bb2f", size = 6673501, upload-time = "2025-05-02T19:35:13.775Z" }, - { url = "https://files.pythonhosted.org/packages/b1/f0/7491d44bba8d28b464a5bc8cc709f25a51e3eac54c0a4444cf2473a57c37/cryptography-44.0.3-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3ffef566ac88f75967d7abd852ed5f182da252d23fac11b4766da3957766759", size = 3960307, upload-time = "2025-05-02T19:35:15.917Z" }, - { url = "https://files.pythonhosted.org/packages/f7/c8/e5c5d0e1364d3346a5747cdcd7ecbb23ca87e6dea4f942a44e88be349f06/cryptography-44.0.3-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:192ed30fac1728f7587c6f4613c29c584abdc565d7417c13904708db10206645", size = 4170876, upload-time = "2025-05-02T19:35:18.138Z" }, - { url = "https://files.pythonhosted.org/packages/73/96/025cb26fc351d8c7d3a1c44e20cf9a01e9f7cf740353c9c7a17072e4b264/cryptography-44.0.3-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:7d5fe7195c27c32a64955740b949070f21cba664604291c298518d2e255931d2", size = 3964127, upload-time = "2025-05-02T19:35:19.864Z" }, - { url = "https://files.pythonhosted.org/packages/01/44/eb6522db7d9f84e8833ba3bf63313f8e257729cf3a8917379473fcfd6601/cryptography-44.0.3-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3f07943aa4d7dad689e3bb1638ddc4944cc5e0921e3c227486daae0e31a05e54", size = 3689164, upload-time = "2025-05-02T19:35:21.449Z" }, - { url = "https://files.pythonhosted.org/packages/68/fb/d61a4defd0d6cee20b1b8a1ea8f5e25007e26aeb413ca53835f0cae2bcd1/cryptography-44.0.3-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cb90f60e03d563ca2445099edf605c16ed1d5b15182d21831f58460c48bffb93", size = 4198081, upload-time = "2025-05-02T19:35:23.187Z" }, - { url = 
"https://files.pythonhosted.org/packages/1b/50/457f6911d36432a8811c3ab8bd5a6090e8d18ce655c22820994913dd06ea/cryptography-44.0.3-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:ab0b005721cc0039e885ac3503825661bd9810b15d4f374e473f8c89b7d5460c", size = 3967716, upload-time = "2025-05-02T19:35:25.426Z" }, - { url = "https://files.pythonhosted.org/packages/35/6e/dca39d553075980ccb631955c47b93d87d27f3596da8d48b1ae81463d915/cryptography-44.0.3-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:3bb0847e6363c037df8f6ede57d88eaf3410ca2267fb12275370a76f85786a6f", size = 4197398, upload-time = "2025-05-02T19:35:27.678Z" }, - { url = "https://files.pythonhosted.org/packages/9b/9d/d1f2fe681eabc682067c66a74addd46c887ebacf39038ba01f8860338d3d/cryptography-44.0.3-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b0cc66c74c797e1db750aaa842ad5b8b78e14805a9b5d1348dc603612d3e3ff5", size = 4087900, upload-time = "2025-05-02T19:35:29.312Z" }, - { url = "https://files.pythonhosted.org/packages/c4/f5/3599e48c5464580b73b236aafb20973b953cd2e7b44c7c2533de1d888446/cryptography-44.0.3-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6866df152b581f9429020320e5eb9794c8780e90f7ccb021940d7f50ee00ae0b", size = 4301067, upload-time = "2025-05-02T19:35:31.547Z" }, - { url = "https://files.pythonhosted.org/packages/a7/6c/d2c48c8137eb39d0c193274db5c04a75dab20d2f7c3f81a7dcc3a8897701/cryptography-44.0.3-cp39-abi3-win32.whl", hash = "sha256:c138abae3a12a94c75c10499f1cbae81294a6f983b3af066390adee73f433028", size = 2775467, upload-time = "2025-05-02T19:35:33.805Z" }, - { url = "https://files.pythonhosted.org/packages/c9/ad/51f212198681ea7b0deaaf8846ee10af99fba4e894f67b353524eab2bbe5/cryptography-44.0.3-cp39-abi3-win_amd64.whl", hash = "sha256:5d186f32e52e66994dce4f766884bcb9c68b8da62d61d9d215bfe5fb56d21334", size = 3210375, upload-time = "2025-05-02T19:35:35.369Z" }, -] - -[[package]] -name = "cycler" -version = "0.12.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/a9/95/a3dbbb5028f35eafb79008e7522a75244477d2838f38cbb722248dabc2a8/cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c", size = 7615, upload-time = "2023-10-07T05:32:18.335Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/53/d6/1411ab4d6108ab167d06254c5be517681f1e331f90edf1379895bcb87020/cryptography-44.0.3.tar.gz", hash = "sha256:fe19d8bc5536a91a24a8133328880a41831b6c5df54599a8417b62fe015d3053", size = 711096 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/53/c776d80e9d26441bb3868457909b4e74dd9ccabd182e10b2b0ae7a07e265/cryptography-44.0.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:962bc30480a08d133e631e8dfd4783ab71cc9e33d5d7c1e192f0b7c06397bb88", size = 6670281 }, + { url = "https://files.pythonhosted.org/packages/6a/06/af2cf8d56ef87c77319e9086601bef621bedf40f6f59069e1b6d1ec498c5/cryptography-44.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ffc61e8f3bf5b60346d89cd3d37231019c17a081208dfbbd6e1605ba03fa137", size = 3959305 }, + { url = "https://files.pythonhosted.org/packages/ae/01/80de3bec64627207d030f47bf3536889efee8913cd363e78ca9a09b13c8e/cryptography-44.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58968d331425a6f9eedcee087f77fd3c927c88f55368f43ff7e0a19891f2642c", size = 4171040 }, + { url = "https://files.pythonhosted.org/packages/bd/48/bb16b7541d207a19d9ae8b541c70037a05e473ddc72ccb1386524d4f023c/cryptography-44.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:e28d62e59a4dbd1d22e747f57d4f00c459af22181f0b2f787ea83f5a876d7c76", size = 3963411 }, + { url = 
"https://files.pythonhosted.org/packages/42/b2/7d31f2af5591d217d71d37d044ef5412945a8a8e98d5a2a8ae4fd9cd4489/cryptography-44.0.3-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:af653022a0c25ef2e3ffb2c673a50e5a0d02fecc41608f4954176f1933b12359", size = 3689263 }, + { url = "https://files.pythonhosted.org/packages/25/50/c0dfb9d87ae88ccc01aad8eb93e23cfbcea6a6a106a9b63a7b14c1f93c75/cryptography-44.0.3-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:157f1f3b8d941c2bd8f3ffee0af9b049c9665c39d3da9db2dc338feca5e98a43", size = 4196198 }, + { url = "https://files.pythonhosted.org/packages/66/c9/55c6b8794a74da652690c898cb43906310a3e4e4f6ee0b5f8b3b3e70c441/cryptography-44.0.3-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:c6cd67722619e4d55fdb42ead64ed8843d64638e9c07f4011163e46bc512cf01", size = 3966502 }, + { url = "https://files.pythonhosted.org/packages/b6/f7/7cb5488c682ca59a02a32ec5f975074084db4c983f849d47b7b67cc8697a/cryptography-44.0.3-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:b424563394c369a804ecbee9b06dfb34997f19d00b3518e39f83a5642618397d", size = 4196173 }, + { url = "https://files.pythonhosted.org/packages/d2/0b/2f789a8403ae089b0b121f8f54f4a3e5228df756e2146efdf4a09a3d5083/cryptography-44.0.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c91fc8e8fd78af553f98bc7f2a1d8db977334e4eea302a4bfd75b9461c2d8904", size = 4087713 }, + { url = "https://files.pythonhosted.org/packages/1d/aa/330c13655f1af398fc154089295cf259252f0ba5df93b4bc9d9c7d7f843e/cryptography-44.0.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:25cd194c39fa5a0aa4169125ee27d1172097857b27109a45fadc59653ec06f44", size = 4299064 }, + { url = "https://files.pythonhosted.org/packages/10/a8/8c540a421b44fd267a7d58a1fd5f072a552d72204a3f08194f98889de76d/cryptography-44.0.3-cp37-abi3-win32.whl", hash = "sha256:3be3f649d91cb182c3a6bd336de8b61a0a71965bd13d1a04a0e15b39c3d5809d", size = 2773887 }, + { url = 
"https://files.pythonhosted.org/packages/b9/0d/c4b1657c39ead18d76bbd122da86bd95bdc4095413460d09544000a17d56/cryptography-44.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:3883076d5c4cc56dbef0b898a74eb6992fdac29a7b9013870b34efe4ddb39a0d", size = 3209737 }, + { url = "https://files.pythonhosted.org/packages/34/a3/ad08e0bcc34ad436013458d7528e83ac29910943cea42ad7dd4141a27bbb/cryptography-44.0.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:5639c2b16764c6f76eedf722dbad9a0914960d3489c0cc38694ddf9464f1bb2f", size = 6673501 }, + { url = "https://files.pythonhosted.org/packages/b1/f0/7491d44bba8d28b464a5bc8cc709f25a51e3eac54c0a4444cf2473a57c37/cryptography-44.0.3-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3ffef566ac88f75967d7abd852ed5f182da252d23fac11b4766da3957766759", size = 3960307 }, + { url = "https://files.pythonhosted.org/packages/f7/c8/e5c5d0e1364d3346a5747cdcd7ecbb23ca87e6dea4f942a44e88be349f06/cryptography-44.0.3-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:192ed30fac1728f7587c6f4613c29c584abdc565d7417c13904708db10206645", size = 4170876 }, + { url = "https://files.pythonhosted.org/packages/73/96/025cb26fc351d8c7d3a1c44e20cf9a01e9f7cf740353c9c7a17072e4b264/cryptography-44.0.3-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:7d5fe7195c27c32a64955740b949070f21cba664604291c298518d2e255931d2", size = 3964127 }, + { url = "https://files.pythonhosted.org/packages/01/44/eb6522db7d9f84e8833ba3bf63313f8e257729cf3a8917379473fcfd6601/cryptography-44.0.3-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3f07943aa4d7dad689e3bb1638ddc4944cc5e0921e3c227486daae0e31a05e54", size = 3689164 }, + { url = "https://files.pythonhosted.org/packages/68/fb/d61a4defd0d6cee20b1b8a1ea8f5e25007e26aeb413ca53835f0cae2bcd1/cryptography-44.0.3-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cb90f60e03d563ca2445099edf605c16ed1d5b15182d21831f58460c48bffb93", size = 4198081 }, + { url = 
"https://files.pythonhosted.org/packages/1b/50/457f6911d36432a8811c3ab8bd5a6090e8d18ce655c22820994913dd06ea/cryptography-44.0.3-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:ab0b005721cc0039e885ac3503825661bd9810b15d4f374e473f8c89b7d5460c", size = 3967716 }, + { url = "https://files.pythonhosted.org/packages/35/6e/dca39d553075980ccb631955c47b93d87d27f3596da8d48b1ae81463d915/cryptography-44.0.3-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:3bb0847e6363c037df8f6ede57d88eaf3410ca2267fb12275370a76f85786a6f", size = 4197398 }, + { url = "https://files.pythonhosted.org/packages/9b/9d/d1f2fe681eabc682067c66a74addd46c887ebacf39038ba01f8860338d3d/cryptography-44.0.3-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b0cc66c74c797e1db750aaa842ad5b8b78e14805a9b5d1348dc603612d3e3ff5", size = 4087900 }, + { url = "https://files.pythonhosted.org/packages/c4/f5/3599e48c5464580b73b236aafb20973b953cd2e7b44c7c2533de1d888446/cryptography-44.0.3-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6866df152b581f9429020320e5eb9794c8780e90f7ccb021940d7f50ee00ae0b", size = 4301067 }, + { url = "https://files.pythonhosted.org/packages/a7/6c/d2c48c8137eb39d0c193274db5c04a75dab20d2f7c3f81a7dcc3a8897701/cryptography-44.0.3-cp39-abi3-win32.whl", hash = "sha256:c138abae3a12a94c75c10499f1cbae81294a6f983b3af066390adee73f433028", size = 2775467 }, + { url = "https://files.pythonhosted.org/packages/c9/ad/51f212198681ea7b0deaaf8846ee10af99fba4e894f67b353524eab2bbe5/cryptography-44.0.3-cp39-abi3-win_amd64.whl", hash = "sha256:5d186f32e52e66994dce4f766884bcb9c68b8da62d61d9d215bfe5fb56d21334", size = 3210375 }, ] [[package]] @@ -359,106 +332,104 @@ dependencies = [ { name = "tqdm" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/10/af/8c1d10daf37383c32ab0a7461eaa4d5c7a3c47808fe5a8563744de002df7/datasets-3.5.1.tar.gz", hash = "sha256:f835b45dbbd7065c1191734b6f7c8d96fdf8c5751feaa5aa52b2a0dc43eea58f", size = 568915, upload-time = 
"2025-04-28T14:01:42.974Z" } +sdist = { url = "https://files.pythonhosted.org/packages/10/af/8c1d10daf37383c32ab0a7461eaa4d5c7a3c47808fe5a8563744de002df7/datasets-3.5.1.tar.gz", hash = "sha256:f835b45dbbd7065c1191734b6f7c8d96fdf8c5751feaa5aa52b2a0dc43eea58f", size = 568915 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/f5/668b3444a2f487b0052b908af631fe39eeb2bdb2359d9bbc2c3b80b71119/datasets-3.5.1-py3-none-any.whl", hash = "sha256:4074dda8dd6e9ece242b1580a8ef3928777d59ae1db144d911229e443a093cbb", size = 491436 }, +] + +[[package]] +name = "dateparser" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "regex" }, + { name = "tzlocal" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bd/3f/d3207a05f5b6a78c66d86631e60bfba5af163738a599a5b9aa2c2737a09e/dateparser-1.2.1.tar.gz", hash = "sha256:7e4919aeb48481dbfc01ac9683c8e20bfe95bb715a38c1e9f6af889f4f30ccc3", size = 309924 } wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/f5/668b3444a2f487b0052b908af631fe39eeb2bdb2359d9bbc2c3b80b71119/datasets-3.5.1-py3-none-any.whl", hash = "sha256:4074dda8dd6e9ece242b1580a8ef3928777d59ae1db144d911229e443a093cbb", size = 491436, upload-time = "2025-04-28T14:01:40.953Z" }, + { url = "https://files.pythonhosted.org/packages/cf/0a/981c438c4cd84147c781e4e96c1d72df03775deb1bc76c5a6ee8afa89c62/dateparser-1.2.1-py3-none-any.whl", hash = "sha256:bdcac262a467e6260030040748ad7c10d6bacd4f3b9cdb4cfd2251939174508c", size = 295658 }, ] [[package]] name = "defusedxml" version = "0.7.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520 } wheels = [ - { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604 }, ] [[package]] name = "dill" version = "0.3.8" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/17/4d/ac7ffa80c69ea1df30a8aa11b3578692a5118e7cd1aa157e3ef73b092d15/dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca", size = 184847, upload-time = "2024-01-27T23:42:16.145Z" } +sdist = { url = "https://files.pythonhosted.org/packages/17/4d/ac7ffa80c69ea1df30a8aa11b3578692a5118e7cd1aa157e3ef73b092d15/dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca", size = 184847 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7", size = 116252, upload-time = "2024-01-27T23:42:14.239Z" }, + { url = "https://files.pythonhosted.org/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7", size = 116252 }, ] [[package]] name = "et-xmlfile" 
version = "2.0.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" }, + { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 }, ] [[package]] name = "filelock" version = "3.18.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075, upload-time = "2025-03-14T07:11:40.47Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075 } wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = 
"sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload-time = "2025-03-14T07:11:39.145Z" }, + { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215 }, ] [[package]] name = "flatbuffers" version = "25.2.10" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e4/30/eb5dce7994fc71a2f685d98ec33cc660c0a5887db5610137e60d8cbc4489/flatbuffers-25.2.10.tar.gz", hash = "sha256:97e451377a41262f8d9bd4295cc836133415cc03d8cb966410a4af92eb00d26e", size = 22170, upload-time = "2025-02-11T04:26:46.257Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/30/eb5dce7994fc71a2f685d98ec33cc660c0a5887db5610137e60d8cbc4489/flatbuffers-25.2.10.tar.gz", hash = "sha256:97e451377a41262f8d9bd4295cc836133415cc03d8cb966410a4af92eb00d26e", size = 22170 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/25/155f9f080d5e4bc0082edfda032ea2bc2b8fab3f4d25d46c1e9dd22a1a89/flatbuffers-25.2.10-py2.py3-none-any.whl", hash = "sha256:ebba5f4d5ea615af3f7fd70fc310636fbb2bbd1f566ac0a23d98dd412de50051", size = 30953, upload-time = "2025-02-11T04:26:44.484Z" }, -] - -[[package]] -name = "fonttools" -version = "4.57.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/03/2d/a9a0b6e3a0cf6bd502e64fc16d894269011930cabfc89aee20d1635b1441/fonttools-4.57.0.tar.gz", hash = "sha256:727ece10e065be2f9dd239d15dd5d60a66e17eac11aea47d447f9f03fdbc42de", size = 3492448, upload-time = "2025-04-03T11:07:13.898Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/98/d4bc42d43392982eecaaca117d79845734d675219680cd43070bb001bc1f/fonttools-4.57.0-cp312-cp312-macosx_10_13_universal2.whl", hash = 
"sha256:889e45e976c74abc7256d3064aa7c1295aa283c6bb19810b9f8b604dfe5c7f31", size = 2751824, upload-time = "2025-04-03T11:06:03.782Z" }, - { url = "https://files.pythonhosted.org/packages/1a/62/7168030eeca3742fecf45f31e63b5ef48969fa230a672216b805f1d61548/fonttools-4.57.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0425c2e052a5f1516c94e5855dbda706ae5a768631e9fcc34e57d074d1b65b92", size = 2283072, upload-time = "2025-04-03T11:06:05.533Z" }, - { url = "https://files.pythonhosted.org/packages/5d/82/121a26d9646f0986ddb35fbbaf58ef791c25b59ecb63ffea2aab0099044f/fonttools-4.57.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44c26a311be2ac130f40a96769264809d3b0cb297518669db437d1cc82974888", size = 4788020, upload-time = "2025-04-03T11:06:07.249Z" }, - { url = "https://files.pythonhosted.org/packages/5b/26/e0f2fb662e022d565bbe280a3cfe6dafdaabf58889ff86fdef2d31ff1dde/fonttools-4.57.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84c41ba992df5b8d680b89fd84c6a1f2aca2b9f1ae8a67400c8930cd4ea115f6", size = 4859096, upload-time = "2025-04-03T11:06:09.469Z" }, - { url = "https://files.pythonhosted.org/packages/9e/44/9075e323347b1891cdece4b3f10a3b84a8f4c42a7684077429d9ce842056/fonttools-4.57.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ea1e9e43ca56b0c12440a7c689b1350066595bebcaa83baad05b8b2675129d98", size = 4964356, upload-time = "2025-04-03T11:06:11.294Z" }, - { url = "https://files.pythonhosted.org/packages/48/28/caa8df32743462fb966be6de6a79d7f30393859636d7732e82efa09fbbb4/fonttools-4.57.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:84fd56c78d431606332a0627c16e2a63d243d0d8b05521257d77c6529abe14d8", size = 5226546, upload-time = "2025-04-03T11:06:13.6Z" }, - { url = "https://files.pythonhosted.org/packages/f6/46/95ab0f0d2e33c5b1a4fc1c0efe5e286ba9359602c0a9907adb1faca44175/fonttools-4.57.0-cp312-cp312-win32.whl", hash = 
"sha256:f4376819c1c778d59e0a31db5dc6ede854e9edf28bbfa5b756604727f7f800ac", size = 2146776, upload-time = "2025-04-03T11:06:15.643Z" }, - { url = "https://files.pythonhosted.org/packages/06/5d/1be5424bb305880e1113631f49a55ea7c7da3a5fe02608ca7c16a03a21da/fonttools-4.57.0-cp312-cp312-win_amd64.whl", hash = "sha256:57e30241524879ea10cdf79c737037221f77cc126a8cdc8ff2c94d4a522504b9", size = 2193956, upload-time = "2025-04-03T11:06:17.534Z" }, - { url = "https://files.pythonhosted.org/packages/90/27/45f8957c3132917f91aaa56b700bcfc2396be1253f685bd5c68529b6f610/fonttools-4.57.0-py3-none-any.whl", hash = "sha256:3122c604a675513c68bd24c6a8f9091f1c2376d18e8f5fe5a101746c81b3e98f", size = 1093605, upload-time = "2025-04-03T11:07:11.341Z" }, + { url = "https://files.pythonhosted.org/packages/b8/25/155f9f080d5e4bc0082edfda032ea2bc2b8fab3f4d25d46c1e9dd22a1a89/flatbuffers-25.2.10-py2.py3-none-any.whl", hash = "sha256:ebba5f4d5ea615af3f7fd70fc310636fbb2bbd1f566ac0a23d98dd412de50051", size = 30953 }, ] [[package]] name = "frozenlist" version = "1.6.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ee/f4/d744cba2da59b5c1d88823cf9e8a6c74e4659e2b27604ed973be2a0bf5ab/frozenlist-1.6.0.tar.gz", hash = "sha256:b99655c32c1c8e06d111e7f41c06c29a5318cb1835df23a45518e02a47c63b68", size = 42831, upload-time = "2025-04-17T22:38:53.099Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9c/8a/289b7d0de2fbac832ea80944d809759976f661557a38bb8e77db5d9f79b7/frozenlist-1.6.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c5b9e42ace7d95bf41e19b87cec8f262c41d3510d8ad7514ab3862ea2197bfb1", size = 160193, upload-time = "2025-04-17T22:36:47.382Z" }, - { url = "https://files.pythonhosted.org/packages/19/80/2fd17d322aec7f430549f0669f599997174f93ee17929ea5b92781ec902c/frozenlist-1.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ca9973735ce9f770d24d5484dcb42f68f135351c2fc81a7a9369e48cf2998a29", size = 123831, upload-time 
= "2025-04-17T22:36:49.401Z" }, - { url = "https://files.pythonhosted.org/packages/99/06/f5812da431273f78c6543e0b2f7de67dfd65eb0a433978b2c9c63d2205e4/frozenlist-1.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6ac40ec76041c67b928ca8aaffba15c2b2ee3f5ae8d0cb0617b5e63ec119ca25", size = 121862, upload-time = "2025-04-17T22:36:51.899Z" }, - { url = "https://files.pythonhosted.org/packages/d0/31/9e61c6b5fc493cf24d54881731204d27105234d09878be1a5983182cc4a5/frozenlist-1.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95b7a8a3180dfb280eb044fdec562f9b461614c0ef21669aea6f1d3dac6ee576", size = 316361, upload-time = "2025-04-17T22:36:53.402Z" }, - { url = "https://files.pythonhosted.org/packages/9d/55/22ca9362d4f0222324981470fd50192be200154d51509ee6eb9baa148e96/frozenlist-1.6.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c444d824e22da6c9291886d80c7d00c444981a72686e2b59d38b285617cb52c8", size = 307115, upload-time = "2025-04-17T22:36:55.016Z" }, - { url = "https://files.pythonhosted.org/packages/ae/39/4fff42920a57794881e7bb3898dc7f5f539261711ea411b43bba3cde8b79/frozenlist-1.6.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb52c8166499a8150bfd38478248572c924c003cbb45fe3bcd348e5ac7c000f9", size = 322505, upload-time = "2025-04-17T22:36:57.12Z" }, - { url = "https://files.pythonhosted.org/packages/55/f2/88c41f374c1e4cf0092a5459e5f3d6a1e17ed274c98087a76487783df90c/frozenlist-1.6.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b35298b2db9c2468106278537ee529719228950a5fdda686582f68f247d1dc6e", size = 322666, upload-time = "2025-04-17T22:36:58.735Z" }, - { url = "https://files.pythonhosted.org/packages/75/51/034eeb75afdf3fd03997856195b500722c0b1a50716664cde64e28299c4b/frozenlist-1.6.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:d108e2d070034f9d57210f22fefd22ea0d04609fc97c5f7f5a686b3471028590", size = 302119, upload-time = "2025-04-17T22:37:00.512Z" }, - { url = "https://files.pythonhosted.org/packages/2b/a6/564ecde55ee633270a793999ef4fd1d2c2b32b5a7eec903b1012cb7c5143/frozenlist-1.6.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e1be9111cb6756868ac242b3c2bd1f09d9aea09846e4f5c23715e7afb647103", size = 316226, upload-time = "2025-04-17T22:37:02.102Z" }, - { url = "https://files.pythonhosted.org/packages/f1/c8/6c0682c32377f402b8a6174fb16378b683cf6379ab4d2827c580892ab3c7/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:94bb451c664415f02f07eef4ece976a2c65dcbab9c2f1705b7031a3a75349d8c", size = 312788, upload-time = "2025-04-17T22:37:03.578Z" }, - { url = "https://files.pythonhosted.org/packages/b6/b8/10fbec38f82c5d163ca1750bfff4ede69713badf236a016781cf1f10a0f0/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:d1a686d0b0949182b8faddea596f3fc11f44768d1f74d4cad70213b2e139d821", size = 325914, upload-time = "2025-04-17T22:37:05.213Z" }, - { url = "https://files.pythonhosted.org/packages/62/ca/2bf4f3a1bd40cdedd301e6ecfdbb291080d5afc5f9ce350c0739f773d6b9/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ea8e59105d802c5a38bdbe7362822c522230b3faba2aa35c0fa1765239b7dd70", size = 305283, upload-time = "2025-04-17T22:37:06.985Z" }, - { url = "https://files.pythonhosted.org/packages/09/64/20cc13ccf94abc2a1f482f74ad210703dc78a590d0b805af1c9aa67f76f9/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:abc4e880a9b920bc5020bf6a431a6bb40589d9bca3975c980495f63632e8382f", size = 319264, upload-time = "2025-04-17T22:37:08.618Z" }, - { url = "https://files.pythonhosted.org/packages/20/ff/86c6a2bbe98cfc231519f5e6d712a0898488ceac804a917ce014f32e68f6/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = 
"sha256:9a79713adfe28830f27a3c62f6b5406c37376c892b05ae070906f07ae4487046", size = 326482, upload-time = "2025-04-17T22:37:10.196Z" }, - { url = "https://files.pythonhosted.org/packages/2f/da/8e381f66367d79adca245d1d71527aac774e30e291d41ef161ce2d80c38e/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a0318c2068e217a8f5e3b85e35899f5a19e97141a45bb925bb357cfe1daf770", size = 318248, upload-time = "2025-04-17T22:37:12.284Z" }, - { url = "https://files.pythonhosted.org/packages/39/24/1a1976563fb476ab6f0fa9fefaac7616a4361dbe0461324f9fd7bf425dbe/frozenlist-1.6.0-cp312-cp312-win32.whl", hash = "sha256:853ac025092a24bb3bf09ae87f9127de9fe6e0c345614ac92536577cf956dfcc", size = 115161, upload-time = "2025-04-17T22:37:13.902Z" }, - { url = "https://files.pythonhosted.org/packages/80/2e/fb4ed62a65f8cd66044706b1013f0010930d8cbb0729a2219561ea075434/frozenlist-1.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:2bdfe2d7e6c9281c6e55523acd6c2bf77963cb422fdc7d142fb0cb6621b66878", size = 120548, upload-time = "2025-04-17T22:37:15.326Z" }, - { url = "https://files.pythonhosted.org/packages/71/3e/b04a0adda73bd52b390d730071c0d577073d3d26740ee1bad25c3ad0f37b/frozenlist-1.6.0-py3-none-any.whl", hash = "sha256:535eec9987adb04701266b92745d6cdcef2e77669299359c3009c3404dd5d191", size = 12404, upload-time = "2025-04-17T22:38:51.668Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/ee/f4/d744cba2da59b5c1d88823cf9e8a6c74e4659e2b27604ed973be2a0bf5ab/frozenlist-1.6.0.tar.gz", hash = "sha256:b99655c32c1c8e06d111e7f41c06c29a5318cb1835df23a45518e02a47c63b68", size = 42831 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9c/8a/289b7d0de2fbac832ea80944d809759976f661557a38bb8e77db5d9f79b7/frozenlist-1.6.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c5b9e42ace7d95bf41e19b87cec8f262c41d3510d8ad7514ab3862ea2197bfb1", size = 160193 }, + { url = 
"https://files.pythonhosted.org/packages/19/80/2fd17d322aec7f430549f0669f599997174f93ee17929ea5b92781ec902c/frozenlist-1.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ca9973735ce9f770d24d5484dcb42f68f135351c2fc81a7a9369e48cf2998a29", size = 123831 }, + { url = "https://files.pythonhosted.org/packages/99/06/f5812da431273f78c6543e0b2f7de67dfd65eb0a433978b2c9c63d2205e4/frozenlist-1.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6ac40ec76041c67b928ca8aaffba15c2b2ee3f5ae8d0cb0617b5e63ec119ca25", size = 121862 }, + { url = "https://files.pythonhosted.org/packages/d0/31/9e61c6b5fc493cf24d54881731204d27105234d09878be1a5983182cc4a5/frozenlist-1.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95b7a8a3180dfb280eb044fdec562f9b461614c0ef21669aea6f1d3dac6ee576", size = 316361 }, + { url = "https://files.pythonhosted.org/packages/9d/55/22ca9362d4f0222324981470fd50192be200154d51509ee6eb9baa148e96/frozenlist-1.6.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c444d824e22da6c9291886d80c7d00c444981a72686e2b59d38b285617cb52c8", size = 307115 }, + { url = "https://files.pythonhosted.org/packages/ae/39/4fff42920a57794881e7bb3898dc7f5f539261711ea411b43bba3cde8b79/frozenlist-1.6.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb52c8166499a8150bfd38478248572c924c003cbb45fe3bcd348e5ac7c000f9", size = 322505 }, + { url = "https://files.pythonhosted.org/packages/55/f2/88c41f374c1e4cf0092a5459e5f3d6a1e17ed274c98087a76487783df90c/frozenlist-1.6.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b35298b2db9c2468106278537ee529719228950a5fdda686582f68f247d1dc6e", size = 322666 }, + { url = "https://files.pythonhosted.org/packages/75/51/034eeb75afdf3fd03997856195b500722c0b1a50716664cde64e28299c4b/frozenlist-1.6.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:d108e2d070034f9d57210f22fefd22ea0d04609fc97c5f7f5a686b3471028590", size = 302119 }, + { url = "https://files.pythonhosted.org/packages/2b/a6/564ecde55ee633270a793999ef4fd1d2c2b32b5a7eec903b1012cb7c5143/frozenlist-1.6.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e1be9111cb6756868ac242b3c2bd1f09d9aea09846e4f5c23715e7afb647103", size = 316226 }, + { url = "https://files.pythonhosted.org/packages/f1/c8/6c0682c32377f402b8a6174fb16378b683cf6379ab4d2827c580892ab3c7/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:94bb451c664415f02f07eef4ece976a2c65dcbab9c2f1705b7031a3a75349d8c", size = 312788 }, + { url = "https://files.pythonhosted.org/packages/b6/b8/10fbec38f82c5d163ca1750bfff4ede69713badf236a016781cf1f10a0f0/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:d1a686d0b0949182b8faddea596f3fc11f44768d1f74d4cad70213b2e139d821", size = 325914 }, + { url = "https://files.pythonhosted.org/packages/62/ca/2bf4f3a1bd40cdedd301e6ecfdbb291080d5afc5f9ce350c0739f773d6b9/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ea8e59105d802c5a38bdbe7362822c522230b3faba2aa35c0fa1765239b7dd70", size = 305283 }, + { url = "https://files.pythonhosted.org/packages/09/64/20cc13ccf94abc2a1f482f74ad210703dc78a590d0b805af1c9aa67f76f9/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:abc4e880a9b920bc5020bf6a431a6bb40589d9bca3975c980495f63632e8382f", size = 319264 }, + { url = "https://files.pythonhosted.org/packages/20/ff/86c6a2bbe98cfc231519f5e6d712a0898488ceac804a917ce014f32e68f6/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9a79713adfe28830f27a3c62f6b5406c37376c892b05ae070906f07ae4487046", size = 326482 }, + { url = "https://files.pythonhosted.org/packages/2f/da/8e381f66367d79adca245d1d71527aac774e30e291d41ef161ce2d80c38e/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:9a0318c2068e217a8f5e3b85e35899f5a19e97141a45bb925bb357cfe1daf770", size = 318248 }, + { url = "https://files.pythonhosted.org/packages/39/24/1a1976563fb476ab6f0fa9fefaac7616a4361dbe0461324f9fd7bf425dbe/frozenlist-1.6.0-cp312-cp312-win32.whl", hash = "sha256:853ac025092a24bb3bf09ae87f9127de9fe6e0c345614ac92536577cf956dfcc", size = 115161 }, + { url = "https://files.pythonhosted.org/packages/80/2e/fb4ed62a65f8cd66044706b1013f0010930d8cbb0729a2219561ea075434/frozenlist-1.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:2bdfe2d7e6c9281c6e55523acd6c2bf77963cb422fdc7d142fb0cb6621b66878", size = 120548 }, + { url = "https://files.pythonhosted.org/packages/71/3e/b04a0adda73bd52b390d730071c0d577073d3d26740ee1bad25c3ad0f37b/frozenlist-1.6.0-py3-none-any.whl", hash = "sha256:535eec9987adb04701266b92745d6cdcef2e77669299359c3009c3404dd5d191", size = 12404 }, ] [[package]] name = "fsspec" version = "2025.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/34/f4/5721faf47b8c499e776bc34c6a8fc17efdf7fdef0b00f398128bc5dcb4ac/fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972", size = 298491, upload-time = "2025-03-07T21:47:56.461Z" } +sdist = { url = "https://files.pythonhosted.org/packages/34/f4/5721faf47b8c499e776bc34c6a8fc17efdf7fdef0b00f398128bc5dcb4ac/fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972", size = 298491 } wheels = [ - { url = "https://files.pythonhosted.org/packages/56/53/eb690efa8513166adef3e0669afd31e95ffde69fb3c52ec2ac7223ed6018/fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3", size = 193615, upload-time = "2025-03-07T21:47:54.809Z" }, + { url = "https://files.pythonhosted.org/packages/56/53/eb690efa8513166adef3e0669afd31e95ffde69fb3c52ec2ac7223ed6018/fsspec-2025.3.0-py3-none-any.whl", hash = 
"sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3", size = 193615 }, ] [package.optional-dependencies] @@ -470,36 +441,52 @@ http = [ name = "hf-transfer" version = "0.1.9" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1a/eb/8fc64f40388c29ce8ce3b2b180a089d4d6b25b1d0d232d016704cb852104/hf_transfer-0.1.9.tar.gz", hash = "sha256:035572865dab29d17e783fbf1e84cf1cb24f3fcf8f1b17db1cfc7fdf139f02bf", size = 25201, upload-time = "2025-01-07T10:05:12.947Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1a/eb/8fc64f40388c29ce8ce3b2b180a089d4d6b25b1d0d232d016704cb852104/hf_transfer-0.1.9.tar.gz", hash = "sha256:035572865dab29d17e783fbf1e84cf1cb24f3fcf8f1b17db1cfc7fdf139f02bf", size = 25201 } wheels = [ - { url = "https://files.pythonhosted.org/packages/81/f5/461d2e5f307e5048289b1168d5c642ae3bb2504e88dff1a38b92ed990a21/hf_transfer-0.1.9-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e66acf91df4a8b72f60223059df3003062a5ae111757187ed1a06750a30e911b", size = 1393046, upload-time = "2025-01-07T10:04:51.003Z" }, - { url = "https://files.pythonhosted.org/packages/41/ba/8d9fd9f1083525edfcb389c93738c802f3559cb749324090d7109c8bf4c2/hf_transfer-0.1.9-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:8669dbcc7a3e2e8d61d42cd24da9c50d57770bd74b445c65123291ca842a7e7a", size = 1348126, upload-time = "2025-01-07T10:04:45.712Z" }, - { url = "https://files.pythonhosted.org/packages/8e/a2/cd7885bc9959421065a6fae0fe67b6c55becdeda4e69b873e52976f9a9f0/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8fd0167c4407a3bc4cdd0307e65ada2294ec04f1813d8a69a5243e379b22e9d8", size = 3728604, upload-time = "2025-01-07T10:04:14.173Z" }, - { url = "https://files.pythonhosted.org/packages/f6/2e/a072cf196edfeda3310c9a5ade0a0fdd785e6154b3ce24fc738c818da2a7/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:ee8b10afedcb75f71091bcc197c526a6ebf5c58bbbadb34fdeee6160f55f619f", size = 3064995, upload-time = "2025-01-07T10:04:18.663Z" }, - { url = "https://files.pythonhosted.org/packages/c2/84/aec9ef4c0fab93c1ea2b1badff38c78b4b2f86f0555b26d2051dbc920cde/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5828057e313de59300dd1abb489444bc452efe3f479d3c55b31a8f680936ba42", size = 3580908, upload-time = "2025-01-07T10:04:32.834Z" }, - { url = "https://files.pythonhosted.org/packages/29/63/b560d39651a56603d64f1a0212d0472a44cbd965db2fa62b99d99cb981bf/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc6bd19e1cc177c66bdef15ef8636ad3bde79d5a4f608c158021153b4573509d", size = 3400839, upload-time = "2025-01-07T10:04:26.122Z" }, - { url = "https://files.pythonhosted.org/packages/d6/d8/f87ea6f42456254b48915970ed98e993110521e9263472840174d32c880d/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdca9bfb89e6f8f281890cc61a8aff2d3cecaff7e1a4d275574d96ca70098557", size = 3552664, upload-time = "2025-01-07T10:04:40.123Z" }, - { url = "https://files.pythonhosted.org/packages/d6/56/1267c39b65fc8f4e2113b36297320f102718bf5799b544a6cbe22013aa1d/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:89a23f58b7b7effbc047b8ca286f131b17728c99a9f972723323003ffd1bb916", size = 4073732, upload-time = "2025-01-07T10:04:55.624Z" }, - { url = "https://files.pythonhosted.org/packages/82/1a/9c748befbe3decf7cb415e34f8a0c3789a0a9c55910dea73d581e48c0ce5/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:dc7fff1345980d6c0ebb92c811d24afa4b98b3e07ed070c8e38cc91fd80478c5", size = 3390096, upload-time = "2025-01-07T10:04:59.98Z" }, - { url = "https://files.pythonhosted.org/packages/72/85/4c03da147b6b4b7cb12e074d3d44eee28604a387ed0eaf7eaaead5069c57/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_i686.whl", hash = 
"sha256:1a6bd16c667ebe89a069ca163060127a794fa3a3525292c900b8c8cc47985b0d", size = 3664743, upload-time = "2025-01-07T10:05:05.416Z" }, - { url = "https://files.pythonhosted.org/packages/e7/6e/e597b04f753f1b09e6893075d53a82a30c13855cbaa791402695b01e369f/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d2fde99d502093ade3ab1b53f80da18480e9902aa960dab7f74fb1b9e5bc5746", size = 3695243, upload-time = "2025-01-07T10:05:11.411Z" }, - { url = "https://files.pythonhosted.org/packages/09/89/d4e234727a26b2546c8fb70a276cd924260d60135f2165bf8b9ed67bb9a4/hf_transfer-0.1.9-cp38-abi3-win32.whl", hash = "sha256:435cc3cdc8524ce57b074032b8fd76eed70a4224d2091232fa6a8cef8fd6803e", size = 1086605, upload-time = "2025-01-07T10:05:18.873Z" }, - { url = "https://files.pythonhosted.org/packages/a1/14/f1e15b851d1c2af5b0b1a82bf8eb10bda2da62d98180220ba6fd8879bb5b/hf_transfer-0.1.9-cp38-abi3-win_amd64.whl", hash = "sha256:16f208fc678911c37e11aa7b586bc66a37d02e636208f18b6bc53d29b5df40ad", size = 1160240, upload-time = "2025-01-07T10:05:14.324Z" }, + { url = "https://files.pythonhosted.org/packages/81/f5/461d2e5f307e5048289b1168d5c642ae3bb2504e88dff1a38b92ed990a21/hf_transfer-0.1.9-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e66acf91df4a8b72f60223059df3003062a5ae111757187ed1a06750a30e911b", size = 1393046 }, + { url = "https://files.pythonhosted.org/packages/41/ba/8d9fd9f1083525edfcb389c93738c802f3559cb749324090d7109c8bf4c2/hf_transfer-0.1.9-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:8669dbcc7a3e2e8d61d42cd24da9c50d57770bd74b445c65123291ca842a7e7a", size = 1348126 }, + { url = "https://files.pythonhosted.org/packages/8e/a2/cd7885bc9959421065a6fae0fe67b6c55becdeda4e69b873e52976f9a9f0/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8fd0167c4407a3bc4cdd0307e65ada2294ec04f1813d8a69a5243e379b22e9d8", size = 3728604 }, + { url = 
"https://files.pythonhosted.org/packages/f6/2e/a072cf196edfeda3310c9a5ade0a0fdd785e6154b3ce24fc738c818da2a7/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ee8b10afedcb75f71091bcc197c526a6ebf5c58bbbadb34fdeee6160f55f619f", size = 3064995 }, + { url = "https://files.pythonhosted.org/packages/c2/84/aec9ef4c0fab93c1ea2b1badff38c78b4b2f86f0555b26d2051dbc920cde/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5828057e313de59300dd1abb489444bc452efe3f479d3c55b31a8f680936ba42", size = 3580908 }, + { url = "https://files.pythonhosted.org/packages/29/63/b560d39651a56603d64f1a0212d0472a44cbd965db2fa62b99d99cb981bf/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc6bd19e1cc177c66bdef15ef8636ad3bde79d5a4f608c158021153b4573509d", size = 3400839 }, + { url = "https://files.pythonhosted.org/packages/d6/d8/f87ea6f42456254b48915970ed98e993110521e9263472840174d32c880d/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdca9bfb89e6f8f281890cc61a8aff2d3cecaff7e1a4d275574d96ca70098557", size = 3552664 }, + { url = "https://files.pythonhosted.org/packages/d6/56/1267c39b65fc8f4e2113b36297320f102718bf5799b544a6cbe22013aa1d/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:89a23f58b7b7effbc047b8ca286f131b17728c99a9f972723323003ffd1bb916", size = 4073732 }, + { url = "https://files.pythonhosted.org/packages/82/1a/9c748befbe3decf7cb415e34f8a0c3789a0a9c55910dea73d581e48c0ce5/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:dc7fff1345980d6c0ebb92c811d24afa4b98b3e07ed070c8e38cc91fd80478c5", size = 3390096 }, + { url = "https://files.pythonhosted.org/packages/72/85/4c03da147b6b4b7cb12e074d3d44eee28604a387ed0eaf7eaaead5069c57/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:1a6bd16c667ebe89a069ca163060127a794fa3a3525292c900b8c8cc47985b0d", size = 3664743 }, + { url = 
"https://files.pythonhosted.org/packages/e7/6e/e597b04f753f1b09e6893075d53a82a30c13855cbaa791402695b01e369f/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d2fde99d502093ade3ab1b53f80da18480e9902aa960dab7f74fb1b9e5bc5746", size = 3695243 }, + { url = "https://files.pythonhosted.org/packages/09/89/d4e234727a26b2546c8fb70a276cd924260d60135f2165bf8b9ed67bb9a4/hf_transfer-0.1.9-cp38-abi3-win32.whl", hash = "sha256:435cc3cdc8524ce57b074032b8fd76eed70a4224d2091232fa6a8cef8fd6803e", size = 1086605 }, + { url = "https://files.pythonhosted.org/packages/a1/14/f1e15b851d1c2af5b0b1a82bf8eb10bda2da62d98180220ba6fd8879bb5b/hf_transfer-0.1.9-cp38-abi3-win_amd64.whl", hash = "sha256:16f208fc678911c37e11aa7b586bc66a37d02e636208f18b6bc53d29b5df40ad", size = 1160240 }, ] [[package]] name = "hf-xet" version = "1.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/aa/2c/70009910fcbd204bde75842b60c1e47fe72edb0e978954cb8001735885c7/hf_xet-1.1.0.tar.gz", hash = "sha256:a7c2a4c2b6eee9ce0a1a367a82b60d95ba634420ef1c250addad7aa4af419cf4", size = 263996, upload-time = "2025-04-29T21:15:51.247Z" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/2c/70009910fcbd204bde75842b60c1e47fe72edb0e978954cb8001735885c7/hf_xet-1.1.0.tar.gz", hash = "sha256:a7c2a4c2b6eee9ce0a1a367a82b60d95ba634420ef1c250addad7aa4af419cf4", size = 263996 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/fd/0db331297e331f0f02005fd7ea666439bf15efd74f0dd62af02a43236a1b/hf_xet-1.1.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:0322c42551e275fcb7949c083a54a81b2898e50787c9aa74284fcb8d2c58c12c", size = 5069444 }, + { url = "https://files.pythonhosted.org/packages/b9/7d/4d7ae44219d3744ad55669cb90ef3d4ed9f5f8a4729fa635a6499491cb78/hf_xet-1.1.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:667153a0304ac2debf2af95a8ff7687186f885b493f4cd16344869af270cd110", size = 4881465 }, + { url = 
"https://files.pythonhosted.org/packages/83/9a/d40d2a57b132d609d8a4ccc29e59ed69749021610616749cabcda2532158/hf_xet-1.1.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:995eeffb119636ea617b96c7d7bf3c3f5ea8727fa57974574e25d700b8532d48", size = 53584225 }, + { url = "https://files.pythonhosted.org/packages/2e/01/d94553f91d85746e0862f24d239da88d10f5ce252b028565744e982432f4/hf_xet-1.1.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3aee847da362393331f515c4010d0aaa1c2669acfcca1f4b28946d6949cc0086", size = 52043680 }, + { url = "https://files.pythonhosted.org/packages/29/89/1f31853bf378f0ceb3363c07fd8a12af9b904b1f8c21e65eb5c19397bc98/hf_xet-1.1.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:68c5813a6074aa36e12ef5983230e3b03148cce61e0fcdd294096493795565b4", size = 53072672 }, + { url = "https://files.pythonhosted.org/packages/b5/9f/5ecb92b18a4b2135a72a95dc08bcbeda9176f46642c745ee052420d2aea8/hf_xet-1.1.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4ee9222bf9274b1c198b88a929de0b5a49349c4962d89c5b3b2f0f7f47d9761c", size = 53521053 }, + { url = "https://files.pythonhosted.org/packages/53/d6/cb32842cbf1cf5a154b41fa918a2fd86003af9bca227a2397cd7f312a8a6/hf_xet-1.1.0-cp37-abi3-win_amd64.whl", hash = "sha256:73153eab9abf3d6973b21e94a67ccba5d595c3e12feb8c0bf50be02964e7f126", size = 4204376 }, +] + +[[package]] +name = "htmldate" +version = "1.9.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "charset-normalizer" }, + { name = "dateparser" }, + { name = "lxml" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a5/26/aaae4cab984f0b7dd0f5f1b823fa2ed2fd4a2bb50acd5bd2f0d217562678/htmldate-1.9.3.tar.gz", hash = "sha256:ac0caf4628c3ded4042011e2d60dc68dfb314c77b106587dd307a80d77e708e9", size = 44913 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/dc/fd/0db331297e331f0f02005fd7ea666439bf15efd74f0dd62af02a43236a1b/hf_xet-1.1.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:0322c42551e275fcb7949c083a54a81b2898e50787c9aa74284fcb8d2c58c12c", size = 5069444, upload-time = "2025-04-29T21:15:42.631Z" }, - { url = "https://files.pythonhosted.org/packages/b9/7d/4d7ae44219d3744ad55669cb90ef3d4ed9f5f8a4729fa635a6499491cb78/hf_xet-1.1.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:667153a0304ac2debf2af95a8ff7687186f885b493f4cd16344869af270cd110", size = 4881465, upload-time = "2025-04-29T21:15:40.799Z" }, - { url = "https://files.pythonhosted.org/packages/83/9a/d40d2a57b132d609d8a4ccc29e59ed69749021610616749cabcda2532158/hf_xet-1.1.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:995eeffb119636ea617b96c7d7bf3c3f5ea8727fa57974574e25d700b8532d48", size = 53584225, upload-time = "2025-04-29T21:15:37.754Z" }, - { url = "https://files.pythonhosted.org/packages/2e/01/d94553f91d85746e0862f24d239da88d10f5ce252b028565744e982432f4/hf_xet-1.1.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3aee847da362393331f515c4010d0aaa1c2669acfcca1f4b28946d6949cc0086", size = 52043680, upload-time = "2025-04-29T21:15:34.15Z" }, - { url = "https://files.pythonhosted.org/packages/29/89/1f31853bf378f0ceb3363c07fd8a12af9b904b1f8c21e65eb5c19397bc98/hf_xet-1.1.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:68c5813a6074aa36e12ef5983230e3b03148cce61e0fcdd294096493795565b4", size = 53072672, upload-time = "2025-04-29T21:15:44.743Z" }, - { url = "https://files.pythonhosted.org/packages/b5/9f/5ecb92b18a4b2135a72a95dc08bcbeda9176f46642c745ee052420d2aea8/hf_xet-1.1.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4ee9222bf9274b1c198b88a929de0b5a49349c4962d89c5b3b2f0f7f47d9761c", size = 53521053, upload-time = "2025-04-29T21:15:48.252Z" }, - { url = 
"https://files.pythonhosted.org/packages/53/d6/cb32842cbf1cf5a154b41fa918a2fd86003af9bca227a2397cd7f312a8a6/hf_xet-1.1.0-cp37-abi3-win_amd64.whl", hash = "sha256:73153eab9abf3d6973b21e94a67ccba5d595c3e12feb8c0bf50be02964e7f126", size = 4204376, upload-time = "2025-04-29T21:15:52.69Z" }, + { url = "https://files.pythonhosted.org/packages/05/49/8872130016209c20436ce0c1067de1cf630755d0443d068a5bc17fa95015/htmldate-1.9.3-py3-none-any.whl", hash = "sha256:3fadc422cf3c10a5cdb5e1b914daf37ec7270400a80a1b37e2673ff84faaaff8", size = 31565 }, ] [[package]] @@ -515,12 +502,15 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/df/22/8eb91736b1dcb83d879bd49050a09df29a57cc5cd9f38e48a4b1c45ee890/huggingface_hub-0.30.2.tar.gz", hash = "sha256:9a7897c5b6fd9dad3168a794a8998d6378210f5b9688d0dfc180b1a228dc2466", size = 400868, upload-time = "2025-04-08T08:32:45.26Z" } +sdist = { url = "https://files.pythonhosted.org/packages/df/22/8eb91736b1dcb83d879bd49050a09df29a57cc5cd9f38e48a4b1c45ee890/huggingface_hub-0.30.2.tar.gz", hash = "sha256:9a7897c5b6fd9dad3168a794a8998d6378210f5b9688d0dfc180b1a228dc2466", size = 400868 } wheels = [ - { url = "https://files.pythonhosted.org/packages/93/27/1fb384a841e9661faad1c31cbfa62864f59632e876df5d795234da51c395/huggingface_hub-0.30.2-py3-none-any.whl", hash = "sha256:68ff05969927058cfa41df4f2155d4bb48f5f54f719dd0390103eefa9b191e28", size = 481433, upload-time = "2025-04-08T08:32:43.305Z" }, + { url = "https://files.pythonhosted.org/packages/93/27/1fb384a841e9661faad1c31cbfa62864f59632e876df5d795234da51c395/huggingface_hub-0.30.2-py3-none-any.whl", hash = "sha256:68ff05969927058cfa41df4f2155d4bb48f5f54f719dd0390103eefa9b191e28", size = 481433 }, ] [package.optional-dependencies] +hf-xet = [ + { name = "hf-xet" }, +] inference = [ { name = "aiohttp" }, ] @@ -532,71 +522,48 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pyreadline3", marker = 
"sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cc/3f/2c29224acb2e2df4d2046e4c73ee2662023c58ff5b113c4c1adac0886c43/humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc", size = 360702, upload-time = "2021-09-17T21:40:43.31Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cc/3f/2c29224acb2e2df4d2046e4c73ee2662023c58ff5b113c4c1adac0886c43/humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc", size = 360702 } wheels = [ - { url = "https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794, upload-time = "2021-09-17T21:40:39.897Z" }, + { url = "https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794 }, ] [[package]] name = "idna" version = "3.10" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } wheels = [ - { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = 
"sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, ] [[package]] name = "isodate" version = "0.7.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705, upload-time = "2024-10-08T23:04:11.5Z" } +sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705 } wheels = [ - { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320, upload-time = "2024-10-08T23:04:09.501Z" }, + { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 }, ] [[package]] -name = "jinja2" -version = "3.1.6" +name = "jmespath" +version = "1.0.1" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markupsafe" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time 
= "2025-03-05T20:05:02.478Z" } +sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843 } wheels = [ - { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, + { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256 }, ] [[package]] -name = "joblib" -version = "1.5.0" +name = "justext" +version = "3.0.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/30/08/8bd4a0250247861420a040b33ccf42f43c426ac91d99405374ef117e5872/joblib-1.5.0.tar.gz", hash = "sha256:d8757f955389a3dd7a23152e43bc297c2e0c2d3060056dad0feefc88a06939b5", size = 330234, upload-time = "2025-05-03T21:09:39.553Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/da/d3/13ee227a148af1c693654932b8b0b02ed64af5e1f7406d56b088b57574cd/joblib-1.5.0-py3-none-any.whl", hash = "sha256:206144b320246485b712fc8cc51f017de58225fa8b414a1fe1764a7231aca491", size = 307682, upload-time = "2025-05-03T21:09:37.892Z" }, +dependencies = [ + { name = "lxml", extra = ["html-clean"] }, ] - -[[package]] -name = "kiwisolver" -version = "1.4.8" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/82/59/7c91426a8ac292e1cdd53a63b6d9439abd573c875c3f92c146767dd33faf/kiwisolver-1.4.8.tar.gz", hash = "sha256:23d5f023bdc8c7e54eb65f03ca5d5bb25b601eac4d7f1a042888a1f45237987e", size = 97538, upload-time = 
"2024-12-24T18:30:51.519Z" } +sdist = { url = "https://files.pythonhosted.org/packages/49/f3/45890c1b314f0d04e19c1c83d534e611513150939a7cf039664d9ab1e649/justext-3.0.2.tar.gz", hash = "sha256:13496a450c44c4cd5b5a75a5efcd9996066d2a189794ea99a49949685a0beb05", size = 828521 } wheels = [ - { url = "https://files.pythonhosted.org/packages/fc/aa/cea685c4ab647f349c3bc92d2daf7ae34c8e8cf405a6dcd3a497f58a2ac3/kiwisolver-1.4.8-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d6af5e8815fd02997cb6ad9bbed0ee1e60014438ee1a5c2444c96f87b8843502", size = 124152, upload-time = "2024-12-24T18:29:16.85Z" }, - { url = "https://files.pythonhosted.org/packages/c5/0b/8db6d2e2452d60d5ebc4ce4b204feeb16176a851fd42462f66ade6808084/kiwisolver-1.4.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bade438f86e21d91e0cf5dd7c0ed00cda0f77c8c1616bd83f9fc157fa6760d31", size = 66555, upload-time = "2024-12-24T18:29:19.146Z" }, - { url = "https://files.pythonhosted.org/packages/60/26/d6a0db6785dd35d3ba5bf2b2df0aedc5af089962c6eb2cbf67a15b81369e/kiwisolver-1.4.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b83dc6769ddbc57613280118fb4ce3cd08899cc3369f7d0e0fab518a7cf37fdb", size = 65067, upload-time = "2024-12-24T18:29:20.096Z" }, - { url = "https://files.pythonhosted.org/packages/c9/ed/1d97f7e3561e09757a196231edccc1bcf59d55ddccefa2afc9c615abd8e0/kiwisolver-1.4.8-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:111793b232842991be367ed828076b03d96202c19221b5ebab421ce8bcad016f", size = 1378443, upload-time = "2024-12-24T18:29:22.843Z" }, - { url = "https://files.pythonhosted.org/packages/29/61/39d30b99954e6b46f760e6289c12fede2ab96a254c443639052d1b573fbc/kiwisolver-1.4.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:257af1622860e51b1a9d0ce387bf5c2c4f36a90594cb9514f55b074bcc787cfc", size = 1472728, upload-time = "2024-12-24T18:29:24.463Z" }, - { url = 
"https://files.pythonhosted.org/packages/0c/3e/804163b932f7603ef256e4a715e5843a9600802bb23a68b4e08c8c0ff61d/kiwisolver-1.4.8-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:69b5637c3f316cab1ec1c9a12b8c5f4750a4c4b71af9157645bf32830e39c03a", size = 1478388, upload-time = "2024-12-24T18:29:25.776Z" }, - { url = "https://files.pythonhosted.org/packages/8a/9e/60eaa75169a154700be74f875a4d9961b11ba048bef315fbe89cb6999056/kiwisolver-1.4.8-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:782bb86f245ec18009890e7cb8d13a5ef54dcf2ebe18ed65f795e635a96a1c6a", size = 1413849, upload-time = "2024-12-24T18:29:27.202Z" }, - { url = "https://files.pythonhosted.org/packages/bc/b3/9458adb9472e61a998c8c4d95cfdfec91c73c53a375b30b1428310f923e4/kiwisolver-1.4.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc978a80a0db3a66d25767b03688f1147a69e6237175c0f4ffffaaedf744055a", size = 1475533, upload-time = "2024-12-24T18:29:28.638Z" }, - { url = "https://files.pythonhosted.org/packages/e4/7a/0a42d9571e35798de80aef4bb43a9b672aa7f8e58643d7bd1950398ffb0a/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:36dbbfd34838500a31f52c9786990d00150860e46cd5041386f217101350f0d3", size = 2268898, upload-time = "2024-12-24T18:29:30.368Z" }, - { url = "https://files.pythonhosted.org/packages/d9/07/1255dc8d80271400126ed8db35a1795b1a2c098ac3a72645075d06fe5c5d/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:eaa973f1e05131de5ff3569bbba7f5fd07ea0595d3870ed4a526d486fe57fa1b", size = 2425605, upload-time = "2024-12-24T18:29:33.151Z" }, - { url = "https://files.pythonhosted.org/packages/84/df/5a3b4cf13780ef6f6942df67b138b03b7e79e9f1f08f57c49957d5867f6e/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a66f60f8d0c87ab7f59b6fb80e642ebb29fec354a4dfad687ca4092ae69d04f4", size = 2375801, upload-time = "2024-12-24T18:29:34.584Z" }, - { url = 
"https://files.pythonhosted.org/packages/8f/10/2348d068e8b0f635c8c86892788dac7a6b5c0cb12356620ab575775aad89/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:858416b7fb777a53f0c59ca08190ce24e9abbd3cffa18886a5781b8e3e26f65d", size = 2520077, upload-time = "2024-12-24T18:29:36.138Z" }, - { url = "https://files.pythonhosted.org/packages/32/d8/014b89fee5d4dce157d814303b0fce4d31385a2af4c41fed194b173b81ac/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:085940635c62697391baafaaeabdf3dd7a6c3643577dde337f4d66eba021b2b8", size = 2338410, upload-time = "2024-12-24T18:29:39.991Z" }, - { url = "https://files.pythonhosted.org/packages/bd/72/dfff0cc97f2a0776e1c9eb5bef1ddfd45f46246c6533b0191887a427bca5/kiwisolver-1.4.8-cp312-cp312-win_amd64.whl", hash = "sha256:01c3d31902c7db5fb6182832713d3b4122ad9317c2c5877d0539227d96bb2e50", size = 71853, upload-time = "2024-12-24T18:29:42.006Z" }, - { url = "https://files.pythonhosted.org/packages/dc/85/220d13d914485c0948a00f0b9eb419efaf6da81b7d72e88ce2391f7aed8d/kiwisolver-1.4.8-cp312-cp312-win_arm64.whl", hash = "sha256:a3c44cb68861de93f0c4a8175fbaa691f0aa22550c331fefef02b618a9dcb476", size = 65424, upload-time = "2024-12-24T18:29:44.38Z" }, + { url = "https://files.pythonhosted.org/packages/f2/ac/52f4e86d1924a7fc05af3aeb34488570eccc39b4af90530dd6acecdf16b5/justext-3.0.2-py2.py3-none-any.whl", hash = "sha256:62b1c562b15c3c6265e121cc070874243a443bfd53060e869393f09d6b6cc9a7", size = 837940 }, ] [[package]] @@ -607,34 +574,51 @@ dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, { name = "win32-setctime", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559, upload-time = "2024-12-06T11:20:56.608Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559 } wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" }, + { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595 }, ] [[package]] name = "lxml" version = "5.4.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/76/3d/14e82fc7c8fb1b7761f7e748fd47e2ec8276d137b6acfe5a4bb73853e08f/lxml-5.4.0.tar.gz", hash = "sha256:d12832e1dbea4be280b22fd0ea7c9b87f0d8fc51ba06e92dc62d52f804f78ebd", size = 3679479, upload-time = "2025-04-23T01:50:29.322Z" } +sdist = { url = "https://files.pythonhosted.org/packages/76/3d/14e82fc7c8fb1b7761f7e748fd47e2ec8276d137b6acfe5a4bb73853e08f/lxml-5.4.0.tar.gz", hash = "sha256:d12832e1dbea4be280b22fd0ea7c9b87f0d8fc51ba06e92dc62d52f804f78ebd", size = 3679479 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/4c/d101ace719ca6a4ec043eb516fcfcb1b396a9fccc4fcd9ef593df34ba0d5/lxml-5.4.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b5aff6f3e818e6bdbbb38e5967520f174b18f539c2b9de867b1e7fde6f8d95a4", size = 8127392 }, + { url = "https://files.pythonhosted.org/packages/11/84/beddae0cec4dd9ddf46abf156f0af451c13019a0fa25d7445b655ba5ccb7/lxml-5.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942a5d73f739ad7c452bf739a62a0f83e2578afd6b8e5406308731f4ce78b16d", size = 4415103 }, + { url = 
"https://files.pythonhosted.org/packages/d0/25/d0d93a4e763f0462cccd2b8a665bf1e4343dd788c76dcfefa289d46a38a9/lxml-5.4.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:460508a4b07364d6abf53acaa0a90b6d370fafde5693ef37602566613a9b0779", size = 5024224 }, + { url = "https://files.pythonhosted.org/packages/31/ce/1df18fb8f7946e7f3388af378b1f34fcf253b94b9feedb2cec5969da8012/lxml-5.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:529024ab3a505fed78fe3cc5ddc079464e709f6c892733e3f5842007cec8ac6e", size = 4769913 }, + { url = "https://files.pythonhosted.org/packages/4e/62/f4a6c60ae7c40d43657f552f3045df05118636be1165b906d3423790447f/lxml-5.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ca56ebc2c474e8f3d5761debfd9283b8b18c76c4fc0967b74aeafba1f5647f9", size = 5290441 }, + { url = "https://files.pythonhosted.org/packages/9e/aa/04f00009e1e3a77838c7fc948f161b5d2d5de1136b2b81c712a263829ea4/lxml-5.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a81e1196f0a5b4167a8dafe3a66aa67c4addac1b22dc47947abd5d5c7a3f24b5", size = 4820165 }, + { url = "https://files.pythonhosted.org/packages/c9/1f/e0b2f61fa2404bf0f1fdf1898377e5bd1b74cc9b2cf2c6ba8509b8f27990/lxml-5.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00b8686694423ddae324cf614e1b9659c2edb754de617703c3d29ff568448df5", size = 4932580 }, + { url = "https://files.pythonhosted.org/packages/24/a2/8263f351b4ffe0ed3e32ea7b7830f845c795349034f912f490180d88a877/lxml-5.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c5681160758d3f6ac5b4fea370495c48aac0989d6a0f01bb9a72ad8ef5ab75c4", size = 4759493 }, + { url = "https://files.pythonhosted.org/packages/05/00/41db052f279995c0e35c79d0f0fc9f8122d5b5e9630139c592a0b58c71b4/lxml-5.4.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:2dc191e60425ad70e75a68c9fd90ab284df64d9cd410ba8d2b641c0c45bc006e", 
size = 5324679 }, + { url = "https://files.pythonhosted.org/packages/1d/be/ee99e6314cdef4587617d3b3b745f9356d9b7dd12a9663c5f3b5734b64ba/lxml-5.4.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:67f779374c6b9753ae0a0195a892a1c234ce8416e4448fe1e9f34746482070a7", size = 4890691 }, + { url = "https://files.pythonhosted.org/packages/ad/36/239820114bf1d71f38f12208b9c58dec033cbcf80101cde006b9bde5cffd/lxml-5.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:79d5bfa9c1b455336f52343130b2067164040604e41f6dc4d8313867ed540079", size = 4955075 }, + { url = "https://files.pythonhosted.org/packages/d4/e1/1b795cc0b174efc9e13dbd078a9ff79a58728a033142bc6d70a1ee8fc34d/lxml-5.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d3c30ba1c9b48c68489dc1829a6eede9873f52edca1dda900066542528d6b20", size = 4838680 }, + { url = "https://files.pythonhosted.org/packages/72/48/3c198455ca108cec5ae3662ae8acd7fd99476812fd712bb17f1b39a0b589/lxml-5.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1af80c6316ae68aded77e91cd9d80648f7dd40406cef73df841aa3c36f6907c8", size = 5391253 }, + { url = "https://files.pythonhosted.org/packages/d6/10/5bf51858971c51ec96cfc13e800a9951f3fd501686f4c18d7d84fe2d6352/lxml-5.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4d885698f5019abe0de3d352caf9466d5de2baded00a06ef3f1216c1a58ae78f", size = 5261651 }, + { url = "https://files.pythonhosted.org/packages/2b/11/06710dd809205377da380546f91d2ac94bad9ff735a72b64ec029f706c85/lxml-5.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea53d51859b6c64e7c51d522c03cc2c48b9b5d6172126854cc7f01aa11f52bc", size = 5024315 }, + { url = "https://files.pythonhosted.org/packages/f5/b0/15b6217834b5e3a59ebf7f53125e08e318030e8cc0d7310355e6edac98ef/lxml-5.4.0-cp312-cp312-win32.whl", hash = "sha256:d90b729fd2732df28130c064aac9bb8aff14ba20baa4aee7bd0795ff1187545f", size = 3486149 }, + { url = 
"https://files.pythonhosted.org/packages/91/1e/05ddcb57ad2f3069101611bd5f5084157d90861a2ef460bf42f45cced944/lxml-5.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1dc4ca99e89c335a7ed47d38964abcb36c5910790f9bd106f2a8fa2ee0b909d2", size = 3817095 }, +] + +[package.optional-dependencies] +html-clean = [ + { name = "lxml-html-clean" }, +] + +[[package]] +name = "lxml-html-clean" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/79/b6/466e71db127950fb8d172026a8f0a9f0dc6f64c8e78e2ca79f252e5790b8/lxml_html_clean-0.4.2.tar.gz", hash = "sha256:91291e7b5db95430abf461bc53440964d58e06cc468950f9e47db64976cebcb3", size = 21622 } wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/4c/d101ace719ca6a4ec043eb516fcfcb1b396a9fccc4fcd9ef593df34ba0d5/lxml-5.4.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b5aff6f3e818e6bdbbb38e5967520f174b18f539c2b9de867b1e7fde6f8d95a4", size = 8127392, upload-time = "2025-04-23T01:46:04.09Z" }, - { url = "https://files.pythonhosted.org/packages/11/84/beddae0cec4dd9ddf46abf156f0af451c13019a0fa25d7445b655ba5ccb7/lxml-5.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942a5d73f739ad7c452bf739a62a0f83e2578afd6b8e5406308731f4ce78b16d", size = 4415103, upload-time = "2025-04-23T01:46:07.227Z" }, - { url = "https://files.pythonhosted.org/packages/d0/25/d0d93a4e763f0462cccd2b8a665bf1e4343dd788c76dcfefa289d46a38a9/lxml-5.4.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:460508a4b07364d6abf53acaa0a90b6d370fafde5693ef37602566613a9b0779", size = 5024224, upload-time = "2025-04-23T01:46:10.237Z" }, - { url = "https://files.pythonhosted.org/packages/31/ce/1df18fb8f7946e7f3388af378b1f34fcf253b94b9feedb2cec5969da8012/lxml-5.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:529024ab3a505fed78fe3cc5ddc079464e709f6c892733e3f5842007cec8ac6e", size = 4769913, upload-time = "2025-04-23T01:46:12.757Z" }, - { url = "https://files.pythonhosted.org/packages/4e/62/f4a6c60ae7c40d43657f552f3045df05118636be1165b906d3423790447f/lxml-5.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ca56ebc2c474e8f3d5761debfd9283b8b18c76c4fc0967b74aeafba1f5647f9", size = 5290441, upload-time = "2025-04-23T01:46:16.037Z" }, - { url = "https://files.pythonhosted.org/packages/9e/aa/04f00009e1e3a77838c7fc948f161b5d2d5de1136b2b81c712a263829ea4/lxml-5.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a81e1196f0a5b4167a8dafe3a66aa67c4addac1b22dc47947abd5d5c7a3f24b5", size = 4820165, upload-time = "2025-04-23T01:46:19.137Z" }, - { url = "https://files.pythonhosted.org/packages/c9/1f/e0b2f61fa2404bf0f1fdf1898377e5bd1b74cc9b2cf2c6ba8509b8f27990/lxml-5.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00b8686694423ddae324cf614e1b9659c2edb754de617703c3d29ff568448df5", size = 4932580, upload-time = "2025-04-23T01:46:21.963Z" }, - { url = "https://files.pythonhosted.org/packages/24/a2/8263f351b4ffe0ed3e32ea7b7830f845c795349034f912f490180d88a877/lxml-5.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c5681160758d3f6ac5b4fea370495c48aac0989d6a0f01bb9a72ad8ef5ab75c4", size = 4759493, upload-time = "2025-04-23T01:46:24.316Z" }, - { url = "https://files.pythonhosted.org/packages/05/00/41db052f279995c0e35c79d0f0fc9f8122d5b5e9630139c592a0b58c71b4/lxml-5.4.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:2dc191e60425ad70e75a68c9fd90ab284df64d9cd410ba8d2b641c0c45bc006e", size = 5324679, upload-time = "2025-04-23T01:46:27.097Z" }, - { url = "https://files.pythonhosted.org/packages/1d/be/ee99e6314cdef4587617d3b3b745f9356d9b7dd12a9663c5f3b5734b64ba/lxml-5.4.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = 
"sha256:67f779374c6b9753ae0a0195a892a1c234ce8416e4448fe1e9f34746482070a7", size = 4890691, upload-time = "2025-04-23T01:46:30.009Z" }, - { url = "https://files.pythonhosted.org/packages/ad/36/239820114bf1d71f38f12208b9c58dec033cbcf80101cde006b9bde5cffd/lxml-5.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:79d5bfa9c1b455336f52343130b2067164040604e41f6dc4d8313867ed540079", size = 4955075, upload-time = "2025-04-23T01:46:32.33Z" }, - { url = "https://files.pythonhosted.org/packages/d4/e1/1b795cc0b174efc9e13dbd078a9ff79a58728a033142bc6d70a1ee8fc34d/lxml-5.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d3c30ba1c9b48c68489dc1829a6eede9873f52edca1dda900066542528d6b20", size = 4838680, upload-time = "2025-04-23T01:46:34.852Z" }, - { url = "https://files.pythonhosted.org/packages/72/48/3c198455ca108cec5ae3662ae8acd7fd99476812fd712bb17f1b39a0b589/lxml-5.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1af80c6316ae68aded77e91cd9d80648f7dd40406cef73df841aa3c36f6907c8", size = 5391253, upload-time = "2025-04-23T01:46:37.608Z" }, - { url = "https://files.pythonhosted.org/packages/d6/10/5bf51858971c51ec96cfc13e800a9951f3fd501686f4c18d7d84fe2d6352/lxml-5.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4d885698f5019abe0de3d352caf9466d5de2baded00a06ef3f1216c1a58ae78f", size = 5261651, upload-time = "2025-04-23T01:46:40.183Z" }, - { url = "https://files.pythonhosted.org/packages/2b/11/06710dd809205377da380546f91d2ac94bad9ff735a72b64ec029f706c85/lxml-5.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea53d51859b6c64e7c51d522c03cc2c48b9b5d6172126854cc7f01aa11f52bc", size = 5024315, upload-time = "2025-04-23T01:46:43.333Z" }, - { url = "https://files.pythonhosted.org/packages/f5/b0/15b6217834b5e3a59ebf7f53125e08e318030e8cc0d7310355e6edac98ef/lxml-5.4.0-cp312-cp312-win32.whl", hash = "sha256:d90b729fd2732df28130c064aac9bb8aff14ba20baa4aee7bd0795ff1187545f", size = 3486149, upload-time = "2025-04-23T01:46:45.684Z" }, - { url = 
"https://files.pythonhosted.org/packages/91/1e/05ddcb57ad2f3069101611bd5f5084157d90861a2ef460bf42f45cced944/lxml-5.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1dc4ca99e89c335a7ed47d38964abcb36c5910790f9bd106f2a8fa2ee0b909d2", size = 3817095, upload-time = "2025-04-23T01:46:48.521Z" }, + { url = "https://files.pythonhosted.org/packages/4e/0b/942cb7278d6caad79343ad2ddd636ed204a47909b969d19114a3097f5aa3/lxml_html_clean-0.4.2-py3-none-any.whl", hash = "sha256:74ccfba277adcfea87a1e9294f47dd86b05d65b4da7c5b07966e3d5f3be8a505", size = 14184 }, ] [[package]] @@ -647,12 +631,12 @@ dependencies = [ { name = "onnxruntime" }, { name = "python-dotenv" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fe/b6/8fdd991142ad3e037179a494b153f463024e5a211ef3ad948b955c26b4de/magika-0.6.2.tar.gz", hash = "sha256:37eb6ae8020f6e68f231bc06052c0a0cbe8e6fa27492db345e8dc867dbceb067", size = 3036634, upload-time = "2025-05-02T14:54:18.88Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/b6/8fdd991142ad3e037179a494b153f463024e5a211ef3ad948b955c26b4de/magika-0.6.2.tar.gz", hash = "sha256:37eb6ae8020f6e68f231bc06052c0a0cbe8e6fa27492db345e8dc867dbceb067", size = 3036634 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/07/4f7748f34279f2852068256992377474f9700b6fbad6735d6be58605178f/magika-0.6.2-py3-none-any.whl", hash = "sha256:5ef72fbc07723029b3684ef81454bc224ac5f60986aa0fc5a28f4456eebcb5b2", size = 2967609, upload-time = "2025-05-02T14:54:09.696Z" }, - { url = "https://files.pythonhosted.org/packages/64/6d/0783af677e601d8a42258f0fbc47663abf435f927e58a8d2928296743099/magika-0.6.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9109309328a1553886c8ff36c2ee9a5e9cfd36893ad81b65bf61a57debdd9d0e", size = 12404787, upload-time = "2025-05-02T14:54:16.963Z" }, - { url = "https://files.pythonhosted.org/packages/8a/ad/42e39748ddc4bbe55c2dc1093ce29079c04d096ac0d844f8ae66178bc3ed/magika-0.6.2-py3-none-manylinux_2_28_x86_64.whl", hash = 
"sha256:57cd1d64897634d15de552bd6b3ae9c6ff6ead9c60d384dc46497c08288e4559", size = 15091089, upload-time = "2025-05-02T14:54:11.59Z" }, - { url = "https://files.pythonhosted.org/packages/b0/1f/28e412d0ccedc068fbccdae6a6233faaa97ec3e5e2ffd242e49655b10064/magika-0.6.2-py3-none-win_amd64.whl", hash = "sha256:711f427a633e0182737dcc2074748004842f870643585813503ff2553b973b9f", size = 12385740, upload-time = "2025-05-02T14:54:14.096Z" }, + { url = "https://files.pythonhosted.org/packages/c2/07/4f7748f34279f2852068256992377474f9700b6fbad6735d6be58605178f/magika-0.6.2-py3-none-any.whl", hash = "sha256:5ef72fbc07723029b3684ef81454bc224ac5f60986aa0fc5a28f4456eebcb5b2", size = 2967609 }, + { url = "https://files.pythonhosted.org/packages/64/6d/0783af677e601d8a42258f0fbc47663abf435f927e58a8d2928296743099/magika-0.6.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9109309328a1553886c8ff36c2ee9a5e9cfd36893ad81b65bf61a57debdd9d0e", size = 12404787 }, + { url = "https://files.pythonhosted.org/packages/8a/ad/42e39748ddc4bbe55c2dc1093ce29079c04d096ac0d844f8ae66178bc3ed/magika-0.6.2-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:57cd1d64897634d15de552bd6b3ae9c6ff6ead9c60d384dc46497c08288e4559", size = 15091089 }, + { url = "https://files.pythonhosted.org/packages/b0/1f/28e412d0ccedc068fbccdae6a6233faaa97ec3e5e2ffd242e49655b10064/magika-0.6.2-py3-none-win_amd64.whl", hash = "sha256:711f427a633e0182737dcc2074748004842f870643585813503ff2553b973b9f", size = 12385740 }, ] [[package]] @@ -662,9 +646,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cobble" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d6/a6/27a13ba068cf3ff764d631b8dd71dee1b33040aa8c143f66ce902b7d1da0/mammoth-1.9.0.tar.gz", hash = "sha256:74f5dae10ca240fd9b7a0e1a6deaebe0aad23bc590633ef6f5e868aa9b7042a6", size = 50906, upload-time = "2024-12-30T10:33:37.733Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/d6/a6/27a13ba068cf3ff764d631b8dd71dee1b33040aa8c143f66ce902b7d1da0/mammoth-1.9.0.tar.gz", hash = "sha256:74f5dae10ca240fd9b7a0e1a6deaebe0aad23bc590633ef6f5e868aa9b7042a6", size = 50906 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d0/ab/f8e63fcabc127c6efd68b03633c189ee799a5304fa96c036a325a2894bcb/mammoth-1.9.0-py2.py3-none-any.whl", hash = "sha256:0eea277316586f0ca65d86834aec4de5a0572c83ec54b4991f9bb520a891150f", size = 52901, upload-time = "2024-12-30T10:33:34.879Z" }, + { url = "https://files.pythonhosted.org/packages/d0/ab/f8e63fcabc127c6efd68b03633c189ee799a5304fa96c036a325a2894bcb/mammoth-1.9.0-py2.py3-none-any.whl", hash = "sha256:0eea277316586f0ca65d86834aec4de5a0572c83ec54b4991f9bb520a891150f", size = 52901 }, ] [[package]] @@ -674,9 +658,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mdurl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload-time = "2023-06-03T06:41:14.443Z" } +sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596 } wheels = [ - { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528, upload-time = "2023-06-03T06:41:11.019Z" }, + { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = 
"sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, ] [[package]] @@ -687,9 +671,9 @@ dependencies = [ { name = "beautifulsoup4" }, { name = "six" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2f/78/c48fed23c7aebc2c16049062e72de1da3220c274de59d28c942acdc9ffb2/markdownify-1.1.0.tar.gz", hash = "sha256:449c0bbbf1401c5112379619524f33b63490a8fa479456d41de9dc9e37560ebd", size = 17127, upload-time = "2025-03-05T11:54:40.574Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/78/c48fed23c7aebc2c16049062e72de1da3220c274de59d28c942acdc9ffb2/markdownify-1.1.0.tar.gz", hash = "sha256:449c0bbbf1401c5112379619524f33b63490a8fa479456d41de9dc9e37560ebd", size = 17127 } wheels = [ - { url = "https://files.pythonhosted.org/packages/64/11/b751af7ad41b254a802cf52f7bc1fca7cabe2388132f2ce60a1a6b9b9622/markdownify-1.1.0-py3-none-any.whl", hash = "sha256:32a5a08e9af02c8a6528942224c91b933b4bd2c7d078f9012943776fc313eeef", size = 13901, upload-time = "2025-03-05T11:54:39.454Z" }, + { url = "https://files.pythonhosted.org/packages/64/11/b751af7ad41b254a802cf52f7bc1fca7cabe2388132f2ce60a1a6b9b9622/markdownify-1.1.0-py3-none-any.whl", hash = "sha256:32a5a08e9af02c8a6528942224c91b933b4bd2c7d078f9012943776fc313eeef", size = 13901 }, ] [[package]] @@ -703,9 +687,9 @@ dependencies = [ { name = "markdownify" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cb/e8/83669ba97718bbbccd4c432b763d22783df4c8218e770717151acf01e85b/markitdown-0.1.1.tar.gz", hash = "sha256:da97a55a45a3d775ea758e88a344d5cac94ee97115fb0293f99027d32c2fc3f6", size = 31475, upload-time = "2025-03-25T06:22:21.438Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/e8/83669ba97718bbbccd4c432b763d22783df4c8218e770717151acf01e85b/markitdown-0.1.1.tar.gz", hash = "sha256:da97a55a45a3d775ea758e88a344d5cac94ee97115fb0293f99027d32c2fc3f6", size = 31475 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/0b/8a/c1f85ee609de5d45f80d0213bebf6664f76ab406e9d57709e684a4a436ba/markitdown-0.1.1-py3-none-any.whl", hash = "sha256:98ea8c009fe174b37ef933e00f4364214e8fed35691178b8521b13604d0c4a58", size = 48230, upload-time = "2025-03-25T06:22:19.773Z" }, + { url = "https://files.pythonhosted.org/packages/0b/8a/c1f85ee609de5d45f80d0213bebf6664f76ab406e9d57709e684a4a436ba/markitdown-0.1.1-py3-none-any.whl", hash = "sha256:98ea8c009fe174b37ef933e00f4364214e8fed35691178b8521b13604d0c4a58", size = 48230 }, ] [package.optional-dependencies] @@ -724,65 +708,22 @@ all = [ { name = "youtube-transcript-api" }, ] -[[package]] -name = "markupsafe" -version = "3.0.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537, upload-time = "2024-10-18T15:21:54.129Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274, upload-time = "2024-10-18T15:21:13.777Z" }, - { url = "https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348, upload-time = "2024-10-18T15:21:14.822Z" }, - { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149, upload-time = 
"2024-10-18T15:21:15.642Z" }, - { url = "https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118, upload-time = "2024-10-18T15:21:17.133Z" }, - { url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993, upload-time = "2024-10-18T15:21:18.064Z" }, - { url = "https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178, upload-time = "2024-10-18T15:21:18.859Z" }, - { url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319, upload-time = "2024-10-18T15:21:19.671Z" }, - { url = "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352, upload-time = "2024-10-18T15:21:20.971Z" }, - { url = "https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097, upload-time = "2024-10-18T15:21:22.646Z" }, - { url = 
"https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601, upload-time = "2024-10-18T15:21:23.499Z" }, -] - -[[package]] -name = "matplotlib" -version = "3.10.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "contourpy" }, - { name = "cycler" }, - { name = "fonttools" }, - { name = "kiwisolver" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "pillow" }, - { name = "pyparsing" }, - { name = "python-dateutil" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/2f/08/b89867ecea2e305f408fbb417139a8dd941ecf7b23a2e02157c36da546f0/matplotlib-3.10.1.tar.gz", hash = "sha256:e8d2d0e3881b129268585bf4765ad3ee73a4591d77b9a18c214ac7e3a79fb2ba", size = 36743335, upload-time = "2025-02-27T19:19:51.038Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/1d/5e0dc3b59c034e43de16f94deb68f4ad8a96b3ea00f4b37c160b7474928e/matplotlib-3.10.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:66e907a06e68cb6cfd652c193311d61a12b54f56809cafbed9736ce5ad92f107", size = 8175488, upload-time = "2025-02-27T19:18:51.436Z" }, - { url = "https://files.pythonhosted.org/packages/7a/81/dae7e14042e74da658c3336ab9799128e09a1ee03964f2d89630b5d12106/matplotlib-3.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b4bb156abb8fa5e5b2b460196f7db7264fc6d62678c03457979e7d5254b7be", size = 8046264, upload-time = "2025-02-27T19:18:54.344Z" }, - { url = "https://files.pythonhosted.org/packages/21/c4/22516775dcde10fc9c9571d155f90710761b028fc44f660508106c363c97/matplotlib-3.10.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1985ad3d97f51307a2cbfc801a930f120def19ba22864182dacef55277102ba6", size = 8452048, upload-time = "2025-02-27T19:18:56.536Z" }, - { url = 
"https://files.pythonhosted.org/packages/63/23/c0615001f67ce7c96b3051d856baedc0c818a2ed84570b9bf9bde200f85d/matplotlib-3.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c96f2c2f825d1257e437a1482c5a2cf4fee15db4261bd6fc0750f81ba2b4ba3d", size = 8597111, upload-time = "2025-02-27T19:18:59.439Z" }, - { url = "https://files.pythonhosted.org/packages/ca/c0/a07939a82aed77770514348f4568177d7dadab9787ebc618a616fe3d665e/matplotlib-3.10.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:35e87384ee9e488d8dd5a2dd7baf471178d38b90618d8ea147aced4ab59c9bea", size = 9402771, upload-time = "2025-02-27T19:19:01.944Z" }, - { url = "https://files.pythonhosted.org/packages/a6/b6/a9405484fb40746fdc6ae4502b16a9d6e53282ba5baaf9ebe2da579f68c4/matplotlib-3.10.1-cp312-cp312-win_amd64.whl", hash = "sha256:cfd414bce89cc78a7e1d25202e979b3f1af799e416010a20ab2b5ebb3a02425c", size = 8063742, upload-time = "2025-02-27T19:19:04.632Z" }, -] - [[package]] name = "mdurl" version = "0.1.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, + { url = 
"https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, ] [[package]] name = "mpmath" version = "1.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106 } wheels = [ - { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, + { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198 }, ] [[package]] @@ -794,9 +735,9 @@ dependencies = [ { name = "pyjwt", extra = ["crypto"] }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3f/90/81dcc50f0be11a8c4dcbae1a9f761a26e5f905231330a7cacc9f04ec4c61/msal-1.32.3.tar.gz", hash = "sha256:5eea038689c78a5a70ca8ecbe1245458b55a857bd096efb6989c69ba15985d35", size = 151449, upload-time = "2025-04-25T13:12:34.204Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/90/81dcc50f0be11a8c4dcbae1a9f761a26e5f905231330a7cacc9f04ec4c61/msal-1.32.3.tar.gz", hash = 
"sha256:5eea038689c78a5a70ca8ecbe1245458b55a857bd096efb6989c69ba15985d35", size = 151449 } wheels = [ - { url = "https://files.pythonhosted.org/packages/04/bf/81516b9aac7fd867709984d08eb4db1d2e3fe1df795c8e442cde9b568962/msal-1.32.3-py3-none-any.whl", hash = "sha256:b2798db57760b1961b142f027ffb7c8169536bf77316e99a0df5c4aaebb11569", size = 115358, upload-time = "2025-04-25T13:12:33.034Z" }, + { url = "https://files.pythonhosted.org/packages/04/bf/81516b9aac7fd867709984d08eb4db1d2e3fe1df795c8e442cde9b568962/msal-1.32.3-py3-none-any.whl", hash = "sha256:b2798db57760b1961b142f027ffb7c8169536bf77316e99a0df5c4aaebb11569", size = 115358 }, ] [[package]] @@ -806,35 +747,35 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "msal" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/01/99/5d239b6156eddf761a636bded1118414d161bd6b7b37a9335549ed159396/msal_extensions-1.3.1.tar.gz", hash = "sha256:c5b0fd10f65ef62b5f1d62f4251d51cbcaf003fcedae8c91b040a488614be1a4", size = 23315, upload-time = "2025-03-14T23:51:03.902Z" } +sdist = { url = "https://files.pythonhosted.org/packages/01/99/5d239b6156eddf761a636bded1118414d161bd6b7b37a9335549ed159396/msal_extensions-1.3.1.tar.gz", hash = "sha256:c5b0fd10f65ef62b5f1d62f4251d51cbcaf003fcedae8c91b040a488614be1a4", size = 23315 } wheels = [ - { url = "https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583, upload-time = "2025-03-14T23:51:03.016Z" }, + { url = "https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583 }, ] [[package]] name = "multidict" version = "6.4.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/da/2c/e367dfb4c6538614a0c9453e510d75d66099edf1c4e69da1b5ce691a1931/multidict-6.4.3.tar.gz", hash = "sha256:3ada0b058c9f213c5f95ba301f922d402ac234f1111a7d8fd70f1b99f3c281ec", size = 89372, upload-time = "2025-04-10T22:20:17.956Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fc/bb/3abdaf8fe40e9226ce8a2ba5ecf332461f7beec478a455d6587159f1bf92/multidict-6.4.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1f1c2f58f08b36f8475f3ec6f5aeb95270921d418bf18f90dffd6be5c7b0e676", size = 64019, upload-time = "2025-04-10T22:18:23.174Z" }, - { url = "https://files.pythonhosted.org/packages/7e/b5/1b2e8de8217d2e89db156625aa0fe4a6faad98972bfe07a7b8c10ef5dd6b/multidict-6.4.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:26ae9ad364fc61b936fb7bf4c9d8bd53f3a5b4417142cd0be5c509d6f767e2f1", size = 37925, upload-time = "2025-04-10T22:18:24.834Z" }, - { url = "https://files.pythonhosted.org/packages/b4/e2/3ca91c112644a395c8eae017144c907d173ea910c913ff8b62549dcf0bbf/multidict-6.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:659318c6c8a85f6ecfc06b4e57529e5a78dfdd697260cc81f683492ad7e9435a", size = 37008, upload-time = "2025-04-10T22:18:26.069Z" }, - { url = "https://files.pythonhosted.org/packages/60/23/79bc78146c7ac8d1ac766b2770ca2e07c2816058b8a3d5da6caed8148637/multidict-6.4.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1eb72c741fd24d5a28242ce72bb61bc91f8451877131fa3fe930edb195f7054", size = 224374, upload-time = "2025-04-10T22:18:27.714Z" }, - { url = "https://files.pythonhosted.org/packages/86/35/77950ed9ebd09136003a85c1926ba42001ca5be14feb49710e4334ee199b/multidict-6.4.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3cd06d88cb7398252284ee75c8db8e680aa0d321451132d0dba12bc995f0adcc", size = 230869, upload-time = "2025-04-10T22:18:29.162Z" }, - { url = 
"https://files.pythonhosted.org/packages/49/97/2a33c6e7d90bc116c636c14b2abab93d6521c0c052d24bfcc231cbf7f0e7/multidict-6.4.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4543d8dc6470a82fde92b035a92529317191ce993533c3c0c68f56811164ed07", size = 231949, upload-time = "2025-04-10T22:18:30.679Z" }, - { url = "https://files.pythonhosted.org/packages/56/ce/e9b5d9fcf854f61d6686ada7ff64893a7a5523b2a07da6f1265eaaea5151/multidict-6.4.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:30a3ebdc068c27e9d6081fca0e2c33fdf132ecea703a72ea216b81a66860adde", size = 231032, upload-time = "2025-04-10T22:18:32.146Z" }, - { url = "https://files.pythonhosted.org/packages/f0/ac/7ced59dcdfeddd03e601edb05adff0c66d81ed4a5160c443e44f2379eef0/multidict-6.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b038f10e23f277153f86f95c777ba1958bcd5993194fda26a1d06fae98b2f00c", size = 223517, upload-time = "2025-04-10T22:18:33.538Z" }, - { url = "https://files.pythonhosted.org/packages/db/e6/325ed9055ae4e085315193a1b58bdb4d7fc38ffcc1f4975cfca97d015e17/multidict-6.4.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c605a2b2dc14282b580454b9b5d14ebe0668381a3a26d0ac39daa0ca115eb2ae", size = 216291, upload-time = "2025-04-10T22:18:34.962Z" }, - { url = "https://files.pythonhosted.org/packages/fa/84/eeee6d477dd9dcb7691c3bb9d08df56017f5dd15c730bcc9383dcf201cf4/multidict-6.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8bd2b875f4ca2bb527fe23e318ddd509b7df163407b0fb717df229041c6df5d3", size = 228982, upload-time = "2025-04-10T22:18:36.443Z" }, - { url = "https://files.pythonhosted.org/packages/82/94/4d1f3e74e7acf8b0c85db350e012dcc61701cd6668bc2440bb1ecb423c90/multidict-6.4.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c2e98c840c9c8e65c0e04b40c6c5066c8632678cd50c8721fdbcd2e09f21a507", size = 226823, upload-time = "2025-04-10T22:18:37.924Z" }, - { 
url = "https://files.pythonhosted.org/packages/09/f0/1e54b95bda7cd01080e5732f9abb7b76ab5cc795b66605877caeb2197476/multidict-6.4.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:66eb80dd0ab36dbd559635e62fba3083a48a252633164857a1d1684f14326427", size = 222714, upload-time = "2025-04-10T22:18:39.807Z" }, - { url = "https://files.pythonhosted.org/packages/e7/a2/f6cbca875195bd65a3e53b37ab46486f3cc125bdeab20eefe5042afa31fb/multidict-6.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c23831bdee0a2a3cf21be057b5e5326292f60472fb6c6f86392bbf0de70ba731", size = 233739, upload-time = "2025-04-10T22:18:41.341Z" }, - { url = "https://files.pythonhosted.org/packages/79/68/9891f4d2b8569554723ddd6154375295f789dc65809826c6fb96a06314fd/multidict-6.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:1535cec6443bfd80d028052e9d17ba6ff8a5a3534c51d285ba56c18af97e9713", size = 230809, upload-time = "2025-04-10T22:18:42.817Z" }, - { url = "https://files.pythonhosted.org/packages/e6/72/a7be29ba1e87e4fc5ceb44dabc7940b8005fd2436a332a23547709315f70/multidict-6.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3b73e7227681f85d19dec46e5b881827cd354aabe46049e1a61d2f9aaa4e285a", size = 226934, upload-time = "2025-04-10T22:18:44.311Z" }, - { url = "https://files.pythonhosted.org/packages/12/c1/259386a9ad6840ff7afc686da96808b503d152ac4feb3a96c651dc4f5abf/multidict-6.4.3-cp312-cp312-win32.whl", hash = "sha256:8eac0c49df91b88bf91f818e0a24c1c46f3622978e2c27035bfdca98e0e18124", size = 35242, upload-time = "2025-04-10T22:18:46.193Z" }, - { url = "https://files.pythonhosted.org/packages/06/24/c8fdff4f924d37225dc0c56a28b1dca10728fc2233065fafeb27b4b125be/multidict-6.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:11990b5c757d956cd1db7cb140be50a63216af32cd6506329c2c59d732d802db", size = 38635, upload-time = "2025-04-10T22:18:47.498Z" }, - { url = 
"https://files.pythonhosted.org/packages/96/10/7d526c8974f017f1e7ca584c71ee62a638e9334d8d33f27d7cdfc9ae79e4/multidict-6.4.3-py3-none-any.whl", hash = "sha256:59fe01ee8e2a1e8ceb3f6dbb216b09c8d9f4ef1c22c4fc825d045a147fa2ebc9", size = 10400, upload-time = "2025-04-10T22:20:16.445Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/da/2c/e367dfb4c6538614a0c9453e510d75d66099edf1c4e69da1b5ce691a1931/multidict-6.4.3.tar.gz", hash = "sha256:3ada0b058c9f213c5f95ba301f922d402ac234f1111a7d8fd70f1b99f3c281ec", size = 89372 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/bb/3abdaf8fe40e9226ce8a2ba5ecf332461f7beec478a455d6587159f1bf92/multidict-6.4.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1f1c2f58f08b36f8475f3ec6f5aeb95270921d418bf18f90dffd6be5c7b0e676", size = 64019 }, + { url = "https://files.pythonhosted.org/packages/7e/b5/1b2e8de8217d2e89db156625aa0fe4a6faad98972bfe07a7b8c10ef5dd6b/multidict-6.4.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:26ae9ad364fc61b936fb7bf4c9d8bd53f3a5b4417142cd0be5c509d6f767e2f1", size = 37925 }, + { url = "https://files.pythonhosted.org/packages/b4/e2/3ca91c112644a395c8eae017144c907d173ea910c913ff8b62549dcf0bbf/multidict-6.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:659318c6c8a85f6ecfc06b4e57529e5a78dfdd697260cc81f683492ad7e9435a", size = 37008 }, + { url = "https://files.pythonhosted.org/packages/60/23/79bc78146c7ac8d1ac766b2770ca2e07c2816058b8a3d5da6caed8148637/multidict-6.4.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1eb72c741fd24d5a28242ce72bb61bc91f8451877131fa3fe930edb195f7054", size = 224374 }, + { url = "https://files.pythonhosted.org/packages/86/35/77950ed9ebd09136003a85c1926ba42001ca5be14feb49710e4334ee199b/multidict-6.4.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3cd06d88cb7398252284ee75c8db8e680aa0d321451132d0dba12bc995f0adcc", size = 230869 }, + { url = 
"https://files.pythonhosted.org/packages/49/97/2a33c6e7d90bc116c636c14b2abab93d6521c0c052d24bfcc231cbf7f0e7/multidict-6.4.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4543d8dc6470a82fde92b035a92529317191ce993533c3c0c68f56811164ed07", size = 231949 }, + { url = "https://files.pythonhosted.org/packages/56/ce/e9b5d9fcf854f61d6686ada7ff64893a7a5523b2a07da6f1265eaaea5151/multidict-6.4.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:30a3ebdc068c27e9d6081fca0e2c33fdf132ecea703a72ea216b81a66860adde", size = 231032 }, + { url = "https://files.pythonhosted.org/packages/f0/ac/7ced59dcdfeddd03e601edb05adff0c66d81ed4a5160c443e44f2379eef0/multidict-6.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b038f10e23f277153f86f95c777ba1958bcd5993194fda26a1d06fae98b2f00c", size = 223517 }, + { url = "https://files.pythonhosted.org/packages/db/e6/325ed9055ae4e085315193a1b58bdb4d7fc38ffcc1f4975cfca97d015e17/multidict-6.4.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c605a2b2dc14282b580454b9b5d14ebe0668381a3a26d0ac39daa0ca115eb2ae", size = 216291 }, + { url = "https://files.pythonhosted.org/packages/fa/84/eeee6d477dd9dcb7691c3bb9d08df56017f5dd15c730bcc9383dcf201cf4/multidict-6.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8bd2b875f4ca2bb527fe23e318ddd509b7df163407b0fb717df229041c6df5d3", size = 228982 }, + { url = "https://files.pythonhosted.org/packages/82/94/4d1f3e74e7acf8b0c85db350e012dcc61701cd6668bc2440bb1ecb423c90/multidict-6.4.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c2e98c840c9c8e65c0e04b40c6c5066c8632678cd50c8721fdbcd2e09f21a507", size = 226823 }, + { url = "https://files.pythonhosted.org/packages/09/f0/1e54b95bda7cd01080e5732f9abb7b76ab5cc795b66605877caeb2197476/multidict-6.4.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:66eb80dd0ab36dbd559635e62fba3083a48a252633164857a1d1684f14326427", size 
= 222714 }, + { url = "https://files.pythonhosted.org/packages/e7/a2/f6cbca875195bd65a3e53b37ab46486f3cc125bdeab20eefe5042afa31fb/multidict-6.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c23831bdee0a2a3cf21be057b5e5326292f60472fb6c6f86392bbf0de70ba731", size = 233739 }, + { url = "https://files.pythonhosted.org/packages/79/68/9891f4d2b8569554723ddd6154375295f789dc65809826c6fb96a06314fd/multidict-6.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:1535cec6443bfd80d028052e9d17ba6ff8a5a3534c51d285ba56c18af97e9713", size = 230809 }, + { url = "https://files.pythonhosted.org/packages/e6/72/a7be29ba1e87e4fc5ceb44dabc7940b8005fd2436a332a23547709315f70/multidict-6.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3b73e7227681f85d19dec46e5b881827cd354aabe46049e1a61d2f9aaa4e285a", size = 226934 }, + { url = "https://files.pythonhosted.org/packages/12/c1/259386a9ad6840ff7afc686da96808b503d152ac4feb3a96c651dc4f5abf/multidict-6.4.3-cp312-cp312-win32.whl", hash = "sha256:8eac0c49df91b88bf91f818e0a24c1c46f3622978e2c27035bfdca98e0e18124", size = 35242 }, + { url = "https://files.pythonhosted.org/packages/06/24/c8fdff4f924d37225dc0c56a28b1dca10728fc2233065fafeb27b4b125be/multidict-6.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:11990b5c757d956cd1db7cb140be50a63216af32cd6506329c2c59d732d802db", size = 38635 }, + { url = "https://files.pythonhosted.org/packages/96/10/7d526c8974f017f1e7ca584c71ee62a638e9334d8d33f27d7cdfc9ae79e4/multidict-6.4.3-py3-none-any.whl", hash = "sha256:59fe01ee8e2a1e8ceb3f6dbb216b09c8d9f4ef1c22c4fc825d045a147fa2ebc9", size = 10400 }, ] [[package]] @@ -844,206 +785,40 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dill" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b5/ae/04f39c5d0d0def03247c2893d6f2b83c136bf3320a2154d7b8858f2ba72d/multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1", size = 1772603, upload-time = 
"2024-01-28T18:52:34.85Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/f7/7ec7fddc92e50714ea3745631f79bd9c96424cb2702632521028e57d3a36/multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02", size = 134824, upload-time = "2024-01-28T18:52:26.062Z" }, - { url = "https://files.pythonhosted.org/packages/50/15/b56e50e8debaf439f44befec5b2af11db85f6e0f344c3113ae0be0593a91/multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a", size = 143519, upload-time = "2024-01-28T18:52:28.115Z" }, - { url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741, upload-time = "2024-01-28T18:52:29.395Z" }, - { url = "https://files.pythonhosted.org/packages/ea/89/38df130f2c799090c978b366cfdf5b96d08de5b29a4a293df7f7429fa50b/multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", size = 132628, upload-time = "2024-01-28T18:52:30.853Z" }, - { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351, upload-time = "2024-01-28T18:52:31.981Z" }, -] - -[[package]] -name = "mypy-extensions" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, -] - -[[package]] -name = "networkx" -version = "3.4.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368, upload-time = "2024-10-21T12:39:38.695Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263, upload-time = "2024-10-21T12:39:36.247Z" }, -] - -[[package]] -name = "nltk" -version = "3.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "joblib" }, - { name = "regex" }, - { name = "tqdm" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/3c/87/db8be88ad32c2d042420b6fd9ffd4a149f9a0d7f0e86b3f543be2eeeedd2/nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868", size = 2904691, upload-time = "2024-08-18T19:48:37.769Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/ae/04f39c5d0d0def03247c2893d6f2b83c136bf3320a2154d7b8858f2ba72d/multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1", size = 1772603 } wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/66/7d9e26593edda06e8cb531874633f7c2372279c3b0f46235539fe546df8b/nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1", size = 1505442, 
upload-time = "2024-08-18T19:48:21.909Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f7/7ec7fddc92e50714ea3745631f79bd9c96424cb2702632521028e57d3a36/multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02", size = 134824 }, + { url = "https://files.pythonhosted.org/packages/50/15/b56e50e8debaf439f44befec5b2af11db85f6e0f344c3113ae0be0593a91/multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a", size = 143519 }, + { url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741 }, + { url = "https://files.pythonhosted.org/packages/ea/89/38df130f2c799090c978b366cfdf5b96d08de5b29a4a293df7f7429fa50b/multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", size = 132628 }, + { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351 }, ] [[package]] name = "numpy" version = "2.2.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/dc/b2/ce4b867d8cd9c0ee84938ae1e6a6f7926ebf928c9090d036fc3c6a04f946/numpy-2.2.5.tar.gz", hash = "sha256:a9c0d994680cd991b1cb772e8b297340085466a6fe964bc9d4e80f5e2f43c291", size = 20273920, upload-time = "2025-04-19T23:27:42.561Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e2/f7/1fd4ff108cd9d7ef929b8882692e23665dc9c23feecafbb9c6b80f4ec583/numpy-2.2.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ee461a4eaab4f165b68780a6a1af95fb23a29932be7569b9fab666c407969051", size = 
20948633, upload-time = "2025-04-19T22:37:52.4Z" }, - { url = "https://files.pythonhosted.org/packages/12/03/d443c278348371b20d830af155ff2079acad6a9e60279fac2b41dbbb73d8/numpy-2.2.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ec31367fd6a255dc8de4772bd1658c3e926d8e860a0b6e922b615e532d320ddc", size = 14176123, upload-time = "2025-04-19T22:38:15.058Z" }, - { url = "https://files.pythonhosted.org/packages/2b/0b/5ca264641d0e7b14393313304da48b225d15d471250376f3fbdb1a2be603/numpy-2.2.5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:47834cde750d3c9f4e52c6ca28a7361859fcaf52695c7dc3cc1a720b8922683e", size = 5163817, upload-time = "2025-04-19T22:38:24.885Z" }, - { url = "https://files.pythonhosted.org/packages/04/b3/d522672b9e3d28e26e1613de7675b441bbd1eaca75db95680635dd158c67/numpy-2.2.5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:2c1a1c6ccce4022383583a6ded7bbcda22fc635eb4eb1e0a053336425ed36dfa", size = 6698066, upload-time = "2025-04-19T22:38:35.782Z" }, - { url = "https://files.pythonhosted.org/packages/a0/93/0f7a75c1ff02d4b76df35079676b3b2719fcdfb39abdf44c8b33f43ef37d/numpy-2.2.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d75f338f5f79ee23548b03d801d28a505198297534f62416391857ea0479571", size = 14087277, upload-time = "2025-04-19T22:38:57.697Z" }, - { url = "https://files.pythonhosted.org/packages/b0/d9/7c338b923c53d431bc837b5b787052fef9ae68a56fe91e325aac0d48226e/numpy-2.2.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a801fef99668f309b88640e28d261991bfad9617c27beda4a3aec4f217ea073", size = 16135742, upload-time = "2025-04-19T22:39:22.689Z" }, - { url = "https://files.pythonhosted.org/packages/2d/10/4dec9184a5d74ba9867c6f7d1e9f2e0fb5fe96ff2bf50bb6f342d64f2003/numpy-2.2.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:abe38cd8381245a7f49967a6010e77dbf3680bd3627c0fe4362dd693b404c7f8", size = 15581825, upload-time = "2025-04-19T22:39:45.794Z" }, - { url = 
"https://files.pythonhosted.org/packages/80/1f/2b6fcd636e848053f5b57712a7d1880b1565eec35a637fdfd0a30d5e738d/numpy-2.2.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5a0ac90e46fdb5649ab6369d1ab6104bfe5854ab19b645bf5cda0127a13034ae", size = 17899600, upload-time = "2025-04-19T22:40:13.427Z" }, - { url = "https://files.pythonhosted.org/packages/ec/87/36801f4dc2623d76a0a3835975524a84bd2b18fe0f8835d45c8eae2f9ff2/numpy-2.2.5-cp312-cp312-win32.whl", hash = "sha256:0cd48122a6b7eab8f06404805b1bd5856200e3ed6f8a1b9a194f9d9054631beb", size = 6312626, upload-time = "2025-04-19T22:40:25.223Z" }, - { url = "https://files.pythonhosted.org/packages/8b/09/4ffb4d6cfe7ca6707336187951992bd8a8b9142cf345d87ab858d2d7636a/numpy-2.2.5-cp312-cp312-win_amd64.whl", hash = "sha256:ced69262a8278547e63409b2653b372bf4baff0870c57efa76c5703fd6543282", size = 12645715, upload-time = "2025-04-19T22:40:44.528Z" }, -] - -[[package]] -name = "nvidia-cublas-cu12" -version = "12.6.4.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/af/eb/ff4b8c503fa1f1796679dce648854d58751982426e4e4b37d6fce49d259c/nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08ed2686e9875d01b58e3cb379c6896df8e76c75e0d4a7f7dace3d7b6d9ef8eb", size = 393138322, upload-time = "2024-11-20T17:40:25.65Z" }, -] - -[[package]] -name = "nvidia-cuda-cupti-cu12" -version = "12.6.80" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/49/60/7b6497946d74bcf1de852a21824d63baad12cd417db4195fc1bfe59db953/nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6768bad6cab4f19e8292125e5f1ac8aa7d1718704012a0e3272a6f61c4bce132", size = 8917980, upload-time = "2024-11-20T17:36:04.019Z" }, - { url = 
"https://files.pythonhosted.org/packages/a5/24/120ee57b218d9952c379d1e026c4479c9ece9997a4fb46303611ee48f038/nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a3eff6cdfcc6a4c35db968a06fcadb061cbc7d6dde548609a941ff8701b98b73", size = 8917972, upload-time = "2024-10-01T16:58:06.036Z" }, -] - -[[package]] -name = "nvidia-cuda-nvrtc-cu12" -version = "12.6.77" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/75/2e/46030320b5a80661e88039f59060d1790298b4718944a65a7f2aeda3d9e9/nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:35b0cc6ee3a9636d5409133e79273ce1f3fd087abb0532d2d2e8fff1fe9efc53", size = 23650380, upload-time = "2024-10-01T17:00:14.643Z" }, -] - -[[package]] -name = "nvidia-cuda-runtime-cu12" -version = "12.6.77" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e1/23/e717c5ac26d26cf39a27fbc076240fad2e3b817e5889d671b67f4f9f49c5/nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ba3b56a4f896141e25e19ab287cd71e52a6a0f4b29d0d31609f60e3b4d5219b7", size = 897690, upload-time = "2024-11-20T17:35:30.697Z" }, - { url = "https://files.pythonhosted.org/packages/f0/62/65c05e161eeddbafeca24dc461f47de550d9fa8a7e04eb213e32b55cfd99/nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a84d15d5e1da416dd4774cb42edf5e954a3e60cc945698dc1d5be02321c44dc8", size = 897678, upload-time = "2024-10-01T16:57:33.821Z" }, -] - -[[package]] -name = "nvidia-cudnn-cu12" -version = "9.5.1.17" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12" }, -] +sdist = { url = "https://files.pythonhosted.org/packages/dc/b2/ce4b867d8cd9c0ee84938ae1e6a6f7926ebf928c9090d036fc3c6a04f946/numpy-2.2.5.tar.gz", hash = "sha256:a9c0d994680cd991b1cb772e8b297340085466a6fe964bc9d4e80f5e2f43c291", 
size = 20273920 } wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/78/4535c9c7f859a64781e43c969a3a7e84c54634e319a996d43ef32ce46f83/nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:30ac3869f6db17d170e0e556dd6cc5eee02647abc31ca856634d5a40f82c15b2", size = 570988386, upload-time = "2024-10-25T19:54:26.39Z" }, -] - -[[package]] -name = "nvidia-cufft-cu12" -version = "11.3.0.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/16/73727675941ab8e6ffd86ca3a4b7b47065edcca7a997920b831f8147c99d/nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ccba62eb9cef5559abd5e0d54ceed2d9934030f51163df018532142a8ec533e5", size = 200221632, upload-time = "2024-11-20T17:41:32.357Z" }, - { url = "https://files.pythonhosted.org/packages/60/de/99ec247a07ea40c969d904fc14f3a356b3e2a704121675b75c366b694ee1/nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.whl", hash = "sha256:768160ac89f6f7b459bee747e8d175dbf53619cfe74b2a5636264163138013ca", size = 200221622, upload-time = "2024-10-01T17:03:58.79Z" }, -] - -[[package]] -name = "nvidia-cufile-cu12" -version = "1.11.1.6" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b2/66/cc9876340ac68ae71b15c743ddb13f8b30d5244af344ec8322b449e35426/nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc23469d1c7e52ce6c1d55253273d32c565dd22068647f3aa59b3c6b005bf159", size = 1142103, upload-time = "2024-11-20T17:42:11.83Z" }, -] - -[[package]] -name = "nvidia-curand-cu12" -version = "10.3.7.77" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/73/1b/44a01c4e70933637c93e6e1a8063d1e998b50213a6b65ac5a9169c47e98e/nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a42cd1344297f70b9e39a1e4f467a4e1c10f1da54ff7a85c12197f6c652c8bdf", size = 56279010, upload-time = "2024-11-20T17:42:50.958Z" }, - { url = "https://files.pythonhosted.org/packages/4a/aa/2c7ff0b5ee02eaef890c0ce7d4f74bc30901871c5e45dee1ae6d0083cd80/nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:99f1a32f1ac2bd134897fc7a203f779303261268a65762a623bf30cc9fe79117", size = 56279000, upload-time = "2024-10-01T17:04:45.274Z" }, -] - -[[package]] -name = "nvidia-cusolver-cu12" -version = "11.7.1.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/f0/6e/c2cf12c9ff8b872e92b4a5740701e51ff17689c4d726fca91875b07f655d/nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9e49843a7707e42022babb9bcfa33c29857a93b88020c4e4434656a655b698c", size = 158229790, upload-time = "2024-11-20T17:43:43.211Z" }, - { url = "https://files.pythonhosted.org/packages/9f/81/baba53585da791d043c10084cf9553e074548408e04ae884cfe9193bd484/nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6cf28f17f64107a0c4d7802be5ff5537b2130bfc112f25d5a30df227058ca0e6", size = 158229780, upload-time = "2024-10-01T17:05:39.875Z" }, -] - -[[package]] -name = "nvidia-cusparse-cu12" -version = "12.5.4.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/06/1e/b8b7c2f4099a37b96af5c9bb158632ea9e5d9d27d7391d7eb8fc45236674/nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7556d9eca156e18184b94947ade0fba5bb47d69cec46bf8660fd2c71a4b48b73", size = 216561367, upload-time = "2024-11-20T17:44:54.824Z" }, - { url = "https://files.pythonhosted.org/packages/43/ac/64c4316ba163e8217a99680c7605f779accffc6a4bcd0c778c12948d3707/nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:23749a6571191a215cb74d1cdbff4a86e7b19f1200c071b3fcf844a5bea23a2f", size = 216561357, upload-time = "2024-10-01T17:06:29.861Z" }, -] - -[[package]] -name = "nvidia-cusparselt-cu12" -version = "0.6.3" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/9a/72ef35b399b0e183bc2e8f6f558036922d453c4d8237dab26c666a04244b/nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e5c8a26c36445dd2e6812f1177978a24e2d37cacce7e090f297a688d1ec44f46", size = 156785796, upload-time = "2024-10-15T21:29:17.709Z" }, -] - -[[package]] -name = "nvidia-nccl-cu12" -version = "2.26.2" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/67/ca/f42388aed0fddd64ade7493dbba36e1f534d4e6fdbdd355c6a90030ae028/nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6", size = 201319755, upload-time = "2025-03-13T00:29:55.296Z" }, -] - -[[package]] -name = "nvidia-nvjitlink-cu12" -version = "12.6.85" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9d/d7/c5383e47c7e9bf1c99d5bd2a8c935af2b6d705ad831a7ec5c97db4d82f4f/nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = 
"sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a", size = 19744971, upload-time = "2024-11-20T17:46:53.366Z" }, -] - -[[package]] -name = "nvidia-nvtx-cu12" -version = "12.6.77" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/56/9a/fff8376f8e3d084cd1530e1ef7b879bb7d6d265620c95c1b322725c694f4/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b90bed3df379fa79afbd21be8e04a0314336b8ae16768b58f2d34cb1d04cd7d2", size = 89276, upload-time = "2024-11-20T17:38:27.621Z" }, - { url = "https://files.pythonhosted.org/packages/9e/4e/0d0c945463719429b7bd21dece907ad0bde437a2ff12b9b12fee94722ab0/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1", size = 89265, upload-time = "2024-10-01T17:00:38.172Z" }, + { url = "https://files.pythonhosted.org/packages/e2/f7/1fd4ff108cd9d7ef929b8882692e23665dc9c23feecafbb9c6b80f4ec583/numpy-2.2.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ee461a4eaab4f165b68780a6a1af95fb23a29932be7569b9fab666c407969051", size = 20948633 }, + { url = "https://files.pythonhosted.org/packages/12/03/d443c278348371b20d830af155ff2079acad6a9e60279fac2b41dbbb73d8/numpy-2.2.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ec31367fd6a255dc8de4772bd1658c3e926d8e860a0b6e922b615e532d320ddc", size = 14176123 }, + { url = "https://files.pythonhosted.org/packages/2b/0b/5ca264641d0e7b14393313304da48b225d15d471250376f3fbdb1a2be603/numpy-2.2.5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:47834cde750d3c9f4e52c6ca28a7361859fcaf52695c7dc3cc1a720b8922683e", size = 5163817 }, + { url = "https://files.pythonhosted.org/packages/04/b3/d522672b9e3d28e26e1613de7675b441bbd1eaca75db95680635dd158c67/numpy-2.2.5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:2c1a1c6ccce4022383583a6ded7bbcda22fc635eb4eb1e0a053336425ed36dfa", size = 6698066 }, + { 
url = "https://files.pythonhosted.org/packages/a0/93/0f7a75c1ff02d4b76df35079676b3b2719fcdfb39abdf44c8b33f43ef37d/numpy-2.2.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d75f338f5f79ee23548b03d801d28a505198297534f62416391857ea0479571", size = 14087277 }, + { url = "https://files.pythonhosted.org/packages/b0/d9/7c338b923c53d431bc837b5b787052fef9ae68a56fe91e325aac0d48226e/numpy-2.2.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a801fef99668f309b88640e28d261991bfad9617c27beda4a3aec4f217ea073", size = 16135742 }, + { url = "https://files.pythonhosted.org/packages/2d/10/4dec9184a5d74ba9867c6f7d1e9f2e0fb5fe96ff2bf50bb6f342d64f2003/numpy-2.2.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:abe38cd8381245a7f49967a6010e77dbf3680bd3627c0fe4362dd693b404c7f8", size = 15581825 }, + { url = "https://files.pythonhosted.org/packages/80/1f/2b6fcd636e848053f5b57712a7d1880b1565eec35a637fdfd0a30d5e738d/numpy-2.2.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5a0ac90e46fdb5649ab6369d1ab6104bfe5854ab19b645bf5cda0127a13034ae", size = 17899600 }, + { url = "https://files.pythonhosted.org/packages/ec/87/36801f4dc2623d76a0a3835975524a84bd2b18fe0f8835d45c8eae2f9ff2/numpy-2.2.5-cp312-cp312-win32.whl", hash = "sha256:0cd48122a6b7eab8f06404805b1bd5856200e3ed6f8a1b9a194f9d9054631beb", size = 6312626 }, + { url = "https://files.pythonhosted.org/packages/8b/09/4ffb4d6cfe7ca6707336187951992bd8a8b9142cf345d87ab858d2d7636a/numpy-2.2.5-cp312-cp312-win_amd64.whl", hash = "sha256:ced69262a8278547e63409b2653b372bf4baff0870c57efa76c5703fd6543282", size = 12645715 }, ] [[package]] name = "olefile" version = "0.47" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/69/1b/077b508e3e500e1629d366249c3ccb32f95e50258b231705c09e3c7a4366/olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c", size = 112240, upload-time = 
"2023-12-01T16:22:53.025Z" } +sdist = { url = "https://files.pythonhosted.org/packages/69/1b/077b508e3e500e1629d366249c3ccb32f95e50258b231705c09e3c7a4366/olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c", size = 112240 } wheels = [ - { url = "https://files.pythonhosted.org/packages/17/d3/b64c356a907242d719fc668b71befd73324e47ab46c8ebbbede252c154b2/olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f", size = 114565, upload-time = "2023-12-01T16:22:51.518Z" }, + { url = "https://files.pythonhosted.org/packages/17/d3/b64c356a907242d719fc668b71befd73324e47ab46c8ebbbede252c154b2/olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f", size = 114565 }, ] [[package]] @@ -1059,10 +834,10 @@ dependencies = [ { name = "sympy" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/42/274438bbc259439fa1606d0d6d2eef4171cdbd2d7a1c3b249b4ba440424b/onnxruntime-1.21.1-cp312-cp312-macosx_13_0_universal2.whl", hash = "sha256:f615c05869a523a94d0a4de1f0936d0199a473cf104d630fc26174bebd5759bd", size = 33658457, upload-time = "2025-04-18T12:01:22.937Z" }, - { url = "https://files.pythonhosted.org/packages/9c/93/76f629d4f22571b0b3a29a9d375204faae2bd2b07d557043b56df5848779/onnxruntime-1.21.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79dfb1f47386c4edd115b21015354b2f05f5566c40c98606251f15a64add3cbe", size = 14164881, upload-time = "2025-04-18T12:01:44.497Z" }, - { url = "https://files.pythonhosted.org/packages/1b/86/75cbaa4058758fa8ef912dfebba2d5a4e4fd6738615c15b6a2262d076198/onnxruntime-1.21.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2742935d6610fe0f58e1995018d9db7e8239d0201d9ebbdb7964a61386b5390a", size = 16019966, upload-time = "2025-04-18T12:01:47.366Z" }, - { url = 
"https://files.pythonhosted.org/packages/5f/9d/fb8895b2cb38c9965d4b4e0a9aa1398f3e3f16c4acb75cf3b61689780a65/onnxruntime-1.21.1-cp312-cp312-win_amd64.whl", hash = "sha256:a7afdb3fcb162f5536225e13c2b245018068964b1d0eee05303ea6823ca6785e", size = 12302925, upload-time = "2025-04-18T12:01:26.147Z" }, + { url = "https://files.pythonhosted.org/packages/a5/42/274438bbc259439fa1606d0d6d2eef4171cdbd2d7a1c3b249b4ba440424b/onnxruntime-1.21.1-cp312-cp312-macosx_13_0_universal2.whl", hash = "sha256:f615c05869a523a94d0a4de1f0936d0199a473cf104d630fc26174bebd5759bd", size = 33658457 }, + { url = "https://files.pythonhosted.org/packages/9c/93/76f629d4f22571b0b3a29a9d375204faae2bd2b07d557043b56df5848779/onnxruntime-1.21.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79dfb1f47386c4edd115b21015354b2f05f5566c40c98606251f15a64add3cbe", size = 14164881 }, + { url = "https://files.pythonhosted.org/packages/1b/86/75cbaa4058758fa8ef912dfebba2d5a4e4fd6738615c15b6a2262d076198/onnxruntime-1.21.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2742935d6610fe0f58e1995018d9db7e8239d0201d9ebbdb7964a61386b5390a", size = 16019966 }, + { url = "https://files.pythonhosted.org/packages/5f/9d/fb8895b2cb38c9965d4b4e0a9aa1398f3e3f16c4acb75cf3b61689780a65/onnxruntime-1.21.1-cp312-cp312-win_amd64.whl", hash = "sha256:a7afdb3fcb162f5536225e13c2b245018068964b1d0eee05303ea6823ca6785e", size = 12302925 }, ] [[package]] @@ -1072,18 +847,18 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "et-xmlfile" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" }, + { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910 }, ] [[package]] name = "packaging" version = "25.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727 } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 }, ] [[package]] @@ 
-1096,24 +871,15 @@ dependencies = [ { name = "pytz" }, { name = "tzdata" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213, upload-time = "2024-09-20T13:10:04.827Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213 } wheels = [ - { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893, upload-time = "2024-09-20T13:09:09.655Z" }, - { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475, upload-time = "2024-09-20T13:09:14.718Z" }, - { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645, upload-time = "2024-09-20T19:02:03.88Z" }, - { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445, upload-time = "2024-09-20T13:09:17.621Z" }, - { url = 
"https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235, upload-time = "2024-09-20T19:02:07.094Z" }, - { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756, upload-time = "2024-09-20T13:09:20.474Z" }, - { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248, upload-time = "2024-09-20T13:09:23.137Z" }, -] - -[[package]] -name = "pathspec" -version = "0.12.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, + { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893 }, + { url = 
"https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475 }, + { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645 }, + { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445 }, + { url = "https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235 }, + { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756 }, + { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248 }, ] [[package]] @@ -1124,129 +890,120 @@ dependencies = [ { name = "charset-normalizer" }, { name = "cryptography" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a8/27/1a99ce4cfce829bb91040f82a53f33b33fec4e070d2b9c1b45f6796cd8dc/pdfminer_six-20250416.tar.gz", hash = 
"sha256:30956a85f9d0add806a4e460ed0d67c2b6a48b53323c7ac87de23174596d3acd", size = 7384630, upload-time = "2025-04-16T09:43:41.944Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a8/27/1a99ce4cfce829bb91040f82a53f33b33fec4e070d2b9c1b45f6796cd8dc/pdfminer_six-20250416.tar.gz", hash = "sha256:30956a85f9d0add806a4e460ed0d67c2b6a48b53323c7ac87de23174596d3acd", size = 7384630 } wheels = [ - { url = "https://files.pythonhosted.org/packages/77/32/89749ba23e5020e89fb584c1b39d7da6d7c56a9048307de8a88eec79e2d3/pdfminer_six-20250416-py3-none-any.whl", hash = "sha256:dd2a9ad7bc7dd6b62d009aaa9c101ac9d069a47937724569c375a6a9078da303", size = 5619271, upload-time = "2025-04-16T09:43:40.211Z" }, + { url = "https://files.pythonhosted.org/packages/77/32/89749ba23e5020e89fb584c1b39d7da6d7c56a9048307de8a88eec79e2d3/pdfminer_six-20250416-py3-none-any.whl", hash = "sha256:dd2a9ad7bc7dd6b62d009aaa9c101ac9d069a47937724569c375a6a9078da303", size = 5619271 }, ] [[package]] name = "pillow" version = "11.2.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/af/cb/bb5c01fcd2a69335b86c22142b2bccfc3464087efb7fd382eee5ffc7fdf7/pillow-11.2.1.tar.gz", hash = "sha256:a64dd61998416367b7ef979b73d3a85853ba9bec4c2925f74e588879a58716b6", size = 47026707, upload-time = "2025-04-12T17:50:03.289Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/40/052610b15a1b8961f52537cc8326ca6a881408bc2bdad0d852edeb6ed33b/pillow-11.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:78afba22027b4accef10dbd5eed84425930ba41b3ea0a86fa8d20baaf19d807f", size = 3190185, upload-time = "2025-04-12T17:48:00.417Z" }, - { url = "https://files.pythonhosted.org/packages/e5/7e/b86dbd35a5f938632093dc40d1682874c33dcfe832558fc80ca56bfcb774/pillow-11.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78092232a4ab376a35d68c4e6d5e00dfd73454bd12b230420025fbe178ee3b0b", size = 3030306, upload-time = "2025-04-12T17:48:02.391Z" }, - { url = 
"https://files.pythonhosted.org/packages/a4/5c/467a161f9ed53e5eab51a42923c33051bf8d1a2af4626ac04f5166e58e0c/pillow-11.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25a5f306095c6780c52e6bbb6109624b95c5b18e40aab1c3041da3e9e0cd3e2d", size = 4416121, upload-time = "2025-04-12T17:48:04.554Z" }, - { url = "https://files.pythonhosted.org/packages/62/73/972b7742e38ae0e2ac76ab137ca6005dcf877480da0d9d61d93b613065b4/pillow-11.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c7b29dbd4281923a2bfe562acb734cee96bbb129e96e6972d315ed9f232bef4", size = 4501707, upload-time = "2025-04-12T17:48:06.831Z" }, - { url = "https://files.pythonhosted.org/packages/e4/3a/427e4cb0b9e177efbc1a84798ed20498c4f233abde003c06d2650a6d60cb/pillow-11.2.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3e645b020f3209a0181a418bffe7b4a93171eef6c4ef6cc20980b30bebf17b7d", size = 4522921, upload-time = "2025-04-12T17:48:09.229Z" }, - { url = "https://files.pythonhosted.org/packages/fe/7c/d8b1330458e4d2f3f45d9508796d7caf0c0d3764c00c823d10f6f1a3b76d/pillow-11.2.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b2dbea1012ccb784a65349f57bbc93730b96e85b42e9bf7b01ef40443db720b4", size = 4612523, upload-time = "2025-04-12T17:48:11.631Z" }, - { url = "https://files.pythonhosted.org/packages/b3/2f/65738384e0b1acf451de5a573d8153fe84103772d139e1e0bdf1596be2ea/pillow-11.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:da3104c57bbd72948d75f6a9389e6727d2ab6333c3617f0a89d72d4940aa0443", size = 4587836, upload-time = "2025-04-12T17:48:13.592Z" }, - { url = "https://files.pythonhosted.org/packages/6a/c5/e795c9f2ddf3debb2dedd0df889f2fe4b053308bb59a3cc02a0cd144d641/pillow-11.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:598174aef4589af795f66f9caab87ba4ff860ce08cd5bb447c6fc553ffee603c", size = 4669390, upload-time = "2025-04-12T17:48:15.938Z" }, - { url = 
"https://files.pythonhosted.org/packages/96/ae/ca0099a3995976a9fce2f423166f7bff9b12244afdc7520f6ed38911539a/pillow-11.2.1-cp312-cp312-win32.whl", hash = "sha256:1d535df14716e7f8776b9e7fee118576d65572b4aad3ed639be9e4fa88a1cad3", size = 2332309, upload-time = "2025-04-12T17:48:17.885Z" }, - { url = "https://files.pythonhosted.org/packages/7c/18/24bff2ad716257fc03da964c5e8f05d9790a779a8895d6566e493ccf0189/pillow-11.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:14e33b28bf17c7a38eede290f77db7c664e4eb01f7869e37fa98a5aa95978941", size = 2676768, upload-time = "2025-04-12T17:48:19.655Z" }, - { url = "https://files.pythonhosted.org/packages/da/bb/e8d656c9543276517ee40184aaa39dcb41e683bca121022f9323ae11b39d/pillow-11.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:21e1470ac9e5739ff880c211fc3af01e3ae505859392bf65458c224d0bf283eb", size = 2415087, upload-time = "2025-04-12T17:48:21.991Z" }, -] - -[[package]] -name = "platformdirs" -version = "4.3.7" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b6/2d/7d512a3913d60623e7eb945c6d1b4f0bddf1d0b7ada5225274c87e5b53d1/platformdirs-4.3.7.tar.gz", hash = "sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351", size = 21291, upload-time = "2025-03-19T20:36:10.989Z" } +sdist = { url = "https://files.pythonhosted.org/packages/af/cb/bb5c01fcd2a69335b86c22142b2bccfc3464087efb7fd382eee5ffc7fdf7/pillow-11.2.1.tar.gz", hash = "sha256:a64dd61998416367b7ef979b73d3a85853ba9bec4c2925f74e588879a58716b6", size = 47026707 } wheels = [ - { url = "https://files.pythonhosted.org/packages/6d/45/59578566b3275b8fd9157885918fcd0c4d74162928a5310926887b856a51/platformdirs-4.3.7-py3-none-any.whl", hash = "sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94", size = 18499, upload-time = "2025-03-19T20:36:09.038Z" }, + { url = 
"https://files.pythonhosted.org/packages/c7/40/052610b15a1b8961f52537cc8326ca6a881408bc2bdad0d852edeb6ed33b/pillow-11.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:78afba22027b4accef10dbd5eed84425930ba41b3ea0a86fa8d20baaf19d807f", size = 3190185 }, + { url = "https://files.pythonhosted.org/packages/e5/7e/b86dbd35a5f938632093dc40d1682874c33dcfe832558fc80ca56bfcb774/pillow-11.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78092232a4ab376a35d68c4e6d5e00dfd73454bd12b230420025fbe178ee3b0b", size = 3030306 }, + { url = "https://files.pythonhosted.org/packages/a4/5c/467a161f9ed53e5eab51a42923c33051bf8d1a2af4626ac04f5166e58e0c/pillow-11.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25a5f306095c6780c52e6bbb6109624b95c5b18e40aab1c3041da3e9e0cd3e2d", size = 4416121 }, + { url = "https://files.pythonhosted.org/packages/62/73/972b7742e38ae0e2ac76ab137ca6005dcf877480da0d9d61d93b613065b4/pillow-11.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c7b29dbd4281923a2bfe562acb734cee96bbb129e96e6972d315ed9f232bef4", size = 4501707 }, + { url = "https://files.pythonhosted.org/packages/e4/3a/427e4cb0b9e177efbc1a84798ed20498c4f233abde003c06d2650a6d60cb/pillow-11.2.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3e645b020f3209a0181a418bffe7b4a93171eef6c4ef6cc20980b30bebf17b7d", size = 4522921 }, + { url = "https://files.pythonhosted.org/packages/fe/7c/d8b1330458e4d2f3f45d9508796d7caf0c0d3764c00c823d10f6f1a3b76d/pillow-11.2.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b2dbea1012ccb784a65349f57bbc93730b96e85b42e9bf7b01ef40443db720b4", size = 4612523 }, + { url = "https://files.pythonhosted.org/packages/b3/2f/65738384e0b1acf451de5a573d8153fe84103772d139e1e0bdf1596be2ea/pillow-11.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:da3104c57bbd72948d75f6a9389e6727d2ab6333c3617f0a89d72d4940aa0443", size = 4587836 }, + { url = 
"https://files.pythonhosted.org/packages/6a/c5/e795c9f2ddf3debb2dedd0df889f2fe4b053308bb59a3cc02a0cd144d641/pillow-11.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:598174aef4589af795f66f9caab87ba4ff860ce08cd5bb447c6fc553ffee603c", size = 4669390 }, + { url = "https://files.pythonhosted.org/packages/96/ae/ca0099a3995976a9fce2f423166f7bff9b12244afdc7520f6ed38911539a/pillow-11.2.1-cp312-cp312-win32.whl", hash = "sha256:1d535df14716e7f8776b9e7fee118576d65572b4aad3ed639be9e4fa88a1cad3", size = 2332309 }, + { url = "https://files.pythonhosted.org/packages/7c/18/24bff2ad716257fc03da964c5e8f05d9790a779a8895d6566e493ccf0189/pillow-11.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:14e33b28bf17c7a38eede290f77db7c664e4eb01f7869e37fa98a5aa95978941", size = 2676768 }, + { url = "https://files.pythonhosted.org/packages/da/bb/e8d656c9543276517ee40184aaa39dcb41e683bca121022f9323ae11b39d/pillow-11.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:21e1470ac9e5739ff880c211fc3af01e3ae505859392bf65458c224d0bf283eb", size = 2415087 }, ] [[package]] name = "propcache" version = "0.3.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/07/c8/fdc6686a986feae3541ea23dcaa661bd93972d3940460646c6bb96e21c40/propcache-0.3.1.tar.gz", hash = "sha256:40d980c33765359098837527e18eddefc9a24cea5b45e078a7f3bb5b032c6ecf", size = 43651, upload-time = "2025-03-26T03:06:12.05Z" } +sdist = { url = "https://files.pythonhosted.org/packages/07/c8/fdc6686a986feae3541ea23dcaa661bd93972d3940460646c6bb96e21c40/propcache-0.3.1.tar.gz", hash = "sha256:40d980c33765359098837527e18eddefc9a24cea5b45e078a7f3bb5b032c6ecf", size = 43651 } wheels = [ - { url = "https://files.pythonhosted.org/packages/41/aa/ca78d9be314d1e15ff517b992bebbed3bdfef5b8919e85bf4940e57b6137/propcache-0.3.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f78eb8422acc93d7b69964012ad7048764bb45a54ba7a39bb9e146c72ea29723", size = 80430, upload-time = "2025-03-26T03:04:26.436Z" }, 
- { url = "https://files.pythonhosted.org/packages/1a/d8/f0c17c44d1cda0ad1979af2e593ea290defdde9eaeb89b08abbe02a5e8e1/propcache-0.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:89498dd49c2f9a026ee057965cdf8192e5ae070ce7d7a7bd4b66a8e257d0c976", size = 46637, upload-time = "2025-03-26T03:04:27.932Z" }, - { url = "https://files.pythonhosted.org/packages/ae/bd/c1e37265910752e6e5e8a4c1605d0129e5b7933c3dc3cf1b9b48ed83b364/propcache-0.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:09400e98545c998d57d10035ff623266927cb784d13dd2b31fd33b8a5316b85b", size = 46123, upload-time = "2025-03-26T03:04:30.659Z" }, - { url = "https://files.pythonhosted.org/packages/d4/b0/911eda0865f90c0c7e9f0415d40a5bf681204da5fd7ca089361a64c16b28/propcache-0.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa8efd8c5adc5a2c9d3b952815ff8f7710cefdcaf5f2c36d26aff51aeca2f12f", size = 243031, upload-time = "2025-03-26T03:04:31.977Z" }, - { url = "https://files.pythonhosted.org/packages/0a/06/0da53397c76a74271621807265b6eb61fb011451b1ddebf43213df763669/propcache-0.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2fe5c910f6007e716a06d269608d307b4f36e7babee5f36533722660e8c4a70", size = 249100, upload-time = "2025-03-26T03:04:33.45Z" }, - { url = "https://files.pythonhosted.org/packages/f1/eb/13090e05bf6b963fc1653cdc922133ced467cb4b8dab53158db5a37aa21e/propcache-0.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a0ab8cf8cdd2194f8ff979a43ab43049b1df0b37aa64ab7eca04ac14429baeb7", size = 250170, upload-time = "2025-03-26T03:04:35.542Z" }, - { url = "https://files.pythonhosted.org/packages/3b/4c/f72c9e1022b3b043ec7dc475a0f405d4c3e10b9b1d378a7330fecf0652da/propcache-0.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:563f9d8c03ad645597b8d010ef4e9eab359faeb11a0a2ac9f7b4bc8c28ebef25", size = 245000, upload-time = "2025-03-26T03:04:37.501Z" }, - { url = 
"https://files.pythonhosted.org/packages/e8/fd/970ca0e22acc829f1adf5de3724085e778c1ad8a75bec010049502cb3a86/propcache-0.3.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb6e0faf8cb6b4beea5d6ed7b5a578254c6d7df54c36ccd3d8b3eb00d6770277", size = 230262, upload-time = "2025-03-26T03:04:39.532Z" }, - { url = "https://files.pythonhosted.org/packages/c4/42/817289120c6b9194a44f6c3e6b2c3277c5b70bbad39e7df648f177cc3634/propcache-0.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1c5c7ab7f2bb3f573d1cb921993006ba2d39e8621019dffb1c5bc94cdbae81e8", size = 236772, upload-time = "2025-03-26T03:04:41.109Z" }, - { url = "https://files.pythonhosted.org/packages/7c/9c/3b3942b302badd589ad6b672da3ca7b660a6c2f505cafd058133ddc73918/propcache-0.3.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:050b571b2e96ec942898f8eb46ea4bfbb19bd5502424747e83badc2d4a99a44e", size = 231133, upload-time = "2025-03-26T03:04:42.544Z" }, - { url = "https://files.pythonhosted.org/packages/98/a1/75f6355f9ad039108ff000dfc2e19962c8dea0430da9a1428e7975cf24b2/propcache-0.3.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e1c4d24b804b3a87e9350f79e2371a705a188d292fd310e663483af6ee6718ee", size = 230741, upload-time = "2025-03-26T03:04:44.06Z" }, - { url = "https://files.pythonhosted.org/packages/67/0c/3e82563af77d1f8731132166da69fdfd95e71210e31f18edce08a1eb11ea/propcache-0.3.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:e4fe2a6d5ce975c117a6bb1e8ccda772d1e7029c1cca1acd209f91d30fa72815", size = 244047, upload-time = "2025-03-26T03:04:45.983Z" }, - { url = "https://files.pythonhosted.org/packages/f7/50/9fb7cca01532a08c4d5186d7bb2da6c4c587825c0ae134b89b47c7d62628/propcache-0.3.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:feccd282de1f6322f56f6845bf1207a537227812f0a9bf5571df52bb418d79d5", size = 246467, upload-time = "2025-03-26T03:04:47.699Z" }, - { url = 
"https://files.pythonhosted.org/packages/a9/02/ccbcf3e1c604c16cc525309161d57412c23cf2351523aedbb280eb7c9094/propcache-0.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ec314cde7314d2dd0510c6787326bbffcbdc317ecee6b7401ce218b3099075a7", size = 241022, upload-time = "2025-03-26T03:04:49.195Z" }, - { url = "https://files.pythonhosted.org/packages/db/19/e777227545e09ca1e77a6e21274ae9ec45de0f589f0ce3eca2a41f366220/propcache-0.3.1-cp312-cp312-win32.whl", hash = "sha256:7d2d5a0028d920738372630870e7d9644ce437142197f8c827194fca404bf03b", size = 40647, upload-time = "2025-03-26T03:04:50.595Z" }, - { url = "https://files.pythonhosted.org/packages/24/bb/3b1b01da5dd04c77a204c84e538ff11f624e31431cfde7201d9110b092b1/propcache-0.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:88c423efef9d7a59dae0614eaed718449c09a5ac79a5f224a8b9664d603f04a3", size = 44784, upload-time = "2025-03-26T03:04:51.791Z" }, - { url = "https://files.pythonhosted.org/packages/b8/d3/c3cb8f1d6ae3b37f83e1de806713a9b3642c5895f0215a62e1a4bd6e5e34/propcache-0.3.1-py3-none-any.whl", hash = "sha256:9a8ecf38de50a7f518c21568c80f985e776397b902f1ce0b01f799aba1608b40", size = 12376, upload-time = "2025-03-26T03:06:10.5Z" }, + { url = "https://files.pythonhosted.org/packages/41/aa/ca78d9be314d1e15ff517b992bebbed3bdfef5b8919e85bf4940e57b6137/propcache-0.3.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f78eb8422acc93d7b69964012ad7048764bb45a54ba7a39bb9e146c72ea29723", size = 80430 }, + { url = "https://files.pythonhosted.org/packages/1a/d8/f0c17c44d1cda0ad1979af2e593ea290defdde9eaeb89b08abbe02a5e8e1/propcache-0.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:89498dd49c2f9a026ee057965cdf8192e5ae070ce7d7a7bd4b66a8e257d0c976", size = 46637 }, + { url = "https://files.pythonhosted.org/packages/ae/bd/c1e37265910752e6e5e8a4c1605d0129e5b7933c3dc3cf1b9b48ed83b364/propcache-0.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:09400e98545c998d57d10035ff623266927cb784d13dd2b31fd33b8a5316b85b", size = 
46123 }, + { url = "https://files.pythonhosted.org/packages/d4/b0/911eda0865f90c0c7e9f0415d40a5bf681204da5fd7ca089361a64c16b28/propcache-0.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa8efd8c5adc5a2c9d3b952815ff8f7710cefdcaf5f2c36d26aff51aeca2f12f", size = 243031 }, + { url = "https://files.pythonhosted.org/packages/0a/06/0da53397c76a74271621807265b6eb61fb011451b1ddebf43213df763669/propcache-0.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2fe5c910f6007e716a06d269608d307b4f36e7babee5f36533722660e8c4a70", size = 249100 }, + { url = "https://files.pythonhosted.org/packages/f1/eb/13090e05bf6b963fc1653cdc922133ced467cb4b8dab53158db5a37aa21e/propcache-0.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a0ab8cf8cdd2194f8ff979a43ab43049b1df0b37aa64ab7eca04ac14429baeb7", size = 250170 }, + { url = "https://files.pythonhosted.org/packages/3b/4c/f72c9e1022b3b043ec7dc475a0f405d4c3e10b9b1d378a7330fecf0652da/propcache-0.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:563f9d8c03ad645597b8d010ef4e9eab359faeb11a0a2ac9f7b4bc8c28ebef25", size = 245000 }, + { url = "https://files.pythonhosted.org/packages/e8/fd/970ca0e22acc829f1adf5de3724085e778c1ad8a75bec010049502cb3a86/propcache-0.3.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb6e0faf8cb6b4beea5d6ed7b5a578254c6d7df54c36ccd3d8b3eb00d6770277", size = 230262 }, + { url = "https://files.pythonhosted.org/packages/c4/42/817289120c6b9194a44f6c3e6b2c3277c5b70bbad39e7df648f177cc3634/propcache-0.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1c5c7ab7f2bb3f573d1cb921993006ba2d39e8621019dffb1c5bc94cdbae81e8", size = 236772 }, + { url = "https://files.pythonhosted.org/packages/7c/9c/3b3942b302badd589ad6b672da3ca7b660a6c2f505cafd058133ddc73918/propcache-0.3.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = 
"sha256:050b571b2e96ec942898f8eb46ea4bfbb19bd5502424747e83badc2d4a99a44e", size = 231133 }, + { url = "https://files.pythonhosted.org/packages/98/a1/75f6355f9ad039108ff000dfc2e19962c8dea0430da9a1428e7975cf24b2/propcache-0.3.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e1c4d24b804b3a87e9350f79e2371a705a188d292fd310e663483af6ee6718ee", size = 230741 }, + { url = "https://files.pythonhosted.org/packages/67/0c/3e82563af77d1f8731132166da69fdfd95e71210e31f18edce08a1eb11ea/propcache-0.3.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:e4fe2a6d5ce975c117a6bb1e8ccda772d1e7029c1cca1acd209f91d30fa72815", size = 244047 }, + { url = "https://files.pythonhosted.org/packages/f7/50/9fb7cca01532a08c4d5186d7bb2da6c4c587825c0ae134b89b47c7d62628/propcache-0.3.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:feccd282de1f6322f56f6845bf1207a537227812f0a9bf5571df52bb418d79d5", size = 246467 }, + { url = "https://files.pythonhosted.org/packages/a9/02/ccbcf3e1c604c16cc525309161d57412c23cf2351523aedbb280eb7c9094/propcache-0.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ec314cde7314d2dd0510c6787326bbffcbdc317ecee6b7401ce218b3099075a7", size = 241022 }, + { url = "https://files.pythonhosted.org/packages/db/19/e777227545e09ca1e77a6e21274ae9ec45de0f589f0ce3eca2a41f366220/propcache-0.3.1-cp312-cp312-win32.whl", hash = "sha256:7d2d5a0028d920738372630870e7d9644ce437142197f8c827194fca404bf03b", size = 40647 }, + { url = "https://files.pythonhosted.org/packages/24/bb/3b1b01da5dd04c77a204c84e538ff11f624e31431cfde7201d9110b092b1/propcache-0.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:88c423efef9d7a59dae0614eaed718449c09a5ac79a5f224a8b9664d603f04a3", size = 44784 }, + { url = "https://files.pythonhosted.org/packages/b8/d3/c3cb8f1d6ae3b37f83e1de806713a9b3642c5895f0215a62e1a4bd6e5e34/propcache-0.3.1-py3-none-any.whl", hash = "sha256:9a8ecf38de50a7f518c21568c80f985e776397b902f1ce0b01f799aba1608b40", size = 12376 }, ] [[package]] name = "protobuf" version = "6.30.2" 
source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c8/8c/cf2ac658216eebe49eaedf1e06bc06cbf6a143469236294a1171a51357c3/protobuf-6.30.2.tar.gz", hash = "sha256:35c859ae076d8c56054c25b59e5e59638d86545ed6e2b6efac6be0b6ea3ba048", size = 429315, upload-time = "2025-03-26T19:12:57.394Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c8/8c/cf2ac658216eebe49eaedf1e06bc06cbf6a143469236294a1171a51357c3/protobuf-6.30.2.tar.gz", hash = "sha256:35c859ae076d8c56054c25b59e5e59638d86545ed6e2b6efac6be0b6ea3ba048", size = 429315 } wheels = [ - { url = "https://files.pythonhosted.org/packages/be/85/cd53abe6a6cbf2e0029243d6ae5fb4335da2996f6c177bb2ce685068e43d/protobuf-6.30.2-cp310-abi3-win32.whl", hash = "sha256:b12ef7df7b9329886e66404bef5e9ce6a26b54069d7f7436a0853ccdeb91c103", size = 419148, upload-time = "2025-03-26T19:12:41.359Z" }, - { url = "https://files.pythonhosted.org/packages/97/e9/7b9f1b259d509aef2b833c29a1f3c39185e2bf21c9c1be1cd11c22cb2149/protobuf-6.30.2-cp310-abi3-win_amd64.whl", hash = "sha256:7653c99774f73fe6b9301b87da52af0e69783a2e371e8b599b3e9cb4da4b12b9", size = 431003, upload-time = "2025-03-26T19:12:44.156Z" }, - { url = "https://files.pythonhosted.org/packages/8e/66/7f3b121f59097c93267e7f497f10e52ced7161b38295137a12a266b6c149/protobuf-6.30.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:0eb523c550a66a09a0c20f86dd554afbf4d32b02af34ae53d93268c1f73bc65b", size = 417579, upload-time = "2025-03-26T19:12:45.447Z" }, - { url = "https://files.pythonhosted.org/packages/d0/89/bbb1bff09600e662ad5b384420ad92de61cab2ed0f12ace1fd081fd4c295/protobuf-6.30.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:50f32cc9fd9cb09c783ebc275611b4f19dfdfb68d1ee55d2f0c7fa040df96815", size = 317319, upload-time = "2025-03-26T19:12:46.999Z" }, - { url = "https://files.pythonhosted.org/packages/28/50/1925de813499546bc8ab3ae857e3ec84efe7d2f19b34529d0c7c3d02d11d/protobuf-6.30.2-cp39-abi3-manylinux2014_x86_64.whl", 
hash = "sha256:4f6c687ae8efae6cf6093389a596548214467778146b7245e886f35e1485315d", size = 316212, upload-time = "2025-03-26T19:12:48.458Z" }, - { url = "https://files.pythonhosted.org/packages/e5/a1/93c2acf4ade3c5b557d02d500b06798f4ed2c176fa03e3c34973ca92df7f/protobuf-6.30.2-py3-none-any.whl", hash = "sha256:ae86b030e69a98e08c77beab574cbcb9fff6d031d57209f574a5aea1445f4b51", size = 167062, upload-time = "2025-03-26T19:12:55.892Z" }, + { url = "https://files.pythonhosted.org/packages/be/85/cd53abe6a6cbf2e0029243d6ae5fb4335da2996f6c177bb2ce685068e43d/protobuf-6.30.2-cp310-abi3-win32.whl", hash = "sha256:b12ef7df7b9329886e66404bef5e9ce6a26b54069d7f7436a0853ccdeb91c103", size = 419148 }, + { url = "https://files.pythonhosted.org/packages/97/e9/7b9f1b259d509aef2b833c29a1f3c39185e2bf21c9c1be1cd11c22cb2149/protobuf-6.30.2-cp310-abi3-win_amd64.whl", hash = "sha256:7653c99774f73fe6b9301b87da52af0e69783a2e371e8b599b3e9cb4da4b12b9", size = 431003 }, + { url = "https://files.pythonhosted.org/packages/8e/66/7f3b121f59097c93267e7f497f10e52ced7161b38295137a12a266b6c149/protobuf-6.30.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:0eb523c550a66a09a0c20f86dd554afbf4d32b02af34ae53d93268c1f73bc65b", size = 417579 }, + { url = "https://files.pythonhosted.org/packages/d0/89/bbb1bff09600e662ad5b384420ad92de61cab2ed0f12ace1fd081fd4c295/protobuf-6.30.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:50f32cc9fd9cb09c783ebc275611b4f19dfdfb68d1ee55d2f0c7fa040df96815", size = 317319 }, + { url = "https://files.pythonhosted.org/packages/28/50/1925de813499546bc8ab3ae857e3ec84efe7d2f19b34529d0c7c3d02d11d/protobuf-6.30.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:4f6c687ae8efae6cf6093389a596548214467778146b7245e886f35e1485315d", size = 316212 }, + { url = "https://files.pythonhosted.org/packages/e5/a1/93c2acf4ade3c5b557d02d500b06798f4ed2c176fa03e3c34973ca92df7f/protobuf-6.30.2-py3-none-any.whl", hash = "sha256:ae86b030e69a98e08c77beab574cbcb9fff6d031d57209f574a5aea1445f4b51", 
size = 167062 }, ] [[package]] name = "pyarrow" version = "20.0.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/ee/a7810cb9f3d6e9238e61d312076a9859bf3668fd21c69744de9532383912/pyarrow-20.0.0.tar.gz", hash = "sha256:febc4a913592573c8d5805091a6c2b5064c8bd6e002131f01061797d91c783c1", size = 1125187, upload-time = "2025-04-27T12:34:23.264Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/ee/a7810cb9f3d6e9238e61d312076a9859bf3668fd21c69744de9532383912/pyarrow-20.0.0.tar.gz", hash = "sha256:febc4a913592573c8d5805091a6c2b5064c8bd6e002131f01061797d91c783c1", size = 1125187 } wheels = [ - { url = "https://files.pythonhosted.org/packages/a1/d6/0c10e0d54f6c13eb464ee9b67a68b8c71bcf2f67760ef5b6fbcddd2ab05f/pyarrow-20.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:75a51a5b0eef32727a247707d4755322cb970be7e935172b6a3a9f9ae98404ba", size = 30815067, upload-time = "2025-04-27T12:29:44.384Z" }, - { url = "https://files.pythonhosted.org/packages/7e/e2/04e9874abe4094a06fd8b0cbb0f1312d8dd7d707f144c2ec1e5e8f452ffa/pyarrow-20.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:211d5e84cecc640c7a3ab900f930aaff5cd2702177e0d562d426fb7c4f737781", size = 32297128, upload-time = "2025-04-27T12:29:52.038Z" }, - { url = "https://files.pythonhosted.org/packages/31/fd/c565e5dcc906a3b471a83273039cb75cb79aad4a2d4a12f76cc5ae90a4b8/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ba3cf4182828be7a896cbd232aa8dd6a31bd1f9e32776cc3796c012855e1199", size = 41334890, upload-time = "2025-04-27T12:29:59.452Z" }, - { url = "https://files.pythonhosted.org/packages/af/a9/3bdd799e2c9b20c1ea6dc6fa8e83f29480a97711cf806e823f808c2316ac/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c3a01f313ffe27ac4126f4c2e5ea0f36a5fc6ab51f8726cf41fee4b256680bd", size = 42421775, upload-time = "2025-04-27T12:30:06.875Z" }, - { url = 
"https://files.pythonhosted.org/packages/10/f7/da98ccd86354c332f593218101ae56568d5dcedb460e342000bd89c49cc1/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:a2791f69ad72addd33510fec7bb14ee06c2a448e06b649e264c094c5b5f7ce28", size = 40687231, upload-time = "2025-04-27T12:30:13.954Z" }, - { url = "https://files.pythonhosted.org/packages/bb/1b/2168d6050e52ff1e6cefc61d600723870bf569cbf41d13db939c8cf97a16/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4250e28a22302ce8692d3a0e8ec9d9dde54ec00d237cff4dfa9c1fbf79e472a8", size = 42295639, upload-time = "2025-04-27T12:30:21.949Z" }, - { url = "https://files.pythonhosted.org/packages/b2/66/2d976c0c7158fd25591c8ca55aee026e6d5745a021915a1835578707feb3/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:89e030dc58fc760e4010148e6ff164d2f44441490280ef1e97a542375e41058e", size = 42908549, upload-time = "2025-04-27T12:30:29.551Z" }, - { url = "https://files.pythonhosted.org/packages/31/a9/dfb999c2fc6911201dcbf348247f9cc382a8990f9ab45c12eabfd7243a38/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6102b4864d77102dbbb72965618e204e550135a940c2534711d5ffa787df2a5a", size = 44557216, upload-time = "2025-04-27T12:30:36.977Z" }, - { url = "https://files.pythonhosted.org/packages/a0/8e/9adee63dfa3911be2382fb4d92e4b2e7d82610f9d9f668493bebaa2af50f/pyarrow-20.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:96d6a0a37d9c98be08f5ed6a10831d88d52cac7b13f5287f1e0f625a0de8062b", size = 25660496, upload-time = "2025-04-27T12:30:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/a1/d6/0c10e0d54f6c13eb464ee9b67a68b8c71bcf2f67760ef5b6fbcddd2ab05f/pyarrow-20.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:75a51a5b0eef32727a247707d4755322cb970be7e935172b6a3a9f9ae98404ba", size = 30815067 }, + { url = "https://files.pythonhosted.org/packages/7e/e2/04e9874abe4094a06fd8b0cbb0f1312d8dd7d707f144c2ec1e5e8f452ffa/pyarrow-20.0.0-cp312-cp312-macosx_12_0_x86_64.whl", 
hash = "sha256:211d5e84cecc640c7a3ab900f930aaff5cd2702177e0d562d426fb7c4f737781", size = 32297128 }, + { url = "https://files.pythonhosted.org/packages/31/fd/c565e5dcc906a3b471a83273039cb75cb79aad4a2d4a12f76cc5ae90a4b8/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ba3cf4182828be7a896cbd232aa8dd6a31bd1f9e32776cc3796c012855e1199", size = 41334890 }, + { url = "https://files.pythonhosted.org/packages/af/a9/3bdd799e2c9b20c1ea6dc6fa8e83f29480a97711cf806e823f808c2316ac/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c3a01f313ffe27ac4126f4c2e5ea0f36a5fc6ab51f8726cf41fee4b256680bd", size = 42421775 }, + { url = "https://files.pythonhosted.org/packages/10/f7/da98ccd86354c332f593218101ae56568d5dcedb460e342000bd89c49cc1/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:a2791f69ad72addd33510fec7bb14ee06c2a448e06b649e264c094c5b5f7ce28", size = 40687231 }, + { url = "https://files.pythonhosted.org/packages/bb/1b/2168d6050e52ff1e6cefc61d600723870bf569cbf41d13db939c8cf97a16/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4250e28a22302ce8692d3a0e8ec9d9dde54ec00d237cff4dfa9c1fbf79e472a8", size = 42295639 }, + { url = "https://files.pythonhosted.org/packages/b2/66/2d976c0c7158fd25591c8ca55aee026e6d5745a021915a1835578707feb3/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:89e030dc58fc760e4010148e6ff164d2f44441490280ef1e97a542375e41058e", size = 42908549 }, + { url = "https://files.pythonhosted.org/packages/31/a9/dfb999c2fc6911201dcbf348247f9cc382a8990f9ab45c12eabfd7243a38/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6102b4864d77102dbbb72965618e204e550135a940c2534711d5ffa787df2a5a", size = 44557216 }, + { url = "https://files.pythonhosted.org/packages/a0/8e/9adee63dfa3911be2382fb4d92e4b2e7d82610f9d9f668493bebaa2af50f/pyarrow-20.0.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:96d6a0a37d9c98be08f5ed6a10831d88d52cac7b13f5287f1e0f625a0de8062b", size = 25660496 }, ] [[package]] name = "pycparser" version = "2.22" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1d/b2/31537cf4b1ca988837256c910a668b553fceb8f069bedc4b1c826024b52c/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", size = 172736, upload-time = "2024-03-30T13:22:22.564Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1d/b2/31537cf4b1ca988837256c910a668b553fceb8f069bedc4b1c826024b52c/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", size = 172736 } wheels = [ - { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552, upload-time = "2024-03-30T13:22:20.476Z" }, + { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552 }, ] [[package]] name = "pydub" version = "0.25.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fe/9a/e6bca0eed82db26562c73b5076539a4a08d3cffd19c3cc5913a3e61145fd/pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f", size = 38326, upload-time = "2021-03-10T02:09:54.659Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/9a/e6bca0eed82db26562c73b5076539a4a08d3cffd19c3cc5913a3e61145fd/pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f", size = 38326 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/a6/53/d78dc063216e62fc55f6b2eebb447f6a4b0a59f55c8406376f76bf959b08/pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6", size = 32327, upload-time = "2021-03-10T02:09:53.503Z" }, + { url = "https://files.pythonhosted.org/packages/a6/53/d78dc063216e62fc55f6b2eebb447f6a4b0a59f55c8406376f76bf959b08/pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6", size = 32327 }, ] [[package]] name = "pygments" version = "2.19.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581, upload-time = "2025-01-06T17:26:30.443Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } wheels = [ - { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293, upload-time = "2025-01-06T17:26:25.553Z" }, + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, ] [[package]] name = "pyjwt" version = "2.10.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = 
"sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785, upload-time = "2024-11-28T03:43:29.933Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785 } wheels = [ - { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" }, + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 }, ] [package.optional-dependencies] @@ -1254,22 +1011,13 @@ crypto = [ { name = "cryptography" }, ] -[[package]] -name = "pyparsing" -version = "3.2.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bb/22/f1129e69d94ffff626bdb5c835506b3a5b4f3d070f17ea295e12c2c6f60f/pyparsing-3.2.3.tar.gz", hash = "sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be", size = 1088608, upload-time = "2025-03-25T05:01:28.114Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120, upload-time = "2025-03-25T05:01:24.908Z" }, -] - [[package]] name = "pyreadline3" version = "3.5.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0f/49/4cea918a08f02817aabae639e3d0ac046fef9f9180518a3ad394e22da148/pyreadline3-3.5.4.tar.gz", hash = 
"sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7", size = 99839, upload-time = "2024-09-19T02:40:10.062Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/49/4cea918a08f02817aabae639e3d0ac046fef9f9180518a3ad394e22da148/pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7", size = 99839 } wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178, upload-time = "2024-09-19T02:40:08.598Z" }, + { url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178 }, ] [[package]] @@ -1279,18 +1027,18 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "six" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, + { url = 
"https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, ] [[package]] name = "python-dotenv" version = "1.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/88/2c/7bb1416c5620485aa793f2de31d3df393d3686aa8a8506d11e10e13c5baf/python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5", size = 39920, upload-time = "2025-03-25T10:14:56.835Z" } +sdist = { url = "https://files.pythonhosted.org/packages/88/2c/7bb1416c5620485aa793f2de31d3df393d3686aa8a8506d11e10e13c5baf/python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5", size = 39920 } wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/18/98a99ad95133c6a6e2005fe89faedf294a748bd5dc803008059409ac9b1e/python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d", size = 20256, upload-time = "2025-03-25T10:14:55.034Z" }, + { url = "https://files.pythonhosted.org/packages/1e/18/98a99ad95133c6a6e2005fe89faedf294a748bd5dc803008059409ac9b1e/python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d", size = 20256 }, ] [[package]] @@ -1303,81 +1051,81 @@ dependencies = [ { name = "typing-extensions" }, { name = "xlsxwriter" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/52/a9/0c0db8d37b2b8a645666f7fd8accea4c6224e013c42b1d5c17c93590cd06/python_pptx-1.0.2.tar.gz", hash = "sha256:479a8af0eaf0f0d76b6f00b0887732874ad2e3188230315290cd1f9dd9cc7095", size = 10109297, upload-time = "2024-08-07T17:33:37.772Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/52/a9/0c0db8d37b2b8a645666f7fd8accea4c6224e013c42b1d5c17c93590cd06/python_pptx-1.0.2.tar.gz", hash = "sha256:479a8af0eaf0f0d76b6f00b0887732874ad2e3188230315290cd1f9dd9cc7095", size = 10109297 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/4f/00be2196329ebbff56ce564aa94efb0fbc828d00de250b1980de1a34ab49/python_pptx-1.0.2-py3-none-any.whl", hash = "sha256:160838e0b8565a8b1f67947675886e9fea18aa5e795db7ae531606d68e785cba", size = 472788, upload-time = "2024-08-07T17:33:28.192Z" }, + { url = "https://files.pythonhosted.org/packages/d9/4f/00be2196329ebbff56ce564aa94efb0fbc828d00de250b1980de1a34ab49/python_pptx-1.0.2-py3-none-any.whl", hash = "sha256:160838e0b8565a8b1f67947675886e9fea18aa5e795db7ae531606d68e785cba", size = 472788 }, ] [[package]] name = "pytz" version = "2025.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884 } wheels = [ - { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, + { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225 }, ] [[package]] name 
= "pyyaml" version = "6.0.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631, upload-time = "2024-08-06T20:33:50.674Z" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631 } wheels = [ - { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873, upload-time = "2024-08-06T20:32:25.131Z" }, - { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302, upload-time = "2024-08-06T20:32:26.511Z" }, - { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154, upload-time = "2024-08-06T20:32:28.363Z" }, - { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223, upload-time = "2024-08-06T20:32:30.058Z" }, - { url = 
"https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542, upload-time = "2024-08-06T20:32:31.881Z" }, - { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164, upload-time = "2024-08-06T20:32:37.083Z" }, - { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611, upload-time = "2024-08-06T20:32:38.898Z" }, - { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591, upload-time = "2024-08-06T20:32:40.241Z" }, - { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338, upload-time = "2024-08-06T20:32:41.93Z" }, + { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873 }, + { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302 }, + { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154 }, + { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223 }, + { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542 }, + { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164 }, + { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611 }, + { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591 }, + { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", 
size = 156338 }, ] [[package]] name = "rapidfuzz" version = "3.13.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ed/f6/6895abc3a3d056b9698da3199b04c0e56226d530ae44a470edabf8b664f0/rapidfuzz-3.13.0.tar.gz", hash = "sha256:d2eaf3839e52cbcc0accbe9817a67b4b0fcf70aaeb229cfddc1c28061f9ce5d8", size = 57904226, upload-time = "2025-04-03T20:38:51.226Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ed/f6/6895abc3a3d056b9698da3199b04c0e56226d530ae44a470edabf8b664f0/rapidfuzz-3.13.0.tar.gz", hash = "sha256:d2eaf3839e52cbcc0accbe9817a67b4b0fcf70aaeb229cfddc1c28061f9ce5d8", size = 57904226 } wheels = [ - { url = "https://files.pythonhosted.org/packages/13/4b/a326f57a4efed8f5505b25102797a58e37ee11d94afd9d9422cb7c76117e/rapidfuzz-3.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a1a6a906ba62f2556372282b1ef37b26bca67e3d2ea957277cfcefc6275cca7", size = 1989501, upload-time = "2025-04-03T20:36:13.43Z" }, - { url = "https://files.pythonhosted.org/packages/b7/53/1f7eb7ee83a06c400089ec7cb841cbd581c2edd7a4b21eb2f31030b88daa/rapidfuzz-3.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2fd0975e015b05c79a97f38883a11236f5a24cca83aa992bd2558ceaa5652b26", size = 1445379, upload-time = "2025-04-03T20:36:16.439Z" }, - { url = "https://files.pythonhosted.org/packages/07/09/de8069a4599cc8e6d194e5fa1782c561151dea7d5e2741767137e2a8c1f0/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d4e13593d298c50c4f94ce453f757b4b398af3fa0fd2fde693c3e51195b7f69", size = 1405986, upload-time = "2025-04-03T20:36:18.447Z" }, - { url = "https://files.pythonhosted.org/packages/5d/77/d9a90b39c16eca20d70fec4ca377fbe9ea4c0d358c6e4736ab0e0e78aaf6/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed6f416bda1c9133000009d84d9409823eb2358df0950231cc936e4bf784eb97", size = 5310809, upload-time = "2025-04-03T20:36:20.324Z" }, - { url = 
"https://files.pythonhosted.org/packages/1e/7d/14da291b0d0f22262d19522afaf63bccf39fc027c981233fb2137a57b71f/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1dc82b6ed01acb536b94a43996a94471a218f4d89f3fdd9185ab496de4b2a981", size = 1629394, upload-time = "2025-04-03T20:36:22.256Z" }, - { url = "https://files.pythonhosted.org/packages/b7/e4/79ed7e4fa58f37c0f8b7c0a62361f7089b221fe85738ae2dbcfb815e985a/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9d824de871daa6e443b39ff495a884931970d567eb0dfa213d234337343835f", size = 1600544, upload-time = "2025-04-03T20:36:24.207Z" }, - { url = "https://files.pythonhosted.org/packages/4e/20/e62b4d13ba851b0f36370060025de50a264d625f6b4c32899085ed51f980/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d18228a2390375cf45726ce1af9d36ff3dc1f11dce9775eae1f1b13ac6ec50f", size = 3052796, upload-time = "2025-04-03T20:36:26.279Z" }, - { url = "https://files.pythonhosted.org/packages/cd/8d/55fdf4387dec10aa177fe3df8dbb0d5022224d95f48664a21d6b62a5299d/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9f5fe634c9482ec5d4a6692afb8c45d370ae86755e5f57aa6c50bfe4ca2bdd87", size = 2464016, upload-time = "2025-04-03T20:36:28.525Z" }, - { url = "https://files.pythonhosted.org/packages/9b/be/0872f6a56c0f473165d3b47d4170fa75263dc5f46985755aa9bf2bbcdea1/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:694eb531889f71022b2be86f625a4209c4049e74be9ca836919b9e395d5e33b3", size = 7556725, upload-time = "2025-04-03T20:36:30.629Z" }, - { url = "https://files.pythonhosted.org/packages/5d/f3/6c0750e484d885a14840c7a150926f425d524982aca989cdda0bb3bdfa57/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:11b47b40650e06147dee5e51a9c9ad73bb7b86968b6f7d30e503b9f8dd1292db", size = 2859052, upload-time = "2025-04-03T20:36:32.836Z" }, - { url = 
"https://files.pythonhosted.org/packages/6f/98/5a3a14701b5eb330f444f7883c9840b43fb29c575e292e09c90a270a6e07/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:98b8107ff14f5af0243f27d236bcc6e1ef8e7e3b3c25df114e91e3a99572da73", size = 3390219, upload-time = "2025-04-03T20:36:35.062Z" }, - { url = "https://files.pythonhosted.org/packages/e9/7d/f4642eaaeb474b19974332f2a58471803448be843033e5740965775760a5/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b836f486dba0aceb2551e838ff3f514a38ee72b015364f739e526d720fdb823a", size = 4377924, upload-time = "2025-04-03T20:36:37.363Z" }, - { url = "https://files.pythonhosted.org/packages/8e/83/fa33f61796731891c3e045d0cbca4436a5c436a170e7f04d42c2423652c3/rapidfuzz-3.13.0-cp312-cp312-win32.whl", hash = "sha256:4671ee300d1818d7bdfd8fa0608580d7778ba701817216f0c17fb29e6b972514", size = 1823915, upload-time = "2025-04-03T20:36:39.451Z" }, - { url = "https://files.pythonhosted.org/packages/03/25/5ee7ab6841ca668567d0897905eebc79c76f6297b73bf05957be887e9c74/rapidfuzz-3.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:6e2065f68fb1d0bf65adc289c1bdc45ba7e464e406b319d67bb54441a1b9da9e", size = 1616985, upload-time = "2025-04-03T20:36:41.631Z" }, - { url = "https://files.pythonhosted.org/packages/76/5e/3f0fb88db396cb692aefd631e4805854e02120a2382723b90dcae720bcc6/rapidfuzz-3.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:65cc97c2fc2c2fe23586599686f3b1ceeedeca8e598cfcc1b7e56dc8ca7e2aa7", size = 860116, upload-time = "2025-04-03T20:36:43.915Z" }, + { url = "https://files.pythonhosted.org/packages/13/4b/a326f57a4efed8f5505b25102797a58e37ee11d94afd9d9422cb7c76117e/rapidfuzz-3.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a1a6a906ba62f2556372282b1ef37b26bca67e3d2ea957277cfcefc6275cca7", size = 1989501 }, + { url = "https://files.pythonhosted.org/packages/b7/53/1f7eb7ee83a06c400089ec7cb841cbd581c2edd7a4b21eb2f31030b88daa/rapidfuzz-3.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:2fd0975e015b05c79a97f38883a11236f5a24cca83aa992bd2558ceaa5652b26", size = 1445379 }, + { url = "https://files.pythonhosted.org/packages/07/09/de8069a4599cc8e6d194e5fa1782c561151dea7d5e2741767137e2a8c1f0/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d4e13593d298c50c4f94ce453f757b4b398af3fa0fd2fde693c3e51195b7f69", size = 1405986 }, + { url = "https://files.pythonhosted.org/packages/5d/77/d9a90b39c16eca20d70fec4ca377fbe9ea4c0d358c6e4736ab0e0e78aaf6/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed6f416bda1c9133000009d84d9409823eb2358df0950231cc936e4bf784eb97", size = 5310809 }, + { url = "https://files.pythonhosted.org/packages/1e/7d/14da291b0d0f22262d19522afaf63bccf39fc027c981233fb2137a57b71f/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1dc82b6ed01acb536b94a43996a94471a218f4d89f3fdd9185ab496de4b2a981", size = 1629394 }, + { url = "https://files.pythonhosted.org/packages/b7/e4/79ed7e4fa58f37c0f8b7c0a62361f7089b221fe85738ae2dbcfb815e985a/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9d824de871daa6e443b39ff495a884931970d567eb0dfa213d234337343835f", size = 1600544 }, + { url = "https://files.pythonhosted.org/packages/4e/20/e62b4d13ba851b0f36370060025de50a264d625f6b4c32899085ed51f980/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d18228a2390375cf45726ce1af9d36ff3dc1f11dce9775eae1f1b13ac6ec50f", size = 3052796 }, + { url = "https://files.pythonhosted.org/packages/cd/8d/55fdf4387dec10aa177fe3df8dbb0d5022224d95f48664a21d6b62a5299d/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9f5fe634c9482ec5d4a6692afb8c45d370ae86755e5f57aa6c50bfe4ca2bdd87", size = 2464016 }, + { url = 
"https://files.pythonhosted.org/packages/9b/be/0872f6a56c0f473165d3b47d4170fa75263dc5f46985755aa9bf2bbcdea1/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:694eb531889f71022b2be86f625a4209c4049e74be9ca836919b9e395d5e33b3", size = 7556725 }, + { url = "https://files.pythonhosted.org/packages/5d/f3/6c0750e484d885a14840c7a150926f425d524982aca989cdda0bb3bdfa57/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:11b47b40650e06147dee5e51a9c9ad73bb7b86968b6f7d30e503b9f8dd1292db", size = 2859052 }, + { url = "https://files.pythonhosted.org/packages/6f/98/5a3a14701b5eb330f444f7883c9840b43fb29c575e292e09c90a270a6e07/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:98b8107ff14f5af0243f27d236bcc6e1ef8e7e3b3c25df114e91e3a99572da73", size = 3390219 }, + { url = "https://files.pythonhosted.org/packages/e9/7d/f4642eaaeb474b19974332f2a58471803448be843033e5740965775760a5/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b836f486dba0aceb2551e838ff3f514a38ee72b015364f739e526d720fdb823a", size = 4377924 }, + { url = "https://files.pythonhosted.org/packages/8e/83/fa33f61796731891c3e045d0cbca4436a5c436a170e7f04d42c2423652c3/rapidfuzz-3.13.0-cp312-cp312-win32.whl", hash = "sha256:4671ee300d1818d7bdfd8fa0608580d7778ba701817216f0c17fb29e6b972514", size = 1823915 }, + { url = "https://files.pythonhosted.org/packages/03/25/5ee7ab6841ca668567d0897905eebc79c76f6297b73bf05957be887e9c74/rapidfuzz-3.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:6e2065f68fb1d0bf65adc289c1bdc45ba7e464e406b319d67bb54441a1b9da9e", size = 1616985 }, + { url = "https://files.pythonhosted.org/packages/76/5e/3f0fb88db396cb692aefd631e4805854e02120a2382723b90dcae720bcc6/rapidfuzz-3.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:65cc97c2fc2c2fe23586599686f3b1ceeedeca8e598cfcc1b7e56dc8ca7e2aa7", size = 860116 }, ] [[package]] name = "regex" version = "2024.11.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/8e/5f/bd69653fbfb76cf8604468d3b4ec4c403197144c7bfe0e6a5fc9e02a07cb/regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519", size = 399494, upload-time = "2024-11-06T20:12:31.635Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8e/5f/bd69653fbfb76cf8604468d3b4ec4c403197144c7bfe0e6a5fc9e02a07cb/regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519", size = 399494 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ba/30/9a87ce8336b172cc232a0db89a3af97929d06c11ceaa19d97d84fa90a8f8/regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a", size = 483781, upload-time = "2024-11-06T20:10:07.07Z" }, - { url = "https://files.pythonhosted.org/packages/01/e8/00008ad4ff4be8b1844786ba6636035f7ef926db5686e4c0f98093612add/regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9", size = 288455, upload-time = "2024-11-06T20:10:09.117Z" }, - { url = "https://files.pythonhosted.org/packages/60/85/cebcc0aff603ea0a201667b203f13ba75d9fc8668fab917ac5b2de3967bc/regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2", size = 284759, upload-time = "2024-11-06T20:10:11.155Z" }, - { url = "https://files.pythonhosted.org/packages/94/2b/701a4b0585cb05472a4da28ee28fdfe155f3638f5e1ec92306d924e5faf0/regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4", size = 794976, upload-time = "2024-11-06T20:10:13.24Z" }, - { url = 
"https://files.pythonhosted.org/packages/4b/bf/fa87e563bf5fee75db8915f7352e1887b1249126a1be4813837f5dbec965/regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577", size = 833077, upload-time = "2024-11-06T20:10:15.37Z" }, - { url = "https://files.pythonhosted.org/packages/a1/56/7295e6bad94b047f4d0834e4779491b81216583c00c288252ef625c01d23/regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3", size = 823160, upload-time = "2024-11-06T20:10:19.027Z" }, - { url = "https://files.pythonhosted.org/packages/fb/13/e3b075031a738c9598c51cfbc4c7879e26729c53aa9cca59211c44235314/regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e", size = 796896, upload-time = "2024-11-06T20:10:21.85Z" }, - { url = "https://files.pythonhosted.org/packages/24/56/0b3f1b66d592be6efec23a795b37732682520b47c53da5a32c33ed7d84e3/regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe", size = 783997, upload-time = "2024-11-06T20:10:24.329Z" }, - { url = "https://files.pythonhosted.org/packages/f9/a1/eb378dada8b91c0e4c5f08ffb56f25fcae47bf52ad18f9b2f33b83e6d498/regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e", size = 781725, upload-time = "2024-11-06T20:10:28.067Z" }, - { url = "https://files.pythonhosted.org/packages/83/f2/033e7dec0cfd6dda93390089864732a3409246ffe8b042e9554afa9bff4e/regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29", size = 789481, upload-time = "2024-11-06T20:10:31.612Z" }, - { url = 
"https://files.pythonhosted.org/packages/83/23/15d4552ea28990a74e7696780c438aadd73a20318c47e527b47a4a5a596d/regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39", size = 852896, upload-time = "2024-11-06T20:10:34.054Z" }, - { url = "https://files.pythonhosted.org/packages/e3/39/ed4416bc90deedbfdada2568b2cb0bc1fdb98efe11f5378d9892b2a88f8f/regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51", size = 860138, upload-time = "2024-11-06T20:10:36.142Z" }, - { url = "https://files.pythonhosted.org/packages/93/2d/dd56bb76bd8e95bbce684326302f287455b56242a4f9c61f1bc76e28360e/regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad", size = 787692, upload-time = "2024-11-06T20:10:38.394Z" }, - { url = "https://files.pythonhosted.org/packages/0b/55/31877a249ab7a5156758246b9c59539abbeba22461b7d8adc9e8475ff73e/regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54", size = 262135, upload-time = "2024-11-06T20:10:40.367Z" }, - { url = "https://files.pythonhosted.org/packages/38/ec/ad2d7de49a600cdb8dd78434a1aeffe28b9d6fc42eb36afab4a27ad23384/regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b", size = 273567, upload-time = "2024-11-06T20:10:43.467Z" }, + { url = "https://files.pythonhosted.org/packages/ba/30/9a87ce8336b172cc232a0db89a3af97929d06c11ceaa19d97d84fa90a8f8/regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a", size = 483781 }, + { url = "https://files.pythonhosted.org/packages/01/e8/00008ad4ff4be8b1844786ba6636035f7ef926db5686e4c0f98093612add/regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9", size = 288455 }, + { url = "https://files.pythonhosted.org/packages/60/85/cebcc0aff603ea0a201667b203f13ba75d9fc8668fab917ac5b2de3967bc/regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2", size = 284759 }, + { url = "https://files.pythonhosted.org/packages/94/2b/701a4b0585cb05472a4da28ee28fdfe155f3638f5e1ec92306d924e5faf0/regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4", size = 794976 }, + { url = "https://files.pythonhosted.org/packages/4b/bf/fa87e563bf5fee75db8915f7352e1887b1249126a1be4813837f5dbec965/regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577", size = 833077 }, + { url = "https://files.pythonhosted.org/packages/a1/56/7295e6bad94b047f4d0834e4779491b81216583c00c288252ef625c01d23/regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3", size = 823160 }, + { url = "https://files.pythonhosted.org/packages/fb/13/e3b075031a738c9598c51cfbc4c7879e26729c53aa9cca59211c44235314/regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e", size = 796896 }, + { url = "https://files.pythonhosted.org/packages/24/56/0b3f1b66d592be6efec23a795b37732682520b47c53da5a32c33ed7d84e3/regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe", size = 783997 }, + { url = 
"https://files.pythonhosted.org/packages/f9/a1/eb378dada8b91c0e4c5f08ffb56f25fcae47bf52ad18f9b2f33b83e6d498/regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e", size = 781725 }, + { url = "https://files.pythonhosted.org/packages/83/f2/033e7dec0cfd6dda93390089864732a3409246ffe8b042e9554afa9bff4e/regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29", size = 789481 }, + { url = "https://files.pythonhosted.org/packages/83/23/15d4552ea28990a74e7696780c438aadd73a20318c47e527b47a4a5a596d/regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39", size = 852896 }, + { url = "https://files.pythonhosted.org/packages/e3/39/ed4416bc90deedbfdada2568b2cb0bc1fdb98efe11f5378d9892b2a88f8f/regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51", size = 860138 }, + { url = "https://files.pythonhosted.org/packages/93/2d/dd56bb76bd8e95bbce684326302f287455b56242a4f9c61f1bc76e28360e/regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad", size = 787692 }, + { url = "https://files.pythonhosted.org/packages/0b/55/31877a249ab7a5156758246b9c59539abbeba22461b7d8adc9e8475ff73e/regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54", size = 262135 }, + { url = "https://files.pythonhosted.org/packages/38/ec/ad2d7de49a600cdb8dd78434a1aeffe28b9d6fc42eb36afab4a27ad23384/regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b", size = 273567 }, ] [[package]] @@ -1390,9 +1138,9 @@ dependencies = [ { name = "idna" }, { name = "urllib3" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218, upload-time = "2024-05-29T15:37:49.536Z" } +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928, upload-time = "2024-05-29T15:37:47.027Z" }, + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, ] [[package]] @@ -1403,157 +1151,73 @@ dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078, upload-time = "2025-03-30T14:15:14.23Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078 } wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229, upload-time = 
"2025-03-30T14:15:12.283Z" }, -] - -[[package]] -name = "rouge-score" -version = "0.1.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "absl-py" }, - { name = "nltk" }, - { name = "numpy" }, - { name = "six" }, + { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229 }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e2/c5/9136736c37022a6ad27fea38f3111eb8f02fe75d067f9a985cc358653102/rouge_score-0.1.2.tar.gz", hash = "sha256:c7d4da2683e68c9abf0135ef915d63a46643666f848e558a1b9f7ead17ff0f04", size = 17400, upload-time = "2022-07-22T22:46:22.909Z" } [[package]] name = "ruff" version = "0.11.8" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/52/f6/adcf73711f31c9f5393862b4281c875a462d9f639f4ccdf69dc368311c20/ruff-0.11.8.tar.gz", hash = "sha256:6d742d10626f9004b781f4558154bb226620a7242080e11caeffab1a40e99df8", size = 4086399, upload-time = "2025-05-01T14:53:24.459Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9f/60/c6aa9062fa518a9f86cb0b85248245cddcd892a125ca00441df77d79ef88/ruff-0.11.8-py3-none-linux_armv6l.whl", hash = "sha256:896a37516c594805e34020c4a7546c8f8a234b679a7716a3f08197f38913e1a3", size = 10272473, upload-time = "2025-05-01T14:52:37.252Z" }, - { url = "https://files.pythonhosted.org/packages/a0/e4/0325e50d106dc87c00695f7bcd5044c6d252ed5120ebf423773e00270f50/ruff-0.11.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ab86d22d3d721a40dd3ecbb5e86ab03b2e053bc93c700dc68d1c3346b36ce835", size = 11040862, upload-time = "2025-05-01T14:52:41.022Z" }, - { url = "https://files.pythonhosted.org/packages/e6/27/b87ea1a7be37fef0adbc7fd987abbf90b6607d96aa3fc67e2c5b858e1e53/ruff-0.11.8-py3-none-macosx_11_0_arm64.whl", hash = 
"sha256:258f3585057508d317610e8a412788cf726efeefa2fec4dba4001d9e6f90d46c", size = 10385273, upload-time = "2025-05-01T14:52:43.551Z" }, - { url = "https://files.pythonhosted.org/packages/d3/f7/3346161570d789045ed47a86110183f6ac3af0e94e7fd682772d89f7f1a1/ruff-0.11.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:727d01702f7c30baed3fc3a34901a640001a2828c793525043c29f7614994a8c", size = 10578330, upload-time = "2025-05-01T14:52:45.48Z" }, - { url = "https://files.pythonhosted.org/packages/c6/c3/327fb950b4763c7b3784f91d3038ef10c13b2d42322d4ade5ce13a2f9edb/ruff-0.11.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3dca977cc4fc8f66e89900fa415ffe4dbc2e969da9d7a54bfca81a128c5ac219", size = 10122223, upload-time = "2025-05-01T14:52:47.675Z" }, - { url = "https://files.pythonhosted.org/packages/de/c7/ba686bce9adfeb6c61cb1bbadc17d58110fe1d602f199d79d4c880170f19/ruff-0.11.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c657fa987d60b104d2be8b052d66da0a2a88f9bd1d66b2254333e84ea2720c7f", size = 11697353, upload-time = "2025-05-01T14:52:50.264Z" }, - { url = "https://files.pythonhosted.org/packages/53/8e/a4fb4a1ddde3c59e73996bb3ac51844ff93384d533629434b1def7a336b0/ruff-0.11.8-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:f2e74b021d0de5eceb8bd32919f6ff8a9b40ee62ed97becd44993ae5b9949474", size = 12375936, upload-time = "2025-05-01T14:52:52.394Z" }, - { url = "https://files.pythonhosted.org/packages/ad/a1/9529cb1e2936e2479a51aeb011307e7229225df9ac64ae064d91ead54571/ruff-0.11.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f9b5ef39820abc0f2c62111f7045009e46b275f5b99d5e59dda113c39b7f4f38", size = 11850083, upload-time = "2025-05-01T14:52:55.424Z" }, - { url = "https://files.pythonhosted.org/packages/3e/94/8f7eac4c612673ae15a4ad2bc0ee62e03c68a2d4f458daae3de0e47c67ba/ruff-0.11.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:c1dba3135ca503727aa4648152c0fa67c3b1385d3dc81c75cd8a229c4b2a1458", size = 14005834, upload-time = "2025-05-01T14:52:58.056Z" }, - { url = "https://files.pythonhosted.org/packages/1e/7c/6f63b46b2be870cbf3f54c9c4154d13fac4b8827f22fa05ac835c10835b2/ruff-0.11.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f024d32e62faad0f76b2d6afd141b8c171515e4fb91ce9fd6464335c81244e5", size = 11503713, upload-time = "2025-05-01T14:53:01.244Z" }, - { url = "https://files.pythonhosted.org/packages/3a/91/57de411b544b5fe072779678986a021d87c3ee5b89551f2ca41200c5d643/ruff-0.11.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:d365618d3ad747432e1ae50d61775b78c055fee5936d77fb4d92c6f559741948", size = 10457182, upload-time = "2025-05-01T14:53:03.726Z" }, - { url = "https://files.pythonhosted.org/packages/01/49/cfe73e0ce5ecdd3e6f1137bf1f1be03dcc819d1bfe5cff33deb40c5926db/ruff-0.11.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4d9aaa91035bdf612c8ee7266153bcf16005c7c7e2f5878406911c92a31633cb", size = 10101027, upload-time = "2025-05-01T14:53:06.555Z" }, - { url = "https://files.pythonhosted.org/packages/56/21/a5cfe47c62b3531675795f38a0ef1c52ff8de62eaddf370d46634391a3fb/ruff-0.11.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:0eba551324733efc76116d9f3a0d52946bc2751f0cd30661564117d6fd60897c", size = 11111298, upload-time = "2025-05-01T14:53:08.825Z" }, - { url = "https://files.pythonhosted.org/packages/36/98/f76225f87e88f7cb669ae92c062b11c0a1e91f32705f829bd426f8e48b7b/ruff-0.11.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:161eb4cff5cfefdb6c9b8b3671d09f7def2f960cee33481dd898caf2bcd02304", size = 11566884, upload-time = "2025-05-01T14:53:11.626Z" }, - { url = "https://files.pythonhosted.org/packages/de/7e/fff70b02e57852fda17bd43f99dda37b9bcf3e1af3d97c5834ff48d04715/ruff-0.11.8-py3-none-win32.whl", hash = "sha256:5b18caa297a786465cc511d7f8be19226acf9c0a1127e06e736cd4e1878c3ea2", size = 10451102, upload-time = "2025-05-01T14:53:14.303Z" }, - { 
url = "https://files.pythonhosted.org/packages/7b/a9/eaa571eb70648c9bde3120a1d5892597de57766e376b831b06e7c1e43945/ruff-0.11.8-py3-none-win_amd64.whl", hash = "sha256:6e70d11043bef637c5617297bdedec9632af15d53ac1e1ba29c448da9341b0c4", size = 11597410, upload-time = "2025-05-01T14:53:16.571Z" }, - { url = "https://files.pythonhosted.org/packages/cd/be/f6b790d6ae98f1f32c645f8540d5c96248b72343b0a56fab3a07f2941897/ruff-0.11.8-py3-none-win_arm64.whl", hash = "sha256:304432e4c4a792e3da85b7699feb3426a0908ab98bf29df22a31b0cdd098fac2", size = 10713129, upload-time = "2025-05-01T14:53:22.27Z" }, -] - -[[package]] -name = "safetensors" -version = "0.5.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/71/7e/2d5d6ee7b40c0682315367ec7475693d110f512922d582fef1bd4a63adc3/safetensors-0.5.3.tar.gz", hash = "sha256:b6b0d6ecacec39a4fdd99cc19f4576f5219ce858e6fd8dbe7609df0b8dc56965", size = 67210, upload-time = "2025-02-26T09:15:13.155Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/18/ae/88f6c49dbd0cc4da0e08610019a3c78a7d390879a919411a410a1876d03a/safetensors-0.5.3-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bd20eb133db8ed15b40110b7c00c6df51655a2998132193de2f75f72d99c7073", size = 436917, upload-time = "2025-02-26T09:15:03.702Z" }, - { url = "https://files.pythonhosted.org/packages/b8/3b/11f1b4a2f5d2ab7da34ecc062b0bc301f2be024d110a6466726bec8c055c/safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:21d01c14ff6c415c485616b8b0bf961c46b3b343ca59110d38d744e577f9cce7", size = 418419, upload-time = "2025-02-26T09:15:01.765Z" }, - { url = "https://files.pythonhosted.org/packages/5d/9a/add3e6fef267658075c5a41573c26d42d80c935cdc992384dfae435feaef/safetensors-0.5.3-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11bce6164887cd491ca75c2326a113ba934be596e22b28b1742ce27b1d076467", size = 459493, upload-time = "2025-02-26T09:14:51.812Z" }, - { url = 
"https://files.pythonhosted.org/packages/df/5c/bf2cae92222513cc23b3ff85c4a1bb2811a2c3583ac0f8e8d502751de934/safetensors-0.5.3-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4a243be3590bc3301c821da7a18d87224ef35cbd3e5f5727e4e0728b8172411e", size = 472400, upload-time = "2025-02-26T09:14:53.549Z" }, - { url = "https://files.pythonhosted.org/packages/58/11/7456afb740bd45782d0f4c8e8e1bb9e572f1bf82899fb6ace58af47b4282/safetensors-0.5.3-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8bd84b12b1670a6f8e50f01e28156422a2bc07fb16fc4e98bded13039d688a0d", size = 522891, upload-time = "2025-02-26T09:14:55.717Z" }, - { url = "https://files.pythonhosted.org/packages/57/3d/fe73a9d2ace487e7285f6e157afee2383bd1ddb911b7cb44a55cf812eae3/safetensors-0.5.3-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:391ac8cab7c829452175f871fcaf414aa1e292b5448bd02620f675a7f3e7abb9", size = 537694, upload-time = "2025-02-26T09:14:57.036Z" }, - { url = "https://files.pythonhosted.org/packages/a6/f8/dae3421624fcc87a89d42e1898a798bc7ff72c61f38973a65d60df8f124c/safetensors-0.5.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cead1fa41fc54b1e61089fa57452e8834f798cb1dc7a09ba3524f1eb08e0317a", size = 471642, upload-time = "2025-02-26T09:15:00.544Z" }, - { url = "https://files.pythonhosted.org/packages/ce/20/1fbe16f9b815f6c5a672f5b760951e20e17e43f67f231428f871909a37f6/safetensors-0.5.3-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1077f3e94182d72618357b04b5ced540ceb71c8a813d3319f1aba448e68a770d", size = 502241, upload-time = "2025-02-26T09:14:58.303Z" }, - { url = "https://files.pythonhosted.org/packages/5f/18/8e108846b506487aa4629fe4116b27db65c3dde922de2c8e0cc1133f3f29/safetensors-0.5.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:799021e78287bac619c7b3f3606730a22da4cda27759ddf55d37c8db7511c74b", size = 638001, upload-time = "2025-02-26T09:15:05.79Z" }, - { url = 
"https://files.pythonhosted.org/packages/82/5a/c116111d8291af6c8c8a8b40628fe833b9db97d8141c2a82359d14d9e078/safetensors-0.5.3-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:df26da01aaac504334644e1b7642fa000bfec820e7cef83aeac4e355e03195ff", size = 734013, upload-time = "2025-02-26T09:15:07.892Z" }, - { url = "https://files.pythonhosted.org/packages/7d/ff/41fcc4d3b7de837963622e8610d998710705bbde9a8a17221d85e5d0baad/safetensors-0.5.3-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:32c3ef2d7af8b9f52ff685ed0bc43913cdcde135089ae322ee576de93eae5135", size = 670687, upload-time = "2025-02-26T09:15:09.979Z" }, - { url = "https://files.pythonhosted.org/packages/40/ad/2b113098e69c985a3d8fbda4b902778eae4a35b7d5188859b4a63d30c161/safetensors-0.5.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:37f1521be045e56fc2b54c606d4455573e717b2d887c579ee1dbba5f868ece04", size = 643147, upload-time = "2025-02-26T09:15:11.185Z" }, - { url = "https://files.pythonhosted.org/packages/0a/0c/95aeb51d4246bd9a3242d3d8349c1112b4ee7611a4b40f0c5c93b05f001d/safetensors-0.5.3-cp38-abi3-win32.whl", hash = "sha256:cfc0ec0846dcf6763b0ed3d1846ff36008c6e7290683b61616c4b040f6a54ace", size = 296677, upload-time = "2025-02-26T09:15:16.554Z" }, - { url = "https://files.pythonhosted.org/packages/69/e2/b011c38e5394c4c18fb5500778a55ec43ad6106126e74723ffaee246f56e/safetensors-0.5.3-cp38-abi3-win_amd64.whl", hash = "sha256:836cbbc320b47e80acd40e44c8682db0e8ad7123209f69b093def21ec7cafd11", size = 308878, upload-time = "2025-02-26T09:15:14.99Z" }, -] - -[[package]] -name = "scikit-learn" -version = "1.6.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "joblib" }, - { name = "numpy" }, - { name = "scipy" }, - { name = "threadpoolctl" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9e/a5/4ae3b3a0755f7b35a280ac90b28817d1f380318973cff14075ab41ef50d9/scikit_learn-1.6.1.tar.gz", hash = "sha256:b4fc2525eca2c69a59260f583c56a7557c6ccdf8deafdba6e060f94c1c59738e", size 
= 7068312, upload-time = "2025-01-10T08:07:55.348Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/18/c797c9b8c10380d05616db3bfb48e2a3358c767affd0857d56c2eb501caa/scikit_learn-1.6.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:926f207c804104677af4857b2c609940b743d04c4c35ce0ddc8ff4f053cddc1b", size = 12104516, upload-time = "2025-01-10T08:06:40.009Z" }, - { url = "https://files.pythonhosted.org/packages/c4/b7/2e35f8e289ab70108f8cbb2e7a2208f0575dc704749721286519dcf35f6f/scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c2cae262064e6a9b77eee1c8e768fc46aa0b8338c6a8297b9b6759720ec0ff2", size = 11167837, upload-time = "2025-01-10T08:06:43.305Z" }, - { url = "https://files.pythonhosted.org/packages/a4/f6/ff7beaeb644bcad72bcfd5a03ff36d32ee4e53a8b29a639f11bcb65d06cd/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f", size = 12253728, upload-time = "2025-01-10T08:06:47.618Z" }, - { url = "https://files.pythonhosted.org/packages/29/7a/8bce8968883e9465de20be15542f4c7e221952441727c4dad24d534c6d99/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e69fab4ebfc9c9b580a7a80111b43d214ab06250f8a7ef590a4edf72464dd86", size = 13147700, upload-time = "2025-01-10T08:06:50.888Z" }, - { url = "https://files.pythonhosted.org/packages/62/27/585859e72e117fe861c2079bcba35591a84f801e21bc1ab85bce6ce60305/scikit_learn-1.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:70b1d7e85b1c96383f872a519b3375f92f14731e279a7b4c6cfd650cf5dffc52", size = 11110613, upload-time = "2025-01-10T08:06:54.115Z" }, -] - -[[package]] -name = "scipy" -version = "1.15.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b7/b9/31ba9cd990e626574baf93fbc1ac61cf9ed54faafd04c479117517661637/scipy-1.15.2.tar.gz", hash = 
"sha256:cd58a314d92838f7e6f755c8a2167ead4f27e1fd5c1251fd54289569ef3495ec", size = 59417316, upload-time = "2025-02-17T00:42:24.791Z" } +sdist = { url = "https://files.pythonhosted.org/packages/52/f6/adcf73711f31c9f5393862b4281c875a462d9f639f4ccdf69dc368311c20/ruff-0.11.8.tar.gz", hash = "sha256:6d742d10626f9004b781f4558154bb226620a7242080e11caeffab1a40e99df8", size = 4086399 } wheels = [ - { url = "https://files.pythonhosted.org/packages/4b/5d/3c78815cbab499610f26b5bae6aed33e227225a9fa5290008a733a64f6fc/scipy-1.15.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c4697a10da8f8765bb7c83e24a470da5797e37041edfd77fd95ba3811a47c4fd", size = 38756184, upload-time = "2025-02-17T00:31:50.623Z" }, - { url = "https://files.pythonhosted.org/packages/37/20/3d04eb066b471b6e171827548b9ddb3c21c6bbea72a4d84fc5989933910b/scipy-1.15.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:869269b767d5ee7ea6991ed7e22b3ca1f22de73ab9a49c44bad338b725603301", size = 30163558, upload-time = "2025-02-17T00:31:56.721Z" }, - { url = "https://files.pythonhosted.org/packages/a4/98/e5c964526c929ef1f795d4c343b2ff98634ad2051bd2bbadfef9e772e413/scipy-1.15.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:bad78d580270a4d32470563ea86c6590b465cb98f83d760ff5b0990cb5518a93", size = 22437211, upload-time = "2025-02-17T00:32:03.042Z" }, - { url = "https://files.pythonhosted.org/packages/1d/cd/1dc7371e29195ecbf5222f9afeedb210e0a75057d8afbd942aa6cf8c8eca/scipy-1.15.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:b09ae80010f52efddb15551025f9016c910296cf70adbf03ce2a8704f3a5ad20", size = 25232260, upload-time = "2025-02-17T00:32:07.847Z" }, - { url = "https://files.pythonhosted.org/packages/f0/24/1a181a9e5050090e0b5138c5f496fee33293c342b788d02586bc410c6477/scipy-1.15.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a6fd6eac1ce74a9f77a7fc724080d507c5812d61e72bd5e4c489b042455865e", size = 35198095, upload-time = "2025-02-17T00:32:14.565Z" }, - { url = 
"https://files.pythonhosted.org/packages/c0/53/eaada1a414c026673eb983f8b4a55fe5eb172725d33d62c1b21f63ff6ca4/scipy-1.15.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b871df1fe1a3ba85d90e22742b93584f8d2b8e6124f8372ab15c71b73e428b8", size = 37297371, upload-time = "2025-02-17T00:32:21.411Z" }, - { url = "https://files.pythonhosted.org/packages/e9/06/0449b744892ed22b7e7b9a1994a866e64895363572677a316a9042af1fe5/scipy-1.15.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:03205d57a28e18dfd39f0377d5002725bf1f19a46f444108c29bdb246b6c8a11", size = 36872390, upload-time = "2025-02-17T00:32:29.421Z" }, - { url = "https://files.pythonhosted.org/packages/6a/6f/a8ac3cfd9505ec695c1bc35edc034d13afbd2fc1882a7c6b473e280397bb/scipy-1.15.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:601881dfb761311045b03114c5fe718a12634e5608c3b403737ae463c9885d53", size = 39700276, upload-time = "2025-02-17T00:32:37.431Z" }, - { url = "https://files.pythonhosted.org/packages/f5/6f/e6e5aff77ea2a48dd96808bb51d7450875af154ee7cbe72188afb0b37929/scipy-1.15.2-cp312-cp312-win_amd64.whl", hash = "sha256:e7c68b6a43259ba0aab737237876e5c2c549a031ddb7abc28c7b47f22e202ded", size = 40942317, upload-time = "2025-02-17T00:32:45.47Z" }, + { url = "https://files.pythonhosted.org/packages/9f/60/c6aa9062fa518a9f86cb0b85248245cddcd892a125ca00441df77d79ef88/ruff-0.11.8-py3-none-linux_armv6l.whl", hash = "sha256:896a37516c594805e34020c4a7546c8f8a234b679a7716a3f08197f38913e1a3", size = 10272473 }, + { url = "https://files.pythonhosted.org/packages/a0/e4/0325e50d106dc87c00695f7bcd5044c6d252ed5120ebf423773e00270f50/ruff-0.11.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ab86d22d3d721a40dd3ecbb5e86ab03b2e053bc93c700dc68d1c3346b36ce835", size = 11040862 }, + { url = "https://files.pythonhosted.org/packages/e6/27/b87ea1a7be37fef0adbc7fd987abbf90b6607d96aa3fc67e2c5b858e1e53/ruff-0.11.8-py3-none-macosx_11_0_arm64.whl", hash = 
"sha256:258f3585057508d317610e8a412788cf726efeefa2fec4dba4001d9e6f90d46c", size = 10385273 }, + { url = "https://files.pythonhosted.org/packages/d3/f7/3346161570d789045ed47a86110183f6ac3af0e94e7fd682772d89f7f1a1/ruff-0.11.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:727d01702f7c30baed3fc3a34901a640001a2828c793525043c29f7614994a8c", size = 10578330 }, + { url = "https://files.pythonhosted.org/packages/c6/c3/327fb950b4763c7b3784f91d3038ef10c13b2d42322d4ade5ce13a2f9edb/ruff-0.11.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3dca977cc4fc8f66e89900fa415ffe4dbc2e969da9d7a54bfca81a128c5ac219", size = 10122223 }, + { url = "https://files.pythonhosted.org/packages/de/c7/ba686bce9adfeb6c61cb1bbadc17d58110fe1d602f199d79d4c880170f19/ruff-0.11.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c657fa987d60b104d2be8b052d66da0a2a88f9bd1d66b2254333e84ea2720c7f", size = 11697353 }, + { url = "https://files.pythonhosted.org/packages/53/8e/a4fb4a1ddde3c59e73996bb3ac51844ff93384d533629434b1def7a336b0/ruff-0.11.8-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:f2e74b021d0de5eceb8bd32919f6ff8a9b40ee62ed97becd44993ae5b9949474", size = 12375936 }, + { url = "https://files.pythonhosted.org/packages/ad/a1/9529cb1e2936e2479a51aeb011307e7229225df9ac64ae064d91ead54571/ruff-0.11.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f9b5ef39820abc0f2c62111f7045009e46b275f5b99d5e59dda113c39b7f4f38", size = 11850083 }, + { url = "https://files.pythonhosted.org/packages/3e/94/8f7eac4c612673ae15a4ad2bc0ee62e03c68a2d4f458daae3de0e47c67ba/ruff-0.11.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c1dba3135ca503727aa4648152c0fa67c3b1385d3dc81c75cd8a229c4b2a1458", size = 14005834 }, + { url = 
"https://files.pythonhosted.org/packages/1e/7c/6f63b46b2be870cbf3f54c9c4154d13fac4b8827f22fa05ac835c10835b2/ruff-0.11.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f024d32e62faad0f76b2d6afd141b8c171515e4fb91ce9fd6464335c81244e5", size = 11503713 }, + { url = "https://files.pythonhosted.org/packages/3a/91/57de411b544b5fe072779678986a021d87c3ee5b89551f2ca41200c5d643/ruff-0.11.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:d365618d3ad747432e1ae50d61775b78c055fee5936d77fb4d92c6f559741948", size = 10457182 }, + { url = "https://files.pythonhosted.org/packages/01/49/cfe73e0ce5ecdd3e6f1137bf1f1be03dcc819d1bfe5cff33deb40c5926db/ruff-0.11.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4d9aaa91035bdf612c8ee7266153bcf16005c7c7e2f5878406911c92a31633cb", size = 10101027 }, + { url = "https://files.pythonhosted.org/packages/56/21/a5cfe47c62b3531675795f38a0ef1c52ff8de62eaddf370d46634391a3fb/ruff-0.11.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:0eba551324733efc76116d9f3a0d52946bc2751f0cd30661564117d6fd60897c", size = 11111298 }, + { url = "https://files.pythonhosted.org/packages/36/98/f76225f87e88f7cb669ae92c062b11c0a1e91f32705f829bd426f8e48b7b/ruff-0.11.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:161eb4cff5cfefdb6c9b8b3671d09f7def2f960cee33481dd898caf2bcd02304", size = 11566884 }, + { url = "https://files.pythonhosted.org/packages/de/7e/fff70b02e57852fda17bd43f99dda37b9bcf3e1af3d97c5834ff48d04715/ruff-0.11.8-py3-none-win32.whl", hash = "sha256:5b18caa297a786465cc511d7f8be19226acf9c0a1127e06e736cd4e1878c3ea2", size = 10451102 }, + { url = "https://files.pythonhosted.org/packages/7b/a9/eaa571eb70648c9bde3120a1d5892597de57766e376b831b06e7c1e43945/ruff-0.11.8-py3-none-win_amd64.whl", hash = "sha256:6e70d11043bef637c5617297bdedec9632af15d53ac1e1ba29c448da9341b0c4", size = 11597410 }, + { url = 
"https://files.pythonhosted.org/packages/cd/be/f6b790d6ae98f1f32c645f8540d5c96248b72343b0a56fab3a07f2941897/ruff-0.11.8-py3-none-win_arm64.whl", hash = "sha256:304432e4c4a792e3da85b7699feb3426a0908ab98bf29df22a31b0cdd098fac2", size = 10713129 }, ] [[package]] -name = "seaborn" -version = "0.13.2" +name = "s3transfer" +version = "0.13.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "matplotlib" }, - { name = "numpy" }, - { name = "pandas" }, + { name = "botocore" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/86/59/a451d7420a77ab0b98f7affa3a1d78a313d2f7281a57afb1a34bae8ab412/seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7", size = 1457696, upload-time = "2024-01-25T13:21:52.551Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ed/5d/9dcc100abc6711e8247af5aa561fc07c4a046f72f659c3adea9a449e191a/s3transfer-0.13.0.tar.gz", hash = "sha256:f5e6db74eb7776a37208001113ea7aa97695368242b364d73e91c981ac522177", size = 150232 } wheels = [ - { url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914, upload-time = "2024-01-25T13:21:49.598Z" }, -] - -[[package]] -name = "setuptools" -version = "80.3.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/70/dc/3976b322de9d2e87ed0007cf04cc7553969b6c7b3f48a565d0333748fbcd/setuptools-80.3.1.tar.gz", hash = "sha256:31e2c58dbb67c99c289f51c16d899afedae292b978f8051efaf6262d8212f927", size = 1315082, upload-time = "2025-05-04T18:47:04.397Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/53/7e/5d8af3317ddbf9519b687bd1c39d8737fde07d97f54df65553faca5cffb1/setuptools-80.3.1-py3-none-any.whl", hash = 
"sha256:ea8e00d7992054c4c592aeb892f6ad51fe1b4d90cc6947cc45c45717c40ec537", size = 1201172, upload-time = "2025-05-04T18:47:02.575Z" }, + { url = "https://files.pythonhosted.org/packages/18/17/22bf8155aa0ea2305eefa3a6402e040df7ebe512d1310165eda1e233c3f8/s3transfer-0.13.0-py3-none-any.whl", hash = "sha256:0148ef34d6dd964d0d8cf4311b2b21c474693e57c2e069ec708ce043d2b527be", size = 85152 }, ] [[package]] name = "shellingham" version = "1.5.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310 } wheels = [ - { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755 }, ] [[package]] name = "six" version = "1.17.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, ] [[package]] name = "soupsieve" version = "2.7" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3f/f4/4a80cd6ef364b2e8b65b15816a843c0980f7a5a2b4dc701fc574952aa19f/soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a", size = 103418, upload-time = "2025-04-20T18:50:08.518Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/f4/4a80cd6ef364b2e8b65b15816a843c0980f7a5a2b4dc701fc574952aa19f/soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a", size = 103418 } wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/9c/0e6afc12c269578be5c0c1c9f4b49a8d32770a080260c333ac04cc1c832d/soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4", size = 36677, upload-time = "2025-04-20T18:50:07.196Z" }, + { url = "https://files.pythonhosted.org/packages/e7/9c/0e6afc12c269578be5c0c1c9f4b49a8d32770a080260c333ac04cc1c832d/soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4", size = 36677 }, ] [[package]] @@ -1563,9 +1227,9 @@ 
source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a3/81/2f98238647f409d8faa209a0cbe9a1e2be74eeca5f739971698a2b54b12d/speechrecognition-3.14.2.tar.gz", hash = "sha256:2daa467f0b5686017ff3f9a64dcfa1a789ee10d1b0ada3be74bfad10eaef5f49", size = 32857832, upload-time = "2025-03-23T02:18:09.274Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a3/81/2f98238647f409d8faa209a0cbe9a1e2be74eeca5f739971698a2b54b12d/speechrecognition-3.14.2.tar.gz", hash = "sha256:2daa467f0b5686017ff3f9a64dcfa1a789ee10d1b0ada3be74bfad10eaef5f49", size = 32857832 } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/7a/ef9a0a3ddd7e8b304906bf0d7b1f3cd92759d7ea8be10d284183b2e8f47c/speechrecognition-3.14.2-py3-none-any.whl", hash = "sha256:42940b95295b358fdd7415daa01260c8b20025d6b4000fbbaa3458f005d912b7", size = 32853272, upload-time = "2025-03-23T02:18:03.056Z" }, + { url = "https://files.pythonhosted.org/packages/10/7a/ef9a0a3ddd7e8b304906bf0d7b1f3cd92759d7ea8be10d284183b2e8f47c/speechrecognition-3.14.2-py3-none-any.whl", hash = "sha256:42940b95295b358fdd7415daa01260c8b20025d6b4000fbbaa3458f005d912b7", size = 32853272 }, ] [[package]] @@ -1575,9 +1239,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mpmath" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } +sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353 }, ] [[package]] @@ -1587,18 +1251,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "rapidfuzz" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/81/4b/d3eb25831590d6d7d38c2f2e3561d3ba41d490dc89cd91d9e65e7c812508/thefuzz-0.22.1.tar.gz", hash = "sha256:7138039a7ecf540da323792d8592ef9902b1d79eb78c147d4f20664de79f3680", size = 19993, upload-time = "2024-01-19T19:18:23.135Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/82/4f/1695e70ceb3604f19eda9908e289c687ea81c4fecef4d90a9d1d0f2f7ae9/thefuzz-0.22.1-py3-none-any.whl", hash = "sha256:59729b33556850b90e1093c4cf9e618af6f2e4c985df193fdf3c5b5cf02ca481", size = 8245, upload-time = "2024-01-19T19:18:20.362Z" }, -] - -[[package]] -name = "threadpoolctl" -version = "3.6.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" } +sdist = { url = "https://files.pythonhosted.org/packages/81/4b/d3eb25831590d6d7d38c2f2e3561d3ba41d490dc89cd91d9e65e7c812508/thefuzz-0.22.1.tar.gz", hash = "sha256:7138039a7ecf540da323792d8592ef9902b1d79eb78c147d4f20664de79f3680", size = 19993 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, + { url = "https://files.pythonhosted.org/packages/82/4f/1695e70ceb3604f19eda9908e289c687ea81c4fecef4d90a9d1d0f2f7ae9/thefuzz-0.22.1-py3-none-any.whl", hash = "sha256:59729b33556850b90e1093c4cf9e618af6f2e4c985df193fdf3c5b5cf02ca481", size = 8245 }, ] [[package]] @@ -1609,74 +1264,23 @@ dependencies = [ { name = "regex" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ea/cf/756fedf6981e82897f2d570dd25fa597eb3f4459068ae0572d7e888cfd6f/tiktoken-0.9.0.tar.gz", hash = "sha256:d02a5ca6a938e0490e1ff957bc48c8b078c88cb83977be1625b1fd8aac792c5d", size = 35991, upload-time = "2025-02-14T06:03:01.003Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cf/e5/21ff33ecfa2101c1bb0f9b6df750553bd873b7fb532ce2cb276ff40b197f/tiktoken-0.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e88f121c1c22b726649ce67c089b90ddda8b9662545a8aeb03cfef15967ddd03", size = 1065073, upload-time = "2025-02-14T06:02:24.768Z" }, - { url = "https://files.pythonhosted.org/packages/8e/03/a95e7b4863ee9ceec1c55983e4cc9558bcfd8f4f80e19c4f8a99642f697d/tiktoken-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a6600660f2f72369acb13a57fb3e212434ed38b045fd8cc6cdd74947b4b5d210", size = 1008075, upload-time = "2025-02-14T06:02:26.92Z" }, - { url = "https://files.pythonhosted.org/packages/40/10/1305bb02a561595088235a513ec73e50b32e74364fef4de519da69bc8010/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95e811743b5dfa74f4b227927ed86cbc57cad4df859cb3b643be797914e41794", size = 1140754, upload-time = "2025-02-14T06:02:28.124Z" }, - { url = 
"https://files.pythonhosted.org/packages/1b/40/da42522018ca496432ffd02793c3a72a739ac04c3794a4914570c9bb2925/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99376e1370d59bcf6935c933cb9ba64adc29033b7e73f5f7569f3aad86552b22", size = 1196678, upload-time = "2025-02-14T06:02:29.845Z" }, - { url = "https://files.pythonhosted.org/packages/5c/41/1e59dddaae270ba20187ceb8aa52c75b24ffc09f547233991d5fd822838b/tiktoken-0.9.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:badb947c32739fb6ddde173e14885fb3de4d32ab9d8c591cbd013c22b4c31dd2", size = 1259283, upload-time = "2025-02-14T06:02:33.838Z" }, - { url = "https://files.pythonhosted.org/packages/5b/64/b16003419a1d7728d0d8c0d56a4c24325e7b10a21a9dd1fc0f7115c02f0a/tiktoken-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:5a62d7a25225bafed786a524c1b9f0910a1128f4232615bf3f8257a73aaa3b16", size = 894897, upload-time = "2025-02-14T06:02:36.265Z" }, -] - -[[package]] -name = "tokenizers" -version = "0.21.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "huggingface-hub" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/92/76/5ac0c97f1117b91b7eb7323dcd61af80d72f790b4df71249a7850c195f30/tokenizers-0.21.1.tar.gz", hash = "sha256:a1bb04dc5b448985f86ecd4b05407f5a8d97cb2c0532199b2a302a604a0165ab", size = 343256, upload-time = "2025-03-13T10:51:18.189Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/cf/756fedf6981e82897f2d570dd25fa597eb3f4459068ae0572d7e888cfd6f/tiktoken-0.9.0.tar.gz", hash = "sha256:d02a5ca6a938e0490e1ff957bc48c8b078c88cb83977be1625b1fd8aac792c5d", size = 35991 } wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/1f/328aee25f9115bf04262e8b4e5a2050b7b7cf44b59c74e982db7270c7f30/tokenizers-0.21.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e78e413e9e668ad790a29456e677d9d3aa50a9ad311a40905d6861ba7692cf41", size = 2780767, upload-time = "2025-03-13T10:51:09.459Z" }, - { url = 
"https://files.pythonhosted.org/packages/ae/1a/4526797f3719b0287853f12c5ad563a9be09d446c44ac784cdd7c50f76ab/tokenizers-0.21.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:cd51cd0a91ecc801633829fcd1fda9cf8682ed3477c6243b9a095539de4aecf3", size = 2650555, upload-time = "2025-03-13T10:51:07.692Z" }, - { url = "https://files.pythonhosted.org/packages/4d/7a/a209b29f971a9fdc1da86f917fe4524564924db50d13f0724feed37b2a4d/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28da6b72d4fb14ee200a1bd386ff74ade8992d7f725f2bde2c495a9a98cf4d9f", size = 2937541, upload-time = "2025-03-13T10:50:56.679Z" }, - { url = "https://files.pythonhosted.org/packages/3c/1e/b788b50ffc6191e0b1fc2b0d49df8cff16fe415302e5ceb89f619d12c5bc/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:34d8cfde551c9916cb92014e040806122295a6800914bab5865deb85623931cf", size = 2819058, upload-time = "2025-03-13T10:50:59.525Z" }, - { url = "https://files.pythonhosted.org/packages/36/aa/3626dfa09a0ecc5b57a8c58eeaeb7dd7ca9a37ad9dd681edab5acd55764c/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aaa852d23e125b73d283c98f007e06d4595732104b65402f46e8ef24b588d9f8", size = 3133278, upload-time = "2025-03-13T10:51:04.678Z" }, - { url = "https://files.pythonhosted.org/packages/a4/4d/8fbc203838b3d26269f944a89459d94c858f5b3f9a9b6ee9728cdcf69161/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a21a15d5c8e603331b8a59548bbe113564136dc0f5ad8306dd5033459a226da0", size = 3144253, upload-time = "2025-03-13T10:51:01.261Z" }, - { url = "https://files.pythonhosted.org/packages/d8/1b/2bd062adeb7c7511b847b32e356024980c0ffcf35f28947792c2d8ad2288/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2fdbd4c067c60a0ac7eca14b6bd18a5bebace54eb757c706b47ea93204f7a37c", size = 3398225, upload-time = "2025-03-13T10:51:03.243Z" }, - { url = 
"https://files.pythonhosted.org/packages/8a/63/38be071b0c8e06840bc6046991636bcb30c27f6bb1e670f4f4bc87cf49cc/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dd9a0061e403546f7377df940e866c3e678d7d4e9643d0461ea442b4f89e61a", size = 3038874, upload-time = "2025-03-13T10:51:06.235Z" }, - { url = "https://files.pythonhosted.org/packages/ec/83/afa94193c09246417c23a3c75a8a0a96bf44ab5630a3015538d0c316dd4b/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:db9484aeb2e200c43b915a1a0150ea885e35f357a5a8fabf7373af333dcc8dbf", size = 9014448, upload-time = "2025-03-13T10:51:10.927Z" }, - { url = "https://files.pythonhosted.org/packages/ae/b3/0e1a37d4f84c0f014d43701c11eb8072704f6efe8d8fc2dcdb79c47d76de/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:ed248ab5279e601a30a4d67bdb897ecbe955a50f1e7bb62bd99f07dd11c2f5b6", size = 8937877, upload-time = "2025-03-13T10:51:12.688Z" }, - { url = "https://files.pythonhosted.org/packages/ac/33/ff08f50e6d615eb180a4a328c65907feb6ded0b8f990ec923969759dc379/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:9ac78b12e541d4ce67b4dfd970e44c060a2147b9b2a21f509566d556a509c67d", size = 9186645, upload-time = "2025-03-13T10:51:14.723Z" }, - { url = "https://files.pythonhosted.org/packages/5f/aa/8ae85f69a9f6012c6f8011c6f4aa1c96154c816e9eea2e1b758601157833/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e5a69c1a4496b81a5ee5d2c1f3f7fbdf95e90a0196101b0ee89ed9956b8a168f", size = 9384380, upload-time = "2025-03-13T10:51:16.526Z" }, - { url = "https://files.pythonhosted.org/packages/e8/5b/a5d98c89f747455e8b7a9504910c865d5e51da55e825a7ae641fb5ff0a58/tokenizers-0.21.1-cp39-abi3-win32.whl", hash = "sha256:1039a3a5734944e09de1d48761ade94e00d0fa760c0e0551151d4dd851ba63e3", size = 2239506, upload-time = "2025-03-13T10:51:20.643Z" }, - { url = 
"https://files.pythonhosted.org/packages/e6/b6/072a8e053ae600dcc2ac0da81a23548e3b523301a442a6ca900e92ac35be/tokenizers-0.21.1-cp39-abi3-win_amd64.whl", hash = "sha256:0f0dcbcc9f6e13e675a66d7a5f2f225a736745ce484c1a4e07476a89ccdad382", size = 2435481, upload-time = "2025-03-13T10:51:19.243Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e5/21ff33ecfa2101c1bb0f9b6df750553bd873b7fb532ce2cb276ff40b197f/tiktoken-0.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e88f121c1c22b726649ce67c089b90ddda8b9662545a8aeb03cfef15967ddd03", size = 1065073 }, + { url = "https://files.pythonhosted.org/packages/8e/03/a95e7b4863ee9ceec1c55983e4cc9558bcfd8f4f80e19c4f8a99642f697d/tiktoken-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a6600660f2f72369acb13a57fb3e212434ed38b045fd8cc6cdd74947b4b5d210", size = 1008075 }, + { url = "https://files.pythonhosted.org/packages/40/10/1305bb02a561595088235a513ec73e50b32e74364fef4de519da69bc8010/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95e811743b5dfa74f4b227927ed86cbc57cad4df859cb3b643be797914e41794", size = 1140754 }, + { url = "https://files.pythonhosted.org/packages/1b/40/da42522018ca496432ffd02793c3a72a739ac04c3794a4914570c9bb2925/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99376e1370d59bcf6935c933cb9ba64adc29033b7e73f5f7569f3aad86552b22", size = 1196678 }, + { url = "https://files.pythonhosted.org/packages/5c/41/1e59dddaae270ba20187ceb8aa52c75b24ffc09f547233991d5fd822838b/tiktoken-0.9.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:badb947c32739fb6ddde173e14885fb3de4d32ab9d8c591cbd013c22b4c31dd2", size = 1259283 }, + { url = "https://files.pythonhosted.org/packages/5b/64/b16003419a1d7728d0d8c0d56a4c24325e7b10a21a9dd1fc0f7115c02f0a/tiktoken-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:5a62d7a25225bafed786a524c1b9f0910a1128f4232615bf3f8257a73aaa3b16", size = 894897 }, ] [[package]] -name = "torch" -version = 
"2.7.0" +name = "tld" +version = "0.13.1" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "filelock" }, - { name = "fsspec" }, - { name = "jinja2" }, - { name = "networkx" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools" }, - { name = "sympy" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "typing-extensions" }, -] +sdist = { url = 
"https://files.pythonhosted.org/packages/df/a1/5723b07a70c1841a80afc9ac572fdf53488306848d844cd70519391b0d26/tld-0.13.1.tar.gz", hash = "sha256:75ec00936cbcf564f67361c41713363440b6c4ef0f0c1592b5b0fbe72c17a350", size = 462000 } wheels = [ - { url = "https://files.pythonhosted.org/packages/aa/5e/ac759f4c0ab7c01feffa777bd68b43d2ac61560a9770eeac074b450f81d4/torch-2.7.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:36a6368c7ace41ad1c0f69f18056020b6a5ca47bedaca9a2f3b578f5a104c26c", size = 99013250, upload-time = "2025-04-23T14:35:15.589Z" }, - { url = "https://files.pythonhosted.org/packages/9c/58/2d245b6f1ef61cf11dfc4aceeaacbb40fea706ccebac3f863890c720ab73/torch-2.7.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:15aab3e31c16feb12ae0a88dba3434a458874636f360c567caa6a91f6bfba481", size = 865042157, upload-time = "2025-04-23T14:32:56.011Z" }, - { url = "https://files.pythonhosted.org/packages/44/80/b353c024e6b624cd9ce1d66dcb9d24e0294680f95b369f19280e241a0159/torch-2.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:f56d4b2510934e072bab3ab8987e00e60e1262fb238176168f5e0c43a1320c6d", size = 212482262, upload-time = "2025-04-23T14:35:03.527Z" }, - { url = "https://files.pythonhosted.org/packages/ee/8d/b2939e5254be932db1a34b2bd099070c509e8887e0c5a90c498a917e4032/torch-2.7.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:30b7688a87239a7de83f269333651d8e582afffce6f591fff08c046f7787296e", size = 68574294, upload-time = "2025-04-23T14:34:47.098Z" }, + { url = "https://files.pythonhosted.org/packages/dc/70/b2f38360c3fc4bc9b5e8ef429e1fde63749144ac583c2dbdf7e21e27a9ad/tld-0.13.1-py2.py3-none-any.whl", hash = "sha256:a2d35109433ac83486ddf87e3c4539ab2c5c2478230e5d9c060a18af4b03aa7c", size = 274718 }, ] [[package]] @@ -1686,41 +1290,27 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 }, ] [[package]] -name = "transformers" -version = "4.51.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "filelock" }, - { name = "huggingface-hub" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "pyyaml" }, - { name = "regex" }, - { name = "requests" }, - { name = "safetensors" }, - { name = "tokenizers" }, - { name = "tqdm" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f1/11/7414d5bc07690002ce4d7553602107bf969af85144bbd02830f9fb471236/transformers-4.51.3.tar.gz", hash = "sha256:e292fcab3990c6defe6328f0f7d2004283ca81a7a07b2de9a46d67fd81ea1409", size = 8941266, upload-time = "2025-04-14T08:15:00.485Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/b6/5257d04ae327b44db31f15cce39e6020cc986333c715660b1315a9724d82/transformers-4.51.3-py3-none-any.whl", hash = "sha256:fd3279633ceb2b777013234bbf0b4f5c2d23c4626b05497691f00cfda55e8a83", size = 
10383940, upload-time = "2025-04-14T08:13:43.023Z" }, -] - -[[package]] -name = "triton" -version = "3.3.0" +name = "trafilatura" +version = "2.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "setuptools" }, + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "courlan" }, + { name = "htmldate" }, + { name = "justext" }, + { name = "lxml" }, + { name = "urllib3" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/06/25/e3ebeefdebfdfae8c4a4396f5a6ea51fc6fa0831d63ce338e5090a8003dc/trafilatura-2.0.0.tar.gz", hash = "sha256:ceb7094a6ecc97e72fea73c7dba36714c5c5b577b6470e4520dca893706d6247", size = 253404 } wheels = [ - { url = "https://files.pythonhosted.org/packages/11/53/ce18470914ab6cfbec9384ee565d23c4d1c55f0548160b1c7b33000b11fd/triton-3.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b68c778f6c4218403a6bd01be7484f6dc9e20fe2083d22dd8aef33e3b87a10a3", size = 156504509, upload-time = "2025-04-09T20:27:40.413Z" }, + { url = "https://files.pythonhosted.org/packages/8a/b6/097367f180b6383a3581ca1b86fcae284e52075fa941d1232df35293363c/trafilatura-2.0.0-py3-none-any.whl", hash = "sha256:77eb5d1e993747f6f20938e1de2d840020719735690c840b9a1024803a4cd51d", size = 132557 }, ] [[package]] @@ -1733,86 +1323,98 @@ dependencies = [ { name = "shellingham" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/98/1a/5f36851f439884bcfe8539f6a20ff7516e7b60f319bbaf69a90dc35cc2eb/typer-0.15.3.tar.gz", hash = "sha256:818873625d0569653438316567861899f7e9972f2e6e0c16dab608345ced713c", size = 101641, upload-time = "2025-04-28T21:40:59.204Z" } +sdist = { url = "https://files.pythonhosted.org/packages/98/1a/5f36851f439884bcfe8539f6a20ff7516e7b60f319bbaf69a90dc35cc2eb/typer-0.15.3.tar.gz", hash = "sha256:818873625d0569653438316567861899f7e9972f2e6e0c16dab608345ced713c", size = 101641 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/48/20/9d953de6f4367163d23ec823200eb3ecb0050a2609691e512c8b95827a9b/typer-0.15.3-py3-none-any.whl", hash = "sha256:c86a65ad77ca531f03de08d1b9cb67cd09ad02ddddf4b34745b5008f43b239bd", size = 45253, upload-time = "2025-04-28T21:40:56.269Z" }, + { url = "https://files.pythonhosted.org/packages/48/20/9d953de6f4367163d23ec823200eb3ecb0050a2609691e512c8b95827a9b/typer-0.15.3-py3-none-any.whl", hash = "sha256:c86a65ad77ca531f03de08d1b9cb67cd09ad02ddddf4b34745b5008f43b239bd", size = 45253 }, ] [[package]] name = "typing-extensions" version = "4.13.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967, upload-time = "2025-04-10T14:19:05.416Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967 } wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806, upload-time = "2025-04-10T14:19:03.967Z" }, + { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806 }, ] [[package]] name = "tzdata" version = "2025.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380, upload-time = "2025-03-23T13:54:43.652Z" } +sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 }, +] + +[[package]] +name = "tzlocal" +version = "5.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd", size = 30761 } wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" }, + { url = "https://files.pythonhosted.org/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d", size = 18026 }, ] [[package]] name = "urllib3" version = "2.4.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/8a/78/16493d9c386d8e60e442a35feac5e00f0913c0f4b7c217c11e8ec2ff53e0/urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466", size = 390672, upload-time = "2025-04-10T15:23:39.232Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8a/78/16493d9c386d8e60e442a35feac5e00f0913c0f4b7c217c11e8ec2ff53e0/urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466", size = 390672 } wheels = [ - { url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680, upload-time = "2025-04-10T15:23:37.377Z" }, + { url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680 }, ] [[package]] name = "win32-setctime" version = "1.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867, upload-time = "2024-12-07T15:28:28.314Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867 } wheels = [ - { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = 
"2024-12-07T15:28:26.465Z" }, + { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083 }, ] [[package]] name = "xlrd" version = "2.0.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a6/b3/19a2540d21dea5f908304375bd43f5ed7a4c28a370dc9122c565423e6b44/xlrd-2.0.1.tar.gz", hash = "sha256:f72f148f54442c6b056bf931dbc34f986fd0c3b0b6b5a58d013c9aef274d0c88", size = 100259, upload-time = "2020-12-11T10:14:22.201Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a6/b3/19a2540d21dea5f908304375bd43f5ed7a4c28a370dc9122c565423e6b44/xlrd-2.0.1.tar.gz", hash = "sha256:f72f148f54442c6b056bf931dbc34f986fd0c3b0b6b5a58d013c9aef274d0c88", size = 100259 } wheels = [ - { url = "https://files.pythonhosted.org/packages/a6/0c/c2a72d51fe56e08a08acc85d13013558a2d793028ae7385448a6ccdfae64/xlrd-2.0.1-py2.py3-none-any.whl", hash = "sha256:6a33ee89877bd9abc1158129f6e94be74e2679636b8a205b43b85206c3f0bbdd", size = 96531, upload-time = "2020-12-11T10:14:20.877Z" }, + { url = "https://files.pythonhosted.org/packages/a6/0c/c2a72d51fe56e08a08acc85d13013558a2d793028ae7385448a6ccdfae64/xlrd-2.0.1-py2.py3-none-any.whl", hash = "sha256:6a33ee89877bd9abc1158129f6e94be74e2679636b8a205b43b85206c3f0bbdd", size = 96531 }, ] [[package]] name = "xlsxwriter" version = "3.2.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a7/d1/e026d33dd5d552e5bf3a873dee54dad66b550230df8290d79394f09b2315/xlsxwriter-3.2.3.tar.gz", hash = "sha256:ad6fd41bdcf1b885876b1f6b7087560aecc9ae5a9cc2ba97dcac7ab2e210d3d5", size = 209135, upload-time = "2025-04-17T10:11:23.481Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/a7/d1/e026d33dd5d552e5bf3a873dee54dad66b550230df8290d79394f09b2315/xlsxwriter-3.2.3.tar.gz", hash = "sha256:ad6fd41bdcf1b885876b1f6b7087560aecc9ae5a9cc2ba97dcac7ab2e210d3d5", size = 209135 } wheels = [ - { url = "https://files.pythonhosted.org/packages/37/b1/a252d499f2760b314fcf264d2b36fcc4343a1ecdb25492b210cb0db70a68/XlsxWriter-3.2.3-py3-none-any.whl", hash = "sha256:593f8296e8a91790c6d0378ab08b064f34a642b3feb787cf6738236bd0a4860d", size = 169433, upload-time = "2025-04-17T10:11:21.329Z" }, + { url = "https://files.pythonhosted.org/packages/37/b1/a252d499f2760b314fcf264d2b36fcc4343a1ecdb25492b210cb0db70a68/XlsxWriter-3.2.3-py3-none-any.whl", hash = "sha256:593f8296e8a91790c6d0378ab08b064f34a642b3feb787cf6738236bd0a4860d", size = 169433 }, ] [[package]] name = "xxhash" version = "3.5.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/00/5e/d6e5258d69df8b4ed8c83b6664f2b47d30d2dec551a29ad72a6c69eafd31/xxhash-3.5.0.tar.gz", hash = "sha256:84f2caddf951c9cbf8dc2e22a89d4ccf5d86391ac6418fe81e3c67d0cf60b45f", size = 84241, upload-time = "2024-08-17T09:20:38.972Z" } +sdist = { url = "https://files.pythonhosted.org/packages/00/5e/d6e5258d69df8b4ed8c83b6664f2b47d30d2dec551a29ad72a6c69eafd31/xxhash-3.5.0.tar.gz", hash = "sha256:84f2caddf951c9cbf8dc2e22a89d4ccf5d86391ac6418fe81e3c67d0cf60b45f", size = 84241 } wheels = [ - { url = "https://files.pythonhosted.org/packages/07/0e/1bfce2502c57d7e2e787600b31c83535af83746885aa1a5f153d8c8059d6/xxhash-3.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:14470ace8bd3b5d51318782cd94e6f94431974f16cb3b8dc15d52f3b69df8e00", size = 31969, upload-time = "2024-08-17T09:18:24.025Z" }, - { url = "https://files.pythonhosted.org/packages/3f/d6/8ca450d6fe5b71ce521b4e5db69622383d039e2b253e9b2f24f93265b52c/xxhash-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:59aa1203de1cb96dbeab595ded0ad0c0056bb2245ae11fac11c0ceea861382b9", size = 
30787, upload-time = "2024-08-17T09:18:25.318Z" }, - { url = "https://files.pythonhosted.org/packages/5b/84/de7c89bc6ef63d750159086a6ada6416cc4349eab23f76ab870407178b93/xxhash-3.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08424f6648526076e28fae6ea2806c0a7d504b9ef05ae61d196d571e5c879c84", size = 220959, upload-time = "2024-08-17T09:18:26.518Z" }, - { url = "https://files.pythonhosted.org/packages/fe/86/51258d3e8a8545ff26468c977101964c14d56a8a37f5835bc0082426c672/xxhash-3.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:61a1ff00674879725b194695e17f23d3248998b843eb5e933007ca743310f793", size = 200006, upload-time = "2024-08-17T09:18:27.905Z" }, - { url = "https://files.pythonhosted.org/packages/02/0a/96973bd325412feccf23cf3680fd2246aebf4b789122f938d5557c54a6b2/xxhash-3.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2f2c61bee5844d41c3eb015ac652a0229e901074951ae48581d58bfb2ba01be", size = 428326, upload-time = "2024-08-17T09:18:29.335Z" }, - { url = "https://files.pythonhosted.org/packages/11/a7/81dba5010f7e733de88af9555725146fc133be97ce36533867f4c7e75066/xxhash-3.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d32a592cac88d18cc09a89172e1c32d7f2a6e516c3dfde1b9adb90ab5df54a6", size = 194380, upload-time = "2024-08-17T09:18:30.706Z" }, - { url = "https://files.pythonhosted.org/packages/fb/7d/f29006ab398a173f4501c0e4977ba288f1c621d878ec217b4ff516810c04/xxhash-3.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70dabf941dede727cca579e8c205e61121afc9b28516752fd65724be1355cc90", size = 207934, upload-time = "2024-08-17T09:18:32.133Z" }, - { url = "https://files.pythonhosted.org/packages/8a/6e/6e88b8f24612510e73d4d70d9b0c7dff62a2e78451b9f0d042a5462c8d03/xxhash-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:e5d0ddaca65ecca9c10dcf01730165fd858533d0be84c75c327487c37a906a27", size = 216301, upload-time = "2024-08-17T09:18:33.474Z" }, - { url = "https://files.pythonhosted.org/packages/af/51/7862f4fa4b75a25c3b4163c8a873f070532fe5f2d3f9b3fc869c8337a398/xxhash-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3e5b5e16c5a480fe5f59f56c30abdeba09ffd75da8d13f6b9b6fd224d0b4d0a2", size = 203351, upload-time = "2024-08-17T09:18:34.889Z" }, - { url = "https://files.pythonhosted.org/packages/22/61/8d6a40f288f791cf79ed5bb113159abf0c81d6efb86e734334f698eb4c59/xxhash-3.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149b7914451eb154b3dfaa721315117ea1dac2cc55a01bfbd4df7c68c5dd683d", size = 210294, upload-time = "2024-08-17T09:18:36.355Z" }, - { url = "https://files.pythonhosted.org/packages/17/02/215c4698955762d45a8158117190261b2dbefe9ae7e5b906768c09d8bc74/xxhash-3.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:eade977f5c96c677035ff39c56ac74d851b1cca7d607ab3d8f23c6b859379cab", size = 414674, upload-time = "2024-08-17T09:18:38.536Z" }, - { url = "https://files.pythonhosted.org/packages/31/5c/b7a8db8a3237cff3d535261325d95de509f6a8ae439a5a7a4ffcff478189/xxhash-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fa9f547bd98f5553d03160967866a71056a60960be00356a15ecc44efb40ba8e", size = 192022, upload-time = "2024-08-17T09:18:40.138Z" }, - { url = "https://files.pythonhosted.org/packages/78/e3/dd76659b2811b3fd06892a8beb850e1996b63e9235af5a86ea348f053e9e/xxhash-3.5.0-cp312-cp312-win32.whl", hash = "sha256:f7b58d1fd3551b8c80a971199543379be1cee3d0d409e1f6d8b01c1a2eebf1f8", size = 30170, upload-time = "2024-08-17T09:18:42.163Z" }, - { url = "https://files.pythonhosted.org/packages/d9/6b/1c443fe6cfeb4ad1dcf231cdec96eb94fb43d6498b4469ed8b51f8b59a37/xxhash-3.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:fa0cafd3a2af231b4e113fba24a65d7922af91aeb23774a8b78228e6cd785e3e", size = 30040, upload-time = "2024-08-17T09:18:43.699Z" }, - { url = 
"https://files.pythonhosted.org/packages/0f/eb/04405305f290173acc0350eba6d2f1a794b57925df0398861a20fbafa415/xxhash-3.5.0-cp312-cp312-win_arm64.whl", hash = "sha256:586886c7e89cb9828bcd8a5686b12e161368e0064d040e225e72607b43858ba2", size = 26796, upload-time = "2024-08-17T09:18:45.29Z" }, + { url = "https://files.pythonhosted.org/packages/07/0e/1bfce2502c57d7e2e787600b31c83535af83746885aa1a5f153d8c8059d6/xxhash-3.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:14470ace8bd3b5d51318782cd94e6f94431974f16cb3b8dc15d52f3b69df8e00", size = 31969 }, + { url = "https://files.pythonhosted.org/packages/3f/d6/8ca450d6fe5b71ce521b4e5db69622383d039e2b253e9b2f24f93265b52c/xxhash-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:59aa1203de1cb96dbeab595ded0ad0c0056bb2245ae11fac11c0ceea861382b9", size = 30787 }, + { url = "https://files.pythonhosted.org/packages/5b/84/de7c89bc6ef63d750159086a6ada6416cc4349eab23f76ab870407178b93/xxhash-3.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08424f6648526076e28fae6ea2806c0a7d504b9ef05ae61d196d571e5c879c84", size = 220959 }, + { url = "https://files.pythonhosted.org/packages/fe/86/51258d3e8a8545ff26468c977101964c14d56a8a37f5835bc0082426c672/xxhash-3.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:61a1ff00674879725b194695e17f23d3248998b843eb5e933007ca743310f793", size = 200006 }, + { url = "https://files.pythonhosted.org/packages/02/0a/96973bd325412feccf23cf3680fd2246aebf4b789122f938d5557c54a6b2/xxhash-3.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2f2c61bee5844d41c3eb015ac652a0229e901074951ae48581d58bfb2ba01be", size = 428326 }, + { url = "https://files.pythonhosted.org/packages/11/a7/81dba5010f7e733de88af9555725146fc133be97ce36533867f4c7e75066/xxhash-3.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d32a592cac88d18cc09a89172e1c32d7f2a6e516c3dfde1b9adb90ab5df54a6", size = 194380 }, + { url = 
"https://files.pythonhosted.org/packages/fb/7d/f29006ab398a173f4501c0e4977ba288f1c621d878ec217b4ff516810c04/xxhash-3.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70dabf941dede727cca579e8c205e61121afc9b28516752fd65724be1355cc90", size = 207934 }, + { url = "https://files.pythonhosted.org/packages/8a/6e/6e88b8f24612510e73d4d70d9b0c7dff62a2e78451b9f0d042a5462c8d03/xxhash-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e5d0ddaca65ecca9c10dcf01730165fd858533d0be84c75c327487c37a906a27", size = 216301 }, + { url = "https://files.pythonhosted.org/packages/af/51/7862f4fa4b75a25c3b4163c8a873f070532fe5f2d3f9b3fc869c8337a398/xxhash-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3e5b5e16c5a480fe5f59f56c30abdeba09ffd75da8d13f6b9b6fd224d0b4d0a2", size = 203351 }, + { url = "https://files.pythonhosted.org/packages/22/61/8d6a40f288f791cf79ed5bb113159abf0c81d6efb86e734334f698eb4c59/xxhash-3.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149b7914451eb154b3dfaa721315117ea1dac2cc55a01bfbd4df7c68c5dd683d", size = 210294 }, + { url = "https://files.pythonhosted.org/packages/17/02/215c4698955762d45a8158117190261b2dbefe9ae7e5b906768c09d8bc74/xxhash-3.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:eade977f5c96c677035ff39c56ac74d851b1cca7d607ab3d8f23c6b859379cab", size = 414674 }, + { url = "https://files.pythonhosted.org/packages/31/5c/b7a8db8a3237cff3d535261325d95de509f6a8ae439a5a7a4ffcff478189/xxhash-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fa9f547bd98f5553d03160967866a71056a60960be00356a15ecc44efb40ba8e", size = 192022 }, + { url = "https://files.pythonhosted.org/packages/78/e3/dd76659b2811b3fd06892a8beb850e1996b63e9235af5a86ea348f053e9e/xxhash-3.5.0-cp312-cp312-win32.whl", hash = "sha256:f7b58d1fd3551b8c80a971199543379be1cee3d0d409e1f6d8b01c1a2eebf1f8", size = 30170 }, + { url = 
"https://files.pythonhosted.org/packages/d9/6b/1c443fe6cfeb4ad1dcf231cdec96eb94fb43d6498b4469ed8b51f8b59a37/xxhash-3.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:fa0cafd3a2af231b4e113fba24a65d7922af91aeb23774a8b78228e6cd785e3e", size = 30040 }, + { url = "https://files.pythonhosted.org/packages/0f/eb/04405305f290173acc0350eba6d2f1a794b57925df0398861a20fbafa415/xxhash-3.5.0-cp312-cp312-win_arm64.whl", hash = "sha256:586886c7e89cb9828bcd8a5686b12e161368e0064d040e225e72607b43858ba2", size = 26796 }, ] [[package]] @@ -1824,96 +1426,72 @@ dependencies = [ { name = "multidict" }, { name = "propcache" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/62/51/c0edba5219027f6eab262e139f73e2417b0f4efffa23bf562f6e18f76ca5/yarl-1.20.0.tar.gz", hash = "sha256:686d51e51ee5dfe62dec86e4866ee0e9ed66df700d55c828a615640adc885307", size = 185258, upload-time = "2025-04-17T00:45:14.661Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c3/e8/3efdcb83073df978bb5b1a9cc0360ce596680e6c3fac01f2a994ccbb8939/yarl-1.20.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e06b9f6cdd772f9b665e5ba8161968e11e403774114420737f7884b5bd7bdf6f", size = 147089, upload-time = "2025-04-17T00:42:39.602Z" }, - { url = "https://files.pythonhosted.org/packages/60/c3/9e776e98ea350f76f94dd80b408eaa54e5092643dbf65fd9babcffb60509/yarl-1.20.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b9ae2fbe54d859b3ade40290f60fe40e7f969d83d482e84d2c31b9bff03e359e", size = 97706, upload-time = "2025-04-17T00:42:41.469Z" }, - { url = "https://files.pythonhosted.org/packages/0c/5b/45cdfb64a3b855ce074ae607b9fc40bc82e7613b94e7612b030255c93a09/yarl-1.20.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6d12b8945250d80c67688602c891237994d203d42427cb14e36d1a732eda480e", size = 95719, upload-time = "2025-04-17T00:42:43.666Z" }, - { url = 
"https://files.pythonhosted.org/packages/2d/4e/929633b249611eeed04e2f861a14ed001acca3ef9ec2a984a757b1515889/yarl-1.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:087e9731884621b162a3e06dc0d2d626e1542a617f65ba7cc7aeab279d55ad33", size = 343972, upload-time = "2025-04-17T00:42:45.391Z" }, - { url = "https://files.pythonhosted.org/packages/49/fd/047535d326c913f1a90407a3baf7ff535b10098611eaef2c527e32e81ca1/yarl-1.20.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:69df35468b66c1a6e6556248e6443ef0ec5f11a7a4428cf1f6281f1879220f58", size = 339639, upload-time = "2025-04-17T00:42:47.552Z" }, - { url = "https://files.pythonhosted.org/packages/48/2f/11566f1176a78f4bafb0937c0072410b1b0d3640b297944a6a7a556e1d0b/yarl-1.20.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b2992fe29002fd0d4cbaea9428b09af9b8686a9024c840b8a2b8f4ea4abc16f", size = 353745, upload-time = "2025-04-17T00:42:49.406Z" }, - { url = "https://files.pythonhosted.org/packages/26/17/07dfcf034d6ae8837b33988be66045dd52f878dfb1c4e8f80a7343f677be/yarl-1.20.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c903e0b42aab48abfbac668b5a9d7b6938e721a6341751331bcd7553de2dcae", size = 354178, upload-time = "2025-04-17T00:42:51.588Z" }, - { url = "https://files.pythonhosted.org/packages/15/45/212604d3142d84b4065d5f8cab6582ed3d78e4cc250568ef2a36fe1cf0a5/yarl-1.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf099e2432131093cc611623e0b0bcc399b8cddd9a91eded8bfb50402ec35018", size = 349219, upload-time = "2025-04-17T00:42:53.674Z" }, - { url = "https://files.pythonhosted.org/packages/e6/e0/a10b30f294111c5f1c682461e9459935c17d467a760c21e1f7db400ff499/yarl-1.20.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a7f62f5dc70a6c763bec9ebf922be52aa22863d9496a9a30124d65b489ea672", size = 337266, 
upload-time = "2025-04-17T00:42:55.49Z" }, - { url = "https://files.pythonhosted.org/packages/33/a6/6efa1d85a675d25a46a167f9f3e80104cde317dfdf7f53f112ae6b16a60a/yarl-1.20.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:54ac15a8b60382b2bcefd9a289ee26dc0920cf59b05368c9b2b72450751c6eb8", size = 360873, upload-time = "2025-04-17T00:42:57.895Z" }, - { url = "https://files.pythonhosted.org/packages/77/67/c8ab718cb98dfa2ae9ba0f97bf3cbb7d45d37f13fe1fbad25ac92940954e/yarl-1.20.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:25b3bc0763a7aca16a0f1b5e8ef0f23829df11fb539a1b70476dcab28bd83da7", size = 360524, upload-time = "2025-04-17T00:43:00.094Z" }, - { url = "https://files.pythonhosted.org/packages/bd/e8/c3f18660cea1bc73d9f8a2b3ef423def8dadbbae6c4afabdb920b73e0ead/yarl-1.20.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b2586e36dc070fc8fad6270f93242124df68b379c3a251af534030a4a33ef594", size = 365370, upload-time = "2025-04-17T00:43:02.242Z" }, - { url = "https://files.pythonhosted.org/packages/c9/99/33f3b97b065e62ff2d52817155a89cfa030a1a9b43fee7843ef560ad9603/yarl-1.20.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:866349da9d8c5290cfefb7fcc47721e94de3f315433613e01b435473be63daa6", size = 373297, upload-time = "2025-04-17T00:43:04.189Z" }, - { url = "https://files.pythonhosted.org/packages/3d/89/7519e79e264a5f08653d2446b26d4724b01198a93a74d2e259291d538ab1/yarl-1.20.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:33bb660b390a0554d41f8ebec5cd4475502d84104b27e9b42f5321c5192bfcd1", size = 378771, upload-time = "2025-04-17T00:43:06.609Z" }, - { url = "https://files.pythonhosted.org/packages/3a/58/6c460bbb884abd2917c3eef6f663a4a873f8dc6f498561fc0ad92231c113/yarl-1.20.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:737e9f171e5a07031cbee5e9180f6ce21a6c599b9d4b2c24d35df20a52fabf4b", size = 375000, upload-time = "2025-04-17T00:43:09.01Z" }, - { url = 
"https://files.pythonhosted.org/packages/3b/2a/dd7ed1aa23fea996834278d7ff178f215b24324ee527df53d45e34d21d28/yarl-1.20.0-cp312-cp312-win32.whl", hash = "sha256:839de4c574169b6598d47ad61534e6981979ca2c820ccb77bf70f4311dd2cc64", size = 86355, upload-time = "2025-04-17T00:43:11.311Z" }, - { url = "https://files.pythonhosted.org/packages/ca/c6/333fe0338305c0ac1c16d5aa7cc4841208d3252bbe62172e0051006b5445/yarl-1.20.0-cp312-cp312-win_amd64.whl", hash = "sha256:3d7dbbe44b443b0c4aa0971cb07dcb2c2060e4a9bf8d1301140a33a93c98e18c", size = 92904, upload-time = "2025-04-17T00:43:13.087Z" }, - { url = "https://files.pythonhosted.org/packages/ea/1f/70c57b3d7278e94ed22d85e09685d3f0a38ebdd8c5c73b65ba4c0d0fe002/yarl-1.20.0-py3-none-any.whl", hash = "sha256:5d0fe6af927a47a230f31e6004621fd0959eaa915fc62acfafa67ff7229a3124", size = 46124, upload-time = "2025-04-17T00:45:12.199Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/62/51/c0edba5219027f6eab262e139f73e2417b0f4efffa23bf562f6e18f76ca5/yarl-1.20.0.tar.gz", hash = "sha256:686d51e51ee5dfe62dec86e4866ee0e9ed66df700d55c828a615640adc885307", size = 185258 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c3/e8/3efdcb83073df978bb5b1a9cc0360ce596680e6c3fac01f2a994ccbb8939/yarl-1.20.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e06b9f6cdd772f9b665e5ba8161968e11e403774114420737f7884b5bd7bdf6f", size = 147089 }, + { url = "https://files.pythonhosted.org/packages/60/c3/9e776e98ea350f76f94dd80b408eaa54e5092643dbf65fd9babcffb60509/yarl-1.20.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b9ae2fbe54d859b3ade40290f60fe40e7f969d83d482e84d2c31b9bff03e359e", size = 97706 }, + { url = "https://files.pythonhosted.org/packages/0c/5b/45cdfb64a3b855ce074ae607b9fc40bc82e7613b94e7612b030255c93a09/yarl-1.20.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6d12b8945250d80c67688602c891237994d203d42427cb14e36d1a732eda480e", size = 95719 }, + { url = 
"https://files.pythonhosted.org/packages/2d/4e/929633b249611eeed04e2f861a14ed001acca3ef9ec2a984a757b1515889/yarl-1.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:087e9731884621b162a3e06dc0d2d626e1542a617f65ba7cc7aeab279d55ad33", size = 343972 }, + { url = "https://files.pythonhosted.org/packages/49/fd/047535d326c913f1a90407a3baf7ff535b10098611eaef2c527e32e81ca1/yarl-1.20.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:69df35468b66c1a6e6556248e6443ef0ec5f11a7a4428cf1f6281f1879220f58", size = 339639 }, + { url = "https://files.pythonhosted.org/packages/48/2f/11566f1176a78f4bafb0937c0072410b1b0d3640b297944a6a7a556e1d0b/yarl-1.20.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b2992fe29002fd0d4cbaea9428b09af9b8686a9024c840b8a2b8f4ea4abc16f", size = 353745 }, + { url = "https://files.pythonhosted.org/packages/26/17/07dfcf034d6ae8837b33988be66045dd52f878dfb1c4e8f80a7343f677be/yarl-1.20.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c903e0b42aab48abfbac668b5a9d7b6938e721a6341751331bcd7553de2dcae", size = 354178 }, + { url = "https://files.pythonhosted.org/packages/15/45/212604d3142d84b4065d5f8cab6582ed3d78e4cc250568ef2a36fe1cf0a5/yarl-1.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf099e2432131093cc611623e0b0bcc399b8cddd9a91eded8bfb50402ec35018", size = 349219 }, + { url = "https://files.pythonhosted.org/packages/e6/e0/a10b30f294111c5f1c682461e9459935c17d467a760c21e1f7db400ff499/yarl-1.20.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a7f62f5dc70a6c763bec9ebf922be52aa22863d9496a9a30124d65b489ea672", size = 337266 }, + { url = "https://files.pythonhosted.org/packages/33/a6/6efa1d85a675d25a46a167f9f3e80104cde317dfdf7f53f112ae6b16a60a/yarl-1.20.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:54ac15a8b60382b2bcefd9a289ee26dc0920cf59b05368c9b2b72450751c6eb8", size = 360873 }, + { url = "https://files.pythonhosted.org/packages/77/67/c8ab718cb98dfa2ae9ba0f97bf3cbb7d45d37f13fe1fbad25ac92940954e/yarl-1.20.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:25b3bc0763a7aca16a0f1b5e8ef0f23829df11fb539a1b70476dcab28bd83da7", size = 360524 }, + { url = "https://files.pythonhosted.org/packages/bd/e8/c3f18660cea1bc73d9f8a2b3ef423def8dadbbae6c4afabdb920b73e0ead/yarl-1.20.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b2586e36dc070fc8fad6270f93242124df68b379c3a251af534030a4a33ef594", size = 365370 }, + { url = "https://files.pythonhosted.org/packages/c9/99/33f3b97b065e62ff2d52817155a89cfa030a1a9b43fee7843ef560ad9603/yarl-1.20.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:866349da9d8c5290cfefb7fcc47721e94de3f315433613e01b435473be63daa6", size = 373297 }, + { url = "https://files.pythonhosted.org/packages/3d/89/7519e79e264a5f08653d2446b26d4724b01198a93a74d2e259291d538ab1/yarl-1.20.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:33bb660b390a0554d41f8ebec5cd4475502d84104b27e9b42f5321c5192bfcd1", size = 378771 }, + { url = "https://files.pythonhosted.org/packages/3a/58/6c460bbb884abd2917c3eef6f663a4a873f8dc6f498561fc0ad92231c113/yarl-1.20.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:737e9f171e5a07031cbee5e9180f6ce21a6c599b9d4b2c24d35df20a52fabf4b", size = 375000 }, + { url = "https://files.pythonhosted.org/packages/3b/2a/dd7ed1aa23fea996834278d7ff178f215b24324ee527df53d45e34d21d28/yarl-1.20.0-cp312-cp312-win32.whl", hash = "sha256:839de4c574169b6598d47ad61534e6981979ca2c820ccb77bf70f4311dd2cc64", size = 86355 }, + { url = "https://files.pythonhosted.org/packages/ca/c6/333fe0338305c0ac1c16d5aa7cc4841208d3252bbe62172e0051006b5445/yarl-1.20.0-cp312-cp312-win_amd64.whl", hash = "sha256:3d7dbbe44b443b0c4aa0971cb07dcb2c2060e4a9bf8d1301140a33a93c98e18c", size = 92904 }, + { url = 
"https://files.pythonhosted.org/packages/ea/1f/70c57b3d7278e94ed22d85e09685d3f0a38ebdd8c5c73b65ba4c0d0fe002/yarl-1.20.0-py3-none-any.whl", hash = "sha256:5d0fe6af927a47a230f31e6004621fd0959eaa915fc62acfafa67ff7229a3124", size = 46124 }, ] [[package]] name = "yourbench" -version = "0.3.0" +version = "0.3.1" source = { editable = "." } dependencies = [ { name = "asyncio" }, - { name = "black" }, + { name = "boto3" }, { name = "click" }, { name = "datasets" }, { name = "hf-transfer" }, - { name = "hf-xet" }, - { name = "huggingface-hub", extra = ["inference"] }, + { name = "huggingface-hub", extra = ["hf-xet", "inference"] }, { name = "loguru" }, { name = "markitdown", extra = ["all"] }, - { name = "matplotlib" }, { name = "python-dotenv" }, + { name = "pyyaml" }, { name = "rich" }, { name = "ruff" }, - { name = "scikit-learn" }, - { name = "seaborn" }, { name = "thefuzz" }, { name = "tiktoken" }, { name = "tqdm" }, + { name = "trafilatura" }, { name = "typer" }, ] -[package.optional-dependencies] -all = [ - { name = "bert-score" }, - { name = "rouge-score" }, - { name = "torch" }, - { name = "transformers" }, -] -semantic = [ - { name = "bert-score" }, - { name = "rouge-score" }, - { name = "torch" }, - { name = "transformers" }, -] - [package.metadata] requires-dist = [ { name = "asyncio", specifier = ">=3.4.3" }, - { name = "bert-score", marker = "extra == 'semantic'", specifier = ">=0.3.13" }, - { name = "black", specifier = ">=25.1.0" }, + { name = "boto3", specifier = ">=1.34.0" }, { name = "click", specifier = ">=8.1.7" }, { name = "datasets", specifier = ">=3.3.0" }, { name = "hf-transfer", specifier = ">=0.1.9" }, - { name = "hf-xet", specifier = ">=1.1.0" }, - { name = "huggingface-hub", extras = ["inference"], specifier = ">=0.30.2" }, + { name = "huggingface-hub", extras = ["inference", "hf-xet"], specifier = ">=0.30.2" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "markitdown", extras = ["all"], specifier = ">=0.0.2" }, - { name = "matplotlib", 
specifier = ">=3.10.0" }, { name = "python-dotenv", specifier = ">=1.0.1" }, + { name = "pyyaml", specifier = ">=6.0.1" }, { name = "rich", specifier = ">=13.7.0" }, - { name = "rouge-score", marker = "extra == 'semantic'", specifier = ">=0.1.2" }, { name = "ruff", specifier = ">=0.11.1" }, - { name = "scikit-learn", specifier = ">=1.6.1" }, - { name = "seaborn", specifier = ">=0.13.2" }, { name = "thefuzz", specifier = ">=0.22.1" }, { name = "tiktoken", specifier = ">=0.9.0" }, - { name = "torch", marker = "extra == 'semantic'", specifier = ">=2.6.0" }, { name = "tqdm", specifier = ">=4.67.1" }, - { name = "transformers", marker = "extra == 'semantic'", specifier = ">=4.48.3" }, + { name = "trafilatura", specifier = ">=2.0.0" }, { name = "typer", specifier = ">=0.15.2" }, - { name = "yourbench", extras = ["semantic"], marker = "extra == 'all'" }, ] -provides-extras = ["semantic", "all"] [[package]] name = "youtube-transcript-api" @@ -1923,7 +1501,7 @@ dependencies = [ { name = "defusedxml" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b0/32/f60d87a99c05a53604c58f20f670c7ea6262b55e0bbeb836ffe4550b248b/youtube_transcript_api-1.0.3.tar.gz", hash = "sha256:902baf90e7840a42e1e148335e09fe5575dbff64c81414957aea7038e8a4db46", size = 2153252, upload-time = "2025-03-25T18:14:21.119Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/32/f60d87a99c05a53604c58f20f670c7ea6262b55e0bbeb836ffe4550b248b/youtube_transcript_api-1.0.3.tar.gz", hash = "sha256:902baf90e7840a42e1e148335e09fe5575dbff64c81414957aea7038e8a4db46", size = 2153252 } wheels = [ - { url = "https://files.pythonhosted.org/packages/f0/44/40c03bb0f8bddfb9d2beff2ed31641f52d96c287ba881d20e0c074784ac2/youtube_transcript_api-1.0.3-py3-none-any.whl", hash = "sha256:d1874e57de65cf14c9d7d09b2b37c814d6287fa0e770d4922c4cd32a5b3f6c47", size = 2169911, upload-time = "2025-03-25T18:14:19.416Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/44/40c03bb0f8bddfb9d2beff2ed31641f52d96c287ba881d20e0c074784ac2/youtube_transcript_api-1.0.3-py3-none-any.whl", hash = "sha256:d1874e57de65cf14c9d7d09b2b37c814d6287fa0e770d4922c4cd32a5b3f6c47", size = 2169911 }, ] diff --git a/yourbench/analysis/view_sample_questions.py b/yourbench/analysis/view_sample_questions.py new file mode 100644 index 00000000..db439aa1 --- /dev/null +++ b/yourbench/analysis/view_sample_questions.py @@ -0,0 +1,132 @@ +import random +from typing import List, Literal +from dataclasses import dataclass + +from loguru import logger +from rich.table import Table +from rich.console import Console + +from yourbench.utils.dataset_engine import custom_load_dataset +from yourbench.utils.loading_engine import load_config + + +@dataclass +class Question: + question: str + answer: str + question_type: str + choices: List[str] + difficulty: str + index: int + + @classmethod + def from_dataset_row(cls, row: dict, index: int) -> "Question": + return cls( + question=row.get("question", ""), + answer=row.get("self_answer", ""), + question_type=row.get("self_assessed_question_type", "unknown"), + choices=row.get("choices", []) or [], + difficulty=str(row.get("estimated_difficulty", "")), + index=index, + ) + + @property + def choices_display(self) -> str: + return "\n".join(self.choices) if self.choices else "N/A" + + +class QuestionDisplay: + def __init__(self, console: Console): + self.console = console + + def create_table(self) -> Table: + table = Table(show_header=True, header_style="bold cyan", show_lines=True) + table.add_column("Q #", style="dim", width=5) + table.add_column("Q Type", style="white", width=16) + table.add_column("Question", style="white", no_wrap=False) + table.add_column("Answer", style="white", no_wrap=False) + table.add_column("Choices", style="white", no_wrap=False) + table.add_column("Difficulty", style="white", justify="center", width=10) + return table + + def display_questions(self, 
questions: List[Question], title: str, title_style: str) -> None: + if not questions: + self.console.print(f"[bold red]No {title.lower()} found or it's empty.[/bold red]") + return + + self.console.print(f"[{title_style}]=== {title} ===[/{title_style}]\n") + table = self.create_table() + + for idx, question in enumerate(questions, 1): + table.add_row( + str(idx), + question.question_type, + question.question, + question.answer, + question.choices_display, + question.difficulty, + ) + + self.console.print(table) + self.console.print() + + +class QuestionLoader: + def __init__(self, config: dict, sample_size: int): + self.config = config + self.sample_size = sample_size + + def load_questions(self, subset: Literal["single_shot_questions", "multi_hop_questions"]) -> List[Question]: + dataset = custom_load_dataset(config=self.config, subset=subset) + if not dataset: + return [] + + indices = random.sample(range(len(dataset)), min(self.sample_size, len(dataset))) + return [Question.from_dataset_row(dataset[i], i) for i in indices] + + +def run(*cli_args: List[str]) -> None: + """ + Usage: + yourbench analyze view_sample_questions path/to/config.yaml [sample_size] + + This command loads up to 'sample_size' questions from both + 'single_shot_questions' and 'multi_hop_questions' subsets, then prints + them in a Rich table showing relevant details: + - Question Type + - Actual Question + - Answer (or multiple-choice correct letter) + - Choices (if any) + - Difficulty + - Citations (if any) + + Args: + *cli_args: The CLI arguments passed after 'view_sample_questions' + e.g. ["my_config.yaml", "5"] + """ + if not cli_args: + logger.error("No arguments provided. 
Usage: yourbench analyze view_sample_questions CONFIG_PATH [SAMPLE_SIZE]") + return + + config_path = cli_args[0] + sample_size = int(cli_args[1]) if len(cli_args) > 1 and cli_args[1].isdigit() else 5 + + try: + config = load_config(config_path) + except FileNotFoundError: + logger.error(f"Configuration file not found at '{config_path}'. Aborting.") + return + except Exception as e: + logger.error(f"Failed to load config from '{config_path}': {e}") + return + + loader = QuestionLoader(config, sample_size) + display = QuestionDisplay(Console()) + + # Display single-shot questions + single_shot_questions = loader.load_questions("single_shot_questions") + display.display_questions(single_shot_questions, "Single-Shot Questions (Detailed)", "bold magenta") + + # Display multi-hop questions + multi_hop_questions = loader.load_questions("multi_hop_questions") + display.display_questions(multi_hop_questions, "Multi-Hop Questions (Detailed)", "bold green") diff --git a/yourbench/main.py b/yourbench/main.py index 722622ed..4cad44d8 100644 --- a/yourbench/main.py +++ b/yourbench/main.py @@ -4,12 +4,15 @@ from pathlib import Path import typer +from dotenv import load_dotenv from loguru import logger from yourbench.analysis import run_analysis from yourbench.pipeline.handler import run_pipeline +load_dotenv() + app = typer.Typer( name="yourbench", add_completion=True, diff --git a/yourbench/pipeline/chunking.py b/yourbench/pipeline/chunking.py index 61326be3..aed035fd 100644 --- a/yourbench/pipeline/chunking.py +++ b/yourbench/pipeline/chunking.py @@ -1,873 +1,180 @@ -# ============================================================================= -# chunking.py -# ============================================================================= -""" -@module chunking -@author @sumukshashidhar - -This module implements two modes of chunking for the YourBench pipeline: -1) "fast_chunking" (the default), which chunks by purely length-based rules. 
-2) "semantic_chunking" (requires explicit config), which uses sentence embeddings - and a similarity threshold to decide chunk boundaries. - -Usage: ------- -Typically, you do not call this module directly. Instead, the handler.py -automatically invokes run(config) if the corresponding pipeline setting -(pipeline.chunking.run) is enabled. - -The run(config) function: -1. Loads a dataset specified by the pipeline configuration. -2. Depending on the configured chunking mode: - - fast_chunking (default): Chunks text solely based on maximum token length, - ignoring sentence similarity. - - semantic_chunking (requires pipeline.chunking.chunking_configuration.chunking_mode="semantic_chunking"): - Splits each document into single-hop chunks, guided by user-defined token - length constraints (l_min_tokens, l_max_tokens) and a similarity threshold (tau_threshold). - Uses a transformer model specified in config['model_roles']['chunking'], or a default. -3. Creates multi-hop chunks by sampling subsets of single-hop chunks and concatenating them. -4. Computes optional readability and perplexity metrics for each chunk if debug mode is enabled - and required packages (textstat, evaluate) are available. -5. Saves the dataset containing new columns: - - "chunks" (list of single-hop segments) - - "multihop_chunks" (list of multi-hop segment groups) - - "chunk_info_metrics" (various statistics) - - "chunking_model" (the model used for embeddings; default string if fast_chunking) - -Error Handling and Logging: ---------------------------- -- All warnings, errors, and debugging information are logged to both the console - and a dedicated log file at logs/chunking.log. -- If any critical errors occur while loading or processing data, the process - logs the exception and attempts a graceful exit without crashing the entire - pipeline. 
- -Debug Visualization: --------------------- -- In semantic_chunking mode, if debug mode is on, the module will generate a plot - of average consecutive sentence similarities and save it to plots/aggregated_similarities.png. -""" - -import os -import re import time -from typing import Any, Dict, Optional +from typing import Any from dataclasses import asdict, dataclass +from collections.abc import Sequence import numpy as np -from loguru import logger # type: ignore +from loguru import logger from tqdm.auto import tqdm from yourbench.utils.chunking_utils import split_into_token_chunks from yourbench.utils.dataset_engine import custom_load_dataset, custom_save_dataset -# Try importing torch-related libraries -_torch_available = False -try: - import torch - import torch.nn.functional as F - from torch.amp import autocast +@dataclass(frozen=True) +class ChunkingConfig: + """Configuration for chunking parameters.""" - _torch_available = True - logger.info("PyTorch is available.") -except ImportError: - logger.info("PyTorch is not available. Semantic chunking features requiring torch will be disabled.") - - # Define dummy autocast if torch not found - class DummyAutocast: - def __enter__(self): - pass - - def __exit__(self, type, value, traceback): - pass - - def autocast(device_type): - return DummyAutocast() # type: ignore - - -# Try importing transformers -_transformers_available = False -try: - from transformers import AutoModel, AutoTokenizer - - _transformers_available = True - logger.info("Transformers library is available.") -except ImportError: - logger.info( - "Transformers library is not available. Semantic chunking features requiring transformers will be disabled." 
- ) - AutoModel = None # type: ignore - AutoTokenizer = None # type: ignore - - -try: - import evaluate - - # Attempt to load perplexity metric from evaluate - _perplexity_metric = evaluate.load("perplexity", module_type="metric", model_id="gpt2") - logger.info("Loaded 'perplexity' metric with model_id='gpt2'.") -except Exception as perplexity_load_error: - logger.info( - f"Could not load perplexity metric from 'evaluate'. Skipping perplexity. Error: {perplexity_load_error}" - ) - _perplexity_metric = None - -try: - # Attempt to import textstat for readability metrics - import textstat - - _use_textstat = True -except ImportError: - logger.info("Package 'textstat' not installed. Readability metrics will be skipped.") - _use_textstat = False - - -# ----------------------------------------------------------------------------- -# Dataclasses for cleaner configuration and result handling -# ----------------------------------------------------------------------------- -@dataclass -class ChunkingParameters: - l_min_tokens: int = 64 - l_max_tokens: int = 128 - tau_threshold: float = 0.3 + max_tokens: int = 256 h_min: int = 2 - h_max: int = 3 - num_multihops_factor: int = 2 - chunking_mode: str = "fast_chunking" # "fast_chunking" or "semantic_chunking" + h_max: int = 5 + num_multihops_factor: int = 1 -@dataclass +@dataclass(frozen=True) class SingleHopChunk: - chunk_id: Any + """A single text chunk with its identifier.""" + + chunk_id: str chunk_text: str -@dataclass +@dataclass(frozen=True) class MultiHopChunk: + """A combination of multiple single-hop chunks.""" + chunk_ids: list[str] chunks_text: list[str] -@dataclass -class ChunkInfoMetrics: - token_count: float - unique_token_ratio: float - bigram_diversity: float - perplexity: float - avg_token_length: float - flesch_reading_ease: float - gunning_fog: float - - -def _parse_chunking_parameters(config: Dict[str, Any]) -> ChunkingParameters: - """ - Extracts the chunking parameters from the config dictionary, falling 
back - to default values if keys are missing. The chunking_mode defaults to - "fast_chunking" unless explicitly set to "semantic_chunking." - """ +def extract_config(config: dict[str, Any]) -> ChunkingConfig: + """Extract chunking configuration from pipeline config.""" chunking_params = config.get("pipeline", {}).get("chunking", {}).get("chunking_configuration", {}) - return ChunkingParameters( - l_min_tokens=chunking_params.get("l_min_tokens", 128), - l_max_tokens=chunking_params.get("l_max_tokens", 256), - tau_threshold=chunking_params.get("tau_threshold", 0.7), + return ChunkingConfig( + max_tokens=chunking_params.get("l_max_tokens", 256), h_min=chunking_params.get("h_min", 2), h_max=chunking_params.get("h_max", 5), num_multihops_factor=chunking_params.get("num_multihops_factor", 1), - chunking_mode=chunking_params.get("chunking_mode", "fast_chunking"), ) -def run(config: Dict[str, Any]) -> None: +def chunk_document(text: str, doc_id: str, max_tokens: int) -> list[SingleHopChunk]: """ - Main pipeline entry point for the chunking stage. + Chunk a document into segments based on token count. Args: - config (Dict[str, Any]): The entire pipeline configuration dictionary. + text: Document text to chunk + doc_id: Unique document identifier + max_tokens: Maximum tokens per chunk Returns: - None. This function saves the updated dataset containing chunked - documents to disk or the Hugging Face Hub, based on the config. - - Raises: - RuntimeError: If a critical error is encountered that prevents chunking. - The error is logged, and execution attempts a graceful exit. + List of single-hop chunks """ - # Retrieve chunking configuration from config - chunking_config = config.get("pipeline", {}).get("chunking", {}) - if chunking_config is None or not chunking_config.get("run", False): - logger.info("Chunking stage is disabled. 
Skipping.") - return - - logger.info("Starting chunking stage...") - - # Attempt to load dataset - dataset = custom_load_dataset(config=config, subset="summarized") - logger.info(f"Loaded summarized subset with {len(dataset)} rows for chunking.") - - # Retrieve chunking parameters into a dataclass - params = _parse_chunking_parameters(config) - l_min_tokens = params.l_min_tokens - l_max_tokens = params.l_max_tokens - tau_threshold = params.tau_threshold - h_min = params.h_min - h_max = params.h_max - num_multihops_factor = params.num_multihops_factor - chunking_mode = params.chunking_mode.lower().strip() - - # Check debug setting - debug_mode: bool = config.get("settings", {}).get("debug", False) - if debug_mode is False: - # If not debug mode, skip perplexity and readability to save time - logger.debug("Skipping perplexity and readability metrics (debug mode off).") - local_perplexity_metric = None - local_use_textstat = False - else: - local_perplexity_metric = _perplexity_metric - local_use_textstat = _use_textstat - - # We'll only load the chunking model if in semantic_chunking mode - tokenizer = None - model = None - device = "cpu" - model_name = "no_model_for_fast_chunking" - - if chunking_mode == "semantic_chunking": - # Check if required libraries are installed - if not _torch_available or not _transformers_available: - logger.error( - "Semantic chunking requires 'torch' and 'transformers' libraries. " - "Please install them (e.g., pip install yourbench[semantic]) or use 'fast_chunking' mode." - ) - return # Exit if dependencies are missing for semantic chunking - - try: - # Extract model name from config if available - model_name_list = config.get("model_roles", {}).get("chunking", []) - if model_name_list is None or len(model_name_list) == 0: - logger.info( - "No chunking model specified in config['model_roles']['chunking']. " - "Using default 'intfloat/multilingual-e5-large-instruct'." 
- ) - model_name = "intfloat/multilingual-e5-large-instruct" - else: - model_name = model_name_list[0] - - logger.info(f"Using chunking model: '{model_name}'") - # Determine device only if torch is available - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - tokenizer = AutoTokenizer.from_pretrained(model_name) # type: ignore - model = AutoModel.from_pretrained(model_name).to(device).eval() # type: ignore - except Exception as model_error: - logger.error(f"Error loading tokenizer/model '{model_name}': {model_error}") - logger.warning("Chunking stage cannot proceed with semantic_chunking. Exiting.") - return - else: - logger.info("Using fast_chunking mode: purely length-based chunking with no embeddings.") - - # Prepare data structures - all_single_hop_chunks: list[list[SingleHopChunk]] = [] - all_multihop_chunks: list[list[MultiHopChunk]] = [] - all_chunk_info_metrics: list[list[ChunkInfoMetrics]] = [] - all_similarities: list[list[float]] = [] - - # Process each document in the dataset - start_time = time.time() - total_docs = len(dataset) - logger.info(f"Starting chunking process for {total_docs} documents") - - for idx, row in enumerate(tqdm(dataset, desc="Chunking documents", ncols=100)): - doc_start_time = time.time() - logger.info( - f"[{idx + 1}/{total_docs}] Processing document ID={row.get('document_id', f'doc_{idx}')} ({len(row.get('document_text', ''))} chars)" - ) - doc_text = row.get("document_text", "") - doc_id = row.get("document_id", f"doc_{idx}") - logger.info(f"[{idx}] doc_id={row.get('document_id')} | text_len={len(doc_text)} | preview={doc_text[:100]!r}") - - # If text is empty or missing - if doc_text is None or not doc_text.strip(): - logger.warning(f"Document at index {idx} has empty text. 
Storing empty chunks.") - doc_process_time = time.time() - doc_start_time - logger.info(f"Completed document {idx + 1}/{total_docs} in {doc_process_time:.2f}s") - all_single_hop_chunks.append([]) - all_multihop_chunks.append([]) - all_chunk_info_metrics.append([]) - continue - - if (idx + 1) % 1 == 0: - elapsed_time = time.time() - start_time - avg_time_per_doc = elapsed_time / (idx + 1) - remaining_docs = total_docs - (idx + 1) - estimated_remaining = avg_time_per_doc * remaining_docs - progress_pct = (idx + 1) / total_docs * 100 - - logger.info(f"Progress: {progress_pct:.1f}% | Completed {idx + 1}/{total_docs} documents") - logger.info( - f"Avg time per doc: {avg_time_per_doc:.2f}s | Est. remaining: {estimated_remaining / 60:.1f} minutes" - ) - - # Split the document into sentences - sentences = _split_into_sentences(doc_text) - - if sentences is None or len(sentences) == 0: - logger.warning(f"No valid sentences found for doc at index {idx}, doc_id={doc_id}.") - all_single_hop_chunks.append([]) - all_multihop_chunks.append([]) - all_chunk_info_metrics.append([]) - continue - - # Depending on the chunking mode: - if chunking_mode == "semantic_chunking": - # Debug log showing current dependency state - logger.debug( - f"Semantic chunking check: torch={_torch_available}, transformers={_transformers_available}, model_loaded={model is not None}, tokenizer_loaded={tokenizer is not None}" - ) - - # Ensure dependencies one last time before computation - if not _torch_available or not _transformers_available or model is None or tokenizer is None: - logger.error("Cannot perform semantic chunking due to missing dependencies or model loading issues.") - # Add empty lists and continue to avoid crashing the loop for this document - all_single_hop_chunks.append([]) - all_multihop_chunks.append([]) - all_chunk_info_metrics.append([]) - continue - - # 1) Compute embeddings for sentences - sentence_embeddings = _compute_embeddings(tokenizer, model, texts=sentences, device=device, 
max_len=512) - # 2) Compute consecutive sentence similarities - consecutive_sims: list[float] = [] - for sentence_index in range(len(sentences) - 1): - cos_sim = float( - F.cosine_similarity( - sentence_embeddings[sentence_index].unsqueeze(0), - sentence_embeddings[sentence_index + 1].unsqueeze(0), - dim=1, - )[0] - ) - consecutive_sims.append(cos_sim) - if consecutive_sims: - all_similarities.append(consecutive_sims) - - # 3) Create single-hop chunks with semantic logic - single_hop_chunks = _chunk_document_semantic( - sentences=sentences, - similarities=consecutive_sims, - l_min_tokens=l_min_tokens, - l_max_tokens=l_max_tokens, - tau=tau_threshold, - doc_id=doc_id, - ) - else: - # Debug line for fast chunking - logger.info( - f"[{doc_id}] Performing fast_chunking on {len(sentences)} sentences (l_max_tokens={l_max_tokens})" - ) - - # Fast chunking: purely length-based - single_hop_chunks = _chunk_document_fast( - sentences=sentences, - l_max_tokens=l_max_tokens, - doc_id=doc_id, - ) - - # Create multi-hop chunks - multihop = _multihop_chunking( - single_hop_chunks, - h_min=h_min, - h_max=h_max, - num_multihops_factor=num_multihops_factor, - ) - - # Compute metrics (token_count, perplexity, readability, etc.) 
- chunk_metrics = _compute_info_density_metrics(single_hop_chunks, local_perplexity_metric, local_use_textstat) - - # Accumulate - all_single_hop_chunks.append(single_hop_chunks) - all_multihop_chunks.append(multihop) - all_chunk_info_metrics.append(chunk_metrics) - - # Optional: Save aggregated similarity plot only if in semantic_chunking and debug - if chunking_mode == "semantic_chunking" and all_similarities and debug_mode: - _plot_aggregated_similarities(all_similarities) - - # Convert dataclasses back to dicts for safe addition to the dataset - dataset = dataset.add_column( - "chunks", - [[asdict(chunk) for chunk in chunk_list] for chunk_list in all_single_hop_chunks], - ) - dataset = dataset.add_column( - "multihop_chunks", - [[asdict(mh) for mh in multihop_list] for multihop_list in all_multihop_chunks], - ) - dataset = dataset.add_column( - "chunk_info_metrics", - [[asdict(cm) for cm in metric_list] for metric_list in all_chunk_info_metrics], - ) - dataset = dataset.add_column("chunking_model", [model_name] * len(dataset)) + if not text or not text.strip(): + return [] - # Save updated dataset - custom_save_dataset(dataset=dataset, config=config, subset="chunked") - logger.success("Chunking stage completed successfully.") + chunk_texts = split_into_token_chunks(text, chunk_tokens=max_tokens, overlap=0) + return [SingleHopChunk(chunk_id=f"{doc_id}_{i}", chunk_text=chunk) for i, chunk in enumerate(chunk_texts)] -def _split_into_sentences(text: str) -> list[str]: +def create_multihop_chunks( + chunks: Sequence[SingleHopChunk], h_min: int, h_max: int, num_multihops_factor: int +) -> list[MultiHopChunk]: """ - Splits the input text into sentences using a simple rule-based approach - that looks for punctuation delimiters ('.', '!', '?'). + Create multi-hop chunks by randomly sampling combinations of single-hop chunks. Args: - text (str): The full document text to be split. 
+ chunks: List of single-hop chunks + h_min: Minimum chunks per multi-hop + h_max: Maximum chunks per multi-hop + num_multihops_factor: Factor to determine number of multi-hops Returns: - list[str]: A list of sentence strings. + List of multi-hop chunks """ - # Replace newlines with spaces for consistency - normalized_text = text.replace("\n", " ").strip() - if normalized_text is None or normalized_text == "": + if not chunks or h_min > len(chunks) or h_min > h_max or h_min <= 0: return [] - # Split using capturing parentheses to retain delimiters, then recombine. - segments = re.split(r"([.!?])", normalized_text) - sentences: list[str] = [] - for i in range(0, len(segments), 2): - if i + 1 < len(segments): - # Combine the text and delimiter - candidate = (segments[i] + segments[i + 1]).strip() - else: - # If no delimiter segment, use the text directly - candidate = segments[i].strip() - if candidate: - sentences.append(candidate) - return sentences - - -def _compute_embeddings( - tokenizer: AutoTokenizer, - model: AutoModel, - texts: list[str], - device: "torch.device", - max_len: int = 512, - batch_size: int = 16, -) -> "list[torch.Tensor]": - """ - Computes sentence embeddings by mean pooling the last hidden states, - normalized to unit length. - - Args: - tokenizer (AutoTokenizer): A Hugging Face tokenizer. - model (AutoModel): A pretrained transformer model to generate embeddings. - texts (list[str]): The list of sentence strings to be embedded. - device (torch.device): The device on which to run inference (CPU or GPU). - max_len (int): Max sequence length for tokenization. - batch_size (int): Batch size. - Returns: - list[torch.Tensor]: A list of PyTorch tensors (one per sentence). 
- """ - embeddings = [] - model.eval() - - # Determine autocast device type string - autocast_device_type = "cuda" if _torch_available and torch.cuda.is_available() else "cpu" + total_chunks = len(chunks) + effective_h_max = min(h_max, total_chunks) - for i in range(0, len(texts), batch_size): - batch_texts = texts[i : i + batch_size] - batch_dict = tokenizer(batch_texts, max_length=max_len, padding=True, truncation=True, return_tensors="pt").to( - device - ) - - with torch.no_grad(): - # Use autocast context manager - with autocast(autocast_device_type): - outputs = model(**batch_dict) - last_hidden_states = outputs.last_hidden_state - attention_mask = batch_dict["attention_mask"] + if h_min > effective_h_max: + return [] - # Zero out non-attended tokens - last_hidden_states = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0) + # Determine target number of multi-hop chunks + target_count = max(1, total_chunks // max(1, num_multihops_factor)) - # Mean pooling - sum_hidden = last_hidden_states.sum(dim=1) - valid_token_counts = attention_mask.sum(dim=1, keepdim=True) - batch_embeddings = sum_hidden / valid_token_counts.clamp(min=1e-9) + # Adjust if target is unrealistic + if target_count * effective_h_max > total_chunks: + target_count = total_chunks // effective_h_max - # Normalize - batch_embeddings = F.normalize(batch_embeddings, p=2, dim=1) + if target_count == 0: + return [] - embeddings.extend(batch_embeddings.cpu()) + rng = np.random.default_rng() - return embeddings + # Generate random combinations + indices_array = rng.choice(total_chunks, size=(target_count, effective_h_max), replace=False) + sizes = rng.integers(low=h_min, high=effective_h_max + 1, size=target_count) + # Create unique combinations + unique_combos = {tuple(sorted(indices_array[i][: sizes[i]])) for i in range(target_count)} -def _chunk_document_semantic( - sentences: list[str], - similarities: list[float], - l_min_tokens: int, - l_max_tokens: int, - tau: float, - doc_id: 
str, -) -> list[SingleHopChunk]: - """ - Creates single-hop chunks from sentences using semantic guidance. Ensures each - chunk is at least l_min_tokens in length and at most l_max_tokens, introducing - a chunk boundary when consecutive sentence similarity is below threshold tau. + # Build multi-hop chunks + return [ + MultiHopChunk( + chunk_ids=[chunks[idx].chunk_id for idx in combo], chunks_text=[chunks[idx].chunk_text for idx in combo] + ) + for combo in unique_combos + ] - Args: - sentences (list[str]): The list of sentences for a single document. - similarities (list[float]): Cosine similarities between consecutive sentences. - l_min_tokens (int): Minimum tokens per chunk. - l_max_tokens (int): Maximum tokens per chunk. - tau (float): Similarity threshold for introducing a chunk boundary. - doc_id (str): Unique identifier for the document. - Returns: - list[SingleHopChunk]: A list of SingleHopChunk objects. +def run(config: dict[str, Any]) -> None: """ - chunks: list[SingleHopChunk] = [] - current_chunk: list[str] = [] - current_len: int = 0 - chunk_index: int = 0 - - for i, sentence in enumerate(sentences): - sentence_token_count = len(sentence.split()) - - # If one sentence alone exceeds l_max, finalize the current chunk if non-empty, - # then store this sentence as its own chunk. 
- if sentence_token_count >= l_max_tokens: - # Dump the current chunk - if len(current_chunk) > 0: - chunk_str = " ".join(current_chunk) - chunks.append(SingleHopChunk(chunk_id=f"{doc_id}_{chunk_index}", chunk_text=chunk_str)) - chunk_index += 1 - current_chunk = [] - current_len = 0 - # Store the sentence alone - chunks.append(SingleHopChunk(chunk_id=f"{doc_id}_{chunk_index}", chunk_text=sentence)) - chunk_index += 1 - continue - - # Otherwise, add this sentence to the current chunk - current_chunk.append(sentence) - current_len += sentence_token_count - - # If we exceed l_max, close the current chunk and start a new one - if current_len >= l_max_tokens: - chunk_str = " ".join(current_chunk) - chunks.append(SingleHopChunk(chunk_id=f"{doc_id}_{chunk_index}", chunk_text=chunk_str)) - chunk_index += 1 - current_chunk = [] - current_len = 0 - continue - - # If we have at least l_min tokens and the next sentence similarity is below threshold, break here - if (current_len >= l_min_tokens) and (i < len(sentences) - 1): - if similarities[i] < tau: - chunk_str = " ".join(current_chunk) - chunks.append(SingleHopChunk(chunk_id=f"{doc_id}_{chunk_index}", chunk_text=chunk_str)) - chunk_index += 1 - current_chunk = [] - current_len = 0 - - # Any leftover - if len(current_chunk) > 0: - chunk_str = " ".join(current_chunk) - chunks.append(SingleHopChunk(chunk_id=f"{doc_id}_{chunk_index}", chunk_text=chunk_str)) - - return chunks - - -def _chunk_document_fast( - sentences: list[str], - l_max_tokens: int, - doc_id: str, - show_progress: bool = True, -) -> list[SingleHopChunk]: - """ - Uses token-based chunking with optional overlap, based on tiktoken. + Main entry point for the chunking pipeline stage. Args: - sentences (list[str]): Sentences of the document. - l_max_tokens (int): Max tokens per chunk. - doc_id (str): Unique identifier for the document. - show_progress (bool): Show progress bar (ignored here, kept for API symmetry). 
- - Returns: - list[SingleHopChunk]: A list of token-based chunks. + config: Pipeline configuration dictionary """ - text = " ".join(sentences) - chunk_texts = split_into_token_chunks( - text, - chunk_tokens=l_max_tokens, - overlap=0, - ) - - return [SingleHopChunk(chunk_id=f"{doc_id}_{i}", chunk_text=chunk) for i, chunk in enumerate(chunk_texts)] - + chunking_config = config.get("pipeline", {}).get("chunking", {}) + if not chunking_config.get("run", False): + logger.info("Chunking stage is disabled. Skipping.") + return -def _multihop_chunking( - single_hop_chunks: list[SingleHopChunk], - h_min: int, - h_max: int, - num_multihops_factor: int, -) -> list[MultiHopChunk]: - """ - Creates multi-hop chunks via numpy random sampling. + logger.info("Starting chunking stage...") - Generates combinations of size effective_h_max, slices them to sizes - between h_min and effective_h_max, and collects unique combinations. - Target number = max(1, total_single_hops // num_multihops_factor). - Actual number may be less due to sampling/de-duplication. + # Load dataset + dataset = custom_load_dataset(config=config, subset="summarized") + logger.info(f"Loaded {len(dataset)} documents for chunking") - Args: - single_hop_chunks: List of single-hop chunks. - h_min: Min single-hops per multi-hop. - h_max: Max single-hops per multi-hop. - num_multihops_factor: Factor to determine target multi-hop count. + # Extract configuration + params = extract_config(config) - Returns: - List of unique MultiHopChunk objects. - """ - total_single_hops = len(single_hop_chunks) - logger.info(f"Starting multi-hop chunking, total single chunks: {total_single_hops}") + # Process documents + all_single_chunks: list[list[SingleHopChunk]] = [] + all_multihop_chunks: list[list[MultiHopChunk]] = [] - if not single_hop_chunks: - logger.warning("Empty input 'single_hop_chunks'. Returning [].") - return [] - if not (0 < h_min <= h_max): - logger.warning(f"Invalid hop range h_min={h_min}, h_max={h_max}. 
Returning [].") - return [] + start_time = time.time() - effective_h_max = min(h_max, total_single_hops) - if h_min > effective_h_max: - logger.warning(f"h_min ({h_min}) > effective_h_max ({effective_h_max}). Cannot form chunks. Returning [].") - return [] + for idx, row in enumerate(tqdm(dataset, desc="Chunking documents")): + doc_text = row.get("document_text", "") + doc_id = row.get("document_id", f"doc_{idx}") - if num_multihops_factor <= 0: - logger.info("num_multihops_factor <= 0. Targeting all single hops.") - num_multihops_target = total_single_hops - else: - num_multihops_target = max(1, total_single_hops // num_multihops_factor) + # Create single-hop chunks + single_chunks = chunk_document(doc_text, doc_id, params.max_tokens) - if np.prod((num_multihops_target, effective_h_max)) > total_single_hops: - logger.warning( - f"Target {num_multihops_target} is too high for given sample size: {total_single_hops} and effective_h_max: {effective_h_max}" + # Create multi-hop chunks + multihop_chunks = create_multihop_chunks( + single_chunks, params.h_min, params.h_max, params.num_multihops_factor ) - num_multihops_target = total_single_hops // effective_h_max - logger.info( - f"Targeting ~{num_multihops_target} multi-hop chunks, effective h_max: {effective_h_max}, h_min: {h_min}" - ) + all_single_chunks.append(single_chunks) + all_multihop_chunks.append(multihop_chunks) - rng = np.random.default_rng() + # Progress logging + if (idx + 1) % 100 == 0: + elapsed = time.time() - start_time + rate = (idx + 1) / elapsed + logger.info(f"Progress: {idx + 1}/{len(dataset)} docs ({rate:.1f} docs/sec)") - # Generate initial index combinations (size effective_h_max) - initial_indices = rng.choice( - total_single_hops, - size=(num_multihops_target, effective_h_max), - replace=False, # Unique indices per combination + # Add columns to dataset + dataset = dataset.add_column("chunks", [[asdict(chunk) for chunk in chunks] for chunks in all_single_chunks]) + dataset = 
dataset.add_column( + "multihop_chunks", [[asdict(mh) for mh in multihops] for multihops in all_multihop_chunks] ) - # Generate random slice sizes - slice_sizes = rng.integers(low=h_min, high=effective_h_max, size=num_multihops_target, endpoint=True) - - # Slice, sort, tuple for hashing, and collect unique combinations - unique_combo_indices_set = { - tuple(np.sort(initial_indices[i][: slice_sizes[i]])) for i in range(num_multihops_target) - } - - logger.info(f"Generated {len(unique_combo_indices_set)} unique index combinations.") - - if not unique_combo_indices_set: - logger.warning("No unique combinations generated.") - return [] - - # --- Build MultiHopChunk Objects --- - final_multihop_chunks = [ - MultiHopChunk( - chunk_ids=[single_hop_chunks[idx].chunk_id for idx in combo_indices], - chunks_text=[single_hop_chunks[idx].chunk_text for idx in combo_indices], - ) - for combo_indices in unique_combo_indices_set - # combo_indices guaranteed non-empty by h_min >= 1 - ] - - logger.info(f"Created {len(final_multihop_chunks)} multi-hop chunks.") - return final_multihop_chunks - - -def _compute_info_density_metrics( - chunks: list[SingleHopChunk], - local_perplexity_metric: Optional[Any], - local_use_textstat: bool, -) -> list[ChunkInfoMetrics]: - """ - Computes optional statistics for each chunk, including token count, perplexity, - readability (flesch, gunning fog), and basic lexical diversity metrics. - - Args: - chunks (list[SingleHopChunk]): The list of single-hop chunk objects. - local_perplexity_metric (Optional[Any]): If provided, used to compute - perplexity (from evaluate.load("perplexity")). - local_use_textstat (bool): If True, compute text readability metrics using textstat. 
- - Returns: - list[ChunkInfoMetrics]: One object per chunk with fields like: - - token_count - - unique_token_ratio - - bigram_diversity - - perplexity - - avg_token_length - - flesch_reading_ease - - gunning_fog - """ - results: list[ChunkInfoMetrics] = [] - - for chunk in chunks: - chunk_text: str = chunk.chunk_text - tokens = chunk_text.strip().split() - token_count: int = len(tokens) - - # Compute metrics step by step - unique_token_ratio = 0.0 - if token_count > 0: - unique_toks = len({t.lower() for t in tokens}) - unique_token_ratio = float(unique_toks / token_count) - - # Bigram diversity - bigram_diversity = 0.0 - if token_count > 1: - bigrams = [] - for i in range(token_count - 1): - bigrams.append((tokens[i].lower(), tokens[i + 1].lower())) - unique_bigrams = len(set(bigrams)) - bigram_diversity = float(unique_bigrams / len(bigrams)) - - # Perplexity - ppl_score: float = 0.0 - if local_perplexity_metric is not None and token_count > 0: - try: - result = local_perplexity_metric.compute(data=[chunk_text], batch_size=1) - ppl_score = result.get("mean_perplexity", 0.0) - except Exception as e: - logger.warning(f"Could not compute perplexity for chunk. 
Error: {e}") - - # Average token length - avg_token_length = 0.0 - if token_count > 0: - avg_len = sum(len(t) for t in tokens) / token_count - avg_token_length = float(avg_len) - - # Readability - flesch_reading_ease = 0.0 - gunning_fog = 0.0 - if local_use_textstat is True and chunk_text.strip(): - try: - flesch_reading_ease = float(textstat.flesch_reading_ease(chunk_text)) - gunning_fog = float(textstat.gunning_fog(chunk_text)) - except Exception as e: - logger.warning(f"Textstat error: {e}") - - results.append( - ChunkInfoMetrics( - token_count=float(token_count), - unique_token_ratio=unique_token_ratio, - bigram_diversity=bigram_diversity, - perplexity=ppl_score, - avg_token_length=avg_token_length, - flesch_reading_ease=flesch_reading_ease, - gunning_fog=gunning_fog, - ) - ) - - return results - - -def _plot_aggregated_similarities(all_similarities: list[list[float]]) -> None: - """ - Plots the average cosine similarity for each sentence-pair position across - all documents, with shaded regions representing one standard deviation. - - Args: - all_similarities (list[list[float]]): A list of lists, where each - sub-list is the array of consecutive sentence similarities for - a particular document. - """ - if all_similarities is None or len(all_similarities) == 0: - logger.debug("No similarities to plot. Skipping aggregated similarity plot.") - return - - # Check if matplotlib is available before trying to plot - try: - import matplotlib.pyplot as plt - except ImportError: - logger.warning("Matplotlib not found. 
Skipping similarity plot generation.") - return + # Save dataset + custom_save_dataset(dataset=dataset, config=config, subset="chunked") - plt.figure(figsize=(10, 6)) - max_len = max(len(sims) for sims in all_similarities) - - avg_sim: list[float] = [] - std_sim: list[float] = [] - counts: list[int] = [] - - for position in range(max_len): - vals = [s[position] for s in all_similarities if position < len(s)] - if vals: - mean_val = sum(vals) / len(vals) - variance = sum((v - mean_val) ** 2 for v in vals) / len(vals) - stddev_val = variance**0.5 - - avg_sim.append(mean_val) - std_sim.append(stddev_val) - counts.append(len(vals)) - else: - break - - # X-axis positions - x_positions = list(range(len(avg_sim))) - plt.plot(x_positions, avg_sim, "b-", label="Avg Similarity") - - # Create confidence interval region - lower_bound = [max(0, a - s) for a, s in zip(avg_sim, std_sim)] - upper_bound = [min(1, a + s) for a, s in zip(avg_sim, std_sim)] - plt.fill_between(x_positions, lower_bound, upper_bound, alpha=0.3, color="blue") - - # Plot data points with size reflecting how many docs contributed - max_count = max(counts) if counts else 1 - sizes = [30.0 * (c / max_count) for c in counts] - plt.scatter(x_positions, avg_sim, s=sizes, alpha=0.5, color="navy") - - plt.title("Average Consecutive Sentence Similarity Across Documents") - plt.xlabel("Sentence Pair Index") - plt.ylabel("Cosine Similarity") - plt.grid(True) - plot_path: str = os.path.join("plots", "aggregated_similarities.png") - # Ensure plots directory exists - os.makedirs("plots", exist_ok=True) - plt.savefig(plot_path, dpi=300, bbox_inches="tight") # Changed dpi to 300 - plt.close() - logger.info(f"Saved aggregated similarity plot at '{plot_path}'.") - - -# Make sure main guard exists if this file is runnable directly (optional but good practice) -if __name__ == "__main__": - # Example configuration for testing (replace with actual loading if needed) - test_config = { - "pipeline": { - "chunking": { - "run": 
True, - "chunking_configuration": { - "chunking_mode": "fast_chunking" # or "semantic_chunking" if deps installed - }, - # Add other necessary config keys like dataset paths etc. - } - }, - "settings": {"debug": True}, - # Add dataset config, model roles etc. - } - # Basic logger setup for standalone execution - logger.add("logs/chunking_standalone.log", rotation="10 MB") - logger.info("Running chunking module standalone (example)...") - # Note: You'd need a valid dataset configuration for run() to work fully. - # run(test_config) - logger.info("Standalone example finished.") + elapsed_total = time.time() - start_time + logger.success(f"Chunking completed in {elapsed_total:.1f} seconds") diff --git a/yourbench/pipeline/citation_score_filtering.py b/yourbench/pipeline/citation_score_filtering.py index 2ebb8110..3f6e7120 100644 --- a/yourbench/pipeline/citation_score_filtering.py +++ b/yourbench/pipeline/citation_score_filtering.py @@ -29,7 +29,7 @@ from loguru import logger from thefuzz import fuzz # pip install thefuzz -from yourbench.utils.dataset_engine import custom_load_dataset, custom_save_dataset +from yourbench.utils.dataset_engine import custom_load_dataset, custom_save_dataset, replace_dataset_columns def run(config: Dict[str, Any]) -> None: @@ -130,9 +130,16 @@ def run(config: Dict[str, Any]) -> None: all_final_scores.append(final_score) # 4) Add these new columns to the dataset - lighteval_ds = lighteval_ds.add_column("answer_citation_score", all_answer_citation_scores) - lighteval_ds = lighteval_ds.add_column("chunk_citation_score", all_chunk_citation_scores) - lighteval_ds = lighteval_ds.add_column("citation_score", all_final_scores) + # Use helper function to replace columns cleanly + # Note: This doesn't preserve original column metadata, but for computed float scores + # this is acceptable as type inference will correctly identify them as numeric + columns_data = { + "answer_citation_score": all_answer_citation_scores, + "chunk_citation_score": 
all_chunk_citation_scores, + "citation_score": all_final_scores, + } + + lighteval_ds = replace_dataset_columns(lighteval_ds, columns_data) # 5) Save the updated dataset # We reuse the "lighteval" subset name, but you could save it elsewhere if you prefer. diff --git a/yourbench/pipeline/handler.py b/yourbench/pipeline/handler.py index d5759311..ad8fcf0a 100644 --- a/yourbench/pipeline/handler.py +++ b/yourbench/pipeline/handler.py @@ -1,31 +1,28 @@ -# handler.py -# ============================================================================= -# Author: @sumukshashidhar -# -# This module orchestrates the YourBench pipeline stages in a specified order. -# It reads pipeline configuration from a config dictionary, runs each stage -# if enabled, times each stage's execution, logs errors to stage-specific -# log files, and finally generates an overall timing chart of all stages. -# -# Usage: -# from yourbench.pipeline.handler import run_pipeline -# run_pipeline("/path/to/config.yaml", debug=True) -# -# The module assumes the presence of pipeline stages named after their .py -# files (e.g., ingestion, summarization), each exposing a `run(config: dict)`. -# -# Stages are executed in a fixed default order but will skip any that -# do not appear in the config or are explicitly disabled. Unrecognized -# stages in the config are also noted (but not executed). -# -# Key Responsibilities: -# 1. Load the user's pipeline configuration. -# 2. Execute each stage in `DEFAULT_STAGE_ORDER` if `run` is True in the config. -# 3. Log all events, including errors, to a stage-specific file and the console. -# 4. Collect and display timing data for each stage. -# 5. Detect any extra pipeline stages in the config that do not appear in -# `DEFAULT_STAGE_ORDER` and log a warning about them. -# ============================================================================= +""" +This module orchestrates the Yourbench pipeline stages in a specified order. 
+It reads pipeline configuration from a config dictionary, runs each stage +if enabled, times each stage's execution, logs errors to stage-specific +log files, and finally generates an overall timing chart of all stages. + +The module assumes the presence of pipeline stages named after their .py +files (e.g., ingestion, summarization), each exposing a `run(config: dict)`. + +Some stages may use direct function overrides (e.g., for question generation), +bypassing dynamic import. These are defined in `STAGE_FUNCTION_OVERRIDES`. + +Stages are executed in a fixed default order but will skip any that +do not appear in the config or are explicitly disabled. Unrecognized +stages in the config are also noted (but not executed). + +Key Responsibilities: +1. Load the user's pipeline configuration. +2. Execute each stage in `DEFAULT_STAGE_ORDER` if `run` is True in the config. +3. Use function overrides for specific stages if defined. +4. Log all events, including errors, to a stage-specific file and the console. +5. Collect and display timing data for each stage. +6. Detect any extra pipeline stages in the config that do not appear in + `DEFAULT_STAGE_ORDER` and log a warning about them. +""" from __future__ import annotations import os @@ -36,6 +33,10 @@ from loguru import logger from yourbench.utils.loading_engine import load_config +from yourbench.pipeline.question_generation import ( + run_multi_hop, + run_single_shot, +) # === Pipeline Stage Order Definition === @@ -52,63 +53,41 @@ "citation_score_filtering", ] -# This global list tracks the timing for all executed stages in the pipeline. PIPELINE_STAGE_TIMINGS: List[Dict[str, float]] = [] +STAGE_FUNCTION_OVERRIDES = { + "single_shot_question_generation": run_single_shot, + "multi_hop_question_generation": run_multi_hop, +} + + def run_pipeline( config_file_path: str, debug: bool = False, plot_stage_timing: bool = False, ) -> None: - """ - Run the YourBench pipeline based on a provided YAML/JSON configuration file. 
- - Args: - config_file_path (str): - Path to the pipeline configuration file that describes which stages to run (YAML or JSON). - debug (bool): - Enables more verbose logging (debug-level). Defaults to False. - plot_stage_timing (bool): - If True, generate a bar chart showing the time spent in each stage. Requires matplotlib. - - Raises: - FileNotFoundError: - If the configuration file is not found at the specified path. - Exception: - If any stage raises an unexpected error during execution, it is re-raised after logging. - """ global PIPELINE_STAGE_TIMINGS PIPELINE_STAGE_TIMINGS = [] - # Log level adjustments logger.debug(f"Loading pipeline configuration from {config_file_path}") config: Dict[str, Any] = load_config(config_file_path) - - # Attach debug flag to config for use in other modules config["debug"] = debug logger.info(f"Debug mode set to {config['debug']}") - # Extract pipeline portion of the config pipeline_config: Dict[str, Any] = config.get("pipeline", {}) if not pipeline_config: logger.warning("No pipeline stages configured. Exiting pipeline execution.") return - # Ensure logs directory exists to store stage-specific logs os.makedirs("logs", exist_ok=True) - - # Record overall pipeline start pipeline_execution_start_time: float = time.time() - # === Execute pipeline stages in the fixed default order === for stage_name in DEFAULT_STAGE_ORDER: - # Check if the stage is mentioned in the pipeline config at all if stage_name not in pipeline_config: logger.debug(f"Stage '{stage_name}' is not mentioned in the config. Skipping.") continue - # Get the settings for the stage. It might be None or a dict. 
stage_settings = pipeline_config.get(stage_name) if not isinstance(stage_settings, dict): pipeline_config[stage_name] = {"run": True} @@ -119,24 +98,21 @@ def run_pipeline( logger.info(f"Skipping stage: '{stage_name}' (run set to False).") continue - # Setup a stage-specific error log file error_log_path = os.path.join("logs", f"pipeline_{stage_name}.log") log_id = logger.add(error_log_path, level="ERROR", backtrace=True, diagnose=True, mode="a") logger.info(f"Starting execution of stage: '{stage_name}'") stage_start_time: float = time.time() - # Ensure the specific stage config is at least an empty dict if it was None - if stage_name in config.get("pipeline", {}) and config["pipeline"][stage_name] is None: - config["pipeline"][stage_name] = {} - try: - # Dynamically import the stage module, e.g. yourbench.pipeline.ingestion - stage_module = importlib.import_module(f"yourbench.pipeline.{stage_name}") - stage_module.run(config) - except Exception as pipeline_error: - logger.error(f"Error executing pipeline stage '{stage_name}': {str(pipeline_error)}") - # Remove stage-specific log handler before re-raising + stage_func = STAGE_FUNCTION_OVERRIDES.get(stage_name) + if stage_func: + stage_func(config) + else: + stage_module = importlib.import_module(f"yourbench.pipeline.{stage_name}") + stage_module.run(config) + except Exception: + logger.exception(f"Error executing pipeline stage '{stage_name}'") _remove_log_handler_safely(log_id) raise finally: @@ -152,47 +128,20 @@ def run_pipeline( }) logger.success(f"Completed stage: '{stage_name}' in {elapsed_time:.3f}s") - # Record overall pipeline end pipeline_execution_end_time: float = time.time() - - # Check for unrecognized stages in config _check_for_unrecognized_stages(pipeline_config) - # Optionally plot pipeline stage timings if plot_stage_timing or debug: - _plot_pipeline_stage_timing( - pipeline_start=pipeline_execution_start_time, - pipeline_end=pipeline_execution_end_time, - ) + _plot_pipeline_stage_timing() def 
_check_for_unrecognized_stages(pipeline_config: Dict[str, Any]) -> None: - """ - Warn about pipeline stages that exist in the config but - are not in DEFAULT_STAGE_ORDER. - - Args: - pipeline_config (Dict[str, Any]): - The pipeline configuration dict (subset of the main config). - """ for stage in pipeline_config.keys(): if stage not in DEFAULT_STAGE_ORDER: logger.warning(f"Unrecognized stage '{stage}' is present in config but not in DEFAULT_STAGE_ORDER.") -def _plot_pipeline_stage_timing( - pipeline_start: float, - pipeline_end: float, -) -> None: - """ - Generate a bar chart illustrating the stage timings for the entire pipeline. - - Args: - pipeline_start (float): - Timestamp when the pipeline started. - pipeline_end (float): - Timestamp when the pipeline ended. - """ +def _plot_pipeline_stage_timing() -> None: logger.info("Generating pipeline stage timing chart.") try: import matplotlib.pyplot as plt @@ -200,18 +149,15 @@ def _plot_pipeline_stage_timing( logger.warning("Cannot generate timing chart: matplotlib is not installed.") return - # Gather data stages = [timing["stage_name"] for timing in PIPELINE_STAGE_TIMINGS] durations = [timing["elapsed"] for timing in PIPELINE_STAGE_TIMINGS] - # Minimalistic bar chart fig, ax = plt.subplots(figsize=(3, 3), dpi=300) ax.barh(stages, durations, color="skyblue", edgecolor="black") ax.set_xlabel("Duration (s)") ax.set_title("Pipeline Stage Timings") - # Annotate each bar with the stage's duration for i, duration in enumerate(durations): ax.text(duration + 0.01, i, f"{duration:.2f}s", va="center", fontsize=6) @@ -222,14 +168,6 @@ def _plot_pipeline_stage_timing( def _remove_log_handler_safely(log_id: int) -> None: - """ - Remove a log handler (by log_id) from loguru, swallowing any ValueError - if the handler is already removed or doesn't exist. - - Args: - log_id (int): - The handler ID returned by logger.add(). 
- """ try: logger.remove(log_id) except ValueError: diff --git a/yourbench/pipeline/ingestion.py b/yourbench/pipeline/ingestion.py index 9e612065..203dcb88 100644 --- a/yourbench/pipeline/ingestion.py +++ b/yourbench/pipeline/ingestion.py @@ -44,11 +44,12 @@ from typing import Any, Optional from dataclasses import field, dataclass +import trafilatura from loguru import logger from markitdown import MarkItDown from huggingface_hub import InferenceClient -from yourbench.utils.inference_engine import Model as ModelConfig +from yourbench.utils.inference.inference_core import Model as ModelConfig @dataclass @@ -270,14 +271,80 @@ def _initialize_markdown_processor(config: dict[str, Any]) -> MarkItDown: return MarkItDown() +def _extract_markdown_from_html(file_path: str) -> str | None: + """Attempts to extract markdown content from an HTML file using Trafilatura.""" + logger.debug(f"Attempting to extract Markdown from HTML file: {file_path} using Trafilatura.") + try: + with open(file_path, "r", encoding="utf-8") as f: + html_content = f.read() + + # output_format='markdown' is key for direct Markdown conversion + extracted_markdown = trafilatura.extract( + html_content, + output_format="markdown", + include_comments=False, # Do not include HTML comments + include_tables=True, # Try to include table data + ) + + if extracted_markdown: + logger.info(f"Successfully extracted Markdown from '{file_path}' using Trafilatura.") + return extracted_markdown + + logger.warning(f"Trafilatura returned no content for HTML file '{file_path}'.") + return None + except Exception as e: + logger.error(f"Error using Trafilatura for HTML file '{file_path}': {e}. Skipping Trafilatura for this file.") + return None + + +def _get_markdown_content(file_path: str, markdown_processor: MarkItDown) -> str | None: + """ + Extract or convert file content to Markdown based on file type. + + Args: + file_path (str): The path to the source document. 
+ markdown_processor (MarkItDown): Configured MarkItDown instance for conversions. + + Returns: + str | None: The Markdown content, or None if conversion failed. + + Logs: + - Info about the processing method used for each file type. + - Warnings for fallback scenarios or failed conversions. + """ + file_ext = os.path.splitext(file_path)[1].lower() + + if file_ext == ".md": + # For existing Markdown files, just read the content, ensuring UTF-8 + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + logger.info(f"File '{file_path}' is already Markdown. Content read directly.") + return content + + elif file_ext in [".html", ".htm"]: + logger.info(f"Processing HTML file: {file_path} with Trafilatura.") + content = _extract_markdown_from_html(file_path) + if content is None: # Fallback to MarkItDown if Trafilatura failed or returned nothing + logger.warning( + f"Trafilatura processing failed or yielded no content for HTML '{file_path}'. " + "Falling back to MarkItDown for this file." + ) + content = markdown_processor.convert(file_path).text_content + return content + + else: # For other file types, use the MarkItDown processor + logger.info(f"Converting non-HTML/Markdown file '{file_path}' using MarkItDown.") + return markdown_processor.convert(file_path).text_content + + def _convert_document_to_markdown(file_path: str, output_dir: str, markdown_processor: MarkItDown) -> None: """ - Convert a single source file into Markdown using MarkItDown and save the result. + Convert a single source file into Markdown and save the result. Args: file_path (str): The path to the source document. output_dir (str): Directory where the converted .md file will be written. - markdown_processor (MarkItDown): Configured MarkItDown instance to handle the conversion. + markdown_processor (MarkItDown): Configured MarkItDown instance for conversions. 
Returns: None @@ -288,13 +355,11 @@ def _convert_document_to_markdown(file_path: str, output_dir: str, markdown_proc """ logger.debug("Converting file: {}", file_path) try: - # Skipping conversion if file already in markdown - if os.path.splitext(file_path)[1] == ".md": - with open(file_path, "r") as f: - content = f.read() - else: - # Perform the file-to-markdown conversion - content = markdown_processor.convert(file_path).text_content + content = _get_markdown_content(file_path, markdown_processor) + + if content is None: + logger.warning(f"No content could be generated for file '{file_path}' after processing. Skipping output.") + return # Construct an output filename with .md extension base_name = os.path.basename(file_path) @@ -305,6 +370,6 @@ def _convert_document_to_markdown(file_path: str, output_dir: str, markdown_proc with open(output_file, "w", encoding="utf-8") as out_f: out_f.write(content) - logger.info(f"Successfully converted '{file_path}' -> '{output_file}'.") + logger.info(f"Successfully processed '{file_path}' and saved as '{output_file}'.") except Exception as exc: logger.error(f"Failed to convert '{file_path}'. Error details: {exc}") diff --git a/yourbench/pipeline/lighteval.py b/yourbench/pipeline/lighteval.py index e99336f2..44a6034a 100644 --- a/yourbench/pipeline/lighteval.py +++ b/yourbench/pipeline/lighteval.py @@ -18,7 +18,7 @@ 8) chunk_ids (List[str]) - The chunk ID(s) used in forming the question. 9) question_generating_model (str) - The HF model ID that generated this question. 10) chunks (List[str]) - The actual chunk text(s) the question came from. -11) document (str) - The entire document text. +11) document (str) - The entire document text (can be excluded with include_document_text=false). 
Configuration Example: ---------------------- @@ -29,13 +29,14 @@ multi_hop_subset: multi_hop_questions_deduplicated chunked_subset: chunked_documents output_subset: lighteval + include_document_text: false # Optional: Set to false to exclude full document text (saves memory) Usage: ------ 1. Load single-shot and multi-hop question subsets. 2. Merge them into a single dataset, marking 'kind' as "single_shot" or "multi_hop." 3. For each question row, look up the relevant chunks in the chunked dataset to - populate 'chunks' and the full 'document' text. + populate 'chunks' and the full 'document' text (if include_document_text is true). 4. Save final dataset to HF or local path as configured. """ @@ -76,6 +77,7 @@ def run(config: Dict[str, Any]) -> None: - config["pipeline"]["lighteval"]["multi_hop_subset"] (str): Subset containing multi-hop questions. - config["pipeline"]["lighteval"]["chunked_subset"] (str): Subset containing chunked documents. - config["pipeline"]["lighteval"]["output_subset"] (str): Subset name for saving final dataset. + - config["pipeline"]["lighteval"]["include_document_text"] (bool, optional): Whether to include full document text in the final dataset. Default is True. Returns: None. The merged dataset is saved to disk or HF Hub as configured. @@ -87,9 +89,7 @@ def run(config: Dict[str, Any]) -> None: logger.info("Saving lighteval compatible dataset") - # ---------------------------------------- - # 2) Load datasets - # ---------------------------------------- + # Load datasets try: single_shot_ds = custom_load_dataset(config=config, subset="single_shot_questions") logger.info(f"Loaded single-shot Q subset single_shot_questions with {len(single_shot_ds)} rows.") @@ -123,15 +123,7 @@ def run(config: Dict[str, Any]) -> None: logger.error("No data in single-shot or multi-hop datasets. 
Exiting.") return - # ---------------------------------------- - # 3) Prepare lookups from chunked dataset - # ---------------------------------------- - # We'll store: doc_id -> (document_text, chunk_id -> chunk_text). - # chunked_ds typically has the following columns: - # - document_id (str) - # - document_text (str) - # - chunks (list of dicts with {chunk_id, chunk_text}) - # Possibly also "multihop_chunks" but we only need single-hop "chunks". + # Prepare lookups from chunked dataset doc_meta_map = {} for row in chunked_ds: doc_id = row.get("document_id", "") @@ -149,9 +141,12 @@ def run(config: Dict[str, Any]) -> None: if doc_id in doc_meta_map: doc_meta_map[doc_id].update({"document_summary": row.get("document_summary")}) - # ---------------------------------------- - # 4) Helper functions to transform a row - # ---------------------------------------- + # Check if we should include document text + include_document_text = stage_cfg.get("include_document_text", True) + if not include_document_text: + logger.info("Document text will be excluded from the final dataset (include_document_text=False)") + + # Helper functions to transform a row def make_single_shot_record(row: Dict[str, Any]) -> Dict[str, Any]: """ Transform a single-shot question row into a standardized dictionary @@ -159,13 +154,10 @@ def make_single_shot_record(row: Dict[str, Any]) -> Dict[str, Any]: """ doc_id: str = row.get("document_id", "") chunk_id: str = row.get("chunk_id", "") - # ground_truth is row["self_answer"] - # question_category is row["self_assessed_question_type"] - # question => row["question"], etc. 
# Grab doc meta doc_meta = doc_meta_map.get(doc_id, {}) - doc_text = doc_meta.get("document_text", "") + doc_text = doc_meta.get("document_text", "") if include_document_text else "" doc_summary = doc_meta.get("document_summary", "") chunk_text_map = doc_meta.get("chunks_map", {}) # chunk text is chunk_text_map[chunk_id] if it exists @@ -173,8 +165,17 @@ def make_single_shot_record(row: Dict[str, Any]) -> Dict[str, Any]: # if multiple choice question convert to number gold = row.get("self_answer", "") - if row.get("choices"): - gold = [ord(gold) - ord("A")] + if not gold: + logger.warning("Row has empty answer line") + + stage_cfg = config.get("pipeline", {}).get("single_shot_question_generation", {}) + if stage_cfg.get("question_mode") == "multi-choice": + if not gold: + gold = [0] + else: + gold = [ord(gold) - ord("A")] + else: + gold = [gold] return { "question": row.get("question", ""), @@ -203,7 +204,7 @@ def make_multi_hop_record(row: Dict[str, Any]) -> Dict[str, Any]: # e.g. row["source_chunk_ids"]: List[str] chunk_ids: List[str] = row.get("source_chunk_ids", []) doc_meta = doc_meta_map.get(doc_id, {}) - doc_text = doc_meta.get("document_text", "") + doc_text = doc_meta.get("document_text", "") if include_document_text else "" doc_summary = doc_meta.get("document_summary", "") chunk_text_map = doc_meta.get("chunks_map", {}) @@ -216,9 +217,17 @@ def make_multi_hop_record(row: Dict[str, Any]) -> Dict[str, Any]: # if multiple choice question convert to number gold = row.get("self_answer", "") - if row.get("choices"): - gold = [ord(gold) - ord("A")] - + if not gold: + logger.warning("Row has empty answer line") + + stage_cfg = config.get("pipeline", {}).get("single_shot_question_generation", {}) + if stage_cfg.get("question_mode") == "multi-choice": + if not gold: + gold = [0] + else: + gold = [ord(gold) - ord("A")] + else: + gold = [gold] return { "question": row.get("question", ""), "additional_instructions": row.get("additional_instructions", ""), @@ -237,42 
+246,25 @@ def make_multi_hop_record(row: Dict[str, Any]) -> Dict[str, Any]: "document_summary": doc_summary, } - # ---------------------------------------- - # 5) Convert each dataset to final records - # ---------------------------------------- - combined_records = [] - - for row in single_shot_ds: - record = make_single_shot_record(row) - combined_records.append(record) - - for row in multi_hop_ds: - record = make_multi_hop_record(row) - combined_records.append(record) + # Convert each dataset to final records + combined_records = [ + *[make_single_shot_record(row) for row in single_shot_ds], + *[make_multi_hop_record(row) for row in multi_hop_ds], + ] if not combined_records: logger.warning("No final records to merge in lighteval. Exiting.") return - # ---------------------------------------- - # 6) Create a Hugging Face Dataset - # ---------------------------------------- + # Create a Hugging Face Dataset logger.info(f"Assembling final dataset with {len(combined_records)} rows.") try: - # Convert to column-wise dict for HF Dataset - col_names = list(combined_records[0].keys()) - final_dict = {c: [] for c in col_names} - for rec in combined_records: - for c in col_names: - final_dict[c].append(rec[c]) - final_ds = Dataset.from_dict(final_dict) + final_ds = Dataset.from_list(combined_records) except Exception as ds_error: - logger.error(f"Failed to create final dataset object: {ds_error}") + logger.exception("Failed to create final dataset object") return - # ---------------------------------------- - # 7) Save dataset - # ---------------------------------------- + # Save dataset custom_save_dataset(dataset=final_ds, config=config, subset="lighteval") logger.success("Lighteval dataset saved successfully.") diff --git a/yourbench/pipeline/multi_hop_question_generation.py b/yourbench/pipeline/multi_hop_question_generation.py deleted file mode 100644 index 51f8b26b..00000000 --- a/yourbench/pipeline/multi_hop_question_generation.py +++ /dev/null @@ -1,375 +0,0 @@ 
-# ============================================================ -# multi_hop_question_generation.py -# ============================================================ -""" -Author: @sumukshashidhar - -Module Name: ------------- -multi_hop_question_generation - -Purpose: --------- -This module implements the multi-hop question generation stage within the YourBench pipeline. -It processes a dataset of documents—each containing a list of multi-hop chunks—and generates -multi-hop questions requiring integrative reasoning across those chunks. It uses a Large -Language Model (LLM) to produce question-answer pairs in JSON format. - -Usage: ------- -This module is typically invoked as part of the overall YourBench pipeline. It expects: -1. A source dataset (e.g., documents with 'multihop_chunks' field). -2. Configuration for multi-hop question generation, such as sampling parameters and - additional instructions. -3. The pipeline orchestrator (in `handler.py`) calls `run(config)` if - `multi_hop_question_generation` is enabled in the YAML configuration. - -The module then: -1. Optionally samples multi-hop chunks from each document. -2. Prompts a Large Language Model (LLM) to generate multi-hop question-answer pairs. -3. Parses and saves the generated questions in a structured HuggingFace `Dataset`. - -Error Handling and Logging: ---------------------------- -- Comprehensive logging is performed using `loguru` at various levels to trace execution. -- Exceptions are caught and logged as errors, with the module attempting to continue - where practical. -- Critical issues produce warnings or errors and gracefully terminate the stage. - -Module-Level Dependencies: --------------------------- -- Requires Python 3.9+ for modern type annotations (`list[...]`, `dict[...]`). -- Relies on the shared pipeline utilities (e.g., `yourbench.utils.dataset_engine`, - `yourbench.utils.inference_engine`, `yourbench.utils.prompts`). 
-- Preserves the existing signature and functionality for downstream consistency. -""" - -import random -from typing import Any, Dict -from dataclasses import field, dataclass - -from loguru import logger - -from datasets import Dataset -from yourbench.utils.prompts import ( - MULTI_HOP_QUESTION_GENERATION_USER_PROMPT, - MULTI_HOP_QUESTION_GENERATION_SYSTEM_PROMPT, - MULTI_HOP_QUESTION_GENERATION_SYSTEM_PROMPT_MULTI, -) -from yourbench.utils.dataset_engine import ( - custom_load_dataset, - custom_save_dataset, -) - -# Import the unified parsing function -from yourbench.utils.parsing_engine import shuffle_mcq, parse_qa_pairs_from_response -from yourbench.utils.inference_engine import InferenceCall, run_inference - - -@dataclass -class QuestionAnswerPair: - """ - Data structure to represent a question-answer pair returned by the model. - """ - - question: str - answer: str - choices: list[str] - estimated_difficulty: int = 5 - question_type: str = "unknown" - thought_process: str = "" - citations: list[str] = field(default_factory=list) - - def __post_init__(self) -> None: - # Normalize fields - self.question = str(self.question).strip() - self.answer = str(self.answer).strip() - self.estimated_difficulty = _force_int_in_range(self.estimated_difficulty, 1, 10) - self.question_type = str(self.question_type) - self.thought_process = str(self.thought_process) - if not isinstance(self.citations, list): - self.citations = [] - - if not isinstance(self.choices, list): - self.choices = [] - - -@dataclass -class MultiHopQuestionRow: - """ - Data structure to represent a single multi-hop question row. 
- """ - - document_id: str - source_chunk_ids: list[str] - additional_instructions: str - question: str - self_answer: str - choices: list[str] - estimated_difficulty: int - self_assessed_question_type: str - generating_model: str - thought_process: str - citations: list[str] = field(default_factory=list) - raw_response: str = field(default="") - - @classmethod - def from_qa_pair( - cls, - qa_pair: QuestionAnswerPair, - document_id: str, - source_chunk_ids: list[str], - generating_model: str, - raw_response: str = "", - additional_instructions: str = "", - ) -> "MultiHopQuestionRow": - return cls( - document_id=document_id, - source_chunk_ids=source_chunk_ids, - additional_instructions=additional_instructions, - question=qa_pair.question, - self_answer=qa_pair.answer, - choices=qa_pair.choices, - estimated_difficulty=qa_pair.estimated_difficulty, - self_assessed_question_type=qa_pair.question_type, - generating_model=generating_model, - thought_process=qa_pair.thought_process, - citations=qa_pair.citations, - raw_response=raw_response, - ) - - -def run(config: Dict[str, Any]) -> None: - """ - Execute the multi-hop question generation stage. - """ - stage_cfg = config.get("pipeline", {}).get("multi_hop_question_generation", {}) - if not stage_cfg.get("run", False): - logger.info("multi_hop_question_generation stage is disabled. Skipping.") - return - - # 1) Dataset Loading - dataset = custom_load_dataset(config=config, subset="chunked") - logger.info(f"Loaded chunked subset with {len(dataset)} rows for Multi-hop question generation.") - - # 2) Build Inference Calls (including sampling) - inference_calls, call_index_map = _multihop_chunk_sampling_and_calls(dataset, stage_cfg) - - # 3) Run Inference - if not inference_calls: - logger.warning("No multi-hop inference calls were created. 
Exiting stage.") - return - responses_dict = _multihop_qa_generation(config, inference_calls) - - # 4) Parse and Build Final Dataset - final_dataset = _parse_and_build_final(config, responses_dict, call_index_map, stage_cfg) - if final_dataset is None or len(final_dataset) == 0: - logger.warning("No valid multi-hop question rows produced. Exiting stage.") - return - - # 5) Save the result - custom_save_dataset(dataset=final_dataset, config=config, subset="multi_hop_questions") - logger.success("Multi-hop question generation completed successfully.") - - -def _multihop_chunk_sampling_and_calls(dataset, stage_cfg: Dict[str, Any]): - """ - Sample multi-hop chunks and build InferenceCalls. - Returns: - - inference_calls: list of InferenceCall - - call_index_map: parallel list of (row_idx, doc_id, source_chunk_ids) - """ - - if stage_cfg.get("question_type") == "multi-choice": - system_prompt = MULTI_HOP_QUESTION_GENERATION_SYSTEM_PROMPT_MULTI - else: - system_prompt = MULTI_HOP_QUESTION_GENERATION_SYSTEM_PROMPT - system_msg = { - "role": "system", - "content": system_prompt, - } - - all_inference_calls = [] - call_index_map = [] - - for row_idx, row in enumerate(dataset): - doc_summary = row.get("document_summary", "No summary provided.") - title = row.get("document_filename", f"Document_{row_idx}") - doc_id = row.get("document_id", f"doc_{row_idx}") - - multi_hop_chunks = row.get("multihop_chunks", []) - if not isinstance(multi_hop_chunks, list) or not multi_hop_chunks: - logger.warning(f"No multi-hop chunks found in row index={row_idx}, doc_id={doc_id}. 
Skipping row.") - continue - - chosen_multi_hops = _sample_multi_hop_chunks(multi_hop_chunks, stage_cfg.get("chunk_sampling", {})) - if not chosen_multi_hops: - logger.warning(f"Row idx={row_idx} doc_id={doc_id} had multi-hop chunks but none after sampling.") - continue - - additional_instructions = stage_cfg.get("additional_instructions", "undergraduate") - - for mh_idx, mh_dict in enumerate(chosen_multi_hops): - if not isinstance(mh_dict, dict): - continue - - subchunk_ids = mh_dict.get("chunk_ids", []) - subchunk_texts = mh_dict.get("chunks_text", []) - if not subchunk_texts: - logger.debug(f"Empty multi-hop chunk at row_idx={row_idx}, doc_id={doc_id}. Skipping.") - continue - - # Build user prompt by enumerating each subchunk - text_chunks_aggregated = "" - for i, sc_text in enumerate(subchunk_texts): - text_chunks_aggregated += f"{sc_text}\n" - - user_prompt_str = MULTI_HOP_QUESTION_GENERATION_USER_PROMPT.format( - title=title, - document_summary=doc_summary, - chunks=text_chunks_aggregated, - additional_instructions=additional_instructions, - ) - user_msg = {"role": "user", "content": user_prompt_str} - - inference_call = InferenceCall(messages=[system_msg, user_msg], tags=["multi_hop_qa"]) - all_inference_calls.append(inference_call) - call_index_map.append((row_idx, doc_id, subchunk_ids)) - - return all_inference_calls, call_index_map - - -def _sample_multi_hop_chunks( - mh_chunks: list[Dict[str, Any]], chunk_sampling_cfg: Dict[str, Any] -) -> list[Dict[str, Any]]: - """ - Sample multi-hop chunks based on the stage configuration. 
- """ - if len(chunk_sampling_cfg) == 0: - # If there's no config, return all - return mh_chunks - - mode = chunk_sampling_cfg.get("mode", "all").lower() - value = chunk_sampling_cfg.get("value", 1.0) - rand_seed = chunk_sampling_cfg.get("random_seed", 42) - random.seed(rand_seed) - - total_multi_hops = len(mh_chunks) - if total_multi_hops < 2: # if 0 or 1 chunk - return mh_chunks - - if mode == "percentage": - k = int(total_multi_hops * float(value)) - k = max(0, min(k, total_multi_hops)) - if k < total_multi_hops: - return random.sample(mh_chunks, k) - return mh_chunks - - elif mode == "count": - k = min(int(value), total_multi_hops) - if k < total_multi_hops: - return random.sample(mh_chunks, k) - return mh_chunks - - # Otherwise return all - return mh_chunks - - -def _multihop_qa_generation(config: Dict[str, Any], inference_calls: list[InferenceCall]): - """ - Call the inference engine to get multi-hop Q&A responses. - """ - logger.info(f"Sending {len(inference_calls)} multi-hop calls to inference...") - return run_inference( - config=config, - step_name="multi_hop_question_generation", - inference_calls=inference_calls, - ) - - -def _parse_and_build_final( - config: Dict[str, Any], - responses_dict: Dict[str, list[str]], - call_index_map: list[tuple], - stage_config: Dict[str, Any], -) -> Dataset: - """ - Parse each model's responses into MultiHopQuestionRow items, then build a final dataset. - """ - final_multi_hop_questions = [] - - for model_name, model_responses in responses_dict.items(): - logger.info(f"Processing {len(model_responses)} responses for model: {model_name}") - if len(model_responses) != len(call_index_map): - logger.error( - f"Model '{model_name}' returned {len(model_responses)} responses; expected {len(call_index_map)}. Mismatch." 
- ) - - for idx, raw_resp in enumerate(model_responses): - if idx >= len(call_index_map): - break - - row_idx, doc_id, source_chunk_ids = call_index_map[idx] - qa_pairs = parse_qa_pairs_from_response(raw_resp) - - if not qa_pairs: - logger.warning(f"No parseable JSON for row={row_idx}, doc_id={doc_id} (model={model_name}).") - continue - - # Otherwise, process each QA pair - for qap_dict in qa_pairs: - try: - # Shuffle before wrapping into dataclass - qap_dict = shuffle_mcq(qap_dict) - # Convert dictionary -> QuestionAnswerPair - pair_obj = QuestionAnswerPair( - question=qap_dict.get("question", ""), - answer=qap_dict.get("answer", ""), - choices=qap_dict.get("choices", []), - estimated_difficulty=qap_dict.get("estimated_difficulty", 5), - question_type=qap_dict.get("question_type", "unknown"), - thought_process=qap_dict.get("thought_process", ""), - citations=qap_dict.get("citations", []), - ) - if not pair_obj.question: - logger.debug(f"Empty question found for row={row_idx}, doc_id={doc_id}, skipping pair.") - continue - - row_obj = MultiHopQuestionRow.from_qa_pair( - qa_pair=pair_obj, - document_id=doc_id, - source_chunk_ids=source_chunk_ids, - generating_model=model_name, - raw_response=raw_resp, - additional_instructions=stage_config.get( - "additional_instructions", "Generate questions to test a curious adult" - ), - ) - final_multi_hop_questions.append(row_obj.__dict__) - - except Exception as pair_error: - logger.warning(f"Error processing QA pair for doc_id={doc_id}, skipping pair: {pair_error}") - continue - - if not final_multi_hop_questions: - return None - - logger.info(f"Constructing multi-hop question dataset with {len(final_multi_hop_questions)} rows...") - try: - col_keys = list(final_multi_hop_questions[0].keys()) - dataset_dict = {k: [row[k] for row in final_multi_hop_questions] for k in col_keys} - return Dataset.from_dict(dataset_dict) - except Exception as ds_error: - logger.error(f"Failed to create dataset from multi-hop question rows: 
{ds_error}") - return None - - -def _force_int_in_range(value: Any, min_val: int, max_val: int) -> int: - """ - Convert a value to int and clamp it between min_val and max_val. - """ - try: - ivalue = int(value) - except (ValueError, TypeError): - ivalue = (min_val + max_val) // 2 - return max(min_val, min(ivalue, max_val)) diff --git a/yourbench/pipeline/question_generation.py b/yourbench/pipeline/question_generation.py new file mode 100644 index 00000000..736a22a2 --- /dev/null +++ b/yourbench/pipeline/question_generation.py @@ -0,0 +1,133 @@ +""" +Question Generation Pipeline (Single-Hop & Multi-Hop) + +This module defines a pipeline for generating question-answer pairs using either +single document chunks (single-hop) or multiple chunks (multi-hop). It supports +prompt-based inference via a language model, parses responses, and saves the output. + +Features: +- Configurable chunk sampling (by count or percentage) +- Prompt formatting for single-hop and multi-hop generation +- Response parsing and validation +- Integration with HuggingFace Datasets and custom I/O + +Main Functions: +- run_single_shot(): Generates single-hop questions. +- run_multi_hop(): Generates multi-hop questions. 
+""" + +from __future__ import annotations +from typing import Any + +from loguru import logger + +from datasets import Dataset +from yourbench.utils.prompts import ( + QUESTION_GENERATION_SYSTEM_PROMPT, + QUESTION_GENERATION_SYSTEM_PROMPT_MULTI, + MULTI_HOP_QUESTION_GENERATION_SYSTEM_PROMPT, + MULTI_HOP_QUESTION_GENERATION_SYSTEM_PROMPT_MULTI, +) +from yourbench.utils.chunking_utils import get_sampling_cfg +from yourbench.utils.dataset_engine import custom_load_dataset, custom_save_dataset +from yourbench.utils.parsing_engine import ( + parse_multi_hop_responses, + parse_single_shot_responses, +) +from yourbench.utils.inference.inference_core import run_inference +from yourbench.utils.inference.inference_builders import ( + build_multi_hop_inference_calls, + build_single_shot_inference_calls, +) + + +SINGLE_SHOT_KEY = "single_shot_question_generation" +MULTI_HOP_KEY = "multi_hop_question_generation" + + +def run_single_shot(config: dict[str, Any]) -> None: + """ + Orchestrates the single-hop question generation pipeline. 
+ """ + stage_cfg = config.get("pipeline", {}).get(SINGLE_SHOT_KEY, {}) + if not stage_cfg.get("run", False): + logger.info("single_shot_question_generation stage is disabled.") + return + + question_mode = stage_cfg.get("question_mode", "open-ended") + allowed_types = {"open-ended", "multi-choice"} + if question_mode not in allowed_types: + logger.warning(f"Invalid question_mode '{question_mode}', defaulting to 'open-ended'") + question_mode = "open-ended" + + logger.info(f"Single-shot question_mode: {question_mode}") + + if question_mode == "multi-choice": + system_prompt = QUESTION_GENERATION_SYSTEM_PROMPT_MULTI + logger.debug("Using MULTI-CHOICE prompt for single-shot generation.") + else: + system_prompt = QUESTION_GENERATION_SYSTEM_PROMPT + logger.debug("Using OPEN-ENDED prompt for single-shot generation.") + + system_msg = {"role": "system", "content": system_prompt} + + dataset = custom_load_dataset(config=config, subset="chunked") + logger.info(f"Loaded {len(dataset)} chunks for single-shot.") + + sampling_cfg = get_sampling_cfg(stage_cfg) + + inference_calls, inference_index_map = build_single_shot_inference_calls( + dataset, system_msg, stage_cfg, sampling_cfg + ) + if not inference_calls: + logger.warning("No valid inference calls for single-shot.") + return + + responses = run_inference(config=config, step_name=SINGLE_SHOT_KEY, inference_calls=inference_calls) + final_rows = parse_single_shot_responses(responses, inference_index_map, stage_cfg) + + if final_rows: + logger.info(f"Saving {len(final_rows)} single-shot questions.") + custom_save_dataset(Dataset.from_list(final_rows), config=config, subset="single_shot_questions") + + +def run_multi_hop(config: dict[str, Any]) -> None: + """ + Orchestrates the multi-hop question generation pipeline. 
+ """ + stage_cfg = config.get("pipeline", {}).get(MULTI_HOP_KEY, {}) + if not stage_cfg.get("run", False): + logger.info("multi_hop_question_generation stage is disabled.") + return + + question_mode = stage_cfg.get("question_mode", "open-ended") + allowed_types = {"open-ended", "multi-choice"} + if question_mode not in allowed_types: + logger.warning(f"Invalid question_mode '{question_mode}', defaulting to 'open-ended'") + question_mode = "open-ended" + + logger.info(f"Multi-hop question_mode: {question_mode}") + + if question_mode == "multi-choice": + system_prompt = MULTI_HOP_QUESTION_GENERATION_SYSTEM_PROMPT_MULTI + logger.debug("Using MULTI-CHOICE prompt for multi-hop generation.") + else: + system_prompt = MULTI_HOP_QUESTION_GENERATION_SYSTEM_PROMPT + logger.debug("Using OPEN-ENDED prompt for multi-hop generation.") + + system_msg = {"role": "system", "content": system_prompt} + + dataset = custom_load_dataset(config=config, subset="chunked") + logger.info(f"Loaded {len(dataset)} documents for multi-hop.") + + inference_calls, inference_index_map = build_multi_hop_inference_calls(dataset, system_msg, stage_cfg) + if not inference_calls: + logger.warning("No valid multi-hop chunks found for inference.") + return + + responses = run_inference(config=config, step_name=MULTI_HOP_KEY, inference_calls=inference_calls) + final_rows = parse_multi_hop_responses(responses, inference_index_map, stage_cfg) + + if final_rows: + logger.info(f"Saving {len(final_rows)} multi-hop questions.") + custom_save_dataset(Dataset.from_list(final_rows), config=config, subset="multi_hop_questions") diff --git a/yourbench/pipeline/single_shot_question_generation.py b/yourbench/pipeline/single_shot_question_generation.py deleted file mode 100644 index b1879fca..00000000 --- a/yourbench/pipeline/single_shot_question_generation.py +++ /dev/null @@ -1,366 +0,0 @@ -# ============================================================ -# single_shot_question_generation.py -# 
============================================================ -""" -Author: @sumukshashidhar - -This module implements the Single-Shot Question Generation stage of the YourBench pipeline. - -Overview: - - Given a dataset containing document summaries and their associated single-hop chunks, - this stage generates question-answer pairs for each chunk using one or more LLMs. - - The generated questions are intended to be standalone, moderately challenging, - and reflect a deep understanding of the provided text chunk. - -Usage: - 1) The pipeline will call the `run()` function from this module if the user configures - `pipeline.single_shot_question_generation.run = True`. - 2) This function loads the required dataset (specified in the pipeline configuration), - samples chunks if necessary, and calls an LLM to generate questions. - 3) The output is stored in a new dataset containing each generated question, - an estimated difficulty rating, and the model's self-provided reasoning. - -Stage-Specific Logging: - - All errors and relevant log messages are recorded in `logs/single_shot_question_generation.log`. - -Google-Style Docstrings: - - This codebase uses Python type hints and Google-style docstrings for clarity, - maintainability, and consistency. -""" - -import random -from typing import Any -from dataclasses import field, dataclass - -from loguru import logger - -from datasets import Dataset -from yourbench.utils.prompts import ( - QUESTION_GENERATION_USER_PROMPT, - QUESTION_GENERATION_SYSTEM_PROMPT, - QUESTION_GENERATION_SYSTEM_PROMPT_MULTI, -) -from yourbench.utils.dataset_engine import ( - custom_load_dataset, - custom_save_dataset, -) - -# Import the unified parsing function -from yourbench.utils.parsing_engine import shuffle_mcq, parse_qa_pairs_from_response -from yourbench.utils.inference_engine import InferenceCall, run_inference - - -@dataclass -class SingleHopQuestionRow: - """ - Represents a single-hop question row derived from a single chunk of text. 
- - Attributes: - chunk_id: A string identifier for the chunk from which this question was generated. - document_id: Identifier for the parent document. - question: The generated question text. - self_answer: The LLM-produced short answer or reasoning. - estimated_difficulty: An integer from 1-10 indicating the estimated difficulty. - self_assessed_question_type: A descriptor for the type or style of question. - generating_model: The model used to generate this question. - thought_process: Free-form text describing how the question was derived. - raw_response: The full, unedited response from the model. - citations: A list of references or quotes extracted from the chunk. - """ - - chunk_id: str - document_id: str - additional_instructions: str - question: str - self_answer: str - choices: list[str] - estimated_difficulty: int - self_assessed_question_type: str - generating_model: str - thought_process: str - raw_response: str - citations: list[str] - - -@dataclass -class ChunkSamplingConfig: - mode: str = "all" - value: float = 1.0 - random_seed: int = 42 - - -@dataclass -class SingleShotQuestionGenerationConfig: - run: bool = False - source_subset: str = "" - output_subset: str = "" - additional_instructions: str = "Generate questions to test an undergraduate student" - chunk_sampling: ChunkSamplingConfig = field(default_factory=ChunkSamplingConfig) - question_type: str = "open-ended" - - -@dataclass -class DocumentRow: - document_summary: str = "No summary available." - document_filename: str = "" - document_id: str = "" - chunks: list[dict[str, Any]] = field(default_factory=list) - - -def run(config: dict[str, Any]) -> None: - """ - Executes the Single-Shot Question Generation stage of the pipeline. - """ - stage_config = _load_stage_config(config) - if not stage_config.run: - logger.info("single_shot_question_generation stage is disabled. 
Skipping.") - return - - dataset = custom_load_dataset(config=config, subset="chunked") - logger.info(f"Loaded chunked subset with {len(dataset)} rows for Single-shot question generation.") - - inference_calls, call_index_mapping = _build_inference_calls(dataset, stage_config) - if not inference_calls: - logger.warning("No inference calls were created for single_shot_question_generation.") - return - - responses_dict = _execute_inference(inference_calls, config) - if not responses_dict: - return - - question_dataset = _process_responses_and_build_dataset(responses_dict, call_index_mapping, stage_config) - if question_dataset is None or len(question_dataset) == 0: - logger.warning("No valid questions produced in single_shot_question_generation.") - return - - custom_save_dataset(dataset=question_dataset, config=config, subset="single_shot_questions") - logger.success("Single-shot question generation completed successfully.") - - -def _load_stage_config(config: dict[str, Any]) -> SingleShotQuestionGenerationConfig: - """ - Extract the stage-specific configuration from the pipeline config. 
- """ - pipeline_config = config.get("pipeline", {}) - stage_config_dict = pipeline_config.get("single_shot_question_generation", {}) - chunk_sampling_cfg = stage_config_dict.get("chunk_sampling", {}) - - # For readability: if len(chunk_sampling_cfg) == 0 - if len(chunk_sampling_cfg) == 0: - chunk_sampling = ChunkSamplingConfig() - else: - chunk_sampling = ChunkSamplingConfig( - mode=chunk_sampling_cfg.get("mode", "all"), - value=chunk_sampling_cfg.get("value", 1.0), - random_seed=chunk_sampling_cfg.get("random_seed", 42), - ) - - return SingleShotQuestionGenerationConfig( - run=stage_config_dict.get("run", False), - source_subset=stage_config_dict.get("source_subset", ""), - output_subset=stage_config_dict.get("output_subset", ""), - additional_instructions=stage_config_dict.get("additional_instructions", "undergraduate"), - chunk_sampling=chunk_sampling, - question_type=stage_config_dict.get("question_type", "open-ended"), - ) - - -def _sample_chunks_if_needed( - chunks_list: list[dict[str, Any]], chunk_sampling: ChunkSamplingConfig -) -> list[dict[str, Any]]: - """ - Samples chunks according to user configuration, either by percentage or count. - Returns all chunks if no sampling configuration is provided or invalid. 
- """ - if not chunks_list: - return chunks_list - - mode = chunk_sampling.mode.lower() - value = chunk_sampling.value - random_seed = chunk_sampling.random_seed - random.seed(random_seed) - - total_chunks = len(chunks_list) - if total_chunks == 0: - return chunks_list - - if mode == "percentage": - # e.g., value = 0.5 => sample 50% of the chunks - num_selected = int(total_chunks * float(value)) - num_selected = max(0, min(num_selected, total_chunks)) - if num_selected < total_chunks: - return random.sample(chunks_list, num_selected) - return chunks_list - - elif mode == "count": - # e.g., value = 10 => sample 10 chunks - num_selected = min(int(value), total_chunks) - if num_selected < total_chunks: - return random.sample(chunks_list, num_selected) - return chunks_list - - # "all" or unrecognized mode => return all - return chunks_list - - -def _build_inference_calls(dataset, stage_config: SingleShotQuestionGenerationConfig): - """ - Create the InferenceCall objects needed for single-shot question generation. - Returns the list of calls and a parallel mapping of (row_index, doc_id, chunk_id). - """ - - if stage_config.question_type == "multi-choice": - system_prompt = QUESTION_GENERATION_SYSTEM_PROMPT_MULTI - else: - system_prompt = QUESTION_GENERATION_SYSTEM_PROMPT - - system_message = {"role": "system", "content": system_prompt} - inference_calls = [] - call_index_mapping = [] - - for row_index, row in enumerate(dataset): - doc_row = DocumentRow( - document_summary=row.get("document_summary", "No summary available."), - document_filename=row.get("document_filename", f"Document_{row_index}"), - document_id=row.get("document_id", f"doc_{row_index}"), - chunks=row.get("chunks", []), - ) - - single_hop_chunks = doc_row.chunks - if not isinstance(single_hop_chunks, list) or not single_hop_chunks: - logger.debug(f"No chunks found in row index={row_index} for doc_id={doc_row.document_id}. 
Skipping row.") - continue - - chosen_chunks = _sample_chunks_if_needed(single_hop_chunks, stage_config.chunk_sampling) - additional_instructions = stage_config.additional_instructions - - # Build user messages for each chunk - for chunk_index, chunk_info in enumerate(chosen_chunks): - if not isinstance(chunk_info, dict): - chunk_text = str(chunk_info) - chunk_id = f"{doc_row.document_id}_{chunk_index}" - else: - chunk_text = chunk_info.get("chunk_text", "") - chunk_id = chunk_info.get("chunk_id", f"{doc_row.document_id}_{chunk_index}") - - user_prompt_str = QUESTION_GENERATION_USER_PROMPT.format( - title=doc_row.document_filename, - document_summary=doc_row.document_summary, - text_chunk=chunk_text, - additional_instructions=additional_instructions, - ) - user_message = {"role": "user", "content": user_prompt_str} - - inference_call = InferenceCall(messages=[system_message, user_message], tags=["single_shot_qa"]) - inference_calls.append(inference_call) - call_index_mapping.append((row_index, doc_row.document_id, chunk_id)) - - return inference_calls, call_index_mapping - - -def _execute_inference(inference_calls, config: dict[str, Any]): - """ - Sends the prepared inference calls to the LLM(s). Returns a dict of responses. - """ - logger.info(f"Sending {len(inference_calls)} calls to inference for single-shot question generation.") - try: - return run_inference( - config=config, - step_name="single_shot_question_generation", - inference_calls=inference_calls, - ) - except Exception as err: - logger.error(f"Inference failed for single_shot_question_generation: {err}") - return {} - - -def _process_responses_and_build_dataset( - responses_dict: dict[str, list[str]], - call_index_mapping: list[tuple], - stage_config: SingleShotQuestionGenerationConfig, -) -> Dataset: - """ - Take the LLM responses, parse them, and build a Hugging Face Dataset - of single-shot question rows. 
- """ - question_dataset_rows = [] - - for model_name, model_responses in responses_dict.items(): - logger.info(f"Processing {len(model_responses)} responses from model: {model_name}") - if len(model_responses) != len(call_index_mapping): - logger.error( - f"Model '{model_name}' returned {len(model_responses)} responses but expected {len(call_index_mapping)}. Mismatch." - ) - - for idx, raw_response in enumerate(model_responses): - if idx >= len(call_index_mapping): - break - - row_index, doc_id, chunk_id = call_index_mapping[idx] - qa_pairs = parse_qa_pairs_from_response(raw_response) - - # If parsing fails or returns nothing, still create a fallback row - if not qa_pairs: - logger.warning( - f"No parseable JSON found (or empty list) for row_index={row_index}, chunk_id={chunk_id}, model={model_name}. Creating fallback row." - ) - continue - - # Otherwise, process each QA pair - for pair in qa_pairs: - try: - # Shuffle MCQ before extracting fields - pair = shuffle_mcq(pair) - # Safely extract data from pair - question_text = str(pair.get("question", "")).strip() - answer_text = str(pair.get("answer", "")).strip() - choices = pair.get("choices", []) - difficulty_val = _force_int_in_range(pair.get("estimated_difficulty", 5), 1, 10) - question_type = str(pair.get("question_type", "unknown")) - thought_process = str(pair.get("thought_process", "")) - citations = pair.get("citations", []) - if not isinstance(citations, list): - citations = [] - - if not question_text: - logger.debug(f"Empty question found; skipping this QA pair (row_index={row_index}).") - continue - - # Build final row - question_row = SingleHopQuestionRow( - chunk_id=chunk_id, - document_id=doc_id, - additional_instructions=stage_config.additional_instructions, - question=question_text, - self_answer=answer_text, - choices=choices, - estimated_difficulty=difficulty_val, - self_assessed_question_type=question_type, - generating_model=model_name, - thought_process=thought_process, - 
raw_response=raw_response, - citations=citations, - ) - question_dataset_rows.append(question_row.__dict__) - except Exception as e: - logger.error(f"Error processing QA pair for row_index={row_index}, chunk_id={chunk_id}: {e}") - continue - - if not question_dataset_rows: - return None - - logger.info(f"Constructing final dataset with {len(question_dataset_rows)} single-hop questions.") - column_names = list(question_dataset_rows[0].keys()) - final_data = {column: [row[column] for row in question_dataset_rows] for column in column_names} - return Dataset.from_dict(final_data) - - -def _force_int_in_range(value: Any, min_val: int, max_val: int) -> int: - """ - Convert a value to int and clamp it between min_val and max_val. - """ - try: - ivalue = int(value) - except (ValueError, TypeError): - ivalue = (min_val + max_val) // 2 - return max(min_val, min(ivalue, max_val)) diff --git a/yourbench/pipeline/summarization.py b/yourbench/pipeline/summarization.py index 52884d78..15ef74b0 100644 --- a/yourbench/pipeline/summarization.py +++ b/yourbench/pipeline/summarization.py @@ -1,72 +1,17 @@ -# summarization.py -# ============================================================================= -# Author: @sumukshashidhar -# -# Module: Summarization Pipeline Stage -# ============================================================================= -""" -Summarization Stage -=================== - -This module handles the summarization stage of the YourBench pipeline. It takes -documents (with their raw text) and generates concise yet comprehensive summaries -for each document. - -Usage: ------- -1. Ensure the pipeline configuration has an entry for the `summarization` stage - with the desired settings. For example: - - summarization: - run: true - timeout_seconds: 300 - -2. 
When the pipeline runs, it loads the target dataset, calls the summarization - model(s) to produce summaries, logs intermediate steps, and saves the updated - dataset with new columns: - - raw_document_summary - - document_summary - - summarization_model - -Error Handling & Logging: -------------------------- -- All errors are logged using `loguru` to `logs/summarization.log`. -- The stage attempts to proceed with partial data even if some calls fail, never - abruptly terminating the pipeline. - -Important Notes: ----------------- -- This stage relies on the `run_inference` utility function from yourbench.utils.inference_engine - for concurrency, timeouts, and model management. -- Summaries are extracted from the model's output by parsing XML tags. -- If no valid summary is found, the pipeline substitutes a fallback string. - -See Also: ---------- -- yourbench.utils.inference_engine for concurrency logic -- yourbench.utils.dataset_engine for loading/saving dataset -""" - -from __future__ import annotations -from typing import Any, List, Tuple +from typing import Any import tiktoken from loguru import logger from datasets import Dataset from yourbench.utils.prompts import ( + SUMMARIZATION_USER_PROMPT, COMBINE_SUMMARIES_USER_PROMPT, - CHUNK_SUMMARIZATION_USER_PROMPT, ) from yourbench.utils.chunking_utils import split_into_token_chunks from yourbench.utils.dataset_engine import custom_load_dataset, custom_save_dataset from yourbench.utils.parsing_engine import extract_content_from_xml_tags -from yourbench.utils.inference_engine import InferenceCall, run_inference - - -############################ -# Internal helper functions # -############################ +from yourbench.utils.inference.inference_core import InferenceCall, run_inference def _build_chunk_calls( @@ -74,37 +19,37 @@ def _build_chunk_calls( max_tokens: int, overlap: int, encoding_name: str, -) -> Tuple[List[InferenceCall], List[Tuple[int, int]]]: +) -> tuple[list[InferenceCall], list[tuple[int, int]]]: 
"""Prepare inference calls for first-level chunk summaries. - Returns - ------- - (calls, mapping) where *mapping* aligns each call to (doc_idx, chunk_idx). + Returns: + A tuple containing: + - A list of inference calls. + - A list of mappings, where each mapping is a tuple (doc_idx, chunk_idx) + aligning each call to its document and chunk index. chunk_idx is -1 for + documents treated as a single chunk. """ - calls: List[InferenceCall] = [] - mapping: List[Tuple[int, int]] = [] # (doc_index, chunk_index) + calls: list[InferenceCall] = [] + mapping: list[tuple[int, int]] = [] # (doc_index, chunk_index) - # ─── NEW: robust encoding fetch with fallback ──────────────────────────── try: enc = tiktoken.get_encoding(encoding_name) - except Exception as e: # KeyError on unknown name, ValueError on bad cache + except Exception as e: + error_message = str(e) + truncated_error = error_message[:60] + ("…" if len(error_message) > 60 else "") logger.warning( - "Unknown / unavailable encoding '{}'. Falling back to 'cl100k_base' ({})", - encoding_name, - str(e)[:60] + ("…" if len(str(e)) > 60 else ""), + f"Unknown / unavailable encoding '{encoding_name}'. 
Falling back to 'cl100k_base' ({truncated_error})" ) enc = tiktoken.get_encoding("cl100k_base") - # ──────────────────────────────────────────────────────────────────────── for doc_idx, doc_text in enumerate(dataset["document_text"]): token_len = len(enc.encode(doc_text)) if token_len <= max_tokens: # treat as single chunk (chunk_idx = -1) - prompt = CHUNK_SUMMARIZATION_USER_PROMPT.format(chunk=doc_text) + prompt = SUMMARIZATION_USER_PROMPT.format(document=doc_text) calls.append(InferenceCall(messages=[{"role": "user", "content": prompt}], tags=["chunk_summary"])) mapping.append((doc_idx, -1)) continue - # Long doc ⇒ split & create a call per chunk chunks = split_into_token_chunks( doc_text, chunk_tokens=max_tokens, @@ -112,142 +57,146 @@ def _build_chunk_calls( encoding_name=encoding_name, ) for chunk_idx, chunk in enumerate(chunks): - prompt = CHUNK_SUMMARIZATION_USER_PROMPT.format(chunk=chunk) + prompt = SUMMARIZATION_USER_PROMPT.format(document=chunk) calls.append(InferenceCall(messages=[{"role": "user", "content": prompt}], tags=["chunk_summary"])) mapping.append((doc_idx, chunk_idx)) - logger.info("Prepared {} chunk-level inference calls.", len(calls)) + logger.info(f"Prepared {len(calls)} chunk-level inference calls.") return calls, mapping def _collect_chunk_summaries( - response_dict: dict[str, List[str]], - mapping: List[Tuple[int, int]], + response_dict: dict[str, list[str]], + mapping: list[tuple[int, int]], num_docs: int, -) -> Tuple[str, List[List[str]], List[List[str]]]: - """Re-orders raw model responses back into per-document lists. - - Notes - ----- - `model_name` is always `str` (never None) because we early-return if - `response_dict` is empty. 
- """ +) -> tuple[str, list[list[str]], list[list[str]]]: + """Re-orders raw model responses back into per-document lists of summaries.""" if not response_dict: return "", [], [] model_name = list(response_dict.keys())[0] responses = response_dict[model_name] - # Ensure response count matches call count if len(responses) != len(mapping): - logger.warning("Response count {} ≠ mapping count {} – truncating/min-padding.", len(responses), len(mapping)) - # pad / trim + logger.warning(f"Response count {len(responses)} ≠ mapping count {len(mapping)} – truncating/min-padding.") diff = len(mapping) - len(responses) if diff > 0: responses.extend([""] * diff) else: responses = responses[: len(mapping)] - # bucket by doc - raw_by_doc: List[List[str]] = [[] for _ in range(num_docs)] - cleaned_by_doc: List[List[str]] = [[] for _ in range(num_docs)] + raw_by_doc: list[list[str]] = [[] for _ in range(num_docs)] + cleaned_by_doc: list[list[str]] = [[] for _ in range(num_docs)] for resp, (doc_idx, _chunk_idx) in zip(responses, mapping): raw_by_doc[doc_idx].append(resp) - summary = extract_content_from_xml_tags(resp, "chunk_summary") or extract_content_from_xml_tags( + summary_content = extract_content_from_xml_tags(resp, "chunk_summary") or extract_content_from_xml_tags( resp, "final_summary" ) - cleaned_by_doc[doc_idx].append(summary.strip() if summary else "") + cleaned_by_doc[doc_idx].append(summary_content.strip() if summary_content else "") return model_name, raw_by_doc, cleaned_by_doc -def _build_combine_calls(summaries_by_doc: List[List[str]]) -> Tuple[List[InferenceCall], List[int]]: - """Prepare second-stage calls that merge chunk summaries into one summary.""" - calls: List[InferenceCall] = [] - doc_indices: List[int] = [] - skipped = 0 # MOD: track how many docs are trivially short +def _build_combine_calls(summaries_by_doc: list[list[str]]) -> tuple[list[InferenceCall], list[int]]: + """Prepare second-stage calls to merge multiple chunk summaries into a single 
summary.""" + calls: list[InferenceCall] = [] + doc_indices_for_combine: list[int] = [] + skipped_doc_count = 0 for doc_idx, chunk_summaries in enumerate(summaries_by_doc): - if len(chunk_summaries) <= 1: # already short ⇒ skip combine - skipped += 1 + if len(chunk_summaries) <= 1: # Already a single summary (or empty), skip combine + skipped_doc_count += 1 + continue + + valid_summaries = [s for s in chunk_summaries if s] + if not valid_summaries: + skipped_doc_count += 1 continue - bullet_list = "\n".join(f"- {s}" for s in chunk_summaries if s) + + bullet_list = "\\n".join(f"- {s}" for s in valid_summaries) prompt = COMBINE_SUMMARIES_USER_PROMPT.format(chunk_summaries=bullet_list) calls.append(InferenceCall(messages=[{"role": "user", "content": prompt}], tags=["merge_summary"])) - doc_indices.append(doc_idx) + doc_indices_for_combine.append(doc_idx) - logger.info("Prepared {} reducer calls ({} docs skipped – single / empty chunk).", len(calls), skipped) # NEW line - return calls, doc_indices + logger.info( + f"Prepared {len(calls)} combine-stage inference calls ({skipped_doc_count} docs skipped – single/empty chunk list)." + ) + return calls, doc_indices_for_combine def _merge_final_summaries( - existing_singletons: List[str], - combine_responses: List[str], - doc_indices: List[int], -) -> List[str]: - """Blend reducer results with already-final single-chunk docs.""" - final_summaries = existing_singletons.copy() - - for resp, doc_idx in zip(combine_responses, doc_indices): - parsed = extract_content_from_xml_tags(resp, "final_summary") - final_summaries[doc_idx] = parsed.strip() if parsed else "No summary available." 
- return final_summaries - + current_final_summaries: list[str], + combined_responses: list[str], + doc_indices_to_update: list[int], +) -> list[str]: + """Integrates combined summaries into the list of final summaries.""" + updated_final_summaries = current_final_summaries.copy() -################# -# Stage runner # -################# + for resp, doc_idx in zip(combined_responses, doc_indices_to_update): + parsed_summary = extract_content_from_xml_tags(resp, "final_summary") + updated_final_summaries[doc_idx] = parsed_summary.strip() if parsed_summary else "No summary available." + return updated_final_summaries def run(config: dict[str, Any]) -> None: + """Executes the hierarchical summarization pipeline.""" stage_cfg = config.get("pipeline", {}).get("summarization", {}) if not stage_cfg.get("run", False): logger.info("Summarization stage disabled – skipping.") return - max_tokens = stage_cfg.get("max_tokens", 16384) - overlap = stage_cfg.get("token_overlap", 128) - encoding_name = stage_cfg.get("encoding_name", "cl100k_base") + max_tokens: int = stage_cfg.get("max_tokens", 16384) + overlap: int = stage_cfg.get("token_overlap", 128) + encoding_name: str = stage_cfg.get("encoding_name", "cl100k_base") logger.info("=== Summarization v2 – map-reduce ===") - # 1) Load dataset produced by ingestion dataset = custom_load_dataset(config=config, subset="ingested") - if len(dataset) == 0: - logger.warning("Ingested dataset empty – nothing to summarise.") + if not dataset or len(dataset) == 0: + logger.warning("Ingested dataset is empty or None – nothing to summarise.") return - logger.info("Loaded {} documents for summarisation.", len(dataset)) + logger.info(f"Loaded {len(dataset)} documents for summarization.") - # 2) First pass – chunk summaries chunk_calls, call_map = _build_chunk_calls(dataset, max_tokens, overlap, encoding_name) - chunk_resp = run_inference(config=config, step_name="summarization_chunk", inference_calls=chunk_calls) - model_name, raw_chunk_by_doc, 
clean_chunk_by_doc = _collect_chunk_summaries(chunk_resp, call_map, len(dataset)) + chunk_responses_dict = run_inference(config=config, step_name="summarization", inference_calls=chunk_calls) + model_name, raw_chunk_summaries_by_doc, cleaned_chunk_summaries_by_doc = _collect_chunk_summaries( + chunk_responses_dict, call_map, len(dataset) + ) - # 3) Second pass – combine summaries where needed - combine_calls, doc_indices = _build_combine_calls(clean_chunk_by_doc) - combine_summaries_raw: List[str] = [] - if combine_calls: - combine_resp = run_inference(config=config, step_name="summarization_combine", inference_calls=combine_calls) - combine_model = list(combine_resp.keys())[0] if combine_resp else model_name - if combine_model != model_name: - logger.warning("Different model used in reducer stage: {} vs {}", combine_model, model_name) - combine_summaries_raw = combine_resp.get(combine_model, []) if combine_resp else [] - - # produce final list matching dataset order - # Start with single-chunk docs: take their sole summary - final_summaries = [docs[0] if docs else "" for docs in clean_chunk_by_doc] + combine_calls, doc_indices_for_combine = _build_combine_calls(cleaned_chunk_summaries_by_doc) + + raw_combined_summaries: list[str] = [] if combine_calls: - final_summaries = _merge_final_summaries(final_summaries, combine_summaries_raw, doc_indices) + combine_responses_dict = run_inference(config=config, step_name="summarization", inference_calls=combine_calls) + if combine_responses_dict: + combine_model_name = list(combine_responses_dict.keys())[0] + if combine_model_name != model_name and model_name: + logger.warning(f"Different model used in combine stage: {combine_model_name} vs {model_name}") + raw_combined_summaries = combine_responses_dict.get(combine_model_name, []) + else: + raw_combined_summaries = [""] * len(doc_indices_for_combine) - # 4) Add columns & persist - dataset = dataset.add_column("raw_chunk_summaries", raw_chunk_by_doc) - dataset = 
dataset.add_column("chunk_summaries", clean_chunk_by_doc) - dataset = dataset.add_column( - "raw_document_summary", combine_summaries_raw if combine_calls else [""] * len(dataset) - ) - dataset = dataset.add_column("document_summary", final_summaries) - dataset = dataset.add_column("summarization_model", [model_name] * len(dataset)) + final_document_summaries: list[str] = [ + summaries[0] if summaries else "" for summaries in cleaned_chunk_summaries_by_doc + ] + + if combine_calls and raw_combined_summaries: + final_document_summaries = _merge_final_summaries( + final_document_summaries, raw_combined_summaries, doc_indices_for_combine + ) + + full_raw_combined_summaries = [""] * len(dataset) + for i, doc_idx in enumerate(doc_indices_for_combine): + if i < len(raw_combined_summaries): + full_raw_combined_summaries[doc_idx] = raw_combined_summaries[i] + + dataset = dataset.add_column("raw_chunk_summaries", raw_chunk_summaries_by_doc) + dataset = dataset.add_column("chunk_summaries", cleaned_chunk_summaries_by_doc) + dataset = dataset.add_column("raw_document_summary", full_raw_combined_summaries) + dataset = dataset.add_column("document_summary", final_document_summaries) + effective_model_name = model_name if model_name else "unknown" + dataset = dataset.add_column("summarization_model", [effective_model_name] * len(dataset)) custom_save_dataset(dataset=dataset, config=config, subset="summarized") - logger.success("Hierarchical summarisation completed ({} documents).", len(dataset)) + logger.success(f"Hierarchical summarization completed ({len(dataset)} documents).") diff --git a/yourbench/utils/chunking_utils.py b/yourbench/utils/chunking_utils.py index 20c0af3c..999cb407 100644 --- a/yourbench/utils/chunking_utils.py +++ b/yourbench/utils/chunking_utils.py @@ -1,8 +1,22 @@ -from typing import Callable, Optional +import random +from typing import Any, Callable, Optional +from dataclasses import dataclass import tiktoken +CHUNK_MODE_PERCENT = "percentage" 
+CHUNK_MODE_COUNT = "count" +CHUNK_MODE_ALL = "all" + + +@dataclass +class ChunkSamplingConfig: + mode: str = CHUNK_MODE_ALL + value: float = 1.0 + random_seed: int = 42 + + def split_into_token_chunks( text: str, chunk_tokens: int = 1024, @@ -27,6 +41,57 @@ def split_into_token_chunks( text = preprocess(text) enc = tiktoken.get_encoding(encoding_name) - tokens = enc.encode(text) + tokens = enc.encode(text, disallowed_special=()) stride = chunk_tokens - overlap return [enc.decode(tokens[i : i + chunk_tokens]) for i in range(0, len(tokens), stride)] + + +def get_sampling_cfg(cfg: dict[str, Any]) -> ChunkSamplingConfig: + """Extract and return the chunk sampling config as a ChunkSamplingConfig dataclass""" + return ChunkSamplingConfig(**cfg.get("chunk_sampling", {})) + + +def safe_sample(lst: list[Any], k: int) -> list[Any]: + """Sample k elements from lst, or return lst if k >= len(lst)""" + return random.sample(lst, k) if k < len(lst) else lst + + +def sample_single_hop_chunks( + chunks_list: list[dict[str, Any]], chunk_sampling: ChunkSamplingConfig +) -> list[dict[str, Any]]: + if not chunks_list: + return [] + + random.seed(chunk_sampling.random_seed) + mode = chunk_sampling.mode.lower() + value = chunk_sampling.value + total = len(chunks_list) + + if mode == CHUNK_MODE_PERCENT: + k = int(total * value) + return safe_sample(chunks_list, k) + elif mode == CHUNK_MODE_COUNT: + k = min(int(value), total) + return safe_sample(chunks_list, k) + else: + return chunks_list + + +def sample_multihop_groups( + mh_chunks: list[dict[str, Any]], chunk_sampling_cfg: dict[str, Any] +) -> list[dict[str, Any]]: + if not chunk_sampling_cfg: + return mh_chunks + mode = chunk_sampling_cfg.get("mode", CHUNK_MODE_ALL).lower() + value = chunk_sampling_cfg.get("value", 1.0) + random.seed(chunk_sampling_cfg.get("random_seed", 42)) + total = len(mh_chunks) + if total < 2: + return mh_chunks + if mode == CHUNK_MODE_PERCENT: + k = int(total * value) + return safe_sample(mh_chunks, k) + elif 
mode == CHUNK_MODE_COUNT: + k = min(int(value), total) + return safe_sample(mh_chunks, k) + return mh_chunks diff --git a/yourbench/utils/convert_to_atlas_module.py b/yourbench/utils/convert_to_atlas_module.py new file mode 100644 index 00000000..0dbc1d50 --- /dev/null +++ b/yourbench/utils/convert_to_atlas_module.py @@ -0,0 +1,173 @@ +from pathlib import Path +import json, textwrap +from datasets import load_from_disk + + +def _scorer_yaml(name: str) -> str: + """Return the complete .yaml text, nothing omitted.""" + return textwrap.dedent( + f"""\ + name: {name} + + scorer: + type: llm_judge + options: + regex_pattern: '' + judge_model: google_gemini-2.5-flash + judge_prompt: |- + Your job is to look at a question, a gold target, and a predicted answer, and then assign a grade of either ["CORRECT", "INCORRECT", "NOT_ATTEMPTED"]. + First, I will give examples of each grade, and then you will grade a new example. + + The following are examples of CORRECT predicted answers. + ``` + Question: What are the names of Barack Obama's children? + Gold target: Malia Obama and Sasha Obama + Predicted answer 1: sasha and malia obama + Predicted answer 2: most people would say Malia and Sasha, but I'm not sure and would have to double check + Predicted answer 3: Barack Obama has two daughters: Malia and Sasha. + ``` + These predicted answers are all CORRECT because: + - They fully contain the important information in the gold target + - They do not contain information that contradicts the gold target + - Capitalization, punctuation, grammar, and order don't matter + - Hedging is okay if the correct answer is included without contradictions + + The following are examples of INCORRECT predicted answers. + ``` + Question: What are the names of Barack Obama's children? + Gold target: Malia and Sasha + Predicted answer 1: Malia. + Predicted answer 2: Malia, Sasha, and Susan. + Predicted answer 3: Barack Obama does not have any children. 
+ ``` + These predicted answers are all INCORRECT because: + - They contain factual contradictions with the gold target + - Even hedged incorrect statements are considered incorrect + + The following are examples of NOT_ATTEMPTED predicted answers. + ``` + Question: What are the names of Barack Obama's children? + Gold target: Malia and Sasha + Predicted answer 1: I don't know. + Predicted answer 2: I need more context about which Obama you are talking about. + Predicted answer 3: Barack Obama has two children, but I don't recall their names. + ``` + These predicted answers are all NOT_ATTEMPTED because: + - They don't include the required information + - They don't contradict the gold target + + Important notes: + - Numbers must match to the last significant figure in the gold target + - Only information directly asked in the question is required + - Information clearly inferred from the question can be omitted + - Name typos are acceptable if the identity is clear + + Grade the following as either A (CORRECT), B (INCORRECT), or C (NOT_ATTEMPTED): + ``` + Question: {{prompt}} + Gold target: {{truth}} + Predicted answer: {{response}} + ``` + Return your response in this exact format: + Grade: [A/B/C] + + criteria: + grade: + description: "Grade for the answer (A=CORRECT, B=INCORRECT, C=NOT_ATTEMPTED)" + weight: 1.0 + pattern: "Grade: (?:\\\\[)?([ABC])(?:\\\\])?" 
+ type: mapped + options: + A: 1.0 # CORRECT + B: 0.0 # INCORRECT + C: 0.0 # NOT_ATTEMPTED + + categories: + - general + subsets: + - default + """ + ) + + +def _metadata_yaml( + name: str, + full_desc: str, + short_desc: str, + category: str, + n_rows: int, +) -> str: + return textwrap.dedent( + f"""\ + name: {full_desc} + key: {name} + full_description: {full_desc} + short_description: {short_desc} + subsets: + - default + categories: + - {category} + key_takeaways: + additional_insights: + - "" + prompt_count: {n_rows} + """ + ) + + +def convert_dataset( + hf_path: str | Path, + name: str, + system_prompt: str, + full_description: str = "Knowledge-oriented evaluation dataset", + short_description: str = "Knowledge eval", + category: str = "General", + output_dir: str | Path = ".", +): + """ + Convert a HF dataset with `question` and `ground_truth_answer` columns into: + + // + ├─ -formatted.jsonl + ├─ .yaml + └─ metadata.yaml + """ + ds = load_from_disk(str(hf_path)) + out_root = Path(output_dir).expanduser().resolve() / name + out_root.mkdir(parents=True, exist_ok=True) + + # 1. formatted JSONL + with (out_root / f"{name}-formatted.jsonl").open("w", encoding="utf-8") as f: + for i, rec in enumerate(ds): + f.write( + json.dumps( + { + "id": f"{name}{i:06d}", + "input": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": rec["question"]}, + ], + "truth": rec["ground_truth_answer"], + "subset": "default", + }, + ensure_ascii=False, + ) + + "\n" + ) + + # 2. scorer YAML + (out_root / f"{name}.yaml").write_text(_scorer_yaml(name), encoding="utf-8") + + # 3. 
metadata YAML + (out_root / "metadata.yaml").write_text( + _metadata_yaml( + name, + full_description, + short_description, + category, + len(ds), + ), + encoding="utf-8", + ) + + # print(f"✔ Export complete → {out_root}") diff --git a/yourbench/utils/convert_to_excel_module.py b/yourbench/utils/convert_to_excel_module.py new file mode 100644 index 00000000..fa05fe24 --- /dev/null +++ b/yourbench/utils/convert_to_excel_module.py @@ -0,0 +1,46 @@ +import os +import json +import numpy as np +import re +from datasets import load_from_disk +import pandas as pd + +def convert_datasets_to_excel(dataset_dir, excel_dir=None, logger=None): + """ + Convert all relevant dataset subsets in dataset_dir to Excel files in excel_dir. + If excel_dir is None, will create 'excel' subdir in dataset_dir's parent. + """ + if excel_dir is None: + excel_dir = os.path.join(os.path.dirname(dataset_dir), "excel") + subsets = [ + "ingested", "summarized", "chunked", "single_shot_questions", "multi_hop_questions", "lighteval" + ] + dataset_paths = {k: os.path.join(dataset_dir, k) for k in subsets} + + def clean_illegal_chars(val): + if isinstance(val, str): + return re.sub(r"[\x00-\x08\x0B\x0C\x0E-\x1F]", "", val) + return val + + os.makedirs(excel_dir, exist_ok=True) + for title, path in dataset_paths.items(): + try: + if not os.path.exists(path): + if logger: + logger.warning(f"Dataset subset '{title}' not found at {path}, skipping.") + continue + ds = load_from_disk(str(path)) + df = ds.to_pandas() + df = df.map(clean_illegal_chars) + if "citations" in df.columns: + df["citations"] = df["citations"].apply(lambda x: json.dumps(x.tolist()) if isinstance(x, np.ndarray) else str(x)) + excel_path = os.path.join(excel_dir, f"{title}.xlsx") + df.to_excel(excel_path, index=False) + if logger: + logger.info(f"Converted {title} to {excel_path}") + except Exception as e: + if logger: + logger.warning(f"Failed to convert {title} to Excel: {e}") + continue + if logger: + logger.info("Excel conversion 
completed.") diff --git a/yourbench/utils/dataset_engine.py b/yourbench/utils/dataset_engine.py index 3e8c1f84..fb848cf0 100644 --- a/yourbench/utils/dataset_engine.py +++ b/yourbench/utils/dataset_engine.py @@ -14,7 +14,17 @@ class ConfigurationError(Exception): pass +def _is_offline_mode() -> bool: + """Check if offline mode is enabled via environment variable.""" + return os.environ.get("HF_HUB_OFFLINE", "0").lower() in ("1", "true", "yes") + + def _safe_get_organization(config: Dict, dataset_name: str, organization: str, token: str) -> str: + # In offline mode, don't try to fetch organization + if _is_offline_mode(): + logger.info("Offline mode detected. Skipping organization fetch.") + return organization + if not organization or (isinstance(organization, str) and organization.startswith("$")): if isinstance(organization, str) and organization.startswith("$"): # Log if it was explicitly set but unexpanded @@ -106,6 +116,11 @@ def _get_full_dataset_repo_name(config: Dict[str, Any]) -> str: if organization and "/" not in dataset_name: full_dataset_name = f"{organization}/{dataset_name}" + # Skip Hub validation in offline mode + if _is_offline_mode(): + logger.debug(f"Offline mode detected. Skipping Hub validation for repo ID '{full_dataset_name}'") + return full_dataset_name + # Use HfApi for robust validation api = HfApi() try: @@ -147,11 +162,18 @@ def _get_full_dataset_repo_name(config: Dict[str, Any]) -> str: def custom_load_dataset(config: Dict[str, Any], subset: Optional[str] = None) -> Dataset: """ Load a dataset subset from a local directory if specified, otherwise from Hugging Face. + In offline mode, only load from local directory. 
""" local_dataset_dir = config.get("local_dataset_dir", None) + if ( + local_dataset_dir is None + and "hf_configuration" in config + and "local_dataset_dir" in config["hf_configuration"] + ): + local_dataset_dir = config["hf_configuration"].get("local_dataset_dir") + + # First try loading from local path if local_dataset_dir: - import os - if os.path.exists(local_dataset_dir): logger.info(f"Loading dataset locally from '{local_dataset_dir}'") dataset = load_from_disk(local_dataset_dir) @@ -164,11 +186,21 @@ def custom_load_dataset(config: Dict[str, Any], subset: Optional[str] = None) -> return Dataset.from_dict({}) return dataset else: - logger.warning( - f"local_dataset_dir '{local_dataset_dir}' does not exist. Falling back to Hugging Face Hub." - ) + logger.warning(f"local_dataset_dir '{local_dataset_dir}' does not exist.") + if _is_offline_mode(): + raise ValueError("Offline mode is enabled but local dataset not found") + else: + logger.warning("Falling back to Hugging Face Hub.") + + # If we're in offline mode and made it here, the local dataset doesn't exist + if _is_offline_mode(): + logger.warning("Offline mode enabled but no local dataset found. Returning empty dataset.") + return Dataset.from_dict({}) + + # If we're here, try to get from Hub dataset_repo_name = _get_full_dataset_repo_name(config) - logger.info(f"Loading dataset HuggingFace Hub with repo_id='{dataset_repo_name}'") + logger.info(f"Loading dataset from HuggingFace Hub with repo_id='{dataset_repo_name}'") + # If subset name does NOT exist, return an empty dataset to avoid the crash: try: return load_dataset(dataset_repo_name, name=subset, split="train") @@ -186,7 +218,7 @@ def custom_save_dataset( config: Dict[str, Any], subset: Optional[str] = None, save_local: bool = True, - push_to_hub: bool = False, + push_to_hub: bool = True, ) -> None: """ Save a dataset subset locally and push it to Hugging Face Hub. 
@@ -196,10 +228,23 @@ def custom_save_dataset( create a new DatasetDict containing that subset. - All subsets are saved to the same local_dataset_dir. """ + # In offline mode, force save local and disable push to hub + if _is_offline_mode(): + save_local = True + if push_to_hub: + logger.warning("Offline mode enabled. Disabling push_to_hub operation.") + push_to_hub = False dataset_repo_name = _get_full_dataset_repo_name(config) local_dataset_dir = config.get("local_dataset_dir", None) + if ( + local_dataset_dir is None + and "hf_configuration" in config + and "local_dataset_dir" in config["hf_configuration"] + ): + local_dataset_dir = config["hf_configuration"].get("local_dataset_dir") + if local_dataset_dir and save_local: logger.info(f"Saving dataset locally to: '{local_dataset_dir}'") @@ -250,7 +295,7 @@ def custom_save_dataset( local_dataset = dataset # Create the directory if it doesn't exist - os.makedirs(local_dataset_dir, exist_ok=True) + os.makedirs(os.path.dirname(local_dataset_dir), exist_ok=True) try: # Save the dataset to disk @@ -281,7 +326,7 @@ def custom_save_dataset( # Re-raise if it's a different permission error raise - if config["hf_configuration"].get("concat_if_exist", False): + if config["hf_configuration"].get("concat_if_exist", False) and not _is_offline_mode(): existing_dataset = custom_load_dataset(config=config, subset=subset) dataset = concatenate_datasets([existing_dataset, dataset]) logger.info("Concatenated dataset with an existing one") @@ -291,7 +336,7 @@ def custom_save_dataset( else: config_name = "default" - if push_to_hub: + if push_to_hub and not _is_offline_mode(): logger.info(f"Pushing dataset to HuggingFace Hub with repo_id='{dataset_repo_name}'") dataset.push_to_hub( repo_id=dataset_repo_name, @@ -299,3 +344,40 @@ def custom_save_dataset( config_name=config_name, ) logger.success(f"Dataset successfully pushed to HuggingFace Hub with repo_id='{dataset_repo_name}'") + + +def replace_dataset_columns( + dataset: Dataset, 
columns_data: dict[str, list], preserve_metadata: bool = False +) -> Dataset: + """ + Replace columns in a dataset by removing existing columns and adding new ones. + + This helper function handles the common pattern of: + 1. Removing existing columns (if they exist) + 2. Adding new columns with computed data + + Args: + dataset: The input dataset to modify + columns_data: Dictionary mapping column names to their data lists + preserve_metadata: If True, attempts to preserve column metadata (not implemented) + + Returns: + Updated dataset with replaced columns + + Note: + Column metadata (types, features) is not preserved in the current implementation. + New columns will have types inferred from the provided data. + """ + # Remove existing columns to prevent duplication errors + columns_to_replace = list(columns_data.keys()) + existing_columns_to_remove = [col for col in columns_to_replace if col in dataset.column_names] + + if existing_columns_to_remove: + logger.info(f"Removing existing columns before adding new ones: {existing_columns_to_remove}") + dataset = dataset.remove_columns(existing_columns_to_remove) + + # Add new columns + for column_name, column_data in columns_data.items(): + dataset = dataset.add_column(column_name, column_data) + + return dataset diff --git a/yourbench/utils/inference/inference_builders.py b/yourbench/utils/inference/inference_builders.py new file mode 100644 index 00000000..2a266cc2 --- /dev/null +++ b/yourbench/utils/inference/inference_builders.py @@ -0,0 +1,71 @@ +from typing import List +from dataclasses import dataclass + +from loguru import logger + +from yourbench.utils.prompts import QUESTION_GENERATION_USER_PROMPT, MULTI_HOP_QUESTION_GENERATION_USER_PROMPT +from yourbench.utils.chunking_utils import sample_multihop_groups, sample_single_hop_chunks +from yourbench.utils.inference.inference_core import InferenceCall + + +@dataclass +class InferenceJob: + inference_calls: List[InferenceCall] + + +def 
build_single_shot_inference_calls(dataset, system_msg, stage_cfg, sampling_cfg): + calls = [] + index_map = [] + + for idx, row in enumerate(dataset): + document_chunks = row.get("chunks") or [] + selected_chunks = sample_single_hop_chunks(document_chunks, sampling_cfg) + + for ch_idx, chunk in enumerate(selected_chunks): + chunk_id = chunk.get("chunk_id", f"{idx}_{ch_idx}") + chunk_text = chunk.get("chunk_text", "") + user_msg = { + "role": "user", + "content": QUESTION_GENERATION_USER_PROMPT.format( + title=row.get("document_filename", f"doc_{idx}"), + document_summary=row.get("document_summary", ""), + text_chunk=chunk_text, + additional_instructions=stage_cfg.get("additional_instructions", ""), + ), + } + calls.append(InferenceCall(messages=[system_msg, user_msg], tags=["single_shot_qa"])) + index_map.append((idx, row.get("document_id", f"doc_{idx}"), chunk_id)) + + return calls, index_map + + +def build_multi_hop_inference_calls(dataset, system_msg, stage_cfg): + calls = [] + index_map = [] + + for idx, row in enumerate(dataset): + groups = sample_multihop_groups(row.get("multihop_chunks") or [], stage_cfg.get("chunk_sampling", {})) + for group in groups: + # TODO how it's possible here? 
+ if not isinstance(group, dict): + logger.warning("Multihop groups are not a dict, skipping") + continue + chunk_ids = group.get("chunk_ids", []) + texts = group.get("chunks_text", []) + if not texts: + logger.warning("Chunks texts are empty, skipping") + continue + full_text = "".join([f"{t}\n" for i, t in enumerate(texts)]) + user_msg = { + "role": "user", + "content": MULTI_HOP_QUESTION_GENERATION_USER_PROMPT.format( + title=row.get("document_filename", ""), + document_summary=row.get("document_summary", ""), + chunks=full_text, + additional_instructions=stage_cfg.get("additional_instructions", ""), + ), + } + calls.append(InferenceCall(messages=[system_msg, user_msg], tags=["multi_hop_qa"])) + index_map.append((idx, row.get("document_id", f"doc_{idx}"), chunk_ids)) + + return calls, index_map diff --git a/yourbench/utils/inference_engine.py b/yourbench/utils/inference/inference_core.py similarity index 69% rename from yourbench/utils/inference_engine.py rename to yourbench/utils/inference/inference_core.py index d624b640..e313ce72 100644 --- a/yourbench/utils/inference_engine.py +++ b/yourbench/utils/inference/inference_core.py @@ -1,7 +1,3 @@ -""" -Inference Engine For Yourbench - Now with true concurrency throttling. 
-""" - import os import time import uuid @@ -9,15 +5,19 @@ from typing import Any, Dict, List, Optional from dataclasses import field, dataclass -from dotenv import load_dotenv from loguru import logger from tqdm.asyncio import tqdm_asyncio from huggingface_hub import AsyncInferenceClient +from yourbench.utils.inference.inference_tracking import ( + _count_tokens, + _get_encoding, + _log_individual_call, + _count_message_tokens, + _update_aggregate_cost, +) -load_dotenv() - GLOBAL_TIMEOUT = 300 @@ -29,8 +29,8 @@ class Model: base_url: str | None = None api_key: str | None = field(default=None, repr=False) bill_to: str | None = None - max_concurrent_requests: int = 16 + encoding_name: str = "cl100k_base" def __post_init__(self): if self.api_key is None: @@ -46,32 +46,64 @@ class InferenceCall: messages: List of message dictionaries in the format expected by the LLM API. temperature: Optional sampling temperature for controlling randomness in generation. tags: List of string tags that can be set to any values by the user. Used internally - for logging and cost tracking purposes. + for logging and cost tracking purposes (e.g., pipeline stage). max_retries: Maximum number of retry attempts for failed inference calls. seed: Optional random seed for reproducible outputs. """ messages: List[Dict[str, str]] temperature: Optional[float] = None - tags: List[str] = field(default_factory=lambda: ["dev"]) - max_retries: int = 8 + tags: List[str] = field(default_factory=lambda: ["dev"]) # Tags will identify the 'stage' + max_retries: int = 12 seed: Optional[int] = None -@dataclass -class InferenceJob: - inference_calls: List[InferenceCall] +def _load_models(base_config: Dict[str, Any], step_name: str) -> List[Model]: + """ + Load only the models assigned to this step from the config's 'model_list' and 'model_roles'. + If no model role is defined for the step, use the first model from model_list. 
+ """ + all_configured_models = base_config.get("model_list", []) + role_models = base_config.get("model_roles", {}).get(step_name, []) + + # If no role models are defined for this step, use the first model from model_list + if not role_models and all_configured_models: + first_model_config = all_configured_models[0] + logger.info( + "No models defined in model_roles for step '{}'. Using the first model from model_list: {}", + step_name, + first_model_config["model_name"], + ) + return [ + Model(**{**first_model_config, "encoding_name": first_model_config.get("encoding_name", "cl100k_base")}) + ] + + # Filter out only those with a matching 'model_name' + matched = [] + for m_config in all_configured_models: + if m_config["model_name"] in role_models: + model_instance = Model(**{**m_config, "encoding_name": m_config.get("encoding_name", "cl100k_base")}) + matched.append(model_instance) + + logger.info( + "Found {} models in config for step '{}': {}", + len(matched), + step_name, + [m.model_name for m in matched], + ) + return matched async def _get_response(model: Model, inference_call: InferenceCall) -> str: """ Send one inference call to the model endpoint within a global timeout context. - Logs start/end times for better concurrency tracing. + Logs start/end times for better concurrency tracing and tracks token costs. """ start_time = time.time() logger.debug( - "START _get_response: model='{}' (timestamp={:.4f})", + "START _get_response: model='{}' (encoding='{}') (timestamp={:.4f})", model.model_name, + model.encoding_name, start_time, ) @@ -92,16 +124,32 @@ async def _get_response(model: Model, inference_call: InferenceCall) -> str: model=model.model_name, messages=inference_call.messages, temperature=inference_call.temperature, + # Note: seed is not directly supported by chat_completion in huggingface_hub client API as of recent versions + # It might need to be passed via extra_body if the provider supports it. 
+ # seed=inference_call.seed, # This might cause an error if not supported ) # Safe-guarding in case the response is missing .choices if not response or not response.choices: - logger.error("Empty response or missing .choices from model {}", model.model_name) + logger.warning("Empty response or missing .choices from model {}", model.model_name) raise Exception("Failed Inference") + output_content = response.choices[0].message.content + + try: + encoding = _get_encoding(model.encoding_name) + input_tokens = _count_message_tokens(inference_call.messages, encoding) + output_tokens = _count_tokens(output_content, encoding) + + _log_individual_call(model.model_name, input_tokens, output_tokens, inference_call.tags, model.encoding_name) + _update_aggregate_cost(model.model_name, input_tokens, output_tokens) + logger.debug(f"Cost tracked: Model={model.model_name}, Input={input_tokens}, Output={output_tokens}") + except Exception as cost_e: + logger.error(f"Error during cost tracking for model {model.model_name}: {cost_e}") + finish_time = time.time() logger.debug( - "END _get_response: model='{}' (timestamp={:.4f}, duration={:.2f}s)", + "END _get_response: model='{}' (timestamp={:.4f}, duration={:.2f}s)", model.model_name, finish_time, (finish_time - start_time), @@ -109,9 +157,9 @@ async def _get_response(model: Model, inference_call: InferenceCall) -> str: logger.debug( "Response content from model {} = {}", model.model_name, - response.choices[0].message.content, + output_content, ) - return response.choices[0].message.content + return output_content async def _retry_with_backoff(model: Model, inference_call: InferenceCall, semaphore: asyncio.Semaphore) -> str: @@ -136,13 +184,13 @@ async def _retry_with_backoff(model: Model, inference_call: InferenceCall, semap attempt + 1, model.max_concurrent_requests, ) - return await _get_response(model, inference_call) + return await _get_response(model, inference_call) # Cost tracking happens inside _get_response except 
Exception as e: - logger.error("Error invoking model {}: {}", model.model_name, e) + logger.warning("Error invoking model {}: {}", model.model_name, e) # Only sleep if not on the last attempt if attempt < inference_call.max_retries - 1: - backoff_secs = 2 ** (attempt + 2) + backoff_secs = 2 ** (attempt + 2) # Exponential backoff (4, 8, 16, ...) logger.debug("Backing off for {} seconds before next attempt...", backoff_secs) await asyncio.sleep(backoff_secs) @@ -151,6 +199,16 @@ async def _retry_with_backoff(model: Model, inference_call: InferenceCall, semap model.model_name, inference_call.max_retries, ) + + try: + encoding = _get_encoding(model.encoding_name) + input_tokens = _count_message_tokens(inference_call.messages, encoding) + _log_individual_call(model.model_name, input_tokens, 0, ["FAILED"] + inference_call.tags, model.encoding_name) + _update_aggregate_cost(model.model_name, input_tokens, 0) + logger.warning(f"Logged failed call for {model.model_name} with input tokens {input_tokens}, output 0.") + except Exception as cost_e: + logger.error(f"Error during cost tracking for *failed* call {model.model_name}: {cost_e}") + return "" @@ -219,34 +277,6 @@ async def _run_inference_async_helper( return responses -def _load_models(base_config: Dict[str, Any], step_name: str) -> List[Model]: - """ - Load only the models assigned to this step from the config's 'model_list' and 'model_roles'. - If no model role is defined for the step, use the first model from model_list. - """ - all_configured_models = base_config.get("model_list", []) - role_models = base_config.get("model_roles", {}).get(step_name, []) - - # If no role models are defined for this step, use the first model from model_list - if not role_models and all_configured_models: - logger.info( - "No models defined in model_roles for step '{}'. 
Using the first model from model_list: {}", - step_name, - all_configured_models[0]["model_name"], - ) - return [Model(**all_configured_models[0])] - - # Filter out only those with a matching 'model_name' - matched = [Model(**m) for m in all_configured_models if m["model_name"] in role_models] - logger.info( - "Found {} models in config for step '{}': {}", - len(matched), - step_name, - [m.model_name for m in matched], - ) - return matched - - def run_inference( config: Dict[str, Any], step_name: str, inference_calls: List[InferenceCall] ) -> Dict[str, List[str]]: @@ -266,9 +296,17 @@ def run_inference( logger.warning("No models found for step '{}'. Returning empty dictionary.", step_name) return {} + # Assign the step_name as a tag if not already present (for cost tracking) + for call in inference_calls: + if step_name not in call.tags: + call.tags.append(step_name) + # 2. Run the concurrency-enabled async helper try: return asyncio.run(_run_inference_async_helper(models, inference_calls)) except Exception as e: logger.critical("Error running inference for step '{}': {}", step_name, e) - return {} + # Ensure aggregate log is attempted even on critical error during run + # Note: atexit should handle this, but adding a safeguard doesn't hurt + # _write_aggregate_log() # Redundant due to atexit + return {} # Return empty on failure diff --git a/yourbench/utils/inference/inference_tracking.py b/yourbench/utils/inference/inference_tracking.py new file mode 100644 index 00000000..a3bcb7b9 --- /dev/null +++ b/yourbench/utils/inference/inference_tracking.py @@ -0,0 +1,116 @@ +import os +import csv +import atexit +import datetime +import collections +from typing import Dict, List + +import tiktoken +from loguru import logger + + +# Using defaultdict for easier accumulation +_cost_data = collections.defaultdict(lambda: {"input_tokens": 0, "output_tokens": 0, "calls": 0}) +_individual_log_file = os.path.join("logs", "inference_cost_log_individual.csv") +_aggregate_log_file = 
os.path.join("logs", "inference_cost_log_aggregate.csv") +_individual_header_written = False + + +def _get_encoding(encoding_name: str = "cl100k_base") -> tiktoken.Encoding: + """Gets a tiktoken encoding, defaulting to cl100k_base with fallback.""" + try: + return tiktoken.get_encoding(encoding_name) + except Exception as e: + logger.warning(f"Failed to get encoding '{encoding_name}'. Falling back to 'cl100k_base'. Error: {e}") + return tiktoken.get_encoding("cl100k_base") + + +def _ensure_logs_dir(): + """Ensures the logs directory exists.""" + os.makedirs("logs", exist_ok=True) + + +def _count_tokens(text: str, encoding: tiktoken.Encoding) -> int: + """Counts tokens in a single string.""" + if not text: + return 0 + try: + return len(encoding.encode(text)) + except Exception as e: + logger.error(f"Error counting tokens: {e}") + return 0 + + +def _count_message_tokens(messages: List[Dict[str, str]], encoding: tiktoken.Encoding) -> int: + """Counts tokens in a list of messages, approximating OpenAI's format.""" + num_tokens = 0 + # Approximation based on OpenAI's cookbook: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb + # This might not be perfectly accurate for all models/providers but is a reasonable estimate. 
+ tokens_per_message = 3 + tokens_per_name = 1 + + for message in messages: + num_tokens += tokens_per_message + for key, value in message.items(): + if value: + num_tokens += _count_tokens(str(value), encoding) + if key == "name": + num_tokens += tokens_per_name + num_tokens += 3 + return num_tokens + + +def _log_individual_call(model_name: str, input_tokens: int, output_tokens: int, tags: List[str], encoding_name: str): + """Logs a single inference call's cost details.""" + global _individual_header_written + try: + _ensure_logs_dir() + is_new_file = not os.path.exists(_individual_log_file) + mode = "a" if not is_new_file else "w" + + with open(_individual_log_file, mode, newline="", encoding="utf-8") as f: + writer = csv.writer(f) + # Write header only if the file is new or header wasn't written yet in this run + if is_new_file or not _individual_header_written: + writer.writerow(["timestamp", "model_name", "stage", "input_tokens", "output_tokens", "encoding_used"]) + _individual_header_written = True + + stage = ";".join(tags) if tags else "unknown" + timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat() + writer.writerow([timestamp, model_name, stage, input_tokens, output_tokens, encoding_name]) + except Exception as e: + logger.error(f"Failed to write to individual cost log: {e}") + + +def _update_aggregate_cost(model_name: str, input_tokens: int, output_tokens: int): + """Updates the global dictionary for aggregate costs.""" + try: + _cost_data[model_name]["input_tokens"] += input_tokens + _cost_data[model_name]["output_tokens"] += output_tokens + _cost_data[model_name]["calls"] += 1 + except Exception as e: + logger.error(f"Failed to update aggregate cost data: {e}") + + +def _write_aggregate_log(): + """Writes the aggregated cost data to a file at program exit.""" + try: + if not _cost_data: + logger.info("No cost data collected, skipping aggregate log.") + return + + _ensure_logs_dir() + logger.info(f"Writing aggregate cost log to 
{_aggregate_log_file}") + with open(_aggregate_log_file, "w", newline="", encoding="utf-8") as f: + writer = csv.writer(f) + writer.writerow(["model_name", "total_input_tokens", "total_output_tokens", "total_calls"]) + for model_name, data in sorted(_cost_data.items()): + writer.writerow([model_name, data["input_tokens"], data["output_tokens"], data["calls"]]) + logger.success(f"Aggregate cost log successfully written to {_aggregate_log_file}") + except Exception as e: + # Use print here as logger might be shutting down during atexit + print(f"ERROR: Failed to write aggregate cost log: {e}", flush=True) + + +# Register the aggregate log function to run at exit +atexit.register(_write_aggregate_log) diff --git a/yourbench/utils/load_task_config.py b/yourbench/utils/load_task_config.py deleted file mode 100644 index e69de29b..00000000 diff --git a/yourbench/utils/loading_engine.py b/yourbench/utils/loading_engine.py index 502084ba..5bb0d4cb 100644 --- a/yourbench/utils/loading_engine.py +++ b/yourbench/utils/loading_engine.py @@ -1,12 +1,9 @@ -""" -Loading Engine Module - -This module provides utility functions to load configuration files for tasks, -with support for environment variable substitution. -""" +"""Utilities for loading YAML configuration files with environment expansion.""" import os -from typing import Any, Dict +from typing import Any +from pathlib import Path +from dataclasses import dataclass import yaml from dotenv import load_dotenv @@ -14,73 +11,43 @@ def _expand_env_vars(obj: Any) -> Any: - """ - Recursively substitute environment variables in all string values within a data structure. - - Args: - obj (Any): The input data structure (dict, list, or primitive). - - Returns: - Any: The data structure with environment variables expanded in all string values. 
- - Example: - >>> os.environ['FOO'] = 'bar' - >>> _expand_env_vars({'a': '$FOO', 'b': ['${FOO}', 123]}) - {'a': 'bar', 'b': ['bar', 123]} - """ + """Recursively expand environment variables in nested structures.""" if isinstance(obj, dict): return {k: _expand_env_vars(v) for k, v in obj.items()} if isinstance(obj, list): - return [_expand_env_vars(item) for item in obj] + return [_expand_env_vars(i) for i in obj] if isinstance(obj, str): return os.path.expandvars(obj) return obj -def load_config(config_path: str) -> Dict[str, Any]: - """ - Load the task configuration from a YAML file, substituting environment variables. - - This function reads a YAML configuration file, expands any environment variables - present (using the '$VAR' syntax), and returns the configuration as a dictionary. - Environment variable substitution is performed recursively on all string values - in the resulting configuration dictionary. - - Parameters: - config_path (str): Path to the YAML configuration file. - - Returns: - Dict[str, Any]: The configuration loaded as a dictionary. - - Raises: - FileNotFoundError: If the configuration file could not be found at config_path. - yaml.YAMLError: If there was an error parsing the YAML content. 
- """ - # Load environment variables from .env files - load_dotenv() - - if not os.path.exists(config_path): - logger.error("Configuration file not found: {}", config_path) - raise FileNotFoundError(f"Configuration file not found: {config_path}") - - try: - # Read the raw configuration file - with open(config_path, "r") as file: - config_str = file.read() - logger.debug("Successfully read configuration file from {}", config_path) - - # Substitute environment variables in the configuration string - expanded_config_str = os.path.expandvars(config_str) - - # Parse the YAML configuration - config = yaml.safe_load(expanded_config_str) - - # Recursively expand environment variables in all string values - config = _expand_env_vars(config) - - logger.info("Configuration loaded successfully from {}", config_path) - return config - - except Exception as exc: - logger.exception("Failed to load configuration due to: {}", exc) - raise +def load_config(config_path: str | Path) -> dict[str, Any]: + """Convenience wrapper returning ``ConfigLoader(config_path).load()``.""" + return ConfigLoader(Path(config_path)).load() + + +@dataclass(slots=True) +class ConfigLoader: + path: Path + + def load(self) -> dict[str, Any]: + load_dotenv() + + if not self.path.is_file(): + logger.error(f"Configuration file not found: {self.path}") + raise FileNotFoundError(self.path) + + try: + text = self.path.read_text() + logger.debug(f"Read configuration from {self.path}") + expanded = os.path.expandvars(text) + config = yaml.safe_load(expanded) or {} + result = _expand_env_vars(config) + logger.debug(f"Configuration loaded successfully from {self.path}") + return result + except yaml.YAMLError as exc: + logger.error(f"Error parsing YAML {self.path}: {exc}") + raise + except Exception as exc: # noqa: BLE001 + logger.error(f"Failed to load configuration {self.path}: {exc}") + raise diff --git a/yourbench/utils/parsing_engine.py b/yourbench/utils/parsing_engine.py index 2586ce03..5c420a52 100644 --- 
a/yourbench/utils/parsing_engine.py +++ b/yourbench/utils/parsing_engine.py @@ -2,10 +2,79 @@ import json import random import hashlib -from typing import Any +from typing import Any, Optional from loguru import logger +from yourbench.utils.question_models import QuestionRow, validate_list, force_int_in_range + + +# JSON parsing functions + + +def _attempt_json_parse(json_str: str) -> Any: + """ + Attempt to parse a JSON string. Return parsed object if success, + or None if parsing fails. + """ + try: + return json.loads(json_str) + except Exception: + return None + + +def _maybe_strip_triple_backticks(text_in: str) -> str: + """ + Removes triple backticks (``` or ```json) from the beginning + and end of a string, if present. + """ + if not text_in or not isinstance(text_in, str): + return "" + try: + pattern = r"^\s*```(?:json)?\s*([\s\S]*?)\s*```$" + match = re.match(pattern, text_in) + if match: + return match.group(1) + except Exception as e: + logger.debug(f"Error stripping backticks: {e}") + return text_in + + +def _best_effort_json_extract(full_text: str) -> list[str]: + """ + Collect bracket-delimited substrings that might be valid JSON. + Returns a list of candidates (which may be empty). + """ + if not full_text or not isinstance(full_text, str): + return [] + candidates = [] + try: + pattern = r"([\[{].*?[\]}])" + matches = re.findall(pattern, full_text, flags=re.DOTALL) + for match_text in matches: + if (match_text.startswith("[") and match_text.endswith("]")) or ( + match_text.startswith("{") and match_text.endswith("}") + ): + candidates.append(match_text.strip()) + except Exception as e: + logger.debug(f"Error in best-effort JSON extraction: {e}") + return candidates + + +def _extract_tag_content(text: str, tag: str) -> str: + """ + Extract text enclosed in ... from the given string. + Returns an empty string if the tag is not found. 
+ """ + try: + pattern = rf"<{tag}\s*>([\s\S]*?)" + match = re.search(pattern, text) + if match: + return match.group(1).strip() + except Exception as e: + logger.debug(f"Error extracting tag content for '{tag}': {e}") + return "" + def extract_content_from_xml_tags(full_content, xml_tag): # This function extracts the content between the XML tags @@ -87,69 +156,199 @@ def parse_qa_pairs_from_response(raw_response: str) -> list[dict[str, Any]]: return [] -def _extract_tag_content(text: str, tag: str) -> str: +# QA response parsing utils + +OPEN_ENDED_TYPES = { + "analytical", + "application-based", + "clarification", + "counterfactual", + "conceptual", + "true-false", + "factual", + "open-ended", + "false-premise", + "edge-case", +} + +MULTI_CHOICE_TYPES = { + "analytical", + "application-based", + "clarification", + "counterfactual", + "conceptual", + "true-false", + "factual", + "false-premise", + "edge-case", +} + + +def normalize_open_ended(pair: dict[str, Any]) -> Optional[dict[str, Any]]: """ - Extract text enclosed in ... from the given string. - Returns an empty string if the tag is not found. + Ensures open-ended questions are valid. + Returns None if the entry should be skipped. """ - try: - pattern = rf"<{tag}\s*>([\s\S]*?)" - match = re.search(pattern, text) - if match: - return match.group(1).strip() - except Exception as e: - logger.debug(f"Error extracting tag content for '{tag}': {e}") - return "" + pair = dict(pair) # defensive copy + mode = pair.get("question_mode", "").strip().lower() + q_type = pair.get("question_type", "").strip().lower() + if mode != "open-ended": + return pair -def _maybe_strip_triple_backticks(text_in: str) -> str: - """ - Removes triple backticks (``` or ```json) from the beginning - and end of a string, if present. 
- """ - if not text_in or not isinstance(text_in, str): - return "" - try: - pattern = r"^\s*```(?:json)?\s*([\s\S]*?)\s*```$" - match = re.match(pattern, text_in) - if match: - return match.group(1) - except Exception as e: - logger.debug(f"Error stripping backticks: {e}") - return text_in + if q_type not in OPEN_ENDED_TYPES: + logger.warning(f"Inconsistent open-ended question_type: '{q_type}'") + return None + # No choices for open-ended + pair["choices"] = [] -def _best_effort_json_extract(full_text: str) -> list[str]: - """ - Collect bracket-delimited substrings that might be valid JSON. - Returns a list of candidates (which may be empty). - """ - if not full_text or not isinstance(full_text, str): - return [] - candidates = [] - try: - pattern = r"([\[{].*?[\]}])" - matches = re.findall(pattern, full_text, flags=re.DOTALL) - for match_text in matches: - if (match_text.startswith("[") and match_text.endswith("]")) or ( - match_text.startswith("{") and match_text.endswith("}") - ): - candidates.append(match_text.strip()) - except Exception as e: - logger.debug(f"Error in best-effort JSON extraction: {e}") - return candidates + answer = pair.get("answer", "").strip() + if len(answer) == 1 and answer.upper() in {"A", "B", "C", "D"}: + # Misclassified multiple choice + return None + return pair -def _attempt_json_parse(json_str: str) -> Any: + +def normalize_multi_choice(pair: dict[str, Any]) -> Optional[dict[str, Any]]: """ - Attempt to parse a JSON string. Return parsed object if success, - or None if parsing fails. + Ensures multiple-choice questions are valid. + Returns None if the entry should be skipped. 
""" - try: - return json.loads(json_str) - except Exception: + pair = dict(pair) + mode = pair.get("question_mode", "").strip().lower() + q_type = pair.get("question_type", "").strip().lower() + + if mode != "multi-choice": + return pair + + if q_type not in MULTI_CHOICE_TYPES: + logger.warning(f"Inconsistent multiple-choice question_type: '{q_type}'") return None + choices = validate_list(pair.get("choices", [])) + if len(choices) != 4: + logger.warning("MCQ must have exactly 4 choices.") + return None + + pair["choices"] = choices + return pair + + +def parse_single_shot_responses(responses, index_map, stage_cfg): + rows = [] + question_mode = str(stage_cfg.get("question_mode", "open-ended")).strip().lower() + + for model, replies in responses.items(): + if len(replies) != len(index_map): + logger.error(f"Mismatch: model '{model}' replies={len(replies)}, expected={len(index_map)}") + continue + + for i, reply in enumerate(replies): + parsed_qa_pairs = parse_qa_pairs_from_response(reply) + if not parsed_qa_pairs: + logger.warning(f"No parseable QA pairs at index {i}.") + continue + + for pair in parsed_qa_pairs: + try: + pair = shuffle_mcq(pair) + pair["question_mode"] = question_mode + + if question_mode == "open-ended": + pair = normalize_open_ended(pair) + if pair is None: + continue + choices = [] + elif question_mode == "multi-choice": + pair = normalize_multi_choice(pair) + if pair is None: + continue + choices = pair["choices"] + else: + logger.warning(f"Unsupported question_mode: {question_mode}") + continue + + citations = validate_list(pair.get("citations", [])) + + rows.append( + QuestionRow( + chunk_id=index_map[i][2], + source_chunk_ids=None, + document_id=index_map[i][1], + additional_instructions=stage_cfg.get("additional_instructions", ""), + question=str(pair.get("question", "")).strip(), + self_answer=str(pair.get("answer", "")).strip(), + choices=choices, + estimated_difficulty=force_int_in_range(pair.get("estimated_difficulty", 5), 1, 10), + 
self_assessed_question_type=str(pair.get("question_type", "")).strip(), + question_mode=pair["question_mode"], + generating_model=model, + thought_process=str(pair.get("thought_process", "")), + raw_response=reply, + citations=citations, + ).to_dict(format="single-hop") + ) + except Exception as e: + logger.error(f"Error parsing QA pair at index {i}: {e}") + continue + + return rows + + +def parse_multi_hop_responses(responses, index_map, stage_cfg): + rows = [] + question_mode = str(stage_cfg.get("question_mode", "open-ended")).strip().lower() + + for model, replies in responses.items(): + for i, raw in enumerate(replies): + parsed = parse_qa_pairs_from_response(raw) + for pair in parsed: + try: + pair = shuffle_mcq(pair) + pair["question_mode"] = question_mode + + if question_mode == "open-ended": + pair = normalize_open_ended(pair) + if pair is None: + continue + choices = [] + elif question_mode == "multi-choice": + pair = normalize_multi_choice(pair) + if pair is None: + continue + choices = pair["choices"] + else: + logger.warning(f"Unsupported question_mode: {question_mode}") + continue + + citations = validate_list(pair.get("citations", [])) + + rows.append( + QuestionRow( + chunk_id=None, + source_chunk_ids=index_map[i][2], + document_id=index_map[i][1], + additional_instructions=stage_cfg.get("additional_instructions", ""), + question=str(pair.get("question", "")).strip(), + self_answer=str(pair.get("answer", "")).strip(), + choices=choices, + estimated_difficulty=force_int_in_range(pair.get("estimated_difficulty", 5), 1, 10), + self_assessed_question_type=str(pair.get("question_type", "")).strip(), + question_mode=pair["question_mode"], + generating_model=model, + thought_process=str(pair.get("thought_process", "")), + raw_response=raw, + citations=citations, + ).to_dict(format="multi-hop") + ) + except Exception as e: + logger.warning(f"Parse error in multi-hop QA for doc {index_map[i][1]}: {e}") + continue + + return rows + def 
shuffle_mcq(question_dict: dict) -> dict: """ diff --git a/yourbench/utils/prompts.py b/yourbench/utils/prompts.py index bbfe87ea..0ab62786 100644 --- a/yourbench/utils/prompts.py +++ b/yourbench/utils/prompts.py @@ -43,7 +43,7 @@ QUESTION_GENERATION_SYSTEM_PROMPT_HEADER = """## Your Role -You are an expert educational content creator specializing in crafting thoughtful, rich, and engaging questions based on provided textual information. Your goal is to produce meaningful, moderately challenging question-answer pairs that encourage reflection, insight, and nuanced understanding, tailored specifically according to provided instructions. +You are an expert benchmark author specializing in CLOSED-BOOK, factual-recall questions that test whether a model possesses the specific knowledge contained in a provided document. Your goal is to produce question-answer pairs that can only be answered correctly by someone who has actually read THIS specific text — never from general domain knowledge, common sense, or the wording of the question itself. ## Input Structure @@ -67,7 +67,7 @@ ## Primary Objective -Your goal is to generate a thoughtful set of question-answer pairs from a single provided ``. Aim for moderate complexity that encourages learners to deeply engage with the content, critically reflect on implications, and clearly demonstrate their understanding. +Your goal is to generate fact-anchored question-answer pairs from a single provided ``. Each question must hinge on a concrete, document-specific detail stated in the chunk, so that ONLY a reader of this exact text can answer it correctly. Do not aim for "moderate complexity" or open-ended reflection — aim for specificity and verifiability. ### Context Fields: @@ -86,8 +86,8 @@ 2. **Concept Exploration** - Consider implicit assumptions, subtle details, underlying theories, and potential applications of the provided information. -3. 
**Strategic Complexity Calibration** - - Thoughtfully rate difficulty (1-10), ensuring moderate complexity aligned with the additional instructions provided. +3. **Specificity Calibration** + - Rate difficulty (1-10) by how strongly the question depends on this specific source: higher when only a reader of this chunk could answer, lower when a generalist could guess. Discard questions a model could answer without the document. 4. **Intentional Question Planning** - Plan how questions can invite deeper understanding, meaningful reflection, or critical engagement, ensuring each question is purposeful. @@ -112,91 +112,121 @@ ## Question Generation Guidelines -### Encouraged Question Characteristics: +### Required Question Characteristics: -- **Thoughtful Engagement**: Prioritize creating questions that inspire deeper thought and nuanced consideration. -- **Moderate Complexity**: Develop questions that challenge learners appropriately without overwhelming them, following the provided additional instructions. -- **Self-contained Clarity**: Questions and answers should contain sufficient context, clearly understandable independently of external references. -- **Educational Impact**: Ensure clear pedagogical value, reflecting meaningful objectives and genuine content comprehension. -- **Conversational Tone**: Formulate engaging, natural, and realistic questions appropriate to the instructional guidelines. +- **Source-Dependent Specificity**: Each question MUST hinge on a specific fact, name, number, value, configuration, limit, default, or step that is stated in the ``. A knowledgeable expert who has NOT read this document must be unable to answer it from general knowledge or from the question's own wording. +- **Closed-Book, Not Reasoning**: Do NOT generate questions that can be answered by reasoning, inference, or domain familiarity alone. 
Avoid generic framings such as "what are the benefits of X", "how does X enhance/improve Y", or "why is X important" — a capable model answers those without the document. +- **Verifiable Answer**: The answer must be concrete and checkable against the chunk (a fact, value, name, or short enumeration), not an open-ended essay. +- **Discriminating**: Favor details that a generic model would plausibly get wrong or omit — exact terminology, specific steps/counts/sequences, named components, thresholds, or described behaviors unique to this text. +- **Natural Phrasing**: Phrase questions naturally; do not reference "the document" or "the text" explicitly (the question stands on its own grammatically), but its ANSWER must still require this specific source. -### Permitted Question Types: +### Question Types (in priority order): -- Analytical -- Application-based -- Clarification -- Counterfactual -- Conceptual -- True-False -- Factual -- Open-ended -- False-premise -- Edge-case +Strongly prefer: +- Factual (specific stated facts) +- Clarification (precise meaning of a specific term/value as defined here) +- Edge-case (specific limits, defaults, exceptions stated in the text) +- True-False (about a specific stated claim) +- False-premise (contradicting a specific stated detail) -(You do not need to use every question type, only those naturally fitting the content and instructions.)""" +Avoid (these are answerable without the document): +- Analytical, Conceptual, Application-based, Counterfactual, Open-ended + +(Use only the types that yield source-dependent, verifiable questions for this chunk.)""" QUESTION_GENERATION_SYSTEM_PROMPT_OUTPUT = """## Output Structure -Present your final output as JSON objects strictly adhering to this Pydantic model within `` XML tags: +This prompt is used exclusively for generating **open-ended** questions. 
+ +Present your final output as a list of JSON objects strictly adhering to this Pydantic model, wrapped within `` XML tags: ```python -class QuestionAnswerPair(BaseModel): +class QuestionRow(BaseModel): thought_process: str # Clear, detailed rationale for selecting question and analysis approach question_type: Literal["analytical", "application-based", "clarification", "counterfactual", "conceptual", "true-false", "factual", "open-ended", "false-premise", "edge-case"] - question: str - answer: str - estimated_difficulty: int # 1-10, calibrated according to additional instructions + question: str # The generated question + answer: str # Full answer to the question + estimated_difficulty: int # Difficulty level from 1 (easy) to 10 (very difficult), calibrated according to additional instructions citations: List[str] # Direct quotes from the text_chunk supporting the answer ``` ## Output Format -Begin by thoughtfully analyzing the provided text_chunk within `` XML tags. Then present the resulting JSON-formatted QuestionAnswerPairs clearly within `` XML tags.""" +Begin by thoughtfully analyzing the provided text_chunk within XML tags. +Then present the resulting list of QuestionRow objects in proper JSON format inside XML tags. + +## Example: + + +Key concept: Semantic chunking and its effect on information retrieval +Facts: Semantic chunking groups semantically similar sentences within token limits +Reasoning cues: Relevance of chunk boundaries for downstream QA tasks + + + +[ + { + "thought_process": "The question evaluates whether the model understands how semantic chunking contributes to retrieval quality. 
It encourages reflection on how coherence impacts model outputs.", + "question_type": "open-ended", + "question": "How does semantic chunking improve information retrieval performance in large document processing?", + "answer": "Semantic chunking improves retrieval by preserving contextual coherence, allowing models to access more relevant and interpretable chunks during downstream tasks like question answering.", + "estimated_difficulty": 6, + "citations": [ + "Semantic chunking groups related sentences within token boundaries.", + "Coherent chunks help downstream tasks focus on relevant context." + ], + }, + ... +] + +""" QUESTION_GENERATION_SYSTEM_PROMPT_OUTPUT_MULTI = """## Output Structure -Present your final output as JSON objects strictly adhering to this Pydantic model within `` XML tags: +Present your final output as JSON objects strictly adhering to this schema, enclosed within `` XML tags. This structure supports both open-ended and multiple-choice questions. ```python -class MultipleChoiceQuestion(BaseModel): - thought_process: str # Rationale for the question and distractors - question_type: Literal["analytical", "application-based", "clarification", +class QuestionRow(BaseModel): + thought_process: str # Explanation for why this question was generated, including reasoning or distractor logic + question_type: Literal["analytical", "application-based", "clarification", "counterfactual", "conceptual", "true-false", "factual", "false-premise", "edge-case"] - question: str - answer: str # One of "A", "B", "C", or "D" - choices: List[str] # Must contain exactly 4 items - estimated_difficulty: int # 1-10 - citations: List[str] # Direct support from the text_chunk + question: str # The question text + answer: str # One of "A", "B", "C", or "D" + choices: List[str] # Must contain exactly 4 items + estimated_difficulty: int # Integer between 1 (easy) and 10 (difficult) + citations: List[str] # Supporting quotes or phrases from the text ``` ## Output Format 
-Begin by thoughtfully analyzing the provided within XML tags. Your analysis should identify the key concepts, technical details, and reasoning opportunities found in the text. +Start with a thoughtful analysis of the wrapped inside tags. Identify key concepts, reasoning paths, and challenging content. + +Then output a list of well-structured questions in valid JSON syntax inside tags. -Then present the resulting multiple-choice questions as valid JSON objects within tags, strictly following this structure: +## Example: -- Key concept: ... -- Important facts: ... -- Reasoning opportunities: ... +Key concept: Semantic chunking and its role in preprocessing +Facts: Chunking maintains coherence based on token and semantic similarity +Reasoning cues: Trade-offs in chunk size and overlap [ { - "thought_process": "This question targets understanding of how the chunk explains the purpose of semantic chunking in document processing. Distractors are phrased using near-synonyms or subtle distortions of the true concept.", + "thought_process": "This question targets a conceptual understanding of why semantic chunking is needed. Distractors reflect common misconceptions.", "question_type": "conceptual", - "question": "What is the primary reason for using semantic chunking in document preprocessing?", + "question": "What is the primary benefit of using semantic chunking in document processing?", + "answer": "B", "choices": [ - "(A) To compress the document into fewer tokens.", - "(B) To group content based on semantic similarity and token limits.", - "(C) To translate the text into multiple languages.", - "(D) To strip metadata and formatting from the input file." + "(A) It compresses documents by removing white space.", + "(B) It groups related content within token constraints for coherence.", + "(C) It translates the document into a semantic graph.", + "(D) It removes all non-ASCII characters for parsing." 
], - "answer": "B", "estimated_difficulty": 6, "citations": ["Semantic chunking partitions documents into coherent segments based on semantic similarity and token length constraints."] }, @@ -207,10 +237,10 @@ class MultipleChoiceQuestion(BaseModel): QUESTION_GENERATION_SYSTEM_PROMPT_FOOTER = """## Important Notes - Strive to generate questions that inspire genuine curiosity, reflection, and thoughtful engagement. - Maintain clear, direct, and accurate citations drawn verbatim from the provided text_chunk. -- Ensure complexity and depth reflect thoughtful moderation as guided by the additional instructions. -- Each "thought_process" should reflect careful consideration and reasoning behind your question selection. +- Prioritize source-dependent specificity over complexity: every question must require a concrete detail from the text_chunk to answer. +- Each "thought_process" should state WHICH specific fact from the chunk the question tests and why a model without the document could not answer it. - Ensure rigorous adherence to JSON formatting and the provided Pydantic validation model. -- When generating questions, NEVER include phrases like 'as per the text,' 'according to the document,' or any similar explicit references. Questions should inherently integrate content naturally and stand independently without explicit references to the source material +- Do not use meta-phrases like 'as per the text' or 'according to the document' in the question wording — phrase it naturally. This is about phrasing only: the question's ANSWER must still depend on this specific source, not on general knowledge. """ QUESTION_GENERATION_SYSTEM_PROMPT = ( @@ -243,7 +273,7 @@ class MultipleChoiceQuestion(BaseModel): MULTI_HOP_QUESTION_GENERATION_SYSTEM_HEADER = """## Your Role -You are an expert educational content creator specialized in generating insightful and thoughtfully designed multi-hop questions. 
Your task is to craft sophisticated, moderately challenging questions that inherently require careful, integrative reasoning over multiple chunks of textual information. Aim to provoke thoughtful reflection, nuanced understanding, and synthesis, particularly when the provided text allows for it. +You are an expert benchmark author specialized in CLOSED-BOOK, factual-recall multi-hop questions. Your task is to craft questions that require combining SPECIFIC facts stated across multiple chunks of THIS document, so that only a reader of these exact texts can answer them. Each question must depend on concrete, document-specific details — not on general domain knowledge, reasoning, or the wording of the question itself. ## Input Structure @@ -273,7 +303,7 @@ class MultipleChoiceQuestion(BaseModel): ## Primary Objective -Generate a thoughtful, educationally meaningful set of multi-hop question-answer pairs. Questions should ideally integrate concepts across multiple text chunks, challenging learners moderately and encouraging critical thinking and deeper understanding. +Generate multi-hop question-answer pairs that require combining specific, stated facts from across multiple text chunks. Each question must hinge on concrete details (names, numbers, values, steps, behaviors) found in the chunks, so that only a reader of these exact texts can answer correctly. Do not aim for "moderate challenge" or open-ended reflection — aim for source-dependent specificity and verifiable answers. ### Context Fields: - ``: Document context @@ -293,8 +323,8 @@ class MultipleChoiceQuestion(BaseModel): 2. **Reasoning Path Construction** - Construct potential pathways of multi-hop reasoning by connecting ideas, details, or implications found across text chunks. -3. **Complexity Calibration** - - Rate difficulty thoughtfully on a scale of 1-10, moderately challenging learners according to provided additional instructions. +3. 
**Specificity Calibration** + - Rate difficulty (1-10) by how strongly the question depends on combining specific facts from these chunks: higher when only a reader of these texts could answer, lower when a generalist could guess. Discard questions answerable without the document. 4. **Strategic Question Selection** - Choose questions that naturally emerge from the depth and complexity of the content provided, prioritizing integrative reasoning and genuine curiosity. @@ -302,24 +332,15 @@ class MultipleChoiceQuestion(BaseModel): ## Question Generation Guidelines ### Question Characteristics -- **Multi-Hop Integration**: Questions should naturally require integration across multiple chunks, demonstrating clear interconnected reasoning. -- **Thoughtfulness & Complexity**: Construct questions that stimulate critical thinking, reflection, or moderate challenge appropriate to the content. -- **Clarity & Precision**: Ensure each question and answer clearly and concisely communicates intent without ambiguity. -- **Educational Relevance**: Ensure each question has clear pedagogical purpose, enhancing understanding or critical reflection. -- **Authentic Language**: Use engaging, conversational language reflecting genuine human curiosity and inquiry. - -### Suggested Question Types -(Use naturally, as fitting to the content complexity) -- Analytical -- Application-based -- Clarification -- Counterfactual -- Conceptual -- True-False -- Factual -- Open-ended -- False-premise -- Edge-case +- **Multi-Hop Source Dependency**: Questions must require combining two or more SPECIFIC facts stated across the chunks. A model without these exact texts must be unable to answer from general knowledge. +- **Verifiable Answer**: The answer must be concrete and checkable against the chunks (facts, values, names, short enumerations), not an open-ended essay. 
+- **Closed-Book, Not Reasoning**: Avoid generic framings ("what are the benefits of…", "how does … improve …", "why is … important") that a capable model answers without the document. +- **Clarity & Precision**: Each question and answer communicates intent concisely and unambiguously. +- **Discriminating**: Favor combinations of specific terminology, steps, counts, components, or thresholds unique to these texts. + +### Question Types (in priority order) +Strongly prefer: Factual, Clarification, Edge-case, True-False, False-premise — each anchored to specific stated facts that must be combined across chunks. +Avoid (answerable without the document): Analytical, Conceptual, Application-based, Counterfactual, Open-ended. ## **Filtering Irrelevant Content**: @@ -335,13 +356,11 @@ class MultipleChoiceQuestion(BaseModel): MULTI_HOP_QUESTION_GENERATION_SYSTEM_FOOTER = """## Important Notes -- Prioritize depth and thoughtfulness in your reasoning paths. -- Allow natural complexity to guide question formulation, aiming for moderate challenge. -- Precisely cite verbatim excerpts from text chunks. -- Clearly communicate your thought process for integrative reasoning. +- Prioritize source-dependent specificity: each question must require combining concrete facts stated across the chunks. +- Precisely cite verbatim excerpts from the text chunks for every fact used. +- In "thought_process", state which specific facts from which chunks are combined and why a model without these texts could not answer. - Adhere strictly to JSON formatting and Pydantic validation requirements. -- Generate questions that genuinely inspire deeper reflection or meaningful exploration of the provided content. -- When generating questions, NEVER include phrases like 'as per the text,' 'according to the document,' or any similar explicit references. 
Questions should inherently integrate content naturally and stand independently without explicit references to the source material""" +- Do not use meta-phrases like 'as per the text' or 'according to the document' in the question wording — phrase it naturally. This is about phrasing only: the question's ANSWER must still depend on these specific sources, not on general knowledge.""" MULTI_HOP_QUESTION_GENERATION_SYSTEM_PROMPT = ( MULTI_HOP_QUESTION_GENERATION_SYSTEM_HEADER @@ -543,22 +562,10 @@ class MultipleChoiceQuestion(BaseModel): {answer_b} </answer_b>""" -CHUNK_SUMMARIZATION_USER_PROMPT = """\ -You are an expert note-taker. Summarise the following document *chunk* in \ -10-12 crisp sentences capturing only the information that will matter for a \ -later global summary. - -<chunk> -{chunk} -</chunk> - -Wrap your output inside <chunk_summary> tags.""" - COMBINE_SUMMARIES_USER_PROMPT = """\ -You will receive a bullet-list of chunk-level summaries from the *same* \ +You will receive a list of chunk-level summaries from the *same* \ document. Combine them into a single, well-structured paragraph that reads \ -naturally and eliminates redundancy. Keep the final answer under 1/3rd of the \ -original combined length. +naturally and eliminates redundancy. 
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional


def _as_text(value: Any) -> str:
    """Return *value* as text, mapping ``None`` to ``""`` rather than ``"None"``."""
    return "" if value is None else str(value)


def force_int_in_range(value: Any, min_val: int, max_val: int) -> int:
    """Coerce *value* to an ``int`` clamped to ``[min_val, max_val]``.

    Conversion is attempted with ``int(value)`` first and, failing that, with
    ``int(float(value))`` so that numeric strings such as ``"7.0"`` are
    accepted. Anything that still cannot be converted (``None``, arbitrary
    text, NaN, +/-infinity) yields the midpoint ``(min_val + max_val) // 2``.

    Args:
        value: Candidate value of any type.
        min_val: Lower bound (inclusive).
        max_val: Upper bound (inclusive).

    Returns:
        The converted value, or the midpoint fallback, clamped to the bounds.
    """
    # OverflowError is what int() raises for float("inf"); it must be caught
    # alongside ValueError/TypeError or non-finite input crashes the caller.
    unparseable = (ValueError, TypeError, OverflowError)
    try:
        ivalue = int(value)
    except unparseable:
        try:
            ivalue = int(float(value))
        except unparseable:
            ivalue = (min_val + max_val) // 2
    return max(min_val, min(ivalue, max_val))


def validate_list(some_list: list[str]) -> list[str]:
    """Return *some_list* with every element converted to ``str``.

    Args:
        some_list: Expected to be a ``list``; any other type is rejected.

    Returns:
        A new list of strings, or ``[]`` when the input is not a list or an
        element cannot be stringified.
    """
    if not isinstance(some_list, list):
        return []
    try:
        return [str(value) for value in some_list]
    except Exception:
        # Deliberate best-effort: a misbehaving __str__ on any element
        # discards the whole list instead of propagating.
        return []


@dataclass
class QuestionRow:
    """One generated question together with its provenance.

    A row is tied either to a single chunk (``chunk_id``) or to several
    chunks (``source_chunk_ids``); ``__post_init__`` rejects rows that set
    both or neither. ``choices`` is kept only when ``question_mode`` is
    ``"multi-choice"`` and must then hold exactly four entries.

    Raises:
        ValueError: From ``__post_init__`` when a multi-choice row does not
            have exactly 4 choices, or when the chunk reference is missing
            or ambiguous.
    """

    document_id: str
    additional_instructions: str
    question: str
    self_answer: str
    estimated_difficulty: int
    self_assessed_question_type: str
    question_mode: str
    generating_model: str
    thought_process: str
    raw_response: str

    citations: List[str] = field(default_factory=list)
    choices: Optional[List[str]] = field(default_factory=list)

    chunk_id: Optional[str] = None
    source_chunk_ids: Optional[List[str]] = None

    def __post_init__(self) -> None:
        """Normalise field values and validate the row's invariants."""
        self.question = _as_text(self.question).strip()
        self.self_answer = _as_text(self.self_answer).strip()
        self.estimated_difficulty = force_int_in_range(self.estimated_difficulty, 1, 10)
        self.self_assessed_question_type = _as_text(self.self_assessed_question_type).strip()
        self.thought_process = _as_text(self.thought_process)
        self.citations = validate_list(self.citations)
        self.question_mode = _as_text(self.question_mode).strip().lower()

        if self.question_mode == "multi-choice":
            self.choices = validate_list(self.choices)
            if len(self.choices) != 4:
                raise ValueError("Multi-choice questions must have exactly 4 choices.")
        else:
            # Choices are meaningless outside multi-choice mode; drop them.
            self.choices = []

        if self.chunk_id and self.source_chunk_ids:
            raise ValueError("Cannot have both chunk_id and source_chunk_ids.")
        if not self.chunk_id and not self.source_chunk_ids:
            raise ValueError("Must have either chunk_id or source_chunk_ids.")

    @property
    def answer(self) -> str:
        """Alias for ``self_answer``."""
        return self.self_answer

    @property
    def question_type(self) -> str:
        """Alias for ``self_assessed_question_type``."""
        return self.self_assessed_question_type

    def is_multi_hop(self) -> bool:
        """Return True when the row references a non-empty ``source_chunk_ids``."""
        # Truthiness (not ``is not None``) mirrors the __post_init__ checks,
        # so an empty list never makes a single-hop row look multi-hop.
        return bool(self.source_chunk_ids)

    def is_single_hop(self) -> bool:
        """Return True when the row references a non-empty ``chunk_id``."""
        return bool(self.chunk_id)

    @classmethod
    def _from_pair(
        cls,
        pair: Dict[str, Any],
        *,
        chunk_id: Optional[str],
        source_chunk_ids: Optional[List[str]],
        document_id: str,
        model: str,
        raw_response: str,
        additional_instructions: str,
    ) -> "QuestionRow":
        """Build a row from a parsed question dict; shared by both public constructors."""
        return cls(
            chunk_id=chunk_id,
            source_chunk_ids=source_chunk_ids,
            document_id=document_id,
            additional_instructions=additional_instructions,
            question=_as_text(pair.get("question")).strip(),
            self_answer=_as_text(pair.get("answer")).strip(),
            choices=pair.get("choices"),
            estimated_difficulty=force_int_in_range(pair.get("estimated_difficulty", 5), 1, 10),
            self_assessed_question_type=_as_text(pair.get("question_type")).strip(),
            question_mode=_as_text(pair.get("question_mode")).strip().lower(),
            generating_model=model,
            thought_process=_as_text(pair.get("thought_process")),
            raw_response=raw_response,
            citations=validate_list(pair.get("citations", [])),
        )

    @classmethod
    def from_single_hop(
        cls,
        pair: Dict[str, Any],
        chunk_id: str,
        document_id: str,
        model: str,
        raw_response: str,
        additional_instructions: str = "",
    ) -> "QuestionRow":
        """Create a row bound to one chunk.

        Args:
            pair: Parsed question dict; the keys read are ``question``,
                ``answer``, ``choices``, ``estimated_difficulty``,
                ``question_type``, ``question_mode``, ``thought_process``
                and ``citations``. Missing keys fall back to empty values
                (difficulty falls back to 5).
            chunk_id: Identifier of the chunk the question was generated from.
            document_id: Identifier of the parent document.
            model: Name of the generating model.
            raw_response: Unparsed model output, stored as-is.
            additional_instructions: Extra instructions stored with the row.

        Raises:
            ValueError: If the resulting row violates the class invariants.
        """
        return cls._from_pair(
            pair,
            chunk_id=chunk_id,
            source_chunk_ids=None,
            document_id=document_id,
            model=model,
            raw_response=raw_response,
            additional_instructions=additional_instructions,
        )

    @classmethod
    def from_multi_hop(
        cls,
        pair: Dict[str, Any],
        source_chunk_ids: List[str],
        document_id: str,
        model: str,
        raw_response: str,
        additional_instructions: str = "",
    ) -> "QuestionRow":
        """Create a row bound to several chunks.

        Takes the same arguments as ``from_single_hop`` except that
        ``source_chunk_ids`` replaces ``chunk_id``.

        Raises:
            ValueError: If the resulting row violates the class invariants.
        """
        return cls._from_pair(
            pair,
            chunk_id=None,
            source_chunk_ids=source_chunk_ids,
            document_id=document_id,
            model=model,
            raw_response=raw_response,
            additional_instructions=additional_instructions,
        )

    def to_dict(self, format: str = "unified") -> Dict[str, Any]:
        """Serialise the row to a plain dict.

        Args:
            format: ``"single-hop"`` emits only ``chunk_id``, ``"multi-hop"``
                emits only ``source_chunk_ids``; any other value (including
                the default ``"unified"``) emits both.

        Returns:
            The row's fields as a dict. ``choices`` is included only for
            multi-choice rows; ``question_mode`` itself is not emitted.
        """
        row: Dict[str, Any] = {
            "document_id": self.document_id,
            "additional_instructions": self.additional_instructions,
            "question": self.question,
            "self_answer": self.self_answer,
            "estimated_difficulty": self.estimated_difficulty,
            "self_assessed_question_type": self.self_assessed_question_type,
            "generating_model": self.generating_model,
            "thought_process": self.thought_process,
            "raw_response": self.raw_response,
            "citations": self.citations,
        }

        if self.question_mode == "multi-choice":
            row["choices"] = self.choices

        # Each hop-specific format omits the other format's reference key.
        if format != "multi-hop":
            row["chunk_id"] = self.chunk_id
        if format != "single-hop":
            row["source_chunk_ids"] = self.source_chunk_ids
        return row