feat: Add PowerPoint (PPTX) extraction support #11
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Continuous-integration workflow.
#
# Jobs:
#   test              - lint, format check, type check, security scan and unit
#                       tests on every Python version in the matrix.
#   docs              - build the HTML documentation and upload it.
#   build             - build and validate the sdist/wheel (runs after test).
#   integration-test  - install the built wheel and smoke-test it (runs after
#                       build).
name: CI

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main, develop]

# Least privilege for the workflow token: none of the steps below push
# commits, comment on pull requests, or publish releases.
permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
      fail-fast: false  # Continue running other versions even if one fails
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Cache pip packages
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          # The interpreter version is part of the key so that each matrix leg
          # saves and restores its own cache instead of competing for one entry.
          key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-pip-${{ matrix.python-version }}-

      # Best effort: a failure here must not fail the workflow.
      # libgl1 is used instead of libgl1-mesa-glx because the latter has no
      # install candidate on Ubuntu 24.04, and a single unknown package makes
      # apt-get refuse to install any of the others.
      - name: Install system dependencies
        continue-on-error: true  # Don't fail workflow if system deps unavailable
        run: |
          sudo apt-get update || true
          sudo apt-get install -y tesseract-ocr tesseract-ocr-eng poppler-utils libgl1 libglib2.0-0 || true

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"

      - name: Run linting
        run: |
          echo "Running flake8..."
          flake8 docprocessor tests --max-line-length=100 --extend-ignore=E203,W503 --count --show-source --statistics

      - name: Check code formatting
        run: |
          echo "Checking Black formatting..."
          black --check docprocessor tests --line-length=100
          echo "Checking isort..."
          isort --check-only docprocessor tests --profile black --line-length=100

      # Advisory only: "|| true" keeps mypy findings from failing the job.
      - name: Run type checking
        run: |
          echo "Running mypy..."
          mypy docprocessor --ignore-missing-imports --no-error-summary || true

      # The JSON Bandit run and the Safety run are advisory ("|| true").
      # The second Bandit invocation has no "|| true", so its exit status
      # decides whether this step passes.
      - name: Run security checks
        run: |
          echo "Installing security tools..."
          pip install bandit safety
          echo "Running Bandit security scanner..."
          bandit -r docprocessor -f json -o bandit-report.json || true
          bandit -r docprocessor -f screen
          echo "Running Safety dependency checker..."
          safety check --json || true

      - name: Run tests with coverage
        run: |
          pytest --cov=docprocessor --cov-report=xml --cov-report=term --cov-report=html -v

      # Coverage is published from a single matrix leg only.
      - name: Upload coverage to Codecov
        if: matrix.python-version == '3.11'
        uses: codecov/codecov-action@v4
        with:
          file: ./coverage.xml
          flags: unittests
          name: codecov-umbrella
          fail_ci_if_error: false

      - name: Upload coverage artifacts
        if: matrix.python-version == '3.11'
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report
          path: htmlcov/

  docs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[docs]"

      - name: Build documentation
        run: |
          cd docs
          make html
          echo "Documentation built successfully!"

      - name: Upload documentation artifacts
        uses: actions/upload-artifact@v4
        with:
          name: documentation
          path: docs/_build/html/

  build:
    runs-on: ubuntu-latest
    needs: test
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install build dependencies
        run: |
          python -m pip install --upgrade pip build twine

      - name: Build package
        run: |
          python -m build

      - name: Check package
        run: |
          twine check dist/*

      # Consumed by the integration-test job via download-artifact.
      - name: Upload build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist-packages
          path: dist/

  integration-test:
    runs-on: ubuntu-latest
    needs: build
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y tesseract-ocr poppler-utils

      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: dist-packages
          path: dist/

      # Installs the wheel produced by the build job rather than the checkout.
      - name: Install from wheel
        run: |
          pip install dist/*.whl

      - name: Test import
        run: |
          python -c "from docprocessor import DocumentProcessor, MeiliSearchIndexer; print('✅ Import successful')"

      # Smoke test: write a small .txt file, run it through the processor,
      # and require that some text comes back.
      - name: Test basic functionality
        run: |
          python -c "
          from docprocessor import DocumentProcessor
          import tempfile
          from pathlib import Path
          # Create a test file
          with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
              f.write('Test document content for integration testing.')
              test_file = Path(f.name)
          # Test processing
          processor = DocumentProcessor()
          result = processor.process(test_file, extract_text=True)
          assert result.text
          print(f'✅ Processed {len(result.text)} characters')
          # Cleanup
          test_file.unlink()
          "