Skip to content

feat: Add PowerPoint (PPTX) extraction support #11

feat: Add PowerPoint (PPTX) extraction support

feat: Add PowerPoint (PPTX) extraction support #11

Workflow file for this run

# CI workflow: runs on pushes and pull requests targeting main or develop.
name: CI

# GitHub's workflow loader reads `on` as the trigger key (generic YAML 1.1
# parsers may interpret the bare word as a boolean).
on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main, develop]

# Limit the default GITHUB_TOKEN to read-only repository access. None of the
# jobs visible in this workflow push commits, tags, releases or comments, so
# they do not need the broader repository default.
permissions:
  contents: read

jobs:
# Job `test` (lives under the top-level `jobs:` key): lint, format-check,
# type-check, security-scan and unit-test the package on each Python version
# in the matrix, then publish coverage from the 3.11 run.
test:
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false  # Continue running other versions even if one fails
    matrix:
      # Versions are quoted so YAML does not read 3.10 as the float 3.1.
      python-version: ["3.10", "3.11", "3.12"]
  steps:
    - uses: actions/checkout@v4

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Cache pip packages
      uses: actions/cache@v4
      with:
        path: ~/.cache/pip
        key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }}
        restore-keys: |
          ${{ runner.os }}-pip-

    - name: Install system dependencies
      continue-on-error: true  # Don't fail workflow if system deps unavailable
      run: |
        # A failed index refresh is reported but does not stop the install.
        sudo apt-get update \
          || echo "::warning::apt-get update failed; using existing package lists"
        # libgl1 replaces the transitional libgl1-mesa-glx package, which has
        # no install candidate on current ubuntu-latest images. A single
        # unavailable package makes apt-get abort the whole install, so
        # tesseract and poppler would silently be skipped as well.
        # No `|| true` here: continue-on-error already keeps the job running,
        # and a failed step stays visible in the run summary.
        sudo apt-get install -y \
          tesseract-ocr tesseract-ocr-eng poppler-utils libgl1 libglib2.0-0

    - name: Install Python dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e ".[dev]"

    - name: Run linting
      run: |
        echo "Running flake8..."
        flake8 docprocessor tests \
          --max-line-length=100 \
          --extend-ignore=E203,W503 \
          --count --show-source --statistics

    - name: Check code formatting
      run: |
        echo "Checking Black formatting..."
        black --check docprocessor tests --line-length=100
        echo "Checking isort..."
        isort --check-only docprocessor tests --profile black --line-length=100

    - name: Run type checking
      run: |
        echo "Running mypy..."
        # Advisory only: `|| true` masks mypy's exit status.
        mypy docprocessor --ignore-missing-imports --no-error-summary || true

    - name: Run security checks
      run: |
        echo "Installing security tools..."
        pip install bandit safety
        echo "Running Bandit security scanner..."
        # The JSON report run is best-effort; the screen run below is the one
        # whose exit status is not masked.
        bandit -r docprocessor -f json -o bandit-report.json || true
        bandit -r docprocessor -f screen
        echo "Running Safety dependency checker..."
        # Advisory only: `|| true` masks the exit status.
        safety check --json || true

    - name: Run tests with coverage
      run: |
        pytest --cov=docprocessor --cov-report=xml --cov-report=term --cov-report=html -v

    # Coverage is published from a single matrix entry to avoid duplicates.
    - name: Upload coverage to Codecov
      if: matrix.python-version == '3.11'
      uses: codecov/codecov-action@v4
      with:
        file: ./coverage.xml
        flags: unittests
        name: codecov-umbrella
        fail_ci_if_error: false

    - name: Upload coverage artifacts
      if: matrix.python-version == '3.11'
      uses: actions/upload-artifact@v4
      with:
        name: coverage-report
        path: htmlcov/
# Job `docs` (lives under the top-level `jobs:` key): install the package with
# its `docs` extra, run `make html` in docs/ and publish the generated HTML as
# an artifact.
docs:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.11"

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e ".[docs]"

    - name: Build documentation
      # Run inside docs/ instead of an explicit `cd docs`.
      working-directory: docs
      run: |
        make html
        echo "Documentation built successfully!"

    - name: Upload documentation artifacts
      uses: actions/upload-artifact@v4
      with:
        name: documentation
        path: docs/_build/html/
# Job `build` (lives under the top-level `jobs:` key): once `test` has
# succeeded, build the distribution files, validate them with twine and
# publish dist/ as the `dist-packages` artifact.
build:
  needs: test
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.11"

    - name: Install build dependencies
      run: python -m pip install --upgrade pip build twine

    - name: Build package
      run: python -m build

    - name: Check package
      run: twine check dist/*

    - name: Upload build artifacts
      uses: actions/upload-artifact@v4
      with:
        name: dist-packages
        path: dist/
# Job `integration-test` (lives under the top-level `jobs:` key): install the
# wheel produced by `build` and smoke-test it with an import check and a
# small end-to-end text extraction.
integration-test:
  needs: build
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.11"

    - name: Install system dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y tesseract-ocr poppler-utils

    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: dist-packages
        path: dist/

    - name: Install from wheel
      run: pip install dist/*.whl

    # Python puts the current directory first on sys.path for `-c` and stdin
    # scripts. Run from the checkout root, the checks below would import the
    # checkout's docprocessor/ source directory (the path linted by the test
    # job) instead of the installed wheel, so they run from the runner's temp
    # directory.
    - name: Test import
      working-directory: ${{ runner.temp }}
      run: |
        python -c "from docprocessor import DocumentProcessor, MeiliSearchIndexer; print('✅ Import successful')"

    - name: Test basic functionality
      working-directory: ${{ runner.temp }}
      run: |
        # Quoted heredoc: the shell passes the script through untouched, so no
        # quoting or expansion rules apply to the Python source.
        python - <<'PY'
        import tempfile
        from pathlib import Path

        from docprocessor import DocumentProcessor

        # Create a test file
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
            f.write('Test document content for integration testing.')
            test_file = Path(f.name)

        try:
            # Test processing
            processor = DocumentProcessor()
            result = processor.process(test_file, extract_text=True)
            assert result.text
            print(f'✅ Processed {len(result.text)} characters')
        finally:
            # Cleanup, even when processing or the assertion fails
            test_file.unlink()
        PY