2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -8,7 +8,7 @@ on:

 jobs:
   test:
-    runs-on: self-hosted
+    runs-on: ubuntu-latest
     strategy:
       matrix:
         python-version: ["3.11", "3.12"]
234 changes: 234 additions & 0 deletions src/arbiter/artifact_scorer.py
@@ -0,0 +1,234 @@
"""Artifact Scorer -- generic quality scoring for structured knowledge artifacts.

Extends Arbiter beyond code quality to score any structured artifact that has
a schema and quality dimensions. Mirrors the Analyzer / Finding / RepoScore
pattern from analyzers/base.py and scoring.py.

Supported artifact types (registered in scorers/):
    bibliography -- hummbl-bibliography batch quality
    governance -- HUMMBL governance receipt quality
    arcana_essay -- Arcana essay quality (see scorers/__init__.py)

Adding a new scorer:
    1. Subclass ArtifactScorer, implement artifact_type and score()
    2. Register via DEFAULT_REGISTRY.register(MyScorer())
    3. Add tests to tests/test_artifact_scorer.py
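
Example (a minimal sketch -- MyScorer, rule MY001, and the "completeness"
dimension are illustrative placeholders, not shipped scorers):

    class MyScorer(ArtifactScorer):
        @property
        def artifact_type(self) -> str:
            return "my_type"

        def score(self, artifact: dict) -> ArtifactScore:
            findings = []
            if not artifact.get("title"):
                findings.append(ArtifactFinding(
                    field="title", severity="HIGH", rule_id="MY001",
                    message="Artifact is missing a title", scorer="my_type",
                ))
            return _score_from_findings(
                findings,
                dimension_findings={"completeness": findings},
                weights={"completeness": 1.0},
                artifact_type="my_type",
            )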

Usage:
    from arbiter.artifact_scorer import DEFAULT_REGISTRY

    result = DEFAULT_REGISTRY.score("bibliography", {"entries": [...]})
    print(f"{result.grade} ({result.overall}/100)")
    for finding in result.findings:
        print(f"  [{finding.severity}] {finding.field}: {finding.message}")
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass, field


# ---------------------------------------------------------------------------
# Finding
# ---------------------------------------------------------------------------

@dataclass(frozen=True, slots=True)
class ArtifactFinding:
"""A single quality finding from an artifact scorer.

Parallel to analyzers/base.py Finding, but for knowledge artifacts
rather than code files.
"""

field: str # Which field or dimension the finding concerns
severity: str # CRITICAL, HIGH, MEDIUM, LOW
rule_id: str # Stable identifier for the rule (e.g. BIB001)
message: str # Human-readable description
scorer: str # Which scorer produced this finding


# ---------------------------------------------------------------------------
# Score
# ---------------------------------------------------------------------------

@dataclass(frozen=True)
class ArtifactScore:
"""Quality score for a structured knowledge artifact.

Parallel to scoring.py RepoScore, but for arbitrary artifact types.
"""

artifact_type: str # Which scorer produced this
overall: float # 0-100 weighted composite
dimensions: dict[str, float] # Per-dimension scores (0-100 each)
total_findings: int
findings: tuple[ArtifactFinding, ...] = field(default_factory=tuple)
findings_by_severity: dict[str, int] = field(default_factory=dict)
metadata: dict[str, object] = field(default_factory=dict)

@property
def grade(self) -> str:
"""Letter grade matching Arbiter's RepoScore grade scale."""
if self.overall >= 90:
return "A"
if self.overall >= 80:
return "B"
if self.overall >= 70:
return "C"
if self.overall >= 60:
return "D"
return "F"

def summary(self) -> str:
"""One-line summary string."""
        dim_str = ", ".join(f"{k}={v:.0f}" for k, v in sorted(self.dimensions.items()))
        return (
            f"{self.artifact_type} | Grade {self.grade} | {self.overall:.1f}/100 | "
            f"{self.total_findings} findings | [{dim_str}]"
        )

    def to_dict(self) -> dict:
        """Serialise to a plain dict (JSON-safe)."""
        return {
            "artifact_type": self.artifact_type,
            "overall": self.overall,
            "grade": self.grade,
            "dimensions": dict(self.dimensions),
            "total_findings": self.total_findings,
            "findings_by_severity": dict(self.findings_by_severity),
            "findings": [
                {
                    "field": f.field,
                    "severity": f.severity,
                    "rule_id": f.rule_id,
                    "message": f.message,
                    "scorer": f.scorer,
                }
                for f in self.findings
            ],
            "metadata": dict(self.metadata),
        }


# ---------------------------------------------------------------------------
# Base scorer
# ---------------------------------------------------------------------------

class ArtifactScorer(ABC):
"""Abstract base for knowledge artifact scorers.

Parallel to analyzers/base.py Analyzer, but takes an arbitrary dict
instead of a filesystem path.
"""

@property
@abstractmethod
def artifact_type(self) -> str:
"""Unique type key (e.g. 'bibliography', 'governance')."""

@abstractmethod
def score(self, artifact: dict) -> ArtifactScore:
"""Score the artifact. Returns an ArtifactScore."""

def is_available(self) -> bool:
"""Check if this scorer can run (override if external tools needed)."""
return True


# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------

class ArtifactScorerRegistry:
"""Maps artifact type strings to ArtifactScorer instances.

Usage:
registry = ArtifactScorerRegistry()
registry.register(BibliographyScorer())
registry.register(GovernanceReceiptScorer())

result = registry.score("bibliography", {"entries": [...]})
"""

def __init__(self) -> None:
self._scorers: dict[str, ArtifactScorer] = {}

def register(self, scorer: ArtifactScorer) -> None:
"""Register a scorer. Overwrites any existing scorer for the same type."""
self._scorers[scorer.artifact_type] = scorer

def score(self, artifact_type: str, artifact: dict) -> ArtifactScore:
"""Score an artifact by type. Raises KeyError if type not registered."""
scorer = self._scorers.get(artifact_type)
if scorer is None:
available = sorted(self._scorers)
raise KeyError(
f"No scorer registered for artifact_type={artifact_type!r}. "
f"Available: {available}"
)
return scorer.score(artifact)

def available_types(self) -> list[str]:
"""Return sorted list of registered artifact type keys."""
return sorted(self._scorers)

def __contains__(self, artifact_type: str) -> bool:
return artifact_type in self._scorers


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _score_from_findings(
    findings: list[ArtifactFinding],
    dimension_findings: dict[str, list[ArtifactFinding]],
    weights: dict[str, float],
    artifact_type: str,
    metadata: dict | None = None,
) -> ArtifactScore:
"""Build an ArtifactScore from findings and dimension weights.

Each dimension score starts at 100 and is penalised by findings:
CRITICAL: -25, HIGH: -15, MEDIUM: -7, LOW: -3

Dimensions not present in weights or dimension_findings get 100.
"""
    severity_penalty = {"CRITICAL": 25, "HIGH": 15, "MEDIUM": 7, "LOW": 3}
    dimensions: dict[str, float] = {}

    for dim, dim_findings in dimension_findings.items():
        penalty = sum(severity_penalty.get(f.severity, 3) for f in dim_findings)
        dimensions[dim] = max(0.0, min(100.0, 100.0 - float(penalty)))

    # Fill in any weight-declared dimensions with no findings
    for dim in weights:
        if dim not in dimensions:
            dimensions[dim] = 100.0

    # Weighted overall
    total_weight = sum(weights.values()) or 1.0
    overall = sum(
        dimensions.get(dim, 100.0) * w for dim, w in weights.items()
    ) / total_weight

    by_severity: dict[str, int] = {}
    for f in findings:
        by_severity[f.severity] = by_severity.get(f.severity, 0) + 1

    return ArtifactScore(
        artifact_type=artifact_type,
        overall=round(overall, 1),
        dimensions={k: round(v, 1) for k, v in dimensions.items()},
        total_findings=len(findings),
        findings=tuple(findings),
        findings_by_severity=by_severity,
        metadata=metadata or {},
    )


# ---------------------------------------------------------------------------
# Default registry (populated by scorers/__init__.py on import)
# ---------------------------------------------------------------------------

DEFAULT_REGISTRY = ArtifactScorerRegistry()
22 changes: 22 additions & 0 deletions src/arbiter/scorers/__init__.py
@@ -0,0 +1,22 @@
"""Arbiter knowledge artifact scorers.

Registers all built-in ArtifactScorer implementations into DEFAULT_REGISTRY.

Importing this package is sufficient to make all built-in scorers available:

    import arbiter.scorers  # noqa: F401
    from arbiter.artifact_scorer import DEFAULT_REGISTRY

    result = DEFAULT_REGISTRY.score("bibliography", {"entries": [...]})
"""

from arbiter.artifact_scorer import DEFAULT_REGISTRY
from arbiter.scorers.arcana_essay_scorer import ArcanaEssayScorer
from arbiter.scorers.bibliography_scorer import BibliographyScorer
from arbiter.scorers.governance_receipt_scorer import GovernanceReceiptScorer

DEFAULT_REGISTRY.register(BibliographyScorer())
DEFAULT_REGISTRY.register(GovernanceReceiptScorer())
DEFAULT_REGISTRY.register(ArcanaEssayScorer())

__all__ = ["BibliographyScorer", "GovernanceReceiptScorer", "ArcanaEssayScorer"]