Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions ROADMAP.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Roadmap

Стратегический план развития Auto Code (5 целей, реализация волнами) — полностью в
[docs/strategy/roadmap.md](docs/strategy/roadmap.md), с задачами, путями к файлам и критериями приёмки.

**Порядок реализации:**

- **Волна 1** (параллельно): `P3` доделать мультипровайдерную автономность · `P5` прозрачность стоимости · `P1` слой доверия (наш дифференциатор).
- **Волна 2**: `P2` GitHub App (issue → автономный PR с отчётом доверия).
- **Волна 3**: `P4` облако / команды (мультиарендность, роли, история запусков).

**Стратегия:** позиционирование «автономный кодер, которому можно доверять и который запускается на своей инфраструктуре» — прицел на регулируемый self-hosted энтерпрайз.

Подробности — в [docs/strategy/roadmap.md](docs/strategy/roadmap.md).
38 changes: 38 additions & 0 deletions apps/backend/agents/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,8 @@ def load_token_stats(spec_dir: Path) -> TaskTokenStats | None:
input_tokens=phase_data["input_tokens"],
output_tokens=phase_data["output_tokens"],
session_count=phase_data.get("session_count", 0),
model=phase_data.get("model"),
provider=phase_data.get("provider"),
updated_at=datetime.fromisoformat(phase_data["updated_at"]),
)

Expand All @@ -387,11 +389,31 @@ def load_token_stats(spec_dir: Path) -> TaskTokenStats | None:
return None


def _resolve_active_provider() -> str | None:
"""Best-effort resolve the active AI provider name (for cost attribution).

Returns the configured provider string (e.g. "claude", "openai") or None
if it cannot be determined. Never raises — token stats must persist even
when provider config is unavailable.
"""
try:
from core.providers.config import get_provider_config

provider_config = get_provider_config()
if provider_config is not None:
return getattr(provider_config, "provider", None)
except Exception: # pragma: no cover - defensive, provider config optional
return None
return None


def save_token_stats(
spec_dir: Path,
phase: PhaseType,
input_tokens: int,
output_tokens: int,
model: str | None = None,
provider: str | None = None,
) -> bool:
"""
Update token statistics for a phase and persist to token_stats.json.
Expand All @@ -404,6 +426,9 @@ def save_token_stats(
phase: Execution phase (planning, coding, validation)
input_tokens: Number of input tokens used in this session
output_tokens: Number of output tokens used in this session
model: Model used in this session (recorded for cost attribution).
provider: Provider used; when omitted, the active provider is resolved
from provider config so every caller records it without changes.

Returns:
True if saved successfully, False otherwise
Expand All @@ -413,6 +438,10 @@ def save_token_stats(
existing_stats = load_token_stats(spec_dir)
now = datetime.now()

# Resolve provider once so every caller records it without changes.
if provider is None:
provider = _resolve_active_provider()

if existing_stats:
phases = existing_stats.phases.copy()
created_at = existing_stats.created_at
Expand All @@ -427,12 +456,20 @@ def save_token_stats(
phase_stats.output_tokens += output_tokens
phase_stats.session_count += 1
phase_stats.updated_at = now
# Last non-empty value wins; don't clobber a known model/provider
# with None from a later session that didn't supply one.
if model:
phase_stats.model = model
if provider:
phase_stats.provider = provider
else:
phase_stats = PhaseTokenStats(
phase=phase,
input_tokens=input_tokens,
output_tokens=output_tokens,
session_count=1,
model=model,
provider=provider,
updated_at=now,
)
phases[phase] = phase_stats
Expand Down Expand Up @@ -1502,6 +1539,7 @@ async def run_agent_session(
phase_type,
usage_metadata["input_tokens"],
usage_metadata["output_tokens"],
model=getattr(client, "model", None),
)
if saved:
print_status(
Expand Down
124 changes: 124 additions & 0 deletions apps/backend/cli/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@

logger = logging.getLogger(__name__)

# Trust Layer verification report (P1) — see docs/strategy/roadmap.md
# File name the report is written under inside the artifact directory.
VERIFICATION_REPORT_FILENAME = "verification-report.json"
# Value stamped into every report as its "schema_version" field.
VERIFICATION_REPORT_SCHEMA_VERSION = 1
# Verdicts a report may carry; any other value is normalized to "error".
_ALLOWED_VERDICTS = (
    "approved",
    "rejected",
    "error",
)


class ArtifactManager:
"""
Expand Down Expand Up @@ -212,6 +217,51 @@
logger.warning(f"Failed to save coverage report: {e}")
return None

def save_verification_report(
self,
verification_data: dict[str, Any],
) -> Path | None:
"""
Save the Trust Layer verification report as a JSON artifact.

Persists the structured QA verdict that the desktop UI and GitHub PR
comments surface as a "what was verified" report: verdict, confidence,
tests run, diff summary, the agent's uncertainty list, and any
out-of-scope edits. Use :func:`build_verification_report` to assemble
``verification_data`` from the QA loop's existing signals.

Args:
verification_data: Verification report dict (see
build_verification_report). A ``timestamp`` is added if absent.

Returns:
Path to saved artifact file, or None if disabled.

Example:
>>> report = build_verification_report(verdict="approved")
>>> manager.save_verification_report(report)
"""
if not self.enabled:
return None

artifact_path = self.artifact_dir / VERIFICATION_REPORT_FILENAME

try:
# Add timestamp if not present (copy to avoid mutating caller's dict)
if "timestamp" not in verification_data:
verification_data = dict(verification_data)
verification_data["timestamp"] = datetime.utcnow().isoformat() + "Z"

Check failure on line 253 in apps/backend/cli/artifacts.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Don't use `datetime.datetime.utcnow` to create this datetime object.

See more on https://sonarcloud.io/project/issues?id=OBenner_Auto-Coding&issues=AZ76JuW9tTAdk7PlSGMH&open=AZ76JuW9tTAdk7PlSGMH&pullRequest=361

with open(artifact_path, "w", encoding="utf-8") as f:
json.dump(verification_data, f, indent=2)

logger.debug(f"Verification report saved: {artifact_path}")
return artifact_path

except (OSError, ValueError, TypeError) as e:
logger.warning(f"Failed to save verification report: {e}")
return None

def save_custom_artifact(
self,
artifact_name: str,
Expand Down Expand Up @@ -475,6 +525,80 @@
return None


def build_verification_report(
    *,
    verdict: str | None,
    qa_session: int | None = None,
    iteration: int | None = None,
    confidence: float | None = None,
    tests_run: dict[str, Any] | None = None,
    diff_summary: dict[str, Any] | None = None,
    issues: list[dict[str, Any]] | None = None,
    uncertainty: list[dict[str, Any]] | None = None,
    out_of_scope_edits: list[dict[str, Any]] | None = None,
    duration_seconds: float | None = None,
    notes: str | None = None,
) -> dict[str, Any]:
    """
    Assemble a normalized Trust Layer verification report (no I/O).

    Pure helper so the schema can be unit-tested and reused. ``verdict`` is
    normalized to one of ``approved``/``rejected``/``error`` and
    ``confidence`` is clamped to ``[0, 1]``. Container arguments are
    shallow-copied, so later changes to the caller's top-level dicts/lists do
    not leak into the report.

    Args:
        verdict: QA outcome; ``None``, non-string, or unknown values map to
            ``"error"``. Matching ignores case and surrounding whitespace.
        qa_session: QA session/pass index, if known.
        iteration: QA loop iteration number, if known.
        confidence: Optional 0..1 confidence signal (clamped). Non-numeric
            or NaN values are reported as ``None`` rather than being
            mistaken for a real confidence.
        tests_run: Test/coverage summary (e.g. passed/failed/total/coverage).
        diff_summary: Change summary (e.g. files_changed, files).
        issues: Issues found.
        uncertainty: Areas the agent is unsure about.
        out_of_scope_edits: Edits made outside the planned files.
        duration_seconds: Optional duration of the QA pass; rounded to two
            decimals. Non-numeric or non-finite values are dropped.
        notes: Free-form notes.

    Returns:
        A JSON-serializable verification report dict. No ``timestamp`` key is
        set here. ``duration_seconds`` is present only when a usable value
        was supplied.
    """
    # Local import keeps this helper self-contained regardless of which
    # modules the surrounding file imports.
    import math

    # Non-string verdicts (including None) cannot be normalized; treat them
    # like any other unknown verdict instead of raising on `.strip()`.
    normalized_verdict = (
        verdict.strip().lower() if isinstance(verdict, str) else "error"
    )
    if normalized_verdict not in _ALLOWED_VERDICTS:
        normalized_verdict = "error"

    clamped_confidence: float | None = None
    if confidence is not None:
        try:
            numeric_confidence = float(confidence)
        except (TypeError, ValueError, OverflowError):
            numeric_confidence = math.nan
        # NaN must be rejected explicitly: min(1.0, nan) evaluates to 1.0,
        # which would silently turn "no signal" into full confidence.
        if not math.isnan(numeric_confidence):
            clamped_confidence = max(0.0, min(1.0, numeric_confidence))

    report: dict[str, Any] = {
        "schema_version": VERIFICATION_REPORT_SCHEMA_VERSION,
        "verdict": normalized_verdict,
        "qa_session": qa_session,
        "iteration": iteration,
        "confidence": clamped_confidence,
        "tests_run": dict(tests_run) if tests_run else {},
        "diff_summary": dict(diff_summary) if diff_summary else {},
        "issues": list(issues) if issues else [],
        "uncertainty": list(uncertainty) if uncertainty else [],
        "out_of_scope_edits": list(out_of_scope_edits) if out_of_scope_edits else [],
        "notes": notes,
    }

    if duration_seconds is not None:
        try:
            numeric_duration = float(duration_seconds)
        except (TypeError, ValueError, OverflowError):
            # Non-numeric duration is dropped rather than failing the report.
            numeric_duration = math.nan
        # NaN/Infinity have no valid JSON representation, so they are
        # dropped as well to keep the report strictly serializable.
        if math.isfinite(numeric_duration):
            report["duration_seconds"] = round(numeric_duration, 2)
    return report


def create_artifact_manager(
spec_dir: Path,
enabled: bool = True,
Expand Down
Loading
Loading