Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/en/learn/human-feedback-in-flows.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -682,14 +682,16 @@ class ArticleReviewFlow(Flow):
| Parameter | Default | Description |
|-----------|---------|-------------|
| `learn` | `False` | Enable HITL learning |
| `learn_strict` | `False` | If `True`, pre-review failures are re-raised instead of falling back to raw output |
| `learn_limit` | `5` | Max past lessons to recall for pre-review |

### Key Design Decisions

- **Same LLM for everything**: The `llm` parameter on the decorator is shared by outcome collapsing, lesson distillation, and pre-review. No need to configure multiple models.
- **Structured output**: Both distillation and pre-review use function calling with Pydantic models when the LLM supports it, falling back to text parsing otherwise.
- **Non-blocking storage**: Lessons are stored via `remember_many()` which runs in a background thread -- the flow continues immediately.
- **Graceful degradation**: If the LLM fails during distillation, nothing is stored. If it fails during pre-review, the raw output is shown. Neither failure blocks the flow.
- **Observable graceful degradation**: If the LLM fails during distillation, nothing is stored. If it fails during pre-review, the raw output is shown to the human and the failure is logged at `WARNING` level with the full traceback (`exc_info=True`) under the `crewai.flow.human_feedback` logger -- so the silent fallback is detectable. Neither failure blocks the flow.
- **Strict mode**: Pass `learn_strict=True` to make pre-review fail closed -- failures (LLM error, network/auth error, structured-output parse error, memory `recall` error) propagate out of the flow method instead of being swallowed. Use this when downstream code must be able to assume that pre-review actually executed.
- **No scope/categories needed**: When storing lessons, only `source` is passed. The encoding pipeline infers scope, categories, and importance automatically.

<Note>
Expand Down
67 changes: 59 additions & 8 deletions lib/crewai/src/crewai/flow/human_feedback.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def generate_content(self):
from dataclasses import dataclass, field
from datetime import datetime
from functools import wraps
import logging
from typing import TYPE_CHECKING, Any, TypeVar

from pydantic import BaseModel, Field
Expand All @@ -75,6 +76,8 @@ def generate_content(self):

F = TypeVar("F", bound=Callable[..., Any])

logger = logging.getLogger(__name__)


def _serialize_llm_for_context(llm: Any) -> dict[str, Any] | str | None:
"""Serialize a BaseLLM object to a dict preserving full config.
Expand Down Expand Up @@ -188,6 +191,7 @@ class HumanFeedbackConfig:
provider: HumanFeedbackProvider | None = None
learn: bool = False
learn_source: str = "hitl"
learn_strict: bool = False


class HumanFeedbackMethod(FlowMethod[Any, Any]):
Expand Down Expand Up @@ -237,6 +241,7 @@ def human_feedback(
provider: HumanFeedbackProvider | None = None,
learn: bool = False,
learn_source: str = "hitl",
learn_strict: bool = False,
) -> Callable[[F], F]:
"""Decorator for Flow methods that require human feedback.

Expand Down Expand Up @@ -275,6 +280,20 @@ def human_feedback(
external systems like Slack, Teams, or webhooks. When the
provider raises HumanFeedbackPending, the flow pauses and
can be resumed later with Flow.resume().
learn: When True, enables HITL learning. After feedback is
collected, the LLM distills generalizable lessons and stores
them in memory. Before the next review, past lessons are
recalled and applied via an LLM pre-review step so the human
sees a progressively improved output.
learn_source: The memory ``source`` tag used when storing and
recalling HITL lessons. Defaults to ``"hitl"``.
learn_strict: When True (default False), pre-review failures are
re-raised instead of falling back to the raw output. By
default, failures are logged at WARNING level with full
traceback (``exc_info=True``) and the raw method output is
shown to the human. Set this to True if downstream callers
must be able to assume that pre-review actually executed
successfully.

Returns:
A decorator function that wraps the method with human feedback
Expand Down Expand Up @@ -373,16 +392,40 @@ def _resolve_llm_instance() -> Any:
def _pre_review_with_lessons(
flow_instance: Flow[Any], method_output: Any
) -> Any:
"""Recall past HITL lessons and use LLM to pre-review the output."""
"""Recall past HITL lessons and use LLM to pre-review the output.

Returns the original ``method_output`` when memory is unavailable
        or no lessons match -- these are not error cases.

When the recall or LLM pre-review call raises an exception (for
example LLM/network/auth failure or structured-output parse
error), the failure is logged at WARNING level with full
traceback (``exc_info=True``) so callers can detect the silent
fallback. When ``learn_strict=True`` was passed to the decorator,
the exception is re-raised instead of being swallowed.
"""
mem = flow_instance.memory
if mem is None:
return method_output

query = f"human feedback lessons for {func.__name__}: {method_output!s}"
try:
mem = flow_instance.memory
if mem is None:
return method_output
query = f"human feedback lessons for {func.__name__}: {method_output!s}"
matches = mem.recall(query, source=learn_source)
if not matches:
return method_output
except Exception:
logger.warning(
"HITL pre-review: memory recall failed for %s; falling "
"back to raw output.",
func.__name__,
exc_info=True,
)
if learn_strict:
raise
return method_output

if not matches:
return method_output

try:
lessons = "\n".join(f"- {m.record.content}" for m in matches)
llm_inst = _resolve_llm_instance()
prompt = _get_hitl_prompt("hitl_pre_review_user").format(
Expand All @@ -404,7 +447,14 @@ def _pre_review_with_lessons(
reviewed = llm_inst.call(messages)
return reviewed if isinstance(reviewed, str) else str(reviewed)
except Exception:
return method_output # fallback to raw output on any failure
logger.warning(
"HITL pre-review failed for %s; falling back to raw output.",
func.__name__,
exc_info=True,
)
if learn_strict:
raise
return method_output

def _distill_and_store_lessons(
flow_instance: Flow[Any], method_output: Any, raw_feedback: str
Expand Down Expand Up @@ -654,6 +704,7 @@ def sync_wrapper(self: Flow[Any], *args: Any, **kwargs: Any) -> Any:
provider=provider,
learn=learn,
learn_source=learn_source,
learn_strict=learn_strict,
)
wrapper.__is_flow_method__ = True

Expand Down
Loading
Loading