-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpost-task
More file actions
executable file
· 191 lines (157 loc) · 8.26 KB
/
post-task
File metadata and controls
executable file
· 191 lines (157 loc) · 8.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#!/usr/bin/env bash
# post-task — evaluate a completed task, close the reflexion feedback loop
# 1. Runs reflexion-eval evaluate (extracts new rules)
# 2. Checks which pre-task rules were followed/ignored — reinforce/contradict
# 3. Ingests into graphmem
#
# Usage: post-task "task description" "outcome summary"
# Options:
#   --score-only   Just output the score (for scripting)
#   --no-graph     Skip graphmem ingestion
#   --no-feedback  Skip rule feedback loop
set -euo pipefail

TASK="${1:-}"
OUTCOME="${2:-}"
SCORE_ONLY=false
NO_GRAPH=false
NO_FEEDBACK=false

# Flags may appear anywhere on the command line; anything unrecognized is
# ignored (the two positionals were already captured above).
for arg in "$@"; do
  case "$arg" in
    --score-only) SCORE_ONLY=true ;;
    --no-graph) NO_GRAPH=true ;;
    --no-feedback) NO_FEEDBACK=true ;;
  esac
done

# Both positionals are required; fail fast with a single consolidated check.
if [[ -z "$TASK" || -z "$OUTCOME" ]]; then
  echo "Usage: post-task '<task>' '<outcome>'" >&2
  exit 1
fi

WORKSPACE="${WORKSPACE:-$HOME/.openclaw/workspace}"
PRETASK_FILE="$WORKSPACE/memory/last-pretask.json"

# ANSI styling constants used by every section below.
readonly C_RESET='\033[0m' C_BOLD='\033[1m' C_GREEN='\033[32m' C_CYAN='\033[36m' C_YELLOW='\033[33m' C_DIM='\033[2m'
# --- Run reflexion evaluation (extract new rules) ---
# eval_id links the later rule-feedback calls to this evaluation record;
# it stays empty when the evaluator is not installed.
eval_id=""
if command -v reflexion-eval &>/dev/null; then
  # Capture stdout+stderr together. Guard the assignment so a failing
  # evaluator does not kill the whole script under `set -e` — the feedback
  # and graphmem sections below are still worth running.
  if ! eval_output=$(reflexion-eval evaluate "$TASK" "$OUTCOME" 2>&1); then
    echo "warning: reflexion-eval evaluate failed" >&2
  fi
  if $SCORE_ONLY; then
    # Emit just the numeric score for scripting use. Bash regex matching is
    # used instead of GNU-only `grep -P` so this works on BSD/macOS too.
    if [[ "$eval_output" =~ score=([0-9]+) ]]; then
      echo "${BASH_REMATCH[1]}"
    else
      echo "unknown"
    fi
    exit 0
  fi
  echo -e "${C_CYAN}${C_BOLD}Post-task evaluation${C_RESET}"
  echo "$eval_output"
  # Extract the eval ID from the output if present; otherwise fall back to
  # a timestamp-derived identifier.
  if [[ "$eval_output" =~ eval_id=([a-f0-9-]+) ]]; then
    eval_id="${BASH_REMATCH[1]}"
  else
    eval_id="post-$(date +%s)"
  fi
fi
# --- Rule Feedback Loop ---
# Compare the rules surfaced by pre-task against what actually happened:
# ask an LLM for a per-rule verdict, apply reinforce/contradict via
# reflexion-eval, and persist the verdicts next to the evaluation record.
if ! $NO_FEEDBACK && [[ -f "$PRETASK_FILE" ]] && command -v reflexion-eval &>/dev/null; then
  # NOTE(review): pretask_task is extracted but not referenced below —
  # kept for parity with the pre-task file schema; confirm before removing.
  pretask_task=$(jq -r '.task // ""' "$PRETASK_FILE" 2>/dev/null)
  rule_count=$(jq '.matched_rules | length' "$PRETASK_FILE" 2>/dev/null || echo 0)
  if [[ "$rule_count" -gt 0 ]]; then
    echo -e "\n${C_CYAN}${C_BOLD}Rule Feedback Loop${C_RESET} ($rule_count rules to evaluate)"
    # Build a human-readable summary of the matched rules for the prompt.
    rules_summary=$(jq -r '.matched_rules[] | "- [\(.id)] WHEN \(.trigger) → \(.action)"' "$PRETASK_FILE" 2>/dev/null)
    # Ask the LLM to evaluate which rules were followed.
    if [[ -n "${OPENAI_API_KEY:-}" ]]; then
      system_prompt="You evaluate whether behavioral rules were followed during a task. For each rule, respond with a JSON array of objects: {\"id\": \"rule-id\", \"verdict\": \"reinforced\" or \"contradicted\" or \"not_applicable\", \"reason\": \"brief explanation\"}. Only output the JSON array, nothing else."
      user_prompt="Task: $TASK
Outcome: $OUTCOME
Rules that were available during this task:
$rules_summary
For each rule, was it followed (reinforced), violated (contradicted), or not relevant (not_applicable)?"
      # Guard the assignment: under `set -e` an unreachable API (offline,
      # timeout) would otherwise abort the entire script mid-run.
      response=$(curl -s --max-time 30 "https://api.openai.com/v1/chat/completions" \
        -H "Content-Type: application/json" \
        -H "Authorization: Bearer $OPENAI_API_KEY" \
        -d "$(jq -n --arg sys "$system_prompt" --arg usr "$user_prompt" '{
          model: "gpt-4o-mini",
          messages: [{role: "system", content: $sys}, {role: "user", content: $usr}],
          temperature: 0.1
        }')" 2>/dev/null) || response=""
      # Strip optional markdown code fences around the JSON verdict array,
      # and normalize an empty/invalid reply to "[]" so the loop is a no-op.
      verdicts=$(echo "$response" | jq -r '.choices[0].message.content' 2>/dev/null | sed 's/^```json//;s/^```//;s/```$//' || echo "[]")
      [[ -n "$verdicts" && "$verdicts" != "null" ]] || verdicts="[]"
      # Process verdicts one JSON object per line.
      reinforced=0
      contradicted=0
      skipped=0
      while IFS= read -r verdict_json; do
        [[ -z "$verdict_json" || "$verdict_json" == "null" ]] && continue
        rid=$(echo "$verdict_json" | jq -r '.id' 2>/dev/null)
        v=$(echo "$verdict_json" | jq -r '.verdict' 2>/dev/null)
        reason=$(echo "$verdict_json" | jq -r '.reason' 2>/dev/null)
        [[ -z "$rid" || "$rid" == "null" ]] && continue
        case "$v" in
          reinforced)
            # Counters track the LLM verdicts, not whether the
            # reinforce/contradict call itself succeeded.
            reflexion-eval reinforce "$rid" "$eval_id" 2>/dev/null && \
              echo -e "  ${C_GREEN}✓${C_RESET} Reinforced: $reason" || true
            reinforced=$((reinforced + 1))
            ;;
          contradicted)
            reflexion-eval contradict "$rid" "$eval_id" 2>/dev/null && \
              echo -e "  ${C_YELLOW}✗${C_RESET} Contradicted: $reason" || true
            contradicted=$((contradicted + 1))
            ;;
          *)
            skipped=$((skipped + 1))
            ;;
        esac
      done < <(echo "$verdicts" | jq -c '.[]' 2>/dev/null)
      echo -e "\n  ${C_DIM}Feedback: $reinforced reinforced, $contradicted contradicted, $skipped skipped${C_RESET}"
      # Persist feedback data into the newest eval file for later analysis.
      eval_file=$(ls -t "$WORKSPACE/memory/reflexion-evals/"*.json 2>/dev/null | head -1)
      if [[ -n "$eval_file" ]]; then
        matched_rules=$(jq -c '.matched_rules' "$PRETASK_FILE" 2>/dev/null || echo "[]")
        # Write to a temp file first so a failed jq never truncates the
        # existing eval file.
        tmp=$(mktemp)
        jq --argjson matched "$matched_rules" \
          --argjson verdicts "$(echo "$verdicts" | jq -c '.' 2>/dev/null || echo '[]')" \
          --argjson reinforced "$reinforced" \
          --argjson contradicted "$contradicted" \
          --argjson skipped "$skipped" \
          '. + {feedback: {matched_rules: $matched, verdicts: $verdicts, reinforced: $reinforced, contradicted: $contradicted, skipped: $skipped}}' \
          "$eval_file" > "$tmp" 2>/dev/null && mv "$tmp" "$eval_file" && \
          echo -e "  ${C_GREEN}✓${C_RESET} Feedback persisted to eval file" || rm -f "$tmp"
      fi
    else
      echo -e "  ${C_YELLOW}⚠${C_RESET} OPENAI_API_KEY not set — skipping rule feedback"
    fi
  fi
fi
# --- Retrieval Quality Feedback ---
# If pre-task injected distilled context, ask the LLM to rate how useful
# that context actually was for the task, and persist the rating.
if ! $NO_FEEDBACK && [[ -f "$PRETASK_FILE" ]] && [[ -n "${OPENAI_API_KEY:-}" ]]; then
  distilled_used=$(jq -r '.retrieval.distilled_used // false' "$PRETASK_FILE" 2>/dev/null)
  distilled_context=$(jq -r '.retrieval.distilled // ""' "$PRETASK_FILE" 2>/dev/null)
  if [[ "$distilled_used" == "true" && -n "$distilled_context" ]]; then
    echo -e "\n${C_CYAN}${C_BOLD}Retrieval Quality Feedback${C_RESET}"
    system_prompt="Rate how useful the retrieved context was for completing this task. Output JSON: {\"retrieval_score\": 1-5, \"reason\": \"brief explanation\", \"what_was_missing\": \"what context would have helped but wasn't retrieved\"}. 1=useless noise, 3=somewhat helpful, 5=exactly what was needed. Only output the JSON."
    user_prompt="Task: $TASK
Outcome: $OUTCOME
Context that was retrieved and injected:
$distilled_context"
    # Guard the assignment: under `set -e` a failing curl (offline,
    # timeout) would otherwise abort the entire script here.
    ret_response=$(curl -s --max-time 15 "https://api.openai.com/v1/chat/completions" \
      -H "Content-Type: application/json" \
      -H "Authorization: Bearer $OPENAI_API_KEY" \
      -d "$(jq -n --arg sys "$system_prompt" --arg usr "$user_prompt" '{
        model: "gpt-4o-mini",
        messages: [{role: "system", content: $sys}, {role: "user", content: $usr}],
        temperature: 0.1,
        max_tokens: 200
      }')" 2>/dev/null) || ret_response=""
    # Strip optional markdown fences; the `|| echo ""` keeps a malformed
    # API reply from killing the script via `set -o pipefail`.
    ret_eval=$(echo "$ret_response" | jq -r '.choices[0].message.content // ""' 2>/dev/null | sed 's/^```json//;s/^```//;s/```$//' || echo "")
    ret_score=$(echo "$ret_eval" | jq -r '.retrieval_score // "?"' 2>/dev/null || echo "?")
    ret_reason=$(echo "$ret_eval" | jq -r '.reason // ""' 2>/dev/null || echo "")
    ret_missing=$(echo "$ret_eval" | jq -r '.what_was_missing // ""' 2>/dev/null || echo "")
    echo -e "  Retrieval score: ${C_BOLD}$ret_score/5${C_RESET} — $ret_reason"
    [[ -n "$ret_missing" && "$ret_missing" != "null" ]] && echo -e "  ${C_DIM}Missing: $ret_missing${C_RESET}"
    # Persist retrieval feedback into the newest eval file (temp-file
    # write so a failed jq never truncates the existing file).
    eval_file=$(ls -t "$WORKSPACE/memory/reflexion-evals/"*.json 2>/dev/null | head -1)
    if [[ -n "$eval_file" ]]; then
      tmp=$(mktemp)
      jq --argjson ret_feedback "$(echo "$ret_eval" | jq -c '.' 2>/dev/null || echo '{}')" \
        '. + {retrieval_feedback: $ret_feedback}' \
        "$eval_file" > "$tmp" 2>/dev/null && mv "$tmp" "$eval_file" && \
        echo -e "  ${C_GREEN}✓${C_RESET} Retrieval feedback persisted" || rm -f "$tmp"
    fi
  fi
fi
# --- Ingest into graphmem ---
# Store the task/outcome pair in long-term graph memory, skipping
# trivially short outcomes (<= 20 chars) that carry no useful signal.
if ! $NO_GRAPH && command -v graphmem-sqlite &>/dev/null && [[ ${#OUTCOME} -gt 20 ]]; then
  # Best-effort: ingestion failure is not fatal for the script.
  graphmem-sqlite add "Task: $TASK. Outcome: $OUTCOME" 2>/dev/null && \
    echo -e "\n  ${C_GREEN}✓${C_RESET} Added to graph memory" || true
fi