Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/RockBot.Agent/agent/common-directives.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,12 @@ Call `search_memory` explicitly only when you want to search with a specific
query that differs from the raw message (e.g., after clarification, or when you
want to narrow to a category).

`search_memory` has two modes. Use `mode='regex'` when you know the literal
token you're hunting for — a file path, ID, version string, or exact phrase.
The pattern is a case-insensitive .NET regex matched against both the memory's
path name (`category/id`) and its content, so escape any metacharacters you
mean literally (e.g. `1\.2\.3` for a version string). Otherwise leave the
default `mode='hybrid'` for semantic/keyword search.

### Narrative identity

Your evolving self-model is stored in long-term memory under `agent-identity/`
Expand Down
62 changes: 37 additions & 25 deletions src/RockBot.Host.Abstractions/MemorySearchCriteria.cs
Original file line number Diff line number Diff line change
@@ -1,25 +1,37 @@
namespace RockBot.Host;

/// <summary>
/// Criteria for searching long-term memory entries.
/// All specified criteria are combined with AND logic.
/// </summary>
/// <param name="Query">Case-insensitive substring to match against content.</param>
/// <param name="Category">Category prefix to match (e.g. "project-context" matches "project-context/rockbot").</param>
/// <param name="Tags">Tags that entries must contain (all specified tags must be present).</param>
/// <param name="CreatedAfter">Only include entries created after this time.</param>
/// <param name="CreatedBefore">Only include entries created before this time.</param>
/// <param name="MaxResults">Maximum number of results to return. Defaults to 20.</param>
/// <param name="QueryEmbedding">
/// Pre-computed query embedding vector. When provided, stores skip generating their own
/// query embedding — avoiding redundant calls to the embedding endpoint when multiple
/// searches share the same query text (e.g. during context building).
/// </param>
public sealed record MemorySearchCriteria(
string? Query = null,
string? Category = null,
IReadOnlyList<string>? Tags = null,
DateTimeOffset? CreatedAfter = null,
DateTimeOffset? CreatedBefore = null,
int MaxResults = 20,
float[]? QueryEmbedding = null);
namespace RockBot.Host;

/// <summary>
/// Criteria for searching long-term memory entries.
/// All specified criteria are combined with AND logic.
/// </summary>
/// <remarks>
/// The record only carries the request; how each field is honoured is up to the store.
/// In the file-backed store, a <c>null</c> or whitespace-only <paramref name="Query"/> means
/// "no query" regardless of <paramref name="Mode"/>, and regex-mode failures (over-long or
/// invalid pattern, match timeout, scan-budget overrun) are reported by throwing
/// <see cref="MemorySearchException"/>.
/// </remarks>
/// <param name="Query">
/// In <see cref="MemorySearchMode.Hybrid"/> (default), a case-insensitive keyword/phrase to rank against.
/// In <see cref="MemorySearchMode.Regex"/>, a .NET regex pattern matched against the entry's
/// memory path name (<c>{category}/{id}</c> or <c>{id}</c>) plus its content, tags, and category words.
/// Because the value is interpreted as a regex in that mode, metacharacters such as <c>.</c> must be
/// escaped by the caller when a literal match is intended.
/// </param>
/// <param name="Category">Category prefix to match (e.g. "project-context" matches "project-context/rockbot").</param>
/// <param name="Tags">Tags that entries must contain (all specified tags must be present).</param>
/// <param name="CreatedAfter">Only include entries created after this time.</param>
/// <param name="CreatedBefore">Only include entries created before this time.</param>
/// <param name="MaxResults">Maximum number of results to return. Defaults to 20.</param>
/// <param name="QueryEmbedding">
/// Pre-computed query embedding vector. When provided, stores skip generating their own
/// query embedding — avoiding redundant calls to the embedding endpoint when multiple
/// searches share the same query text (e.g. during context building). Ignored in
/// <see cref="MemorySearchMode.Regex"/>.
/// </param>
/// <param name="Mode">Search backend selector. Defaults to <see cref="MemorySearchMode.Hybrid"/>.</param>
/// <param name="RegexCaseSensitive">
/// When <see cref="Mode"/> is <see cref="MemorySearchMode.Regex"/>, controls case sensitivity of the regex.
/// Default <c>false</c> (case-insensitive matching) mirrors Claude Code's Grep tool. Ignored in hybrid mode.
/// </param>
public sealed record MemorySearchCriteria(
string? Query = null,
string? Category = null,
IReadOnlyList<string>? Tags = null,
DateTimeOffset? CreatedAfter = null,
DateTimeOffset? CreatedBefore = null,
int MaxResults = 20,
float[]? QueryEmbedding = null,
MemorySearchMode Mode = MemorySearchMode.Hybrid,
bool RegexCaseSensitive = false);
14 changes: 14 additions & 0 deletions src/RockBot.Host.Abstractions/MemorySearchException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
namespace RockBot.Host;

/// <summary>
/// Signals that a memory search backend could not execute the query a caller supplied.
/// Typical causes are an invalid regex pattern, a match that timed out on a single entry,
/// or a scan that ran past its overall time budget. The message is written for the model:
/// the tool layer returns it verbatim so the model can refine its query.
/// </summary>
public sealed class MemorySearchException : Exception
{
    /// <summary>Creates the exception with an explanation of why the query was rejected.</summary>
    /// <param name="message">Text describing the failure; surfaced to the model as-is.</param>
    public MemorySearchException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates the exception with an explanation and the lower-level failure that triggered it.
    /// </summary>
    /// <param name="message">Text describing the failure; surfaced to the model as-is.</param>
    /// <param name="inner">The underlying exception (for example a regex parse or timeout error).</param>
    public MemorySearchException(string message, Exception inner)
        : base(message, inner)
    {
    }
}
21 changes: 21 additions & 0 deletions src/RockBot.Host.Abstractions/MemorySearchMode.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
namespace RockBot.Host;

/// <summary>
/// Backend used by <see cref="ILongTermMemory.SearchAsync"/> when scoring candidate entries
/// against a query. Selected per request through <see cref="MemorySearchCriteria.Mode"/>.
/// </summary>
public enum MemorySearchMode
{
/// <summary>
/// Default. BM25 keyword ranking, optionally combined with vector similarity when
/// embeddings are configured. Recall-oriented and tolerant of paraphrase.
/// </summary>
Hybrid = 0,

/// <summary>
/// .NET regex pattern matched against the literal stored content (memory path name +
/// content + tags + category words). Exact and deterministic — preferred when the
/// caller already knows the literal token (file path, id, version, exact phrase).
/// The query is interpreted as a pattern, so regex metacharacters must be escaped
/// when a literal match is intended.
/// </summary>
Regex = 1,
}
28 changes: 27 additions & 1 deletion src/RockBot.Host/FileMemoryStore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,25 @@ public async Task<IReadOnlyList<MemoryEntry>> SearchAsync(MemorySearchCriteria c
// No query: return most-recently reinforced entries up to MaxResults.
// Ordered by LastSeenAt (real reinforcement) rather than UpdatedAt (dream rewrites),
// so dream housekeeping does not artificially promote entries in no-query results.
if (criteria.Query is null)
if (string.IsNullOrWhiteSpace(criteria.Query))
{
return candidates
.OrderByDescending(e => e.LastSeenAt)
.Take(criteria.MaxResults)
.ToList();
}

// Regex mode: literal pattern matching, no scoring, bounded by timeouts.
if (criteria.Mode == MemorySearchMode.Regex)
{
return RegexMatcher.MatchEntries(
candidates,
criteria.Query,
criteria.RegexCaseSensitive,
criteria.MaxResults,
BuildRegexSurface);
}

// With query: use hybrid ranking if embeddings available, else BM25-only.
if (_embeddingCache is not null)
{
Expand Down Expand Up @@ -263,6 +274,21 @@ internal static string GetDocumentText(MemoryEntry entry)
return string.Join(" ", parts);
}

// ── Regex match surface ───────────────────────────────────────────────────

/// <summary>
/// Builds the text a regex-mode search is evaluated against. The first line is the
/// entry's logical memory path name — <c>{category}/{id}</c>, or just <c>{id}</c> when
/// the entry has no category — and the remainder is the BM25 document text produced by
/// <see cref="GetDocumentText"/>. The on-disk location from <see cref="GetFilePath"/> is
/// intentionally left out: the model addresses memories by id, not by storage layout.
/// </summary>
/// <param name="entry">The memory entry to render.</param>
/// <returns>The path name, a newline, then the entry's document text.</returns>
internal static string BuildRegexSurface(MemoryEntry entry)
{
    string logicalPath;
    if (entry.Category is null)
    {
        // Uncategorized entries are addressed by id alone.
        logicalPath = entry.Id;
    }
    else
    {
        logicalPath = entry.Category + "/" + entry.Id;
    }

    return logicalPath + "\n" + GetDocumentText(entry);
}

// ── Structural Filter ─────────────────────────────────────────────────────

private static bool PassesStructuralFilters(MemoryEntry entry, MemorySearchCriteria criteria)
Expand Down
87 changes: 87 additions & 0 deletions src/RockBot.Host/RegexMatcher.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
using System.Diagnostics;
using System.Text.RegularExpressions;

namespace RockBot.Host;

/// <summary>
/// Runs a caller-supplied regex pattern across a pre-filtered set of <see cref="MemoryEntry"/>
/// candidates. Used by the regex backend of <see cref="ILongTermMemory.SearchAsync"/>.
/// Bounds cost two ways: a per-entry <see cref="Regex.MatchTimeout"/> catches catastrophic
/// backtracking on a single input, and an overall wall-clock budget across the scan stops a
/// slow-but-not-pathological pattern from dominating as the corpus grows.
/// </summary>
/// <remarks>
/// Every user-facing failure is reported as <see cref="MemorySearchException"/>. Those messages
/// are formatted with the invariant culture so the text handed back to the model does not vary
/// with the host's locale (e.g. "10.0s" rather than "10,0s").
/// </remarks>
internal static class RegexMatcher
{
    /// <summary>Longest time a single entry may spend in the regex engine.</summary>
    internal static readonly TimeSpan DefaultPerEntryTimeout = TimeSpan.FromSeconds(1);

    /// <summary>Wall-clock budget for scanning the whole candidate set.</summary>
    internal static readonly TimeSpan DefaultOverallBudget = TimeSpan.FromSeconds(10);

    /// <summary>Patterns longer than this many characters are rejected before compilation.</summary>
    internal const int MaxPatternLength = 512;

    /// <summary>
    /// Matches <paramref name="pattern"/> against every candidate using the default
    /// per-entry timeout and overall budget.
    /// </summary>
    /// <param name="candidates">Entries that already passed the structural filters.</param>
    /// <param name="pattern">.NET regex pattern supplied by the caller.</param>
    /// <param name="caseSensitive"><c>false</c> adds <see cref="RegexOptions.IgnoreCase"/>.</param>
    /// <param name="maxResults">Upper bound on the number of entries returned.</param>
    /// <param name="documentText">Produces the text of an entry that the pattern is tested against.</param>
    /// <returns>Matching entries ordered by importance, then by most recently seen.</returns>
    /// <exception cref="MemorySearchException">
    /// The pattern is too long or invalid, a single match timed out, or the scan exceeded its budget.
    /// </exception>
    public static IReadOnlyList<MemoryEntry> MatchEntries(
        IReadOnlyCollection<MemoryEntry> candidates,
        string pattern,
        bool caseSensitive,
        int maxResults,
        Func<MemoryEntry, string> documentText) =>
        MatchEntries(candidates, pattern, caseSensitive, maxResults, documentText,
            DefaultPerEntryTimeout, DefaultOverallBudget);

    /// <summary>
    /// Test-friendly overload that allows custom timeouts. Production callers should use
    /// the parameterless-budget overload above.
    /// </summary>
    /// <param name="candidates">Entries that already passed the structural filters.</param>
    /// <param name="pattern">.NET regex pattern supplied by the caller.</param>
    /// <param name="caseSensitive"><c>false</c> adds <see cref="RegexOptions.IgnoreCase"/>.</param>
    /// <param name="maxResults">Upper bound on the number of entries returned.</param>
    /// <param name="documentText">Produces the text of an entry that the pattern is tested against.</param>
    /// <param name="perEntryTimeout">Match timeout applied to each individual entry.</param>
    /// <param name="overallBudget">Wall-clock limit for the scan as a whole.</param>
    /// <returns>Matching entries ordered by importance, then by most recently seen.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="candidates"/>, <paramref name="pattern"/> or <paramref name="documentText"/> is null.
    /// </exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="perEntryTimeout"/> is not a valid regex match timeout.
    /// </exception>
    /// <exception cref="MemorySearchException">
    /// The pattern is too long or invalid, a single match timed out, or the scan exceeded its budget.
    /// </exception>
    internal static IReadOnlyList<MemoryEntry> MatchEntries(
        IReadOnlyCollection<MemoryEntry> candidates,
        string pattern,
        bool caseSensitive,
        int maxResults,
        Func<MemoryEntry, string> documentText,
        TimeSpan perEntryTimeout,
        TimeSpan overallBudget)
    {
        // Programming errors get a precise exception instead of a NullReferenceException
        // surfacing from the middle of the scan.
        ArgumentNullException.ThrowIfNull(candidates);
        ArgumentNullException.ThrowIfNull(pattern);
        ArgumentNullException.ThrowIfNull(documentText);

        if (pattern.Length > MaxPatternLength)
            throw new MemorySearchException(
                $"Regex pattern exceeds {MaxPatternLength} characters. Narrow the pattern.");

        Regex regex;
        var options = RegexOptions.CultureInvariant;
        if (!caseSensitive) options |= RegexOptions.IgnoreCase;
        try
        {
            regex = new Regex(pattern, options, perEntryTimeout);
        }
        // An out-of-range timeout/options value is a caller bug, not a bad pattern; let it
        // propagate rather than telling the model its regex is invalid.
        catch (ArgumentException ex) when (ex is not ArgumentOutOfRangeException)
        {
            throw new MemorySearchException($"Invalid regex pattern: {ex.Message}", ex);
        }

        var matches = new List<MemoryEntry>();
        var sw = Stopwatch.StartNew();
        var scanned = 0;
        foreach (var entry in candidates)
        {
            // Checked before each entry, so the scan can overshoot by at most one per-entry timeout.
            if (sw.Elapsed > overallBudget)
                throw new MemorySearchException(FormattableString.Invariant(
                    $"Regex search exceeded {overallBudget.TotalSeconds:F1}s after scanning {scanned}/{candidates.Count} entries. Narrow the pattern or add a category filter."));

            scanned++;
            try
            {
                if (regex.IsMatch(documentText(entry)))
                    matches.Add(entry);
            }
            catch (RegexMatchTimeoutException ex)
            {
                throw new MemorySearchException(FormattableString.Invariant(
                    $"Regex match timed out after {perEntryTimeout.TotalSeconds:F1}s on a single entry. Try a more specific pattern."), ex);
            }
        }

        // Regex mode has no relevance score, so rank by importance and break ties by recency.
        // The full scan is required before truncating because ordering is applied afterwards.
        return matches
            .OrderByDescending(e => e.ImportanceScore)
            .ThenByDescending(e => e.LastSeenAt)
            .Take(maxResults)
            .ToList();
    }
}
43 changes: 37 additions & 6 deletions src/RockBot.Memory/MemoryTools.cs
Original file line number Diff line number Diff line change
Expand Up @@ -126,18 +126,34 @@ public Task<string> SaveMemory(
}

[Description("Search long-term memory for previously saved facts, preferences, or patterns. " +
"Use query for keyword search and category for scoping to a knowledge area.")]
"Use query for keyword search and category for scoping to a knowledge area. " +
"Set mode='regex' when you know the literal token (file path, id, version, exact phrase); " +
"otherwise leave mode='hybrid' (default) for semantic/keyword search.")]
public async Task<string> SearchMemory(
[Description("Optional keyword to search for in memory content")] string? query = null,
[Description("Optional category prefix to filter by (e.g. 'user-preferences')")] string? category = null)
[Description("Optional keyword (hybrid mode) or .NET regex pattern (regex mode) to search for")] string? query = null,
[Description("Optional category prefix to filter by (e.g. 'user-preferences')")] string? category = null,
[Description("Search backend: 'hybrid' (default, BM25 + optional vector) or 'regex' (case-insensitive .NET regex against memory path name and content)")] string? mode = null)
{
_logger.LogInformation("Tool call: SearchMemory(query={Query}, category={Category})", query, category);
_logger.LogInformation("Tool call: SearchMemory(query={Query}, category={Category}, mode={Mode})", query, category, mode);

if (!TryParseMode(mode, out var parsedMode))
return $"Unknown search mode '{mode}'. Use 'hybrid' or 'regex'.";

var criteria = new MemorySearchCriteria(
Query: string.IsNullOrWhiteSpace(query) ? null : query.Trim(),
Category: string.IsNullOrWhiteSpace(category) ? null : category.Trim());
Category: string.IsNullOrWhiteSpace(category) ? null : category.Trim(),
Mode: parsedMode);

var results = await _memory.SearchAsync(criteria);
IReadOnlyList<MemoryEntry> results;
try
{
results = await _memory.SearchAsync(criteria);
}
catch (MemorySearchException ex)
{
_logger.LogInformation("SearchMemory rejected by backend: {Message}", ex.Message);
return ex.Message;
}

_logger.LogInformation("SearchMemory returned {Count} results", results.Count);

Expand All @@ -160,6 +176,21 @@ public async Task<string> SearchMemory(
return sb.ToString();
}

/// <summary>
/// Converts the tool's optional <c>mode</c> argument into a <see cref="MemorySearchMode"/>.
/// </summary>
/// <remarks>
/// Only the declared enum member names are accepted (case-insensitive, surrounding whitespace
/// ignored). <c>Enum.TryParse</c> is deliberately not used: it also accepts numeric strings
/// such as "1" or "42" and comma-separated lists such as "hybrid,regex", which would let an
/// unsupported or undefined mode through instead of producing the "unknown mode" reply.
/// </remarks>
/// <param name="mode">Raw value from the tool call; null or blank selects the default.</param>
/// <param name="parsed">
/// The resolved mode. <see cref="MemorySearchMode.Hybrid"/> when <paramref name="mode"/> is
/// null/blank or not recognised.
/// </param>
/// <returns><c>true</c> when the value is blank or names a defined mode; otherwise <c>false</c>.</returns>
private static bool TryParseMode(string? mode, out MemorySearchMode parsed)
{
    parsed = MemorySearchMode.Hybrid;

    if (string.IsNullOrWhiteSpace(mode))
        return true;

    var requested = mode.Trim();
    foreach (var candidate in Enum.GetValues<MemorySearchMode>())
    {
        if (string.Equals(candidate.ToString(), requested, StringComparison.OrdinalIgnoreCase))
        {
            parsed = candidate;
            return true;
        }
    }

    return false;
}

private static string FormatAge(MemoryEntry e)
{
var now = DateTimeOffset.UtcNow;
Expand Down
Loading
Loading