From 65d5649433ea33fa01d9cfdff0f45bc85cabbd50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Tue, 28 Oct 2025 17:13:01 +0800
Subject: [PATCH 01/40] update reader and search strategy

---
 src/memos/llms/openai.py                      |  21 +
 src/memos/mem_reader/simple_struct.py         |  43 +-
 src/memos/memories/textual/tree.py            |   3 +
 .../tree_text_memory/retrieve/bm25_util.py    | 188 +++++++++
 .../tree_text_memory/retrieve/recall.py       | 105 ++++-
 .../retrieve/retrieve_utils.py                | 373 ++++++++++++++++++
 .../tree_text_memory/retrieve/searcher.py     |  93 ++++-
 .../retrieve/task_goal_parser.py              |   7 +-
 src/memos/templates/mem_reader_prompts.py     | 134 ++++---
 src/memos/templates/mem_search_prompts.py     |  93 +++++
 10 files changed, 968 insertions(+), 92 deletions(-)
 create mode 100644 src/memos/memories/textual/tree_text_memory/retrieve/bm25_util.py
 create mode 100644 src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py
 create mode 100644 src/memos/templates/mem_search_prompts.py

diff --git a/src/memos/llms/openai.py b/src/memos/llms/openai.py
index 698bc3265..d2a3aab6c 100644
--- a/src/memos/llms/openai.py
+++ b/src/memos/llms/openai.py
@@ -73,6 +73,27 @@ def generate(self, messages: MessageList) -> str:
         else:
             return response_content
 
+    def customized_generate(self, messages: MessageList, **kwargs) -> str:
+        """Generate a response from the OpenAI LLM with per-call sampling overrides."""
+        temperature = kwargs.get("temperature", self.config.temperature)
+        max_tokens = kwargs.get("max_tokens", self.config.max_tokens)
+        top_p = kwargs.get("top_p", self.config.top_p)
+
+        response = self.client.chat.completions.create(
+            model=self.config.model_name_or_path,
+            messages=messages,
+            extra_body=self.config.extra_body,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_p=top_p,
+        )
+        logger.info(f"Response from OpenAI: {response.model_dump_json()}")
+        response_content = response.choices[0].message.content
+        if self.config.remove_think_prefix:
+            return remove_thinking_tags(response_content)
+        else:
+            return response_content
+
     def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
         """Stream response from OpenAI LLM with optional reasoning support."""
         response = self.client.chat.completions.create(
diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py
index b439cb2b2..549161566 100644
--- a/src/memos/mem_reader/simple_struct.py
+++ b/src/memos/mem_reader/simple_struct.py
@@ -1,5 +1,4 @@
 import concurrent.futures
-import copy
 import json
 import os
 import re
@@ -283,21 +282,39 @@ def get_scene_data_info(self, scene_data: list, type: str) -> list[str]:
         )
         parser = ParserFactory.from_config(parser_config)
 
+        content_len_threshold = 1600
         if type == "chat":
             for items in scene_data:
-                result = []
-                for item in items:
-                    # Convert dictionary to string
-                    if "chat_time" in item:
-                        result.append(item)
+                if not items:
+                    continue
+
+                results.append([])
+                current_length = 0
+
+                for _i, item in enumerate(items):
+                    content_length = (
+                        len(item.get("content", "")) if isinstance(item, dict) else len(str(item))
+                    )
+                    if not results[-1]:
+                        results[-1].append(item)
+                        current_length = content_length
+                        continue
+
+                    if current_length + content_length <= content_len_threshold:
+                        results[-1].append(item)
+                        current_length += content_length
                     else:
-                        result.append(item)
-                    if len(result) >= 10:
-                        results.append(result)
-                        context = copy.deepcopy(result[-2:])
-                        result = context
-                if result:
-                    results.append(result)
+                        overlap_item = results[-1][-1]
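+                        # Seed the next window with the previous window's last
+                        # item so consecutive chunks share one message of overlap.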
+                        overlap_length = (
+                            len(overlap_item.get("content", ""))
+                            if isinstance(overlap_item, dict)
+                            else len(str(overlap_item))
+                        )
+
+                        results.append([overlap_item, item])
+                        current_length = overlap_length + content_length
         elif type == "doc":
             for item in scene_data:
                 try:
diff --git a/src/memos/memories/textual/tree.py b/src/memos/memories/textual/tree.py
index 0048f4a59..f453e7330 100644
--- a/src/memos/memories/textual/tree.py
+++ b/src/memos/memories/textual/tree.py
@@ -53,6 +53,7 @@ def __init__(self, config: TreeTextMemoryConfig):
         time_start_gs = time.time()
         self.graph_store: Neo4jGraphDB = GraphStoreFactory.from_config(config.graph_db)
         logger.info(f"time init: graph_store time is: {time.time() - time_start_gs}")
+        self.bm25_retriever = None  # EnhancedBM25()
 
         time_start_rr = time.time()
         if config.reranker is None:
@@ -172,6 +173,7 @@ def search(
                 self.graph_store,
                 self.embedder,
                 self.reranker,
+                self.bm25_retriever,
                 internet_retriever=None,
                 moscube=moscube,
             )
@@ -181,6 +183,7 @@ def search(
                 self.graph_store,
                 self.embedder,
                 self.reranker,
+                self.bm25_retriever,
                 internet_retriever=self.internet_retriever,
                 moscube=moscube,
             )
diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/bm25_util.py b/src/memos/memories/textual/tree_text_memory/retrieve/bm25_util.py
new file mode 100644
index 000000000..a4a1c8b33
--- /dev/null
+++ b/src/memos/memories/textual/tree_text_memory/retrieve/bm25_util.py
@@ -0,0 +1,188 @@
+import threading
+
+import numpy as np
+
+from cachetools import LRUCache
+from rank_bm25 import BM25Okapi
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+from memos.log import get_logger
+from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import FastTokenizer
+from memos.utils import timed
+
+
+logger = get_logger(__name__)
+# Global model cache
+_BM25_CACHE = LRUCache(maxsize=100)
+_CACHE_LOCK = threading.Lock()
+
+
+class EnhancedBM25:
+    """Enhanced BM25 with fast mixed-language tokenization and TF-IDF reranking"""
+
+    def __init__(self, tokenizer=None, en_model="en_core_web_sm", zh_model="zh_core_web_sm"):
+        """
+        Initialize Enhanced BM25 with memory management
+        """
+        if tokenizer is None:
+            self.tokenizer = FastTokenizer()
+        else:
+            self.tokenizer = tokenizer
+        self._current_tfidf = None
+
+    def _tokenize_doc(self, text):
+        """
+        Tokenize a single document using the configured tokenizer
+        """
+        return self.tokenizer.tokenize_mixed(text, lang="auto")
+
+    def _prepare_corpus_data(self, corpus, corpus_name="default"):
+        with _CACHE_LOCK:
+            if corpus_name in _BM25_CACHE:
+                logger.debug(f"BM25 cache hit: {corpus_name}")
+                return _BM25_CACHE[corpus_name]
+            logger.debug(f"BM25 cache miss: {corpus_name}")
+
+        # Tokenize outside the lock (it is slow); re-acquire the lock to publish the model.
+        tokenized_corpus = [self._tokenize_doc(doc) for doc in corpus]
+        bm25_model = BM25Okapi(tokenized_corpus)
+        with _CACHE_LOCK:
+            _BM25_CACHE[corpus_name] = bm25_model
+        return bm25_model
+
+    def clear_cache(self, corpus_name=None):
+        """Clear cache for specific corpus or clear all cache"""
+        with _CACHE_LOCK:
+            if corpus_name:
+                if corpus_name in _BM25_CACHE:
+                    del _BM25_CACHE[corpus_name]
+            else:
+                _BM25_CACHE.clear()
+
+    def get_cache_info(self):
+        """Get current cache information"""
+        with _CACHE_LOCK:
+            return {
+                "cache_size": len(_BM25_CACHE),
+                "max_cache_size": _BM25_CACHE.maxsize,
+                "cached_corpora": list(_BM25_CACHE.keys()),
+            }
+
+    def _search_docs(
+        self,
+        query: str,
+        corpus: list[str],
+        corpus_name="test",
+        top_k=50,
+        use_tfidf=False,
+        rerank_candidates_multiplier=2,
+        cleanup=False,
+    ):
+        """
+        Args:
+            query: Search query string
+            corpus: List of document texts
+            corpus_name: Cache key under which the fitted BM25 model is stored
+            top_k: Number of top results to return
+            use_tfidf: Whether to rerank BM25 candidates with TF-IDF similarity
+            rerank_candidates_multiplier: Multiplier for candidate selection
+            cleanup: Whether to clean up memory after search (default: False)
+        """
+        if not corpus:
+            logger.warning("Empty corpus provided")
+            return []
+
+        logger.info(f"Searching {len(corpus)} documents for query: '{query}'")
+
+        try:
+            # Prepare BM25 model
+            bm25_model = self._prepare_corpus_data(corpus, corpus_name=corpus_name)
+            tokenized_query = self._tokenize_doc(query)
+            tokenized_query = list(dict.fromkeys(tokenized_query))
+
+            # Get BM25 scores
+            bm25_scores = bm25_model.get_scores(tokenized_query)
+
+            # Select candidates
+            candidate_count = min(top_k * rerank_candidates_multiplier, len(corpus))
+            candidate_indices = np.argsort(bm25_scores)[-candidate_count:][::-1]
+            combined_scores = bm25_scores[candidate_indices]
+
+            if use_tfidf:
+                # Create TF-IDF for this search
+                tfidf = TfidfVectorizer(
+                    tokenizer=self._tokenize_doc, lowercase=False, token_pattern=None
+                )
+                tfidf_matrix = tfidf.fit_transform(corpus)
+
+                # TF-IDF reranking
+                query_vec = tfidf.transform([query])
+                tfidf_similarities = (
+                    (tfidf_matrix[candidate_indices] * query_vec.T).toarray().flatten()
+                )
+
+                # Combine scores
+                combined_scores = 0.7 * bm25_scores[candidate_indices] + 0.3 * tfidf_similarities
+
+            sorted_candidate_indices = candidate_indices[np.argsort(combined_scores)[::-1][:top_k]]
+            sorted_combined_scores = np.sort(combined_scores)[::-1][:top_k]
+
+            # build result list
+            bm25_recalled_results = []
+            for rank, (doc_idx, combined_score) in enumerate(
+                zip(sorted_candidate_indices, sorted_combined_scores, strict=False), 1
+            ):
+                bm25_score = bm25_scores[doc_idx]
+
+                candidate_pos = np.where(candidate_indices == doc_idx)[0][0]
+                tfidf_score = tfidf_similarities[candidate_pos] if use_tfidf else 0
+
+                bm25_recalled_results.append(
+                    {
+                        "text": corpus[doc_idx],
+                        "bm25_score": float(bm25_score),
+                        "tfidf_score": float(tfidf_score),
+                        "combined_score": float(combined_score),
+                        "rank": rank,
+                        "doc_index": int(doc_idx),
+                    }
+                )
+
+            logger.debug(f"Search completed: found {len(bm25_recalled_results)} results")
+            return bm25_recalled_results
+
+        except Exception as e:
+            logger.error(f"BM25 search failed: {e}")
+            return []
+        finally:
+            # Always cleanup if requested
+            if cleanup:
+                self._cleanup_memory()
+
+    def _cleanup_memory(self):
+        """Release transient per-search state so large intermediates can be reclaimed."""
+        self._current_tfidf = None
+
+    @timed
+    def search(self, query: str, node_dicts: list[dict], corpus_name="default", **kwargs):
+        """
+        Search with BM25 and optional TF-IDF reranking
+        """
+        try:
+            corpus_list = []
+            for node_dict in node_dicts:
+                corpus_list.append(
+                    " ".join([node_dict["metadata"]["key"]] + node_dict["metadata"]["tags"])
+                )
+
+            recalled_results = self._search_docs(
+                query, corpus_list, corpus_name=corpus_name, **kwargs
+            )
+            bm25_searched_nodes = []
+            for item in recalled_results:
+                doc_idx = item["doc_index"]
+                bm25_searched_nodes.append(node_dicts[doc_idx])
+            return bm25_searched_nodes
+        except Exception as e:
+            logger.error(f"Error in bm25 search: {e}")
+            return []
diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py
index d4cfcf501..facdb3a23 100644
--- a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py
+++ b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py
@@ -5,6 +5,7 @@
 from memos.graph_dbs.neo4j import Neo4jGraphDB
 from memos.log import get_logger
 from memos.memories.textual.item import TextualMemoryItem
+from memos.memories.textual.tree_text_memory.retrieve.bm25_util import EnhancedBM25
 from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
@@ -16,11 +17,18 @@ class GraphMemoryRetriever:
     Unified memory retriever that combines both graph-based and vector-based retrieval logic.
     """
 
-    def __init__(self, graph_store: Neo4jGraphDB, embedder: OllamaEmbedder):
+    def __init__(
+        self,
+        graph_store: Neo4jGraphDB,
+        embedder: OllamaEmbedder,
+        bm25_retriever: EnhancedBM25 | None,
+    ):
         self.graph_store = graph_store
         self.embedder = embedder
+        self.bm25_retriever = bm25_retriever
         self.max_workers = 10
         self.filter_weight = 0.6
+        self.use_bm25 = False
 
     def retrieve(
         self,
@@ -31,6 +39,7 @@ def retrieve(
         query_embedding: list[list[float]] | None = None,
         search_filter: dict | None = None,
         user_name: str | None = None,
+        id_filter: dict | None = None,
     ) -> list[TextualMemoryItem]:
         """
         Perform hybrid memory retrieval:
@@ -70,12 +79,23 @@ def retrieve(
                 search_filter=search_filter,
                 user_name=user_name,
             )
+            if self.use_bm25:
+                future_bm25 = executor.submit(
+                    self._bm25_recall,
+                    query,
+                    parsed_goal,
+                    memory_scope,
+                    top_k=top_k,
+                    user_name=user_name,
+                    search_filter=id_filter,
+                )
 
             graph_results = future_graph.result()
             vector_results = future_vector.result()
+            bm25_results = future_bm25.result() if self.use_bm25 else []
 
         # Merge and deduplicate by ID
-        combined = {item.id: item for item in graph_results + vector_results}
+        combined = {item.id: item for item in graph_results + vector_results + bm25_results}
 
         graph_ids = {item.id for item in graph_results}
         combined_ids = set(combined.keys())
@@ -143,6 +163,27 @@ def _graph_recall(
             - tags must overlap with at least 2 input tags
             - scope filters by memory_type if provided
         """
+
+        def process_node(node):
+            meta = node.get("metadata", {})
+            node_key = meta.get("key")
+            node_tags = meta.get("tags", []) or []
+
+            keep = False
+            # the node key matches one of the goal keys
+            if parsed_goal.keys and node_key in parsed_goal.keys:
+                keep = True
+            # tags overlap with at least 2 goal tags (compared case-insensitively)
+            elif parsed_goal.tags:
+                node_tags_lower = {tag.lower() for tag in node_tags}
+                goal_tags_lower = {tag.lower() for tag in parsed_goal.tags}
+                if len(node_tags_lower & goal_tags_lower) >= 2:
+                    keep = True
+
+            if keep:
+                return TextualMemoryItem.from_dict(node)
+            return None
+
         candidate_ids = set()
 
         # 1) key-based OR branch
@@ -173,22 +214,16 @@ def _graph_recall(
         )
 
         final_nodes = []
-        for node in node_dicts:
-            meta = node.get("metadata", {})
-            node_key = meta.get("key")
-            node_tags = meta.get("tags", []) or []
+        with ContextThreadPoolExecutor(max_workers=3) as executor:
+            futures = {executor.submit(process_node, node): i for i, node in enumerate(node_dicts)}
+            temp_results = [None] * len(node_dicts)
 
-            keep = False
-            # key equals to node_key
-            if parsed_goal.keys and node_key in parsed_goal.keys:
-                keep = True
-            # overlap tags more than 2
-            elif parsed_goal.tags:
-                overlap = len(set(node_tags) & set(parsed_goal.tags))
-                if overlap >= 2:
-                    keep = True
-            if keep:
-                final_nodes.append(TextualMemoryItem.from_dict(node))
+            for future in concurrent.futures.as_completed(futures):
+                original_index = futures[future]
+                result = future.result()
+                temp_results[original_index] = result
+
+        final_nodes = [result for result in temp_results if result is not None]
         return final_nodes
 
     def _vector_recall(
@@ -196,7 +231,7 @@ def _vector_recall(
         query_embedding: list[list[float]],
         memory_scope: str,
         top_k: int = 20,
-        max_num: int = 3,
+        max_num: int = 5,
         cube_name: str | None = None,
         search_filter: dict | None = None,
         user_name: str | None = None,
@@ -267,3 +302,37 @@ def search_path_b():
                 or []
             )
         return [TextualMemoryItem.from_dict(n) for n in node_dicts]
+
+    def _bm25_recall(
+        self,
+        query: str,
+        parsed_goal: ParsedTaskGoal,
+        memory_scope: str,
+        top_k: int = 20,
+        user_name: str | None = None,
+        search_filter: dict | None = None,
+    ) -> list[TextualMemoryItem]:
+        """
+        Perform BM25-based retrieval.
+        """
+        if not self.bm25_retriever:
+            return []
+        key_filters = [
+            {"field": "memory_type", "op": "=", "value": memory_scope},
+        ]
+        # corpus_name combines user_name with any search_filter values
+        corpus_name = f"{user_name}" if user_name else ""
+        if search_filter is not None:
+            for key in search_filter:
+                value = search_filter[key]
+                key_filters.append({"field": key, "op": "=", "value": value})
+            corpus_name += "".join(list(search_filter.values()))
+        candidate_ids = self.graph_store.get_by_metadata(key_filters, user_name=user_name)
+        node_dicts = self.graph_store.get_nodes(list(candidate_ids), include_embedding=False)
+
+        bm25_query = " ".join(list({query, *parsed_goal.keys}))
+        bm25_results = self.bm25_retriever.search(
+            bm25_query, node_dicts, top_k=top_k, corpus_name=corpus_name
+        )
+
+        return [TextualMemoryItem.from_dict(n) for n in bm25_results]
diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py b/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py
new file mode 100644
index 000000000..349fabbbe
--- /dev/null
+++ b/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py
@@ -0,0 +1,373 @@
+import json
+import re
+
+from pathlib import Path
+
+import jieba
+
+from memos.log import get_logger
+
+
+logger = get_logger(__name__)
+
+
+def find_project_root(marker=".git"):
+    """Find the project root by walking up until a marker file is found."""
+    current = Path(__file__).resolve()
+    while current != current.parent:
+        if (current / marker).exists():
+            return current
+        current = current.parent
+    logger.warning(f"Project root marker not found: {marker}; falling back to the package directory")
+    return Path(__file__).resolve().parent
+
+
+PROJECT_ROOT = find_project_root()
+DEFAULT_STOPWORD_FILE = (
+    PROJECT_ROOT / "examples" / "data" / "config" / "stopwords.txt"
+)  # reading this file at import time adds startup latency
+
+
+class StopwordManager:
+    _stopwords = None
+
+    @classmethod
+    def _load_stopwords(cls):
+        """Load stopwords once and cache them on the class."""
+        if cls._stopwords is not None:
+            return cls._stopwords
+
+        stopwords = set()
+        try:
+            with open(DEFAULT_STOPWORD_FILE, encoding="utf-8") as f:
+                stopwords = {line.strip() for line in f if line.strip()}
+            logger.info("Stopwords loaded successfully.")
+        except Exception as e:
+            logger.warning(f"Error loading stopwords: {e}")
+            stopwords = cls._load_default_stopwords()
+
+        cls._stopwords = stopwords
+        return stopwords
+
+    @classmethod
+    def _load_default_stopwords(cls):
+        """Built-in fallback stopword set."""
+        chinese_stop_words = {
+            "的",
+            "了",
+            "在",
+            "是",
+            "我",
+            "有",
+            "和",
+            "就",
+            "不",
+            "人",
+            "都",
+            "一",
+            "一个",
+            "上",
+            "也",
+            "很",
+            "到",
+            "说",
+            "要",
+            "去",
+            "你",
+            "会",
+            "着",
+            "没有",
+            "看",
+            "好",
+            "自己",
+            "这",
+            "那",
+            "他",
+            "她",
+            "它",
+            "我们",
+            "你们",
+            "他们",
+            "这个",
+            "那个",
+            "这些",
+            "那些",
+            "怎么",
+            "什么",
+            "为什么",
+            "如何",
+            "哪里",
+            "谁",
+            "几",
+            "多少",
+            "这样",
+            "那样",
+            "这么",
+            "那么",
+        }
+        english_stop_words = {
+            "the",
+            "a",
+            "an",
+            "and",
+            "or",
+            "but",
+            "in",
+            "on",
+            "at",
+            "to",
+            "for",
+            "of",
+            "with",
+            "by",
+            "as",
+            "is",
+            "are",
+            "was",
+            "were",
+            "be",
+            "been",
+            "have",
+            "has",
+            "had",
+            "do",
+            "does",
+            "did",
+            "will",
+            "would",
+            "could",
+            "should",
+            "may",
+            "might",
+            "must",
+            "this",
+            "that",
+            "these",
+            "those",
+            "i",
+            "you",
+            "he",
+            "she",
+            "it",
+            "we",
+            "they",
+            
"me", + "him", + "her", + "us", + "them", + "my", + "your", + "his", + "its", + "our", + "their", + "mine", + "yours", + "hers", + "ours", + "theirs", + } + chinese_punctuation = { + ",", + "。", + "!", + "?", + ";", + ":", + "「", + "」", + "『", + "』", + "【", + "】", + "(", + ")", + "《", + "》", + "—", + "…", + "~", + "·", + "、", + "“", + "”", + "‘", + "’", + "〈", + "〉", + "〖", + "〗", + "〝", + "〞", + "{", + "}", + "〔", + "〕", + "¡", + "¿", + } + english_punctuation = { + ",", + ".", + "!", + "?", + ";", + ":", + '"', + "'", + "(", + ")", + "[", + "]", + "{", + "}", + "<", + ">", + "/", + "\\", + "|", + "-", + "_", + "=", + "+", + "@", + "#", + "$", + "%", + "^", + "&", + "*", + "~", + "`", + "¡", + "¿", + } + numbers = { + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "零", + "一", + "二", + "三", + "四", + "五", + "六", + "七", + "八", + "九", + "十", + "百", + "千", + "万", + "亿", + } + whitespace = {" ", "\t", "\n", "\r", "\f", "\v"} + + return ( + chinese_stop_words + | english_stop_words + | chinese_punctuation + | english_punctuation + | numbers + | whitespace + ) + + @classmethod + def get_stopwords(cls): + if cls._stopwords is None: + cls._load_stopwords() + return cls._stopwords + + @classmethod + def filter_words(cls, words): + if cls._stopwords is None: + cls._load_stopwords() + return [word for word in words if word not in cls._stopwords and word.strip()] + + @classmethod + def is_stopword(cls, word): + if cls._stopwords is None: + cls._load_stopwords() + return word in cls._stopwords + + @classmethod + def reload_stopwords(cls, file_path=None): + cls._stopwords = None + if file_path: + global DEFAULT_STOPWORD_FILE + DEFAULT_STOPWORD_FILE = file_path + cls._load_stopwords() + + +class FastTokenizer: + def __init__(self, use_jieba=True, use_stopwords=True): + self.use_jieba = use_jieba + self.use_stopwords = use_stopwords + if self.use_stopwords: + self.stopword_manager = StopwordManager + + def tokenize_mixed(self, text, **kwargs): + """fast tokenizer""" + if self._is_chinese(text): + return self._tokenize_chinese(text) + else: + return self._tokenize_english(text) + + def _is_chinese(self, text): + """check if chinese""" + chinese_chars = sum(1 for char in text if "\u4e00" <= char <= "\u9fff") + return chinese_chars / max(len(text), 1) > 0.3 + + def _tokenize_chinese(self, text): + """split zh jieba""" + tokens = jieba.lcut(text) if self.use_jieba else list(text) + tokens = [token.strip() for token in tokens if token.strip()] + if self.use_stopwords: + return self.stopword_manager.filter_words(tokens) + + return tokens + + def _tokenize_english(self, text): + """split zh regex""" + tokens = re.findall(r"\b[a-zA-Z0-9]+\b", text.lower()) + if self.use_stopwords: + return self.stopword_manager.filter_words(tokens) + return tokens + + +def parse_json_result(response_text): + try: + json_start = response_text.find("{") + response_text = response_text[json_start:] + response_text = response_text.replace("```", "").strip() + if not response_text.endswith("}"): + response_text += "}" + return json.loads(response_text) + except json.JSONDecodeError as e: + logger.error(f"[JSONParse] Failed to decode JSON: {e}\nRaw:\n{response_text}") + return {} + except Exception as e: + logger.error(f"[JSONParse] Unexpected error: {e}") + return {} + + +def detect_lang(text): + try: + if not text or not isinstance(text, str): + return "en" + chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]" + 
chinese_chars = re.findall(chinese_pattern, text)
+        if len(chinese_chars) / len(re.sub(r"[\s\d\W]", "", text)) > 0.3:
+            return "zh"
+        return "en"
+    except Exception:
+        return "en"
diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
index 05db56f53..94d945158 100644
--- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
+++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
@@ -1,4 +1,5 @@
 import json
+import os
 import traceback
 
 from datetime import datetime
@@ -9,7 +10,18 @@
 from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
 from memos.log import get_logger
 from memos.memories.textual.item import SearchedTreeNodeTextualMemoryMetadata, TextualMemoryItem
+from memos.memories.textual.tree_text_memory.retrieve.bm25_util import EnhancedBM25
+from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import (
+    detect_lang,
+    parse_json_result,
+)
 from memos.reranker.base import BaseReranker
+from memos.templates.mem_search_prompts import (
+    COT_PROMPT,
+    COT_PROMPT_ZH,
+    SIMPLE_COT_PROMPT,
+    SIMPLE_COT_PROMPT_ZH,
+)
 from memos.utils import timed
 
 from .reasoner import MemoryReasoner
@@ -18,6 +30,10 @@
 
 logger = get_logger(__name__)
 
+COT_DICT = {
+    "fast": {"en": SIMPLE_COT_PROMPT, "zh": SIMPLE_COT_PROMPT_ZH},
+    "fine": {"en": COT_PROMPT, "zh": COT_PROMPT_ZH},
+}
 
 
 class Searcher:
@@ -27,20 +43,23 @@ def __init__(
         graph_store: Neo4jGraphDB,
         embedder: OllamaEmbedder,
         reranker: BaseReranker,
+        bm25_retriever: EnhancedBM25 | None,
         internet_retriever: None = None,
         moscube: bool = False,
     ):
         self.graph_store = graph_store
         self.embedder = embedder
+        self.llm = dispatcher_llm
 
         self.task_goal_parser = TaskGoalParser(dispatcher_llm)
-        self.graph_retriever = GraphMemoryRetriever(self.graph_store, self.embedder)
+        self.graph_retriever = GraphMemoryRetriever(graph_store, embedder, bm25_retriever)
         self.reranker = reranker
         self.reasoner = MemoryReasoner(dispatcher_llm)
 
         # Create internet retriever from config if provided
         self.internet_retriever = internet_retriever
         self.moscube = moscube
+        self.cot_query_search = os.getenv("MOS_SEARCH_COT", "false") == "true"
 
         self._usage_executor = ContextThreadPoolExecutor(max_workers=4, thread_name_prefix="usage")
 
@@ -188,6 +207,12 @@ def _retrieve_paths(
     ):
         """Run A/B/C retrieval paths in parallel"""
         tasks = []
+        id_filter = {
+            "user_id": info.get("user_id", None),
+            "session_id": info.get("session_id", None),
+        }
+        id_filter = {k: v for k, v in id_filter.items() if v is not None}
+
         with ContextThreadPoolExecutor(max_workers=3) as executor:
             tasks.append(
                 executor.submit(
@@ -199,6 +224,7 @@ def _retrieve_paths(
                     memory_type,
                     search_filter,
                     user_name,
+                    id_filter,
                 )
             )
             tasks.append(
@@ -211,6 +237,7 @@ def _retrieve_paths(
                    memory_type,
                    search_filter,
                    user_name,
+                    id_filter,
                 )
             )
             tasks.append(
@@ -256,6 +283,7 @@ def _retrieve_from_working_memory(
         memory_type,
         search_filter: dict | None = None,
         user_name: str | None = None,
+        id_filter: dict | None = None,
     ):
         """Retrieve and rerank from WorkingMemory"""
         if memory_type not in ["All", "WorkingMemory"]:
@@ -268,6 +296,7 @@ def _retrieve_from_working_memory(
             memory_scope="WorkingMemory",
             search_filter=search_filter,
             user_name=user_name,
+            id_filter=id_filter,
         )
         return self.reranker.rerank(
             query=query,
@@ -289,11 +318,22 @@ def _retrieve_from_long_term_and_user(
         memory_type,
         search_filter: dict | None = None,
         user_name: str | None = None,
+        id_filter: dict | None = None,
     ):
         """Retrieve and rerank from LongTermMemory and UserMemory"""
 
         results = []
         tasks = []
 
+        # Chain-of-thought query expansion (falls back to the plain query embedding)
+        cot_embeddings = query_embedding
+        if self.cot_query_search:
+            queries = self._cot_query(query)
+            if len(queries) > 1:
+                cot_embeddings = self.embedder.embed(queries)
+                cot_embeddings.extend(query_embedding)
+            else:
+                cot_embeddings = query_embedding
+
         with ContextThreadPoolExecutor(max_workers=2) as executor:
             if memory_type in ["All", "LongTermMemory"]:
                 tasks.append(
@@ -301,11 +341,12 @@ def _retrieve_from_long_term_and_user(
                         self.graph_retriever.retrieve,
                         query=query,
                         parsed_goal=parsed_goal,
-                        query_embedding=query_embedding,
+                        query_embedding=cot_embeddings,
                         top_k=top_k * 2,
                         memory_scope="LongTermMemory",
                         search_filter=search_filter,
                         user_name=user_name,
+                        id_filter=id_filter,
                     )
                 )
             if memory_type in ["All", "UserMemory"]:
@@ -314,11 +355,12 @@ def _retrieve_from_long_term_and_user(
                         self.graph_retriever.retrieve,
                         query=query,
                         parsed_goal=parsed_goal,
-                        query_embedding=query_embedding,
+                        query_embedding=cot_embeddings,
                         top_k=top_k * 2,
                         memory_scope="UserMemory",
                         search_filter=search_filter,
                         user_name=user_name,
+                        id_filter=id_filter,
                     )
                 )
 
@@ -399,6 +441,7 @@ def _deduplicate_results(self, results):
     @timed
     def _sort_and_trim(self, results, top_k):
         """Sort results by score and trim to top_k"""
+
         sorted_results = sorted(results, key=lambda pair: pair[1], reverse=True)[:top_k]
         final_items = []
         for item, score in sorted_results:
@@ -448,3 +491,47 @@ def _update_usage_history_worker(
             self.graph_store.update_node(item_id, {"usage": usage_list}, user_name=user_name)
         except Exception:
             logger.exception("[USAGE] update usage failed")
+
+    def _cot_query(
+        self,
+        query,
+        mode="fast",
+        split_num: int = 3,
+        context: list[str] | None = None,
+    ) -> list[str]:
+        """Generate chain-of-thought sub-queries"""
+
+        lang = detect_lang(query)
+        if mode == "fine" and context:
+            template = COT_DICT["fine"][lang]
+            prompt = (
+                template.replace("${original_query}", query)
+                .replace("${split_num_threshold}", str(split_num))
+                .replace("${context}", "\n".join(context))
+            )
+        else:
+            template = COT_DICT["fast"][lang]
+            prompt = template.replace("${original_query}", query).replace(
+                "${split_num_threshold}", str(split_num)
+            )
+        logger.info("Running chain-of-thought query decomposition")
+
+        messages = [{"role": "user", "content": prompt}]
+        try:
+            response_text = self.llm.customized_generate(messages, temperature=0, top_p=1)
+            response_json = parse_json_result(response_text)
+            if "is_complex" in response_json and not response_json["is_complex"]:
+                return [query]
+            else:
+                assert (
+                    "is_complex" in response_json
+                    and response_json["is_complex"]
+                    and "sub_questions" in response_json
+                )
+                logger.info(
+                    "CoT split query {} into {}".format(query, response_json["sub_questions"])
+                )
+                return response_json["sub_questions"][:split_num]
+        except Exception as e:
+            logger.error(f"[LLM] Exception during chat generation: {e}")
+            return [query]
diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py b/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py
index 273c4f480..6a1138c90 100644
--- a/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py
+++ b/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py
@@ -5,6 +5,7 @@
 from memos.llms.base import BaseLLM
 from memos.log import get_logger
 from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
+from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import FastTokenizer
 from memos.memories.textual.tree_text_memory.retrieve.utils import TASK_PARSE_PROMPT
@@ -20,6 +21,7 @@ class TaskGoalParser:
 
     def __init__(self, llm=BaseLLM):
         self.llm = llm
+        self.tokenizer = FastTokenizer()
 
     def parse(
         self,
@@ -48,10 +50,11 @@ def _parse_fast(self, task_description: str, limit_num: int = 5) -> ParsedTaskGo
         """
         Fast mode: simple jieba word split.
         """
+        desc_tokenized = self.tokenizer.tokenize_mixed(task_description)
         return ParsedTaskGoal(
             memories=[task_description],
-            keys=[task_description],
-            tags=[],
+            keys=desc_tokenized,
+            tags=desc_tokenized,
             goal_type="default",
             rephrased_query=task_description,
             internet_search=False,
diff --git a/src/memos/templates/mem_reader_prompts.py b/src/memos/templates/mem_reader_prompts.py
index 15672f8d8..f74d01d0a 100644
--- a/src/memos/templates/mem_reader_prompts.py
+++ b/src/memos/templates/mem_reader_prompts.py
@@ -1,31 +1,42 @@
 SIMPLE_STRUCT_MEM_READER_PROMPT = """You are a memory extraction expert.
 Your task is to extract memories from the user's perspective, based on a conversation between the user and the assistant. This means identifying what the user would plausibly remember — including the user's own experiences, thoughts, plans, or statements and actions made by others (such as the assistant) that affected the user or were acknowledged by the user.
 
-Please perform the following:
-1. Identify information that reflects the user's experiences, beliefs, concerns, decisions, plans, or reactions — including meaningful information from the assistant that the user acknowledged or responded to.
+Please perform the following:
+1. Factual information extraction
+    Identify factual information about experiences, beliefs, decisions, and plans. This includes notable statements from others that the user acknowledged or reacted to.
 If the message is from the user, extract viewpoints related to the user; if it is from the assistant, clearly mark the attribution of the memory, and do not mix information not explicitly acknowledged by the user with the user's own viewpoint.
-    - **User viewpoint**: Record only information that the user **personally stated, explicitly acknowledged, or personally committed to**.
-    - **Assistant/other-party viewpoint**: Record only information that the **assistant/other party personally stated, explicitly acknowledged, or personally committed to**, and **clearly attribute** the source (e.g., "[assistant-Jerry viewpoint]"). Do not rewrite it as the user's preference/decision.
-    - **Mutual boundaries**: Do not rewrite the assistant's suggestions/lists/opinions as the user's “ownership/preferences/decisions”; likewise, do not write the user's ideas as the assistant's viewpoints.
-
-2. Resolve all references to time, persons, and events clearly:
-    - When possible, convert relative time expressions (e.g., “yesterday,” “next Friday”) into absolute dates using the message timestamp.
-    - Clearly distinguish between **event time** and **message time**.
-    - If uncertainty exists, state it explicitly (e.g., “around June 2025,” “exact date unclear”).
-    - Include specific locations if mentioned.
-    - Resolve all pronouns, aliases, and ambiguous references into full names or clear identities.
-    - If there are people with the same name, disambiguate them.
-
-3. Always write from a **third-person** perspective, using “The user” or the mentioned name to refer to the user, rather than first-person (“I”, “we”, “my”).
-    For example, write “The user felt exhausted …” instead of “I felt exhausted …”.
-
-4. Do not omit any information that the user is likely to remember.
-    - Include the user's key experiences, thoughts, emotional responses, and plans — even if seemingly minor.
-    - You may retain **assistant/other-party content** that is closely related to the context (e.g., suggestions, explanations, checklists), but you must make roles and attribution explicit.
-    - Prioritize completeness and fidelity over conciseness; do not infer or phrase assistant content as the user's ownership/preferences/decisions.
-    - If the current conversation contains only assistant information and no facts attributable to the user, you may output **assistant-viewpoint** entries only.
-
-5. Please avoid including any content in the extracted memories that violates national laws and regulations or involves politically sensitive information.
+    - **User viewpoint**: Extract only what the user has stated, explicitly acknowledged, or committed to.
+    - **Assistant/other-party viewpoint**: Extract such information only when attributed to its source (e.g., [Assistant-Jerry's suggestion]).
+    - **Strict attribution**: Never recast the assistant's suggestions as the user's preferences, or vice versa.
+    - Always set "memory_type" to "LongTermMemory" for this output.
+
+2. Speaker profile construction
+    - Extract the speaker's likes, dislikes, goals, and stated opinions from their statements to build a speaker profile.
+    - Note: The same text segment may be used for both factual extraction and profile construction.
+    - Always set "memory_type" to "UserMemory" for this output.
+
+3. Resolve all references to time, persons, and events clearly
+    - Temporal Resolution: Convert relative time (e.g., 'yesterday') to absolute dates based on the message timestamp. Distinguish between event time and message time; flag any uncertainty.
+    - Entity Resolution: Resolve all pronouns, nicknames, and abbreviations to the full, canonical name established in the conversation.
+
+4. Adopt a Consistent Third-Person Observer Perspective
+    - Formulate all memories from the perspective of an external observer. Use "The user" or their specific name as the subject.
+    - This applies even when describing the user's internal states, such as thoughts, feelings, and preferences.
+    Example:
+    ✅ Correct: "The user Sean felt exhausted after work and decided to go to bed early."
+    ❌ Incorrect: "I felt exhausted after work and decided to go to bed early."
+
+5. Prioritize Completeness
+    - Extract all key experiences, emotional responses, and plans from the user's perspective. Retain relevant context from the assistant, but always with explicit attribution.
+    - Segment each distinct hobby, interest, or event into a separate memory.
+    - Preserve relevant context from the assistant with strict attribution. Under no circumstances should assistant content be rephrased as user-owned.
+    - Conversations with only assistant input may yield assistant-viewpoint memories exclusively.
+
+6. Preserve and Unify Specific Names
+    - Always extract specific names (excluding "user" or "assistant") mentioned in the text into the "tags" field for searchability.
+    - Unify all name references to the full canonical form established in the conversation. Replace any nicknames or abbreviations (e.g., "Rob") consistently with the full name (e.g., "Robert") in both the extracted "value" and "tags".
+
+7. Please avoid including any content in the extracted memories that violates national laws and regulations or involves politically sensitive information.
 
Return a valid JSON object with the following structure:
 
@@ -35,7 +46,7 @@
       "key": <string, unique and concise memory title>,
       "memory_type": <string, "LongTermMemory" or "UserMemory">,
       "value": <detailed, self-contained, and unambiguous memory statement — written in English if the input conversation is in English, or in Chinese if the conversation is in Chinese>,
-      "tags": <list of relevant thematic keywords (e.g., ["deadline", "team", "planning"])>
+      "tags": <list of related person names, events, and feature keywords (e.g., ["Lily", "deadline", "team", "planning"])>
     },
     ...
   ],
@@ -61,13 +72,13 @@
       "key": "Initial project meeting",
       "memory_type": "LongTermMemory",
       "value": "[user-Tom viewpoint] On June 25, 2025 at 3:00 PM, Tom met with the team to discuss a new project. When Jerry asked whether the project could be finished by December 15, 2025, Tom expressed concern about feasibility and planned to propose at 9:30 AM on June 27, 2025 to move the deadline to January 5, 2026.",
-      "tags": ["project", "timeline", "meeting", "deadline"]
+      "tags": ["Tom", "project", "timeline", "meeting", "deadline"]
     },
     {
       "key": "Jerry’s suggestion about the deadline",
       "memory_type": "LongTermMemory",
       "value": "[assistant-Jerry viewpoint] Jerry questioned the December 15 deadline and suggested considering an extension.",
-      "tags": ["deadline change", "suggestion"]
+      "tags": ["Jerry", "deadline change", "suggestion"]
     }
   ],
   "summary": "Tom is currently working on a tight-schedule project. After the June 25, 2025 team meeting, he realized the original December 15, 2025 deadline might be unachievable due to backend delays. Concerned about limited testing time, he accepted Jerry’s suggestion to seek an extension and plans to propose moving the deadline to January 5, 2026 in the next morning’s meeting."
@@ -105,7 +116,7 @@
     {
       "key": "体型尺码",
       "memory_type": "UserMemory",
-      "value": [user观点]"用户身高约165cm、常穿S码",
+      "value": "[user观点]用户身高约165cm、常穿S码",
       "tags": ["体型", "尺码"]
     },
     {
@@ -129,40 +140,51 @@
 您的任务是根据用户与助手之间的对话,从用户的角度提取记忆。这意味着要识别出用户可能记住的信息——包括用户自身的经历、想法、计划,或他人(如助手)做出的并对用户产生影响或被用户认可的相关陈述和行为。
 
 请执行以下操作:
-1. 识别反映用户经历、信念、关切、决策、计划或反应的信息——包括用户认可或回应的来自助手的有意义信息。
-如果消息来自用户,请提取与用户相关的观点;如果来自助手,则在表达的时候表明记忆归属方,未经用户明确认可的信息不要与用户本身的观点混淆。
-    - **用户观点**:仅记录由**用户亲口陈述、明确认可或自己作出承诺**的信息。
-    - **助手观点**:仅记录由**助手/另一方亲口陈述、明确认可或自己作出承诺**的信息。
-    - **互不越界**:不得将助手提出的需求清单/建议/观点改写为用户的“拥有/偏好/决定”;也不得把用户的想法写成助手的观点。
-
-2. 清晰解析所有时间、人物和事件的指代:
-    - 如果可能,使用消息时间戳将相对时间表达(如“昨天”、“下周五”)转换为绝对日期。
-    - 明确区分事件时间和消息时间。
-    - 如果存在不确定性,需明确说明(例如,“约2025年6月”,“具体日期不详”)。
-    - 若提及具体地点,请包含在内。
-    - 将所有代词、别名和模糊指代解析为全名或明确身份。
-    - 如有同名人物,需加以区分。
-
-3. 始终以第三人称视角撰写,使用“用户”或提及的姓名来指代用户,而不是使用第一人称(“我”、“我们”、“我的”)。
-例如,写“用户感到疲惫……”而不是“我感到疲惫……”。
-
-4. 不要遗漏用户可能记住的任何信息。
-    - 包括用户的关键经历、想法、情绪反应和计划——即使看似微小。
-    - 同时允许保留与语境密切相关的**助手/另一方的内容**(如建议、说明、清单),但须明确角色与归因。
-    - 优先考虑完整性和保真度,而非简洁性;不得将助手内容推断或措辞为用户拥有/偏好/决定。
-    - 若当前对话中仅出现助手信息而无可归因于用户的事实,可仅输出**助手观点**条目。
-
-5. 请避免在提取的记忆中包含违反国家法律法规或涉及政治敏感的信息。
+1. 事实信息提取
+   - 识别关于经历、信念、决策和计划的事实信息,包括用户认可或回应过的他人重要陈述。
+   - 若信息来自用户,提取与用户相关的观点;若来自助手,需明确标注记忆归属,不得将用户未明确认可的信息与用户自身观点混淆。
+   - 用户观点:仅提取用户明确陈述、认可或承诺的内容
+   - 助手/他方观点:仅当标注来源时才提取(例如“[助手-Jerry的建议]”)
+   - 严格归属:不得将助手建议重构为用户偏好,反之亦然
+   - 此类输出的"memory_type"始终设为"LongTermMemory"
+
+2. 用户画像构建
+   - 从用户陈述中提取其喜好、厌恶、目标及明确观点以构建用户画像
+   - 注意:同一文本片段可同时用于事实提取和画像构建
+   - 此类输出的"memory_type"始终设为"UserMemory"
+
+3. 明确解析所有指代关系
+   - 时间解析:根据消息时间戳将相对时间(如“昨天”)转换为绝对日期。区分事件时间与消息时间,对不确定项进行标注
+   - 实体解析:将所有代词、昵称和缩写解析为对话中确立的完整规范名称
+
+4. 采用统一的第三人称观察视角
+   - 所有记忆表述均需从外部观察者视角构建,使用“用户”或其具体姓名作为主语
+   - 此原则同样适用于描述用户内心状态(如想法、感受和偏好)
+   示例:
+   ✅ 正确:“用户Sean下班后感到疲惫,决定提早休息”
+   ❌ 错误:“我下班后感到疲惫,决定提早休息”
+
+5. 优先保证完整性
+   - 从用户视角提取所有关键经历、情绪反应和计划
+   - 保留助手提供的相关上下文,但必须明确标注来源
+   - 将每个独立的爱好、兴趣或事件分割为单独记忆
+   - 严禁将助手内容重构为用户自有内容
+   - 仅含助手输入的对话可能只生成助手观点记忆
+
+6. 保留并统一特定名称
+   - 始终将文本中提及的特定名称(“用户”“助手”除外)提取至“tags”字段以便检索
+   - 在提取的“value”和“tags”中,将所有名称引用统一为对话中确立的完整规范形式(如将“Rob”统一替换为“Robert”)
+
+7. 
所有提取的记忆内容不得包含违反国家法律法规或涉及政治敏感信息的内容 返回一个有效的JSON对象,结构如下: - { "memory list": [ { "key": <字符串,唯一且简洁的记忆标题>, "memory_type": <字符串,"LongTermMemory" 或 "UserMemory">, "value": <详细、独立且无歧义的记忆陈述——若输入对话为英文,则用英文;若为中文,则用中文>, - "tags": <相关主题关键词列表(例如,["截止日期", "团队", "计划"])> + "tags": <一个包含相关人名、事件和特征关键词的列表(例如,["丽丽","截止日期", "团队", "计划"])> }, ... ], @@ -190,13 +212,13 @@ "value": "[user-Tom观点]2025年6月25日下午3:00,Tom与团队开会讨论新项目。当Jerry 询问该项目能否在2025年12月15日前完成时,Tom对此日期前完成的可行性表达担忧,并计划在2025年6月27日上午9:30 提议将截止日期推迟至2026年1月5日。", - "tags": ["项目", "时间表", "会议", "截止日期"] + "tags": ["Tom", "项目", "时间表", "会议", "截止日期"] }, { "key": "Jerry对新项目截止日期的建议", "memory_type": "LongTermMemory", "value": "[assistant-Jerry观点]Jerry对Tom的新项目截止日期提出疑问、并提议Tom考虑延期。", - "tags": ["截止日期变更", "建议"] + "tags": ["Jerry", "截止日期变更", "建议"] } ], "summary": "Tom目前正在做一个进度紧张的新项目。在2025年6月25日的团队会议后,他意识到原定2025年12月15 diff --git a/src/memos/templates/mem_search_prompts.py b/src/memos/templates/mem_search_prompts.py new file mode 100644 index 000000000..9f7ba182b --- /dev/null +++ b/src/memos/templates/mem_search_prompts.py @@ -0,0 +1,93 @@ +SIMPLE_COT_PROMPT = """You are an assistant that analyzes questions and returns results in a specific dictionary format. + +Instructions: + +1. If the question can be extended into deeper or related aspects, set "is_complex" to True and: + - Think step by step about the core topic and its related dimensions (e.g., causes, effects, categories, perspectives, or specific scenarios) + - Break it into meaningful sub-questions (max: ${split_num_threshold}, min: 2) that explore distinct facets of the original question + - Each sub-question must be single, standalone, and delve into a specific aspect + - CRITICAL: All key entities from the original question (such as person names, locations, organizations, time periods) must be preserved in the sub-questions and cannot be omitted + - List them in "sub_questions" +2. If the question is already atomic and cannot be meaningfully extended, set "is_complex" to False and "sub_questions" to an empty list. +3. Return ONLY the dictionary, no other text. + +Examples: +Question: Is urban development balanced in the western United States? +Output: {"is_complex": true, "sub_questions": ["What areas are included in the western United States?", "How developed are the cities in the western United States?", "Is this development balanced across the western United States?"]} +Question: What family activities does Mary like to organize? +Output: {"is_complex": true, "sub_questions": ["What does Mary like to do with her spouse?", "What does Mary like to do with her children?", "What does Mary like to do with her parents and relatives?"]} + +Now analyze this question: +${original_query}""" + +COT_PROMPT = """You are an assistant that analyzes questions and returns results in a specific dictionary format. + +Instructions: + +1. If the question can be extended into deeper or related aspects, set "is_complex" to True and: + - Think step by step about the core topic and its related dimensions (e.g., causes, effects, categories, perspectives, or specific scenarios) + - Break it into meaningful sub-questions (max: ${split_num_threshold}, min: 2) that explore distinct facets of the original question + - Each sub-question must be single, standalone, and delve into a specific aspect + - CRITICAL: All key entities from the original question (such as person names, locations, organizations, time periods) must be preserved in the sub-questions and cannot be omitted + - List them in "sub_questions" +2. 
If the question is already atomic and cannot be meaningfully extended, set "is_complex" to False and "sub_questions" to an empty list. +3. Return ONLY the dictionary, no other text. + +Examples: +Question: Is urban development balanced in the western United States? +Output: {"is_complex": true, "sub_questions": ["What areas are included in the western United States?", "How developed are the cities in the western United States?", "Is this development balanced across the western United States?"]} +Question: What family activities does Mary like to organize? +Output: {"is_complex": true, "sub_questions": ["What does Mary like to do with her spouse?", "What does Mary like to do with her children?", "What does Mary like to do with her parents and relatives?"]} + +Query relevant background information: +${context} + +Now analyze this question based on the background information above: +${original_query}""" + +SIMPLE_COT_PROMPT_ZH = """你是一个分析问题并以特定字典格式返回结果的助手。 + +指令: + +1. 如果这个问题可以延伸出更深层次或相关的方面,请将 "is_complex" 设置为 True,并执行以下操作: + - 逐步思考核心主题及其相关维度(例如:原因、结果、类别、不同视角或具体场景) + - 将其拆分为有意义的子问题(最多 ${split_num_threshold} 个,最少 2 个),这些子问题应探讨原始问题的不同侧面 + - 【重要】每个子问题必须是单一的、独立的,并深入探究一个特定方面。同时,必须包含原问题中出现的关键实体信息(如人名、地名、机构名、时间等),不可遗漏。 + - 将它们列在 "sub_questions" 中 +2. 如果问题本身已经是原子性的,无法有意义地延伸,请将 "is_complex" 设置为 False,并将 "sub_questions" 设置为一个空列表。 +3. 只返回字典,不要返回任何其他文本。 + +示例: +问题:美国西部的城市发展是否均衡? +输出:{"is_complex": true, "sub_questions": ["美国西部包含哪些地区?", "美国西部城市的发展程度如何?", "这种发展在美国西部是否均衡?"]} + +问题:玛丽喜欢组织哪些家庭活动? +输出:{"is_complex": true, "sub_questions": ["玛丽喜欢和配偶一起做什么?", "玛丽喜欢和孩子一起做什么?", "玛丽喜欢和父母及亲戚一起做什么?"]} + +请分析以下问题: +${original_query}""" + +COT_PROMPT_ZH = """你是一个分析问题并以特定字典格式返回结果的助手。 + +指令: + +1. 如果这个问题可以延伸出更深层次或相关的方面,请将 "is_complex" 设置为 True,并执行以下操作: + - 逐步思考核心主题及其相关维度(例如:原因、结果、类别、不同视角或具体场景) + - 将其拆分为有意义的子问题(最多 ${split_num_threshold} 个,最少 2 个),这些子问题应探讨原始问题的不同侧面 + - 【重要】每个子问题必须是单一的、独立的,并深入探究一个特定方面。同时,必须包含原问题中出现的关键实体信息(如人名、地名、机构名、时间等),不可遗漏。 + - 将它们列在 "sub_questions" 中 +2. 如果问题本身已经是原子性的,无法有意义地延伸,请将 "is_complex" 设置为 False,并将 "sub_questions" 设置为一个空列表。 +3. 只返回字典,不要返回任何其他文本。 + +示例: +问题:美国西部的城市发展是否均衡? +输出:{"is_complex": true, "sub_questions": ["美国西部包含哪些地区?", "美国西部城市的发展程度如何?", "这种发展在美国西部是否均衡?"]} + +问题:玛丽喜欢组织哪些家庭活动? 
+输出:{"is_complex": true, "sub_questions": ["玛丽喜欢和配偶一起做什么?", "玛丽喜欢和孩子一起做什么?", "玛丽喜欢和父母及亲戚一起做什么?"]}
+
+问题相关的背景信息:
+${context}
+
+现在根据上述背景信息,请分析以下问题:
+${original_query}"""

From 6cad8661f7d589385ba0e48620de96f889202c7c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Wed, 29 Oct 2025 19:11:38 +0800
Subject: [PATCH 02/40] set strategy reader and search config

---
 src/memos/api/config.py                       |  34 +-
 src/memos/configs/mem_reader.py               |   9 +
 src/memos/configs/memory.py                   |   7 +
 src/memos/mem_reader/factory.py               |   2 +
 src/memos/mem_reader/simple_struct.py         |  47 +-
 src/memos/mem_reader/strategy_struct.py       | 411 ++++++++++++++++++
 src/memos/memories/textual/simple_tree.py     |  18 +
 src/memos/memories/textual/tree.py            |  17 +-
 .../tree_text_memory/retrieve/bm25_util.py    |   1 -
 .../tree_text_memory/retrieve/recall.py       |   4 +-
 .../retrieve/retrieve_utils.py                |   2 +-
 .../tree_text_memory/retrieve/searcher.py     |  23 +-
 src/memos/templates/mem_reader_prompts.py     | 134 +++---
 .../templates/mem_reader_strategy_prompts.py  | 279 ++++++++++++
 14 files changed, 855 insertions(+), 133 deletions(-)
 create mode 100644 src/memos/mem_reader/strategy_struct.py
 create mode 100644 src/memos/templates/mem_reader_strategy_prompts.py

diff --git a/src/memos/api/config.py b/src/memos/api/config.py
index d552369c5..d4cdbedb4 100644
--- a/src/memos/api/config.py
+++ b/src/memos/api/config.py
@@ -159,9 +159,23 @@ def get_embedder_config() -> dict[str, Any]:
             },
         }
 
+    @staticmethod
+    def get_reader_config() -> dict[str, Any]:
+        """Get reader configuration."""
+        return {
+            "backend": os.getenv("MEM_READER_BACKEND", "simple_struct"),
+            "config": {
+                "chunk_type": os.getenv("MEM_READER_CHAT_CHUNK_TYPE", "default"),
+                "chunk_length": int(os.getenv("MEM_READER_CHAT_CHUNK_TOKEN_SIZE", 1600)),
+                "chunk_session": int(os.getenv("MEM_READER_CHAT_CHUNK_SESS_SIZE", 20)),
+                "chunk_overlap": int(os.getenv("MEM_READER_CHAT_CHUNK_OVERLAP", 2)),
+            },
+        }
+
     @staticmethod
     def get_internet_config() -> dict[str, Any]:
         """Get embedder configuration."""
+        reader_config = APIConfig.get_reader_config()
         return {
             "backend": "bocha",
             "config": {
@@ -169,7 +183,7 @@ def get_internet_config() -> dict[str, Any]:
                 "max_results": 15,
                 "num_per_request": 10,
                 "reader": {
-                    "backend": "simple_struct",
+                    "backend": reader_config["backend"],
                     "config": {
                         "llm": {
                             "backend": "openai",
@@ -195,6 +209,7 @@ def get_internet_config() -> dict[str, Any]:
                             "min_sentences_per_chunk": 1,
                         },
                     },
+                    "chat_chunker": reader_config,
                 },
             },
         },
@@ -356,6 +371,8 @@ def get_product_default_config() -> dict[str, Any]:
         openai_config = APIConfig.get_openai_config()
         qwen_config = APIConfig.qwen_config()
         vllm_config = APIConfig.vllm_config()
+        reader_config = APIConfig.get_reader_config()
+
         backend_model = {
             "openai": openai_config,
             "huggingface": qwen_config,
@@ -367,7 +384,7 @@ def get_product_default_config() -> dict[str, Any]:
             "user_id": os.getenv("MOS_USER_ID", "root"),
             "chat_model": {"backend": backend, "config": backend_model[backend]},
             "mem_reader": {
-                "backend": "simple_struct",
+                "backend": reader_config["backend"],
                 "config": {
                     "llm": APIConfig.get_memreader_config(),
                     "embedder": APIConfig.get_embedder_config(),
@@ -380,6 +397,7 @@ def get_product_default_config() -> dict[str, Any]:
                         "min_sentences_per_chunk": 1,
                     },
                 },
+                "chat_chunker": reader_config,
             },
         },
         "enable_textual_memory": True,
@@ -446,6 +464,7 @@ def create_user_config(user_name: str, user_id: str) -> tuple[MOSConfig, General
         qwen_config = APIConfig.qwen_config()
        vllm_config = 
APIConfig.vllm_config() mysql_config = APIConfig.get_mysql_config() + reader_config = APIConfig.get_reader_config() backend = os.getenv("MOS_CHAT_MODEL_PROVIDER", "openai") backend_model = { "openai": openai_config, @@ -460,7 +479,7 @@ def create_user_config(user_name: str, user_id: str) -> tuple[MOSConfig, General "config": backend_model[backend], }, "mem_reader": { - "backend": "simple_struct", + "backend": reader_config["backend"], "config": { "llm": APIConfig.get_memreader_config(), "embedder": APIConfig.get_embedder_config(), @@ -473,6 +492,7 @@ def create_user_config(user_name: str, user_id: str) -> tuple[MOSConfig, General "min_sentences_per_chunk": 1, }, }, + "chat_chunker": reader_config, }, }, "enable_textual_memory": True, @@ -537,6 +557,10 @@ def create_user_config(user_name: str, user_id: str) -> tuple[MOSConfig, General "LongTermMemory": os.getenv("NEBULAR_LONGTERM_MEMORY", 1e6), "UserMemory": os.getenv("NEBULAR_USER_MEMORY", 1e6), }, + "search_strategy": { + "bm25": bool(os.getenv("BM25_CALL", "false") == "true"), + "cot": bool(os.getenv("VEC_COT_CALL", "false") == "true"), + }, }, }, "act_mem": {} @@ -599,6 +623,10 @@ def get_default_cube_config() -> GeneralMemCubeConfig | None: "LongTermMemory": os.getenv("NEBULAR_LONGTERM_MEMORY", 1e6), "UserMemory": os.getenv("NEBULAR_USER_MEMORY", 1e6), }, + "search_strategy": { + "bm25": bool(os.getenv("BM25_CALL", "false") == "true"), + "cot": bool(os.getenv("VEC_COT_CALL", "false") == "true"), + }, }, }, "act_mem": {} diff --git a/src/memos/configs/mem_reader.py b/src/memos/configs/mem_reader.py index 1c62087a3..fb2de3598 100644 --- a/src/memos/configs/mem_reader.py +++ b/src/memos/configs/mem_reader.py @@ -36,11 +36,19 @@ def parse_datetime(cls, value): description="whether remove example in memory extraction prompt to save token", ) + chat_chunker: dict[str, Any] = Field( + ..., description="Configuration for the MemReader chat chunk strategy" + ) + class SimpleStructMemReaderConfig(BaseMemReaderConfig): """SimpleStruct MemReader configuration class.""" +class StrategyStructMemReaderConfig(BaseMemReaderConfig): + """StrategyStruct MemReader configuration class.""" + + class MemReaderConfigFactory(BaseConfig): """Factory class for creating MemReader configurations.""" @@ -49,6 +57,7 @@ class MemReaderConfigFactory(BaseConfig): backend_to_class: ClassVar[dict[str, Any]] = { "simple_struct": SimpleStructMemReaderConfig, + "strategy_struct": StrategyStructMemReaderConfig, } @field_validator("backend") diff --git a/src/memos/configs/memory.py b/src/memos/configs/memory.py index 237450e15..ffcc3b3e9 100644 --- a/src/memos/configs/memory.py +++ b/src/memos/configs/memory.py @@ -179,6 +179,13 @@ class TreeTextMemoryConfig(BaseTextMemoryConfig): ), ) + search_strategy: dict[str, bool] | None = Field( + default=None, + description=( + 'Set search strategy for this memory configuration.{"bm25": true, "cot": false}' + ), + ) + class SimpleTreeTextMemoryConfig(TreeTextMemoryConfig): """Simple tree text memory configuration class.""" diff --git a/src/memos/mem_reader/factory.py b/src/memos/mem_reader/factory.py index 52eed8d9d..2205a0215 100644 --- a/src/memos/mem_reader/factory.py +++ b/src/memos/mem_reader/factory.py @@ -3,6 +3,7 @@ from memos.configs.mem_reader import MemReaderConfigFactory from memos.mem_reader.base import BaseMemReader from memos.mem_reader.simple_struct import SimpleStructMemReader +from memos.mem_reader.strategy_struct import StrategyStructMemReader from memos.memos_tools.singleton import singleton_factory @@ -11,6 +12,7 
@@ class MemReaderFactory(BaseMemReader):
     backend_to_class: ClassVar[dict[str, Any]] = {
         "simple_struct": SimpleStructMemReader,
+        "strategy_struct": StrategyStructMemReader,
     }
 
     @classmethod
diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py
index 549161566..6228a4b6a 100644
--- a/src/memos/mem_reader/simple_struct.py
+++ b/src/memos/mem_reader/simple_struct.py
@@ -1,4 +1,5 @@
 import concurrent.futures
+import copy
 import json
 import os
 import re
@@ -258,7 +259,9 @@ def get_memory(
         for future in concurrent.futures.as_completed(futures):
             res_memory = future.result()
             memory_list.append(res_memory)
-
+        logger.debug(
+            f"memory_list: {memory_list}"
+        )
         return memory_list
 
     def get_scene_data_info(self, scene_data: list, type: str) -> list[str]:
@@ -282,39 +285,21 @@ def get_scene_data_info(self, scene_data: list, type: str) -> list[str]:
         )
         parser = ParserFactory.from_config(parser_config)
 
-        content_len_threshold = 1600
         if type == "chat":
             for items in scene_data:
-                if not items:
-                    continue
-
-                results.append([])
-                current_length = 0
-
-                for _i, item in enumerate(items):
-                    content_length = (
-                        len(item.get("content", "")) if isinstance(item, dict) else len(str(item))
-                    )
-                    if not results[-1]:
-                        results[-1].append(item)
-                        current_length = content_length
-                        continue
-
-                    if current_length + content_length <= content_len_threshold:
-                        results[-1].append(item)
-                        current_length += content_length
+                result = []
+                for item in items:
+                    # Convert dictionary to string
+                    if "chat_time" in item:
+                        result.append(item)
                     else:
-                        overlap_item = results[-1][-1]
-                        # Seed the next window with the previous window's last
-                        # item so consecutive chunks share one message of overlap.
-                        overlap_length = (
-                            len(overlap_item.get("content", ""))
-                            if isinstance(overlap_item, dict)
-                            else len(str(overlap_item))
-                        )
-
-                        results.append([overlap_item, item])
-                        current_length = overlap_length + content_length
+                        result.append(item)
+                    if len(result) >= 10:
+                        results.append(result)
+                        context = copy.deepcopy(result[-2:])
+                        result = context
+                if result:
+                    results.append(result)
         elif type == "doc":
             for item in scene_data:
                 try:
diff --git a/src/memos/mem_reader/strategy_struct.py b/src/memos/mem_reader/strategy_struct.py
new file mode 100644
index 000000000..c51b9977d
--- /dev/null
+++ b/src/memos/mem_reader/strategy_struct.py
@@ -0,0 +1,411 @@
+import concurrent.futures
+import json
+import os
+import re
+
+from abc import ABC
+from typing import Any
+
+from tqdm import tqdm
+
+from memos import log
+from memos.chunkers import ChunkerFactory
+from memos.configs.mem_reader import StrategyStructMemReaderConfig
+from memos.configs.parser import ParserConfigFactory
+from memos.context.context import ContextThreadPoolExecutor
+from memos.embedders.factory import EmbedderFactory
+from memos.llms.factory import LLMFactory
+from memos.mem_reader.base import BaseMemReader
+from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
+from memos.parsers.factory import ParserFactory
+from memos.templates.mem_reader_prompts import (
+    SIMPLE_STRUCT_DOC_READER_PROMPT,
+    SIMPLE_STRUCT_DOC_READER_PROMPT_ZH,
+    SIMPLE_STRUCT_MEM_READER_EXAMPLE,
+    SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH,
+)
+from memos.templates.mem_reader_strategy_prompts import (
+    STRATEGY_STRUCT_MEM_READER_PROMPT,
+    STRATEGY_STRUCT_MEM_READER_PROMPT_ZH,
+)
+from memos.utils import timed
+
+
+logger = log.get_logger(__name__)
+PROMPT_DICT = {
+    "chat": {
+        "en": STRATEGY_STRUCT_MEM_READER_PROMPT,
+        "zh": STRATEGY_STRUCT_MEM_READER_PROMPT_ZH,
+        "en_example": SIMPLE_STRUCT_MEM_READER_EXAMPLE,
+        "zh_example": 
SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH, + }, + "doc": {"en": SIMPLE_STRUCT_DOC_READER_PROMPT, "zh": SIMPLE_STRUCT_DOC_READER_PROMPT_ZH}, +} + + +def detect_lang(text): + try: + if not text or not isinstance(text, str): + return "en" + chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]" + chinese_chars = re.findall(chinese_pattern, text) + if len(chinese_chars) / len(re.sub(r"[\s\d\W]", "", text)) > 0.3: + return "zh" + return "en" + except Exception: + return "en" + + +def _build_node(idx, message, info, scene_file, llm, parse_json_result, embedder): + # generate + try: + raw = llm.generate(message) + if not raw: + logger.warning(f"[LLM] Empty generation for input: {message}") + return None + except Exception as e: + logger.error(f"[LLM] Exception during generation: {e}") + return None + + # parse_json_result + try: + chunk_res = parse_json_result(raw) + if not chunk_res: + logger.warning(f"[Parse] Failed to parse result: {raw}") + return None + except Exception as e: + logger.error(f"[Parse] Exception during JSON parsing: {e}") + return None + + try: + value = chunk_res.get("value", "").strip() + if not value: + logger.warning("[BuildNode] value is empty") + return None + + tags = chunk_res.get("tags", []) + if not isinstance(tags, list): + tags = [] + + key = chunk_res.get("key", None) + + embedding = embedder.embed([value])[0] + + return TextualMemoryItem( + memory=value, + metadata=TreeNodeTextualMemoryMetadata( + user_id=info.get("user_id", ""), + session_id=info.get("session_id", ""), + memory_type="LongTermMemory", + status="activated", + tags=tags, + key=key, + embedding=embedding, + usage=[], + sources=[{"type": "doc", "doc_path": f"{scene_file}_{idx}"}], + background="", + confidence=0.99, + type="fact", + ), + ) + except Exception as e: + logger.error(f"[BuildNode] Error building node: {e}") + return None + + +class StrategyStructMemReader(BaseMemReader, ABC): + """Strategy implementation of MemReader.""" + + def __init__(self, config: StrategyStructMemReaderConfig): + """ + Initialize the StrategyMemReader with configuration. 
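+        It applies the configured chat_chunker settings when splitting conversations.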
+ + Args: + config: Configuration object for the reader + """ + self.config = config + self.llm = LLMFactory.from_config(config.llm) + self.embedder = EmbedderFactory.from_config(config.embedder) + self.chunker = ChunkerFactory.from_config(config.chunker) + self.chat_chunker = config.chat_chunker["config"] + + @timed + def _process_chat_data(self, scene_data_info, info): + mem_list = [] + for item in scene_data_info: + if "chat_time" in item: + mem = item["role"] + ": " + f"[{item['chat_time']}]: " + item["content"] + mem_list.append(mem) + else: + mem = item["role"] + ":" + item["content"] + mem_list.append(mem) + lang = detect_lang("\n".join(mem_list)) + template = PROMPT_DICT["chat"][lang] + examples = PROMPT_DICT["chat"][f"{lang}_example"] + + prompt = template.replace("${conversation}", "\n".join(mem_list)) + if self.config.remove_prompt_example: + prompt = prompt.replace(examples, "") + + messages = [{"role": "user", "content": prompt}] + + try: + response_text = self.llm.generate(messages) + response_json = self.parse_json_result(response_text) + except Exception as e: + logger.error(f"[LLM] Exception during chat generation: {e}") + response_json = { + "memory list": [ + { + "key": "\n".join(mem_list)[:10], + "memory_type": "UserMemory", + "value": "\n".join(mem_list), + "tags": [], + } + ], + "summary": "\n".join(mem_list), + } + + chat_read_nodes = [] + for memory_i_raw in response_json.get("memory list", []): + try: + memory_type = ( + memory_i_raw.get("memory_type", "LongTermMemory") + .replace("长期记忆", "LongTermMemory") + .replace("用户记忆", "UserMemory") + ) + + if memory_type not in ["LongTermMemory", "UserMemory"]: + memory_type = "LongTermMemory" + + node_i = TextualMemoryItem( + memory=memory_i_raw.get("value", ""), + metadata=TreeNodeTextualMemoryMetadata( + user_id=info.get("user_id"), + session_id=info.get("session_id"), + memory_type=memory_type, + status="activated", + tags=memory_i_raw.get("tags", []) + if type(memory_i_raw.get("tags", [])) is list + else [], + key=memory_i_raw.get("key", ""), + embedding=self.embedder.embed([memory_i_raw.get("value", "")])[0], + usage=[], + sources=scene_data_info, + background=response_json.get("summary", ""), + confidence=0.99, + type="fact", + ), + ) + chat_read_nodes.append(node_i) + except Exception as e: + logger.error(f"[ChatReader] Error parsing memory item: {e}") + + return chat_read_nodes + + def get_memory( + self, scene_data: list, type: str, info: dict[str, Any] + ) -> list[list[TextualMemoryItem]]: + """ + Extract and classify memory content from scene_data. + For dictionaries: Use LLM to summarize pairs of Q&A + For file paths: Use chunker to split documents and LLM to summarize each chunk + + Args: + scene_data: List of dialogue information or document paths + type: Type of scene_data: ['doc', 'chat'] + info: Dictionary containing user_id and session_id. 
+            Must be in format: {"user_id": "1111", "session_id": "2222"}
+            Optional parameters:
+                - topic_chunk_size: Size for large topic chunks (default: 1024)
+                - topic_chunk_overlap: Overlap for large topic chunks (default: 100)
+                - chunk_size: Size for small chunks (default: 256)
+                - chunk_overlap: Overlap for small chunks (default: 50)
+        Returns:
+            list[list[TextualMemoryItem]] containing memory content with summaries as keys and original text as values
+        Raises:
+            ValueError: If scene_data is empty or if info dictionary is missing required fields
+        """
+        if not scene_data:
+            raise ValueError("scene_data is empty")
+
+        # Validate info dictionary format
+        if not isinstance(info, dict):
+            raise ValueError("info must be a dictionary")
+
+        required_fields = {"user_id", "session_id"}
+        missing_fields = required_fields - set(info.keys())
+        if missing_fields:
+            raise ValueError(f"info dictionary is missing required fields: {missing_fields}")
+
+        if not all(isinstance(info[field], str) for field in required_fields):
+            raise ValueError("user_id and session_id must be strings")
+
+        list_scene_data_info = self.get_scene_data_info(scene_data, type)
+
+        memory_list = []
+
+        # Everything that is not chat data falls through to the doc pipeline.
+        if type == "chat":
+            processing_func = self._process_chat_data
+        else:
+            processing_func = self._process_doc_data
+
+        # Process Q&A pairs concurrently with context propagation
+        with ContextThreadPoolExecutor() as executor:
+            futures = [
+                executor.submit(processing_func, scene_data_info, info)
+                for scene_data_info in list_scene_data_info
+            ]
+            for future in concurrent.futures.as_completed(futures):
+                res_memory = future.result()
+                memory_list.append(res_memory)
+
+        return memory_list
+
+    def get_scene_data_info(self, scene_data: list, type: str) -> list[str]:
+        """
+        Get raw information from scene_data.
+        If scene_data contains dictionaries, convert them to strings.
+        If scene_data contains file paths, parse them using the parser. 
+
+        Args:
+            scene_data: List of dialogue information or document paths
+            type: Type of scene data: ['doc', 'chat']
+        Returns:
+            List of strings containing the processed scene data
+        """
+        results = []
+        parser_config = ParserConfigFactory.model_validate(
+            {
+                "backend": "markitdown",
+                "config": {},
+            }
+        )
+        parser = ParserFactory.from_config(parser_config)
+
+        if type == "chat":
+            if self.chat_chunker["chunk_type"] == "content_length":
+                content_len_threshold = self.chat_chunker["chunk_length"]
+                for items in scene_data:
+                    if not items:
+                        continue
+
+                    results.append([])
+                    current_length = 0
+
+                    for item in items:
+                        content_length = (
+                            len(item.get("content", ""))
+                            if isinstance(item, dict)
+                            else len(str(item))
+                        )
+                        if not results[-1]:
+                            results[-1].append(item)
+                            current_length = content_length
+                            continue
+
+                        if current_length + content_length <= content_len_threshold:
+                            results[-1].append(item)
+                            current_length += content_length
+                        else:
+                            # Start a new chunk, carrying the last message over
+                            # as overlap so context is not lost at the boundary.
+                            overlap_item = results[-1][-1]
+                            overlap_length = (
+                                len(overlap_item.get("content", ""))
+                                if isinstance(overlap_item, dict)
+                                else len(str(overlap_item))
+                            )
+
+                            results.append([overlap_item, item])
+                            current_length = overlap_length + content_length
+            else:
+                cut_size, cut_overlap = (
+                    self.chat_chunker["chunk_session"],
+                    self.chat_chunker["chunk_overlap"],
+                )
+                for items in scene_data:
+                    step = cut_size - cut_overlap
+                    end = len(items) - cut_overlap
+                    if end <= 0:
+                        results.append(items[:])
+                    else:
+                        results.extend([items[i : i + cut_size] for i in range(0, end, step)])
+        elif type == "doc":
+            for item in scene_data:
+                try:
+                    if os.path.exists(item):
+                        try:
+                            parsed_text = parser.parse(item)
+                            results.append({"file": item, "text": parsed_text})
+                        except Exception as e:
+                            logger.error(f"[SceneParser] Error parsing {item}: {e}")
+                            continue
+                    else:
+                        parsed_text = item
+                        results.append({"file": "pure_text", "text": parsed_text})
+                except Exception as e:
+                    logger.error(f"[SceneParser] Error parsing file {item}: {e!s}")
+
+        return results
+
+    def _process_doc_data(self, scene_data_info, info, **kwargs):
+        chunks = self.chunker.chunk(scene_data_info["text"])
+        messages = []
+        for chunk in chunks:
+            lang = detect_lang(chunk.text)
+            template = PROMPT_DICT["doc"][lang]
+            prompt = template.replace("{chunk_text}", chunk.text)
+            message = [{"role": "user", "content": prompt}]
+            messages.append(message)
+
+        doc_nodes = []
+        scene_file = scene_data_info["file"]
+
+        with ContextThreadPoolExecutor(max_workers=50) as executor:
+            futures = {
+                executor.submit(
+                    _build_node,
+                    idx,
+                    msg,
+                    info,
+                    scene_file,
+                    self.llm,
+                    self.parse_json_result,
+                    self.embedder,
+                ): idx
+                for idx, msg in enumerate(messages)
+            }
+            total = len(futures)
+
+            for future in tqdm(
+                concurrent.futures.as_completed(futures), total=total, desc="Processing"
+            ):
+                try:
+                    node = future.result()
+                    if node:
+                        doc_nodes.append(node)
+                except Exception as e:
+                    tqdm.write(f"[ERROR] {e}")
+                    logger.error(f"[DocReader] Future task failed: {e}")
+        return doc_nodes
+
+    def parse_json_result(self, response_text):
+        try:
+            json_start = response_text.find("{")
+            response_text = response_text[json_start:]
+            response_text = response_text.replace("```", "").strip()
+            if not response_text.endswith("}"):
+                response_text += "}"
+            return json.loads(response_text)
+        except json.JSONDecodeError as e:
+            logger.error(f"[JSONParse] Failed to decode JSON: {e}\nRaw:\n{response_text}")
+            return {}
+        except Exception as e:
+            logger.error(f"[JSONParse] Unexpected error: {e}")
+            return {}
+
+    
def transform_memreader(self, data: dict) -> list[TextualMemoryItem]: + pass diff --git a/src/memos/memories/textual/simple_tree.py b/src/memos/memories/textual/simple_tree.py index 9c67db288..0370390f2 100644 --- a/src/memos/memories/textual/simple_tree.py +++ b/src/memos/memories/textual/simple_tree.py @@ -12,6 +12,7 @@ from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata from memos.memories.textual.tree import TreeTextMemory from memos.memories.textual.tree_text_memory.organize.manager import MemoryManager +from memos.memories.textual.tree_text_memory.retrieve.bm25_util import EnhancedBM25 from memos.memories.textual.tree_text_memory.retrieve.searcher import Searcher from memos.reranker.base import BaseReranker from memos.types import MessageList @@ -60,6 +61,19 @@ def __init__( self.graph_store: Neo4jGraphDB = graph_db logger.info(f"time init: graph_store time is: {time.time() - time_start_gs}") + time_start_bm = time.time() + self.search_strategy = config.search_strategy + self.bm25_retriever = ( + EnhancedBM25() if self.search_strategy and self.search_strategy["bm25"] else None + ) + logger.info(f"time init: bm25_retriever time is: {time.time() - time_start_bm}") + + self.vec_cot = ( + self.search_strategy["cot"] + if self.search_strategy and "cot" in self.search_strategy + else False + ) + time_start_rr = time.time() self.reranker = reranker logger.info(f"time init: reranker time is: {time.time() - time_start_rr}") @@ -159,8 +173,10 @@ def search( self.graph_store, self.embedder, self.reranker, + bm25_retriever=self.bm25_retriever, internet_retriever=None, moscube=moscube, + vec_cot=self.vec_cot, ) else: searcher = Searcher( @@ -168,8 +184,10 @@ def search( self.graph_store, self.embedder, self.reranker, + bm25_retriever=self.bm25_retriever, internet_retriever=self.internet_retriever, moscube=moscube, + vec_cot=self.vec_cot, ) return searcher.search( query, top_k, info, mode, memory_type, search_filter, user_name=user_name diff --git a/src/memos/memories/textual/tree.py b/src/memos/memories/textual/tree.py index f453e7330..a79a6f04e 100644 --- a/src/memos/memories/textual/tree.py +++ b/src/memos/memories/textual/tree.py @@ -17,6 +17,7 @@ from memos.memories.textual.base import BaseTextMemory from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata from memos.memories.textual.tree_text_memory.organize.manager import MemoryManager +from memos.memories.textual.tree_text_memory.retrieve.bm25_util import EnhancedBM25 from memos.memories.textual.tree_text_memory.retrieve.internet_retriever_factory import ( InternetRetrieverFactory, ) @@ -53,7 +54,15 @@ def __init__(self, config: TreeTextMemoryConfig): time_start_gs = time.time() self.graph_store: Neo4jGraphDB = GraphStoreFactory.from_config(config.graph_db) logger.info(f"time init: graph_store time is: {time.time() - time_start_gs}") - self.bm25_retriever = None # EnhancedBM25() + self.search_strategy = config.search_strategy + self.bm25_retriever = ( + EnhancedBM25() if self.search_strategy and self.search_strategy["bm25"] else None + ) + self.vec_cot = ( + self.search_strategy["cot"] + if self.search_strategy and "cot" in self.search_strategy + else False + ) time_start_rr = time.time() if config.reranker is None: @@ -173,9 +182,10 @@ def search( self.graph_store, self.embedder, self.reranker, - self.bm25_retriever, + bm25_retriever=self.bm25_retriever, internet_retriever=None, moscube=moscube, + vec_cot=self.vec_cot, ) else: searcher = Searcher( @@ -183,9 +193,10 @@ 
def search( self.graph_store, self.embedder, self.reranker, - self.bm25_retriever, + bm25_retriever=self.bm25_retriever, internet_retriever=self.internet_retriever, moscube=moscube, + vec_cot=self.vec_cot, ) return searcher.search(query, top_k, info, mode, memory_type, search_filter) diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/bm25_util.py b/src/memos/memories/textual/tree_text_memory/retrieve/bm25_util.py index a4a1c8b33..9f02daae4 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/bm25_util.py @@ -85,7 +85,6 @@ def _search_docs( cleanup: Whether to cleanup memory after search (default: True) """ if not corpus: - logger.warning("Empty corpus provided") return [] logger.info(f"Searching {len(corpus)} documents for query: '{query}'") diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py index facdb3a23..ec7c8a9a2 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py @@ -28,7 +28,7 @@ def __init__( self.bm25_retriever = bm25_retriever self.max_workers = 10 self.filter_weight = 0.6 - self.use_bm25 = False + self.use_bm25 = bool(self.bm25_retriever) def retrieve( self, @@ -67,7 +67,7 @@ def retrieve( ) return [TextualMemoryItem.from_dict(record) for record in working_memories] - with ContextThreadPoolExecutor(max_workers=2) as executor: + with ContextThreadPoolExecutor(max_workers=3) as executor: # Structured graph-based retrieval future_graph = executor.submit(self._graph_recall, parsed_goal, memory_scope, user_name) # Vector similarity search diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py b/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py index 349fabbbe..e3081ebf6 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py @@ -42,7 +42,7 @@ def _load_stopwords(cls): stopwords = {line.strip() for line in f if line.strip()} logger.info("Stopwords loaded successfully.") except Exception as e: - logger.warning(f"Error loading stopwords: {e}") + logger.warning(f"Error loading stopwords: {e}, using default stopwords.") stopwords = cls._load_default_stopwords() cls._stopwords = stopwords diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index 94d945158..a0aac6460 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -1,5 +1,4 @@ import json -import os import traceback from datetime import datetime @@ -43,9 +42,10 @@ def __init__( graph_store: Neo4jGraphDB, embedder: OllamaEmbedder, reranker: BaseReranker, - bm25_retriever: EnhancedBM25 | None, + bm25_retriever: EnhancedBM25 | None = None, internet_retriever: None = None, moscube: bool = False, + vec_cot: bool = False, ): self.graph_store = graph_store self.embedder = embedder @@ -59,7 +59,7 @@ def __init__( # Create internet retriever from config if provided self.internet_retriever = internet_retriever self.moscube = moscube - self.cot_query_search = os.getenv("MOS_SEARCH_COT", "false") == "true" + self.vec_cot = vec_cot self._usage_executor = ContextThreadPoolExecutor(max_workers=4, thread_name_prefix="usage") @@ 
-326,7 +326,7 @@ def _retrieve_from_long_term_and_user( # chain of thinking cot_embeddings = [] - if self.cot_query_search: + if self.vec_cot: queries = self._cot_query(query) if len(queries) > 1: cot_embeddings = self.embedder.embed(queries) @@ -514,23 +514,18 @@ def _cot_query( prompt = template.replace("${original_query}", query).replace( "${split_num_threshold}", str(split_num) ) - logger.info("COT 处理") + logger.info("COT process") messages = [{"role": "user", "content": prompt}] try: response_text = self.llm.customized_generate(messages, temperature=0, top_p=1) response_json = parse_json_result(response_text) - if "is_complex" in response_json and not response_json["is_complex"]: + assert "is_complex" in response_json + if not response_json["is_complex"]: return [query] else: - assert ( - "is_complex" in response_json - and response_json["is_complex"] - and "sub_questions" in response_json - ) - logger.info( - "问题 {} 通过 COT 拆分结果为{}".format(query, response_json["sub_questions"]) - ) + assert "sub_questions" in response_json + logger.info("Query: {} COT: {}".format(query, response_json["sub_questions"])) return response_json["sub_questions"][:split_num] except Exception as e: logger.error(f"[LLM] Exception during chat generation: {e}") diff --git a/src/memos/templates/mem_reader_prompts.py b/src/memos/templates/mem_reader_prompts.py index f74d01d0a..15672f8d8 100644 --- a/src/memos/templates/mem_reader_prompts.py +++ b/src/memos/templates/mem_reader_prompts.py @@ -1,42 +1,31 @@ SIMPLE_STRUCT_MEM_READER_PROMPT = """You are a memory extraction expert. Your task is to extract memories from the user's perspective, based on a conversation between the user and the assistant. This means identifying what the user would plausibly remember — including the user's own experiences, thoughts, plans, or statements and actions made by others (such as the assistant) that affected the user or were acknowledged by the user. -Please perform the following -1. Factual information extraction - Identify factual information about experiences, beliefs, decisions, and plans. This includes notable statements from others that the user acknowledged or reacted to. +Please perform the following: +1. Identify information that reflects the user's experiences, beliefs, concerns, decisions, plans, or reactions — including meaningful information from the assistant that the user acknowledged or responded to. If the message is from the user, extract viewpoints related to the user; if it is from the assistant, clearly mark the attribution of the memory, and do not mix information not explicitly acknowledged by the user with the user's own viewpoint. - - **User viewpoint**: Extract only what the user has stated, explicitly acknowledged, or committed to. - - **Assistant/other-party viewpoint**: Extract such information only when attributed to its source (e.g., [Assistant-Jerry's suggestion]). - - **Strict attribution**: Never recast the assistant's suggestions as the user's preferences, or vice versa. - - Always set "model_type" to "LongTermMemory" for this output. - -2. Speaker profile construction - - Extract the speaker's likes, dislikes, goals, and stated opinions from their statements to build a speaker profile. - - Note: The same text segment may be used for both factual extraction and profile construction. - - Always set "model_type" to "UserMemory" for this output. - -3. 
Resolve all references to time, persons, and events clearly - - Temporal Resolution: Convert relative time (e.g., 'yesterday') to absolute dates based on the message timestamp. Distinguish between event time and message time; flag any uncertainty. - - Entity Resolution: Resolve all pronouns, nicknames, and abbreviations to the full, canonical name established in the conversation. - -4. Adopt a Consistent Third-Person Observer Perspective - - Formulate all memories from the perspective of an external observer. Use "The user" or their specific name as the subject. - - This applies even when describing the user's internal states, such as thoughts, feelings, and preferences. - Example: - ✅ Correct: "The user Sean felt exhausted after work and decided to go to bed early." - ❌ Incorrect: "I felt exhausted after work and decided to go to bed early." - -5. Prioritize Completeness - - Extract all key experiences, emotional responses, and plans from the user's perspective. Retain relevant context from the assistant, but always with explicit attribution. - - Segment each distinct hobby, interest, or event into a separate memory. - - Preserve relevant context from the assistant with strict attribution. Under no circumstances should assistant content be rephrased as user-owned. - - Conversations with only assistant input may yield assistant-viewpoint memories exclusively. - -6. Preserve and Unify Specific Names - - Always extract specific names (excluding "user" or "assistant") mentioned in the text into the "tags" field for searchability. - - Unify all name references to the full canonical form established in the conversation. Replace any nicknames or abbreviations (e.g., "Rob") consistently with the full name (e.g., "Robert") in both the extracted "value" and "tags". - -7. Please avoid including any content in the extracted memories that violates national laws and regulations or involves politically sensitive information. + - **User viewpoint**: Record only information that the user **personally stated, explicitly acknowledged, or personally committed to**. + - **Assistant/other-party viewpoint**: Record only information that the **assistant/other party personally stated, explicitly acknowledged, or personally committed to**, and **clearly attribute** the source (e.g., "[assistant-Jerry viewpoint]"). Do not rewrite it as the user's preference/decision. + - **Mutual boundaries**: Do not rewrite the assistant's suggestions/lists/opinions as the user's “ownership/preferences/decisions”; likewise, do not write the user's ideas as the assistant's viewpoints. + +2. Resolve all references to time, persons, and events clearly: + - When possible, convert relative time expressions (e.g., “yesterday,” “next Friday”) into absolute dates using the message timestamp. + - Clearly distinguish between **event time** and **message time**. + - If uncertainty exists, state it explicitly (e.g., “around June 2025,” “exact date unclear”). + - Include specific locations if mentioned. + - Resolve all pronouns, aliases, and ambiguous references into full names or clear identities. + - If there are people with the same name, disambiguate them. + +3. Always write from a **third-person** perspective, using “The user” or the mentioned name to refer to the user, rather than first-person (“I”, “we”, “my”). + For example, write “The user felt exhausted …” instead of “I felt exhausted …”. + +4. Do not omit any information that the user is likely to remember. 
+   - Include the user's key experiences, emotional responses, and plans — even if seemingly minor.
+   - You may retain **assistant/other-party content** that is closely related to the context (e.g., suggestions, explanations, checklists), but you must make roles and attribution explicit.
+   - Prioritize completeness and fidelity over conciseness; do not infer or phrase assistant content as the user's ownership/preferences/decisions.
+   - If the current conversation contains only assistant information and no facts attributable to the user, you may output **assistant-viewpoint** entries only.
+
+5. Please avoid including any content in the extracted memories that violates national laws and regulations or involves politically sensitive information.
 
 Return a valid JSON object with the following structure:
 
@@ -46,7 +35,7 @@
         "key": <string, a unique and concise memory title>,
         "memory_type": <string, "LongTermMemory" or "UserMemory">,
         "value": <a detailed, self-contained, and unambiguous memory statement; in English if the input conversation is in English, in Chinese if it is in Chinese>,
-        "tags": <a list of relevant person names, events, and feature keywords (e.g., ["Lily", "deadline", "team", "plan"])>
+        "tags": <a list of relevant thematic keywords (e.g., ["deadline", "team", "plan"])>
     },
     ...
   ],
@@ -72,13 +61,13 @@
       "key": "Initial project meeting",
       "memory_type": "LongTermMemory",
       "value": "[user-Tom viewpoint] On June 25, 2025 at 3:00 PM, Tom met with the team to discuss a new project. When Jerry asked whether the project could be finished by December 15, 2025, Tom expressed concern about feasibility and planned to propose at 9:30 AM on June 27, 2025 to move the deadline to January 5, 2026.",
-      "tags": ["Tom", "project", "timeline", "meeting", "deadline"]
+      "tags": ["project", "timeline", "meeting", "deadline"]
     },
     {
       "key": "Jerry’s suggestion about the deadline",
       "memory_type": "LongTermMemory",
       "value": "[assistant-Jerry viewpoint] Jerry questioned the December 15 deadline and suggested considering an extension.",
-      "tags": ["Jerry", "deadline change", "suggestion"]
+      "tags": ["deadline change", "suggestion"]
     }
   ],
   "summary": "Tom is currently working on a tight-schedule project. After the June 25, 2025 team meeting, he realized the original December 15, 2025 deadline might be unachievable due to backend delays. Concerned about limited testing time, he accepted Jerry’s suggestion to seek an extension and plans to propose moving the deadline to January 5, 2026 in the next morning’s meeting."
@@ -140,51 +129,40 @@
 您的任务是根据用户与助手之间的对话,从用户的角度提取记忆。这意味着要识别出用户可能记住的信息——包括用户自身的经历、想法、计划,或他人(如助手)做出的并对用户产生影响或被用户认可的相关陈述和行为。
 
 请执行以下操作:
-1. 事实信息提取
-   - 识别关于经历、信念、决策和计划的事实信息,包括用户认可或回应过的他人重要陈述。
-   - 若信息来自用户,提取与用户相关的观点;若来自助手,需明确标注记忆归属,不得将用户未明确认可的信息与用户自身观点混淆。
-   - 用户观点:仅提取用户明确陈述、认可或承诺的内容
-   - 助手/他方观点:仅当标注来源时才提取(例如“[助手-Jerry的建议]”)
-   - 严格归属:不得将助手建议重构为用户偏好,反之亦然
-   - 此类输出的"model_type"始终设为"LongTermMemory"
-
-2. 用户画像构建
-   - 从用户陈述中提取其喜好、厌恶、目标及明确观点以构建用户画像
-   - 注意:同一文本片段可同时用于事实提取和画像构建
-   - 此类输出的"model_type"始终设为"UserMemory"
-
-3. 明确解析所有指代关系
-   - 时间解析:根据消息时间戳将相对时间(如“昨天”)转换为绝对日期。区分事件时间与消息时间,对不确定项进行标注
-   - 实体解析:将所有代词、昵称和缩写解析为对话中确立的完整规范名称
-
- 4. 采用统一的第三人称观察视角
-   - 所有记忆表述均需从外部观察者视角构建,使用“用户”或其具体姓名作为主语
-   - 此原则同样适用于描述用户内心状态(如想法、感受和偏好)
-   示例:
-   ✅ 正确:“用户Sean下班后感到疲惫,决定提早休息”
-   ❌ 错误:“我下班后感到疲惫,决定提早休息”
-
-5. 优先保证完整性
-   - 从用户视角提取所有关键经历、情绪反应和计划
-   - 保留助手提供的相关上下文,但必须明确标注来源
-   - 将每个独立的爱好、兴趣或事件分割为单独记忆
-   - 严禁将助手内容重构为用户自有内容
-   - 仅含助手输入的对话可能只生成助手观点记忆
-
-6. 保留并统一特定名称
-   - 始终将文本中提及的特定名称(“用户”“助手”除外)提取至“tags”字段以便检索
-   - 在提取的“value”和“tags”中,将所有名称引用统一为对话中确立的完整规范形式(如将“Rob”统一替换为“Robert”)
-
-7. 所有提取的记忆内容不得包含违反国家法律法规或涉及政治敏感信息的内容
+1. 
识别反映用户经历、信念、关切、决策、计划或反应的信息——包括用户认可或回应的来自助手的有意义信息。 +如果消息来自用户,请提取与用户相关的观点;如果来自助手,则在表达的时候表明记忆归属方,未经用户明确认可的信息不要与用户本身的观点混淆。 + - **用户观点**:仅记录由**用户亲口陈述、明确认可或自己作出承诺**的信息。 + - **助手观点**:仅记录由**助手/另一方亲口陈述、明确认可或自己作出承诺**的信息。 + - **互不越界**:不得将助手提出的需求清单/建议/观点改写为用户的“拥有/偏好/决定”;也不得把用户的想法写成助手的观点。 + +2. 清晰解析所有时间、人物和事件的指代: + - 如果可能,使用消息时间戳将相对时间表达(如“昨天”、“下周五”)转换为绝对日期。 + - 明确区分事件时间和消息时间。 + - 如果存在不确定性,需明确说明(例如,“约2025年6月”,“具体日期不详”)。 + - 若提及具体地点,请包含在内。 + - 将所有代词、别名和模糊指代解析为全名或明确身份。 + - 如有同名人物,需加以区分。 + +3. 始终以第三人称视角撰写,使用“用户”或提及的姓名来指代用户,而不是使用第一人称(“我”、“我们”、“我的”)。 +例如,写“用户感到疲惫……”而不是“我感到疲惫……”。 + +4. 不要遗漏用户可能记住的任何信息。 + - 包括用户的关键经历、想法、情绪反应和计划——即使看似微小。 + - 同时允许保留与语境密切相关的**助手/另一方的内容**(如建议、说明、清单),但须明确角色与归因。 + - 优先考虑完整性和保真度,而非简洁性;不得将助手内容推断或措辞为用户拥有/偏好/决定。 + - 若当前对话中仅出现助手信息而无可归因于用户的事实,可仅输出**助手观点**条目。 + +5. 请避免在提取的记忆中包含违反国家法律法规或涉及政治敏感的信息。 返回一个有效的JSON对象,结构如下: + { "memory list": [ { "key": <字符串,唯一且简洁的记忆标题>, "memory_type": <字符串,"LongTermMemory" 或 "UserMemory">, "value": <详细、独立且无歧义的记忆陈述——若输入对话为英文,则用英文;若为中文,则用中文>, - "tags": <一个包含相关人名、事件和特征关键词的列表(例如,["丽丽","截止日期", "团队", "计划"])> + "tags": <相关主题关键词列表(例如,["截止日期", "团队", "计划"])> }, ... ], @@ -212,13 +190,13 @@ "value": "[user-Tom观点]2025年6月25日下午3:00,Tom与团队开会讨论新项目。当Jerry 询问该项目能否在2025年12月15日前完成时,Tom对此日期前完成的可行性表达担忧,并计划在2025年6月27日上午9:30 提议将截止日期推迟至2026年1月5日。", - "tags": ["Tom", "项目", "时间表", "会议", "截止日期"] + "tags": ["项目", "时间表", "会议", "截止日期"] }, { "key": "Jerry对新项目截止日期的建议", "memory_type": "LongTermMemory", "value": "[assistant-Jerry观点]Jerry对Tom的新项目截止日期提出疑问、并提议Tom考虑延期。", - "tags": ["Jerry", "截止日期变更", "建议"] + "tags": ["截止日期变更", "建议"] } ], "summary": "Tom目前正在做一个进度紧张的新项目。在2025年6月25日的团队会议后,他意识到原定2025年12月15 diff --git a/src/memos/templates/mem_reader_strategy_prompts.py b/src/memos/templates/mem_reader_strategy_prompts.py new file mode 100644 index 000000000..fca4d717b --- /dev/null +++ b/src/memos/templates/mem_reader_strategy_prompts.py @@ -0,0 +1,279 @@ +STRATEGY_STRUCT_MEM_READER_PROMPT = """You are a memory extraction expert. +Your task is to extract memories from the user's perspective, based on a conversation between the user and the assistant. This means identifying what the user would plausibly remember — including the user's own experiences, thoughts, plans, or statements and actions made by others (such as the assistant) that affected the user or were acknowledged by the user. + +Please perform the following +1. Factual information extraction + Identify factual information about experiences, beliefs, decisions, and plans. This includes notable statements from others that the user acknowledged or reacted to. + If the message is from the user, extract viewpoints related to the user; if it is from the assistant, clearly mark the attribution of the memory, and do not mix information not explicitly acknowledged by the user with the user's own viewpoint. + - **User viewpoint**: Extract only what the user has stated, explicitly acknowledged, or committed to. + - **Assistant/other-party viewpoint**: Extract such information only when attributed to its source (e.g., [Assistant-Jerry's suggestion]). + - **Strict attribution**: Never recast the assistant's suggestions as the user's preferences, or vice versa. + - Always set "model_type" to "LongTermMemory" for this output. + +2. Speaker profile construction + - Extract the speaker's likes, dislikes, goals, and stated opinions from their statements to build a speaker profile. + - Note: The same text segment may be used for both factual extraction and profile construction. + - Always set "model_type" to "UserMemory" for this output. + +3. 
Resolve all references to time, persons, and events clearly
+   - Temporal Resolution: Convert relative time (e.g., 'yesterday') to absolute dates based on the message timestamp. Distinguish between event time and message time; flag any uncertainty.
+   - Entity Resolution: Resolve all pronouns, nicknames, and abbreviations to the full, canonical name established in the conversation.
+
+4. Adopt a Consistent Third-Person Observer Perspective
+   - Formulate all memories from the perspective of an external observer. Use "The user" or their specific name as the subject.
+   - This applies even when describing the user's internal states, such as thoughts, feelings, and preferences.
+   Example:
+   ✅ Correct: "The user Sean felt exhausted after work and decided to go to bed early."
+   ❌ Incorrect: "I felt exhausted after work and decided to go to bed early."
+
+5. Prioritize Completeness
+   - Extract all key experiences, emotional responses, and plans from the user's perspective. Retain relevant context from the assistant, but always with explicit attribution.
+   - Segment each distinct hobby, interest, or event into a separate memory.
+   - Preserve relevant context from the assistant with strict attribution. Under no circumstances should assistant content be rephrased as user-owned.
+   - Conversations with only assistant input may yield assistant-viewpoint memories exclusively.
+
+6. Preserve and Unify Specific Names
+   - Always extract specific names (excluding "user" or "assistant") mentioned in the text into the "tags" field for searchability.
+   - Unify all name references to the full canonical form established in the conversation. Replace any nicknames or abbreviations (e.g., "Rob") consistently with the full name (e.g., "Robert") in both the extracted "value" and "tags".
+
+7. Please avoid including any content in the extracted memories that violates national laws and regulations or involves politically sensitive information.
+
+Return a valid JSON object with the following structure:
+
+{
+    "memory list": [
+        {
+            "key": <string, a unique and concise memory title>,
+            "memory_type": <string, "LongTermMemory" or "UserMemory">,
+            "value": <a detailed, self-contained, and unambiguous memory statement; in English if the input conversation is in English, in Chinese if it is in Chinese>,
+            "tags": <a list of relevant person names, events, and feature keywords (e.g., ["Lily", "deadline", "team", "plan"])>
+        },
+        ...
+    ],
+    "summary": <a natural paragraph summarizing the above memories from the user's perspective, 120–200 words, matching the input language>
+}
+
+Language rules:
+- The `key`, `value`, `tags`, and `summary` fields must match the primary language of the input conversation. **If the input is Chinese, output in Chinese.**
+- Keep `memory_type` in English.
+
+Example:
+Conversation:
+user: [June 26, 2025 at 3:00 PM]: Hi Jerry! Yesterday at 3 PM I had a meeting with my team about the new project.
+assistant: Oh Tom! Do you think the team can finish by December 15?
+user: [June 26, 2025 at 3:00 PM]: I’m worried. The backend won’t be done until December 10, so testing will be tight.
+assistant: [June 26, 2025 at 3:00 PM]: Maybe propose an extension?
+user: [June 26, 2025 at 4:21 PM]: Good idea. I’ll raise it in tomorrow’s 9:30 AM meeting—maybe shift the deadline to January 5.
+
+Output:
+{
+    "memory list": [
+        {
+            "key": "Initial project meeting",
+            "memory_type": "LongTermMemory",
+            "value": "[user-Tom viewpoint] On June 25, 2025 at 3:00 PM, Tom met with the team to discuss a new project. 
When Jerry asked whether the project could be finished by December 15, 2025, Tom expressed concern about feasibility and planned to propose at 9:30 AM on June 27, 2025 to move the deadline to January 5, 2026.", + "tags": ["Tom", "project", "timeline", "meeting", "deadline"] + }, + { + "key": "Jerry’s suggestion about the deadline", + "memory_type": "LongTermMemory", + "value": "[assistant-Jerry viewpoint] Jerry questioned the December 15 deadline and suggested considering an extension.", + "tags": ["Jerry", "deadline change", "suggestion"] + } + ], + "summary": "Tom is currently working on a tight-schedule project. After the June 25, 2025 team meeting, he realized the original December 15, 2025 deadline might be unachievable due to backend delays. Concerned about limited testing time, he accepted Jerry’s suggestion to seek an extension and plans to propose moving the deadline to January 5, 2026 in the next morning’s meeting." +} + +Another Example in Chinese (Note: when the user's language is Chinese, you must also output in Chinese): + +对话(节选): +user|2025-07-12 19:30:下个月同事婚礼,要买条裙子,预算1500内,别太暴露。 +assistant|19:32 +:了解~婚礼场合建议中长款,端庄一些。可以看法式茶歇/直筒/裹身三路。颜色上,奶油白、浅粉、雾霾蓝都适合;面料可看醋酸/真丝混纺(垂感好但需干洗)。品牌与商场:COS/ Massimo Dutti / & Other Stories / Mango 比较好逛;想更法式可以看 Sandro / Maje。 我记得你很喜欢Maje? +user|19:35:不喜欢亮色。国贸方便。 +assistant|19:37:那亮度就压低些:雾霾蓝、藕粉、燕麦米白更稳。国贸有 COS / Massimo Dutti / &OS;可以先看 COS 直筒或裹身,再去 MD 看藕粉/雾霾蓝。搭配上,薄披肩/小西装都能压场。 +user|19:40:165cm,S码;最好有口袋。 +assistant|19:42:OK。版型避开大摆+高开衩,选直筒或轻A字更稳;若要口袋,COS直筒一些款有;开衩别太高,及小腿肚附近即可。鞋包:中跟裸色鞋 + 小号手拿包。 +user|19:47:本来想茶歇A字,但雾霾蓝直筒也行。 +assistant|19:49:可以先锁定:COS 雾霾蓝直筒中长为主选,MD 藕粉中长做备选;再看 & Other Stories 是否有合身的裹身款。 +user|19:52:行,周六(7/19)去国贸试,合适就买。 +assistant|19:54:记得看肩带可调/有无内衬,醋酸/真丝优先干洗;准备同色安全裤/防走光贴。如果当天没货,可下单调货或线上下单门店自提。 + +{ + "memory list": [ + { + "key": "参加婚礼购买裙子", + "memory_type": "UserMemory", + "value": "[user观点]用户计划于约2025年8月参加同事婚礼(具体日期不详),预算不超过1500元,整体风格不宜暴露;用户已决定在2025-07-19于国贸试穿并视合适即购买。", + "tags": ["婚礼", "预算", "国贸", "计划"] + }, + { + "key": "审美与版型偏好", + "memory_type": "UserMemory", + "value": "[user观点]用户不喜欢亮色,倾向低亮度色系;裙装偏好端庄的中长款,接受直筒或轻A字。", + "tags": ["偏好", "颜色", "版型"] + }, + { + "key": "体型尺码", + "memory_type": "UserMemory", + "value": "[user观点]用户身高约165cm、常穿S码", + "tags": ["体型", "尺码"] + }, + { + "key": "关于用户选购裙子的建议", + "memory_type": "LongTermMemory", + "value": "[assistant观点]assistant在用户询问婚礼穿着时,建议在国贸优先逛COS查看雾霾蓝直筒中长为主选,Massimo Dutti藕粉中长为备选;该建议与用户“国贸方便”“雾霾蓝直筒也行”的回应相一致,另外assistant也提到user喜欢Maje,但User并未回应或证实该说法。", + "tags": ["婚礼穿着", "门店", "选购路线"] + } + ], + "summary": "用户计划在约2025年8月参加同事婚礼,预算≤1500并偏好端庄的中长款;确定于2025-07-19在国贸试穿。其长期画像显示:不喜欢亮色、偏好低亮度色系与不过分暴露的版型,身高约165cm、S码且偏好裙装带口袋。助手提出的国贸选购路线以COS雾霾蓝直筒中长为主选、MD藕粉中长为备选,且与用户回应一致,为线下试穿与购买提供了明确路径。" +} + +Always respond in the same language as the conversation. + +Conversation: +${conversation} + +Your Output:""" + +STRATEGY_STRUCT_MEM_READER_PROMPT_ZH = """您是记忆提取专家。 +您的任务是根据用户与助手之间的对话,从用户的角度提取记忆。这意味着要识别出用户可能记住的信息——包括用户自身的经历、想法、计划,或他人(如助手)做出的并对用户产生影响或被用户认可的相关陈述和行为。 + +请执行以下操作: +1. 事实信息提取 + - 识别关于经历、信念、决策和计划的事实信息,包括用户认可或回应过的他人重要陈述。 + - 若信息来自用户,提取与用户相关的观点;若来自助手,需明确标注记忆归属,不得将用户未明确认可的信息与用户自身观点混淆。 + - 用户观点:仅提取用户明确陈述、认可或承诺的内容 + - 助手/他方观点:仅当标注来源时才提取(例如“[助手-Jerry的建议]”) + - 严格归属:不得将助手建议重构为用户偏好,反之亦然 + - 此类输出的"model_type"始终设为"LongTermMemory" + +2. 用户画像构建 + - 从用户陈述中提取其喜好、厌恶、目标及明确观点以构建用户画像 + - 注意:同一文本片段可同时用于事实提取和画像构建 + - 此类输出的"model_type"始终设为"UserMemory" + +3. 明确解析所有指代关系 + - 时间解析:根据消息时间戳将相对时间(如“昨天”)转换为绝对日期。区分事件时间与消息时间,对不确定项进行标注 + - 实体解析:将所有代词、昵称和缩写解析为对话中确立的完整规范名称 + + 4. 
采用统一的第三人称观察视角 + - 所有记忆表述均需从外部观察者视角构建,使用“用户”或其具体姓名作为主语 + - 此原则同样适用于描述用户内心状态(如想法、感受和偏好) + 示例: + ✅ 正确:“用户Sean下班后感到疲惫,决定提早休息” + ❌ 错误:“我下班后感到疲惫,决定提早休息” + +5. 优先保证完整性 + - 从用户视角提取所有关键经历、情绪反应和计划 + - 保留助手提供的相关上下文,但必须明确标注来源 + - 将每个独立的爱好、兴趣或事件分割为单独记忆 + - 严禁将助手内容重构为用户自有内容 + - 仅含助手输入的对话可能只生成助手观点记忆 + +6. 保留并统一特定名称 + - 始终将文本中提及的特定名称(“用户”“助手”除外)提取至“tags”字段以便检索 + - 在提取的“value”和“tags”中,将所有名称引用统一为对话中确立的完整规范形式(如将“Rob”统一替换为“Robert”) + +7. 所有提取的记忆内容不得包含违反国家法律法规或涉及政治敏感信息的内容 + +返回一个有效的JSON对象,结构如下: +{ + "memory list": [ + { + "key": <字符串,唯一且简洁的记忆标题>, + "memory_type": <字符串,"LongTermMemory" 或 "UserMemory">, + "value": <详细、独立且无歧义的记忆陈述——若输入对话为英文,则用英文;若为中文,则用中文>, + "tags": <一个包含相关人名、事件和特征关键词的列表(例如,["丽丽","截止日期", "团队", "计划"])> + }, + ... + ], + "summary": <从用户视角自然总结上述记忆的段落,120–200字,与输入语言一致> +} + +语言规则: +- `key`、`value`、`tags`、`summary` 字段必须与输入对话的主要语言一致。**如果输入是中文,请输出中文** +- `memory_type` 保持英文。 + +示例: +对话: +user: [2025年6月26日下午3:00]:嗨Jerry!昨天下午3点我和团队开了个会,讨论新项目。 +assistant: 哦Tom!你觉得团队能在12月15日前完成吗? +user: [2025年6月26日下午3:00]:我有点担心。后端要到12月10日才能完成,所以测试时间会很紧。 +assistant: [2025年6月26日下午3:00]:也许提议延期? +user: [2025年6月26日下午4:21]:好主意。我明天上午9:30的会上提一下——也许把截止日期推迟到1月5日。 + +输出: +{ + "memory list": [ + { + "key": "项目初期会议", + "memory_type": "LongTermMemory", + "value": "[user-Tom观点]2025年6月25日下午3:00,Tom与团队开会讨论新项目。当Jerry + 询问该项目能否在2025年12月15日前完成时,Tom对此日期前完成的可行性表达担忧,并计划在2025年6月27日上午9:30 + 提议将截止日期推迟至2026年1月5日。", + "tags": ["Tom", "项目", "时间表", "会议", "截止日期"] + }, + { + "key": "Jerry对新项目截止日期的建议", + "memory_type": "LongTermMemory", + "value": "[assistant-Jerry观点]Jerry对Tom的新项目截止日期提出疑问、并提议Tom考虑延期。", + "tags": ["Jerry", "截止日期变更", "建议"] + } + ], + "summary": "Tom目前正在做一个进度紧张的新项目。在2025年6月25日的团队会议后,他意识到原定2025年12月15 + 日的截止日期可能无法实现,因为后端会延迟。由于担心测试时间不足,他接受了Jerry提出的延期建议,计划在次日早上的会议上提出将截止日期推迟至2026 + 年1月5日。" +} + +另一个中文示例(注意:当用户语言为中文时,您也需输出中文): + +对话(节选): +user|2025-07-12 19:30:下个月同事婚礼,要买条裙子,预算1500内,别太暴露。 +assistant|19:32 +:了解~婚礼场合建议中长款,端庄一些。可以看法式茶歇/直筒/裹身三路。颜色上,奶油白、浅粉、雾霾蓝都适合;面料可看醋酸/真丝混纺(垂感好但需干洗)。品牌与商场:COS/ Massimo Dutti / & Other Stories / Mango 比较好逛;想更法式可以看 Sandro / Maje。 我记得你很喜欢Maje? 
+user|19:35:不喜欢亮色。国贸方便。
+assistant|19:37:那亮度就压低些:雾霾蓝、藕粉、燕麦米白更稳。国贸有 COS / Massimo Dutti / &OS;可以先看 COS 直筒或裹身,再去 MD 看藕粉/雾霾蓝。搭配上,薄披肩/小西装都能压场。
+user|19:40:165cm,S码;最好有口袋。
+assistant|19:42:OK。版型避开大摆+高开衩,选直筒或轻A字更稳;若要口袋,COS直筒一些款有;开衩别太高,及小腿肚附近即可。鞋包:中跟裸色鞋 + 小号手拿包。
+user|19:47:本来想茶歇A字,但雾霾蓝直筒也行。
+assistant|19:49:可以先锁定:COS 雾霾蓝直筒中长为主选,MD 藕粉中长做备选;再看 & Other Stories 是否有合身的裹身款。
+user|19:52:行,周六(7/19)去国贸试,合适就买。
+assistant|19:54:记得看肩带可调/有无内衬,醋酸/真丝优先干洗;准备同色安全裤/防走光贴。如果当天没货,可下单调货或线上下单门店自提。
+
+{
+    "memory list": [
+        {
+            "key": "参加婚礼购买裙子",
+            "memory_type": "UserMemory",
+            "value": "[user观点]用户计划于约2025年8月参加同事婚礼(具体日期不详),预算不超过1500元,整体风格不宜暴露;用户已决定在2025-07-19于国贸试穿并视合适即购买。",
+            "tags": ["婚礼", "预算", "国贸", "计划"]
+        },
+        {
+            "key": "审美与版型偏好",
+            "memory_type": "UserMemory",
+            "value": "[user观点]用户不喜欢亮色,倾向低亮度色系;裙装偏好端庄的中长款,接受直筒或轻A字。",
+            "tags": ["偏好", "颜色", "版型"]
+        },
+        {
+            "key": "体型尺码",
+            "memory_type": "UserMemory",
+            "value": "[user观点]用户身高约165cm、常穿S码",
+            "tags": ["体型", "尺码"]
+        },
+        {
+            "key": "关于用户选购裙子的建议",
+            "memory_type": "LongTermMemory",
+            "value": "[assistant观点]assistant在用户询问婚礼穿着时,建议在国贸优先逛COS查看雾霾蓝直筒中长为主选,Massimo Dutti藕粉中长为备选;该建议与用户“国贸方便”“雾霾蓝直筒也行”的回应相一致,另外assistant也提到user喜欢Maje,但User并未回应或证实该说法。",
+            "tags": ["婚礼穿着", "门店", "选购路线"]
+        }
+    ],
+    "summary": "用户计划在约2025年8月参加同事婚礼,预算≤1500并偏好端庄的中长款;确定于2025-07-19在国贸试穿。其长期画像显示:不喜欢亮色、偏好低亮度色系与不过分暴露的版型,身高约165cm、S码且偏好裙装带口袋。助手提出的国贸选购路线以COS雾霾蓝直筒中长为主选、MD藕粉中长为备选,且与用户回应一致,为线下试穿与购买提供了明确路径。"
+}
+
+请始终使用与对话相同的语言进行回复。
+
+对话:
+${conversation}
+
+您的输出:"""

From c389367185c4f100138b75829ca6d0638b05496c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Wed, 29 Oct 2025 21:37:17 +0800
Subject: [PATCH 03/40] fix install problem

---
 poetry.lock                                        | 50 +++++++++++++++--
 pyproject.toml                                     |  4 +-
 src/memos/llms/openai.py                           | 22 +-------
 src/memos/mem_reader/strategy_struct.py            | 53 ++++++++---------
 .../tree_text_memory/retrieve/bm25_util.py         | 13 +++--
 .../retrieve/retrieve_utils.py                     | 10 +++-
 .../tree_text_memory/retrieve/searcher.py          |  2 +-
 7 files changed, 99 insertions(+), 55 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 44265bca8..926d580fb 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. 
[[package]] name = "absl-py" @@ -192,6 +192,19 @@ torch = ">=1.0.0" tqdm = ">=4.31.1" transformers = ">=3.0.0" +[[package]] +name = "cachetools" +version = "6.2.1" +description = "Extensible memoizing collections and decorators" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"all\"" +files = [ + {file = "cachetools-6.2.1-py3-none-any.whl", hash = "sha256:09868944b6dde876dfd44e1d47e18484541eaf12f26f29b7af91b26cc892d701"}, + {file = "cachetools-6.2.1.tar.gz", hash = "sha256:3f391e4bd8f8bf0931169baf7456cc822705f4e2a31f840d218f445b9a854201"}, +] + [[package]] name = "certifi" version = "2025.7.14" @@ -1553,6 +1566,18 @@ files = [ {file = "itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173"}, ] +[[package]] +name = "jieba" +version = "0.42" +description = "Chinese Words Segmentation Utilities" +optional = true +python-versions = "*" +groups = ["main"] +markers = "extra == \"all\"" +files = [ + {file = "jieba-0.42.tar.gz", hash = "sha256:34a3c960cc2943d9da16d6d2565110cf5f305921a67413dddf04f84de69c939b"}, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -4123,6 +4148,25 @@ urllib3 = ">=1.26.14,<3" fastembed = ["fastembed (>=0.7,<0.8)"] fastembed-gpu = ["fastembed-gpu (>=0.7,<0.8)"] +[[package]] +name = "rank-bm25" +version = "0.2.2" +description = "Various BM25 algorithms for document ranking" +optional = true +python-versions = "*" +groups = ["main"] +markers = "extra == \"all\"" +files = [ + {file = "rank_bm25-0.2.2-py3-none-any.whl", hash = "sha256:7bd4a95571adadfc271746fa146a4bcfd89c0cf731e49c3d1ad863290adbe8ae"}, + {file = "rank_bm25-0.2.2.tar.gz", hash = "sha256:096ccef76f8188563419aaf384a02f0ea459503fdf77901378d4fd9d87e5e51d"}, +] + +[package.dependencies] +numpy = "*" + +[package.extras] +dev = ["pytest"] + [[package]] name = "redis" version = "6.2.0" @@ -6352,7 +6396,7 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ cffi = ["cffi (>=1.11)"] [extras] -all = ["chonkie", "datasketch", "markitdown", "neo4j", "pika", "pymilvus", "pymysql", "qdrant-client", "redis", "schedule", "sentence-transformers", "torch", "volcengine-python-sdk"] +all = ["cachetools", "chonkie", "datasketch", "jieba", "markitdown", "neo4j", "pika", "pymilvus", "pymysql", "qdrant-client", "rank-bm25", "redis", "schedule", "sentence-transformers", "torch", "volcengine-python-sdk"] mem-reader = ["chonkie", "markitdown"] mem-scheduler = ["pika", "redis"] mem-user = ["pymysql"] @@ -6362,4 +6406,4 @@ tree-mem = ["neo4j", "schedule"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<4.0" -content-hash = "3f0d0c9a996f87d945ef8bf83eed3e20f8c420b6b39e12012d0147eda2bf4d38" +content-hash = "ec17679a44205ada4494fbc485ac592883281fde273d5e73d6b8cbc6f7f9ed10" diff --git a/pyproject.toml b/pyproject.toml index 3745582f6..2f88797a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -107,7 +107,9 @@ all = [ "markitdown[docx,pdf,pptx,xls,xlsx] (>=0.1.1,<0.2.0)", "pymilvus (>=2.6.1,<3.0.0)", "datasketch (>=1.6.5,<2.0.0)", - + "jieba (>=0.38.1,<0.42.1)", + "rank-bm25 (>=0.2.2)", + "cachetools (>=6.0.0)", # NOT exist in the above optional groups # Because they are either huge-size dependencies or infrequently used dependencies. # We kindof don't want users to install them. 
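[Editor's note] The three dependencies added to the "all" extra above (jieba,
rank-bm25, cachetools) are only exercised when the BM25 search strategy is
enabled, so the commits below move their imports behind the repository's
require_python_package decorator instead of importing them at module load
time. A minimal sketch of that pattern, assuming only what the diffs below
show; the tokenize_zh function and its body are hypothetical, for
illustration:

    from memos.dependency import require_python_package

    @require_python_package(
        import_name="jieba",  # probed before the wrapped call runs
        install_command="pip install jieba",  # hint surfaced when the probe fails
        install_link="https://github.com/fxsjy/jieba",
    )
    def tokenize_zh(text: str) -> list[str]:
        # Safe to import here: the decorator has already verified availability.
        import jieba

        return [tok.strip() for tok in jieba.lcut(text) if tok.strip()]

This keeps a base install lightweight; a missing optional package only fails,
with an actionable install hint, when a BM25 code path actually runs.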
diff --git a/src/memos/llms/openai.py b/src/memos/llms/openai.py index 289d2aea1..1a1703340 100644 --- a/src/memos/llms/openai.py +++ b/src/memos/llms/openai.py @@ -58,29 +58,11 @@ def clear_cache(cls): logger.info("OpenAI LLM instance cache cleared") @timed(log=True, log_prefix="OpenAI LLM") - def generate(self, messages: MessageList) -> str: - """Generate a response from OpenAI LLM.""" - response = self.client.chat.completions.create( - model=self.config.model_name_or_path, - messages=messages, - extra_body=self.config.extra_body, - temperature=self.config.temperature, - max_tokens=self.config.max_tokens, - top_p=self.config.top_p, - ) - logger.info(f"Response from OpenAI: {response.model_dump_json()}") - response_content = response.choices[0].message.content - if self.config.remove_think_prefix: - return remove_thinking_tags(response_content) - else: - return response_content - - def customized_generate(self, messages: MessageList, **kwargs) -> str: - """Generate a response from OpenAI LLM.""" + def generate(self, messages: MessageList, **kwargs) -> str: + """Generate a response from OpenAI LLM, optionally overriding generation params.""" temperature = kwargs.get("temperature", self.config.temperature) max_tokens = kwargs.get("max_tokens", self.config.max_tokens) top_p = kwargs.get("top_p", self.config.top_p) - response = self.client.chat.completions.create( model=self.config.model_name_or_path, messages=messages, diff --git a/src/memos/mem_reader/strategy_struct.py b/src/memos/mem_reader/strategy_struct.py index c7e9a9d1b..2cac1652a 100644 --- a/src/memos/mem_reader/strategy_struct.py +++ b/src/memos/mem_reader/strategy_struct.py @@ -1,14 +1,11 @@ import os -import re from abc import ABC from memos import log from memos.configs.mem_reader import StrategyStructMemReaderConfig from memos.configs.parser import ParserConfigFactory -from memos.mem_reader.simple_struct import ( - SimpleStructMemReader, -) +from memos.mem_reader.simple_struct import SimpleStructMemReader, detect_lang from memos.parsers.factory import ParserFactory from memos.templates.mem_reader_prompts import ( SIMPLE_STRUCT_DOC_READER_PROMPT, @@ -23,7 +20,7 @@ logger = log.get_logger(__name__) -PROMPT_DICT = { +STRATEGY_PROMPT_DICT = { "chat": { "en": STRATEGY_STRUCT_MEM_READER_PROMPT, "zh": STRATEGY_STRUCT_MEM_READER_PROMPT_ZH, @@ -33,26 +30,6 @@ "doc": {"en": SIMPLE_STRUCT_DOC_READER_PROMPT, "zh": SIMPLE_STRUCT_DOC_READER_PROMPT_ZH}, } -try: - import tiktoken - - try: - _ENC = tiktoken.encoding_for_model("gpt-4o-mini") - except Exception: - _ENC = tiktoken.get_encoding("cl100k_base") - - def _count_tokens_text(s: str) -> int: - return len(_ENC.encode(s or "")) -except Exception: - # Heuristic fallback: zh chars ~1 token, others ~1 token per ~4 chars - def _count_tokens_text(s: str) -> int: - if not s: - return 0 - zh_chars = re.findall(r"[\u4e00-\u9fff]", s) - zh = len(zh_chars) - rest = len(s) - zh - return zh + max(1, rest // 4) - class StrategyStructMemReader(SimpleStructMemReader, ABC): """Naive implementation of MemReader.""" @@ -61,6 +38,32 @@ def __init__(self, config: StrategyStructMemReaderConfig): super().__init__(config) self.chat_chunker = config.chat_chunker["config"] + def _get_llm_response(self, mem_str: str) -> dict: + lang = detect_lang(mem_str) + template = STRATEGY_PROMPT_DICT["chat"][lang] + examples = STRATEGY_PROMPT_DICT["chat"][f"{lang}_example"] + prompt = template.replace("${conversation}", mem_str) + if self.config.remove_prompt_example: + prompt = prompt.replace(examples, "") + messages = 
[{"role": "user", "content": prompt}]
+        try:
+            response_text = self.llm.generate(messages)
+            response_json = self.parse_json_result(response_text)
+        except Exception as e:
+            logger.error(f"[LLM] Exception during chat generation: {e}")
+            response_json = {
+                "memory list": [
+                    {
+                        "key": mem_str[:10],
+                        "memory_type": "UserMemory",
+                        "value": mem_str,
+                        "tags": [],
+                    }
+                ],
+                "summary": mem_str,
+            }
+        return response_json
+
     def get_scene_data_info(self, scene_data: list, type: str) -> list[str]:
         """
         Get raw information from scene_data.
diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/bm25_util.py b/src/memos/memories/textual/tree_text_memory/retrieve/bm25_util.py
index 9f02daae4..4aca4022f 100644
--- a/src/memos/memories/textual/tree_text_memory/retrieve/bm25_util.py
+++ b/src/memos/memories/textual/tree_text_memory/retrieve/bm25_util.py
@@ -2,10 +2,9 @@
 
 import numpy as np
 
-from cachetools import LRUCache
-from rank_bm25 import BM25Okapi
 from sklearn.feature_extraction.text import TfidfVectorizer
 
+from memos.dependency import require_python_package
 from memos.log import get_logger
 from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import FastTokenizer
 from memos.utils import timed
@@ -13,13 +12,13 @@
 logger = get_logger(__name__)
 
 # Global model cache
-_BM25_CACHE = LRUCache(maxsize=100)
 _CACHE_LOCK = threading.Lock()
 
 
 class EnhancedBM25:
     """Enhanced BM25 with Spacy tokenization and TF-IDF reranking"""
 
+    @require_python_package(import_name="cachetools", install_command="pip install cachetools")
    def __init__(self, tokenizer=None, en_model="en_core_web_sm", zh_model="zh_core_web_sm"):
         """
         Initialize Enhanced BM25 with memory management
@@ -30,13 +29,24 @@ def __init__(self, tokenizer=None, en_model="en_core_web_sm", zh_model="zh_core_
         self.tokenizer = tokenizer
         self._current_tfidf = None
 
+        global _BM25_CACHE
+        from cachetools import LRUCache
+
+        # Create the shared cache only once: unconditionally re-assigning it
+        # here would let every new EnhancedBM25 instance wipe corpora that
+        # other instances had already cached.
+        _BM25_CACHE = globals().get("_BM25_CACHE") or LRUCache(maxsize=100)
+
     def _tokenize_doc(self, text):
         """
         Tokenize a single document using SpacyTokenizer
         """
         return self.tokenizer.tokenize_mixed(text, lang="auto")
 
+    @require_python_package(import_name="rank_bm25", install_command="pip install rank_bm25")
     def _prepare_corpus_data(self, corpus, corpus_name="default"):
+        from rank_bm25 import BM25Okapi
+
         with _CACHE_LOCK:
             if corpus_name in _BM25_CACHE:
                 print("hit::", corpus_name)
diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py b/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py
index e3081ebf6..eec827c86 100644
--- a/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py
+++ b/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py
@@ -3,8 +3,7 @@
 
 from pathlib import Path
 
-import jieba
-
+from memos.dependency import require_python_package
 from memos.log import get_logger
 
 
@@ -326,8 +325,15 @@ def _is_chinese(self, text):
         chinese_chars = sum(1 for char in text if "\u4e00" <= char <= "\u9fff")
         return chinese_chars / max(len(text), 1) > 0.3
 
+    @require_python_package(
+        import_name="jieba",
+        install_command="pip install jieba",
+        install_link="https://github.com/fxsjy/jieba",
+    )
     def _tokenize_chinese(self, text):
         """split zh jieba"""
+        import jieba
+
         tokens = jieba.lcut(text) if self.use_jieba else list(text)
         tokens = [token.strip() for token in tokens if token.strip()]
         if self.use_stopwords:
diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
index c891e9f0c..b6d082940 100644 
--- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
+++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
@@ -522,7 +522,7 @@ def _cot_query(
         messages = [{"role": "user", "content": prompt}]
 
         try:
-            response_text = self.llm.customized_generate(messages, temperature=0, top_p=1)
+            response_text = self.llm.generate(messages, temperature=0, top_p=1)
             response_json = parse_json_result(response_text)
             assert "is_complex" in response_json
             if not response_json["is_complex"]:

From 499502d5ea3831bc79208d2cae7c9db8fccb51e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Wed, 29 Oct 2025 22:09:42 +0800
Subject: [PATCH 04/40] fix

---
 src/memos/configs/mem_reader.py                                | 4 ++--
 src/memos/memories/textual/tree_text_memory/retrieve/recall.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/memos/configs/mem_reader.py b/src/memos/configs/mem_reader.py
index fb2de3598..dc8d37a35 100644
--- a/src/memos/configs/mem_reader.py
+++ b/src/memos/configs/mem_reader.py
@@ -37,7 +37,7 @@ def parse_datetime(cls, value):
     )
 
-    chat_chunker: dict[str, Any] = Field(
-        ..., description="Configuration for the MemReader chat chunk strategy"
+    chat_chunker: dict[str, Any] | None = Field(
+        default=None, description="Configuration for the MemReader chat chunk strategy"
     )
 
diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py
index 93edf3d47..b7383aa13 100644
---
a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py @@ -1,4 +1,5 @@ import concurrent.futures +import os from memos.context.context import ContextThreadPoolExecutor from memos.embedders.factory import OllamaEmbedder @@ -184,47 +185,96 @@ def process_node(node): return TextualMemoryItem.from_dict(node) return None - candidate_ids = set() - - # 1) key-based OR branch - if parsed_goal.keys: - key_filters = [ - {"field": "key", "op": "in", "value": parsed_goal.keys}, - {"field": "memory_type", "op": "=", "value": memory_scope}, - ] - key_ids = self.graph_store.get_by_metadata(key_filters, user_name=user_name) - candidate_ids.update(key_ids) - - # 2) tag-based OR branch - if parsed_goal.tags: - tag_filters = [ - {"field": "tags", "op": "contains", "value": parsed_goal.tags}, - {"field": "memory_type", "op": "=", "value": memory_scope}, - ] - tag_ids = self.graph_store.get_by_metadata(tag_filters, user_name=user_name) - candidate_ids.update(tag_ids) - - # No matches → return empty - if not candidate_ids: - return [] + if os.getenv("FAST_GRAPH", "false") == "true": + candidate_ids = set() - # Load nodes and post-filter - node_dicts = self.graph_store.get_nodes( - list(candidate_ids), include_embedding=False, user_name=user_name - ) + # 1) key-based OR branch + if parsed_goal.keys: + key_filters = [ + {"field": "key", "op": "in", "value": parsed_goal.keys}, + {"field": "memory_type", "op": "=", "value": memory_scope}, + ] + key_ids = self.graph_store.get_by_metadata(key_filters) + candidate_ids.update(key_ids) + + # 2) tag-based OR branch + if parsed_goal.tags: + tag_filters = [ + {"field": "tags", "op": "contains", "value": parsed_goal.tags}, + {"field": "memory_type", "op": "=", "value": memory_scope}, + ] + tag_ids = self.graph_store.get_by_metadata(tag_filters) + candidate_ids.update(tag_ids) - final_nodes = [] - with ContextThreadPoolExecutor(max_workers=3) as executor: - futures = {executor.submit(process_node, node): i for i, node in enumerate(node_dicts)} - temp_results = [None] * len(node_dicts) + # No matches → return empty + if not candidate_ids: + return [] + + # Load nodes and post-filter + node_dicts = self.graph_store.get_nodes(list(candidate_ids), include_embedding=False) + + final_nodes = [] + for node in node_dicts: + meta = node.get("metadata", {}) + node_key = meta.get("key") + node_tags = meta.get("tags", []) or [] + + keep = False + # key equals to node_key + if parsed_goal.keys and node_key in parsed_goal.keys: + keep = True + # overlap tags more than 2 + elif parsed_goal.tags: + overlap = len(set(node_tags) & set(parsed_goal.tags)) + if overlap >= 2: + keep = True + if keep: + final_nodes.append(TextualMemoryItem.from_dict(node)) + return final_nodes + else: + candidate_ids = set() + + # 1) key-based OR branch + if parsed_goal.keys: + key_filters = [ + {"field": "key", "op": "in", "value": parsed_goal.keys}, + {"field": "memory_type", "op": "=", "value": memory_scope}, + ] + key_ids = self.graph_store.get_by_metadata(key_filters, user_name=user_name) + candidate_ids.update(key_ids) + + # 2) tag-based OR branch + if parsed_goal.tags: + tag_filters = [ + {"field": "tags", "op": "contains", "value": parsed_goal.tags}, + {"field": "memory_type", "op": "=", "value": memory_scope}, + ] + tag_ids = self.graph_store.get_by_metadata(tag_filters, user_name=user_name) + candidate_ids.update(tag_ids) + + # No matches → return empty + if not candidate_ids: + return [] + + # Load nodes and post-filter + 
node_dicts = self.graph_store.get_nodes( + list(candidate_ids), include_embedding=False, user_name=user_name + ) + + final_nodes = [] + with ContextThreadPoolExecutor(max_workers=3) as executor: + futures = { + executor.submit(process_node, node): i for i, node in enumerate(node_dicts) + } + temp_results = [None] * len(node_dicts) - for future in concurrent.futures.as_completed(futures): - original_index = futures[future] - result = future.result() - temp_results[original_index] = result + for future in concurrent.futures.as_completed(futures): + original_index = futures[future] + result = future.result() + temp_results[original_index] = result - final_nodes = [result for result in temp_results if result is not None] - return final_nodes + final_nodes = [result for result in temp_results if result is not None] + return final_nodes def _vector_recall( self, diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py b/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py index 6a1138c90..6e256611a 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py @@ -1,3 +1,4 @@ +import os import traceback from string import Template @@ -50,15 +51,25 @@ def _parse_fast(self, task_description: str, limit_num: int = 5) -> ParsedTaskGo """ Fast mode: simple jieba word split. """ - desc_tokenized = self.tokenizer.tokenize_mixed(task_description) - return ParsedTaskGoal( - memories=[task_description], - keys=desc_tokenized, - tags=desc_tokenized, - goal_type="default", - rephrased_query=task_description, - internet_search=False, - ) + if os.getenv("FAST_GRAPH", "false") == "true": + desc_tokenized = self.tokenizer.tokenize_mixed(task_description) + return ParsedTaskGoal( + memories=[task_description], + keys=desc_tokenized, + tags=desc_tokenized, + goal_type="default", + rephrased_query=task_description, + internet_search=False, + ) + else: + return ParsedTaskGoal( + memories=[task_description], + keys=[task_description], + tags=[], + goal_type="default", + rephrased_query=task_description, + internet_search=False, + ) def _parse_fine( self, query: str, context: str = "", conversation: list[dict] | None = None From 390ba298c0b8b51e781a7349bc3ea3dec96b5c79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Thu, 30 Oct 2025 16:06:14 +0800 Subject: [PATCH 07/40] turn off graph recall --- src/memos/api/config.py | 2 ++ src/memos/memories/textual/simple_tree.py | 10 ++-------- src/memos/memories/textual/tree.py | 7 ++----- .../textual/tree_text_memory/retrieve/recall.py | 15 +++++++++++---- .../textual/tree_text_memory/retrieve/searcher.py | 12 +++++++++--- .../tree_text_memory/retrieve/task_goal_parser.py | 9 +++++---- 6 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/memos/api/config.py b/src/memos/api/config.py index 405e8068d..c7d7ae316 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -866,6 +866,7 @@ def create_user_config(user_name: str, user_id: str) -> tuple[MOSConfig, General "UserMemory": os.getenv("NEBULAR_USER_MEMORY", 1e6), }, "search_strategy": { + "fast_graph": bool(os.getenv("FAST_GRAPH", "false") == "true"), "bm25": bool(os.getenv("BM25_CALL", "false") == "true"), "cot": bool(os.getenv("VEC_COT_CALL", "false") == "true"), }, @@ -937,6 +938,7 @@ def get_default_cube_config() -> GeneralMemCubeConfig | None: "UserMemory": 
os.getenv("NEBULAR_USER_MEMORY", 1e6), }, "search_strategy": { + "fast_graph": bool(os.getenv("FAST_GRAPH", "false") == "true"), "bm25": bool(os.getenv("BM25_CALL", "false") == "true"), "cot": bool(os.getenv("VEC_COT_CALL", "false") == "true"), }, diff --git a/src/memos/memories/textual/simple_tree.py b/src/memos/memories/textual/simple_tree.py index 6974dbe8f..992b7bfab 100644 --- a/src/memos/memories/textual/simple_tree.py +++ b/src/memos/memories/textual/simple_tree.py @@ -70,12 +70,6 @@ def __init__( ) logger.info(f"time init: bm25_retriever time is: {time.time() - time_start_bm}") - self.vec_cot = ( - self.search_strategy["cot"] - if self.search_strategy and "cot" in self.search_strategy - else False - ) - time_start_rr = time.time() self.reranker = reranker logger.info(f"time init: reranker time is: {time.time() - time_start_rr}") @@ -189,7 +183,7 @@ def search( bm25_retriever=self.bm25_retriever, internet_retriever=None, moscube=moscube, - vec_cot=self.vec_cot, + search_strategy=self.search_strategy, ) else: searcher = Searcher( @@ -200,7 +194,7 @@ def search( bm25_retriever=self.bm25_retriever, internet_retriever=self.internet_retriever, moscube=moscube, - vec_cot=self.vec_cot, + search_strategy=self.search_strategy, ) return searcher.search( query, top_k, info, mode, memory_type, search_filter, user_name=user_name diff --git a/src/memos/memories/textual/tree.py b/src/memos/memories/textual/tree.py index a58f993bb..19bd3ba5b 100644 --- a/src/memos/memories/textual/tree.py +++ b/src/memos/memories/textual/tree.py @@ -51,11 +51,6 @@ def __init__(self, config: TreeTextMemoryConfig): self.bm25_retriever = ( EnhancedBM25() if self.search_strategy and self.search_strategy["bm25"] else None ) - self.vec_cot = ( - self.search_strategy["cot"] - if self.search_strategy and "cot" in self.search_strategy - else False - ) if config.reranker is None: default_cfg = RerankerConfigFactory.model_validate( @@ -143,6 +138,7 @@ def get_searcher( self.reranker, internet_retriever=None, moscube=moscube, + search_strategy=self.search_strategy, ) else: searcher = Searcher( @@ -152,6 +148,7 @@ def get_searcher( self.reranker, internet_retriever=self.internet_retriever, moscube=moscube, + search_strategy=self.search_strategy, ) return searcher diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py index 5dac95c28..8cf2f47f3 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py @@ -1,5 +1,4 @@ import concurrent.futures -import os from memos.context.context import ContextThreadPoolExecutor from memos.embedders.factory import OllamaEmbedder @@ -41,6 +40,7 @@ def retrieve( search_filter: dict | None = None, user_name: str | None = None, id_filter: dict | None = None, + use_fast_graph: bool = False, ) -> list[TextualMemoryItem]: """ Perform hybrid memory retrieval: @@ -70,7 +70,13 @@ def retrieve( with ContextThreadPoolExecutor(max_workers=3) as executor: # Structured graph-based retrieval - future_graph = executor.submit(self._graph_recall, parsed_goal, memory_scope, user_name) + future_graph = executor.submit( + self._graph_recall, + parsed_goal, + memory_scope, + user_name, + use_fast_graph=use_fast_graph, + ) # Vector similarity search future_vector = executor.submit( self._vector_recall, @@ -156,7 +162,7 @@ def retrieve_from_cube( return list(combined.values()) def _graph_recall( - self, parsed_goal: ParsedTaskGoal, memory_scope: str, 
user_name: str | None = None + self, parsed_goal: ParsedTaskGoal, memory_scope: str, user_name: str | None = None, **kwargs ) -> list[TextualMemoryItem]: """ Perform structured node-based retrieval from Neo4j. @@ -164,6 +170,7 @@ def _graph_recall( - tags must overlap with at least 2 input tags - scope filters by memory_type if provided """ + use_fast_graph = kwargs.get("use_fast_graph", False) def process_node(node): meta = node.get("metadata", {}) @@ -185,7 +192,7 @@ def process_node(node): return TextualMemoryItem.from_dict(node) return None - if os.getenv("FAST_GRAPH", "false") == "true": + if not use_fast_graph: candidate_ids = set() # 1) key-based OR branch diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index 563695c68..b8e90cbf1 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -45,7 +45,7 @@ def __init__( bm25_retriever: EnhancedBM25 | None = None, internet_retriever: None = None, moscube: bool = False, - vec_cot: bool = False, + search_strategy: dict | None = None, ): self.graph_store = graph_store self.embedder = embedder @@ -59,7 +59,9 @@ def __init__( # Create internet retriever from config if provided self.internet_retriever = internet_retriever self.moscube = moscube - self.vec_cot = vec_cot + self.use_fast_graph = ( + search_strategy.get("fast_graph", "false") == "true" if search_strategy else False + ) self._usage_executor = ContextThreadPoolExecutor(max_workers=4, thread_name_prefix="usage") @@ -226,6 +228,7 @@ def _parse_task( context="\n".join(context), conversation=info.get("chat_history", []), mode=mode, + use_fast_graph=self.use_fast_graph, ) query = parsed_goal.rephrased_query or query @@ -340,6 +343,7 @@ def _retrieve_from_working_memory( search_filter=search_filter, user_name=user_name, id_filter=id_filter, + use_fast_graph=self.use_fast_graph, ) return self.reranker.rerank( query=query, @@ -369,7 +373,7 @@ def _retrieve_from_long_term_and_user( # chain of thinking cot_embeddings = [] - if self.vec_cot: + if self.search_strategy["vec_cot"]: queries = self._cot_query(query) if len(queries) > 1: cot_embeddings = self.embedder.embed(queries) @@ -390,6 +394,7 @@ def _retrieve_from_long_term_and_user( search_filter=search_filter, user_name=user_name, id_filter=id_filter, + use_fast_graph=self.use_fast_graph, ) ) if memory_type in ["All", "UserMemory"]: @@ -404,6 +409,7 @@ def _retrieve_from_long_term_and_user( search_filter=search_filter, user_name=user_name, id_filter=id_filter, + use_fast_graph=self.use_fast_graph, ) ) diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py b/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py index 6e256611a..5d706559c 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py @@ -1,4 +1,3 @@ -import os import traceback from string import Template @@ -30,6 +29,7 @@ def parse( context: str = "", conversation: list[dict] | None = None, mode: str = "fast", + **kwargs, ) -> ParsedTaskGoal: """ Parse user input into structured semantic layers. 
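A note on the flag plumbing in this patch: `api/config.py` coerces `FAST_GRAPH` and `BM25_CALL` into real booleans, but this revision of `Searcher.__init__` compares `search_strategy.get("fast_graph", "false")` against the string `"true"`, which is always `False` once the stored value is a bool. The same mismatch hits the CoT flag (the config writes the key `"cot"`, while PATCH 08 below reads `"vec_cot"`); PATCH 12 later settles on plain boolean lookups. A minimal sketch of a lookup that tolerates both representations; the helper names are illustrative, not part of the patch:

```python
import os


def env_flag(name: str, default: str = "false") -> bool:
    """Coerce an environment variable to bool, as api/config.py does."""
    return os.getenv(name, default).strip().lower() == "true"


def strategy_flag(search_strategy: dict | None, key: str) -> bool:
    """Read a search-strategy flag whether it arrives as a bool or a string."""
    if not search_strategy:
        return False
    value = search_strategy.get(key, False)
    if isinstance(value, str):
        return value.strip().lower() == "true"
    return bool(value)


search_strategy = {"fast_graph": env_flag("FAST_GRAPH"), "cot": env_flag("VEC_COT_CALL")}
# With boolean values, `search_strategy.get("fast_graph", "false") == "true"`
# is always False; the tolerant lookup still honors the flag.
assert strategy_flag(search_strategy, "fast_graph") == search_strategy["fast_graph"]
```

Normalizing once at the config boundary, as PATCH 12 effectively does, avoids scattering string comparisons through the retrieval stack.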
@@ -39,7 +39,7 @@ def parse( - mode == 'fine': use LLM to parse structured topic/keys/tags """ if mode == "fast": - return self._parse_fast(task_description) + return self._parse_fast(task_description, **kwargs) elif mode == "fine": if not self.llm: raise ValueError("LLM not provided for slow mode.") @@ -47,11 +47,12 @@ def parse( else: raise ValueError(f"Unknown mode: {mode}") - def _parse_fast(self, task_description: str, limit_num: int = 5) -> ParsedTaskGoal: + def _parse_fast(self, task_description: str, **kwargs) -> ParsedTaskGoal: """ Fast mode: simple jieba word split. """ - if os.getenv("FAST_GRAPH", "false") == "true": + use_fast_graph = kwargs.get("use_fast_graph", False) + if use_fast_graph: desc_tokenized = self.tokenizer.tokenize_mixed(task_description) return ParsedTaskGoal( memories=[task_description], From 96152821722652ce38813d2a85c001f183cb47df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Thu, 30 Oct 2025 16:13:47 +0800 Subject: [PATCH 08/40] turn off graph recall --- .../memories/textual/tree_text_memory/retrieve/searcher.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index b8e90cbf1..0974d67f2 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -59,6 +59,9 @@ def __init__( # Create internet retriever from config if provided self.internet_retriever = internet_retriever self.moscube = moscube + self.vec_cot = ( + search_strategy.get("vec_cot", "false") == "true" if search_strategy else False + ) self.use_fast_graph = ( search_strategy.get("fast_graph", "false") == "true" if search_strategy else False ) @@ -373,7 +376,7 @@ def _retrieve_from_long_term_and_user( # chain of thinking cot_embeddings = [] - if self.search_strategy["vec_cot"]: + if self.vec_cot: queries = self._cot_query(query) if len(queries) > 1: cot_embeddings = self.embedder.embed(queries) From 04f412b2e28faec1da603115f8938918cabd2155 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Thu, 30 Oct 2025 19:11:03 +0800 Subject: [PATCH 09/40] fix Searcher input bug --- src/memos/memories/textual/tree.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/memos/memories/textual/tree.py b/src/memos/memories/textual/tree.py index 19bd3ba5b..53628d075 100644 --- a/src/memos/memories/textual/tree.py +++ b/src/memos/memories/textual/tree.py @@ -138,7 +138,6 @@ def get_searcher( self.reranker, internet_retriever=None, moscube=moscube, - search_strategy=self.search_strategy, ) else: searcher = Searcher( @@ -148,7 +147,6 @@ def get_searcher( self.reranker, internet_retriever=self.internet_retriever, moscube=moscube, - search_strategy=self.search_strategy, ) return searcher @@ -197,7 +195,7 @@ def search( bm25_retriever=self.bm25_retriever, internet_retriever=None, moscube=moscube, - vec_cot=self.vec_cot, + search_strategy=self.search_strategy, ) else: searcher = Searcher( @@ -208,7 +206,7 @@ def search( bm25_retriever=self.bm25_retriever, internet_retriever=self.internet_retriever, moscube=moscube, - vec_cot=self.vec_cot, + search_strategy=self.search_strategy, ) return searcher.search(query, top_k, info, mode, memory_type, search_filter) From 9716274a5b3eb51fc0f7b2566fdd871ad1dd042c Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Thu, 30 Oct 2025 19:13:24 +0800 Subject: [PATCH 10/40] fix Searcher --- src/memos/memories/textual/tree.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/memos/memories/textual/tree.py b/src/memos/memories/textual/tree.py index 19bd3ba5b..9b46d776b 100644 --- a/src/memos/memories/textual/tree.py +++ b/src/memos/memories/textual/tree.py @@ -197,7 +197,6 @@ def search( bm25_retriever=self.bm25_retriever, internet_retriever=None, moscube=moscube, - vec_cot=self.vec_cot, ) else: searcher = Searcher( @@ -208,7 +207,6 @@ def search( bm25_retriever=self.bm25_retriever, internet_retriever=self.internet_retriever, moscube=moscube, - vec_cot=self.vec_cot, ) return searcher.search(query, top_k, info, mode, memory_type, search_filter) From f8b9b4a37c655e9006e8827480c91166e9a81caf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Thu, 30 Oct 2025 19:16:27 +0800 Subject: [PATCH 11/40] fix Search --- src/memos/memories/textual/tree.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/memos/memories/textual/tree.py b/src/memos/memories/textual/tree.py index 9b46d776b..53628d075 100644 --- a/src/memos/memories/textual/tree.py +++ b/src/memos/memories/textual/tree.py @@ -138,7 +138,6 @@ def get_searcher( self.reranker, internet_retriever=None, moscube=moscube, - search_strategy=self.search_strategy, ) else: searcher = Searcher( @@ -148,7 +147,6 @@ def get_searcher( self.reranker, internet_retriever=self.internet_retriever, moscube=moscube, - search_strategy=self.search_strategy, ) return searcher @@ -197,6 +195,7 @@ def search( bm25_retriever=self.bm25_retriever, internet_retriever=None, moscube=moscube, + search_strategy=self.search_strategy, ) else: searcher = Searcher( @@ -207,6 +206,7 @@ def search( bm25_retriever=self.bm25_retriever, internet_retriever=self.internet_retriever, moscube=moscube, + search_strategy=self.search_strategy, ) return searcher.search(query, top_k, info, mode, memory_type, search_filter) From b9dbecd5003688288e84f491b9d4f4130f61553b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Tue, 4 Nov 2025 11:46:18 +0800 Subject: [PATCH 12/40] fix bug --- src/memos/api/config.py | 2 +- src/memos/configs/memory.py | 2 +- src/memos/mem_reader/strategy_struct.py | 16 ++- .../retrieve/retrieval_mid_structs.py | 1 + .../retrieve/retrieve_utils.py | 2 +- .../tree_text_memory/retrieve/searcher.py | 18 +-- .../retrieve/task_goal_parser.py | 13 ++- .../templates/mem_reader_strategy_prompts.py | 104 +++++++++--------- 8 files changed, 87 insertions(+), 71 deletions(-) diff --git a/src/memos/api/config.py b/src/memos/api/config.py index d9db93c1a..7f61d54ac 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -427,7 +427,7 @@ def get_reader_config() -> dict[str, Any]: "config": { "chunk_type": os.getenv("MEM_READER_CHAT_CHUNK_TYPE", "default"), "chunk_length": int(os.getenv("MEM_READER_CHAT_CHUNK_TOKEN_SIZE", 1600)), - "chunk_session": int(os.getenv("MEM_READER_CHAT_CHUNK_SESS_SIZE", 20)), + "chunk_session": int(os.getenv("MEM_READER_CHAT_CHUNK_SESS_SIZE", 10)), "chunk_overlap": int(os.getenv("MEM_READER_CHAT_CHUNK_OVERLAP", 2)), }, } diff --git a/src/memos/configs/memory.py b/src/memos/configs/memory.py index 49320fbf5..34967849a 100644 --- a/src/memos/configs/memory.py +++ 
b/src/memos/configs/memory.py @@ -184,7 +184,7 @@ class TreeTextMemoryConfig(BaseTextMemoryConfig): ), ) - search_strategy: dict[str, bool] | None = Field( + search_strategy: dict[str, Any] | None = Field( default=None, description=( 'Set search strategy for this memory configuration.{"bm25": true, "cot": false}' diff --git a/src/memos/mem_reader/strategy_struct.py b/src/memos/mem_reader/strategy_struct.py index 2cac1652a..a45586bd0 100644 --- a/src/memos/mem_reader/strategy_struct.py +++ b/src/memos/mem_reader/strategy_struct.py @@ -39,11 +39,12 @@ def __init__(self, config: StrategyStructMemReaderConfig): self.chat_chunker = config.chat_chunker["config"] def _get_llm_response(self, mem_str: str) -> dict: + print("[code stamp]: fine_get_llm_response") lang = detect_lang(mem_str) template = STRATEGY_PROMPT_DICT["chat"][lang] examples = STRATEGY_PROMPT_DICT["chat"][f"{lang}_example"] prompt = template.replace("${conversation}", mem_str) - if self.config.remove_prompt_example: + if self.config.remove_prompt_example: # TODO unused prompt = prompt.replace(examples, "") messages = [{"role": "user", "content": prompt}] try: @@ -112,6 +113,19 @@ def get_scene_data_info(self, scene_data: list, type: str) -> list[str]: results.append([overlap_item, item]) current_length = overlap_length + content_length + else: + cut_size, cut_overlap = ( + self.chat_chunker["chunk_session"], + self.chat_chunker["chunk_overlap"], + ) + for items in scene_data: + step = cut_size - cut_overlap + end = len(items) - cut_overlap + if end <= 0: + results.extend([items[:]]) + else: + results.extend([items[i : i + cut_size] for i in range(0, end, step)]) + elif type == "doc": parser_config = ParserConfigFactory.model_validate( { diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py b/src/memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py index 6accc4a16..7aefaa1a3 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py @@ -13,3 +13,4 @@ class ParsedTaskGoal: rephrased_query: str | None = None internet_search: bool = False goal_type: str | None = None # e.g., 'default', 'explanation', etc. 
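The `else` branch added to `get_scene_data_info` above windows each session by message count: chunks of `chunk_session` messages whose starts advance by `chunk_session - chunk_overlap`, so neighbouring chunks share `chunk_overlap` messages. A self-contained sketch of the same arithmetic (the function name is mine; the defaults match the PATCH 12 env defaults of 10 and 2):

```python
def chunk_session_messages(items: list, cut_size: int = 10, cut_overlap: int = 2) -> list[list]:
    """Sliding windows of `cut_size` messages overlapping by `cut_overlap`."""
    step = cut_size - cut_overlap   # how far each window start advances
    end = len(items) - cut_overlap  # last start index that adds new messages
    if end <= 0:                    # session no longer than the overlap
        return [items[:]]
    return [items[i : i + cut_size] for i in range(0, end, step)]


# 23 messages -> window starts 0, 8, 16 -> chunks [0:10], [8:18], [16:23],
# each sharing two messages with its predecessor.
print([(c[0], c[-1]) for c in chunk_session_messages(list(range(23)))])
```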
+ context: str = "" diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py b/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py index eec827c86..3f2b41a47 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py @@ -17,7 +17,7 @@ def find_project_root(marker=".git"): if (current / marker).exists(): return current current = current.parent - logger.warn(f"The project root directory tag file was not found: {marker}") + return Path(".") PROJECT_ROOT = find_project_root() diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index 0974d67f2..a86207e66 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -30,8 +30,8 @@ logger = get_logger(__name__) COT_DICT = { - "fast": {"en": COT_PROMPT, "zh": COT_PROMPT_ZH}, - "fine": {"en": SIMPLE_COT_PROMPT, "zh": SIMPLE_COT_PROMPT_ZH}, + "fine": {"en": COT_PROMPT, "zh": COT_PROMPT_ZH}, + "fast": {"en": SIMPLE_COT_PROMPT, "zh": SIMPLE_COT_PROMPT_ZH}, } @@ -59,12 +59,8 @@ def __init__( # Create internet retriever from config if provided self.internet_retriever = internet_retriever self.moscube = moscube - self.vec_cot = ( - search_strategy.get("vec_cot", "false") == "true" if search_strategy else False - ) - self.use_fast_graph = ( - search_strategy.get("fast_graph", "false") == "true" if search_strategy else False - ) + self.vec_cot = search_strategy.get("cot", False) if search_strategy else False + self.use_fast_graph = search_strategy.get("fast_graph", False) if search_strategy else False self._usage_executor = ContextThreadPoolExecutor(max_workers=4, thread_name_prefix="usage") @@ -287,6 +283,7 @@ def _retrieve_paths( search_filter, user_name, id_filter, + mode=mode, ) ) tasks.append( @@ -369,6 +366,7 @@ def _retrieve_from_long_term_and_user( search_filter: dict | None = None, user_name: str | None = None, id_filter: dict | None = None, + mode: str = "fast", ): """Retrieve and rerank from LongTermMemory and UserMemory""" results = [] @@ -377,7 +375,7 @@ def _retrieve_from_long_term_and_user( # chain of thinking cot_embeddings = [] if self.vec_cot: - queries = self._cot_query(query) + queries = self._cot_query(query, mode=mode, context=parsed_goal.context) if len(queries) > 1: cot_embeddings = self.embedder.embed(queries) cot_embeddings.extend(query_embedding) @@ -567,6 +565,8 @@ def _cot_query( "${split_num_threshold}", str(split_num) ) logger.info("COT process") + print("---------------prompt-------------") + print(prompt, mode) messages = [{"role": "user", "content": prompt}] try: diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py b/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py index 5d706559c..55e33494c 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py @@ -39,7 +39,7 @@ def parse( - mode == 'fine': use LLM to parse structured topic/keys/tags """ if mode == "fast": - return self._parse_fast(task_description, **kwargs) + return self._parse_fast(task_description, context=context, **kwargs) elif mode == "fine": if not self.llm: raise ValueError("LLM not provided for slow mode.") @@ -51,6 +51,7 @@ def _parse_fast(self, task_description: str, 
**kwargs) -> ParsedTaskGoal: """ Fast mode: simple jieba word split. """ + context = kwargs.get("context", "") use_fast_graph = kwargs.get("use_fast_graph", False) if use_fast_graph: desc_tokenized = self.tokenizer.tokenize_mixed(task_description) @@ -61,6 +62,7 @@ def _parse_fast(self, task_description: str, **kwargs) -> ParsedTaskGoal: goal_type="default", rephrased_query=task_description, internet_search=False, + context=context, ) else: return ParsedTaskGoal( @@ -70,6 +72,7 @@ def _parse_fast(self, task_description: str, **kwargs) -> ParsedTaskGoal: goal_type="default", rephrased_query=task_description, internet_search=False, + context=context, ) def _parse_fine( @@ -91,16 +94,17 @@ def _parse_fine( logger.info(f"Parsing Goal... LLM input is {prompt}") response = self.llm.generate(messages=[{"role": "user", "content": prompt}]) logger.info(f"Parsing Goal... LLM Response is {response}") - return self._parse_response(response) + return self._parse_response(response, context=context) except Exception: logger.warning(f"Fail to fine-parse query {query}: {traceback.format_exc()}") - return self._parse_fast(query) + return self._parse_fast(query, context=context) - def _parse_response(self, response: str) -> ParsedTaskGoal: + def _parse_response(self, response: str, **kwargs) -> ParsedTaskGoal: """ Parse LLM JSON output safely. """ try: + context = kwargs.get("context", "") response = response.replace("```", "").replace("json", "").strip() response_json = eval(response) return ParsedTaskGoal( @@ -110,6 +114,7 @@ def _parse_response(self, response: str) -> ParsedTaskGoal: rephrased_query=response_json.get("rephrased_instruction", None), internet_search=response_json.get("internet_search", False), goal_type=response_json.get("goal_type", "default"), + context=context, ) except Exception as e: raise ValueError(f"Failed to parse LLM output: {e}\nRaw response:\n{response}") from e diff --git a/src/memos/templates/mem_reader_strategy_prompts.py b/src/memos/templates/mem_reader_strategy_prompts.py index fca4d717b..07aa3978d 100644 --- a/src/memos/templates/mem_reader_strategy_prompts.py +++ b/src/memos/templates/mem_reader_strategy_prompts.py @@ -38,14 +38,14 @@ 7. Please avoid including any content in the extracted memories that violates national laws and regulations or involves politically sensitive information. -Return a valid JSON object with the following structure: +Return a valid JSON object with the following structure: { "memory list": [ { "key": , "memory_type": , - "value": , + "value": , "tags": }, ... @@ -54,11 +54,11 @@ } Language rules: -- The `key`, `value`, `tags`, and `summary` fields must match the primary language of the input conversation. **If the input is Chinese, output in Chinese.** -- Keep `memory_type` in English. +- The `key`, `value`, `tags`, `summary` and `memory_type` fields must be in English. -Example: -Conversation: + +Example1: +Conversations: user: [June 26, 2025 at 3:00 PM]: Hi Jerry! Yesterday at 3 PM I had a meeting with my team about the new project. assistant: Oh Tom! Do you think the team can finish by December 15? user: [June 26, 2025 at 3:00 PM]: I’m worried. The backend won’t be done until December 10, so testing will be tight. @@ -84,52 +84,50 @@ "summary": "Tom is currently working on a tight-schedule project. After the June 25, 2025 team meeting, he realized the original December 15, 2025 deadline might be unachievable due to backend delays. 
Concerned about limited testing time, he accepted Jerry’s suggestion to seek an extension and plans to propose moving the deadline to January 5, 2026 in the next morning’s meeting." } -Another Example in Chinese (Note: when the user's language is Chinese, you must also output in Chinese): - -对话(节选): -user|2025-07-12 19:30:下个月同事婚礼,要买条裙子,预算1500内,别太暴露。 +Example2: +Conversations (excerpt): +user|2025-07-12 19:30:A colleague's wedding is next month, need to buy a dress, budget within 1500, not too revealing. assistant|19:32 -:了解~婚礼场合建议中长款,端庄一些。可以看法式茶歇/直筒/裹身三路。颜色上,奶油白、浅粉、雾霾蓝都适合;面料可看醋酸/真丝混纺(垂感好但需干洗)。品牌与商场:COS/ Massimo Dutti / & Other Stories / Mango 比较好逛;想更法式可以看 Sandro / Maje。 我记得你很喜欢Maje? -user|19:35:不喜欢亮色。国贸方便。 -assistant|19:37:那亮度就压低些:雾霾蓝、藕粉、燕麦米白更稳。国贸有 COS / Massimo Dutti / &OS;可以先看 COS 直筒或裹身,再去 MD 看藕粉/雾霾蓝。搭配上,薄披肩/小西装都能压场。 -user|19:40:165cm,S码;最好有口袋。 -assistant|19:42:OK。版型避开大摆+高开衩,选直筒或轻A字更稳;若要口袋,COS直筒一些款有;开衩别太高,及小腿肚附近即可。鞋包:中跟裸色鞋 + 小号手拿包。 -user|19:47:本来想茶歇A字,但雾霾蓝直筒也行。 -assistant|19:49:可以先锁定:COS 雾霾蓝直筒中长为主选,MD 藕粉中长做备选;再看 & Other Stories 是否有合身的裹身款。 -user|19:52:行,周六(7/19)去国贸试,合适就买。 -assistant|19:54:记得看肩带可调/有无内衬,醋酸/真丝优先干洗;准备同色安全裤/防走光贴。如果当天没货,可下单调货或线上下单门店自提。 +:Got it~ For a wedding occasion, midi length is suggested, more elegant. You can consider three styles: French tea-length / straight / wrap. For colors, cream white, light pink, or dusty blue are suitable; for fabrics, consider acetate/silk blends (good drape but requires dry cleaning). Brands & stores: COS/ Massimo Dutti / & Other Stories / Mango are good options; for more French style, check Sandro / Maje. I remember you really like Maje? +user|19:35:Don't like bright colors. China World Summit Wing is convenient. +assistant|19:37:Then lower the brightness: dusty blue, dusty pink, oat beige are more subdued. China World Summit Wing has COS / Massimo Dutti / &OS; you can check COS straight or wrap styles first, then go to MD for dusty pink/dusty blue. For pairing, a thin shawl/blazer can elevate the look. +user|19:40:165cm, S size; preferably with pockets. +assistant|19:42:OK. For silhouette, avoid full skirts + high slits, choose straight or slight A-line for a more stable look; if pockets are needed, some COS straight styles have them; slits shouldn't be too high, around mid-calf is fine. Shoes & bag: mid-heel nude shoes + small clutch. +user|19:47:Originally thought about tea-length A-line, but dusty blue straight is also okay. +assistant|19:49:You can focus on: COS dusty blue straight midi as the primary choice, MD dusty pink midi as the backup; then check if & Other Stories has any well-fitting wrap styles. +user|19:52:Okay, going to China World Summit Wing on Saturday (7/19) to try, will buy if suitable. +assistant|19:54:Remember to check if straps are adjustable / if there's a lining, acetate/silk preferably dry cleaned; prepare same-color safety shorts / anti-wardrobe malfunction tape. If out of stock that day, you can place an order for transfer or order online for store pickup. 
{ - "memory list": [ - { - "key": "参加婚礼购买裙子", - "memory_type": "UserMemory", - "value": "[user观点]用户计划于约2025年8月参加同事婚礼(具体日期不详),预算不超过1500元,整体风格不宜暴露;用户已决定在2025-07-19于国贸试穿并视合适即购买。", - "tags": ["婚礼", "预算", "国贸", "计划"] - }, - { - "key": "审美与版型偏好", - "memory_type": "UserMemory", - "value": "[user观点]用户不喜欢亮色,倾向低亮度色系;裙装偏好端庄的中长款,接受直筒或轻A字。", - "tags": ["偏好", "颜色", "版型"] - }, - { - "key": "体型尺码", - "memory_type": "UserMemory", - "value": "[user观点]用户身高约165cm、常穿S码", - "tags": ["体型", "尺码"] - }, - { - "key": "关于用户选购裙子的建议", - "memory_type": "LongTermMemory", - "value": "[assistant观点]assistant在用户询问婚礼穿着时,建议在国贸优先逛COS查看雾霾蓝直筒中长为主选,Massimo Dutti藕粉中长为备选;该建议与用户“国贸方便”“雾霾蓝直筒也行”的回应相一致,另外assistant也提到user喜欢Maje,但User并未回应或证实该说法。", - "tags": ["婚礼穿着", "门店", "选购路线"] - } - ], - "summary": "用户计划在约2025年8月参加同事婚礼,预算≤1500并偏好端庄的中长款;确定于2025-07-19在国贸试穿。其长期画像显示:不喜欢亮色、偏好低亮度色系与不过分暴露的版型,身高约165cm、S码且偏好裙装带口袋。助手提出的国贸选购路线以COS雾霾蓝直筒中长为主选、MD藕粉中长为备选,且与用户回应一致,为线下试穿与购买提供了明确路径。" +"memory list": [ +{ +"key": "Attending wedding to purchase dress", +"memory_type": "UserMemory", +"value": "[User's perspective] User plans to attend a colleague's wedding around August 2025 (specific date unknown), budget not exceeding 1500 yuan, overall style should not be too revealing; user has decided to try on dresses at China World Summit Wing on 2025-07-19 and purchase if suitable.", +"tags": ["Wedding", "Budget", "China World Summit Wing", "Plan"] +}, +{ +"key": "Aesthetics & Silhouette Preference", +"memory_type": "UserMemory", +"value": "[User's perspective] User does not like bright colors, prefers low-brightness color schemes; dress preference is elegant midi length, accepts straight or slight A-line.", +"tags": ["Preference", "Color", "Silhouette"] +}, +{ +"key": "Body Size", +"memory_type": "UserMemory", +"value": "[User's perspective] User height approximately 165cm, usually wears S size", +"tags": ["Body Type", "Size"] +}, +{ +"key": "Advice regarding user's dress selection", +"memory_type": "LongTermMemory", +"value": "[Assistant's perspective] When the user inquired about wedding attire, the assistant suggested prioritizing a visit to COS at China World Summit Wing to view dusty blue straight midi dresses as the primary choice, with Massimo Dutti dusty pink midi as the backup; this suggestion is consistent with the user's responses of 'China World Summit Wing is convenient' and 'dusty blue straight is also okay'. Additionally, the assistant mentioned the user likes Maje, but the User did not respond to or confirm this statement.", +"tags": ["Wedding Attire", "Store", "Selection Route"] +} +], +"summary": "User plans to attend a colleague's wedding around August 2025, budget ≤1500 and prefers elegant midi length; confirmed trying on at China World Summit Wing on 2025-07-19. Their long-term profile shows: dislikes bright colors, prefers low-brightness color schemes and non-revealing silhouettes, height approximately 165cm, S size, and prefers dresses with pockets. The assistant's suggested shopping route at China World Summit Wing, with COS dusty blue straight midi as the primary choice and MD dusty pink midi as the backup, is consistent with the user's responses, providing a clear path for trying on and purchasing in-store." } -Always respond in the same language as the conversation. 
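For orientation amid these prompt hunks: the templates are consumed by `_get_llm_response` in `strategy_struct.py`, which fills `${conversation}` with a plain `str.replace` and, when `remove_prompt_example` is set, strips the example block looked up from `STRATEGY_PROMPT_DICT["chat"][f"{lang}_example"]`. A minimal sketch with an invented two-line template (the real templates are the strings being edited here):

```python
TEMPLATE = """Extract memories as a JSON object.

Example:
user: hi
{"memory list": [], "summary": ""}

Conversation:
${conversation}
"""

EXAMPLE = """Example:
user: hi
{"memory list": [], "summary": ""}
"""


def build_prompt(conversation: str, remove_example: bool = False) -> str:
    """Mirror _get_llm_response: substitute the placeholder, optionally drop the example."""
    prompt = TEMPLATE.replace("${conversation}", conversation)
    if remove_example:  # corresponds to config.remove_prompt_example
        prompt = prompt.replace(EXAMPLE, "")
    return prompt


print(build_prompt("user: [2025-06-26 15:00] Hi Jerry!", remove_example=True))
```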
Conversation: ${conversation} @@ -183,7 +181,7 @@ { "key": <字符串,唯一且简洁的记忆标题>, "memory_type": <字符串,"LongTermMemory" 或 "UserMemory">, - "value": <详细、独立且无歧义的记忆陈述——若输入对话为英文,则用英文;若为中文,则用中文>, + "value": <详细、独立且无歧义的记忆陈述>, "tags": <一个包含相关人名、事件和特征关键词的列表(例如,["丽丽","截止日期", "团队", "计划"])> }, ... @@ -192,10 +190,10 @@ } 语言规则: -- `key`、`value`、`tags`、`summary` 字段必须与输入对话的主要语言一致。**如果输入是中文,请输出中文** -- `memory_type` 保持英文。 +- `key`、`value`、`tags`、`summary` 、`memory_type` 字段必须输出中文 + -示例: +示例1: 对话: user: [2025年6月26日下午3:00]:嗨Jerry!昨天下午3点我和团队开了个会,讨论新项目。 assistant: 哦Tom!你觉得团队能在12月15日前完成吗? @@ -226,8 +224,7 @@ 年1月5日。" } -另一个中文示例(注意:当用户语言为中文时,您也需输出中文): - +示例2: 对话(节选): user|2025-07-12 19:30:下个月同事婚礼,要买条裙子,预算1500内,别太暴露。 assistant|19:32 @@ -271,7 +268,6 @@ "summary": "用户计划在约2025年8月参加同事婚礼,预算≤1500并偏好端庄的中长款;确定于2025-07-19在国贸试穿。其长期画像显示:不喜欢亮色、偏好低亮度色系与不过分暴露的版型,身高约165cm、S码且偏好裙装带口袋。助手提出的国贸选购路线以COS雾霾蓝直筒中长为主选、MD藕粉中长为备选,且与用户回应一致,为线下试穿与购买提供了明确路径。" } -请始终使用与对话相同的语言进行回复。 对话: ${conversation} From 1173c07d00ebb8da8952ba5d34520a28c62676ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Tue, 4 Nov 2025 15:07:11 +0800 Subject: [PATCH 13/40] adjust strategy reader --- src/memos/mem_reader/strategy_struct.py | 2 +- .../templates/mem_reader_strategy_prompts.py | 87 +++++-------------- 2 files changed, 25 insertions(+), 64 deletions(-) diff --git a/src/memos/mem_reader/strategy_struct.py b/src/memos/mem_reader/strategy_struct.py index a45586bd0..302afd85c 100644 --- a/src/memos/mem_reader/strategy_struct.py +++ b/src/memos/mem_reader/strategy_struct.py @@ -39,7 +39,7 @@ def __init__(self, config: StrategyStructMemReaderConfig): self.chat_chunker = config.chat_chunker["config"] def _get_llm_response(self, mem_str: str) -> dict: - print("[code stamp]: fine_get_llm_response") + lang = detect_lang(mem_str) template = STRATEGY_PROMPT_DICT["chat"][lang] examples = STRATEGY_PROMPT_DICT["chat"][f"{lang}_example"] diff --git a/src/memos/templates/mem_reader_strategy_prompts.py b/src/memos/templates/mem_reader_strategy_prompts.py index 07aa3978d..ba4a00d0a 100644 --- a/src/memos/templates/mem_reader_strategy_prompts.py +++ b/src/memos/templates/mem_reader_strategy_prompts.py @@ -16,8 +16,13 @@ - Always set "model_type" to "UserMemory" for this output. 3. Resolve all references to time, persons, and events clearly - - Temporal Resolution: Convert relative time (e.g., 'yesterday') to absolute dates based on the message timestamp. Distinguish between event time and message time; flag any uncertainty. + - Temporal Resolution: Convert relative time (e.g., "yesterday") to absolute dates based on the message timestamp. Distinguish between event time and message time; flag any uncertainty. + > Where feasible, use the message timestamp to convert relative time expressions into absolute dates (e.g., "yesterday" in a message dated January 15, 2023, can be converted to "January 14, 2023," and "last week" can be described as "the week preceding January 15, 2023"). + > Explicitly differentiate between the time when the event occurred and the time the message was sent. + > Clearly indicate any uncertainty (e.g., "approximately June 2025", "exact date unknown"). - Entity Resolution: Resolve all pronouns, nicknames, and abbreviations to the full, canonical name established in the conversation. + > For example, "Melanie" uses the abbreviated name "Mel" in the paragraph; when extracting her name in the "value" field, it should be restored to "Melanie". 
+ - Location resolution: If specific locations are mentioned, include them explicitly. 4. Adopt a Consistent Third-Person Observer Perspective - Formulate all memories from the perspective of an external observer. Use "The user" or their specific name as the subject. @@ -57,7 +62,7 @@ - The `key`, `value`, `tags`, `summary` and `memory_type` fields must be in English. -Example1: +Example: Conversations: user: [June 26, 2025 at 3:00 PM]: Hi Jerry! Yesterday at 3 PM I had a meeting with my team about the new project. assistant: Oh Tom! Do you think the team can finish by December 15? @@ -71,61 +76,17 @@ { "key": "Initial project meeting", "memory_type": "LongTermMemory", - "value": "[user-Tom viewpoint] On June 25, 2025 at 3:00 PM, Tom met with the team to discuss a new project. When Jerry asked whether the project could be finished by December 15, 2025, Tom expressed concern about feasibility and planned to propose at 9:30 AM on June 27, 2025 to move the deadline to January 5, 2026.", + "value": "[user-Tom viewpoint] On June 25, 2025 at 3:00 PM, Tom held a meeting with their team to discuss a new project. The conversation covered the timeline and raised concerns about the feasibility of the December 15, 2025 deadline.", "tags": ["Tom", "project", "timeline", "meeting", "deadline"] }, { - "key": "Jerry’s suggestion about the deadline", - "memory_type": "LongTermMemory", - "value": "[assistant-Jerry viewpoint] Jerry questioned the December 15 deadline and suggested considering an extension.", - "tags": ["Jerry", "deadline change", "suggestion"] + "key": "Planned scope adjustment", + "memory_type": "UserMemory", + "value": "Tom planned to suggest in a meeting on June 27, 2025 at 9:30 AM that the team should prioritize features and propose shifting the project deadline to January 5, 2026.", + "tags": ["Tom", "planning", "deadline change", "feature prioritization"] } ], - "summary": "Tom is currently working on a tight-schedule project. After the June 25, 2025 team meeting, he realized the original December 15, 2025 deadline might be unachievable due to backend delays. Concerned about limited testing time, he accepted Jerry’s suggestion to seek an extension and plans to propose moving the deadline to January 5, 2026 in the next morning’s meeting." -} - -Example2: -Conversations (excerpt): -user|2025-07-12 19:30:A colleague's wedding is next month, need to buy a dress, budget within 1500, not too revealing. -assistant|19:32 -:Got it~ For a wedding occasion, midi length is suggested, more elegant. You can consider three styles: French tea-length / straight / wrap. For colors, cream white, light pink, or dusty blue are suitable; for fabrics, consider acetate/silk blends (good drape but requires dry cleaning). Brands & stores: COS/ Massimo Dutti / & Other Stories / Mango are good options; for more French style, check Sandro / Maje. I remember you really like Maje? -user|19:35:Don't like bright colors. China World Summit Wing is convenient. -assistant|19:37:Then lower the brightness: dusty blue, dusty pink, oat beige are more subdued. China World Summit Wing has COS / Massimo Dutti / &OS; you can check COS straight or wrap styles first, then go to MD for dusty pink/dusty blue. For pairing, a thin shawl/blazer can elevate the look. -user|19:40:165cm, S size; preferably with pockets. -assistant|19:42:OK. 
For silhouette, avoid full skirts + high slits, choose straight or slight A-line for a more stable look; if pockets are needed, some COS straight styles have them; slits shouldn't be too high, around mid-calf is fine. Shoes & bag: mid-heel nude shoes + small clutch. -user|19:47:Originally thought about tea-length A-line, but dusty blue straight is also okay. -assistant|19:49:You can focus on: COS dusty blue straight midi as the primary choice, MD dusty pink midi as the backup; then check if & Other Stories has any well-fitting wrap styles. -user|19:52:Okay, going to China World Summit Wing on Saturday (7/19) to try, will buy if suitable. -assistant|19:54:Remember to check if straps are adjustable / if there's a lining, acetate/silk preferably dry cleaned; prepare same-color safety shorts / anti-wardrobe malfunction tape. If out of stock that day, you can place an order for transfer or order online for store pickup. - -{ -"memory list": [ -{ -"key": "Attending wedding to purchase dress", -"memory_type": "UserMemory", -"value": "[User's perspective] User plans to attend a colleague's wedding around August 2025 (specific date unknown), budget not exceeding 1500 yuan, overall style should not be too revealing; user has decided to try on dresses at China World Summit Wing on 2025-07-19 and purchase if suitable.", -"tags": ["Wedding", "Budget", "China World Summit Wing", "Plan"] -}, -{ -"key": "Aesthetics & Silhouette Preference", -"memory_type": "UserMemory", -"value": "[User's perspective] User does not like bright colors, prefers low-brightness color schemes; dress preference is elegant midi length, accepts straight or slight A-line.", -"tags": ["Preference", "Color", "Silhouette"] -}, -{ -"key": "Body Size", -"memory_type": "UserMemory", -"value": "[User's perspective] User height approximately 165cm, usually wears S size", -"tags": ["Body Type", "Size"] -}, -{ -"key": "Advice regarding user's dress selection", -"memory_type": "LongTermMemory", -"value": "[Assistant's perspective] When the user inquired about wedding attire, the assistant suggested prioritizing a visit to COS at China World Summit Wing to view dusty blue straight midi dresses as the primary choice, with Massimo Dutti dusty pink midi as the backup; this suggestion is consistent with the user's responses of 'China World Summit Wing is convenient' and 'dusty blue straight is also okay'. Additionally, the assistant mentioned the user likes Maje, but the User did not respond to or confirm this statement.", -"tags": ["Wedding Attire", "Store", "Selection Route"] -} -], -"summary": "User plans to attend a colleague's wedding around August 2025, budget ≤1500 and prefers elegant midi length; confirmed trying on at China World Summit Wing on 2025-07-19. Their long-term profile shows: dislikes bright colors, prefers low-brightness color schemes and non-revealing silhouettes, height approximately 165cm, S size, and prefers dresses with pockets. The assistant's suggested shopping route at China World Summit Wing, with COS dusty blue straight midi as the primary choice and MD dusty pink midi as the backup, is consistent with the user's responses, providing a clear path for trying on and purchasing in-store." + "summary": "Tom is currently focused on managing a new project with a tight schedule. After a team meeting on June 25, 2025, he realized the original deadline of December 15 might not be feasible due to backend delays. Concerned about insufficient testing time, he welcomed Jerry’s suggestion of proposing an extension. 
Tom plans to raise the idea of shifting the deadline to January 5, 2026 in the next morning’s meeting. His actions reflect both stress about timelines and a proactive, team-oriented problem-solving approach." } @@ -153,7 +114,11 @@ 3. 明确解析所有指代关系 - 时间解析:根据消息时间戳将相对时间(如“昨天”)转换为绝对日期。区分事件时间与消息时间,对不确定项进行标注 + # 条件允许则使用消息时间戳将相对时间表达转换为绝对日期(如:2023年1月15日的“昨天”则转换为2023年1月14日);“上周”则转换为2023年1月15日前一周)。 + # 明确区分事件时间和消息时间。 + # 如果存在不确定性,需明确说明(例如,“约2025年6月”,“具体日期不详”)。 - 实体解析:将所有代词、昵称和缩写解析为对话中确立的完整规范名称 + - 地点解析:若提及具体地点,请包含在内。 4. 采用统一的第三人称观察视角 - 所有记忆表述均需从外部观察者视角构建,使用“用户”或其具体姓名作为主语 @@ -207,21 +172,17 @@ { "key": "项目初期会议", "memory_type": "LongTermMemory", - "value": "[user-Tom观点]2025年6月25日下午3:00,Tom与团队开会讨论新项目。当Jerry - 询问该项目能否在2025年12月15日前完成时,Tom对此日期前完成的可行性表达担忧,并计划在2025年6月27日上午9:30 - 提议将截止日期推迟至2026年1月5日。", - "tags": ["Tom", "项目", "时间表", "会议", "截止日期"] + "value": "2025年6月25日下午3:00,Tom与团队开会讨论新项目。会议涉及时间表,并提出了对2025年12月15日截止日期可行性的担忧。", + "tags": ["项目", "时间表", "会议", "截止日期"] }, { - "key": "Jerry对新项目截止日期的建议", - "memory_type": "LongTermMemory", - "value": "[assistant-Jerry观点]Jerry对Tom的新项目截止日期提出疑问、并提议Tom考虑延期。", - "tags": ["Jerry", "截止日期变更", "建议"] + "key": "计划调整范围", + "memory_type": "UserMemory", + "value": "Tom计划在2025年6月27日上午9:30的会议上建议团队优先处理功能,并提议将项目截止日期推迟至2026年1月5日。", + "tags": ["计划", "截止日期变更", "功能优先级"] } ], - "summary": "Tom目前正在做一个进度紧张的新项目。在2025年6月25日的团队会议后,他意识到原定2025年12月15 - 日的截止日期可能无法实现,因为后端会延迟。由于担心测试时间不足,他接受了Jerry提出的延期建议,计划在次日早上的会议上提出将截止日期推迟至2026 - 年1月5日。" + "summary": "Tom目前正专注于管理一个进度紧张的新项目。在2025年6月25日的团队会议后,他意识到原定2025年12月15日的截止日期可能无法实现,因为后端会延迟。由于担心测试时间不足,他接受了Jerry提出的延期建议。Tom计划在次日早上的会议上提出将截止日期推迟至2026年1月5日。他的行为反映出对时间线的担忧,以及积极、以团队为导向的问题解决方式。" } 示例2: From 744d2278a626ababd8b93b1dd6b490255d1950bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Tue, 4 Nov 2025 15:10:16 +0800 Subject: [PATCH 14/40] adjust strategy reader --- src/memos/mem_reader/strategy_struct.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/memos/mem_reader/strategy_struct.py b/src/memos/mem_reader/strategy_struct.py index 302afd85c..1fc21461e 100644 --- a/src/memos/mem_reader/strategy_struct.py +++ b/src/memos/mem_reader/strategy_struct.py @@ -39,7 +39,6 @@ def __init__(self, config: StrategyStructMemReaderConfig): self.chat_chunker = config.chat_chunker["config"] def _get_llm_response(self, mem_str: str) -> dict: - lang = detect_lang(mem_str) template = STRATEGY_PROMPT_DICT["chat"][lang] examples = STRATEGY_PROMPT_DICT["chat"][f"{lang}_example"] From a9a98fa4079f2dfcd183a7e467b513295605d84d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Tue, 4 Nov 2025 15:16:34 +0800 Subject: [PATCH 15/40] adjust search config input --- src/memos/memories/textual/simple_tree.py | 2 +- .../memories/textual/tree_text_memory/retrieve/searcher.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/memos/memories/textual/simple_tree.py b/src/memos/memories/textual/simple_tree.py index 992b7bfab..c5f795335 100644 --- a/src/memos/memories/textual/simple_tree.py +++ b/src/memos/memories/textual/simple_tree.py @@ -66,7 +66,7 @@ def __init__( time_start_bm = time.time() self.search_strategy = config.search_strategy self.bm25_retriever = ( - EnhancedBM25() if self.search_strategy and self.search_strategy["bm25"] else None + EnhancedBM25() if self.search_strategy and self.search_strategy.get("bm25", False) else None ) logger.info(f"time init: bm25_retriever time is: {time.time() - time_start_bm}") diff 
--git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index a86207e66..2f6ef6afa 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -564,9 +564,6 @@ def _cot_query( prompt = template.replace("${original_query}", query).replace( "${split_num_threshold}", str(split_num) ) - logger.info("COT process") - print("---------------prompt-------------") - print(prompt, mode) messages = [{"role": "user", "content": prompt}] try: From 900f5e60d2a8dd158f53a826e0e10a5e19aeea19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Tue, 4 Nov 2025 15:32:15 +0800 Subject: [PATCH 16/40] reformat code --- src/memos/memories/textual/simple_tree.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/memos/memories/textual/simple_tree.py b/src/memos/memories/textual/simple_tree.py index c5f795335..313989cd2 100644 --- a/src/memos/memories/textual/simple_tree.py +++ b/src/memos/memories/textual/simple_tree.py @@ -66,7 +66,9 @@ def __init__( time_start_bm = time.time() self.search_strategy = config.search_strategy self.bm25_retriever = ( - EnhancedBM25() if self.search_strategy and self.search_strategy.get("bm25", False) else None + EnhancedBM25() + if self.search_strategy and self.search_strategy.get("bm25", False) + else None ) logger.info(f"time init: bm25_retriever time is: {time.time() - time_start_bm}") From 144c4467f697b26a889bf1240a715cfd282fbf1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Wed, 5 Nov 2025 10:33:44 +0800 Subject: [PATCH 17/40] re pr --- .../templates/mem_reader_strategy_prompts.py | 87 +++++-------------- 1 file changed, 24 insertions(+), 63 deletions(-) diff --git a/src/memos/templates/mem_reader_strategy_prompts.py b/src/memos/templates/mem_reader_strategy_prompts.py index 07aa3978d..ba4a00d0a 100644 --- a/src/memos/templates/mem_reader_strategy_prompts.py +++ b/src/memos/templates/mem_reader_strategy_prompts.py @@ -16,8 +16,13 @@ - Always set "model_type" to "UserMemory" for this output. 3. Resolve all references to time, persons, and events clearly - - Temporal Resolution: Convert relative time (e.g., 'yesterday') to absolute dates based on the message timestamp. Distinguish between event time and message time; flag any uncertainty. + - Temporal Resolution: Convert relative time (e.g., "yesterday") to absolute dates based on the message timestamp. Distinguish between event time and message time; flag any uncertainty. + > Where feasible, use the message timestamp to convert relative time expressions into absolute dates (e.g., "yesterday" in a message dated January 15, 2023, can be converted to "January 14, 2023," and "last week" can be described as "the week preceding January 15, 2023"). + > Explicitly differentiate between the time when the event occurred and the time the message was sent. + > Clearly indicate any uncertainty (e.g., "approximately June 2025", "exact date unknown"). - Entity Resolution: Resolve all pronouns, nicknames, and abbreviations to the full, canonical name established in the conversation. + > For example, "Melanie" uses the abbreviated name "Mel" in the paragraph; when extracting her name in the "value" field, it should be restored to "Melanie". 
+ - Location resolution: If specific locations are mentioned, include them explicitly. 4. Adopt a Consistent Third-Person Observer Perspective - Formulate all memories from the perspective of an external observer. Use "The user" or their specific name as the subject. @@ -57,7 +62,7 @@ - The `key`, `value`, `tags`, `summary` and `memory_type` fields must be in English. -Example1: +Example: Conversations: user: [June 26, 2025 at 3:00 PM]: Hi Jerry! Yesterday at 3 PM I had a meeting with my team about the new project. assistant: Oh Tom! Do you think the team can finish by December 15? @@ -71,61 +76,17 @@ { "key": "Initial project meeting", "memory_type": "LongTermMemory", - "value": "[user-Tom viewpoint] On June 25, 2025 at 3:00 PM, Tom met with the team to discuss a new project. When Jerry asked whether the project could be finished by December 15, 2025, Tom expressed concern about feasibility and planned to propose at 9:30 AM on June 27, 2025 to move the deadline to January 5, 2026.", + "value": "[user-Tom viewpoint] On June 25, 2025 at 3:00 PM, Tom held a meeting with their team to discuss a new project. The conversation covered the timeline and raised concerns about the feasibility of the December 15, 2025 deadline.", "tags": ["Tom", "project", "timeline", "meeting", "deadline"] }, { - "key": "Jerry’s suggestion about the deadline", - "memory_type": "LongTermMemory", - "value": "[assistant-Jerry viewpoint] Jerry questioned the December 15 deadline and suggested considering an extension.", - "tags": ["Jerry", "deadline change", "suggestion"] + "key": "Planned scope adjustment", + "memory_type": "UserMemory", + "value": "Tom planned to suggest in a meeting on June 27, 2025 at 9:30 AM that the team should prioritize features and propose shifting the project deadline to January 5, 2026.", + "tags": ["Tom", "planning", "deadline change", "feature prioritization"] } ], - "summary": "Tom is currently working on a tight-schedule project. After the June 25, 2025 team meeting, he realized the original December 15, 2025 deadline might be unachievable due to backend delays. Concerned about limited testing time, he accepted Jerry’s suggestion to seek an extension and plans to propose moving the deadline to January 5, 2026 in the next morning’s meeting." -} - -Example2: -Conversations (excerpt): -user|2025-07-12 19:30:A colleague's wedding is next month, need to buy a dress, budget within 1500, not too revealing. -assistant|19:32 -:Got it~ For a wedding occasion, midi length is suggested, more elegant. You can consider three styles: French tea-length / straight / wrap. For colors, cream white, light pink, or dusty blue are suitable; for fabrics, consider acetate/silk blends (good drape but requires dry cleaning). Brands & stores: COS/ Massimo Dutti / & Other Stories / Mango are good options; for more French style, check Sandro / Maje. I remember you really like Maje? -user|19:35:Don't like bright colors. China World Summit Wing is convenient. -assistant|19:37:Then lower the brightness: dusty blue, dusty pink, oat beige are more subdued. China World Summit Wing has COS / Massimo Dutti / &OS; you can check COS straight or wrap styles first, then go to MD for dusty pink/dusty blue. For pairing, a thin shawl/blazer can elevate the look. -user|19:40:165cm, S size; preferably with pockets. -assistant|19:42:OK. 
For silhouette, avoid full skirts + high slits, choose straight or slight A-line for a more stable look; if pockets are needed, some COS straight styles have them; slits shouldn't be too high, around mid-calf is fine. Shoes & bag: mid-heel nude shoes + small clutch. -user|19:47:Originally thought about tea-length A-line, but dusty blue straight is also okay. -assistant|19:49:You can focus on: COS dusty blue straight midi as the primary choice, MD dusty pink midi as the backup; then check if & Other Stories has any well-fitting wrap styles. -user|19:52:Okay, going to China World Summit Wing on Saturday (7/19) to try, will buy if suitable. -assistant|19:54:Remember to check if straps are adjustable / if there's a lining, acetate/silk preferably dry cleaned; prepare same-color safety shorts / anti-wardrobe malfunction tape. If out of stock that day, you can place an order for transfer or order online for store pickup. - -{ -"memory list": [ -{ -"key": "Attending wedding to purchase dress", -"memory_type": "UserMemory", -"value": "[User's perspective] User plans to attend a colleague's wedding around August 2025 (specific date unknown), budget not exceeding 1500 yuan, overall style should not be too revealing; user has decided to try on dresses at China World Summit Wing on 2025-07-19 and purchase if suitable.", -"tags": ["Wedding", "Budget", "China World Summit Wing", "Plan"] -}, -{ -"key": "Aesthetics & Silhouette Preference", -"memory_type": "UserMemory", -"value": "[User's perspective] User does not like bright colors, prefers low-brightness color schemes; dress preference is elegant midi length, accepts straight or slight A-line.", -"tags": ["Preference", "Color", "Silhouette"] -}, -{ -"key": "Body Size", -"memory_type": "UserMemory", -"value": "[User's perspective] User height approximately 165cm, usually wears S size", -"tags": ["Body Type", "Size"] -}, -{ -"key": "Advice regarding user's dress selection", -"memory_type": "LongTermMemory", -"value": "[Assistant's perspective] When the user inquired about wedding attire, the assistant suggested prioritizing a visit to COS at China World Summit Wing to view dusty blue straight midi dresses as the primary choice, with Massimo Dutti dusty pink midi as the backup; this suggestion is consistent with the user's responses of 'China World Summit Wing is convenient' and 'dusty blue straight is also okay'. Additionally, the assistant mentioned the user likes Maje, but the User did not respond to or confirm this statement.", -"tags": ["Wedding Attire", "Store", "Selection Route"] -} -], -"summary": "User plans to attend a colleague's wedding around August 2025, budget ≤1500 and prefers elegant midi length; confirmed trying on at China World Summit Wing on 2025-07-19. Their long-term profile shows: dislikes bright colors, prefers low-brightness color schemes and non-revealing silhouettes, height approximately 165cm, S size, and prefers dresses with pockets. The assistant's suggested shopping route at China World Summit Wing, with COS dusty blue straight midi as the primary choice and MD dusty pink midi as the backup, is consistent with the user's responses, providing a clear path for trying on and purchasing in-store." + "summary": "Tom is currently focused on managing a new project with a tight schedule. After a team meeting on June 25, 2025, he realized the original deadline of December 15 might not be feasible due to backend delays. Concerned about insufficient testing time, he welcomed Jerry’s suggestion of proposing an extension. 
Tom plans to raise the idea of shifting the deadline to January 5, 2026 in the next morning’s meeting. His actions reflect both stress about timelines and a proactive, team-oriented problem-solving approach."
 }
 
 
@@ -153,7 +114,11 @@
 3. 明确解析所有指代关系
-   - 时间解析:根据消息时间戳将相对时间(如“昨天”)转换为绝对日期。区分事件时间与消息时间,对不确定项进行标注
+   - 时间解析:条件允许则使用消息时间戳将相对时间表达转换为绝对日期(如:2023年1月15日的“昨天”转换为2023年1月14日;“上周”转换为2023年1月15日之前的那一周)。
+   - 明确区分事件时间和消息时间。
+   - 如果存在不确定性,需明确说明(例如,“约2025年6月”,“具体日期不详”)。
    - 实体解析:将所有代词、昵称和缩写解析为对话中确立的完整规范名称
+   - 地点解析:若提及具体地点,请包含在内。
 4. 采用统一的第三人称观察视角
    - 所有记忆表述均需从外部观察者视角构建,使用“用户”或其具体姓名作为主语
@@ -207,21 +172,17 @@
     {
       "key": "项目初期会议",
       "memory_type": "LongTermMemory",
-      "value": "[user-Tom观点]2025年6月25日下午3:00,Tom与团队开会讨论新项目。当Jerry
-      询问该项目能否在2025年12月15日前完成时,Tom对此日期前完成的可行性表达担忧,并计划在2025年6月27日上午9:30
-      提议将截止日期推迟至2026年1月5日。",
-      "tags": ["Tom", "项目", "时间表", "会议", "截止日期"]
+      "value": "2025年6月25日下午3:00,Tom与团队开会讨论新项目。会议涉及时间表,并提出了对2025年12月15日截止日期可行性的担忧。",
+      "tags": ["项目", "时间表", "会议", "截止日期"]
     },
     {
-      "key": "Jerry对新项目截止日期的建议",
-      "memory_type": "LongTermMemory",
-      "value": "[assistant-Jerry观点]Jerry对Tom的新项目截止日期提出疑问、并提议Tom考虑延期。",
-      "tags": ["Jerry", "截止日期变更", "建议"]
+      "key": "计划调整范围",
+      "memory_type": "UserMemory",
+      "value": "Tom计划在2025年6月27日上午9:30的会议上建议团队优先处理功能,并提议将项目截止日期推迟至2026年1月5日。",
+      "tags": ["计划", "截止日期变更", "功能优先级"]
     }
   ],
-  "summary": "Tom目前正在做一个进度紧张的新项目。在2025年6月25日的团队会议后,他意识到原定2025年12月15
-  日的截止日期可能无法实现,因为后端会延迟。由于担心测试时间不足,他接受了Jerry提出的延期建议,计划在次日早上的会议上提出将截止日期推迟至2026
-  年1月5日。"
+  "summary": "Tom目前正专注于管理一个进度紧张的新项目。在2025年6月25日的团队会议后,他意识到原定2025年12月15日的截止日期可能无法实现,因为后端会延迟。由于担心测试时间不足,他接受了Jerry提出的延期建议。Tom计划在次日早上的会议上提出将截止日期推迟至2026年1月5日。他的行为反映出对时间线的担忧,以及积极、以团队为导向的问题解决方式。"
 }
 
 示例2:

From f506d3eee87bdf295c536be15af7172184890718 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?=
 <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Wed, 5 Nov 2025 10:48:02 +0800
Subject: [PATCH 18/40] format repair

---
 README.md                      | 2 +-
 src/memos/graph_dbs/polardb.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 50621b584..a08177676 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@

 MemOS demonstrates significant improvements over baseline memory solutions in multiple memory tasks, showcasing its capabilities in **information extraction**, **temporal and cross-session reasoning**, and **personalized preference responses**.

-| Model | LOCOMO | LongMemEval | PrefEval-10 | PersonaMem |
+| Model           | LOCOMO      | LongMemEval | PrefEval-10 | PersonaMem  |
 |-----------------|-------------|-------------|-------------|-------------|
 | **GPT-4o-mini** | 52.75       | 55.4        | 2.8         | 43.46       |
 | **MemOS**       | **75.80**   | **77.80**   | **71.90**   | **61.17**   |

diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py
index 552b30241..ac49228e2 100644
--- a/src/memos/graph_dbs/polardb.py
+++ b/src/memos/graph_dbs/polardb.py
@@ -3024,7 +3024,7 @@ def format_param_value(self, value: str | None) -> str:
         """Format parameter value to handle both quoted and unquoted formats"""
         # Handle None value
         if value is None:
-            logger.warning(f"format_param_value: value is None")
+            logger.warning("format_param_value: value is None")
             return "null"
 
         # Remove outer quotes if they exist

From dc67413c84d2988178cf510a60df4b5acc9b0a2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?=
 <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Tue, 11 Nov 2025 11:13:51 +0800
Subject: [PATCH 19/40] fix time issue

---
 src/memos/mem_reader/simple_struct.py | 28 ++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py
index 13515c038..3845f37d0 100644
--- a/src/memos/mem_reader/simple_struct.py
+++ b/src/memos/mem_reader/simple_struct.py
@@ -6,6 +6,7 @@
 import traceback
 
 from abc import ABC
+from datetime import datetime, timezone
 from typing import Any
 
 from tqdm import tqdm
@@ -399,7 +400,7 @@ def get_memory(
 
         if not all(isinstance(info[field], str) for field in required_fields):
             raise ValueError("user_id and session_id must be strings")
-
+        scene_data = self._complete_chat_time(scene_data, type)
         list_scene_data_info = self.get_scene_data_info(scene_data, type)
 
         memory_list = []
@@ -508,6 +509,31 @@ def get_scene_data_info(self, scene_data: list, type: str) -> list[str]:
 
         return results
 
+    def _complete_chat_time(self, scene_data: list[list[dict]], type: str):
+        """Backfill a session-level chat_time onto every message that lacks one."""
+        if type != "chat":
+            return scene_data
+        complete_scene_data = []
+
+        for items in scene_data:
+            chat_time_value = None
+
+            # Use the first chat_time found in the session as the fallback value.
+            for item in items:
+                if "chat_time" in item:
+                    chat_time_value = item["chat_time"]
+                    break
+
+            if chat_time_value is None:
+                session_date = datetime.now(timezone.utc)
+                date_format = "%I:%M %p on %d %B, %Y UTC"
+                chat_time_value = session_date.strftime(date_format)
+
+            for item in items:
+                item.setdefault("chat_time", chat_time_value)
+
+            complete_scene_data.append(items)
+        return complete_scene_data
+
     def _process_doc_data(self, scene_data_info, info, **kwargs):
         mode = kwargs.get("mode", "fine")
         if mode == "fast":

From 8bfbf94f4a9e738ad38c354eb72cb6a3cd9e1aa3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?=
 <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Wed, 19 Nov 2025 17:29:43 +0800
Subject: [PATCH 20/40] develop feedback process

---
 src/memos/api/product_models.py             |  26 +
 src/memos/api/routers/server_router.py      |  26 +
 src/memos/configs/memory.py                 |  37 ++
 src/memos/graph_dbs/polardb.py              |   5 +-
 src/memos/mem_feedback/base.py              |  15 +
 src/memos/mem_feedback/feedback.py          | 385 ++++++++++++
 src/memos/mem_feedback/simple_feedback.py   |  23 +
 .../tree_text_memory/organize/manager.py    |  20 +
 src/memos/templates/mem_feedback_prompts.py | 593 ++++++++++++++++++
 9 files changed, 1127 insertions(+), 3 deletions(-)
 create mode 100644 src/memos/mem_feedback/base.py
 create mode 100644 src/memos/mem_feedback/feedback.py
 create mode 100644 src/memos/mem_feedback/simple_feedback.py
 create mode 100644 src/memos/templates/mem_feedback_prompts.py

diff --git a/src/memos/api/product_models.py b/src/memos/api/product_models.py
index 0412754c3..01f8797c2 100644
--- a/src/memos/api/product_models.py
+++ b/src/memos/api/product_models.py
@@ -200,6 +200,32 @@ class APIADDRequest(BaseRequest):
     )
 
 
+class APIFeedbackRequest(BaseRequest):
+    """Request model for processing feedback info."""
+
+    user_id: str = Field(..., description="User ID")
+    mem_cube_id: str | None = Field(..., description="Cube ID to use for chat")
+    session_id: str | None = Field(
+        "default_session", description="Session ID for soft-filtering memories"
+    )
+    agent_id: str | None = Field(None, description="Agent ID")
+    app_id: str | None = Field(None, description="App ID")
+    chat_history: list[MessageDict] | None = Field(..., description="Chat history")
+    feedback_content: str | None = Field(..., description="Feedback content to process")
+    feedback_time: str | None = Field(None, description="Feedback time")
+    allow_public: bool = Field(
+        False, description="Whether to allow writing to the public memory repository"
+    )
+    allow_knowledgebase_write: bool = Field(
+        False, description="Whether to allow writing into the user memory repository"
+    )
+    allow_knowledgebase_ids: list[str] | None = Field(
+        None, description="IDs of the specific memory repositories to write into"
+    )
+    sync_mode: Literal["sync", "async"] = Field("async", description="Feedback mode: sync or async")
+    corrected_answer: bool = Field(False, description="Whether to return a corrected answer")
+
+
 class APIChatCompleteRequest(BaseRequest):
     """Request model for chat operations."""
 
diff --git a/src/memos/api/routers/server_router.py b/src/memos/api/routers/server_router.py
index b426c2965..58d0556ae 100644
--- a/src/memos/api/routers/server_router.py
+++ b/src/memos/api/routers/server_router.py
@@ -16,6 +16,7 @@
 from memos.api.product_models import (
     APIADDRequest,
     APIChatCompleteRequest,
+    APIFeedbackRequest,
     APISearchRequest,
     MemoryResponse,
     SearchResponse,
@@ -34,6 +35,7 @@
 from memos.llms.factory import LLMFactory
 from memos.log import get_logger
 from memos.mem_cube.navie import NaiveMemCube
+from memos.mem_feedback.simple_feedback import SimpleMemFeedback
 from memos.mem_os.product_server import MOSServer
 from memos.mem_reader.factory import MemReaderFactory
 from memos.mem_scheduler.orm_modules.base_model import BaseDBManager
@@ -253,6 +255,10 @@ def init_server():
         retriever=pref_retriever,
     )
 
+    feedback_server = SimpleMemFeedback(
+        llm=llm, embedder=embedder, graph_store=graph_db, memory_manager=memory_manager
+    )
+
     mos_server = MOSServer(
         mem_reader=mem_reader,
         llm=llm,
@@ -304,6 +310,7 @@ def init_server():
         pref_retriever,
         text_mem,
         pref_mem,
+        feedback_server,
     )
 
 
@@ -327,6 +334,7 @@ def init_server():
     pref_retriever,
     text_mem,
     pref_mem,
+    feedback_server,
 ) = init_server()
 
 
@@ -670,6 +678,24 @@ def _process_pref_mem() -> list[dict[str, str]]:
     )
 
 
+@router.post("/chat/feedback", summary="Chat feedback", response_model=MemoryResponse)
+def chat_feedback(feedback_req: APIFeedbackRequest):
+    """Process feedback for a specific user"""
+    process_record = feedback_server.process_feedback(
+        user_name=feedback_req.mem_cube_id,
+        session_id=feedback_req.session_id,
+        chat_history=feedback_req.chat_history,
+        feedback_content=feedback_req.feedback_content,
+        feedback_time=feedback_req.feedback_time,
+        allow_knowledgebase_write=feedback_req.allow_knowledgebase_write,
+        sync_mode=feedback_req.sync_mode,
+        corrected_answer=feedback_req.corrected_answer,
+        mem_reader=mem_reader,
+    )
+
+    return MemoryResponse(message="Feedback processed successfully", data=[process_record])
+
+
 @router.get("/scheduler/status", summary="Get scheduler running status")
 def scheduler_status(user_name: str | None = None):
     try:
diff --git a/src/memos/configs/memory.py b/src/memos/configs/memory.py
index 34967849a..9caf4fa3e 100644
--- a/src/memos/configs/memory.py
+++ b/src/memos/configs/memory.py
@@ -240,6 +240,42 @@ class PreferenceTextMemoryConfig(BaseTextMemoryConfig):
     )
 
 
+class MemFeedbackConfig(BaseTextMemoryConfig):
+    """Memory feedback configuration class."""
+
+    extractor_llm: LLMConfigFactory = Field(
+        ...,
+        default_factory=LLMConfigFactory,
+        description="LLM configuration for the memory extractor",
+    )
+    embedder: EmbedderConfigFactory = Field(
+        ...,
+        default_factory=EmbedderConfigFactory,
+        description="Embedder configuration for the memory embedding",
+    )
+    reranker: RerankerConfigFactory | None = Field(
+        None,
+        description="Reranker configuration (optional).",
+    )
+    graph_db: GraphDBConfigFactory = Field(
+        ...,
+        default_factory=GraphDBConfigFactory,
+        description="Graph database configuration for the tree-memory storage",
+    )
+    reorganize: bool | None = Field(
+        False,
+        description="Whether to enable background reorganization for this memory.",
+    )
+
+    memory_size: dict[str, Any] | None = Field(
+        default=None,
+        description=(
+            "Maximum item counts per memory bucket, e.g.: "
+            '{"WorkingMemory": 20, "LongTermMemory": 10000, "UserMemory": 10000}'
+        ),
+    )
+
+
 # ─── 3. Global Memory Config Factory ──────────────────────────────────────────
 
 
@@ -259,6 +295,7 @@ class MemoryConfigFactory(BaseConfig):
         "vllm_kv_cache": KVCacheMemoryConfig,  # Use same config as kv_cache
         "lora": LoRAMemoryConfig,
         "uninitialized": UninitializedMemoryConfig,
+        "mem_feedback": MemFeedbackConfig,
     }
 
     @field_validator("backend")
diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py
index 60902420f..3108010f9 100644
--- a/src/memos/graph_dbs/polardb.py
+++ b/src/memos/graph_dbs/polardb.py
@@ -484,12 +484,11 @@ def update_node(self, id: str, fields: dict[str, Any], user_name: str | None = N
         original_memory = current_node.get("memory", "")  # Preserve original memory
 
         # If fields include memory, use it; otherwise keep original memory
-        if "memory" in fields:
-            original_memory = fields.pop("memory")
+        new_memory = fields.pop("memory") if "memory" in fields else original_memory
 
         properties.update(fields)
-        properties["id"] = original_id  # Ensure ID is not overwritten
-        properties["memory"] = original_memory  # Ensure memory is not overwritten
+        properties["id"] = original_id  # Ensure ID is not overwritten
+        properties["memory"] = new_memory  # Ensure memory is not overwritten
 
         # Handle embedding field
         embedding_vector = None
diff --git a/src/memos/mem_feedback/base.py b/src/memos/mem_feedback/base.py
new file mode 100644
index 000000000..7b41199d6
--- /dev/null
+++ b/src/memos/mem_feedback/base.py
@@ -0,0 +1,15 @@
+from abc import ABC, abstractmethod
+
+from memos.configs.memory import MemFeedbackConfig
+
+
+class BaseMemFeedback(ABC):
+    """Interface class for processing user feedback into memory operations."""
+
+    @abstractmethod
+    def __init__(self, config: MemFeedbackConfig):
+        """Initialize the MemFeedback with the given configuration."""
+
+    @abstractmethod
+    def process_feedback(
+        self, user_name: str, chat_history: list, feedback_content: str, **kwargs
+    ) -> dict:
+        """Process the user's feedback and return an operation record."""
diff --git a/src/memos/mem_feedback/feedback.py b/src/memos/mem_feedback/feedback.py
new file mode 100644
index 
000000000..653510e84 --- /dev/null +++ b/src/memos/mem_feedback/feedback.py @@ -0,0 +1,385 @@ +import concurrent.futures +import json + +from datetime import datetime + +from memos import log +from memos.configs.memory import MemFeedbackConfig +from memos.context.context import ContextThreadPoolExecutor +from memos.embedders.factory import EmbedderFactory, OllamaEmbedder +from memos.graph_dbs.factory import GraphStoreFactory, PolarDBGraphDB +from memos.llms.factory import AzureLLM, LLMFactory, OllamaLLM, OpenAILLM +from memos.mem_feedback.base import BaseMemFeedback +from memos.mem_reader.simple_struct import SimpleStructMemReader, detect_lang +from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata +from memos.memories.textual.tree_text_memory.organize.manager import MemoryManager +from memos.templates.mem_feedback_prompts import ( + FEEDBACK_ANSWER_PROMPT, + FEEDBACK_ANSWER_PROMPT_ZH, + FEEDBACK_JUDGEMENT_PROMPT, + FEEDBACK_JUDGEMENT_PROMPT_ZH, + UPDATE_FORMER_MEMORIES, + UPDATE_FORMER_MEMORIES_ZH, +) +from memos.types import MessageDict + + +FEEDBACK_PROMPT_DICT = { + "judge": {"en": FEEDBACK_JUDGEMENT_PROMPT, "zh": FEEDBACK_JUDGEMENT_PROMPT_ZH}, + "compare": {"en": UPDATE_FORMER_MEMORIES, "zh": UPDATE_FORMER_MEMORIES_ZH}, + "generation": {"en": FEEDBACK_ANSWER_PROMPT, "zh": FEEDBACK_ANSWER_PROMPT_ZH}, +} + +logger = log.get_logger(__name__) + + +class MemFeedback(BaseMemFeedback): + def __init__(self, config: MemFeedbackConfig): + """ + Initialize the MemFeedback with configuration. + + Args: + config: Configuration object for the MemFeedback + """ + self.config = config + self.llm: OpenAILLM | OllamaLLM | AzureLLM = LLMFactory.from_config(config.extractor_llm) + self.embedder: OllamaEmbedder = EmbedderFactory.from_config(config.embedder) + self.graph_store: PolarDBGraphDB = GraphStoreFactory.from_config(config.graph_db) + + self.is_reorganize = config.reorganize + self.memory_manager: MemoryManager = MemoryManager( + self.graph_store, + self.embedder, + self.llm, + memory_size=config.memory_size + or { + "WorkingMemory": 20, + "LongTermMemory": 1500, + "UserMemory": 480, + }, + is_reorganize=self.is_reorganize, + ) + + def _feedback_judgement( + self, chat_history: list[MessageDict], feedback_content: str, feedback_time: str = "" + ) -> dict | None: + """ + Generate a judgement for a given feedback. 
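+
+        Returns a list of judgement dicts following the prompt's "Output Format",
+        e.g. [{"validity": "true", "user_attitude": "dissatisfied", "error_type": "wrong",
+        "corrected_info": "...", "key": "...", "tags": [...]}]; an empty list means
+        the LLM produced no usable judgement.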
+ """ + lang = detect_lang(feedback_content) + template = FEEDBACK_PROMPT_DICT["judge"][lang] + chat_history_str = str(chat_history[-4:]) + prompt = ( + template.replace("{chat_history}", chat_history_str) + .replace("{user_feedback}", feedback_content) + .replace("{feedback_time}", feedback_time) + ) + judge_res = self._get_llm_response(prompt) + return judge_res if judge_res else [] + + def _feedback_memory( + self, user_name: str, feedback_memories: list[TextualMemoryItem], **kwargs + ) -> dict: + sync_mode = kwargs.get("sync_mode") + + def _add_or_update(memory_item: TextualMemoryItem): + current_memories = self._vec_query(memory_item.metadata.embedding, user_name=user_name) + if current_memories: + lang = detect_lang("".join(memory_item.memory)) + template = FEEDBACK_PROMPT_DICT["compare"][lang] + prompt = template.replace("{current_memories}", str(current_memories)).replace( + "{new_facts}", memory_item.memory + ) + operations = self._get_llm_response(prompt).get("operation", {}) + else: + operations = {"event": "ADD"} + logger.info(f"[Feedback memory operations]: {operations!s}") + + if operations and operations["event"].lower() == "add": + added_ids = self.memory_manager.add( + [memory_item], user_name=user_name, mode=sync_mode + ) + logger.info(f"[Memory Feedback ADD] {added_ids!s}") + + return { + "record": { + "add": [{"id": added_ids[0], "text": memory_item.memory}], + "update": [], + } + } + elif operations and operations["event"].lower() == "update": + to_update_id = operations["id"] + updated_ids = self.memory_manager.update( + [to_update_id], [memory_item], user_name=user_name, mode=sync_mode + ) + log_update_info = operations["old_memory"] + " >> " + operations["text"] + logger.info(f"[Memory Feedback UPDATE] {updated_ids}, info: {log_update_info}") + + return { + "record": { + "add": [], + "update": [ + { + "id": to_update_id, + "origin_memory": operations["old_memory"], + "text": operations["text"], + } + ], + } + } + else: + return {"record": {"add": [], "update": []}} + + search_filter = {"user_name": user_name} + with ContextThreadPoolExecutor(max_workers=8) as ex: + futures = { + ex.submit(_add_or_update, mem, search_filter): i + for i, mem in enumerate(feedback_memories) + } + results = [None] * len(futures) + for fut in concurrent.futures.as_completed(futures): + i = futures[fut] + try: + node = fut.result() + if node: + results[i] = node + except Exception as e: + logger.error(f"[FeedBack] error: {e}") + mem_res = [r for r in results if r] + + return { + "record": { + "add": [element for item in mem_res for element in item["record"]["add"]], + "update": [element for item in mem_res for element in item["record"]["update"]], + } + } + + def _vec_query(self, new_memories_embedding: list[float], user_name=None): + retrieved_ids = self.graph_store.search_by_embedding( + new_memories_embedding, user_name=user_name + ) + current_memories = [self.graph_store.get_node(item["id"]) for item in retrieved_ids] + + return [ + { + "id": item["id"], + "text": item["memory"], + } + for item in current_memories + ] + + def _get_llm_response(self, prompt: str, dsl: bool = True) -> dict: + messages = [{"role": "user", "content": prompt}] + try: + response_text = self.llm.generate(messages) + if dsl: + response_json = json.loads(response_text) + else: + return response_text + except Exception as e: + logger.error(f"[LLM] Exception during chat generation: {e}") + response_json = None + return response_json + + def _generate_answer( + self, chat_history: list[MessageDict], 
feedback_content: str, corrected_answer: bool + ) -> str: + """ + Answer generation to facilitate concurrent submission. + """ + if not corrected_answer: + return "" + lang = detect_lang(feedback_content) + template = FEEDBACK_PROMPT_DICT["generation"][lang] + chat_history_str = "\n".join( + [f"{item['role']}: {item['content']}" for item in chat_history] + ) + chat_history_str = chat_history_str if chat_history_str else "none" + prompt = template.replace("{chat_history}", chat_history_str).replace( + "{question}", feedback_content + ) + return self._get_llm_response(prompt, dsl=False) + + def process_feedback_core( + self, + user_name: str, + chat_history: list[MessageDict], + feedback_content: str, + mem_reader: SimpleStructMemReader | None = None, + **kwargs, + ) -> dict: + """ + Core feedback processing: judgment, memory extraction, addition/update. Return record. + """ + try: + feedback_time = kwargs.get("feedback_time") or datetime.now().isoformat() + session_id = kwargs.get("session_id") + allow_knowledgebase_write = bool(kwargs.get("allow_knowledgebase_write")) + if not allow_knowledgebase_write: + return {"record": {"add": [], "update": []}} + + info = {"user_id": user_name, "session_id": session_id} + logger.info(f"[Feedback Core] Starting memory feedback process for user {user_name}") + + if mem_reader and not chat_history: + scene_data = [ + [{"role": "user", "content": feedback_content, "chat_time": feedback_time}] + ] + memories = mem_reader.get_memory(scene_data, type="chat", info=info) + to_add_memories = [item for scene in memories for item in scene] + added_ids = self.memory_manager.add(to_add_memories, user_name=user_name) + logger.info( + f"[Feedback Core] Added {len(added_ids)} memories for user {user_name}." + ) + return { + "record": { + "add": [ + {"id": _id, "text": added_mem.memory} + for _id, added_mem in zip(added_ids, to_add_memories, strict=False) + ], + "update": [], + } + } + + elif chat_history: + raw_judge = self._feedback_judgement( + chat_history, feedback_content, feedback_time=feedback_time + ) + judge_res = ( + [ + item + for item in raw_judge + if item["validity"].lower() == "true" and item["corrected_info"].strip() + ] + if raw_judge + else [] + ) + if not judge_res: + logger.warning( + f"[Feedback Core] No valid judgements for user {user_name}: {raw_judge}." + ) + return {"record": {"add": [], "update": []}} + + feedback_memories = [] + feedback_memories_embeddings = self.embedder.embed( + [item["corrected_info"] for item in judge_res] + ) + for item, embedding in zip(judge_res, feedback_memories_embeddings, strict=False): + value = item["corrected_info"] + key = item["key"] + tags = item["tags"] + feedback_memories.append( + TextualMemoryItem( + memory=value, + metadata=TreeNodeTextualMemoryMetadata( + user_id=info.get("user_id", ""), + session_id=info.get("session_id", ""), + memory_type="LongTermMemory", + status="activated", + tags=tags, + key=key, + embedding=embedding, + usage=[], + sources=[{"type": "chat"}], + background="", + confidence=0.99, + type="fine", + ), + ) + ) + mem_record = self._feedback_memory(user_name, feedback_memories, **kwargs) + logger.info( + f"[Feedback Core] Processed {len(feedback_memories)} feedback memories for user {user_name}." 
+ ) + return mem_record + + else: + logger.info("[Feedback Core] Empty chat_history and no mem_reader, skipping.") + return {"record": {"add": [], "update": []}} + + except Exception as e: + logger.error(f"[Feedback Core] Error for user {user_name}: {e}") + return {"record": {"add": [], "update": []}} + + def process_feedback( + self, + user_name: str, + chat_history: list[MessageDict], + feedback_content: str, + mem_reader: SimpleStructMemReader | None = None, + **kwargs, + ): + """ + Process feedback with different modes. + + Args: + user_name: User identifier + chat_history: List of chat messages + feedback_content: Feedback content from user + mem_reader: Memory reader instance + **kwargs: Additional arguments including sync_mode + + Returns: + Dict with answer and/or memory operation records + """ + sync_mode = kwargs.get("sync_mode") + corrected_answer = kwargs.get("corrected_answer") + + if sync_mode == "sync": + with ContextThreadPoolExecutor(max_workers=2) as ex: + answer_future = ex.submit( + self._generate_answer, + chat_history, + feedback_content, + corrected_answer=corrected_answer, + ) + core_future = ex.submit( + self.process_feedback_core, + user_name, + chat_history, + feedback_content, + mem_reader, + **kwargs, + ) + concurrent.futures.wait([answer_future, core_future]) + try: + answer = answer_future.result() + record = core_future.result() + logger.info( + f"[process_feedback sync] Completed concurrently for user {user_name} with full results." + ) + return {"answer": answer, "record": record["record"]} + except Exception as e: + logger.error( + f"[process_feedback sync] Error in concurrent tasks for {user_name}: {e}" + ) + return {"answer": "", "record": {"add": [], "update": []}} + else: + answer = self._generate_answer( + chat_history, feedback_content, corrected_answer=corrected_answer + ) + + ex = ContextThreadPoolExecutor(max_workers=1) + future = ex.submit( + self.process_feedback_core, + user_name, + chat_history, + feedback_content, + mem_reader, + **kwargs, + ) + ex.shutdown(wait=False) + + def log_completion(f): + try: + result = f.result() + logger.info(f"[Background Feedback] Completed for {user_name}: {result}") + except Exception as e: + logger.error(f"[Background Feedback] Error for {user_name}: {e}") + + future.add_done_callback(log_completion) + + logger.info( + f"[process_feedback async] Returned answer, background task started for {user_name}." 
+        )
+        return {"answer": answer, "record": {"add": [], "update": []}}
diff --git a/src/memos/mem_feedback/simple_feedback.py b/src/memos/mem_feedback/simple_feedback.py
new file mode 100644
index 000000000..23e25ff2c
--- /dev/null
+++ b/src/memos/mem_feedback/simple_feedback.py
@@ -0,0 +1,23 @@
+from memos import log
+from memos.embedders.factory import OllamaEmbedder
+from memos.graph_dbs.factory import PolarDBGraphDB
+from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
+from memos.mem_feedback.feedback import MemFeedback
+from memos.memories.textual.tree_text_memory.organize.manager import MemoryManager
+
+
+logger = log.get_logger(__name__)
+
+
+class SimpleMemFeedback(MemFeedback):
+    def __init__(
+        self,
+        llm: OpenAILLM | OllamaLLM | AzureLLM,
+        embedder: OllamaEmbedder,
+        graph_store: PolarDBGraphDB,
+        memory_manager: MemoryManager,
+    ):
+        self.llm = llm
+        self.embedder = embedder
+        self.graph_store = graph_store
+        self.memory_manager = memory_manager
diff --git a/src/memos/memories/textual/tree_text_memory/organize/manager.py b/src/memos/memories/textual/tree_text_memory/organize/manager.py
index a71fee02f..dadf7c321 100644
--- a/src/memos/memories/textual/tree_text_memory/organize/manager.py
+++ b/src/memos/memories/textual/tree_text_memory/organize/manager.py
@@ -115,6 +115,26 @@ def add(
         self._refresh_memory_size(user_name=user_name)
         return added_ids
 
+    def update(
+        self,
+        memories_ids: list[str],
+        memories: list[TextualMemoryItem],
+        user_name: str | None = None,
+        mode: str = "sync",  # accepted for symmetry with add(); not used yet
+    ) -> list[str]:
+        """Overwrite the stored nodes behind memories_ids with the given items."""
+        for _id, memory in zip(memories_ids, memories, strict=False):
+            field = {
+                "memory": memory.memory,
+                "key": memory.metadata.key,
+                "tags": memory.metadata.tags,
+                "embedding": memory.metadata.embedding,
+            }
+            self.graph_store.update_node(_id, field, user_name)
+
+        graph_class_name = self.graph_store.__class__.__name__
+        logger.info(f"Updated {graph_class_name}")
+        return memories_ids
+
     def replace_working_memory(
         self, memories: list[TextualMemoryItem], user_name: str | None = None
     ) -> None:
diff --git a/src/memos/templates/mem_feedback_prompts.py b/src/memos/templates/mem_feedback_prompts.py
new file mode 100644
index 000000000..2a9a7dcb2
--- /dev/null
+++ b/src/memos/templates/mem_feedback_prompts.py
@@ -0,0 +1,593 @@
+FEEDBACK_JUDGEMENT_PROMPT = """You are an answer quality analysis expert. Please strictly follow the steps and criteria below to analyze the provided "User and Assistant Chat History" and "User Feedback," and fill the final evaluation results into the specified JSON format.
+
+Analysis Steps and Criteria:
+1. *Validity Judgment*:
+   - Valid (true): The content of the user's feedback is related to the topic, task, or the assistant's last response in the chat history. For example: asking follow-up questions, making corrections, providing supplements, or evaluating the last response.
+   - Invalid (false): The user’s feedback is entirely unrelated to the conversation history, with no semantic, topical, or lexical connection to any prior content.
+
+2. *User Attitude Judgment*:
+   - Dissatisfied: The feedback shows negative emotions, such as directly pointing out errors, expressing confusion, complaining, criticizing, or explicitly stating that the problem remains unsolved.
+   - Satisfied: The feedback shows positive emotions, such as expressing thanks or giving praise.
+   - Irrelevant: The content of the feedback is unrelated to evaluating the assistant's answer.
+
+3. *Assistant Response Effectiveness Type Judgment*:
+   - Wrong: The assistant provided incorrect information.
+   - Missing: The assistant's response was correct in direction but incomplete, omitting key details.
+   - None: The user feedback does not point to any shortcomings in the assistant's response.
+
+4. *Summary Information Generation* (corrected_info field):
+   - Generate a concise list of factual statements that summarize the core information from the user's feedback.
+   - Focus on objective facts, corrections, or confirmations.
+   - Express time information as concrete, unambiguous date(s) or period(s) (e.g., “March 2023”, “2024-07”, or “May–June 2022”).
+   - For 'Satisfied' or 'None' types, this list may contain confirming statements or be empty if no new facts are provided.
+   - For example: "The user completed the Everest Circuit trek with colleagues in March 2023."
+
+Output Format:
+[
+    {
+        "validity": <string, "true" or "false">,
+        "user_attitude": <string, "dissatisfied" or "satisfied" or "irrelevant">,
+        "error_type": <string, "wrong" or "missing" or "none">,
+        "corrected_info": <string, the corrected factual statement>,
+        "key": <string, a unique and concise memory title>,
+        "tags": <list of relevant topic keywords (e.g., ["deadline", "team", "plan"])>
+    },
+    ...
+]
+
+Example1:
+
+Dialogue History:
+user: I can't eat spicy food these days. Can you recommend some suitable restaurants for me?
+assistant: Sure, I recommend the Fish Restaurant near you. Their signature dishes include various types of steamed seafood and sashimi of sea fish.
+feedback time: 2023-1-18T14:25:00.856481
+
+User Feedback:
+Oh, no! I'm allergic to seafood! And I don't like eating raw fish.
+
+Output:
+[
+    {
+        "validity": "true",
+        "user_attitude": "dissatisfied",
+        "error_type": "wrong",
+        "corrected_info": "User is allergic to seafood",
+        "key": "allergic to seafood",
+        "tags": ["allergic", "seafood"]
+    },
+    {
+        "validity": "true",
+        "user_attitude": "dissatisfied",
+        "error_type": "wrong",
+        "corrected_info": "User does not like eating raw fish.",
+        "key": "dislike eating raw fish",
+        "tags": ["dislike", "raw fish"]
+    }
+]
+
+Example2:
+
+Dialogue History:
+user: When did Jhon graduate?
+assistant: 2014
+feedback time: 2025-11-18T20:45:00.875249
+
+User Feedback:
+Wrong. He graduated the following year.
+
+Output:
+[
+    {
+        "validity": "true",
+        "user_attitude": "dissatisfied",
+        "error_type": "wrong",
+        "corrected_info": "Jhon graduated in 2015",
+        "key": "Jhon's graduation year",
+        "tags": ["Jhon", "graduation", "year"]
+    }
+]
+
+Dialogue History:
+{chat_history}
+feedback time: {feedback_time}
+
+User Feedback:
+{user_feedback}
+
+Output:
+"""
+
+
+FEEDBACK_JUDGEMENT_PROMPT_ZH = """你是一个对话质量分析专家。请严格根据以下步骤和标准,对提供的“用户和助理的对话历史”和“用户反馈”进行分析,并将最终判定结果填入指定的JSON格式中。
+
+分析步骤与判定标准:
+1. *有效性判定*
+   - 有效(true):用户反馈的内容与对话历史的主题、任务或上一次助理的回答*相关*。例如:针对回答进行追问、纠正、补充或评价。
+   - 无效(false):用户的反馈与对话历史*完全无关*,与任何先前内容之间不存在语义、主题或词汇上的联系。
+2. *用户态度判定*
+   - 不满意(dissatisfied):反馈中表现出负面情绪,如直接指出错误、表达困惑、抱怨、批评,或明确表示问题未解决。
+   - 满意(satisfied):反馈中表现出正面情绪,如表示感谢或给予称赞。
+   - 无关(irrelevant):反馈内容与评价助理回答无关。
+3. *助理回答效果类型判定*
+   - 错误(wrong):助理提供了不正确的信息。
+   - 缺漏(missing):助理的回答方向正确但不完整,遗漏了关键细节。
+   - 无(none):用户反馈并未指向助理回答的任何不足。
+4. *总结信息生成*
+   - 生成一份简洁的事实陈述列表,该列表概括了用户反馈中的核心信息。
+   - 重点放在客观事实、更正或确认上。
+   - 对于“满意”或“无”类型的反馈,该列表可能包含确认性的陈述,或者如果未提供新事实,则可能为空。
+   - 例如:“用户在2023年3月与同事完成了珠峰环线徒步旅行。”
+
+输出格式:
+[
+    {
+        "validity": <字符串,"true" 或 "false">,
+        "user_attitude": <字符串,"dissatisfied" 或 "satisfied" 或 "irrelevant">,
+        "error_type": <字符串,"wrong" 或 "missing" 或 "none">,
+        "corrected_info": <字符串,中文书写正确的信息记录>,
+        "key": <字符串,唯一且简洁的记忆标题>,
+        "tags": <相关主题关键词列表(例如,["截止日期", "团队", "计划"])>
+    },
+    ...
+]
+
+示例:
+
+用户和助理的对话历史:
+user: 这两天我吃不了辣椒,给我推荐一些适合的餐厅吧。
+assistant: 好的,推荐您附近的新荣记餐厅,黄鱼年糕以及各类清蒸海鲜是这家餐厅的招牌菜。
+反馈时间:2023-1-18T14:25:00.856481
+
+用户反馈:
+你忘记我海鲜过敏这件事了吗?而且我不喜欢年糕的口感。
+
+输出:
+[
+    {
+        "validity": "true",
+        "user_attitude": "dissatisfied",
+        "error_type": "wrong",
+        "corrected_info": "用户对海鲜过敏。",
+        "key": "海鲜过敏",
+        "tags": ["海鲜", "过敏"]
+    },
+    {
+        "validity": "true",
+        "user_attitude": "dissatisfied",
+        "error_type": "wrong",
+        "corrected_info": "用户不喜欢年糕的口感。",
+        "key": "不喜欢年糕",
+        "tags": ["不喜欢年糕", "年糕", "口感"]
+    }
+]
+
+
+用户和助理的对话历史:
+{chat_history}
+反馈时间:{feedback_time}
+
+用户反馈:
+{user_feedback}
+
+输出:
+"""
+
+
+UPDATE_FORMER_MEMORIES = """Please analyze the newly acquired factual information and determine how it should be applied to the memory database: add or update, and provide final operation recommendations.
+
+You must strictly return the response in the following JSON format:
+
+{
+    "operation":
+        {
+            "id": <memory ID>,
+            "text": <memory content>,
+            "event": <operation type, must be one of "ADD" or "UPDATE">,
+            "old_memory": <original memory content, required only when the event is "UPDATE">
+        }
+}
+
+*Requirements*:
+1. If the new fact adds no supplemental value and the existing memory supersedes it, no operation is performed.
+2. If the new fact is similar to existing memory but the information is more accurate, complete, or requires correction, set operation to "UPDATE"
+3. If the new fact contradicts existing memory in key information (such as time, location, status, etc.), update the original memory based on the new fact and set operation to "UPDATE"
+4. If there is completely new information to add, set operation to "ADD"
+
+*ID Management Rules*:
+- Update operation: Keep the original ID unchanged
+- Add operation: Generate a new unique ID in the format of a 4-digit string (e.g., "0001", "0002", etc.)
+
+*Important Requirements*:
+- Return only the JSON format response, without any other content
+- For update operations, you must provide the old_memory field to show the original content
+- text field requirements: Use concise, complete declarative sentences that are consistent with the newly acquired factual information, avoiding redundant information
+- text and old_memory content should be in English
+
+Example1:
+Current Memories:
+{
+    "memory": [
+        {
+            "id": "123",
+            "text": "The user works as a software engineer in Company A, mainly responsible for front-end development"
+        },
+        {
+            "id": "908",
+            "text": "The user likes to go fishing with friends on weekends"
+        }
+    ]
+}
+
+New facts:
+"The user is currently working as a senior full-stack development engineer at Company B"
+
+Operation recommendations:
+{
+    "operation":
+        {
+            "id": "123",
+            "text": "The user is currently working as a senior full-stack development engineer at Company B",
+            "event": "UPDATE",
+            "old_memory": "The user works as a software engineer in Company A, mainly responsible for front-end development"
+        }
+}
+
+Example2:
+Current Memories:
+{
+    "memory": [
+        {
+            "id": "123",
+            "text": "The user works as a software engineer in Company A, mainly responsible for front-end development"
+        },
+        {
+            "id": "908",
+            "text": "The user likes to go fishing with friends on weekends"
+        }
+    ]
+}
+
+New facts:
+"The user's residential address is Mingyue Community, Chaoyang District, Beijing"
+
+Operation recommendations:
+{
+    "operation":
+        {
+            "id": "4567",
+            "text": "The user's residential address is Mingyue Community, Chaoyang District, Beijing",
+            "event": "ADD"
+        }
+}
+
+Current Memories:
+{current_memories}
+
+New facts:
+{new_facts}
+
+Operation recommendations:
+"""
+
+
+UPDATE_FORMER_MEMORIES_ZH = """请分析新获取的事实信息,并决定该信息应该如何更新到记忆库中:新增或更新,并给出最终的操作建议。
"""请分析新获取的事实信息,并决定该信息应该如何更新到记忆库中:新增或更新,并给出最终的操作建议。 + +你必须严格按照以下JSON格式返回响应: + +{ + "operation": + { + "id": "<记忆ID>", + "text": "<记忆内容>", + "event": "<操作类型,必须是 "ADD", "UPDATE"之一>", + "old_memory": "<原记忆内容,仅当操作为"UPDATE"时需要提供>" + } +} + +要求: +1. 如果新事实对现有记忆没有额外补充,现有记忆的信息可以覆盖新事实,则不设置任何操作 +2. 如果新事实与现有记忆相似但信息更准确、完整或需要修正,设置操作为"UPDATE" +3. 如果新事实与现有记忆在关键信息上矛盾(如时间、地点、状态等),以新事实为准更新原有记忆,设置操作为"UPDATE" +4. 如果有全新信息添加,设置操作为"ADD" + +ID管理规则: +- 更新操作:保持原有ID不变 +- 新增操作:生成新的唯一ID,格式为4位数字字符串(如:"0001", "0002"等) + +重要要求: +- 只返回JSON格式的响应,不要包含其他任何内容 +- 对于更新操作,必须提供old_memory字段显示原内容 +- text字段要求:使用简洁、完整的陈述句,和新获取的事实信息一致,避免冗余信息 +- text和old_memory内容使用中文 + +示例1: +现有记忆记录: +{ + "memory": [ + { + "id": "123", + "text": "用户在公司A担任软件工程师,主要负责前端开发" + }, + { + "id": "908", + "text": "用户周末喜欢和朋友一起钓鱼" + } + ] +} + +新获取的事实: +"用户现在在公司B担任高级全栈开发工程师" + +操作建议: +{ + "operation": + { + "id": "123", + "text": "用户在公司B担任高级全栈开发工程师", + "event": "UPDATE", + "old_memory": "用户在公司A担任软件工程师,主要负责前端开发" + } +} + +示例2: +现有记忆记录: +{ + "memory": [ + { + "id": "123", + "text": "用户在公司A担任软件工程师,主要负责前端开发" + }, + { + "id": "908", + "text": "用户周末喜欢和朋友一起钓鱼" + } + ] +} + +新获取的事实: +"用户的居住地址是北京市朝阳区明月小区" + +操作建议: +{ + "operation": + { + "id": "4567", + "text": "用户的居住地址是北京市朝阳区明月小区", + "event": "ADD" + } +} + +现有记忆记录: +{current_memories} + +新获取的事实: +{new_facts} + +操作建议: +""" + + +GROUP_UPDATE_FORMER_MEMORIES = """Please analyze the newly acquired factual information and determine how this information should be updated to the memory database: add, update, or keep unchanged, and provide final operation recommendations. + +You must strictly return the response in the following JSON format: + +{ + "operation": [ + { + "id": "", + "text": "", + "event": "", + "old_memory": "" + }, + ... + ] +} + +*Requirements*: +1. If the new fact provides no additional supplement to existing memory, set operation to "NONE" +2. If the new fact is similar to existing memory but the information is more accurate, complete, or requires correction, set operation to "UPDATE" +3. If the new fact contradicts existing memory in key information (such as time, location, status, etc.), update the original memory based on the new fact and set operation to "UPDATE" +4. If there is completely new information to add, set operation to "ADD" + +*ID Management Rules*: +- Update operation: Keep the original ID unchanged +- Add operation: Generate a new unique ID in the format of a 4-digit string (e.g., "0001", "0002", etc.) 
+
+*Important Requirements*:
+- Return only the JSON format response, without any other content
+- For update operations, you must provide the old_memory field to show the original content
+- text field requirements: Use concise, complete declarative sentences that are consistent with the newly acquired factual information, avoiding redundant information
+- text and old_memory content should be in English
+
+Example:
+Current Memories:
+{
+    "memory": [
+        {
+            "id": "123",
+            "text": "The user works as a software engineer in Company A, mainly responsible for front-end development"
+        },
+        {
+            "id": "908",
+            "text": "The user likes to go fishing with friends on weekends"
+        }
+    ]
+}
+
+New facts:
+["The user is currently working as a senior full-stack development engineer at Company B", "The user's residential address is Mingyue Community, Chaoyang District, Beijing", "The user goes fishing on weekends"]
+
+Operation recommendations:
+{
+    "operation": [
+        {
+            "id": "123",
+            "text": "The user is currently working as a senior full-stack development engineer at Company B",
+            "event": "UPDATE",
+            "old_memory": "The user works as a software engineer in Company A, mainly responsible for front-end development"
+        },
+        {
+            "id": "4567",
+            "text": "The user's residential address is Mingyue Community, Chaoyang District, Beijing",
+            "event": "ADD"
+        },
+        {
+            "id": "908",
+            "text": "The user likes to go fishing with friends on weekends",
+            "event": "NONE"
+        }
+    ]
+}
+
+Current Memories:
+{current_memories}
+
+New facts:
+{new_facts}
+
+Operation recommendations:
+"""
+
+
+GROUP_UPDATE_FORMER_MEMORIES_ZH = """请分析新获取的事实信息,并决定这些信息应该如何更新到记忆库中:新增、更新、或保持不变,并给出最终的操作建议。
+
+你必须严格按照以下JSON格式返回响应:
+
+{
+    "operation": [
+        {
+            "id": "<记忆ID>",
+            "text": "<记忆内容>",
+            "event": "<操作类型,必须是 "ADD", "UPDATE", "NONE" 之一>",
+            "old_memory": "<原记忆内容,仅当操作为"UPDATE"时需要提供>"
+        },
+        ...
+    ]
+}
+
+要求:
+1. 如果新事实对现有记忆没有额外补充,设置操作为"NONE"
+2. 如果新事实与现有记忆相似但信息更准确、完整或需要修正,设置操作为"UPDATE"
+3. 如果新事实与现有记忆在关键信息上矛盾(如时间、地点、状态等),以新事实为准更新原有记忆,设置操作为"UPDATE"
+4. 如果有全新信息添加,设置操作为"ADD"
+
+ID管理规则:
+- 更新操作:保持原有ID不变
+- 新增操作:生成新的唯一ID,格式为4位数字字符串(如:"0001", "0002"等)
+
+重要要求:
+- 只返回JSON格式的响应,不要包含其他任何内容
+- 对于更新操作,必须提供old_memory字段显示原内容
+- text字段要求:使用简洁、完整的陈述句,和新获取的事实信息一致,避免冗余信息
+- text和old_memory内容使用中文
+
+示例:
+现有记忆记录:
+{
+    "memory": [
+        {
+            "id": "123",
+            "text": "用户在公司A担任软件工程师,主要负责前端开发"
+        },
+        {
+            "id": "908",
+            "text": "用户周末喜欢和朋友一起钓鱼"
+        }
+    ]
+}
+
+新获取的事实:
+["用户现在在公司B担任高级全栈开发工程师", "用户的居住地址是北京市朝阳区明月小区", "用户在周末会去钓鱼"]
+
+操作建议:
+{
+    "operation": [
+        {
+            "id": "123",
+            "text": "用户在公司B担任高级全栈开发工程师",
+            "event": "UPDATE",
+            "old_memory": "用户在公司A担任软件工程师,主要负责前端开发"
+        },
+        {
+            "id": "4567",
+            "text": "用户的居住地址是北京市朝阳区明月小区",
+            "event": "ADD"
+        },
+        {
+            "id": "908",
+            "text": "用户周末喜欢和朋友一起钓鱼",
+            "event": "NONE"
+        }
+    ]
+}
+
+现有记忆记录:
+{current_memories}
+
+新获取的事实:
+{new_facts}
+
+操作建议:
+"""
+
+
+FEEDBACK_ANSWER_PROMPT = """
+You are a knowledgeable and helpful AI assistant. You have access to the history of the current conversation. This history contains the previous exchanges between you and the user.
+
+# INSTRUCTIONS:
+1. Carefully analyze the entire conversation history. Your answer must be based only on the information that has been exchanged within this dialogue.
+2. Pay close attention to the sequence of the conversation. If the user refers back to a previous statement (e.g., "the thing I mentioned earlier"), you must identify that specific point in the history.
+3. Your primary goal is to provide continuity and context from this specific conversation. Do not introduce new facts or topics that have not been previously discussed.
+4. If the current question is ambiguous, use the conversation history to clarify its meaning.
+
+# APPROACH (Think step by step):
+1. Review the conversation history to understand the context and topics that have been discussed.
+2. Identify any specific details, preferences, or statements the user has made that are relevant to the current question.
+3. Formulate a precise, concise answer that is a direct continuation of the existing dialogue.
+4. Ensure your final answer is grounded in the conversation history and directly addresses the user's latest query in that context.
+
+# Tip:
+If no chat history is provided:
+ - Treat the query as self-contained.
+ - Do not assume prior context.
+ - Respond based solely on the current question.
+ - Do not raise new questions during the answering process.
+
+Chat history:
+{chat_history}
+
+Question:
+{question}
+
+Answer:
+"""
+
+FEEDBACK_ANSWER_PROMPT_ZH = """
+你是一个知识渊博且乐于助人的AI助手。你可以访问当前对话的完整历史记录。这些记录包含你与用户之间先前的所有交流内容。
+
+# 指令:
+1. 仔细分析整个对话历史。你的回答必须仅基于本次对话中已交流的信息。
+2. 密切关注对话的先后顺序。如果用户提及之前的发言(例如“我之前提到的那件事”),你必须定位到历史记录中的具体内容。
+3. 你的主要目标是基于本次特定对话提供连续性和上下文。不要引入之前对话中未讨论过的新事实或话题。
+4. 如果用户当前的问题含义不明确,请利用对话历史来澄清其意图。
+
+# 处理方法(逐步思考):
+1. 回顾对话历史,以理解已讨论的背景和主题。
+2. 识别用户已提及的、与当前问题相关的任何具体细节、偏好或陈述。
+3. 构思一个精准、简洁的回答,使其成为现有对话的直接延续。
+4. 确保你的最终回答紧扣对话历史,并在此上下文中直接回应用户的最新提问。
+
+# 注意:
+如果没有提供聊天历史记录:
+ - 将该查询视为独立的。
+ - 不要假设之前存在背景信息。
+ - 仅根据当前问题进行回答。
+ - 在回答过程中不必提出新的问题。
+
+对话历史:
+{chat_history}
+
+问题:
+{question}
+
+回答:
+"""

From 4d712eb5840f092853aa08417ed55d2b41883a48 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?=
 <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Thu, 20 Nov 2025 19:09:23 +0800
Subject: [PATCH 21/40] feedback handler configuration

---
 src/memos/api/handlers/base_handler.py      |   7 +
 src/memos/api/handlers/component_init.py    |  11 +
 src/memos/api/handlers/feedback_handler.py  |  52 +++++
 src/memos/api/product_models.py             |   5 +-
 src/memos/api/routers/server_router.py      |  19 +-
 src/memos/configs/memory.py                 |   9 +-
 src/memos/graph_dbs/polardb.py              |  10 +-
 src/memos/mem_feedback/feedback.py          | 226 +++++++++++++------
 src/memos/mem_feedback/simple_feedback.py   |   3 +
 src/memos/templates/mem_feedback_prompts.py | 195 +++++++++++++----
 10 files changed, 411 insertions(+), 126 deletions(-)
 create mode 100644 src/memos/api/handlers/feedback_handler.py

diff --git a/src/memos/api/handlers/base_handler.py b/src/memos/api/handlers/base_handler.py
index a686ac8f9..0b2fc5ae3 100644
--- a/src/memos/api/handlers/base_handler.py
+++ b/src/memos/api/handlers/base_handler.py
@@ -37,6 +37,7 @@ def __init__(
         internet_retriever: Any | None = None,
         memory_manager: Any | None = None,
         mos_server: Any | None = None,
+        feedback_server: Any | None = None,
         **kwargs,
     ):
         """
@@ -68,6 +69,7 @@ def __init__(
         self.internet_retriever = internet_retriever
         self.memory_manager = memory_manager
         self.mos_server = mos_server
+        self.feedback_server = feedback_server
 
         # Store any additional dependencies
         for key, value in kwargs.items():
@@ -161,6 +163,11 @@ def mos_server(self):
         """Get MOS server instance."""
         return self.deps.mos_server
 
+    @property
+    def feedback_server(self):
+        """Get feedback server instance."""
+        return self.deps.feedback_server
+
     def _validate_dependencies(self, *required_deps: str) -> None:
         """
         Validate that required dependencies are available.
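For orientation, here is a minimal usage sketch, not part of the patch itself: BaseHandler, HandlerDependencies, the feedback_server property, and _validate_dependencies are taken from the hunk above, while the handler class and its describe() method are hypothetical.

    # Hypothetical handler; only the imported names come from base_handler.py.
    from memos.api.handlers.base_handler import BaseHandler, HandlerDependencies

    class EchoFeedbackHandler(BaseHandler):
        def __init__(self, dependencies: HandlerDependencies):
            super().__init__(dependencies)
            # Fail fast if init_server() did not wire a feedback server in.
            self._validate_dependencies("feedback_server")

        def describe(self) -> str:
            # The feedback_server property proxies to self.deps.feedback_server.
            return type(self.feedback_server).__name__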
diff --git a/src/memos/api/handlers/component_init.py b/src/memos/api/handlers/component_init.py
index 78ed13e1f..56ec08152 100644
--- a/src/memos/api/handlers/component_init.py
+++ b/src/memos/api/handlers/component_init.py
@@ -28,6 +28,7 @@
 from memos.llms.factory import LLMFactory
 from memos.log import get_logger
 from memos.mem_cube.navie import NaiveMemCube
+from memos.mem_feedback.simple_feedback import SimpleMemFeedback
 from memos.mem_os.product_server import MOSServer
 from memos.mem_reader.factory import MemReaderFactory
 from memos.mem_scheduler.orm_modules.base_model import BaseDBManager
@@ -251,6 +252,15 @@ def init_server() -> dict[str, Any]:
     online_bot = get_online_bot_function() if dingding_enabled else None
     logger.info("DingDing bot is enabled")
 
+    # Initialize feedback server
+    feedback_server = SimpleMemFeedback(
+        llm=llm,
+        embedder=embedder,
+        graph_store=graph_db,
+        memory_manager=memory_manager,
+        mem_reader=mem_reader,
+    )
+
     # Return all components as a dictionary for easy access and extension
     return {
         "graph_db": graph_db,
@@ -273,4 +283,5 @@ def init_server() -> dict[str, Any]:
         "text_mem": text_mem,
         "pref_mem": pref_mem,
         "online_bot": online_bot,
+        "feedback_server": feedback_server,
     }
diff --git a/src/memos/api/handlers/feedback_handler.py b/src/memos/api/handlers/feedback_handler.py
new file mode 100644
index 000000000..2e8a21cb1
--- /dev/null
+++ b/src/memos/api/handlers/feedback_handler.py
@@ -0,0 +1,52 @@
+"""
+Feedback handler for memory add/update functionality.
+"""
+
+from memos.api.handlers.base_handler import BaseHandler, HandlerDependencies
+from memos.api.product_models import APIFeedbackRequest, MemoryResponse
+from memos.log import get_logger
+
+
+logger = get_logger(__name__)
+
+
+class FeedbackHandler(BaseHandler):
+    """
+    Handler for memory feedback operations.
+
+    Provides synchronous and asynchronous processing of user feedback.
+    """
+
+    def __init__(self, dependencies: HandlerDependencies):
+        """
+        Initialize feedback handler.
+
+        Args:
+            dependencies: HandlerDependencies instance
+        """
+        super().__init__(dependencies)
+        self._validate_dependencies("feedback_server", "mem_reader")
+
+    def handle_feedback_memories(self, feedback_req: APIFeedbackRequest) -> MemoryResponse:
+        """
+        Main handler for feedback memories endpoint.
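+
+        Delegates to feedback_server.process_feedback() and wraps the returned
+        record in a MemoryResponse.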
+
+        Args:
+            feedback_req: Feedback request containing content and parameters
+
+        Returns:
+            MemoryResponse with formatted results
+        """
+        process_record = self.feedback_server.process_feedback(
+            user_name=feedback_req.mem_cube_id,
+            session_id=feedback_req.session_id,
+            chat_history=feedback_req.history,
+            retrieved_memory_ids=feedback_req.retrieved_memory_ids,
+            feedback_content=feedback_req.feedback_content,
+            feedback_time=feedback_req.feedback_time,
+            allow_knowledgebase_write=feedback_req.allow_knowledgebase_write,
+            sync_mode=feedback_req.sync_mode,
+            corrected_answer=feedback_req.corrected_answer,
+        )
+
+        return MemoryResponse(message="Feedback processed successfully", data=[process_record])
diff --git a/src/memos/api/product_models.py b/src/memos/api/product_models.py
index 7d6373374..3e70d522a 100644
--- a/src/memos/api/product_models.py
+++ b/src/memos/api/product_models.py
@@ -216,7 +216,10 @@ class APIFeedbackRequest(BaseRequest):
     )
     agent_id: str | None = Field(None, description="Agent ID")
     app_id: str | None = Field(None, description="App ID")
-    chat_history: list[MessageDict] | None = Field(..., description="Chat history")
+    history: list[MessageDict] | None = Field(..., description="Chat history")
+    retrieved_memory_ids: list[str] | None = Field(
+        None, description="Retrieved memory ids at last turn"
+    )
     feedback_content: str | None = Field(..., description="Feedback content to process")
     feedback_time: str | None = Field(None, description="Feedback time")
     allow_public: bool = Field(
diff --git a/src/memos/api/routers/server_router.py b/src/memos/api/routers/server_router.py
index b3b517305..fbe0f0df7 100644
--- a/src/memos/api/routers/server_router.py
+++ b/src/memos/api/routers/server_router.py
@@ -21,10 +21,12 @@
 from memos.api.handlers.add_handler import AddHandler
 from memos.api.handlers.base_handler import HandlerDependencies
 from memos.api.handlers.chat_handler import ChatHandler
+from memos.api.handlers.feedback_handler import FeedbackHandler
 from memos.api.handlers.search_handler import SearchHandler
 from memos.api.product_models import (
     APIADDRequest,
     APIChatCompleteRequest,
+    APIFeedbackRequest,
     APISearchRequest,
     ChatRequest,
     GetMemoryRequest,
@@ -56,7 +58,7 @@
 chat_handler = ChatHandler(
     dependencies, search_handler, add_handler, online_bot=components.get("online_bot")
 )
-
+feedback_handler = FeedbackHandler(dependencies)
 # Extract commonly used components for function-based handlers
 # (These can be accessed from the components dict without unpacking all of them)
 mem_scheduler: BaseScheduler = components["mem_scheduler"]
@@ -219,3 +221,18 @@ def get_all_memories(memory_req: GetMemoryRequest):
         memory_type=memory_req.memory_type or "text_mem",
         naive_mem_cube=naive_mem_cube,
     )
+
+
+# =============================================================================
+# Feedback API Endpoints
+# =============================================================================
+
+
+@router.post("/feedback", summary="Feedback memories", response_model=MemoryResponse)
+def feedback_memories(feedback_req: APIFeedbackRequest):
+    """
+    Feedback memories for a specific user.
+
+    This endpoint uses the class-based FeedbackHandler for better code organization.
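+
+    Example request body (hypothetical values):
+        {"user_id": "u1", "mem_cube_id": "cube_1",
+         "history": [{"role": "assistant", "content": "..."}],
+         "feedback_content": "Actually, the deadline moved to January 5.",
+         "sync_mode": "async", "corrected_answer": false}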
+ """ + return feedback_handler.handle_feedback_memories(feedback_req) diff --git a/src/memos/configs/memory.py b/src/memos/configs/memory.py index 9caf4fa3e..04fc58ad6 100644 --- a/src/memos/configs/memory.py +++ b/src/memos/configs/memory.py @@ -7,6 +7,7 @@ from memos.configs.graph_db import GraphDBConfigFactory from memos.configs.internet_retriever import InternetRetrieverConfigFactory from memos.configs.llm import LLMConfigFactory +from memos.configs.mem_reader import MemReaderConfigFactory from memos.configs.reranker import RerankerConfigFactory from memos.configs.vec_db import VectorDBConfigFactory from memos.exceptions import ConfigurationError @@ -240,7 +241,7 @@ class PreferenceTextMemoryConfig(BaseTextMemoryConfig): ) -class MemFeedbackConfig(BaseTextMemoryConfig): +class MemFeedbackConfig(BaseMemoryConfig): """Memory feedback configuration class.""" extractor_llm: LLMConfigFactory = Field( @@ -275,6 +276,12 @@ class MemFeedbackConfig(BaseTextMemoryConfig): ), ) + mem_reader: MemReaderConfigFactory = Field( + ..., + default_factory=MemReaderConfigFactory, + description="MemReader configuration for the Feedback", + ) + # ─── 3. Global Memory Config Factory ────────────────────────────────────────── diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 60998c172..74bce3b97 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -524,13 +524,19 @@ def update_node(self, id: str, fields: dict[str, Any], user_name: str | None = N properties = current_node["metadata"].copy() original_id = properties.get("id", id) # Preserve original ID original_memory = current_node.get("memory", "") # Preserve original memory + updated_at = fields.pop("updated_at", datetime.utcnow().isoformat()) + usage_text = f"User: {user_name} | Time: {updated_at} | Operation: Update | Overwrite: {original_memory}" + usage_info = current_node.get("usage", []) # Preserve usage_info + usage_info.insert(0, usage_text) # If fields include memory, use it; otherwise keep original memory new_memory = fields.pop("memory") if "memory" in fields else original_memory properties.update(fields) - properties["id"] = original_id # Ensure ID is not overwritten - properties["memory"] = new_memory # Ensure memory is not overwritten + properties["id"] = original_id + properties["memory"] = new_memory + properties["usage"] = usage_info + properties["updated_at"] = updated_at # Handle embedding field embedding_vector = None diff --git a/src/memos/mem_feedback/feedback.py b/src/memos/mem_feedback/feedback.py index 653510e84..38b6b5674 100644 --- a/src/memos/mem_feedback/feedback.py +++ b/src/memos/mem_feedback/feedback.py @@ -1,4 +1,5 @@ import concurrent.futures +import difflib import json from datetime import datetime @@ -10,7 +11,8 @@ from memos.graph_dbs.factory import GraphStoreFactory, PolarDBGraphDB from memos.llms.factory import AzureLLM, LLMFactory, OllamaLLM, OpenAILLM from memos.mem_feedback.base import BaseMemFeedback -from memos.mem_reader.simple_struct import SimpleStructMemReader, detect_lang +from memos.mem_reader.factory import MemReaderFactory +from memos.mem_reader.simple_struct import detect_lang from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata from memos.memories.textual.tree_text_memory.organize.manager import MemoryManager from memos.templates.mem_feedback_prompts import ( @@ -45,6 +47,7 @@ def __init__(self, config: MemFeedbackConfig): self.llm: OpenAILLM | OllamaLLM | AzureLLM = 
LLMFactory.from_config(config.extractor_llm)
         self.embedder: OllamaEmbedder = EmbedderFactory.from_config(config.embedder)
         self.graph_store: PolarDBGraphDB = GraphStoreFactory.from_config(config.graph_db)
+        self.mem_reader = MemReaderFactory.from_config(config.mem_reader)
 
         self.is_reorganize = config.reorganize
         self.memory_manager: MemoryManager = MemoryManager(
@@ -60,6 +63,23 @@ def __init__(self, config: MemFeedbackConfig):
             is_reorganize=self.is_reorganize,
         )
 
+    def _pure_add(self, user_name: str, feedback_content: str, feedback_time: str, info: dict):
+        """Directly add new memory"""
+        scene_data = [[{"role": "user", "content": feedback_content, "chat_time": feedback_time}]]
+        memories = self.mem_reader.get_memory(scene_data, type="chat", info=info)
+        to_add_memories = [item for scene in memories for item in scene]
+        added_ids = self.memory_manager.add(to_add_memories, user_name=user_name)
+        logger.info(f"[Feedback Core] Added {len(added_ids)} memories for user {user_name}.")
+        return {
+            "record": {
+                "add": [
+                    {"id": _id, "text": added_mem.memory}
+                    for _id, added_mem in zip(added_ids, to_add_memories, strict=False)
+                ],
+                "update": [],
+            }
+        }
+
     def _feedback_judgement(
         self, chat_history: list[MessageDict], feedback_content: str, feedback_time: str = ""
     ) -> dict | None:
@@ -75,65 +95,105 @@ def _feedback_judgement(
             .replace("{feedback_time}", feedback_time)
         )
         judge_res = self._get_llm_response(prompt)
-        return judge_res if judge_res else []
+        if judge_res:
+            return judge_res
+        else:
+            logger.warning("[Feedback Core]: feedback judgement failed, return []")
+            return []
 
     def _feedback_memory(
         self, user_name: str, feedback_memories: list[TextualMemoryItem], **kwargs
     ) -> dict:
         sync_mode = kwargs.get("sync_mode")
+        retrieved_memory_ids = kwargs.get("retrieved_memory_ids") or []
+        retrieved_memories = [self.graph_store.get_node(_id) for _id in retrieved_memory_ids]
+        current_memories = [
+            {"id": item["id"], "text": item["memory"]} for item in retrieved_memories
+        ]
+
+        def _single_add_operation(
+            memory_item: TextualMemoryItem, user_name: str, sync_mode: str
+        ) -> dict:
+            """Handle a single ADD operation."""
+            added_ids = self.memory_manager.add([memory_item], user_name=user_name, mode=sync_mode)
+            logger.info(f"[Memory Feedback ADD] {added_ids[0]}")
+
+            return {"id": added_ids[0], "text": memory_item.memory}
+
+        def _single_update_operation(
+            op: dict, memory_item: TextualMemoryItem, user_name: str, sync_mode: str
+        ) -> dict:
+            """Handle a single UPDATE operation."""
+            update_id = op.get("id")
+            updated_ids = self.memory_manager.update(
+                [update_id], [memory_item], user_name=user_name, mode=sync_mode
+            )
+            log_update_info = op.get("old_memory", "") + " >> " + op.get("text", "")
+            logger.info(f"[Memory Feedback UPDATE] {updated_ids[0]}, info: {log_update_info}")
+
+            return {
+                "id": update_id,
+                "origin_memory": op.get("old_memory", ""),
+                "text": op.get("text", ""),
+            }
+
+        def _add_or_update(memory_item: TextualMemoryItem, current_memories: list):
+            if current_memories == []:
+                current_memories = self._vec_query(
+                    memory_item.metadata.embedding, user_name=user_name
+                )
 
-        def _add_or_update(memory_item: TextualMemoryItem):
-            current_memories = self._vec_query(memory_item.metadata.embedding, user_name=user_name)
             if current_memories:
                 lang = detect_lang("".join(memory_item.memory))
                 template = FEEDBACK_PROMPT_DICT["compare"][lang]
                 prompt = template.replace("{current_memories}", str(current_memories)).replace(
                     "{new_facts}", memory_item.memory
                 )
-                operations = self._get_llm_response(prompt).get("operation", {})
+                operations = (self._get_llm_response(prompt) or {}).get("operation", [])
+                operations = self._id_dehallucination(operations, current_memories)
             else:
-                operations = {"event": "ADD"}
+                operations = [{"event": "ADD"}]
+
             logger.info(f"[Feedback memory operations]: {operations!s}")
 
-            if operations and operations["event"].lower() == "add":
+            if not operations:
                 return {"record": {"add": [], "update": []}}
 
+            add_results = []
+            update_results = []
+
+            with ContextThreadPoolExecutor(max_workers=10) as executor:
+                future_to_op = {}
+                for op in operations:
+                    event_type = op.get("event", "").lower()
+
+                    if event_type == "add":
+                        future = executor.submit(
+                            _single_add_operation, memory_item, user_name, sync_mode
+                        )
+                        future_to_op[future] = ("add", op)
+                    elif event_type == "update":
+                        future = executor.submit(
+                            _single_update_operation, op, memory_item, user_name, sync_mode
+                        )
+                        future_to_op[future] = ("update", op)
+
+                for future in concurrent.futures.as_completed(future_to_op):
+                    result_type, original_op = future_to_op[future]
+                    try:
+                        result = future.result()
+                        if result_type == "add":
+                            add_results.append(result)
+                        elif result_type == "update":
+                            update_results.append(result)
+                    except Exception as e:
+                        logger.error(f"Operation failed for {original_op}: {e}")
+
+            return {"record": {"add": add_results, "update": update_results}}
 
-        search_filter = {"user_name": user_name}
-        with ContextThreadPoolExecutor(max_workers=8) as ex:
+        with ContextThreadPoolExecutor(max_workers=3) as ex:
             futures = {
-                ex.submit(_add_or_update, mem, search_filter): i
+                ex.submit(_add_or_update, mem, current_memories): i
                 for i, mem in enumerate(feedback_memories)
             }
             results = [None] * len(futures)
@@ -173,14 +233,42 @@ def _get_llm_response(self, prompt: str, dsl: bool = True) -> dict:
         try:
             response_text = self.llm.generate(messages)
             if dsl:
+                response_text = response_text.replace("```json", "").replace("```", "")
                 response_json = json.loads(response_text)
             else:
                 return response_text
         except Exception as e:
-            logger.error(f"[LLM] Exception during chat generation: {e}")
+            logger.error(f"[Feedback Core LLM] Exception during chat generation: {e}")
            response_json = None
         return response_json
 
+    def _id_dehallucination(self, operations, current_memories):
+        """Map IDs hallucinated by the LLM back to the real candidate memory IDs."""
+        right_ids = [item["id"] for item in current_memories]
+        right_lower_map = {x.lower(): x for x in right_ids}
+
+        def correct_item(data):
+            if data.get("event", "").lower() != "update":
+                return data
+
+            original_id = data["id"]
+            if original_id in right_ids:
+                return data
+
+            lower_id = original_id.lower()
+            if lower_id in right_lower_map:
+                data["id"] = right_lower_map[lower_id]
+                return data
+
+            matches = difflib.get_close_matches(original_id, right_ids, n=1, cutoff=0.8)
+            if matches:
+                data["id"] = 
matches[0] + return data + + return None + + dehallu_res = [correct_item(item) for item in operations] + return [item for item in dehallu_res if item] + def _generate_answer( self, chat_history: list[MessageDict], feedback_content: str, corrected_answer: bool ) -> str: @@ -205,7 +293,6 @@ def process_feedback_core( user_name: str, chat_history: list[MessageDict], feedback_content: str, - mem_reader: SimpleStructMemReader | None = None, **kwargs, ) -> dict: """ @@ -221,31 +308,14 @@ def process_feedback_core( info = {"user_id": user_name, "session_id": session_id} logger.info(f"[Feedback Core] Starting memory feedback process for user {user_name}") - if mem_reader and not chat_history: - scene_data = [ - [{"role": "user", "content": feedback_content, "chat_time": feedback_time}] - ] - memories = mem_reader.get_memory(scene_data, type="chat", info=info) - to_add_memories = [item for scene in memories for item in scene] - added_ids = self.memory_manager.add(to_add_memories, user_name=user_name) - logger.info( - f"[Feedback Core] Added {len(added_ids)} memories for user {user_name}." - ) - return { - "record": { - "add": [ - {"id": _id, "text": added_mem.memory} - for _id, added_mem in zip(added_ids, to_add_memories, strict=False) - ], - "update": [], - } - } - - elif chat_history: + if not chat_history: + return self._pure_add(user_name, feedback_content, feedback_time, info) + + else: raw_judge = self._feedback_judgement( chat_history, feedback_content, feedback_time=feedback_time ) - judge_res = ( + valid_feedback = ( [ item for item in raw_judge @@ -254,7 +324,14 @@ def process_feedback_core( if raw_judge else [] ) - if not judge_res: + if ( + raw_judge + and raw_judge[0]["validity"].lower() == "false" + and raw_judge[0]["user_attitude"].lower() == "irrelevant" + ): + return self._pure_add(user_name, feedback_content, feedback_time, info) + + if not valid_feedback: logger.warning( f"[Feedback Core] No valid judgements for user {user_name}: {raw_judge}." ) @@ -262,9 +339,11 @@ def process_feedback_core( feedback_memories = [] feedback_memories_embeddings = self.embedder.embed( - [item["corrected_info"] for item in judge_res] + [item["corrected_info"] for item in valid_feedback] ) - for item, embedding in zip(judge_res, feedback_memories_embeddings, strict=False): + for item, embedding in zip( + valid_feedback, feedback_memories_embeddings, strict=False + ): value = item["corrected_info"] key = item["key"] tags = item["tags"] @@ -287,16 +366,13 @@ def process_feedback_core( ), ) ) + mem_record = self._feedback_memory(user_name, feedback_memories, **kwargs) logger.info( f"[Feedback Core] Processed {len(feedback_memories)} feedback memories for user {user_name}." 
) return mem_record - else: - logger.info("[Feedback Core] Empty chat_history and no mem_reader, skipping.") - return {"record": {"add": [], "update": []}} - except Exception as e: logger.error(f"[Feedback Core] Error for user {user_name}: {e}") return {"record": {"add": [], "update": []}} @@ -306,7 +382,6 @@ def process_feedback( user_name: str, chat_history: list[MessageDict], feedback_content: str, - mem_reader: SimpleStructMemReader | None = None, **kwargs, ): """ @@ -316,7 +391,6 @@ def process_feedback( user_name: User identifier chat_history: List of chat messages feedback_content: Feedback content from user - mem_reader: Memory reader instance **kwargs: Additional arguments including sync_mode Returns: @@ -338,7 +412,6 @@ def process_feedback( user_name, chat_history, feedback_content, - mem_reader, **kwargs, ) concurrent.futures.wait([answer_future, core_future]) @@ -365,7 +438,6 @@ def process_feedback( user_name, chat_history, feedback_content, - mem_reader, **kwargs, ) ex.shutdown(wait=False) diff --git a/src/memos/mem_feedback/simple_feedback.py b/src/memos/mem_feedback/simple_feedback.py index 23e25ff2c..59ee38438 100644 --- a/src/memos/mem_feedback/simple_feedback.py +++ b/src/memos/mem_feedback/simple_feedback.py @@ -3,6 +3,7 @@ from memos.graph_dbs.factory import PolarDBGraphDB from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM from memos.mem_feedback.feedback import MemFeedback +from memos.mem_reader.simple_struct import SimpleStructMemReader from memos.memories.textual.tree_text_memory.organize.manager import MemoryManager @@ -16,8 +17,10 @@ def __init__( embedder: OllamaEmbedder, graph_store: PolarDBGraphDB, memory_manager: MemoryManager, + mem_reader: SimpleStructMemReader, ): self.llm = llm self.embedder = embedder self.graph_store = graph_store self.memory_manager = memory_manager + self.mem_reader = mem_reader diff --git a/src/memos/templates/mem_feedback_prompts.py b/src/memos/templates/mem_feedback_prompts.py index 2a9a7dcb2..e1e3abfa4 100644 --- a/src/memos/templates/mem_feedback_prompts.py +++ b/src/memos/templates/mem_feedback_prompts.py @@ -101,7 +101,7 @@ 分析步骤与判定标准: 1. *有效性判定* - - 有效(true):用户反馈的内容与对话历史的主题、任务或上一次助理的回答*相关*。例如:针对回答进行追问、纠正、补充或评价。 + - 有效(true):用户反馈的内容与对话历史的主题、任务或上一次助理的回答*有关联*。例如:针对回答进行追问、纠正、补充或评价。 - 无效(false):用户的反馈与对话历史*完全无关*,与任何先前内容之间不存在语义、主题或词汇上的联系。 2. *用户态度判定* - 不满意(dissatisfied):反馈中表现出负面情绪,如直接指出错误、表达困惑、抱怨、批评,或明确表示问题未解决。 @@ -172,33 +172,40 @@ """ -UPDATE_FORMER_MEMORIES = """Please analyze the newly acquired factual information and determine how this information should be updated to the memory database: add or update, and provide final operation recommendations. +UPDATE_FORMER_MEMORIES = """Please analyze the newly acquired factual information and determine how this information should be updated to the memory database: add, update, or keep unchanged, and provide final operation recommendations. You must strictly return the response in the following JSON format: { "operation": - { - "id": "", - "text": "", - "event": "", - "old_memory": "" - } + [ + { + "id": "", + "text": "", + "event": "", + "old_memory": "" + }, + ... + ] } *Requirements*: -1. If the new fact adds no supplemental value and the existing memory supersedes it, no operation is performed. +1. If the new fact does not provide additional information to the existing memory item, the existing memory can override the new fact, and the operation is set to "NONE." 2. 
If the new fact is similar to existing memory but the information is more accurate, complete, or requires correction, set operation to "UPDATE" 3. If the new fact contradicts existing memory in key information (such as time, location, status, etc.), update the original memory based on the new fact and set operation to "UPDATE" -4. If there is completely new information to add, set operation to "ADD" +4. If there is no existing memory that requires updating, the new fact is added as entirely new information, and the operation is set to "ADD." Therefore, in the same operation list, ADD and UPDATE will not coexist. + *ID Management Rules*: - Update operation: Keep the original ID unchanged - Add operation: Generate a new unique ID in the format of a 4-digit string (e.g., "0001", "0002", etc.) *Important Requirements*: -- Return only the JSON format response, without any other content - For update operations, you must provide the old_memory field to show the original content +- Compare the existing memories one by one and do not miss any content that needs to be updated. When multiple existing memories need to be updated, include all relevant entries in the operation list + +If the new fact contradicts existing memory in key information (such as time, location, status, etc.), update ALL affected original memories based on the new fact and set operation to "UPDATE" for each one. Multiple memories covering the same outdated information should all be updated. +- Return only the JSON format response, without any other content - text field requirements: Use concise, complete declarative sentences that are consistent with the newly acquired factual information, avoiding redundant information - text and old_memory content should be in English @@ -206,29 +213,65 @@ Current Memories: { "memory": [ + { + "id": "0911", + "text": "The user is a senior full-stack developer working at Company B" + }, { "id": "123", - "text": "The user works as a software engineer in Company A, mainly responsible for front-end development" + "text": "The user works as a software engineer at Company A, primarily responsible for front-end development" + }, + { + "id": "648", + "text": "The user is responsible for front-end development of software at Company A" + }, + { + "id": "7210", + "text": "The user is responsible for front-end development of software at Company A" }, { "id": "908", - "text": "The user likes to go fishing with friends on weekends" + "text": "The user enjoys fishing with friends on weekends" } ] } Newly facts: -"The user is currently working as a senior full-stack development engineer at Company B" +"The user works as a senior full-stack developer at Company B" Operation recommendations: { "operation": - { - "id": "123", - "text": "The user is currently working as a senior full-stack development engineer at Company B", - "event": "UPDATE", - "old_memory": "The user works as a software engineer in Company A, mainly responsible for front-end development" - } + [ + { + "id": "0911", + "text": "The user is a senior full-stack developer working at Company B", + "event": "NONE" + }, + { + "id": "123", + "text": "The user works as a senior full-stack developer at Company B", + "event": "UPDATE", + "old_memory": "The user works as a software engineer at Company A, primarily responsible for front-end development" + }, + { + "id": "648", + "text": "The user works as a senior full-stack developer at Company B", + "event": "UPDATE", + "old_memory": "The user is responsible for front-end development of software at 
Company A" + }, + { + "id": "7210", + "text": "The user works as a senior full-stack developer at Company B", + "event": "UPDATE", + "old_memory": "The user is responsible for front-end development of software at Company A" + }, + { + "id": "908", + "text": "The user enjoys fishing with friends on weekends", + "event": "NONE" + } + ] } Example2: @@ -252,11 +295,23 @@ Operation recommendations: { "operation": - { - "id": "4567", - "text": "The user's residential address is Mingyue Community, Chaoyang District, Beijing", - "event": "ADD" - } + [ + { + "id": "123", + "text": "The user works as a software engineer at Company A, primarily responsible for front-end development", + "event": "NONE" + }, + { + "id": "908", + "text": "The user enjoys fishing with friends on weekends", + "event": "NONE" + }, + { + "id": "4567", + "text": "The user's residential address is Mingyue Community, Chaoyang District, Beijing", + "event": "ADD" + } + ] } Current Memories @@ -269,33 +324,37 @@ """ -UPDATE_FORMER_MEMORIES_ZH = """请分析新获取的事实信息,并决定该信息应该如何更新到记忆库中:新增或更新,并给出最终的操作建议。 +UPDATE_FORMER_MEMORIES_ZH = """请分析新获取的事实信息,并决定这些信息应该如何更新到记忆库中:新增、更新、或保持不变,并给出最终的操作建议。 你必须严格按照以下JSON格式返回响应: { "operation": - { - "id": "<记忆ID>", - "text": "<记忆内容>", - "event": "<操作类型,必须是 "ADD", "UPDATE"之一>", - "old_memory": "<原记忆内容,仅当操作为"UPDATE"时需要提供>" - } + [ + { + "id": "<记忆ID>", + "text": "<记忆内容>", + "event": "<操作类型,必须是 "ADD", "UPDATE", "NONE" 之一>", + "old_memory": "<原记忆内容,仅当操作为"UPDATE"时需要提供>" + }, + ... + ] } 要求: -1. 如果新事实对现有记忆没有额外补充,现有记忆的信息可以覆盖新事实,则不设置任何操作 -2. 如果新事实与现有记忆相似但信息更准确、完整或需要修正,设置操作为"UPDATE" +1. 如果新事实对现有记忆item没有额外补充,现有记忆的信息可以覆盖新事实,设置操作为"NONE" +2. 如果新事实与现有记忆item相似但信息更准确、完整或需要修正,设置操作为"UPDATE" 3. 如果新事实与现有记忆在关键信息上矛盾(如时间、地点、状态等),以新事实为准更新原有记忆,设置操作为"UPDATE" -4. 如果有全新信息添加,设置操作为"ADD" +4. 如果现有记忆中没有需要更新的,则新事实作为全新信息添加,设置操作为"ADD"。因此可知同一个 operation 列表中,ADD和UPDATE不会同时存在。 ID管理规则: - 更新操作:保持原有ID不变 - 新增操作:生成新的唯一ID,格式为4位数字字符串(如:"0001", "0002"等) 重要要求: -- 只返回JSON格式的响应,不要包含其他任何内容 - 对于更新操作,必须提供old_memory字段显示原内容 +- 对现有记忆逐一比对,不可漏掉需要更新的内容。当多个现有记忆需要更新时,将所有的相关条目都包含在操作列表中 +- 只返回JSON格式的响应,不要包含其他任何内容 - text字段要求:使用简洁、完整的陈述句,和新获取的事实信息一致,避免冗余信息 - text和old_memory内容使用中文 @@ -303,10 +362,22 @@ 现有记忆记录: { "memory": [ + { + "id": "0911", + "text": "用户是高级全栈开发工程师,在B公司工作" + }, { "id": "123", "text": "用户在公司A担任软件工程师,主要负责前端开发" }, + { + "id": "648", + "text": "用户在公司A负责软件的前端开发工作" + }, + { + "id": "7210", + "text": "用户在公司A负责软件的前端开发工作" + }, { "id": "908", "text": "用户周末喜欢和朋友一起钓鱼" @@ -320,12 +391,36 @@ 操作建议: { "operation": - { - "id": "123", - "text": "用户在公司B担任高级全栈开发工程师", - "event": "UPDATE", - "old_memory": "用户在公司A担任软件工程师,主要负责前端开发" - } + [ + { + "id": "0911", + "text": "用户是高级全栈开发工程师,在B公司工作", + "event": "NONE" + }, + { + "id": "123", + "text": "用户现在在公司B担任高级全栈开发工程师", + "event": "UPDATE", + "old_memory": "用户在公司A担任软件工程师,主要负责前端开发" + }, + { + "id": "648", + "text": "用户现在在公司B担任高级全栈开发工程师", + "event": "UPDATE", + "old_memory": "用户在公司A负责软件的前端开发工作" + }, + { + "id": "7210", + "text": "用户现在在公司B担任高级全栈开发工程师", + "event": "UPDATE", + "old_memory": "用户在公司A负责软件的前端开发工作" + }, + { + "id": "908", + "text": "用户周末喜欢和朋友一起钓鱼", + "event": "NONE" + } + ] } 示例2: @@ -349,11 +444,23 @@ 操作建议: { "operation": - { + [ + { + "id": "123", + "text": "用户在公司A担任软件工程师,主要负责前端开发", + "event": "NONE" + }, + { + "id": "908", + "text": "用户周末喜欢和朋友一起钓鱼", + "event": "NONE" + }, + { "id": "4567", "text": "用户的居住地址是北京市朝阳区明月小区", "event": "ADD" - } + } + ] } 现有记忆记录: From aef3aad8da82af8b83da5e11f105989e82e3d9e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: 
Wed, 26 Nov 2025 20:55:46 +0800 Subject: [PATCH 22/40] upgrade feedback using --- src/memos/graph_dbs/polardb.py | 11 +- src/memos/mem_feedback/feedback.py | 211 ++++++-- .../tree_text_memory/organize/manager.py | 2 +- src/memos/templates/mem_feedback_prompts.py | 506 ++++++++++-------- 4 files changed, 449 insertions(+), 281 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 74bce3b97..ebaec2b90 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -525,9 +525,11 @@ def update_node(self, id: str, fields: dict[str, Any], user_name: str | None = N original_id = properties.get("id", id) # Preserve original ID original_memory = current_node.get("memory", "") # Preserve original memory updated_at = fields.pop("updated_at", datetime.utcnow().isoformat()) - usage_text = f"User: {user_name} | Time: {updated_at} | Operation: Update | Overwrite: {original_memory}" - usage_info = current_node.get("usage", []) # Preserve usage_info - usage_info.insert(0, usage_text) + + record_info = f"User:{user_name} | Time:{updated_at} | Operation:Update | Overwrite: {original_memory}" + covered_history = current_node.get("covered_history", []) + covered_history.insert(0, record_info) + logger.info(f"New GraphDB Update: {record_info}") # If fields include memory, use it; otherwise keep original memory new_memory = fields.pop("memory") if "memory" in fields else original_memory @@ -535,13 +537,14 @@ def update_node(self, id: str, fields: dict[str, Any], user_name: str | None = N properties.update(fields) properties["id"] = original_id properties["memory"] = new_memory - properties["usage"] = usage_info + properties["covered_history"] = covered_history properties["updated_at"] = updated_at # Handle embedding field embedding_vector = None if "embedding" in fields: embedding_vector = fields.pop("embedding") + assert properties["embedding"] == embedding_vector, "Embedding vector mismatch" if not isinstance(embedding_vector, list): embedding_vector = None diff --git a/src/memos/mem_feedback/feedback.py b/src/memos/mem_feedback/feedback.py index 38b6b5674..6b8520c50 100644 --- a/src/memos/mem_feedback/feedback.py +++ b/src/memos/mem_feedback/feedback.py @@ -4,6 +4,8 @@ from datetime import datetime +from tenacity import retry, stop_after_attempt, wait_exponential + from memos import log from memos.configs.memory import MemFeedbackConfig from memos.context.context import ContextThreadPoolExecutor @@ -64,12 +66,18 @@ def __init__(self, config: MemFeedbackConfig): ) def _pure_add(self, user_name: str, feedback_content: str, feedback_time: str, info: dict): - """Directly add new memory""" + """ + Directly add new memory + """ scene_data = [[{"role": "user", "content": feedback_content, "chat_time": feedback_time}]] memories = self.mem_reader.get_memory(scene_data, type="chat", info=info) to_add_memories = [item for scene in memories for item in scene] - added_ids = self.memory_manager.add(to_add_memories, user_name=user_name) - logger.info(f"[Feedback Core] Added {len(added_ids)} memories for user {user_name}.") + added_ids = self._retry_db_operation( + lambda: self.memory_manager.add(to_add_memories, user_name=user_name) + ) + logger.info( + f"[Feedback Core: _pure_add] Added {len(added_ids)} memories for user {user_name}." 
+ ) return { "record": { "add": [ @@ -88,17 +96,21 @@ def _feedback_judgement( """ lang = detect_lang(feedback_content) template = FEEDBACK_PROMPT_DICT["judge"][lang] - chat_history_str = str(chat_history[-4:]) - prompt = ( - template.replace("{chat_history}", chat_history_str) - .replace("{user_feedback}", feedback_content) - .replace("{feedback_time}", feedback_time) + chat_history_lis = [f"""{msg["role"]}: {msg["content"]}""" for msg in chat_history[-4:]] + chat_history_str = "\n".join(chat_history_lis) + prompt = template.format( + chat_history=chat_history_str, + user_feedback=feedback_content, + feedback_time=feedback_time, ) + judge_res = self._get_llm_response(prompt) if judge_res: return judge_res else: - logger.warning("[Feedback Core]: feedback judgement failed, return []") + logger.warning( + "[Feedback Core: _feedback_judgement] feedback judgement failed, return []" + ) return [] def _feedback_memory( @@ -106,16 +118,36 @@ def _feedback_memory( ) -> dict: sync_mode = kwargs.get("sync_mode") retrieved_memory_ids = kwargs.get("retrieved_memory_ids") or [] + chat_history = kwargs.get("chat_history", []) + feedback_content = kwargs.get("feedback_content", "") + + chat_history_lis = [f"""{msg["role"]}: {msg["content"]}""" for msg in chat_history[-4:]] + fact_history = "\n".join(chat_history_lis) + f"\nuser feedback: \n{feedback_content}" + retrieved_memories = [self.graph_store.get_node(_id) for _id in retrieved_memory_ids] + filterd_ids = [ + item["id"] for item in retrieved_memories if "mode:fast" in item["metadata"]["tags"] + ] + if filterd_ids: + logger.warning( + f"[Feedback Core: _feedback_memory] Since the tags mode is fast, no modifications are made to the following memory {filterd_ids}." + ) + current_memories = [ - {"id": item["id"], "text": item["memory"]} for item in retrieved_memories + {"id": item["id"], "text": item["memory"]} + for item in retrieved_memories + if "mode:fast" not in item["metadata"]["tags"] ] def _single_add_operation( memory_item: TextualMemoryItem, user_name: str, sync_mode: str ) -> dict: - """处理单个添加操作""" - added_ids = self.memory_manager.add([memory_item], user_name=user_name, mode=sync_mode) + """ + Individual addition operations + """ + added_ids = self._retry_db_operation( + lambda: self.memory_manager.add([memory_item], user_name=user_name, mode=sync_mode) + ) logger.info(f"[Memory Feedback ADD] {added_ids[0]}") return {"id": added_ids[0], "text": memory_item.memory} @@ -123,10 +155,14 @@ def _single_add_operation( def _single_update_operation( op: dict, memory_item: TextualMemoryItem, user_name: str, sync_mode: str ) -> dict: - """处理单个更新操作""" + """ + Individual update operations + """ update_id = op.get("id") - updated_ids = self.memory_manager.update( - [update_id], [memory_item], user_name=user_name, mode=sync_mode + updated_ids = self._retry_db_operation( + lambda: self.memory_manager.update( + [update_id], [memory_item], user_name=user_name, mode=sync_mode + ) ) log_update_info = op.get("old_memory", "") + " >> " + op.get("text", "") logger.info(f"[Memory Feedback UPDATE] {updated_ids[0]}, info: {log_update_info}") @@ -137,7 +173,9 @@ def _single_update_operation( "text": op.get("text", ""), } - def _add_or_update(memory_item: TextualMemoryItem, current_memories: list): + def _add_or_update( + memory_item: TextualMemoryItem, current_memories: list, fact_history: str + ): if current_memories == []: current_memories = self._vec_query( memory_item.metadata.embedding, user_name=user_name @@ -146,9 +184,12 @@ def _add_or_update(memory_item: 
TextualMemoryItem, current_memories: list): if current_memories: lang = detect_lang("".join(memory_item.memory)) template = FEEDBACK_PROMPT_DICT["compare"][lang] - prompt = template.replace("{current_memories}", str(current_memories)).replace( - "{new_facts}", memory_item.memory + prompt = template.format( + current_memories=str(current_memories), + new_facts=memory_item.memory, + chat_history=fact_history, ) + operations = self._get_llm_response(prompt).get("operation", []) operations = self._id_dehallucination(operations, current_memories) else: @@ -187,13 +228,16 @@ def _add_or_update(memory_item: TextualMemoryItem, current_memories: list): elif result_type == "update": update_results.append(result) except Exception as e: - logger.error(f"Operation failed for {original_op}: {e}") + logger.error( + f"[Feedback Core: _add_or_update] Operation failed for {original_op}: {e}", + exc_info=True, + ) return {"record": {"add": add_results, "update": update_results}} with ContextThreadPoolExecutor(max_workers=3) as ex: futures = { - ex.submit(_add_or_update, mem, current_memories): i + ex.submit(_add_or_update, mem, current_memories, fact_history): i for i, mem in enumerate(feedback_memories) } results = [None] * len(futures) @@ -204,7 +248,10 @@ def _add_or_update(memory_item: TextualMemoryItem, current_memories: list): if node: results[i] = node except Exception as e: - logger.error(f"[FeedBack] error: {e}") + logger.error( + f"[Feedback Core: _feedback_memory] Error processing memory index {i}: {e}", + exc_info=True, + ) mem_res = [r for r in results if r] return { @@ -216,22 +263,34 @@ def _add_or_update(memory_item: TextualMemoryItem, current_memories: list): def _vec_query(self, new_memories_embedding: list[float], user_name=None): retrieved_ids = self.graph_store.search_by_embedding( - new_memories_embedding, user_name=user_name + new_memories_embedding, user_name=user_name, top_k=5 ) current_memories = [self.graph_store.get_node(item["id"]) for item in retrieved_ids] + if not retrieved_ids: + logger.info( + f"[Feedback Core: _vec_query] No similar memories found for embedding query for user {user_name}." + ) + filterd_ids = [ + item["id"] for item in current_memories if "mode:fast" in item["metadata"]["tags"] + ] + if filterd_ids: + logger.warning( + f"[Feedback Core: _vec_query] Since the tags mode is fast, no modifications are made to the following memory {filterd_ids}." + ) return [ { "id": item["id"], "text": item["memory"], } for item in current_memories + if "mode:fast" not in item["metadata"]["tags"] ] def _get_llm_response(self, prompt: str, dsl: bool = True) -> dict: messages = [{"role": "user", "content": prompt}] try: - response_text = self.llm.generate(messages) + response_text = self.llm.generate(messages, temperature=0.3) if dsl: response_text = response_text.replace("```", "").replace("json", "") response_json = json.loads(response_text) @@ -275,7 +334,7 @@ def _generate_answer( """ Answer generation to facilitate concurrent submission. 
""" - if not corrected_answer: + if not corrected_answer or feedback_content.strip() == "": return "" lang = detect_lang(feedback_content) template = FEEDBACK_PROMPT_DICT["generation"][lang] @@ -283,9 +342,8 @@ def _generate_answer( [f"{item['role']}: {item['content']}" for item in chat_history] ) chat_history_str = chat_history_str if chat_history_str else "none" - prompt = template.replace("{chat_history}", chat_history_str).replace( - "{question}", feedback_content - ) + prompt = template.format(chat_history=chat_history_str, question=feedback_content) + return self._get_llm_response(prompt, dsl=False) def process_feedback_core( @@ -298,16 +356,28 @@ def process_feedback_core( """ Core feedback processing: judgment, memory extraction, addition/update. Return record. """ + + def check_validity(item): + return ( + "validity" in item + and item["validity"].lower() == "true" + and "corrected_info" in item + and item["corrected_info"].strip() + and "key" in item + and "tags" in item + ) + try: feedback_time = kwargs.get("feedback_time") or datetime.now().isoformat() session_id = kwargs.get("session_id") allow_knowledgebase_write = bool(kwargs.get("allow_knowledgebase_write")) - if not allow_knowledgebase_write: + if feedback_content.strip() == "" or not allow_knowledgebase_write: return {"record": {"add": [], "update": []}} info = {"user_id": user_name, "session_id": session_id} - logger.info(f"[Feedback Core] Starting memory feedback process for user {user_name}") - + logger.info( + f"[Feedback Core: process_feedback_core] Starting memory feedback process for user {user_name}" + ) if not chat_history: return self._pure_add(user_name, feedback_content, feedback_time, info) @@ -316,13 +386,7 @@ def process_feedback_core( chat_history, feedback_content, feedback_time=feedback_time ) valid_feedback = ( - [ - item - for item in raw_judge - if item["validity"].lower() == "true" and item["corrected_info"].strip() - ] - if raw_judge - else [] + [item for item in raw_judge if check_validity(item)] if raw_judge else [] ) if ( raw_judge @@ -333,14 +397,25 @@ def process_feedback_core( if not valid_feedback: logger.warning( - f"[Feedback Core] No valid judgements for user {user_name}: {raw_judge}." + f"[Feedback Core: process_feedback_core] No valid judgements for user {user_name}: {raw_judge}." ) return {"record": {"add": [], "update": []}} feedback_memories = [] - feedback_memories_embeddings = self.embedder.embed( - [item["corrected_info"] for item in valid_feedback] - ) + + corrected_infos = [item["corrected_info"] for item in valid_feedback] + embed_bs = 5 + feedback_memories_embeddings = [] + for i in range(0, len(corrected_infos), embed_bs): + batch = corrected_infos[i : i + embed_bs] + try: + feedback_memories_embeddings.extend(self.embedder.embed(batch)) + except Exception as e: + logger.error( + f"[Feedback Core: process_feedback_core] Embedding batch failed: {e}", + exc_info=True, + ) + for item, embedding in zip( valid_feedback, feedback_memories_embeddings, strict=False ): @@ -367,14 +442,20 @@ def process_feedback_core( ) ) - mem_record = self._feedback_memory(user_name, feedback_memories, **kwargs) + mem_record = self._feedback_memory( + user_name, + feedback_memories, + chat_history=chat_history, + feedback_content=feedback_content, + **kwargs, + ) logger.info( - f"[Feedback Core] Processed {len(feedback_memories)} feedback memories for user {user_name}." + f"[Feedback Core: process_feedback_core] Processed {len(feedback_memories)} feedback memories for user {user_name}." 
) return mem_record except Exception as e: - logger.error(f"[Feedback Core] Error for user {user_name}: {e}") + logger.error(f"[Feedback Core: process_feedback_core] Error for user {user_name}: {e}") return {"record": {"add": [], "update": []}} def process_feedback( @@ -414,17 +495,25 @@ def process_feedback( feedback_content, **kwargs, ) - concurrent.futures.wait([answer_future, core_future]) + done, pending = concurrent.futures.wait([answer_future, core_future], timeout=30) + for fut in pending: + fut.cancel() try: answer = answer_future.result() record = core_future.result() logger.info( - f"[process_feedback sync] Completed concurrently for user {user_name} with full results." + f"[MemFeedback sync] Completed concurrently for user {user_name} with full results." ) return {"answer": answer, "record": record["record"]} + except concurrent.futures.TimeoutError: + logger.error( + f"[MemFeedback sync] Timeout in sync mode for {user_name}", exc_info=True + ) + return {"answer": "", "record": {"add": [], "update": []}} except Exception as e: logger.error( - f"[process_feedback sync] Error in concurrent tasks for {user_name}: {e}" + f"[MemFeedback sync] Error in concurrent tasks for {user_name}: {e}", + exc_info=True, ) return {"answer": "", "record": {"add": [], "update": []}} else: @@ -444,14 +533,34 @@ def process_feedback( def log_completion(f): try: - result = f.result() - logger.info(f"[Background Feedback] Completed for {user_name}: {result}") + result = f.result(timeout=600) + logger.info(f"[MemFeedback async] Completed for {user_name}: {result}") + except concurrent.futures.TimeoutError: + logger.error( + f"[MemFeedback async] Background task timeout for {user_name}", + exc_info=True, + ) + f.cancel() except Exception as e: - logger.error(f"[Background Feedback] Error for {user_name}: {e}") + logger.error( + f"[MemFeedback async] Background Feedback Error for {user_name}: {e}", + exc_info=True, + ) future.add_done_callback(log_completion) logger.info( - f"[process_feedback async] Returned answer, background task started for {user_name}." + f"[MemFeedback async] Returned answer, background task started for {user_name}." ) return {"answer": answer, "record": {"add": [], "update": []}} + + # Helper for DB operations with retry + @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) + def _retry_db_operation(self, operation): + try: + return operation() + except Exception as e: + logger.error( + f"[MemFeedback: _retry_db_operation] DB operation failed: {e}", exc_info=True + ) + raise diff --git a/src/memos/memories/textual/tree_text_memory/organize/manager.py b/src/memos/memories/textual/tree_text_memory/organize/manager.py index dadf7c321..94b86cec0 100644 --- a/src/memos/memories/textual/tree_text_memory/organize/manager.py +++ b/src/memos/memories/textual/tree_text_memory/organize/manager.py @@ -132,7 +132,7 @@ def update( self.graph_store.update_node(_id, field, user_name) graph_class_name = self.graph_store.__class__.__name__ - logger.info(f"Updated {graph_class_name}") + logger.info(f"[MemoryManager] Updated {graph_class_name}") return memories_ids def replace_working_memory( diff --git a/src/memos/templates/mem_feedback_prompts.py b/src/memos/templates/mem_feedback_prompts.py index e1e3abfa4..11e17e399 100644 --- a/src/memos/templates/mem_feedback_prompts.py +++ b/src/memos/templates/mem_feedback_prompts.py @@ -3,40 +3,33 @@ Analysis Steps and Criteria: 1. 
*Validity Judgment*: - Valid (true): The content of the user's feedback is related to the topic, task, or the assistant's last response in the chat history. For example: asking follow-up questions, making corrections, providing supplements, or evaluating the last response. - - Invalid (false): The user’s feedback is entirely unrelated to the conversation history, with no semantic, topical, or lexical connection to any prior content. + - Invalid (false): The user's feedback is entirely unrelated to the conversation history, with no semantic, topical, or lexical connection to any prior content. 2. *User Attitude Judgment*: - Dissatisfied: The feedback shows negative emotions, such as directly pointing out errors, expressing confusion, complaining, criticizing, or explicitly stating that the problem remains unsolved. - Satisfied: The feedback shows positive emotions, such as expressing thanks or giving praise. - Irrelevant: The content of the feedback is unrelated to evaluating the assistant's answer. -3. *Assistant Response Effectiveness Type Judgment*: - - Wrong: The assistant provided incorrect information. - - Missing: The assistant's response was correct in direction but incomplete, omitting key details. - - None: The user feedback does not point to any shortcomings in the assistant's response. - -4. *Summary Information Generation*(corrected_info field): +3. *Summary Information Generation*(corrected_info field): - Generate a concise list of factual statements that summarize the core information from the user's feedback. - — Focus on objective facts, corrections, or confirmations. - - Express time information as concrete, unambiguous date(s) or period(s) (e.g., “March 2023”, “2024-07”, or “May–June 2022”). - - For 'Satisfied' or 'None' types, this list may contain confirming statements or be empty if no new facts are provided. - - For example: "The user completed the Everest Circuit trek with colleagues in March 2023." + - When the feedback provides corrections, focus only on the corrected information. + - When the feedback provides supplements, integrate all valid information (both old and new). + - It is very important to keep any relevant time information and express time information as concrete, unambiguous date(s) or period(s) (e.g., "March 2023", "2024-07", or "May–June 2022"). + - For 'satisfied' attitude, this list may contain confirming statements or be empty if no new facts are provided. + - Focus on statement of objective facts. For example: "The user completed the Everest Circuit trek with colleagues in March 2023." Output Format: [ - { + {{ "validity": "", "user_attitude": "", - "error_type": "", "corrected_info": "", - "key": , - "tags": - }, - ... + "key": "", + "tags": "" + }} ] Example1: - Dialogue History: user: I can't eat spicy food these days. Can you recommend some suitable restaurants for me? assistant: Sure, I recommend the Fish Restaurant near you. Their signature dishes include various types of steamed seafood and sashimi of sea fish. 
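A note on the brace churn in these template hunks: patch 22 switches prompt assembly from chained `.replace()` calls to `str.format()` (see `_feedback_judgement` and `_add_or_update` above), so every literal JSON brace in the templates must now be escaped as `{{`/`}}`, leaving only named placeholders such as `{chat_history}`, `{feedback_time}`, and `{user_feedback}` to be substituted. A minimal sketch of how this template's output is consumed downstream, mirroring the fence stripping in `_get_llm_response` and the field checks in `check_validity`; the `parse_judgement` helper name is illustrative and not part of this patch:

import json


def parse_judgement(response_text: str) -> list[dict]:
    """Parse the judgement JSON and keep only actionable items."""
    # Strip markdown fences the model may wrap around the JSON,
    # as _get_llm_response does before json.loads.
    cleaned = response_text.replace("```", "").replace("json", "")
    try:
        items = json.loads(cleaned)
    except (json.JSONDecodeError, TypeError):
        return []
    if not isinstance(items, list):
        return []
    # Mirror check_validity: keep items marked valid that carry a non-empty
    # corrected_info plus the key/tags fields used to build memory metadata.
    return [
        item
        for item in items
        if str(item.get("validity", "")).lower() == "true"
        and str(item.get("corrected_info", "")).strip()
        and "key" in item
        and "tags" in item
    ]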
@@ -47,147 +40,183 @@ Output: [ - { - "validity": "true", - "user_attitude": "dissatisfied", - "error_type": "wrong", - "corrected_info": "User is allergic to seafood", - "key": "allergic to seafood", - "tags": ["allergic", "seafood"] - }, - { - "validity": "true", - "user_attitude": "dissatisfied", - "error_type": "wrong", - "corrected_info": "User does not like eating raw fish.", - "key": "dislike eating raw fish.", - "tags": ["dislike", "raw fish"] - } + {{ + "validity": "true", + "user_attitude": "dissatisfied", + "corrected_info": "User is allergic to seafood and does not like eating raw fish.", + "key": "dietary restrictions", + "tags": ["allergic", "seafood", "raw fish", "food preference"] + }} ] Example2: +Dialogue History: +user: When did I bought on November 25, 2025? +assistant: A red coat +feedback time: 2025-11-28T20:45:00.875249 +User Feedback: +No, I also bought a blue shirt. + +Output: +[ + {{ + "validity": "true", + "user_attitude": "dissatisfied", + "corrected_info": "User bought a red coat and a blue shirt on November 25, 2025", + "key": "shopping record", + "tags": ["purchase", "clothing", "shopping"] + }} +] + +Example3: Dialogue History: -user: When did Jhon graduated? -assistant: 2014 -feedback time: 2025-11-18T20:45:00.875249 +user: What's my favorite food? +assistant: Pizza and sushi +feedback time: 2024-07-15T10:30:00.000000 User Feedback: -Wrong. He graduated the following year. +Wrong! I hate sushi. I like burgers. Output: [ - { - "validity": "true", - "user_attitude": "dissatisfied", - "error_type": "wrong", - "corrected_info": "Jhon was graduated at 2015", - "key": "Jhon graduated time", - "tags": ["Jhon", "graduated", "year"] - } + {{ + "validity": "true", + "user_attitude": "dissatisfied", + "corrected_info": "User likes pizza and burgers, but hates sushi.", + "key": "food preferences", + "tags": ["food preferences", "pizza", "burgers", "sushi"] + }} ] + Dialogue History: {chat_history} + feedback time: {feedback_time} User Feedback: {user_feedback} -Output: -""" +Output:""" +FEEDBACK_JUDGEMENT_PROMPT_ZH = """您是一个回答质量分析专家。请严格按照以下步骤和标准分析提供的"用户与助手聊天历史"和"用户反馈",并将最终评估结果填入指定的JSON格式中。 -FEEDBACK_JUDGEMENT_PROMPT_ZH = """你是一个对话质量分析专家。请严格根据以下步骤和标准,对提供的“用户和助理的对话历史”和“用户反馈”进行分析,并将最终判定结果填入指定的JSON格式中。 - -分析步骤与判定标准: -1. *有效性判定* - - 有效(true):用户反馈的内容与对话历史的主题、任务或上一次助理的回答*有关联*。例如:针对回答进行追问、纠正、补充或评价。 - - 无效(false):用户的反馈与对话历史*完全无关*,与任何先前内容之间不存在语义、主题或词汇上的联系。 -2. *用户态度判定* - - 不满意(dissatisfied):反馈中表现出负面情绪,如直接指出错误、表达困惑、抱怨、批评,或明确表示问题未解决。 - - 满意(satisfied):反馈中表现出正面情绪,如表示感谢或给予称赞。 - - 无关(irrelevant):反馈内容与评价助理回答无关。 -3. *助理回答效果类型判定* - - 错误(wrong):助理提供了不正确的信息。 - - 缺漏(missing):助理的回答方向正确但不完整,遗漏了关键细节。 - - 无(none):用户反馈并未指向助理回答的任何不足。 -4. *总结信息生成* - - 生成一份简洁的事实陈述列表,该列表概括了用户反馈中的核心信息。 - - 重点放在客观事实、更正或确认上。 - - 对于“满意”或“无”类型的反馈,该列表可能包含确认性的陈述,或者如果未提供新事实,则可能为空。 - - 例如:“用户在2023年3月与同事完成了珠峰环线徒步旅行。” +分析步骤和标准: +1. *有效性判断*:(validity字段) + - 有效(true):用户反馈的内容与聊天历史中的主题、任务或助手的最后回复相关。例如:提出后续问题、进行纠正、提供补充或评估最后回复。 + - 无效(false):用户反馈与对话历史完全无关,与之前内容没有任何语义、主题或词汇联系。 + +2. *用户态度判断*:(user_attitude字段) + - 不满意:反馈显示负面情绪,如直接指出错误、表达困惑、抱怨、批评,或明确表示问题未解决。 + - 满意:反馈显示正面情绪,如表达感谢或给予赞扬。 + - 无关:反馈内容与评估助手回答无关。 + +3. 
*摘要信息生成*(corrected_info字段): + - 从用户反馈中总结核心信息,生成简洁的事实陈述列表。 + - 当反馈提供纠正时,仅关注纠正后的信息。 + - 当反馈提供补充时,整合所有有效信息(包括旧信息和新信息)。 + - 非常重要:保留相关时间信息,并以具体、明确的日期或时间段表达(例如:"2023年3月"、"2024年7月"或"2022年5月至6月")。 + - 对于"满意"态度,此列表可能包含确认性陈述,如果没有提供新事实则为空。 + - 专注于客观事实陈述。例如:"用户于2023年3月与同事完成了珠峰环线徒步。" 输出格式: [ - { - "validity": <字符串,"true" 或 "false">, - "user_attitude": <字符串,"dissatisfied" 或 "satisfied" 或 "irrelevant">, - "error_type": <字符串,"wrong" 或 "missing" 或 "irrelevant">, - "corrected_info": <字符串,中文书写正确的信息记录>, - "key": <字符串,唯一且简洁的记忆标题>, - "tags": <相关主题关键词列表(例如,["截止日期", "团队", "计划"])> - }, - ... + {{ + "validity": "<字符串,'true' 或 'false'>", + "user_attitude": "<字符串,'dissatisfied' 或 'satisfied' 或 'irrelevant'>", + "corrected_info": "<字符串,用中文书写的事实信息记录>", + "key": "<字符串,简洁的中文记忆标题,用于快速识别该条目的核心内容(2-5个汉字)>", + "tags": "<列表,中文关键词列表(每个标签1-3个汉字),用于分类和检索>" + }} ] -示例: - -用户和助理的对话历史: -user: 这两天我吃不了辣椒,给我推荐一些适合的餐厅吧。 -assistant: 好的,推荐您附近的新荣记餐厅,黄鱼年糕以及各类清蒸海鲜是这件餐厅的招牌菜。 +示例1: +对话历史: +用户:这些天我不能吃辣。能给我推荐一些合适的餐厅吗? +助手:好的,我推荐您附近的鱼类餐厅。他们的招牌菜包括各种蒸海鲜和海鱼生鱼片。 反馈时间:2023-1-18T14:25:00.856481 用户反馈: -你忘记我海鲜过敏这件事了吗?而且我不喜欢年糕的口感。 +哦,不!我对海鲜过敏!而且我不喜欢吃生鱼。 + +输出: +[ + {{ + "validity": "true", + "user_attitude": "dissatisfied", + "corrected_info": "用户对海鲜过敏且不喜欢吃生鱼", + "key": "饮食限制", + "tags": ["过敏", "海鲜", "生鱼", "饮食偏好"] + }} +] + +示例2: +对话历史: +用户:我2025年11月25日买了什么? +助手:一件红色外套 +反馈时间:2025-11-28T20:45:00.875249 + +用户反馈: +不对,我还买了一件蓝色衬衫。 输出: [ - { - "validity": "true", - "user_attitude": "dissatisfied", - "error_type": "wrong", - "corrected_info": "用户对海鲜过敏。", - "key": "海鲜过敏", - "tags": ["海鲜", "过敏"] - }, - { - "validity": "true", - "user_attitude": "dissatisfied", - "error_type": "wrong", - "corrected_info": "用户不喜欢年糕的口感。", - "key": "不喜欢年糕", - "tags": ["不喜欢年糕", "年糕", "口感"] - } + {{ + "validity": "true", + "user_attitude": "dissatisfied", + "corrected_info": "用户于2025年11月25日购买了一件红色外套和一件蓝色衬衫", + "key": "购物记录", + "tags": ["红色外套", "蓝色衬衫", "服装购物"] + }} ] +示例3: +对话历史: +用户:我最喜欢的食物是什么? +助手:披萨和寿司 +反馈时间:2024-07-15T10:30:00.000000 -用户和助理的对话历史: +用户反馈: +错了!我讨厌寿司。我喜欢汉堡。 + +输出: +[ + {{ + "validity": "true", + "user_attitude": "dissatisfied", + "corrected_info": "用户喜欢披萨和汉堡,但讨厌寿司", + "key": "食物偏好", + "tags": ["偏好", "披萨和汉堡"] + }} +] + +对话历史: {chat_history} + 反馈时间:{feedback_time} 用户反馈: {user_feedback} -输出: -""" +输出:""" UPDATE_FORMER_MEMORIES = """Please analyze the newly acquired factual information and determine how this information should be updated to the memory database: add, update, or keep unchanged, and provide final operation recommendations. You must strictly return the response in the following JSON format: -{ +{{ "operation": [ - { + {{ "id": "", "text": "", "event": "", "old_memory": "" - }, + }}, ... ] -} +}} *Requirements*: 1. If the new fact does not provide additional information to the existing memory item, the existing memory can override the new fact, and the operation is set to "NONE." 
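The operation list this template returns drives the write path in `_feedback_memory`, where "ADD" and "UPDATE" entries are submitted to a thread pool and "NONE" entries are skipped. A sequential sketch of that dispatch, assuming `memory_manager` stands in for `self.memory_manager` and `memory_item` is the `TextualMemoryItem` carrying the new fact:

def apply_operations(
    operations: list[dict], memory_item, user_name: str, sync_mode: str
) -> dict:
    """Sequential equivalent of the threaded dispatch in _feedback_memory."""
    record = {"add": [], "update": []}
    for op in operations:
        event = op.get("event", "").lower()
        if event == "add":
            # ADD: insert the new fact as a fresh node; the LLM-suggested
            # 4-digit id is only a placeholder, the real id comes from add().
            added_ids = memory_manager.add(
                [memory_item], user_name=user_name, mode=sync_mode
            )
            record["add"].append({"id": added_ids[0], "text": memory_item.memory})
        elif event == "update":
            # UPDATE: overwrite the matched node, keeping its original id.
            memory_manager.update(
                [op["id"]], [memory_item], user_name=user_name, mode=sync_mode
            )
            record["update"].append(
                {
                    "id": op["id"],
                    "origin_memory": op.get("old_memory", ""),
                    "text": op.get("text", ""),
                }
            )
        # NONE: the existing memory already covers the fact; nothing to write.
    return {"record": record}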
@@ -211,113 +240,126 @@ Example1: Current Memories: -{ +{{ "memory": [ - { + {{ "id": "0911", "text": "The user is a senior full-stack developer working at Company B" - }, - { + }}, + {{ "id": "123", "text": "The user works as a software engineer at Company A, primarily responsible for front-end development" - }, - { + }}, + {{ "id": "648", "text": "The user is responsible for front-end development of software at Company A" - }, - { + }}, + {{ "id": "7210", "text": "The user is responsible for front-end development of software at Company A" - }, - { + }}, + {{ "id": "908", "text": "The user enjoys fishing with friends on weekends" - } + }} ] -} +}} + +The background of the new fact being put forward: +user: Do you remember where I work? +assistant: Company A. +user feedback: I work at Company B, and I am a senior full-stack developer. Newly facts: "The user works as a senior full-stack developer at Company B" Operation recommendations: -{ +{{ "operation": [ - { + {{ "id": "0911", "text": "The user is a senior full-stack developer working at Company B", "event": "NONE" - }, - { + }}, + {{ "id": "123", "text": "The user works as a senior full-stack developer at Company B", "event": "UPDATE", "old_memory": "The user works as a software engineer at Company A, primarily responsible for front-end development" - }, - { + }}, + {{ "id": "648", "text": "The user works as a senior full-stack developer at Company B", "event": "UPDATE", "old_memory": "The user is responsible for front-end development of software at Company A" - }, - { + }}, + {{ "id": "7210", "text": "The user works as a senior full-stack developer at Company B", "event": "UPDATE", "old_memory": "The user is responsible for front-end development of software at Company A" - }, - { + }}, + {{ "id": "908", "text": "The user enjoys fishing with friends on weekends", "event": "NONE" - } + }} ] -} +}} Example2: Current Memories: -{ +{{ "memory": [ - { + {{ "id": "123", "text": "The user works as a software engineer in Company A, mainly responsible for front-end development" - }, - { + }}, + {{ "id": "908", "text": "The user likes to go fishing with friends on weekends" - } + }} ] -} +}} + +The background of the new fact being put forward: +user: Guess where I live? +assistant: Hehuan Community. +user feedback: Wrong, update my address: Mingyue Community, Chaoyang District, Beijing Newly facts: "The user's residential address is Mingyue Community, Chaoyang District, Beijing" Operation recommendations: -{ +{{ "operation": [ - { + {{ "id": "123", "text": "The user works as a software engineer at Company A, primarily responsible for front-end development", "event": "NONE" - }, - { + }}, + {{ "id": "908", "text": "The user enjoys fishing with friends on weekends", "event": "NONE" - }, - { + }}, + {{ "id": "4567", "text": "The user's residential address is Mingyue Community, Chaoyang District, Beijing", "event": "ADD" - } + }} ] -} +}} -Current Memories +**Current Memories** {current_memories} -Newly facts: +**The background of the new fact being put forward** +{chat_history} + +**Newly facts** {new_facts} Operation recommendations: @@ -328,18 +370,18 @@ 你必须严格按照以下JSON格式返回响应: -{ +{{ "operation": [ - { + {{ "id": "<记忆ID>", "text": "<记忆内容>", "event": "<操作类型,必须是 "ADD", "UPDATE", "NONE" 之一>", "old_memory": "<原记忆内容,仅当操作为"UPDATE"时需要提供>" - }, + }}, ... ] -} +}} 要求: 1. 
如果新事实对现有记忆item没有额外补充,现有记忆的信息可以覆盖新事实,设置操作为"NONE" @@ -360,113 +402,127 @@ 示例1: 现有记忆记录: -{ +{{ "memory": [ - { + {{ "id": "0911", "text": "用户是高级全栈开发工程师,在B公司工作" - }, - { + }}, + {{ "id": "123", "text": "用户在公司A担任软件工程师,主要负责前端开发" - }, - { + }}, + {{ "id": "648", "text": "用户在公司A负责软件的前端开发工作" - }, - { + }}, + {{ "id": "7210", "text": "用户在公司A负责软件的前端开发工作" - }, - { + }}, + {{ "id": "908", "text": "用户周末喜欢和朋友一起钓鱼" - } + }} ] -} +}} + +提出新事实的背景: +user: 你还记得我现在在哪里工作吗? +assistant: A公司 +user feedback: 实际上,我在公司B工作,是一名高级全栈开发人员。 + 新获取的事实: "用户现在在公司B担任高级全栈开发工程师" 操作建议: -{ +{{ "operation": [ - { + {{ "id": "0911", "text": "用户是高级全栈开发工程师,在B公司工作", "event": "NONE" - }, - { + }}, + {{ "id": "123", "text": "用户现在在公司B担任高级全栈开发工程师", "event": "UPDATE", "old_memory": "用户在公司A担任软件工程师,主要负责前端开发" - }, - { + }}, + {{ "id": "648", "text": "用户现在在公司B担任高级全栈开发工程师", "event": "UPDATE", "old_memory": "用户在公司A负责软件的前端开发工作" - }, - { + }}, + {{ "id": "7210", "text": "用户现在在公司B担任高级全栈开发工程师", "event": "UPDATE", "old_memory": "用户在公司A负责软件的前端开发工作" - }, - { + }}, + {{ "id": "908", "text": "用户周末喜欢和朋友一起钓鱼", "event": "NONE" - } + }} ] -} +}} 示例2: 现有记忆记录: -{ +{{ "memory": [ - { + {{ "id": "123", "text": "用户在公司A担任软件工程师,主要负责前端开发" - }, - { + }}, + {{ "id": "908", "text": "用户周末喜欢和朋友一起钓鱼" - } + }} ] -} +}} + +提出新事实的背景: +user: 猜猜我住在哪里? +assistant: 合欢社区 +user feedback: 错了,请更新我的地址:北京市朝阳区明月社区 新获取的事实: "用户的居住地址是北京市朝阳区明月小区" 操作建议: -{ +{{ "operation": [ - { + {{ "id": "123", "text": "用户在公司A担任软件工程师,主要负责前端开发", "event": "NONE" - }, - { + }}, + {{ "id": "908", "text": "用户周末喜欢和朋友一起钓鱼", "event": "NONE" - }, - { + }}, + {{ "id": "4567", "text": "用户的居住地址是北京市朝阳区明月小区", "event": "ADD" - } + }} ] -} +}} -现有记忆记录: +**现有记忆记录:** {current_memories} -新获取的事实: +**提出新事实的背景:** +{chat_history} + +**新获取的事实:** {new_facts} 操作建议: @@ -477,17 +533,17 @@ You must strictly return the response in the following JSON format: -{ +{{ "operation": [ - { + {{ "id": "", "text": "", "event": "", "old_memory": "" - }, + }}, ... ] -} +}} *Requirements*: 1. If the new fact provides no additional supplement to existing memory, set operation to "NONE" @@ -507,43 +563,43 @@ Example: Current Memories: -{ +{{ "memory": [ - { + {{ "id": "123", "text": "The user works as a software engineer in Company A, mainly responsible for front-end development" - }, - { + }}, + {{ "id": "908", "text": "The user likes to go fishing with friends on weekends" - } + }} ] -} +}} Newly facts: ["The user is currently working as a senior full-stack development engineer at Company B", "The user's residential address is Mingyue Community, Chaoyang District, Beijing", "The user goes fishing on weekends"] Operation recommendations: -{ +{{ "operation": [ - { + {{ "id": "123", "text": "The user is currently working as a senior full-stack development engineer at Company B", "event": "UPDATE", "old_memory": "The user works as a software engineer in Company A, mainly responsible for front-end development" - }, - { + }}, + {{ "id": "4567", "text": "The user's residential address is Mingyue Community, Chaoyang District, Beijing", "event": "ADD" - }, - { + }}, + {{ "id": "908", "text": "The user likes to go fishing with friends on weekends", "event": "NONE" - } + }} ] -} +}} Current Memories {current_memories} @@ -559,17 +615,17 @@ 你必须严格按照以下JSON格式返回响应: -{ +{{ "operation": [ - { + {{ "id": "<记忆ID>", "text": "<记忆内容>", "event": "<操作类型,必须是 "ADD", "UPDATE", "NONE" 之一>", "old_memory": "<原记忆内容,仅当操作为"UPDATE"时需要提供>" - }, + }}, ... ] -} +}} 要求: 1. 
如果新事实对现有记忆没有额外补充,设置操作为"NONE" @@ -589,43 +645,43 @@ 示例: 现有记忆记录: -{ +{{ "memory": [ - { + {{ "id": "123", "text": "用户在公司A担任软件工程师,主要负责前端开发" - }, - { + }}, + {{ "id": "908", "text": "用户周末喜欢和朋友一起钓鱼" - } + }} ] -} +}} 新获取的事实: ["用户现在在公司B担任高级全栈开发工程师", "用户的居住地址是北京市朝阳区明月小区", "用户在周末会去钓鱼"] 操作建议: -{ +{{ "operation": [ - { + {{ "id": "123", "text": "用户在公司B担任高级全栈开发工程师", "event": "UPDATE", "old_memory": "用户在公司A担任软件工程师,主要负责前端开发" - }, - { + }}, + {{ "id": "4567", "text": "用户的居住地址是北京市朝阳区明月小区", "event": "ADD" - }, - { + }}, + {{ "id": "908", "text": "用户周末喜欢和朋友一起钓鱼", "event": "NONE" - } + }} ] -} +}} 现有记忆记录: {current_memories} From 0fa9be7e66fb7e3cd3d8abf0425393dceeacffc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Thu, 27 Nov 2025 18:17:06 +0800 Subject: [PATCH 23/40] add threshold --- src/memos/mem_feedback/feedback.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/mem_feedback/feedback.py b/src/memos/mem_feedback/feedback.py index 6b8520c50..54d29ac88 100644 --- a/src/memos/mem_feedback/feedback.py +++ b/src/memos/mem_feedback/feedback.py @@ -263,7 +263,7 @@ def _add_or_update( def _vec_query(self, new_memories_embedding: list[float], user_name=None): retrieved_ids = self.graph_store.search_by_embedding( - new_memories_embedding, user_name=user_name, top_k=5 + new_memories_embedding, user_name=user_name, top_k=10, threshold=0.75 ) current_memories = [self.graph_store.get_node(item["id"]) for item in retrieved_ids] if not retrieved_ids: From facb7b31d17fccaae3f8e5572fa00ebc07976a4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Thu, 27 Nov 2025 19:03:34 +0800 Subject: [PATCH 24/40] update prompt --- src/memos/mem_feedback/feedback.py | 4 ++-- src/memos/templates/mem_feedback_prompts.py | 25 ++++++++++++--------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/memos/mem_feedback/feedback.py b/src/memos/mem_feedback/feedback.py index 54d29ac88..dd5dbde4c 100644 --- a/src/memos/mem_feedback/feedback.py +++ b/src/memos/mem_feedback/feedback.py @@ -194,7 +194,7 @@ def _add_or_update( operations = self._id_dehallucination(operations, current_memories) else: operations = [{"event": "ADD"}] - + # TODO based on the operation, change memory_item memory info logger.info(f"[Feedback memory operations]: {operations!s}") if not operations: @@ -263,7 +263,7 @@ def _add_or_update( def _vec_query(self, new_memories_embedding: list[float], user_name=None): retrieved_ids = self.graph_store.search_by_embedding( - new_memories_embedding, user_name=user_name, top_k=10, threshold=0.75 + new_memories_embedding, user_name=user_name, top_k=10, threshold=0.7 ) current_memories = [self.graph_store.get_node(item["id"]) for item in retrieved_ids] if not retrieved_ids: diff --git a/src/memos/templates/mem_feedback_prompts.py b/src/memos/templates/mem_feedback_prompts.py index 11e17e399..85fbc6490 100644 --- a/src/memos/templates/mem_feedback_prompts.py +++ b/src/memos/templates/mem_feedback_prompts.py @@ -230,13 +230,14 @@ - Add operation: Generate a new unique ID in the format of a 4-digit string (e.g., "0001", "0002", etc.) *Important Requirements*: -- For update operations, you must provide the old_memory field to show the original content -- Compare the existing memories one by one and do not miss any content that needs to be updated. 
When multiple existing memories need to be updated, include all relevant entries in the operation list +1. For "UPDATE" operations, you must provide the old_memory field to display the original content +2. Compare existing memories one by one and do not omit any content requiring updates. When multiple existing memories need updating, include all relevant entries in the operation list +3. "text" field requirements: + - Use concise, complete declarative sentences, avoiding redundant information + - "text" should record the final adopted memory: if judged as "ADD", output text as "new fact"; if judged as "UPDATE", output text as "adjusted new fact"; if judged as "NONE", output text as "existing memory" +4. Both text and old_memory content should be in English +5. Return only the JSON format response, without any other content -If the new fact contradicts existing memory in key information (such as time, location, status, etc.), update ALL affected original memories based on the new fact and set operation to "UPDATE" for each one. Multiple memories covering the same outdated information should all be updated. -- Return only the JSON format response, without any other content -- text field requirements: Use concise, complete declarative sentences that are consistent with the newly acquired factual information, avoiding redundant information -- text and old_memory content should be in English Example1: Current Memories: @@ -394,11 +395,13 @@ - 新增操作:生成新的唯一ID,格式为4位数字字符串(如:"0001", "0002"等) 重要要求: -- 对于更新操作,必须提供old_memory字段显示原内容 -- 对现有记忆逐一比对,不可漏掉需要更新的内容。当多个现有记忆需要更新时,将所有的相关条目都包含在操作列表中 -- 只返回JSON格式的响应,不要包含其他任何内容 -- text字段要求:使用简洁、完整的陈述句,和新获取的事实信息一致,避免冗余信息 -- text和old_memory内容使用中文 +1. 对于"UPDATE"更新操作,必须提供old_memory字段显示原内容 +2. 对现有记忆逐一比对,不可漏掉需要更新的内容。当多个现有记忆需要更新时,将所有的相关条目都包含在操作列表中 +3. text字段要求: + - 使用简洁、完整的陈述句,避免冗余信息 + - text要记录最终采用的记忆,如果判为"ADD",则text输出为"新事实";如果判为"UPDATE",则text输出为"调整后的新事实";如果判为"NONE",则text输出为"现有记忆" +4. text和old_memory内容使用中文 +5. 
只返回JSON格式的响应,不要包含其他任何内容 示例1: 现有记忆记录: From eab5fe6d4c62c1f6b37ca100f4993ce551c6c605 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Thu, 27 Nov 2025 19:05:30 +0800 Subject: [PATCH 25/40] update prompt --- src/memos/reranker/factory.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/memos/reranker/factory.py b/src/memos/reranker/factory.py index d2c50ba5e..1440704a6 100644 --- a/src/memos/reranker/factory.py +++ b/src/memos/reranker/factory.py @@ -2,6 +2,7 @@ from __future__ import annotations import json + from typing import TYPE_CHECKING, Any # Import singleton decorator From 7577aac491385e1063c53dca461856a5bdcfea53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Thu, 27 Nov 2025 19:16:33 +0800 Subject: [PATCH 26/40] fix handler --- src/memos/api/handlers/feedback_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/api/handlers/feedback_handler.py b/src/memos/api/handlers/feedback_handler.py index 2e8a21cb1..8bf4db906 100644 --- a/src/memos/api/handlers/feedback_handler.py +++ b/src/memos/api/handlers/feedback_handler.py @@ -25,7 +25,7 @@ def __init__(self, dependencies: HandlerDependencies): dependencies: HandlerDependencies instance """ super().__init__(dependencies) - self._validate_dependencies("feedback_server", "mem_reader") + self._validate_dependencies("mem_reader") def handle_feedback_memories(self, feedback_req: APIFeedbackRequest) -> MemoryResponse: """ From cc4069da44a0321c2e0d9d197cbda9adc4c7e6be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Sat, 29 Nov 2025 18:32:50 +0800 Subject: [PATCH 27/40] add feedback scheduler --- src/memos/api/handlers/component_init.py | 22 +- src/memos/api/handlers/feedback_handler.py | 65 ++- src/memos/api/product_models.py | 23 +- src/memos/llms/openai.py | 1 + src/memos/mem_feedback/feedback.py | 123 ++--- src/memos/mem_scheduler/base_scheduler.py | 2 + src/memos/mem_scheduler/general_scheduler.py | 59 +++ .../mem_scheduler/schemas/general_schemas.py | 1 + src/memos/multi_mem_cube/composite_cube.py | 12 +- src/memos/multi_mem_cube/single_cube.py | 44 +- src/memos/templates/mem_feedback_prompts.py | 425 +----------------- 11 files changed, 254 insertions(+), 523 deletions(-) diff --git a/src/memos/api/handlers/component_init.py b/src/memos/api/handlers/component_init.py index 9413ec2c3..982487b18 100644 --- a/src/memos/api/handlers/component_init.py +++ b/src/memos/api/handlers/component_init.py @@ -293,6 +293,15 @@ def init_server() -> dict[str, Any]: ) logger.debug("Searcher created") + # Initialize feedback server + feedback_server = SimpleMemFeedback( + llm=llm, + embedder=embedder, + graph_store=graph_db, + memory_manager=memory_manager, + mem_reader=mem_reader, + ) + # Initialize Scheduler scheduler_config_dict = APIConfig.get_scheduler_config() scheduler_config = SchedulerConfigFactory( @@ -306,7 +315,9 @@ def init_server() -> dict[str, Any]: mem_reader=mem_reader, redis_client=redis_client, ) - mem_scheduler.init_mem_cube(mem_cube=naive_mem_cube, searcher=searcher) + mem_scheduler.init_mem_cube( + mem_cube=naive_mem_cube, searcher=searcher, feedback_server=feedback_server + ) logger.debug("Scheduler initialized") # Initialize SchedulerAPIModule @@ -327,15 +338,6 @@ def init_server() -> dict[str, Any]: online_bot = get_online_bot_function() if dingding_enabled else None 
logger.info("DingDing bot is enabled") - # Initialize feedback server - feedback_server = SimpleMemFeedback( - llm=llm, - embedder=embedder, - graph_store=graph_db, - memory_manager=memory_manager, - mem_reader=mem_reader, - ) - deepsearch_agent = DeepSearchMemAgent( llm=llm, memory_retriever=tree_mem, diff --git a/src/memos/api/handlers/feedback_handler.py b/src/memos/api/handlers/feedback_handler.py index 8bf4db906..2d3dc4ee2 100644 --- a/src/memos/api/handlers/feedback_handler.py +++ b/src/memos/api/handlers/feedback_handler.py @@ -5,6 +5,9 @@ from memos.api.handlers.base_handler import BaseHandler, HandlerDependencies from memos.api.product_models import APIFeedbackRequest, MemoryResponse from memos.log import get_logger +from memos.multi_mem_cube.composite_cube import CompositeCubeView +from memos.multi_mem_cube.single_cube import SingleCubeView +from memos.multi_mem_cube.views import MemCubeView logger = get_logger(__name__) @@ -25,7 +28,7 @@ def __init__(self, dependencies: HandlerDependencies): dependencies: HandlerDependencies instance """ super().__init__(dependencies) - self._validate_dependencies("mem_reader") + self._validate_dependencies("mem_reader", "mem_scheduler") def handle_feedback_memories(self, feedback_req: APIFeedbackRequest) -> MemoryResponse: """ @@ -37,16 +40,54 @@ def handle_feedback_memories(self, feedback_req: APIFeedbackRequest) -> MemoryRe Returns: MemoryResponse with formatted results """ - process_record = self.feedback_server.process_feedback( - user_name=feedback_req.mem_cube_id, - session_id=feedback_req.session_id, - chat_history=feedback_req.history, - retrieved_memory_ids=feedback_req.retrieved_memory_ids, - feedback_content=feedback_req.feedback_content, - feedback_time=feedback_req.feedback_time, - allow_knowledgebase_write=feedback_req.allow_knowledgebase_write, - sync_mode=feedback_req.sync_mode, - corrected_answer=feedback_req.corrected_answer, + cube_view = self._build_cube_view(feedback_req) + + process_record = cube_view.feedback_memories(feedback_req) + + self.logger.info(f"[FeedbackHandler] Final feedback results count={len(process_record)}") + + return MemoryResponse( + message="Memory feedback successfully", + data=[process_record], ) - return MemoryResponse(message="Feedback process successfully", data=[process_record]) + def _resolve_cube_ids(self, feedback_req: APIFeedbackRequest) -> list[str]: + """ + Normalize target cube ids from feedback_req. 
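+
+        Precedence: explicit `writable_cube_ids` win (de-duplicated with order
+        preserved via dict.fromkeys); otherwise fall back to the user's own cube.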
+        """
+        if feedback_req.writable_cube_ids:
+            return list(dict.fromkeys(feedback_req.writable_cube_ids))
+
+        return [feedback_req.user_id]
+
+    def _build_cube_view(self, feedback_req: APIFeedbackRequest) -> MemCubeView:
+        cube_ids = self._resolve_cube_ids(feedback_req)
+
+        if len(cube_ids) == 1:
+            cube_id = cube_ids[0]
+            return SingleCubeView(
+                cube_id=cube_id,
+                naive_mem_cube=None,
+                mem_reader=None,
+                mem_scheduler=self.mem_scheduler,
+                logger=self.logger,
+                searcher=None,
+                feedback_server=self.feedback_server,
+            )
+        else:
+            single_views = [
+                SingleCubeView(
+                    cube_id=cube_id,
+                    naive_mem_cube=None,
+                    mem_reader=None,
+                    mem_scheduler=self.mem_scheduler,
+                    logger=self.logger,
+                    searcher=None,
+                    feedback_server=self.feedback_server,
+                )
+                for cube_id in cube_ids
+            ]
+            return CompositeCubeView(
+                cube_views=single_views,
+                logger=self.logger,
+            )
diff --git a/src/memos/api/product_models.py b/src/memos/api/product_models.py
index 8035ff3f8..52d5fe6f6 100644
--- a/src/memos/api/product_models.py
+++ b/src/memos/api/product_models.py
@@ -625,29 +625,30 @@ class APIFeedbackRequest(BaseRequest):
     """Request model for processing feedback info."""
 
     user_id: str = Field(..., description="User ID")
-    mem_cube_id: str | None = Field(..., description="Cube ID to use for chat")
     session_id: str | None = Field(
         "default_session", description="Session ID for soft-filtering memories"
     )
-    agent_id: str = Field(None, description="Agent ID")
-    app_id: str = Field(None, description="App ID")
+    task_id: str | None = Field(None, description="Task ID for monitoring async tasks")
     history: list[MessageDict] | None = Field(..., description="Chat history")
     retrieved_memory_ids: list[str] | None = Field(
         None, description="Retrieved memory ids at last turn"
     )
     feedback_content: str | None = Field(..., description="Feedback content to process")
     feedback_time: str | None = Field(None, description="Feedback time")
-    allow_public: bool = Field(
-        False, description="Whether to allow writing to the public memory repository"
-    )
-    allow_knowledgebase_write: bool = Field(
-        False, description="Whether to allow writing into the user memory repository"
-    )
-    allow_knowledgebase_ids: bool = Field(
-        False, description="Write to the specified memory repository ID"
+    # ==== Multi-cube writing ====
+    writable_cube_ids: list[str] | None = Field(
+        None, description="List of cube IDs the user can write to for multi-cube feedback"
     )
     sync_mode: Literal["sync", "async"] = Field("async", description="feedback mode: sync or async")
    corrected_answer: bool = Field(False, description="Whether need return corrected answer")
+    # ==== Backward compatibility ====
+    mem_cube_id: str | None = Field(
+        None,
+        description=(
+            "(Deprecated) Single cube ID to write feedback to. "
+            "Prefer `writable_cube_ids` for multi-cube feedback."
+ ), + ) class APIChatCompleteRequest(BaseRequest): diff --git a/src/memos/llms/openai.py b/src/memos/llms/openai.py index 9b348adcf..19d7a60fe 100644 --- a/src/memos/llms/openai.py +++ b/src/memos/llms/openai.py @@ -39,6 +39,7 @@ def generate(self, messages: MessageList, **kwargs) -> str: top_p=kwargs.get("top_p", self.config.top_p), extra_body=kwargs.get("extra_body", self.config.extra_body), tools=kwargs.get("tools", NOT_GIVEN), + timeout=kwargs.get("timeout", 30), ) logger.info(f"Response from OpenAI: {response.model_dump_json()}") tool_calls = getattr(response.choices[0].message, "tool_calls", None) diff --git a/src/memos/mem_feedback/feedback.py b/src/memos/mem_feedback/feedback.py index dd5dbde4c..bf668852f 100644 --- a/src/memos/mem_feedback/feedback.py +++ b/src/memos/mem_feedback/feedback.py @@ -114,7 +114,7 @@ def _feedback_judgement( return [] def _feedback_memory( - self, user_name: str, feedback_memories: list[TextualMemoryItem], **kwargs + self, user_id: str, user_name: str, feedback_memories: list[TextualMemoryItem], **kwargs ) -> dict: sync_mode = kwargs.get("sync_mode") retrieved_memory_ids = kwargs.get("retrieved_memory_ids") or [] @@ -190,10 +190,11 @@ def _add_or_update( chat_history=fact_history, ) - operations = self._get_llm_response(prompt).get("operation", []) + operations = self._get_llm_response(prompt).get("operations", []) operations = self._id_dehallucination(operations, current_memories) else: - operations = [{"event": "ADD"}] + operations = [{"operation": "ADD"}] + # TODO based on the operation, change memory_item memory info logger.info(f"[Feedback memory operations]: {operations!s}") @@ -206,7 +207,7 @@ def _add_or_update( with ContextThreadPoolExecutor(max_workers=10) as executor: future_to_op = {} for op in operations: - event_type = op.get("event", "").lower() + event_type = op.get("operation", "").lower() if event_type == "add": future = executor.submit( @@ -290,7 +291,7 @@ def _vec_query(self, new_memories_embedding: list[float], user_name=None): def _get_llm_response(self, prompt: str, dsl: bool = True) -> dict: messages = [{"role": "user", "content": prompt}] try: - response_text = self.llm.generate(messages, temperature=0.3) + response_text = self.llm.generate(messages, temperature=0.3, timeout=60) if dsl: response_text = response_text.replace("```", "").replace("json", "") response_json = json.loads(response_text) @@ -306,7 +307,7 @@ def _id_dehallucination(self, operations, current_memories): right_lower_map = {x.lower(): x for x in right_ids} def correct_item(data): - if data.get("event", "").lower() != "update": + if data.get("operation", "").lower() != "update": return data original_id = data["id"] @@ -348,6 +349,7 @@ def _generate_answer( def process_feedback_core( self, + user_id: str, user_name: str, chat_history: list[MessageDict], feedback_content: str, @@ -370,11 +372,10 @@ def check_validity(item): try: feedback_time = kwargs.get("feedback_time") or datetime.now().isoformat() session_id = kwargs.get("session_id") - allow_knowledgebase_write = bool(kwargs.get("allow_knowledgebase_write")) - if feedback_content.strip() == "" or not allow_knowledgebase_write: + if feedback_content.strip() == "": return {"record": {"add": [], "update": []}} - info = {"user_id": user_name, "session_id": session_id} + info = {"user_id": user_id, "user_name": user_name, "session_id": session_id} logger.info( f"[Feedback Core: process_feedback_core] Starting memory feedback process for user {user_name}" ) @@ -435,6 +436,7 @@ def check_validity(item): 
embedding=embedding, usage=[], sources=[{"type": "chat"}], + user_name=user_name, background="", confidence=0.99, type="fine", @@ -443,6 +445,7 @@ def check_validity(item): ) mem_record = self._feedback_memory( + user_id, user_name, feedback_memories, chat_history=chat_history, @@ -460,6 +463,7 @@ def check_validity(item): def process_feedback( self, + user_id: str, user_name: str, chat_history: list[MessageDict], feedback_content: str, @@ -469,7 +473,7 @@ def process_feedback( Process feedback with different modes. Args: - user_name: User identifier + user_name: cube_ids chat_history: List of chat messages feedback_content: Feedback content from user **kwargs: Additional arguments including sync_mode @@ -477,82 +481,45 @@ def process_feedback( Returns: Dict with answer and/or memory operation records """ - sync_mode = kwargs.get("sync_mode") - corrected_answer = kwargs.get("corrected_answer") - - if sync_mode == "sync": - with ContextThreadPoolExecutor(max_workers=2) as ex: - answer_future = ex.submit( - self._generate_answer, - chat_history, - feedback_content, - corrected_answer=corrected_answer, - ) - core_future = ex.submit( - self.process_feedback_core, - user_name, - chat_history, - feedback_content, - **kwargs, - ) - done, pending = concurrent.futures.wait([answer_future, core_future], timeout=30) - for fut in pending: - fut.cancel() - try: - answer = answer_future.result() - record = core_future.result() - logger.info( - f"[MemFeedback sync] Completed concurrently for user {user_name} with full results." - ) - return {"answer": answer, "record": record["record"]} - except concurrent.futures.TimeoutError: - logger.error( - f"[MemFeedback sync] Timeout in sync mode for {user_name}", exc_info=True - ) - return {"answer": "", "record": {"add": [], "update": []}} - except Exception as e: - logger.error( - f"[MemFeedback sync] Error in concurrent tasks for {user_name}: {e}", - exc_info=True, - ) - return {"answer": "", "record": {"add": [], "update": []}} - else: - answer = self._generate_answer( - chat_history, feedback_content, corrected_answer=corrected_answer - ) + corrected_answer = kwargs.get("corrected_answer", False) - ex = ContextThreadPoolExecutor(max_workers=1) - future = ex.submit( + with ContextThreadPoolExecutor(max_workers=2) as ex: + answer_future = ex.submit( + self._generate_answer, + chat_history, + feedback_content, + corrected_answer=corrected_answer, + ) + core_future = ex.submit( self.process_feedback_core, + user_id, user_name, chat_history, feedback_content, **kwargs, ) - ex.shutdown(wait=False) - - def log_completion(f): - try: - result = f.result(timeout=600) - logger.info(f"[MemFeedback async] Completed for {user_name}: {result}") - except concurrent.futures.TimeoutError: - logger.error( - f"[MemFeedback async] Background task timeout for {user_name}", - exc_info=True, - ) - f.cancel() - except Exception as e: - logger.error( - f"[MemFeedback async] Background Feedback Error for {user_name}: {e}", - exc_info=True, - ) - - future.add_done_callback(log_completion) + done, pending = concurrent.futures.wait([answer_future, core_future], timeout=30) + for fut in pending: + fut.cancel() + try: + answer = answer_future.result() + record = core_future.result() + logger.info( + f"[MemFeedback process] Completed concurrently for user {user_name} with full results." + ) - logger.info( - f"[MemFeedback async] Returned answer, background task started for {user_name}." 
- ) - return {"answer": answer, "record": {"add": [], "update": []}} + return {"answer": answer, "record": record["record"]} + except concurrent.futures.TimeoutError: + logger.error( + f"[MemFeedback process] Timeout in sync mode for {user_name}", exc_info=True + ) + return {"answer": "", "record": {"add": [], "update": []}} + except Exception as e: + logger.error( + f"[MemFeedback process] Error in concurrent tasks for {user_name}: {e}", + exc_info=True, + ) + return {"answer": "", "record": {"add": [], "update": []}} # Helper for DB operations with retry @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) diff --git a/src/memos/mem_scheduler/base_scheduler.py b/src/memos/mem_scheduler/base_scheduler.py index f641fc442..731655753 100644 --- a/src/memos/mem_scheduler/base_scheduler.py +++ b/src/memos/mem_scheduler/base_scheduler.py @@ -158,6 +158,7 @@ def init_mem_cube( self, mem_cube: BaseMemCube, searcher: Searcher | None = None, + feedback_server: Searcher | None = None, ): self.mem_cube = mem_cube self.text_mem: TreeTextMemory = self.mem_cube.text_mem @@ -170,6 +171,7 @@ def init_mem_cube( ) else: self.searcher = searcher + self.feedback_server = feedback_server def initialize_modules( self, diff --git a/src/memos/mem_scheduler/general_scheduler.py b/src/memos/mem_scheduler/general_scheduler.py index ac2ea2bfa..4190b8e5c 100644 --- a/src/memos/mem_scheduler/general_scheduler.py +++ b/src/memos/mem_scheduler/general_scheduler.py @@ -13,6 +13,7 @@ ANSWER_LABEL, DEFAULT_MAX_QUERY_KEY_WORDS, LONG_TERM_MEMORY_TYPE, + MEM_FEEDBACK_LABEL, MEM_ORGANIZE_LABEL, MEM_READ_LABEL, NOT_APPLICABLE_TYPE, @@ -56,6 +57,7 @@ def __init__(self, config: GeneralSchedulerConfig): MEM_READ_LABEL: self._mem_read_message_consumer, MEM_ORGANIZE_LABEL: self._mem_reorganize_message_consumer, PREF_ADD_LABEL: self._pref_add_message_consumer, + MEM_FEEDBACK_LABEL: self._mem_feedback_message_consumer, } self.dispatcher.register_handlers(handlers) @@ -359,6 +361,63 @@ def _add_message_consumer(self, messages: list[ScheduleMessageItem]) -> None: except Exception as e: logger.error(f"Error: {e}", exc_info=True) + def _mem_feedback_message_consumer(self, messages: list[ScheduleMessageItem]) -> None: + try: + message = messages[0] + mem_cube = self.current_mem_cube + + user_id = message.user_id + mem_cube_id = message.mem_cube_id + content = message.content + + feedback_data = json.loads(content) + + feedback_result = self.feedback_server.process_feedback( + user_id=user_id, + user_name=mem_cube_id, + session_id=feedback_data["session_id"], + chat_history=feedback_data["history"], + retrieved_memory_ids=feedback_data["retrieved_memory_ids"], + feedback_content=feedback_data["feedback_content"], + feedback_time=feedback_data["feedback_time"], + ) + + logger.info( + f"Successfully feedback memories for user_id={user_id}, mem_cube_id={mem_cube_id}" + ) + + should_send_log = ( + self.rabbitmq_config is not None + and hasattr(self.rabbitmq_config, "exchange_type") + and self.rabbitmq_config.exchange_type == "direct" + ) + if feedback_result and should_send_log: + feedback_content = [] + for _i, mem_item in enumerate(feedback_result): + feedback_content.append( + { + "content": mem_item.memory, + "id": mem_item["id"], + } + ) + event = self.create_event_log( + label="feedbackMemory", + from_memory_type=USER_INPUT_TYPE, + to_memory_type=LONG_TERM_MEMORY_TYPE, + user_id=user_id, + mem_cube_id=mem_cube_id, + mem_cube=mem_cube, + memcube_log_content=feedback_content, + metadata=[], + 
memory_len=len(feedback_content), + memcube_name=self._map_memcube_name(mem_cube_id), + ) + event.task_id = message.task_id + self._submit_web_logs([event]) + + except Exception as e: + logger.error(f"Error processing feedbackMemory message: {e}", exc_info=True) + def _mem_read_message_consumer(self, messages: list[ScheduleMessageItem]) -> None: logger.info(f"Messages {messages} assigned to {MEM_READ_LABEL} handler.") diff --git a/src/memos/mem_scheduler/schemas/general_schemas.py b/src/memos/mem_scheduler/schemas/general_schemas.py index 91d442720..e76728286 100644 --- a/src/memos/mem_scheduler/schemas/general_schemas.py +++ b/src/memos/mem_scheduler/schemas/general_schemas.py @@ -13,6 +13,7 @@ MEM_ARCHIVE_LABEL = "mem_archive" API_MIX_SEARCH_LABEL = "api_mix_search" PREF_ADD_LABEL = "pref_add" +MEM_FEEDBACK_LABEL = "mem_feedback" TreeTextMemory_SEARCH_METHOD = "tree_text_memory_search" TreeTextMemory_FINE_SEARCH_METHOD = "tree_text_memory_fine_search" diff --git a/src/memos/multi_mem_cube/composite_cube.py b/src/memos/multi_mem_cube/composite_cube.py index 8f892d60d..6db6ca3d7 100644 --- a/src/memos/multi_mem_cube/composite_cube.py +++ b/src/memos/multi_mem_cube/composite_cube.py @@ -7,7 +7,7 @@ if TYPE_CHECKING: - from memos.api.product_models import APIADDRequest, APISearchRequest + from memos.api.product_models import APIADDRequest, APIFeedbackRequest, APISearchRequest from memos.multi_mem_cube.single_cube import SingleCubeView @@ -61,3 +61,13 @@ def search_memories(self, search_req: APISearchRequest) -> dict[str, Any]: merged_results["pref_note"] = note return merged_results + + def feedback_memories(self, feedback_req: APIFeedbackRequest) -> list[dict[str, Any]]: + all_results: list[dict[str, Any]] = [] + + for view in self.cube_views: + self.logger.info(f"[CompositeCubeView] fan-out add to cube={view.cube_id}") + results = view.feedback_memories(feedback_req) + all_results.extend(results) + + return all_results diff --git a/src/memos/multi_mem_cube/single_cube.py b/src/memos/multi_mem_cube/single_cube.py index 2b79a416c..21d4fd8d1 100644 --- a/src/memos/multi_mem_cube/single_cube.py +++ b/src/memos/multi_mem_cube/single_cube.py @@ -16,6 +16,7 @@ from memos.log import get_logger from memos.mem_scheduler.schemas.general_schemas import ( ADD_LABEL, + MEM_FEEDBACK_LABEL, MEM_READ_LABEL, PREF_ADD_LABEL, ) @@ -34,7 +35,7 @@ if TYPE_CHECKING: - from memos.api.product_models import APIADDRequest, APISearchRequest + from memos.api.product_models import APIADDRequest, APIFeedbackRequest, APISearchRequest @dataclass @@ -45,6 +46,7 @@ class SingleCubeView(MemCubeView): mem_scheduler: Any logger: Any searcher: Any + feedback_server: Any | None = None deepsearch_agent: Any | None = None def add_memories(self, add_req: APIADDRequest) -> list[dict[str, Any]]: @@ -131,6 +133,46 @@ def search_memories(self, search_req: APISearchRequest) -> dict[str, Any]: self.logger.info(f"Search memories result: {memories_result}") return memories_result + def feedback_memories(self, feedback_req: APIFeedbackRequest) -> dict[str, Any]: + target_session_id = feedback_req.session_id or "default_session" + if feedback_req.sync_mode == "async": + try: + feedback_req_str = json.dumps(feedback_req.model_dump()) + message_item_feedback = ScheduleMessageItem( + user_id=feedback_req.user_id, + task_id=feedback_req.task_id, + session_id=target_session_id, + mem_cube_id=self.cube_id, + mem_cube=self.naive_mem_cube, + label=MEM_FEEDBACK_LABEL, + content=feedback_req_str, + timestamp=datetime.utcnow(), + ) + 
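+                # Contract with _mem_feedback_message_consumer: `content` is the
+                # JSON-dumped APIFeedbackRequest; the consumer re-reads session_id,
+                # history, retrieved_memory_ids, feedback_content and feedback_time
+                # from it, so any new request field must be threaded through both sides.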
self.mem_scheduler.memos_message_queue.submit_messages( + messages=[message_item_feedback] + ) + self.logger.info(f"[SingleCubeView] cube={self.cube_id} Submitted FEEDBACK async") + except Exception as e: + self.logger.error( + f"[SingleCubeView] cube={self.cube_id} Failed to submit FEEDBACK: {e}", + exc_info=True, + ) + return [] + else: + feedback_result = self.feedback_server.process_feedback( + user_id=feedback_req.user_id, + user_name=self.cube_id, + session_id=feedback_req.session_id, + chat_history=feedback_req.history, + retrieved_memory_ids=feedback_req.retrieved_memory_ids, + feedback_content=feedback_req.feedback_content, + feedback_time=feedback_req.feedback_time, + sync_mode=feedback_req.sync_mode, + corrected_answer=feedback_req.corrected_answer, + ) + self.logger.info(f"Feedback memories result: {feedback_result}") + return feedback_result + def _get_search_mode(self, mode: str) -> str: """ Get search mode with environment variable fallback. diff --git a/src/memos/templates/mem_feedback_prompts.py b/src/memos/templates/mem_feedback_prompts.py index 85fbc6490..2e9a0e9c9 100644 --- a/src/memos/templates/mem_feedback_prompts.py +++ b/src/memos/templates/mem_feedback_prompts.py @@ -200,18 +200,17 @@ 输出:""" - -UPDATE_FORMER_MEMORIES = """Please analyze the newly acquired factual information and determine how this information should be updated to the memory database: add, update, or keep unchanged, and provide final operation recommendations. - +UPDATE_FORMER_MEMORIES = """Operation recommendations: +Please analyze the newly acquired factual information and determine how this information should be updated to the memory database: add, update, or keep unchanged, and provide final operation recommendations. You must strictly return the response in the following JSON format: {{ - "operation": + "operations": [ {{ "id": "", "text": "", - "event": "", + "operation": "", "old_memory": "" }}, ... @@ -221,10 +220,9 @@ *Requirements*: 1. If the new fact does not provide additional information to the existing memory item, the existing memory can override the new fact, and the operation is set to "NONE." 2. If the new fact is similar to existing memory but the information is more accurate, complete, or requires correction, set operation to "UPDATE" -3. If the new fact contradicts existing memory in key information (such as time, location, status, etc.), update the original memory based on the new fact and set operation to "UPDATE" +3. If the new fact contradicts existing memory in key information (such as time, location, status, etc.), update the original memory based on the new fact and set operation to "UPDATE", only modifying the relevant error segments in the existing memory paragraphs while keeping other text completely unchanged. 4. If there is no existing memory that requires updating, the new fact is added as entirely new information, and the operation is set to "ADD." Therefore, in the same operation list, ADD and UPDATE will not coexist. - *ID Management Rules*: - Update operation: Keep the original ID unchanged - Add operation: Generate a new unique ID in the format of a 4-digit string (e.g., "0001", "0002", etc.) @@ -235,125 +233,11 @@ 3. 
"text" field requirements: - Use concise, complete declarative sentences, avoiding redundant information - "text" should record the final adopted memory: if judged as "ADD", output text as "new fact"; if judged as "UPDATE", output text as "adjusted new fact"; if judged as "NONE", output text as "existing memory" + - When updating, ensure that only the related error segments are modified, and other text remains completely unchanged. 4. Both text and old_memory content should be in English 5. Return only the JSON format response, without any other content -Example1: -Current Memories: -{{ - "memory": [ - {{ - "id": "0911", - "text": "The user is a senior full-stack developer working at Company B" - }}, - {{ - "id": "123", - "text": "The user works as a software engineer at Company A, primarily responsible for front-end development" - }}, - {{ - "id": "648", - "text": "The user is responsible for front-end development of software at Company A" - }}, - {{ - "id": "7210", - "text": "The user is responsible for front-end development of software at Company A" - }}, - {{ - "id": "908", - "text": "The user enjoys fishing with friends on weekends" - }} - ] -}} - -The background of the new fact being put forward: -user: Do you remember where I work? -assistant: Company A. -user feedback: I work at Company B, and I am a senior full-stack developer. - -Newly facts: -"The user works as a senior full-stack developer at Company B" - -Operation recommendations: -{{ - "operation": - [ - {{ - "id": "0911", - "text": "The user is a senior full-stack developer working at Company B", - "event": "NONE" - }}, - {{ - "id": "123", - "text": "The user works as a senior full-stack developer at Company B", - "event": "UPDATE", - "old_memory": "The user works as a software engineer at Company A, primarily responsible for front-end development" - }}, - {{ - "id": "648", - "text": "The user works as a senior full-stack developer at Company B", - "event": "UPDATE", - "old_memory": "The user is responsible for front-end development of software at Company A" - }}, - {{ - "id": "7210", - "text": "The user works as a senior full-stack developer at Company B", - "event": "UPDATE", - "old_memory": "The user is responsible for front-end development of software at Company A" - }}, - {{ - "id": "908", - "text": "The user enjoys fishing with friends on weekends", - "event": "NONE" - }} - ] -}} - -Example2: -Current Memories: -{{ - "memory": [ - {{ - "id": "123", - "text": "The user works as a software engineer in Company A, mainly responsible for front-end development" - }}, - {{ - "id": "908", - "text": "The user likes to go fishing with friends on weekends" - }} - ] -}} - -The background of the new fact being put forward: -user: Guess where I live? -assistant: Hehuan Community. 
-user feedback: Wrong, update my address: Mingyue Community, Chaoyang District, Beijing - -Newly facts: -"The user's residential address is Mingyue Community, Chaoyang District, Beijing" - -Operation recommendations: -{{ - "operation": - [ - {{ - "id": "123", - "text": "The user works as a software engineer at Company A, primarily responsible for front-end development", - "event": "NONE" - }}, - {{ - "id": "908", - "text": "The user enjoys fishing with friends on weekends", - "event": "NONE" - }}, - {{ - "id": "4567", - "text": "The user's residential address is Mingyue Community, Chaoyang District, Beijing", - "event": "ADD" - }} - ] -}} - **Current Memories** {current_memories} @@ -366,7 +250,6 @@ Operation recommendations: """ - UPDATE_FORMER_MEMORIES_ZH = """请分析新获取的事实信息,并决定这些信息应该如何更新到记忆库中:新增、更新、或保持不变,并给出最终的操作建议。 你必须严格按照以下JSON格式返回响应: @@ -377,7 +260,7 @@ {{ "id": "<记忆ID>", "text": "<记忆内容>", - "event": "<操作类型,必须是 "ADD", "UPDATE", "NONE" 之一>", + "operation": "<操作类型,必须是 "ADD", "UPDATE", "NONE" 之一>", "old_memory": "<原记忆内容,仅当操作为"UPDATE"时需要提供>" }}, ... @@ -385,10 +268,10 @@ }} 要求: -1. 如果新事实对现有记忆item没有额外补充,现有记忆的信息可以覆盖新事实,设置操作为"NONE" -2. 如果新事实与现有记忆item相似但信息更准确、完整或需要修正,设置操作为"UPDATE" -3. 如果新事实与现有记忆在关键信息上矛盾(如时间、地点、状态等),以新事实为准更新原有记忆,设置操作为"UPDATE" -4. 如果现有记忆中没有需要更新的,则新事实作为全新信息添加,设置操作为"ADD"。因此可知同一个 operation 列表中,ADD和UPDATE不会同时存在。 +1. 若新事实未对现有记忆条目提供额外信息,现有记忆可覆盖新事实,操作设为"NONE" +2. 若新事实与现有记忆相似但信息更准确、完整或需修正,操作设为"UPDATE" +3. 若新事实在关键信息(如时间、地点、状态等)上与现有记忆矛盾,则根据新事实更新原记忆,操作设为"UPDATE",仅修改现有记忆段落中的相关错误片段,其余文本完全保持不变 +4. 若无需要更新的现有记忆,则将新事实作为全新信息添加,操作设为"ADD"。因此在同一操作列表中,ADD与UPDATE不会同时存在 ID管理规则: - 更新操作:保持原有ID不变 @@ -400,296 +283,18 @@ 3. text字段要求: - 使用简洁、完整的陈述句,避免冗余信息 - text要记录最终采用的记忆,如果判为"ADD",则text输出为"新事实";如果判为"UPDATE",则text输出为"调整后的新事实";如果判为"NONE",则text输出为"现有记忆" + - 更新时确保仅修改相关错误片段,其余文本完全保持不变 4. text和old_memory内容使用中文 5. 只返回JSON格式的响应,不要包含其他任何内容 -示例1: -现有记忆记录: -{{ - "memory": [ - {{ - "id": "0911", - "text": "用户是高级全栈开发工程师,在B公司工作" - }}, - {{ - "id": "123", - "text": "用户在公司A担任软件工程师,主要负责前端开发" - }}, - {{ - "id": "648", - "text": "用户在公司A负责软件的前端开发工作" - }}, - {{ - "id": "7210", - "text": "用户在公司A负责软件的前端开发工作" - }}, - {{ - "id": "908", - "text": "用户周末喜欢和朋友一起钓鱼" - }} - ] -}} - -提出新事实的背景: -user: 你还记得我现在在哪里工作吗? -assistant: A公司 -user feedback: 实际上,我在公司B工作,是一名高级全栈开发人员。 - - -新获取的事实: -"用户现在在公司B担任高级全栈开发工程师" - -操作建议: -{{ - "operation": - [ - {{ - "id": "0911", - "text": "用户是高级全栈开发工程师,在B公司工作", - "event": "NONE" - }}, - {{ - "id": "123", - "text": "用户现在在公司B担任高级全栈开发工程师", - "event": "UPDATE", - "old_memory": "用户在公司A担任软件工程师,主要负责前端开发" - }}, - {{ - "id": "648", - "text": "用户现在在公司B担任高级全栈开发工程师", - "event": "UPDATE", - "old_memory": "用户在公司A负责软件的前端开发工作" - }}, - {{ - "id": "7210", - "text": "用户现在在公司B担任高级全栈开发工程师", - "event": "UPDATE", - "old_memory": "用户在公司A负责软件的前端开发工作" - }}, - {{ - "id": "908", - "text": "用户周末喜欢和朋友一起钓鱼", - "event": "NONE" - }} - ] -}} - -示例2: -现有记忆记录: -{{ - "memory": [ - {{ - "id": "123", - "text": "用户在公司A担任软件工程师,主要负责前端开发" - }}, - {{ - "id": "908", - "text": "用户周末喜欢和朋友一起钓鱼" - }} - ] -}} - -提出新事实的背景: -user: 猜猜我住在哪里? 
-assistant: 合欢社区 -user feedback: 错了,请更新我的地址:北京市朝阳区明月社区 - -新获取的事实: -"用户的居住地址是北京市朝阳区明月小区" -操作建议: -{{ - "operation": - [ - {{ - "id": "123", - "text": "用户在公司A担任软件工程师,主要负责前端开发", - "event": "NONE" - }}, - {{ - "id": "908", - "text": "用户周末喜欢和朋友一起钓鱼", - "event": "NONE" - }}, - {{ - "id": "4567", - "text": "用户的居住地址是北京市朝阳区明月小区", - "event": "ADD" - }} - ] -}} - -**现有记忆记录:** +**当前记忆:** {current_memories} -**提出新事实的背景:** +**新事实提出的背景:** {chat_history} -**新获取的事实:** -{new_facts} - -操作建议: -""" - - -GROUP_UPDATE_FORMER_MEMORIES = """Please analyze the newly acquired factual information and determine how this information should be updated to the memory database: add, update, or keep unchanged, and provide final operation recommendations. - -You must strictly return the response in the following JSON format: - -{{ - "operation": [ - {{ - "id": "", - "text": "", - "event": "", - "old_memory": "" - }}, - ... - ] -}} - -*Requirements*: -1. If the new fact provides no additional supplement to existing memory, set operation to "NONE" -2. If the new fact is similar to existing memory but the information is more accurate, complete, or requires correction, set operation to "UPDATE" -3. If the new fact contradicts existing memory in key information (such as time, location, status, etc.), update the original memory based on the new fact and set operation to "UPDATE" -4. If there is completely new information to add, set operation to "ADD" - -*ID Management Rules*: -- Update operation: Keep the original ID unchanged -- Add operation: Generate a new unique ID in the format of a 4-digit string (e.g., "0001", "0002", etc.) - -*Important Requirements*: -- Return only the JSON format response, without any other content -- For update operations, you must provide the old_memory field to show the original content -- text field requirements: Use concise, complete declarative sentences that are consistent with the newly acquired factual information, avoiding redundant information -- text and old_memory content should be in English - -Example: -Current Memories: -{{ - "memory": [ - {{ - "id": "123", - "text": "The user works as a software engineer in Company A, mainly responsible for front-end development" - }}, - {{ - "id": "908", - "text": "The user likes to go fishing with friends on weekends" - }} - ] -}} - -Newly facts: -["The user is currently working as a senior full-stack development engineer at Company B", "The user's residential address is Mingyue Community, Chaoyang District, Beijing", "The user goes fishing on weekends"] - -Operation recommendations: -{{ - "operation": [ - {{ - "id": "123", - "text": "The user is currently working as a senior full-stack development engineer at Company B", - "event": "UPDATE", - "old_memory": "The user works as a software engineer in Company A, mainly responsible for front-end development" - }}, - {{ - "id": "4567", - "text": "The user's residential address is Mingyue Community, Chaoyang District, Beijing", - "event": "ADD" - }}, - {{ - "id": "908", - "text": "The user likes to go fishing with friends on weekends", - "event": "NONE" - }} - ] -}} - -Current Memories -{current_memories} - -Newly facts: -{new_facts} - -Operation recommendations: -""" - - -GROUP_UPDATE_FORMER_MEMORIES_ZH = """请分析新获取的事实信息,并决定这些信息应该如何更新到记忆库中:新增、更新、或保持不变,并给出最终的操作建议。 - -你必须严格按照以下JSON格式返回响应: - -{{ - "operation": [ - {{ - "id": "<记忆ID>", - "text": "<记忆内容>", - "event": "<操作类型,必须是 "ADD", "UPDATE", "NONE" 之一>", - "old_memory": "<原记忆内容,仅当操作为"UPDATE"时需要提供>" - }}, - ... - ] -}} - -要求: -1. 
如果新事实对现有记忆没有额外补充,设置操作为"NONE" -2. 如果新事实与现有记忆相似但信息更准确、完整或需要修正,设置操作为"UPDATE" -3. 如果新事实与现有记忆在关键信息上矛盾(如时间、地点、状态等),以新事实为准更新原有记忆,设置操作为"UPDATE" -4. 如果有全新信息添加,设置操作为"ADD" - -ID管理规则: -- 更新操作:保持原有ID不变 -- 新增操作:生成新的唯一ID,格式为4位数字字符串(如:"0001", "0002"等) - -重要要求: -- 只返回JSON格式的响应,不要包含其他任何内容 -- 对于更新操作,必须提供old_memory字段显示原内容 -- text字段要求:使用简洁、完整的陈述句,和新获取的事实信息一致,避免冗余信息 -- text和old_memory内容使用中文 - -示例: -现有记忆记录: -{{ - "memory": [ - {{ - "id": "123", - "text": "用户在公司A担任软件工程师,主要负责前端开发" - }}, - {{ - "id": "908", - "text": "用户周末喜欢和朋友一起钓鱼" - }} - ] -}} - -新获取的事实: -["用户现在在公司B担任高级全栈开发工程师", "用户的居住地址是北京市朝阳区明月小区", "用户在周末会去钓鱼"] - -操作建议: -{{ - "operation": [ - {{ - "id": "123", - "text": "用户在公司B担任高级全栈开发工程师", - "event": "UPDATE", - "old_memory": "用户在公司A担任软件工程师,主要负责前端开发" - }}, - {{ - "id": "4567", - "text": "用户的居住地址是北京市朝阳区明月小区", - "event": "ADD" - }}, - {{ - "id": "908", - "text": "用户周末喜欢和朋友一起钓鱼", - "event": "NONE" - }} - ] -}} - -现有记忆记录: -{current_memories} - -新获取的事实: +**新事实:** {new_facts} 操作建议: From 2529db24ac2a23889a2b795d6bef4c98a99eb8a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Mon, 1 Dec 2025 20:45:27 +0800 Subject: [PATCH 28/40] add handler change node update --- examples/api/product_api.py | 30 ++- src/memos/api/handlers/add_handler.py | 31 ++- src/memos/api/handlers/component_init.py | 1 + src/memos/api/handlers/feedback_handler.py | 2 +- src/memos/api/product_models.py | 4 +- src/memos/graph_dbs/polardb.py | 16 +- src/memos/mem_feedback/feedback.py | 251 ++++++++++++++---- src/memos/mem_feedback/simple_feedback.py | 3 + src/memos/mem_scheduler/general_scheduler.py | 1 + .../tree_text_memory/organize/manager.py | 20 -- src/memos/multi_mem_cube/single_cube.py | 5 +- src/memos/multi_mem_cube/views.py | 15 +- src/memos/templates/mem_feedback_prompts.py | 179 ++++++++++++- 13 files changed, 458 insertions(+), 100 deletions(-) diff --git a/examples/api/product_api.py b/examples/api/product_api.py index b98f3b8e5..687c4c02c 100644 --- a/examples/api/product_api.py +++ b/examples/api/product_api.py @@ -12,7 +12,7 @@ import requests -BASE_URL = "http://0.0.0.0:8001/product" +BASE_URL = "http://0.0.0.0:8002/product" HEADERS = {"Content-Type": "application/json"} index = "24" @@ -119,6 +119,23 @@ def chat_stream(query: str, session_id: str, history: list | None = None): print(payload) +def feedback_memory(feedback_content: str, history: list | None = None): + url = f"{BASE_URL}/feedback" + data = { + "user_id": USER_ID, + "writable_cube_ids": [MEM_CUBE_ID], + "history": history, + "feedback_content": feedback_content, + "async_mode": "sync", + "corrected_answer": "false", + } + + print("[*] Feedbacking memory ...") + resp = requests.post(url, headers=HEADERS, data=json.dumps(data), timeout=30) + print(resp.status_code, resp.text) + return resp.json() + + if __name__ == "__main__": print("===== STEP 1: Register User =====") register_user() @@ -140,5 +157,14 @@ def chat_stream(query: str, session_id: str, history: list | None = None): ], ) - print("\n===== STEP 4: Stream Chat =====") + print("\n===== STEP 5: Stream Chat =====") chat_stream("我刚和你说什么了呢", SESSION_ID2, history=[]) + + print("\n===== STEP 6: Feedback Memory =====") + feedback_memory( + feedback_content="错啦,我今天没有吃拉面", + history=[ + {"role": "user", "content": "我刚和你说什么了呢"}, + {"role": "assistant", "content": "你今天吃了好吃的拉面"}, + ], + ) diff --git a/src/memos/api/handlers/add_handler.py b/src/memos/api/handlers/add_handler.py index 1bd83eae7..340a57e4a 100644 --- 
a/src/memos/api/handlers/add_handler.py +++ b/src/memos/api/handlers/add_handler.py @@ -6,7 +6,7 @@ """ from memos.api.handlers.base_handler import BaseHandler, HandlerDependencies -from memos.api.product_models import APIADDRequest, MemoryResponse +from memos.api.product_models import APIADDRequest, APIFeedbackRequest, MemoryResponse from memos.memories.textual.item import ( list_all_fields, ) @@ -56,6 +56,35 @@ def handle_add_memories(self, add_req: APIADDRequest) -> MemoryResponse: cube_view = self._build_cube_view(add_req) + if add_req.is_feedback: + chat_history = add_req.chat_history + messages = add_req.messages + concatenate_chat = chat_history + messages + + last_user_index = max(i for i, d in enumerate(concatenate_chat) if d["role"] == "user") + feedback_content = concatenate_chat[last_user_index]["content"] + feedback_history = concatenate_chat[:last_user_index] + + feedback_req = APIFeedbackRequest( + user_id=add_req.user_id, + session_id=add_req.session_id, + task_id=add_req.task_id, + history=feedback_history, + feedback_content=feedback_content, + writable_cube_ids=add_req.writable_cube_ids, + async_mode=add_req.async_mode, + ) + process_record = cube_view.feedback_memories(feedback_req) + + self.logger.info( + f"[FeedbackHandler] Final feedback results count={len(process_record)}" + ) + + return MemoryResponse( + message="Memory feedback successfully", + data=[process_record], + ) + results = cube_view.add_memories(add_req) self.logger.info(f"[AddHandler] Final add results count={len(results)}") diff --git a/src/memos/api/handlers/component_init.py b/src/memos/api/handlers/component_init.py index 982487b18..548f01887 100644 --- a/src/memos/api/handlers/component_init.py +++ b/src/memos/api/handlers/component_init.py @@ -300,6 +300,7 @@ def init_server() -> dict[str, Any]: graph_store=graph_db, memory_manager=memory_manager, mem_reader=mem_reader, + searcher=searcher, ) # Initialize Scheduler diff --git a/src/memos/api/handlers/feedback_handler.py b/src/memos/api/handlers/feedback_handler.py index 2d3dc4ee2..cf5c536ea 100644 --- a/src/memos/api/handlers/feedback_handler.py +++ b/src/memos/api/handlers/feedback_handler.py @@ -28,7 +28,7 @@ def __init__(self, dependencies: HandlerDependencies): dependencies: HandlerDependencies instance """ super().__init__(dependencies) - self._validate_dependencies("mem_reader", "mem_scheduler") + self._validate_dependencies("mem_reader", "mem_scheduler", "searcher") def handle_feedback_memories(self, feedback_req: APIFeedbackRequest) -> MemoryResponse: """ diff --git a/src/memos/api/product_models.py b/src/memos/api/product_models.py index 52d5fe6f6..f2b723565 100644 --- a/src/memos/api/product_models.py +++ b/src/memos/api/product_models.py @@ -639,7 +639,9 @@ class APIFeedbackRequest(BaseRequest): writable_cube_ids: list[str] | None = Field( None, description="List of cube IDs user can write for multi-cube add" ) - sync_mode: Literal["sync", "async"] = Field("async", description="feedback mode: sync or async") + async_mode: Literal["sync", "async"] = Field( + "async", description="feedback mode: sync or async" + ) corrected_answer: bool = Field(False, description="Whether need return corrected answer") # ==== Backward compatibility ==== mem_cube_id: str | None = Field( diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index ebaec2b90..da1635296 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -524,27 +524,19 @@ def update_node(self, id: str, fields: dict[str, Any], user_name: 
str | None = N properties = current_node["metadata"].copy() original_id = properties.get("id", id) # Preserve original ID original_memory = current_node.get("memory", "") # Preserve original memory - updated_at = fields.pop("updated_at", datetime.utcnow().isoformat()) - - record_info = f"User:{user_name} | Time:{updated_at} | Operation:Update | Overwrite: {original_memory}" - covered_history = current_node.get("covered_history", []) - covered_history.insert(0, record_info) - logger.info(f"New GraphDB Update: {record_info}") # If fields include memory, use it; otherwise keep original memory - new_memory = fields.pop("memory") if "memory" in fields else original_memory + if "memory" in fields: + original_memory = fields.pop("memory") properties.update(fields) - properties["id"] = original_id - properties["memory"] = new_memory - properties["covered_history"] = covered_history - properties["updated_at"] = updated_at + properties["id"] = original_id # Ensure ID is not overwritten + properties["memory"] = original_memory # Ensure memory is not overwritten # Handle embedding field embedding_vector = None if "embedding" in fields: embedding_vector = fields.pop("embedding") - assert properties["embedding"] == embedding_vector, "Embedding vector mismatch" if not isinstance(embedding_vector, list): embedding_vector = None diff --git a/src/memos/mem_feedback/feedback.py b/src/memos/mem_feedback/feedback.py index bf668852f..02b737451 100644 --- a/src/memos/mem_feedback/feedback.py +++ b/src/memos/mem_feedback/feedback.py @@ -3,6 +3,7 @@ import json from datetime import datetime +from typing import TYPE_CHECKING from tenacity import retry, stop_after_attempt, wait_exponential @@ -16,7 +17,14 @@ from memos.mem_reader.factory import MemReaderFactory from memos.mem_reader.simple_struct import detect_lang from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata -from memos.memories.textual.tree_text_memory.organize.manager import MemoryManager +from memos.memories.textual.tree_text_memory.organize.manager import ( + MemoryManager, + extract_working_binding_ids, +) + + +if TYPE_CHECKING: + from memos.memories.textual.tree_text_memory.retrieve.searcher import Searcher from memos.templates.mem_feedback_prompts import ( FEEDBACK_ANSWER_PROMPT, FEEDBACK_ANSWER_PROMPT_ZH, @@ -64,6 +72,7 @@ def __init__(self, config: MemFeedbackConfig): }, is_reorganize=self.is_reorganize, ) + self.searcher: Searcher = self.memory_manager.searcher def _pure_add(self, user_name: str, feedback_content: str, feedback_time: str, info: dict): """ @@ -113,10 +122,119 @@ def _feedback_judgement( ) return [] + def _single_add_operation( + self, + old_memory_item: TextualMemoryItem | None, + new_memory_item: TextualMemoryItem, + user_id: str, + user_name: str, + async_mode: str, + ) -> dict: + """ + Individual addition operations + """ + if old_memory_item: + to_add_memory = old_memory_item.model_copy(deep=True) + to_add_memory.metadata.key = new_memory_item.metadata.key + to_add_memory.metadata.tags = new_memory_item.metadata.tags + to_add_memory.memory = new_memory_item.memory + to_add_memory.metadata.embedding = new_memory_item.metadata.embedding + + to_add_memory.metadata.user_id = new_memory_item.metadata.user_id + to_add_memory.metadata.created_at = to_add_memory.metadata.updated_at = ( + datetime.now().isoformat() + ) + to_add_memory.metadata.background = new_memory_item.metadata.background + else: + to_add_memory = new_memory_item.model_copy(deep=True) + to_add_memory.metadata.created_at = 
to_add_memory.metadata.updated_at = (
+                datetime.now().isoformat()
+            )
+            to_add_memory.metadata.background = new_memory_item.metadata.background
+
+        to_add_memory.id = ""
+        added_ids = self._retry_db_operation(
+            lambda: self.memory_manager.add([to_add_memory], user_name=user_name, mode=async_mode)
+        )
+
+        logger.info(f"[Memory Feedback ADD] {added_ids[0]}")
+        return {"id": added_ids[0], "text": to_add_memory.memory}
+
+    def _single_update_operation(
+        self,
+        old_memory_item: TextualMemoryItem,
+        new_memory_item: TextualMemoryItem,
+        user_id: str,
+        user_name: str,
+        async_mode: str,
+    ) -> dict:
+        """
+        Individual update operation: WorkingMemory nodes are patched in place;
+        other memory types are re-added and the superseded node is archived.
+        """
+        memory_type = old_memory_item.metadata.memory_type
+        if memory_type == "WorkingMemory":
+            fields = {
+                "memory": new_memory_item.memory,
+                "key": new_memory_item.metadata.key,
+                "tags": new_memory_item.metadata.tags,
+                "embedding": new_memory_item.metadata.embedding,
+                "background": new_memory_item.metadata.background,
+                "covered_history": old_memory_item.id,
+            }
+            self.graph_store.update_node(old_memory_item.id, fields=fields, user_name=user_name)
+            item_id = old_memory_item.id
+        else:
+            done = self._single_add_operation(
+                old_memory_item, new_memory_item, user_id, user_name, async_mode
+            )
+            item_id = done.get("id")
+            self.graph_store.update_node(
+                item_id, {"covered_history": old_memory_item.id}, user_name=user_name
+            )
+            self.graph_store.update_node(
+                old_memory_item.id, {"status": "archived"}, user_name=user_name
+            )
+
+        logger.info(
+            f"[Memory Feedback UPDATE] New Add:{item_id} | Set archived:{old_memory_item.id} | memory_type: {memory_type}"
+        )
+
+        return {
+            "id": item_id,
+            "text": new_memory_item.memory,
+            "archived_id": old_memory_item.id,
+            "origin_memory": old_memory_item.memory,
+        }
+
+    def _del_working_binding(self, user_name, mem_items: list[TextualMemoryItem]) -> None:
+        """Delete working-memory bindings attached to the given memory items."""
+        bindings_to_delete = extract_working_binding_ids(mem_items)
+
+        logger.info(
+            f"[Memory Feedback UPDATE] Extracted {len(bindings_to_delete)} working_binding ids to cleanup: {list(bindings_to_delete)}"
+        )
+
+        # bindings_to_delete is already a set of ids; materialize it as a list
+        delete_ids = list(bindings_to_delete)
+
+        for mid in delete_ids:
+            try:
+                self.graph_store.delete_node(mid, user_name=user_name)
+            except Exception as e:
+                logger.warning(
+                    f"[Feedback Core:_del_working_binding] TreeTextMemory.delete_hard: failed to delete {mid}: {e}"
+                )
+        if delete_ids:
+            logger.info(
+                f"[Feedback Core:_del_working_binding] Deleted raw/working mem_ids: {delete_ids} for user_name: {user_name}"
+            )
+
     def _feedback_memory(
         self, user_id: str, user_name: str, feedback_memories: list[TextualMemoryItem], **kwargs
     ) -> dict:
-        sync_mode = kwargs.get("sync_mode")
+        async_mode = kwargs.get("async_mode")
         retrieved_memory_ids = kwargs.get("retrieved_memory_ids") or []
         chat_history = kwargs.get("chat_history", [])
         feedback_content = kwargs.get("feedback_content", "")
@@ -124,7 +242,9 @@ def _feedback_memory(
         chat_history_lis = [f"""{msg["role"]}: {msg["content"]}""" for msg in chat_history[-4:]]
         fact_history = "\n".join(chat_history_lis) + f"\nuser feedback: \n{feedback_content}"
 
-        retrieved_memories = [self.graph_store.get_node(_id) for _id in retrieved_memory_ids]
+        retrieved_memories = [
+            self.graph_store.get_node(_id, user_name=user_name) for _id in retrieved_memory_ids
+        ]
         filterd_ids = [
             item["id"] for item in retrieved_memories if "mode:fast" in item["metadata"]["tags"]
         ]
@@ -134,58 +254,29 @@
         )
 
         current_memories = [
-            {"id": 
item["id"], "text": item["memory"]} + TextualMemoryItem(**item) for item in retrieved_memories if "mode:fast" not in item["metadata"]["tags"] ] - def _single_add_operation( - memory_item: TextualMemoryItem, user_name: str, sync_mode: str - ) -> dict: - """ - Individual addition operations - """ - added_ids = self._retry_db_operation( - lambda: self.memory_manager.add([memory_item], user_name=user_name, mode=sync_mode) - ) - logger.info(f"[Memory Feedback ADD] {added_ids[0]}") - - return {"id": added_ids[0], "text": memory_item.memory} - - def _single_update_operation( - op: dict, memory_item: TextualMemoryItem, user_name: str, sync_mode: str - ) -> dict: - """ - Individual update operations - """ - update_id = op.get("id") - updated_ids = self._retry_db_operation( - lambda: self.memory_manager.update( - [update_id], [memory_item], user_name=user_name, mode=sync_mode - ) - ) - log_update_info = op.get("old_memory", "") + " >> " + op.get("text", "") - logger.info(f"[Memory Feedback UPDATE] {updated_ids[0]}, info: {log_update_info}") - - return { - "id": update_id, - "origin_memory": op.get("old_memory", ""), - "text": op.get("text", ""), - } - def _add_or_update( - memory_item: TextualMemoryItem, current_memories: list, fact_history: str + memory_item: TextualMemoryItem, + current_memories: list[TextualMemoryItem], + fact_history: str, ): if current_memories == []: - current_memories = self._vec_query( - memory_item.metadata.embedding, user_name=user_name + current_memories = self._retrieve( + memory_item.memory, info={"user_id": user_id}, user_name=user_name ) if current_memories: lang = detect_lang("".join(memory_item.memory)) template = FEEDBACK_PROMPT_DICT["compare"][lang] + current_memories_str = "\n".join( + [f"{item.id}: {item.memory}" for item in current_memories] + ) prompt = template.format( - current_memories=str(current_memories), + current_memories=current_memories_str, new_facts=memory_item.memory, chat_history=fact_history, ) @@ -195,7 +286,7 @@ def _add_or_update( else: operations = [{"operation": "ADD"}] - # TODO based on the operation, change memory_item memory info + # TODO based on the operation, change memory_item memory info ; change source info logger.info(f"[Feedback memory operations]: {operations!s}") if not operations: @@ -203,7 +294,7 @@ def _add_or_update( add_results = [] update_results = [] - + id_to_item = {item.id: item for item in current_memories} with ContextThreadPoolExecutor(max_workers=10) as executor: future_to_op = {} for op in operations: @@ -211,12 +302,22 @@ def _add_or_update( if event_type == "add": future = executor.submit( - _single_add_operation, memory_item, user_name, sync_mode + self._single_add_operation, + None, + memory_item, + user_id, + user_name, + async_mode, ) future_to_op[future] = ("add", op) elif event_type == "update": future = executor.submit( - _single_update_operation, op, memory_item, user_name, sync_mode + self._single_update_operation, + id_to_item[op["id"]], + memory_item, + user_id, + user_name, + async_mode, ) future_to_op[future] = ("update", op) @@ -224,15 +325,18 @@ def _add_or_update( result_type, original_op = future_to_op[future] try: result = future.result() - if result_type == "add": + if result_type == "add" and result: add_results.append(result) - elif result_type == "update": + elif result_type == "update" and result: update_results.append(result) except Exception as e: logger.error( f"[Feedback Core: _add_or_update] Operation failed for {original_op}: {e}", exc_info=True, ) + if update_results: + updated_ids 
= [item["archived_id"] for item in update_results]
+                # _del_working_binding expects (user_name, mem_items): map the
+                # archived ids back to their TextualMemoryItem objects first
+                archived_items = [id_to_item[mid] for mid in updated_ids if mid in id_to_item]
+                self._del_working_binding(user_name, archived_items)
 
         return {"record": {"add": add_results, "update": update_results}}
 
@@ -262,11 +366,38 @@
             }
         }
 
+    def _retrieve(self, query: str, info=None, user_name=None):
+        """Retrieve memory items via the shared searcher."""
+        retrieved_mems = self.searcher.search(query, info=info, user_name=user_name)
+        return retrieved_mems
+
     def _vec_query(self, new_memories_embedding: list[float], user_name=None):
-        retrieved_ids = self.graph_store.search_by_embedding(
-            new_memories_embedding, user_name=user_name, top_k=10, threshold=0.7
+        """Vector retrieval query over the UserMemory and LongTermMemory scopes."""
+        retrieved_ids = []
+        retrieved_ids.extend(
+            self.graph_store.search_by_embedding(
+                new_memories_embedding,
+                scope="UserMemory",
+                user_name=user_name,
+                top_k=10,
+                threshold=0.2,
+            )
+        )
+        retrieved_ids.extend(
+            self.graph_store.search_by_embedding(
+                new_memories_embedding,
+                scope="LongTermMemory",
+                user_name=user_name,
+                top_k=10,
+                threshold=0.2,
+            )
         )
-        current_memories = [self.graph_store.get_node(item["id"]) for item in retrieved_ids]
+        current_memories = [
+            self.graph_store.get_node(item["id"], user_name=user_name) for item in retrieved_ids
+        ]
+
+        for item in current_memories:
+            logger.debug(
+                f"[Feedback Core: _vec_query] {item['id']} | {item['metadata']['memory_type']} | {item['metadata']['status']}"
+            )
         if not retrieved_ids:
             logger.info(
                 f"[Feedback Core: _vec_query] No similar memories found for embedding query for user {user_name}."
@@ -280,10 +411,7 @@
             f"[Feedback Core: _vec_query] Since the tags mode is fast, no modifications are made to the following memory {filterd_ids}."
             )
         return [
-            {
-                "id": item["id"],
-                "text": item["memory"],
-            }
+            TextualMemoryItem(**item)
             for item in current_memories
             if "mode:fast" not in item["metadata"]["tags"]
         ]
@@ -303,7 +431,7 @@
 
     def _id_dehallucination(self, operations, current_memories):
-        right_ids = [item["id"] for item in current_memories]
+        right_ids = [item.id for item in current_memories]
         right_lower_map = {x.lower(): x for x in right_ids}
 
         def correct_item(data):
@@ -437,7 +565,10 @@
             usage=[],
             sources=[{"type": "chat"}],
             user_name=user_name,
-            background="",
+            background="[Feedback update background]: "
+            + str(chat_history)
+            + "\nUser feedback: "
+            + str(feedback_content),
             confidence=0.99,
             type="fine",
         ),
@@ -476,7 +607,7 @@
             user_name: cube_ids
             chat_history: List of chat messages
             feedback_content: Feedback content from user
-            **kwargs: Additional arguments including sync_mode
+            **kwargs: Additional arguments including async_mode
 
         Returns:
             Dict with answer and/or memory operation records
@@ -504,8 +635,10 @@
         try:
             answer = answer_future.result()
             record = core_future.result()
+            task_id = kwargs.get("task_id", "default")
+
             logger.info(
-                f"[MemFeedback process] Completed concurrently for user {user_name} with full results."
+                f"[MemFeedback process] Feedback completed: user {user_name} | task_id {task_id} | record {record}."
) return {"answer": answer, "record": record["record"]} diff --git a/src/memos/mem_feedback/simple_feedback.py b/src/memos/mem_feedback/simple_feedback.py index 59ee38438..01132eb97 100644 --- a/src/memos/mem_feedback/simple_feedback.py +++ b/src/memos/mem_feedback/simple_feedback.py @@ -5,6 +5,7 @@ from memos.mem_feedback.feedback import MemFeedback from memos.mem_reader.simple_struct import SimpleStructMemReader from memos.memories.textual.tree_text_memory.organize.manager import MemoryManager +from memos.memories.textual.tree_text_memory.retrieve.searcher import Searcher logger = log.get_logger(__name__) @@ -18,9 +19,11 @@ def __init__( graph_store: PolarDBGraphDB, memory_manager: MemoryManager, mem_reader: SimpleStructMemReader, + searcher: Searcher, ): self.llm = llm self.embedder = embedder self.graph_store = graph_store self.memory_manager = memory_manager self.mem_reader = mem_reader + self.searcher = searcher diff --git a/src/memos/mem_scheduler/general_scheduler.py b/src/memos/mem_scheduler/general_scheduler.py index 4190b8e5c..65169f358 100644 --- a/src/memos/mem_scheduler/general_scheduler.py +++ b/src/memos/mem_scheduler/general_scheduler.py @@ -380,6 +380,7 @@ def _mem_feedback_message_consumer(self, messages: list[ScheduleMessageItem]) -> retrieved_memory_ids=feedback_data["retrieved_memory_ids"], feedback_content=feedback_data["feedback_content"], feedback_time=feedback_data["feedback_time"], + task_id=feedback_data["task_id"], ) logger.info( diff --git a/src/memos/memories/textual/tree_text_memory/organize/manager.py b/src/memos/memories/textual/tree_text_memory/organize/manager.py index 94b86cec0..a71fee02f 100644 --- a/src/memos/memories/textual/tree_text_memory/organize/manager.py +++ b/src/memos/memories/textual/tree_text_memory/organize/manager.py @@ -115,26 +115,6 @@ def add( self._refresh_memory_size(user_name=user_name) return added_ids - def update( - self, - memories_ids: list[str], - memories: list[TextualMemoryItem], - user_name: str | None = None, - mode: str = "sync", - ) -> list[str]: - for _id, memory in zip(memories_ids, memories, strict=False): - field = { - "memory": memory.memory, - "key": memory.metadata.key, - "tags": memory.metadata.tags, - "embedding": memory.metadata.embedding, - } - self.graph_store.update_node(_id, field, user_name) - - graph_class_name = self.graph_store.__class__.__name__ - logger.info(f"[MemoryManager] Updated {graph_class_name}") - return memories_ids - def replace_working_memory( self, memories: list[TextualMemoryItem], user_name: str | None = None ) -> None: diff --git a/src/memos/multi_mem_cube/single_cube.py b/src/memos/multi_mem_cube/single_cube.py index 21d4fd8d1..4d40798ec 100644 --- a/src/memos/multi_mem_cube/single_cube.py +++ b/src/memos/multi_mem_cube/single_cube.py @@ -135,7 +135,7 @@ def search_memories(self, search_req: APISearchRequest) -> dict[str, Any]: def feedback_memories(self, feedback_req: APIFeedbackRequest) -> dict[str, Any]: target_session_id = feedback_req.session_id or "default_session" - if feedback_req.sync_mode == "async": + if feedback_req.async_mode == "async": try: feedback_req_str = json.dumps(feedback_req.model_dump()) message_item_feedback = ScheduleMessageItem( @@ -167,8 +167,9 @@ def feedback_memories(self, feedback_req: APIFeedbackRequest) -> dict[str, Any]: retrieved_memory_ids=feedback_req.retrieved_memory_ids, feedback_content=feedback_req.feedback_content, feedback_time=feedback_req.feedback_time, - sync_mode=feedback_req.sync_mode, + async_mode=feedback_req.async_mode, 
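+                # sync path: runs inline and blocks the request; the kwargs below
+                # broadly mirror what the async scheduler consumer passes through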
corrected_answer=feedback_req.corrected_answer, + task_id=feedback_req.task_id, ) self.logger.info(f"Feedback memories result: {feedback_result}") return feedback_result diff --git a/src/memos/multi_mem_cube/views.py b/src/memos/multi_mem_cube/views.py index baf5e80e1..7247a0328 100644 --- a/src/memos/multi_mem_cube/views.py +++ b/src/memos/multi_mem_cube/views.py @@ -4,7 +4,7 @@ if TYPE_CHECKING: - from memos.api.product_models import APIADDRequest, APISearchRequest + from memos.api.product_models import APIADDRequest, APIFeedbackRequest, APISearchRequest class MemCubeView(Protocol): @@ -39,3 +39,16 @@ def search_memories(self, search_req: APISearchRequest) -> dict[str, Any]: - cube_id """ ... + + def feedback_memories(self, feedback_req: APIFeedbackRequest) -> dict[str, Any]: + """ + Process feedback_req, read memories from one or more cubes and feedback them. + + Returns: + A list of memory dicts, each item should at least contain: + - memory + - memory_id + - memory_type + - cube_id + """ + ... diff --git a/src/memos/templates/mem_feedback_prompts.py b/src/memos/templates/mem_feedback_prompts.py index 2e9a0e9c9..f7f2e8cb4 100644 --- a/src/memos/templates/mem_feedback_prompts.py +++ b/src/memos/templates/mem_feedback_prompts.py @@ -238,6 +238,94 @@ 5. Return only the JSON format response, without any other content + +Example1: +Current Memories: +"0911": "The user is a senior full-stack developer working at Company B" +"123": "The user works as a software engineer at Company A. And he has a good relationship with his wife." +"648": "The user is responsible for front-end development of software at Company A" +"7210": "The user is responsible for front-end development of software at Company A" +"908": "The user enjoys fishing with friends on weekends" + +The background of the new fact being put forward: +user: Do you remember where I work? +assistant: Company A. +user feedback: I work at Company B, and I am a senior full-stack developer. + +Newly facts: +The user works as a senior full-stack developer at Company B + +Operation recommendations: +{{ + "operations": + [ + {{ + "id": "0911", + "text": "The user is a senior full-stack developer working at Company B", + "operation": "NONE" + }}, + {{ + "id": "123", + "text": "The user works as a senior full-stack developer at Company B. And he has a good relationship with his wife.", + "operation": "UPDATE", + "old_memory": "The user works as a software engineer at Company A. And he has a good relationship with his wife." + }}, + {{ + "id": "648", + "text": "The user works as a senior full-stack developer at Company B", + "operation": "UPDATE", + "old_memory": "The user is responsible for front-end development of software at Company A" + }}, + {{ + "id": "7210", + "text": "The user works as a senior full-stack developer at Company B", + "operation": "UPDATE", + "old_memory": "The user is responsible for front-end development of software at Company A" + }}, + {{ + "id": "908", + "text": "The user enjoys fishing with friends on weekends", + "operation": "NONE" + }} + ] +}} + +Example2: +Current Memories: +"123": "The user works as a software engineer in Company A, mainly responsible for front-end development" +"908": "The user likes to go fishing with friends on weekends" + +The background of the new fact being put forward: +user: Guess where I live? +assistant: Hehuan Community. 
+user feedback: Wrong, update my address: Mingyue Community, Chaoyang District, Beijing + +Newly facts: +"The user's residential address is Mingyue Community, Chaoyang District, Beijing" + +Operation recommendations: +{{ + "operations": + [ + {{ + "id": "123", + "text": "The user works as a software engineer at Company A, primarily responsible for front-end development", + "operation": "NONE" + }}, + {{ + "id": "908", + "text": "The user enjoys fishing with friends on weekends", + "operation": "NONE" + }}, + {{ + "id": "4567", + "text": "The user's residential address is Mingyue Community, Chaoyang District, Beijing", + "operation": "ADD" + }} + ] +}} + + **Current Memories** {current_memories} @@ -255,7 +343,7 @@ 你必须严格按照以下JSON格式返回响应: {{ - "operation": + "operations": [ {{ "id": "<记忆ID>", @@ -288,6 +376,95 @@ 5. 只返回JSON格式的响应,不要包含其他任何内容 +示例1: +当前记忆: +"0911": "用户是高级全栈开发工程师,在B公司工作" +"123": "用户在公司A担任软件工程师。而且用户和同事们的关系很好,他们共同协作大项目。" +"648": "用户在公司A负责软件的前端开发工作" +"7210": "用户在公司A负责软件的前端开发工作" +"908": "用户周末喜欢和朋友一起钓鱼" + + +提出新事实的背景: +user: 你还记得我现在在哪里工作吗? +assistant: A公司 +user feedback: 实际上,我在公司B工作,是一名高级全栈开发人员。 + + +新获取的事实: +"用户现在在公司B担任高级全栈开发工程师" + +操作建议: +{{ + "operations": + [ + {{ + "id": "0911", + "text": "用户是高级全栈开发工程师,在B公司工作", + "operation": "NONE" + }}, + {{ + "id": "123", + "text": "用户现在在公司B担任高级全栈开发工程师。而且用户和同事们的关系很好,他们共同协作大项目。", + "operation": "UPDATE", + "old_memory": "用户在公司A担任软件工程师,主要负责前端开发。而且用户和同事们的关系很好,他们共同协作大项目。" + }}, + {{ + "id": "648", + "text": "用户现在在公司B担任高级全栈开发工程师", + "operation": "UPDATE", + "old_memory": "用户在公司A负责软件的前端开发工作" + }}, + {{ + "id": "7210", + "text": "用户现在在公司B担任高级全栈开发工程师", + "operation": "UPDATE", + "old_memory": "用户在公司A负责软件的前端开发工作" + }}, + {{ + "id": "908", + "text": "用户周末喜欢和朋友一起钓鱼", + "operation": "NONE" + }} + ] +}} + +示例2: +当前记忆: +"123": "用户在公司A担任软件工程师,主要负责前端开发" +"908": "用户周末喜欢和朋友一起钓鱼" + + +提出新事实的背景: +user: 猜猜我住在哪里? 
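Aside: the English and Chinese variants of each template are selected at run time by language detection. A minimal sketch of that routing; the CJK-ratio `detect_lang` below is a crude stand-in for the project's own detector in `mem_reader`:

FEEDBACK_PROMPT_DICT = {
    "compare": {"en": "<english template>", "zh": "<中文模板>"},
}


def detect_lang(text: str) -> str:
    # Rough heuristic: call it Chinese if >30% of the characters are CJK.
    cjk = sum(1 for ch in text if "\u4e00" <= ch <= "\u9fff")
    return "zh" if text and cjk / len(text) > 0.3 else "en"


def pick_template(kind: str, text: str) -> str:
    return FEEDBACK_PROMPT_DICT[kind][detect_lang(text)]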
+assistant: 合欢社区 +user feedback: 错了,请更新我的地址:北京市朝阳区明月社区 + +新获取的事实: +"用户的居住地址是北京市朝阳区明月小区" + +操作建议: +{{ + "operations": + [ + {{ + "id": "123", + "text": "用户在公司A担任软件工程师,主要负责前端开发", + "operation": "NONE" + }}, + {{ + "id": "908", + "text": "用户周末喜欢和朋友一起钓鱼", + "operation": "NONE" + }}, + {{ + "id": "4567", + "text": "用户的居住地址是北京市朝阳区明月小区", + "operation": "ADD" + }} + ] +}} + **当前记忆:** {current_memories} From 898ccacb5457c24c0ae0e27d408711b28b878e0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Mon, 1 Dec 2025 20:53:12 +0800 Subject: [PATCH 29/40] add handler change node update --- src/memos/api/handlers/add_handler.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/memos/api/handlers/add_handler.py b/src/memos/api/handlers/add_handler.py index 340a57e4a..46e7fd108 100644 --- a/src/memos/api/handlers/add_handler.py +++ b/src/memos/api/handlers/add_handler.py @@ -30,7 +30,9 @@ def __init__(self, dependencies: HandlerDependencies): dependencies: HandlerDependencies instance """ super().__init__(dependencies) - self._validate_dependencies("naive_mem_cube", "mem_reader", "mem_scheduler") + self._validate_dependencies( + "naive_mem_cube", "mem_reader", "mem_scheduler", "feedback_server" + ) def handle_add_memories(self, add_req: APIADDRequest) -> MemoryResponse: """ @@ -59,6 +61,10 @@ def handle_add_memories(self, add_req: APIADDRequest) -> MemoryResponse: if add_req.is_feedback: chat_history = add_req.chat_history messages = add_req.messages + if chat_history is None: + chat_history = [] + if messages is None: + messages = [] concatenate_chat = chat_history + messages last_user_index = max(i for i, d in enumerate(concatenate_chat) if d["role"] == "user") @@ -117,6 +123,7 @@ def _build_cube_view(self, add_req: APIADDRequest) -> MemCubeView: mem_reader=self.mem_reader, mem_scheduler=self.mem_scheduler, logger=self.logger, + feedback_server=self.feedback_server, searcher=None, ) else: @@ -127,6 +134,7 @@ def _build_cube_view(self, add_req: APIADDRequest) -> MemCubeView: mem_reader=self.mem_reader, mem_scheduler=self.mem_scheduler, logger=self.logger, + feedback_server=self.feedback_server, searcher=None, ) for cube_id in cube_ids From 91d063dd6186d65902157fbc0e1bcce060b04835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Mon, 1 Dec 2025 20:59:28 +0800 Subject: [PATCH 30/40] add handler change node update --- tests/api/test_server_router.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/api/test_server_router.py b/tests/api/test_server_router.py index 7c4b4be9d..5906697d9 100644 --- a/tests/api/test_server_router.py +++ b/tests/api/test_server_router.py @@ -38,6 +38,7 @@ def mock_init_server(): "default_cube_config": Mock(), "mos_server": Mock(), "mem_scheduler": Mock(), + "feedback_server": Mock(), "naive_mem_cube": Mock(), "searcher": Mock(), "api_module": Mock(), From 2a478802053ede23d6b882dd634d9945fd6b1066 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Mon, 1 Dec 2025 21:01:27 +0800 Subject: [PATCH 31/40] add handler change node update --- examples/api/product_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/api/product_api.py b/examples/api/product_api.py index 687c4c02c..e364ce483 100644 --- a/examples/api/product_api.py +++ b/examples/api/product_api.py @@ -12,7 +12,7 @@ import requests -BASE_URL = 
"http://0.0.0.0:8002/product" +BASE_URL = "http://0.0.0.0:8001/product" HEADERS = {"Content-Type": "application/json"} index = "24" From ad9c2e73515711c1e8679fa0dffa745fb76ed44f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Tue, 2 Dec 2025 14:31:06 +0800 Subject: [PATCH 32/40] fix interface input --- src/memos/api/handlers/add_handler.py | 68 +++++++++++++++------------ src/memos/api/product_models.py | 3 +- src/memos/memories/textual/item.py | 5 ++ 3 files changed, 45 insertions(+), 31 deletions(-) diff --git a/src/memos/api/handlers/add_handler.py b/src/memos/api/handlers/add_handler.py index 46e7fd108..31f5bc35b 100644 --- a/src/memos/api/handlers/add_handler.py +++ b/src/memos/api/handlers/add_handler.py @@ -5,6 +5,8 @@ using dependency injection for better modularity and testability. """ +from pydantic import validate_call + from memos.api.handlers.base_handler import BaseHandler, HandlerDependencies from memos.api.product_models import APIADDRequest, APIFeedbackRequest, MemoryResponse from memos.memories.textual.item import ( @@ -13,6 +15,7 @@ from memos.multi_mem_cube.composite_cube import CompositeCubeView from memos.multi_mem_cube.single_cube import SingleCubeView from memos.multi_mem_cube.views import MemCubeView +from memos.types import MessageList class AddHandler(BaseHandler): @@ -58,38 +61,45 @@ def handle_add_memories(self, add_req: APIADDRequest) -> MemoryResponse: cube_view = self._build_cube_view(add_req) + @validate_call + def _check_messages(messages: MessageList) -> None: + pass + if add_req.is_feedback: - chat_history = add_req.chat_history - messages = add_req.messages - if chat_history is None: - chat_history = [] - if messages is None: - messages = [] - concatenate_chat = chat_history + messages - - last_user_index = max(i for i, d in enumerate(concatenate_chat) if d["role"] == "user") - feedback_content = concatenate_chat[last_user_index]["content"] - feedback_history = concatenate_chat[:last_user_index] - - feedback_req = APIFeedbackRequest( - user_id=add_req.user_id, - session_id=add_req.session_id, - task_id=add_req.task_id, - history=feedback_history, - feedback_content=feedback_content, - writable_cube_ids=add_req.writable_cube_ids, - async_mode=add_req.async_mode, - ) - process_record = cube_view.feedback_memories(feedback_req) + try: + messages = add_req.messages + _check_messages(messages) - self.logger.info( - f"[FeedbackHandler] Final feedback results count={len(process_record)}" - ) + chat_history = add_req.chat_history if add_req.chat_history else [] + concatenate_chat = chat_history + messages - return MemoryResponse( - message="Memory feedback successfully", - data=[process_record], - ) + last_user_index = max( + i for i, d in enumerate(concatenate_chat) if d["role"] == "user" + ) + feedback_content = concatenate_chat[last_user_index]["content"] + feedback_history = concatenate_chat[:last_user_index] + + feedback_req = APIFeedbackRequest( + user_id=add_req.user_id, + session_id=add_req.session_id, + task_id=add_req.task_id, + history=feedback_history, + feedback_content=feedback_content, + writable_cube_ids=add_req.writable_cube_ids, + async_mode=add_req.async_mode, + ) + process_record = cube_view.feedback_memories(feedback_req) + + self.logger.info( + f"[ADDFeedbackHandler] Final feedback results count={len(process_record)}" + ) + + return MemoryResponse( + message="Memory feedback successfully", + data=[process_record], + ) + except Exception as e: + 
self.logger.warning(f"[ADDFeedbackHandler] Running error: {e}") results = cube_view.add_memories(add_req) diff --git a/src/memos/api/product_models.py b/src/memos/api/product_models.py index d58385c2e..216664e53 100644 --- a/src/memos/api/product_models.py +++ b/src/memos/api/product_models.py @@ -642,7 +642,6 @@ class APIFeedbackRequest(BaseRequest): ) feedback_content: str | None = Field(..., description="Feedback content to process") feedback_time: str | None = Field(None, description="Feedback time") - # ==== Multi-cube writing ==== writable_cube_ids: list[str] | None = Field( None, description="List of cube IDs user can write for multi-cube add" ) @@ -650,7 +649,7 @@ class APIFeedbackRequest(BaseRequest): "async", description="feedback mode: sync or async" ) corrected_answer: bool = Field(False, description="Whether need return corrected answer") - # ==== Backward compatibility ==== + # ==== mem_cube_id is NOT enabled==== mem_cube_id: str | None = Field( None, description=( diff --git a/src/memos/memories/textual/item.py b/src/memos/memories/textual/item.py index b7956bfec..63a5b3b1d 100644 --- a/src/memos/memories/textual/item.py +++ b/src/memos/memories/textual/item.py @@ -90,6 +90,11 @@ class TextualMemoryMetadata(BaseModel): model_config = ConfigDict(extra="allow") + covered_history: str | None = Field( + default=None, + description="Record the memory id covered by the update", + ) + def __str__(self) -> str: """Pretty string representation of the metadata.""" meta = self.model_dump(exclude_none=True) From 6ad8dae2253f544401541b85d95593a42fb10cf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Wed, 3 Dec 2025 17:33:55 +0800 Subject: [PATCH 33/40] add chunk and ratio filter --- src/memos/graph_dbs/polardb.py | 92 ++++ src/memos/mem_feedback/feedback.py | 416 +++++++++++++----- src/memos/mem_feedback/simple_feedback.py | 2 + src/memos/mem_feedback/utils.py | 86 ++++ src/memos/memories/textual/item.py | 2 +- .../tree_text_memory/retrieve/searcher.py | 4 + src/memos/templates/mem_feedback_prompts.py | 116 +++++ 7 files changed, 599 insertions(+), 119 deletions(-) create mode 100644 src/memos/mem_feedback/utils.py diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 74dd38fc1..580811a50 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1451,6 +1451,98 @@ def get_context_chain(self, id: str, type: str = "FOLLOWS") -> list[str]: """Get the ordered context chain starting from a node.""" raise NotImplementedError + @timed + def seach_by_keywords( + self, + query_words: list[str], + scope: str | None = None, + status: str | None = None, + search_filter: dict | None = None, + user_name: str | None = None, + filter: dict | None = None, + knowledgebase_ids: list[str] | None = None, + tsvector_field: str = "properties_tsvector_zh", + tsquery_config: str = "jiebaqry", + **kwargs, + ) -> list[dict]: + where_clauses = [] + + if scope: + where_clauses.append( + f"ag_catalog.agtype_access_operator(properties, '\"memory_type\"'::agtype) = '\"{scope}\"'::agtype" + ) + if status: + where_clauses.append( + f"ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"{status}\"'::agtype" + ) + else: + where_clauses.append( + "ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"activated\"'::agtype" + ) + + # Build user_name filter with knowledgebase_ids support (OR relationship) using common method + user_name_conditions = 
self._build_user_name_and_kb_ids_conditions_sql( + user_name=user_name, + knowledgebase_ids=knowledgebase_ids, + default_user_name=self.config.user_name, + ) + + # Add OR condition if we have any user_name conditions + if user_name_conditions: + if len(user_name_conditions) == 1: + where_clauses.append(user_name_conditions[0]) + else: + where_clauses.append(f"({' OR '.join(user_name_conditions)})") + + # Add search_filter conditions + if search_filter: + for key, value in search_filter.items(): + if isinstance(value, str): + where_clauses.append( + f"ag_catalog.agtype_access_operator(properties, '\"{key}\"'::agtype) = '\"{value}\"'::agtype" + ) + else: + where_clauses.append( + f"ag_catalog.agtype_access_operator(properties, '\"{key}\"'::agtype) = {value}::agtype" + ) + + # Build filter conditions using common method + filter_conditions = self._build_filter_conditions_sql(filter) + where_clauses.extend(filter_conditions) + # Add fulltext search condition + # Convert query_text to OR query format: "word1 | word2 | word3" + tsquery_string = " | ".join(query_words) + + where_clauses.append(f"{tsvector_field} @@ to_tsquery('{tsquery_config}', %s)") + + where_clause = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else "" + + # Build fulltext search query + query = f""" + SELECT + ag_catalog.agtype_access_operator(properties, '"id"'::agtype) AS old_id, + agtype_object_field_text(properties, 'memory') as memory_text + FROM "{self.db_name}_graph"."Memory" + {where_clause} + """ + + params = (tsquery_string,) + logger.info(f"[search_by_fulltext] query: {query}, params: {params}") + conn = self._get_connection() + try: + with conn.cursor() as cursor: + cursor.execute(query, params) + results = cursor.fetchall() + output = [] + for row in results: + oldid = row[0] + id_val = str(oldid) + output.append({"id": id_val}) + + return output + finally: + self._return_connection(conn) + @timed def search_by_fulltext( self, diff --git a/src/memos/mem_feedback/feedback.py b/src/memos/mem_feedback/feedback.py index 02b737451..eed43d66e 100644 --- a/src/memos/mem_feedback/feedback.py +++ b/src/memos/mem_feedback/feedback.py @@ -10,10 +10,12 @@ from memos import log from memos.configs.memory import MemFeedbackConfig from memos.context.context import ContextThreadPoolExecutor +from memos.dependency import require_python_package from memos.embedders.factory import EmbedderFactory, OllamaEmbedder from memos.graph_dbs.factory import GraphStoreFactory, PolarDBGraphDB from memos.llms.factory import AzureLLM, LLMFactory, OllamaLLM, OpenAILLM from memos.mem_feedback.base import BaseMemFeedback +from memos.mem_feedback.utils import should_keep_update, split_into_chunks from memos.mem_reader.factory import MemReaderFactory from memos.mem_reader.simple_struct import detect_lang from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata @@ -30,6 +32,8 @@ FEEDBACK_ANSWER_PROMPT_ZH, FEEDBACK_JUDGEMENT_PROMPT, FEEDBACK_JUDGEMENT_PROMPT_ZH, + KEYWORDS_REPLACE, + KEYWORDS_REPLACE_ZH, UPDATE_FORMER_MEMORIES, UPDATE_FORMER_MEMORIES_ZH, ) @@ -37,6 +41,7 @@ FEEDBACK_PROMPT_DICT = { + "if_kw_replace": {"en": KEYWORDS_REPLACE, "zh": KEYWORDS_REPLACE_ZH}, "judge": {"en": FEEDBACK_JUDGEMENT_PROMPT, "zh": FEEDBACK_JUDGEMENT_PROMPT_ZH}, "compare": {"en": UPDATE_FORMER_MEMORIES, "zh": UPDATE_FORMER_MEMORIES_ZH}, "generation": {"en": FEEDBACK_ANSWER_PROMPT, "zh": FEEDBACK_ANSWER_PROMPT_ZH}, @@ -74,6 +79,20 @@ def __init__(self, config: MemFeedbackConfig): ) self.searcher: Searcher = 
self.memory_manager.searcher + def _batch_embed(self, texts: list[str], embed_bs: int = 5): + embed_bs = 5 + texts_embeddings = [] + for i in range(0, len(texts), embed_bs): + batch = texts[i : i + embed_bs] + try: + texts_embeddings.extend(self.embedder.embed(batch)) + except Exception as e: + logger.error( + f"[Feedback Core: process_feedback_core] Embedding batch failed: {e}", + exc_info=True, + ) + return texts_embeddings + def _pure_add(self, user_name: str, feedback_content: str, feedback_time: str, info: dict): """ Directly add new memory @@ -97,6 +116,25 @@ def _pure_add(self, user_name: str, feedback_content: str, feedback_time: str, i } } + def _keyword_replace_judgement(self, feedback_content: str) -> dict | None: + """ + Determine whether it is keyword replacement + """ + lang = detect_lang(feedback_content) + template = FEEDBACK_PROMPT_DICT["if_kw_replace"][lang] + prompt = template.format( + user_feedback=feedback_content, + ) + + judge_res = self._get_llm_response(prompt) + if judge_res: + return judge_res + else: + logger.warning( + "[Feedback Core: _feedback_judgement] feedback judgement failed, return []" + ) + return {} + def _feedback_judgement( self, chat_history: list[MessageDict], feedback_content: str, feedback_time: str = "" ) -> dict | None: @@ -128,7 +166,7 @@ def _single_add_operation( new_memory_item: TextualMemoryItem, user_id: str, user_name: str, - async_mode: str, + async_mode: str = "sync", ) -> dict: """ Individual addition operations @@ -166,7 +204,7 @@ def _single_update_operation( new_memory_item: TextualMemoryItem, user_id: str, user_name: str, - async_mode: str, + async_mode: str = "sync", ) -> dict: """ Individual update operations @@ -231,10 +269,111 @@ def _del_working_binding(self, user_name, mem_items: list[TextualMemoryItem]) -> f"[Feedback Core:_del_working_binding] TreeTextMemory.delete_hard: failed to delete {mid}: {e}" ) + def semantics_feedback( + self, + user_id: str, + user_name: str, + memory_item: TextualMemoryItem, + current_memories: list[TextualMemoryItem], + fact_history: str, + ): + lang = detect_lang("".join(memory_item.memory)) + template = FEEDBACK_PROMPT_DICT["compare"][lang] + if current_memories == []: + current_memories = self._retrieve( + memory_item.memory, info={"user_id": user_id}, user_name=user_name + ) + + if not current_memories: + operations = [{"operation": "ADD"}] + else: + memory_chunks = split_into_chunks(current_memories, max_tokens_per_chunk=500) + + all_operations = [] + with ContextThreadPoolExecutor(max_workers=10) as executor: + future_to_chunk_idx = {} + for chunk in memory_chunks: + current_memories_str = "\n".join( + [f"{item.id}: {item.memory}" for item in chunk] + ) + prompt = template.format( + current_memories=current_memories_str, + new_facts=memory_item.memory, + chat_history=fact_history, + ) + + future = executor.submit(self._get_llm_response, prompt) + future_to_chunk_idx[future] = chunk + for future in concurrent.futures.as_completed(future_to_chunk_idx): + try: + chunk_operations = future.result() + if ( + chunk_operations + and "operations" in chunk_operations + and isinstance(chunk_operations["operations"], list) + ): + all_operations.extend(chunk_operations["operations"]) + except Exception as e: + logger.error(f"[Feedback Core: semantics_feedback] Operation failed: {e}") + + operations = self.standard_operations(all_operations, current_memories) + + # TODO based on the operation, change memory_item memory info ; change source info + logger.info(f"[Feedback memory operations]: 
{operations!s}") + + if not operations: + return {"record": {"add": [], "update": []}} + + add_results = [] + update_results = [] + id_to_item = {item.id: item for item in current_memories} + + with ContextThreadPoolExecutor(max_workers=10) as executor: + future_to_op = {} + for op in operations: + event_type = op.get("operation", "").lower() + + if event_type == "add": + future = executor.submit( + self._single_add_operation, + None, + memory_item, + user_id, + user_name, + ) + future_to_op[future] = ("add", op) + elif event_type == "update": + future = executor.submit( + self._single_update_operation, + id_to_item[op["id"]], + memory_item, + user_id, + user_name, + ) + future_to_op[future] = ("update", op) + + for future in concurrent.futures.as_completed(future_to_op): + result_type, original_op = future_to_op[future] + try: + result = future.result() + if result_type == "add" and result: + add_results.append(result) + elif result_type == "update" and result: + update_results.append(result) + except Exception as e: + logger.error( + f"[Feedback Core: semantics_feedback] Operation failed for {original_op}: {e}", + exc_info=True, + ) + if update_results: + updated_ids = [item["archived_id"] for item in update_results] + self._del_working_binding(updated_ids, user_name) + + return {"record": {"add": add_results, "update": update_results}} + def _feedback_memory( self, user_id: str, user_name: str, feedback_memories: list[TextualMemoryItem], **kwargs ) -> dict: - async_mode = kwargs.get("async_mode") retrieved_memory_ids = kwargs.get("retrieved_memory_ids") or [] chat_history = kwargs.get("chat_history", []) feedback_content = kwargs.get("feedback_content", "") @@ -259,90 +398,11 @@ def _feedback_memory( if "mode:fast" not in item["metadata"]["tags"] ] - def _add_or_update( - memory_item: TextualMemoryItem, - current_memories: list[TextualMemoryItem], - fact_history: str, - ): - if current_memories == []: - current_memories = self._retrieve( - memory_item.memory, info={"user_id": user_id}, user_name=user_name - ) - - if current_memories: - lang = detect_lang("".join(memory_item.memory)) - template = FEEDBACK_PROMPT_DICT["compare"][lang] - current_memories_str = "\n".join( - [f"{item.id}: {item.memory}" for item in current_memories] - ) - prompt = template.format( - current_memories=current_memories_str, - new_facts=memory_item.memory, - chat_history=fact_history, - ) - - operations = self._get_llm_response(prompt).get("operations", []) - operations = self._id_dehallucination(operations, current_memories) - else: - operations = [{"operation": "ADD"}] - - # TODO based on the operation, change memory_item memory info ; change source info - logger.info(f"[Feedback memory operations]: {operations!s}") - - if not operations: - return {"record": {"add": [], "update": []}} - - add_results = [] - update_results = [] - id_to_item = {item.id: item for item in current_memories} - with ContextThreadPoolExecutor(max_workers=10) as executor: - future_to_op = {} - for op in operations: - event_type = op.get("operation", "").lower() - - if event_type == "add": - future = executor.submit( - self._single_add_operation, - None, - memory_item, - user_id, - user_name, - async_mode, - ) - future_to_op[future] = ("add", op) - elif event_type == "update": - future = executor.submit( - self._single_update_operation, - id_to_item[op["id"]], - memory_item, - user_id, - user_name, - async_mode, - ) - future_to_op[future] = ("update", op) - - for future in concurrent.futures.as_completed(future_to_op): - result_type, 
original_op = future_to_op[future] - try: - result = future.result() - if result_type == "add" and result: - add_results.append(result) - elif result_type == "update" and result: - update_results.append(result) - except Exception as e: - logger.error( - f"[Feedback Core: _add_or_update] Operation failed for {original_op}: {e}", - exc_info=True, - ) - if update_results: - updated_ids = [item["archived_id"] for item in update_results] - self._del_working_binding(updated_ids, user_name) - - return {"record": {"add": add_results, "update": update_results}} - with ContextThreadPoolExecutor(max_workers=3) as ex: futures = { - ex.submit(_add_or_update, mem, current_memories, fact_history): i + ex.submit( + self.semantics_feedback, user_id, user_name, mem, current_memories, fact_history + ): i for i, mem in enumerate(feedback_memories) } results = [None] * len(futures) @@ -368,7 +428,10 @@ def _add_or_update( def _retrieve(self, query: str, info=None, user_name=None): """Retrieve memory items""" - retrieved_mems = self.searcher.search(query, info=info, user_name=user_name) + retrieved_mems = self.searcher.search( + query, info=info, user_name=user_name, topk=50, full_recall=True + ) + retrieved_mems = [item[0] for item in retrieved_mems] return retrieved_mems def _vec_query(self, new_memories_embedding: list[float], user_name=None): @@ -430,28 +493,51 @@ def _get_llm_response(self, prompt: str, dsl: bool = True) -> dict: response_json = None return response_json - def _id_dehallucination(self, operations, current_memories): + def standard_operations(self, operations, current_memories): right_ids = [item.id for item in current_memories] right_lower_map = {x.lower(): x for x in right_ids} def correct_item(data): - if data.get("operation", "").lower() != "update": - return data - - original_id = data["id"] - if original_id in right_ids: - return data - - lower_id = original_id.lower() - if lower_id in right_lower_map: - data["id"] = right_lower_map[lower_id] - return data - - matches = difflib.get_close_matches(original_id, right_ids, n=1, cutoff=0.8) - if matches: - data["id"] = matches[0] - return data + try: + assert "operation" in data + if data.get("operation", "").lower() == "add": + return data + + if data.get("operation", "").lower() == "none": + return None + + assert ( + "id" in data + and "text" in data + and "old_memory" in data + and data["operation"].lower() == "update" + ) + if not should_keep_update(data["text"], data["old_memory"]): + logger.warning( + f"[Feedback Core: semantics_feedback] Due to the excessive proportion of changes, skip update: {data}" + ) + return None + + # id dehallucination + original_id = data["id"] + if original_id in right_ids: + return data + + lower_id = original_id.lower() + if lower_id in right_lower_map: + data["id"] = right_lower_map[lower_id] + return data + + matches = difflib.get_close_matches(original_id, right_ids, n=1, cutoff=0.8) + if matches: + data["id"] = matches[0] + return data + except Exception: + logger.error( + f"[Feedback Core: standard_operations] Error processing operation item: {data}", + exc_info=True, + ) return None dehallu_res = [correct_item(item) for item in operations] @@ -475,6 +561,86 @@ def _generate_answer( return self._get_llm_response(prompt, dsl=False) + def process_keyword_replace(self, user_id: str, user_name: str, kwp_judge: dict | None = None): + """ + memory keyword replace process + """ + doc_scope = kwp_judge.get("doc_scope", "NONE") + original_word = kwp_judge.get("original") + target_word = 
kwp_judge.get("target") + + # retrieve + lang = detect_lang(original_word) + queries = self._tokenize_chinese(original_word) if lang == "zh" else original_word.split() + + must_part = f"{' & '.join(queries)}" if len(queries) > 1 else queries[0] + retrieved_ids = self.graph_store.seach_by_keywords([must_part], user_name=user_name) + if len(retrieved_ids) < 1: + retrieved_ids = self.graph_store.search_by_fulltext( + queries, top_k=100, user_name=user_name + ) + + # filter by doc scope + mem_data = [ + self.graph_store.get_node(item["id"], user_name=user_name) for item in retrieved_ids + ] + retrieved_memories = [TextualMemoryItem(**item) for item in mem_data] + + if doc_scope != "NONE": + retrieved_memories = [ + item + for item in retrieved_memories + if doc_scope in item.metadata.sources # TODO + ] + + if not retrieved_memories: + return {"record": {"add": [], "update": []}} + + # replace keywords + pick_index = [] + update_memories = [] + for i, old_mem in enumerate(retrieved_memories): + if original_word in old_mem.memory: + mem = old_mem.model_copy(deep=True) + mem.memory = mem.memory.replace(original_word, target_word) + if target_word not in mem.metadata.tags: + mem.metadata.tags.append(target_word) + pick_index.append(i) + update_memories.append(mem) + + update_memories_embed = self._retry_db_operation( + lambda: self._batch_embed([mem.memory for mem in update_memories]) + ) + for _i, embed in zip(range(len(update_memories)), update_memories_embed, strict=False): + update_memories[_i].metadata.embedding = embed + + update_results = [] + with ContextThreadPoolExecutor(max_workers=10) as executor: + future_to_info = {} + for new_mem, old_idx in zip(update_memories, pick_index, strict=False): + old_mem = retrieved_memories[old_idx] + + future = executor.submit( + self._single_update_operation, + old_mem, + new_mem, + user_id, + user_name, + ) + future_to_info[future] = old_mem.id + + for future in future_to_info: + try: + result = future.result() + update_results.append(result) + except Exception as e: + mem_id = future_to_info[future][0] + self.logger.error( + f"[Feedback Core DB] Exception during update operation for memory {mem_id}: {e}" + ) + + return {"record": {"add": [], "update": update_results}} + def process_feedback_core( self, user_id: str, @@ -497,19 +663,28 @@ def check_validity(item): and "tags" in item ) + if feedback_content.strip() == "": + return {"record": {"add": [], "update": []}} try: feedback_time = kwargs.get("feedback_time") or datetime.now().isoformat() session_id = kwargs.get("session_id") - if feedback_content.strip() == "": - return {"record": {"add": [], "update": []}} - info = {"user_id": user_id, "user_name": user_name, "session_id": session_id} logger.info( f"[Feedback Core: process_feedback_core] Starting memory feedback process for user {user_name}" ) + # feedback keywords update + kwp_judge = self._keyword_replace_judgement(feedback_content) + if ( + kwp_judge + and kwp_judge["if_keyword_replace"].lower() == "true" + and kwp_judge.get("original", "NONE") != "NONE" + and kwp_judge.get("target", "NONE") != "NONE" + ): + return self.process_keyword_replace(user_id, user_name, kwp_judge=kwp_judge) + + # llm update memory if not chat_history: return self._pure_add(user_name, feedback_content, feedback_time, info) - else: raw_judge = self._feedback_judgement( chat_history, feedback_content, feedback_time=feedback_time @@ -533,17 +708,9 @@ def check_validity(item): feedback_memories = [] corrected_infos = [item["corrected_info"] for item in valid_feedback] 
- embed_bs = 5 - feedback_memories_embeddings = [] - for i in range(0, len(corrected_infos), embed_bs): - batch = corrected_infos[i : i + embed_bs] - try: - feedback_memories_embeddings.extend(self.embedder.embed(batch)) - except Exception as e: - logger.error( - f"[Feedback Core: process_feedback_core] Embedding batch failed: {e}", - exc_info=True, - ) + feedback_memories_embeddings = self._retry_db_operation( + lambda: self._batch_embed(corrected_infos) + ) for item, embedding in zip( valid_feedback, feedback_memories_embeddings, strict=False @@ -664,3 +831,16 @@ def _retry_db_operation(self, operation): f"[MemFeedback: _retry_db_operation] DB operation failed: {e}", exc_info=True ) raise + + @require_python_package( + import_name="jieba", + install_command="pip install jieba", + install_link="https://github.com/fxsjy/jieba", + ) + def _tokenize_chinese(self, text): + """split zh jieba""" + import jieba + + tokens = jieba.lcut(text) + tokens = [token.strip() for token in tokens if token.strip()] + return self.stopword_manager.filter_words(tokens) diff --git a/src/memos/mem_feedback/simple_feedback.py b/src/memos/mem_feedback/simple_feedback.py index 01132eb97..bb5a1c552 100644 --- a/src/memos/mem_feedback/simple_feedback.py +++ b/src/memos/mem_feedback/simple_feedback.py @@ -5,6 +5,7 @@ from memos.mem_feedback.feedback import MemFeedback from memos.mem_reader.simple_struct import SimpleStructMemReader from memos.memories.textual.tree_text_memory.organize.manager import MemoryManager +from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import StopwordManager from memos.memories.textual.tree_text_memory.retrieve.searcher import Searcher @@ -27,3 +28,4 @@ def __init__( self.memory_manager = memory_manager self.mem_reader = mem_reader self.searcher = searcher + self.stopword_manager = StopwordManager diff --git a/src/memos/mem_feedback/utils.py b/src/memos/mem_feedback/utils.py new file mode 100644 index 000000000..b290993cd --- /dev/null +++ b/src/memos/mem_feedback/utils.py @@ -0,0 +1,86 @@ +from memos.memories.textual.item import TextualMemoryItem + + +def estimate_tokens(text: str) -> int: + """ + Estimate the approximate number of tokens for the text + """ + if not text: + return 0 + + chinese_chars = sum(1 for char in text if "\u4e00" <= char <= "\u9fff") + + english_parts = text.split() + english_words = 0 + for part in english_parts: + has_chinese = any("\u4e00" <= char <= "\u9fff" for char in part) + if not has_chinese and any(c.isalpha() for c in part): + english_words += 1 + + other_chars = len(text) - chinese_chars + + estimated_tokens = int(chinese_chars * 1.5 + english_words * 1.33 + other_chars * 0.5) + + return max(1, estimated_tokens) + + +def should_keep_update(new_text: str, old_text: str) -> bool: + """ + Determine whether the update should be skipped + Rule: + 1. If the length of old_text is less than 50 and the modification ratio is less than 50% => returns True + 2. If the length of old_text is greater than or equal to 50 and the modification ratio is less than 15% => returns True + 3. 
Return False in other cases + """ + + old_len = estimate_tokens(old_text) + + def calculate_similarity(text1: str, text2: str) -> float: + set1 = set(text1) + set2 = set(text2) + if not set1 and not set2: + return 1.0 + + intersection = len(set1.intersection(set2)) + union = len(set1.union(set2)) + return intersection / union if union > 0 else 0.0 + + similarity = calculate_similarity(old_text, new_text) + change_ratio = 1 - similarity + + if old_len < 50: + return change_ratio < 0.5 + else: + return change_ratio < 0.15 + + +def split_into_chunks(memories: list[TextualMemoryItem], max_tokens_per_chunk=500): + chunks = [] + current_chunk = [] + current_tokens = 0 + + for item in memories: + item_text = f"{item.id}: {item.memory}" + item_tokens = estimate_tokens(item_text) + + if item_tokens > max_tokens_per_chunk: + if current_chunk: + chunks.append(current_chunk) + current_chunk = [] + + chunks.append([item]) + current_tokens = 0 + + elif current_tokens + item_tokens <= max_tokens_per_chunk: + current_chunk.append(item) + current_tokens += item_tokens + else: + if current_chunk: + chunks.append(current_chunk) + current_chunk = [item] + current_tokens = item_tokens + + if current_chunk: + chunks.append(current_chunk) + + return chunks diff --git a/src/memos/memories/textual/item.py b/src/memos/memories/textual/item.py index 63a5b3b1d..8067c7f72 100644 --- a/src/memos/memories/textual/item.py +++ b/src/memos/memories/textual/item.py @@ -90,7 +90,7 @@ class TextualMemoryMetadata(BaseModel): model_config = ConfigDict(extra="allow") - covered_history: str | None = Field( + covered_history: Any | None = Field( default=None, description="Record the memory id covered by the update", ) diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index f428bf5c0..4ebf73a23 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -173,6 +173,10 @@ def search( user_name=user_name, ) + full_recall = kwargs.get("full_recall", False) + if full_recall: + return retrieved_results + final_results = self.post_retrieve( retrieved_results=retrieved_results, top_k=top_k, diff --git a/src/memos/templates/mem_feedback_prompts.py b/src/memos/templates/mem_feedback_prompts.py index f7f2e8cb4..cd0c46a61 100644 --- a/src/memos/templates/mem_feedback_prompts.py +++ b/src/memos/templates/mem_feedback_prompts.py @@ -1,3 +1,119 @@ +KEYWORDS_REPLACE = """ +**Instruction:** +Please analyze the user's input text to determine if it is a "keyword replacement" request. If yes, follow these steps: + +1. **Identify the request type**: Confirm whether the user is asking to replace a specific word or phrase with another **within a specified scope**. +2. **Extract the modification scope**: Determine the scope where the modification should apply. + - If the user mentions a specific **document, file, or material identifier** (e.g., "in the Q1 operations plan", "in the prospectus numbered BT7868"), extract this description as the document scope. + - **If the user does not explicitly specify any scope, mark the scope as "NONE"**. +3. **Extract the original term (A)**: Identify the original word or phrase the user wants to be replaced. +4. **Extract the target term (B)**: Identify the target word or phrase the user wants to replace it with. 
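Aside: the change-ratio gate in `utils.py` above is easiest to see with concrete inputs: short memories tolerate up to 50% character-set churn, longer ones far less. A usage sketch, assuming the module is importable under the path introduced by this patch:

from memos.mem_feedback.utils import should_keep_update

# Swapping "Company A" for "Company B" barely changes the character set.
assert should_keep_update(
    "The user works at Company B", "The user works at Company A"
)
# A wholesale rewrite exceeds the 50% threshold and is rejected.
assert not should_keep_update(
    "Completely different text", "The user works at Company A"
)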
+ +**Output JSON Format**: +{{ + "if_keyword_replace": "true" | "false", + "doc_scope": "[Extracted specific file or document description]" | "NONE" | null, + "original": "[Extracted original word or phrase A]" | null, + "target": "[Extracted target word or phrase B]" | null +}} +- **If it is NOT a replacement request**, set `if_keyword_replace` to `"false"`, and set the values for `doc_scope`, `original`, and `target` to `null`. +- **If it IS a replacement request**, set `if_keyword_replace` to `"true"` and fill in the remaining fields. If the user did not specify a scope, set `doc_scope` to `"NONE"`. + +**Examples**: + +1. **User Input**: "In the file `User_Agreement.docx`, replace 'Party B' with 'User'." + **Output**: + {{ + "if_keyword_replace": "true", + "doc_scope": "User_Agreement.docx", + "original": "Party B", + "target": "User" + }} + +2. **User Input**: "Change 'Homepage' to 'Front Page'." + **Output**: + {{ + "if_keyword_replace": "true", + "doc_scope": "NONE", + "original": "Homepage", + "target": "Front Page" + }} + +3. **User Input**: "Does this sentence need modification?" + **Output**: + {{ + "if_keyword_replace": "false", + "doc_scope": null, + "original": null, + "target": null + }} + +**User Input** +{user_feedback} + +**Output**: +""" + + +KEYWORDS_REPLACE_ZH = """ +**指令:** +请分析用户输入的文本,判断是否为“关键词替换”需求。 如果是,请按以下步骤处理: + +1. **识别需求类型**:确认用户是否要求将**特定范围**内的某个词或短语替换为另一个词或短语。 +2. **提取修改范围**:确定用户指定的修改生效范围。 + - 如果用户提及了具体的**文档、文件或资料标识**(如“在第一季运营方案”、“编号为BT7868的招股书”),则提取此描述作为文件范围。 + - **如果用户未明确指定任何范围,则范围标记为 "NONE"**。 +3. **提取原始词汇(A)**:找出用户希望被替换的原始词或短语。 +4. **提取目标词汇(B)**:找出用户希望替换成的目标词或短语。 + +**输出JSON格式**: +{{ + "if_keyword_replace": "true" | "false", + "doc_scope": "[提取的具体文件或文档描述]" | "NONE" | null, + "original": "[提取的原始词或短语A]" | null, + "target": "[提取的目标词或短语B]" | null +}} +- **如果不是替换需求**,将 `if_keyword_replace` 设为 `"false"`,并将 `doc_scope`、`original`、`target` 三个键的值都设为 `null`。 +- **如果是替换需求**,将 `if_keyword_replace` 设为 `"true"`,并填充其余字段。如果用户未指定范围,`doc_scope` 设为 `"NONE"`。 + + +**示例**: + +1. **用户输入**:“在`用户协议.docx`这个文件中,把‘乙方’替换为‘用户’。” + **输出**: + {{ + "if_keyword_replace": "true", + "doc_scope": "用户协议.docx", + "original": "乙方", + "target": "用户" + }} + +2. **用户输入**:“把‘主页’改成‘首页’。” + **输出**: + {{ + "if_keyword_replace": "true", + "doc_scope": "NONE", + "original": "主页", + "target": "首页" + }} + +3. **用户输入**:“这个句子需要修改吗?” + **输出**: + {{ + "if_keyword_replace": "false", + "doc_scope": null, + "original": null, + "target": null + }} + + +**用户输入** +{user_feedback} + +**输出**: +""" + + FEEDBACK_JUDGEMENT_PROMPT = """You are a answer quality analysis expert. Please strictly follow the steps and criteria below to analyze the provided "User and Assistant Chat History" and "User Feedback," and fill the final evaluation results into the specified JSON format. 
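Aside: downstream, the judgement JSON above is only acted on when it is affirmative and both terms are concrete. A slightly defensive sketch of that gate, mirroring the checks in `process_feedback_core`:

def is_keyword_replace(judge: dict | None) -> bool:
    if not judge:
        return False
    return (
        str(judge.get("if_keyword_replace", "")).lower() == "true"
        and judge.get("original") not in (None, "NONE")
        and judge.get("target") not in (None, "NONE")
    )


assert is_keyword_replace(
    {"if_keyword_replace": "true", "doc_scope": "NONE",
     "original": "Homepage", "target": "Front Page"}
)
assert not is_keyword_replace(
    {"if_keyword_replace": "false", "doc_scope": None,
     "original": None, "target": None}
)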
Analysis Steps and Criteria: From 699cdf7bb07ddaafc451ba49d3a29f9b5d393d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Wed, 3 Dec 2025 18:10:34 +0800 Subject: [PATCH 34/40] update stopwords --- .../retrieve/retrieve_utils.py | 22 +------------------ 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py b/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py index 9e1e6c240..5a82883c8 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py @@ -93,12 +93,6 @@ def find_project_root(marker=".git"): return Path(".") -PROJECT_ROOT = find_project_root() -DEFAULT_STOPWORD_FILE = ( - PROJECT_ROOT / "examples" / "data" / "config" / "stopwords.txt" -) # cause time delay - - class StopwordManager: _stopwords = None @@ -109,13 +103,7 @@ def _load_stopwords(cls): return cls._stopwords stopwords = set() - try: - with open(DEFAULT_STOPWORD_FILE, encoding="utf-8") as f: - stopwords = {line.strip() for line in f if line.strip()} - logger.info("Stopwords loaded successfully.") - except Exception as e: - logger.warning(f"Error loading stopwords: {e}, using default stopwords.") - stopwords = cls._load_default_stopwords() + stopwords = cls._load_default_stopwords() cls._stopwords = stopwords return stopwords @@ -370,14 +358,6 @@ def is_stopword(cls, word): cls._load_stopwords() return word in cls._stopwords - @classmethod - def reload_stopwords(cls, file_path=None): - cls._stopwords = None - if file_path: - global DEFAULT_STOPWORD_FILE - DEFAULT_STOPWORD_FILE = file_path - cls._load_stopwords() - class FastTokenizer: def __init__(self, use_jieba=True, use_stopwords=True): From 343eeb399ed60d19f71bdca9a26549c77c2f9c83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Thu, 4 Dec 2025 11:19:48 +0800 Subject: [PATCH 35/40] fix messages queue --- src/memos/mem_scheduler/general_scheduler.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/memos/mem_scheduler/general_scheduler.py b/src/memos/mem_scheduler/general_scheduler.py index ad34530bc..46b6aba1f 100644 --- a/src/memos/mem_scheduler/general_scheduler.py +++ b/src/memos/mem_scheduler/general_scheduler.py @@ -626,18 +626,8 @@ def _extract_fields(mem_item): or mem_item.get("original_content") ) source_doc_id = None - if isinstance(mem_item, dict): - source_doc_id = ( - mem_item.get("source_doc_id") - or mem_item.get("doc_id") - or (mem_item.get("metadata") or {}).get("source_doc_id") - ) - else: - metadata = getattr(mem_item, "metadata", None) - if metadata: - source_doc_id = getattr(metadata, "source_doc_id", None) or getattr( - metadata, "doc_id", None - ) + if "archived_id" in mem_item: + source_doc_id = mem_item.get("archived_id") return mem_id, mem_memory, original_content, source_doc_id From d66e8ce9fb7af08f641b47a86153639fa76ec33b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Sun, 7 Dec 2025 15:46:36 +0800 Subject: [PATCH 36/40] add seach_by_keywords_LIKE --- src/memos/graph_dbs/polardb.py | 99 ++++++++++++++++++++++- src/memos/mem_feedback/feedback.py | 26 ++++-- src/memos/mem_feedback/simple_feedback.py | 1 + 3 files changed, 116 insertions(+), 10 deletions(-) diff --git 
a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 638eac9c2..458bc29aa 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1458,7 +1458,97 @@ def get_context_chain(self, id: str, type: str = "FOLLOWS") -> list[str]: raise NotImplementedError @timed - def seach_by_keywords( + def seach_by_keywords_like( + self, + query_word: str, + scope: str | None = None, + status: str | None = None, + search_filter: dict | None = None, + user_name: str | None = None, + filter: dict | None = None, + knowledgebase_ids: list[str] | None = None, + **kwargs, + ) -> list[dict]: + where_clauses = [] + + if scope: + where_clauses.append( + f"ag_catalog.agtype_access_operator(properties, '\"memory_type\"'::agtype) = '\"{scope}\"'::agtype" + ) + if status: + where_clauses.append( + f"ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"{status}\"'::agtype" + ) + else: + where_clauses.append( + "ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"activated\"'::agtype" + ) + + # Build user_name filter with knowledgebase_ids support (OR relationship) using common method + user_name_conditions = self._build_user_name_and_kb_ids_conditions_sql( + user_name=user_name, + knowledgebase_ids=knowledgebase_ids, + default_user_name=self.config.user_name, + ) + + # Add OR condition if we have any user_name conditions + if user_name_conditions: + if len(user_name_conditions) == 1: + where_clauses.append(user_name_conditions[0]) + else: + where_clauses.append(f"({' OR '.join(user_name_conditions)})") + + # Add search_filter conditions + if search_filter: + for key, value in search_filter.items(): + if isinstance(value, str): + where_clauses.append( + f"ag_catalog.agtype_access_operator(properties, '\"{key}\"'::agtype) = '\"{value}\"'::agtype" + ) + else: + where_clauses.append( + f"ag_catalog.agtype_access_operator(properties, '\"{key}\"'::agtype) = {value}::agtype" + ) + + # Build filter conditions using common method + filter_conditions = self._build_filter_conditions_sql(filter) + where_clauses.extend(filter_conditions) + + # Build key + where_clauses.append("""(properties -> '"memory"')::text LIKE %s""") + where_clause = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else "" + + query = f""" + SELECT + ag_catalog.agtype_access_operator(properties, '"id"'::agtype) AS old_id, + agtype_object_field_text(properties, 'memory') as memory_text + FROM "{self.db_name}_graph"."Memory" + {where_clause} + """ + + params = (query_word,) + logger.info( + f"[seach_by_keywords_LIKE start:] user_name: {user_name}, query: {query}, params: {params}" + ) + conn = self._get_connection() + try: + with conn.cursor() as cursor: + cursor.execute(query, params) + results = cursor.fetchall() + output = [] + for row in results: + oldid = row[0] + id_val = str(oldid) + output.append({"id": id_val}) + logger.info( + f"[seach_by_keywords_LIKE end:] user_name: {user_name}, query: {query}, params: {params} recalled: {output}" + ) + return output + finally: + self._return_connection(conn) + + @timed + def seach_by_keywords_tfidf( self, query_words: list[str], scope: str | None = None, @@ -1533,7 +1623,9 @@ def seach_by_keywords( """ params = (tsquery_string,) - logger.info(f"[search_by_fulltext] query: {query}, params: {params}") + logger.info( + f"[seach_by_keywords_TFIDF start:] user_name: {user_name}, query: {query}, params: {params}" + ) conn = self._get_connection() try: with conn.cursor() as cursor: @@ -1545,6 +1637,9 @@ def seach_by_keywords( 
id_val = str(oldid) output.append({"id": id_val}) + logger.info( + f"[seach_by_keywords_TFIDF end:] user_name: {user_name}, query: {query}, params: {params} recalled: {output}" + ) return output finally: self._return_connection(conn) diff --git a/src/memos/mem_feedback/feedback.py b/src/memos/mem_feedback/feedback.py index eed43d66e..370470534 100644 --- a/src/memos/mem_feedback/feedback.py +++ b/src/memos/mem_feedback/feedback.py @@ -78,6 +78,7 @@ def __init__(self, config: MemFeedbackConfig): is_reorganize=self.is_reorganize, ) self.searcher: Searcher = self.memory_manager.searcher + self.DB_IDX_READY = False def _batch_embed(self, texts: list[str], embed_bs: int = 5): embed_bs = 5 @@ -569,15 +570,24 @@ def process_keyword_replace(self, user_id: str, user_name: str, kwp_judge: dict original_word = kwp_judge.get("original") target_word = kwp_judge.get("target") - # retrieve - lang = detect_lang(original_word) - queries = self._tokenize_chinese(original_word) if lang == "zh" else original_word.split() + if self.DB_IDX_READY: + # retrieve + lang = detect_lang(original_word) + queries = ( + self._tokenize_chinese(original_word) if lang == "zh" else original_word.split() + ) - must_part = f"{' & '.join(queries)}" if len(queries) > 1 else queries[0] - retrieved_ids = self.graph_store.seach_by_keywords([must_part], user_name=user_name) - if len(retrieved_ids) < 1: - retrieved_ids = self.graph_store.search_by_fulltext( - queries, top_k=100, user_name=user_name + must_part = f"{' & '.join(queries)}" if len(queries) > 1 else queries[0] + retrieved_ids = self.graph_store.seach_by_keywords_tfidf( + [must_part], user_name=user_name + ) + if len(retrieved_ids) < 1: + retrieved_ids = self.graph_store.search_by_fulltext( + queries, top_k=100, user_name=user_name + ) + else: + retrieved_ids = self.graph_store.seach_by_keywords_like( + f"%{original_word}%", user_name=user_name ) # filter by doc scope diff --git a/src/memos/mem_feedback/simple_feedback.py b/src/memos/mem_feedback/simple_feedback.py index bb5a1c552..478fa104f 100644 --- a/src/memos/mem_feedback/simple_feedback.py +++ b/src/memos/mem_feedback/simple_feedback.py @@ -29,3 +29,4 @@ def __init__( self.mem_reader = mem_reader self.searcher = searcher self.stopword_manager = StopwordManager + self.DB_IDX_READY = False From ae60994c998b0af1ffc8b26c2f8d9c578be5862a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Tue, 9 Dec 2025 16:57:10 +0800 Subject: [PATCH 37/40] add doc filter --- src/memos/api/handlers/component_init.py | 1 + src/memos/api/handlers/feedback_handler.py | 2 +- src/memos/graph_dbs/polardb.py | 6 +- src/memos/mem_feedback/feedback.py | 82 ++++++++++++++-------- src/memos/mem_feedback/simple_feedback.py | 3 + src/memos/mem_feedback/utils.py | 24 ++++++- 6 files changed, 82 insertions(+), 36 deletions(-) diff --git a/src/memos/api/handlers/component_init.py b/src/memos/api/handlers/component_init.py index 632c2ed4c..670a19110 100644 --- a/src/memos/api/handlers/component_init.py +++ b/src/memos/api/handlers/component_init.py @@ -304,6 +304,7 @@ def init_server() -> dict[str, Any]: memory_manager=memory_manager, mem_reader=mem_reader, searcher=searcher, + reranker=reranker, ) # Initialize Scheduler diff --git a/src/memos/api/handlers/feedback_handler.py b/src/memos/api/handlers/feedback_handler.py index cf5c536ea..217bca7cd 100644 --- a/src/memos/api/handlers/feedback_handler.py +++ b/src/memos/api/handlers/feedback_handler.py @@ -28,7 +28,7 @@ def 
__init__(self, dependencies: HandlerDependencies): dependencies: HandlerDependencies instance """ super().__init__(dependencies) - self._validate_dependencies("mem_reader", "mem_scheduler", "searcher") + self._validate_dependencies("mem_reader", "mem_scheduler", "searcher", "reranker") def handle_feedback_memories(self, feedback_req: APIFeedbackRequest) -> MemoryResponse: """ diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 517005c9d..df365fc80 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1597,9 +1597,7 @@ def seach_by_keywords_like( """ params = (query_word,) - logger.info( - f"[seach_by_keywords_LIKE start:] user_name: {user_name}, query: {query}, params: {params}" - ) + logger.info(f"[seach_by_keywords_LIKE start:] user_name: {user_name}, params: {params}") conn = self._get_connection() try: with conn.cursor() as cursor: @@ -1611,7 +1609,7 @@ def seach_by_keywords_like( id_val = str(oldid) output.append({"id": id_val}) logger.info( - f"[seach_by_keywords_LIKE end:] user_name: {user_name}, query: {query}, params: {params} recalled: {output}" + f"[seach_by_keywords_LIKE end:] user_name: {user_name}, params: {params} recalled: {output}" ) return output finally: diff --git a/src/memos/mem_feedback/feedback.py b/src/memos/mem_feedback/feedback.py index b986f7f13..6ff2e0df5 100644 --- a/src/memos/mem_feedback/feedback.py +++ b/src/memos/mem_feedback/feedback.py @@ -15,10 +15,10 @@ from memos.graph_dbs.factory import GraphStoreFactory, PolarDBGraphDB from memos.llms.factory import AzureLLM, LLMFactory, OllamaLLM, OpenAILLM from memos.mem_feedback.base import BaseMemFeedback -from memos.mem_feedback.utils import should_keep_update, split_into_chunks +from memos.mem_feedback.utils import make_mem_item, should_keep_update, split_into_chunks from memos.mem_reader.factory import MemReaderFactory from memos.mem_reader.read_multi_modal import detect_lang -from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata +from memos.memories.textual.item import TextualMemoryItem from memos.memories.textual.tree_text_memory.organize.manager import ( MemoryManager, extract_working_binding_ids, @@ -77,7 +77,8 @@ def __init__(self, config: MemFeedbackConfig): }, is_reorganize=self.is_reorganize, ) - self.searcher: Searcher = self.memory_manager.searcher + self.searcher: Searcher = None + self.reranker = None self.DB_IDX_READY = False def _batch_embed(self, texts: list[str], embed_bs: int = 5): @@ -562,6 +563,35 @@ def _generate_answer( return self._get_llm_response(prompt, dsl=False) + def _doc_filter(self, doc_scope: str, memories: list[TextualMemoryItem]): + """ + Filter the memory based on filename + """ + filename2_memid = {} + filename_mems = [] + + for item in memories: + for file_info in item.metadata.sources: + if file_info.type == "file": + file_dict = file_info.original_part + filename = file_dict["file"]["filename"] + if filename not in filename2_memid: + filename2_memid[filename] = [] + filename_mems.append(make_mem_item(filename)) + filename2_memid[filename].append(item.id) + + rerank_res = self.reranker.rerank(doc_scope, filename_mems, top_k=100) + inscope_docs = [item[0].memory for item in rerank_res if item[1] > 0.95] + + inscope_ids = [ + memid for inscope_file in inscope_docs for memid in filename2_memid[inscope_file] + ] + logger.info( + f"[Feedback Core: process_keyword_replace] These docs are in scope : {inscope_docs}, relared memids: {inscope_ids}" + ) + filter_memories = [mem for mem in 
memories if mem.id in inscope_ids] + return filter_memories + def process_keyword_replace(self, user_id: str, user_name: str, kwp_judge: dict | None = None): """ memory keyword replace process @@ -597,11 +627,7 @@ def process_keyword_replace(self, user_id: str, user_name: str, kwp_judge: dict retrieved_memories = [TextualMemoryItem(**item) for item in mem_data] if doc_scope != "NONE": - retrieved_memories = [ - item - for item in retrieved_memories - if doc_scope in item.metadata.sources # TODO - ] + retrieved_memories = self._doc_filter(doc_scope, retrieved_memories) if not retrieved_memories: return {"record": {"add": [], "update": []}} @@ -728,29 +754,25 @@ def check_validity(item): value = item["corrected_info"] key = item["key"] tags = item["tags"] - feedback_memories.append( - TextualMemoryItem( - memory=value, - metadata=TreeNodeTextualMemoryMetadata( - user_id=info.get("user_id", ""), - session_id=info.get("session_id", ""), - memory_type="LongTermMemory", - status="activated", - tags=tags, - key=key, - embedding=embedding, - usage=[], - sources=[{"type": "chat"}], - user_name=user_name, - background="[Feedback update background]: " - + str(chat_history) - + "\nUser feedback: " - + str(feedback_content), - confidence=0.99, - type="fine", - ), - ) + background = ( + "[Feedback update background]: " + + str(chat_history) + + "\nUser feedback: " + + str(feedback_content) + ) + mem_item = make_mem_item( + value, + user_id=user_id, + user_name=user_name, + session_id=session_id, + tags=tags, + key=key, + embedding=embedding, + sources=[{"type": "chat"}], + background=background, + type="fine", ) + feedback_memories.append(mem_item) mem_record = self._feedback_memory( user_id, diff --git a/src/memos/mem_feedback/simple_feedback.py b/src/memos/mem_feedback/simple_feedback.py index 478fa104f..429c2ea20 100644 --- a/src/memos/mem_feedback/simple_feedback.py +++ b/src/memos/mem_feedback/simple_feedback.py @@ -7,6 +7,7 @@ from memos.memories.textual.tree_text_memory.organize.manager import MemoryManager from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import StopwordManager from memos.memories.textual.tree_text_memory.retrieve.searcher import Searcher +from memos.reranker.base import BaseReranker logger = log.get_logger(__name__) @@ -21,6 +22,7 @@ def __init__( memory_manager: MemoryManager, mem_reader: SimpleStructMemReader, searcher: Searcher, + reranker: BaseReranker, ): self.llm = llm self.embedder = embedder @@ -29,4 +31,5 @@ def __init__( self.mem_reader = mem_reader self.searcher = searcher self.stopword_manager = StopwordManager + self.reranker = reranker self.DB_IDX_READY = False diff --git a/src/memos/mem_feedback/utils.py b/src/memos/mem_feedback/utils.py index b290993cd..24817a1d3 100644 --- a/src/memos/mem_feedback/utils.py +++ b/src/memos/mem_feedback/utils.py @@ -1,4 +1,4 @@ -from memos.memories.textual.item import TextualMemoryItem +from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata def estimate_tokens(text: str) -> int: @@ -84,3 +84,25 @@ def split_into_chunks(memories: list[TextualMemoryItem], max_tokens_per_chunk=50 chunks.append(current_chunk) return chunks + + +def make_mem_item(text: str, **kwargs) -> TextualMemoryItem: + """Build a minimal TextualMemoryItem.""" + return TextualMemoryItem( + memory=text, + metadata=TreeNodeTextualMemoryMetadata( + user_id=kwargs.get("user_id", ""), + session_id=kwargs.get("session_id", ""), + memory_type="LongTermMemory", + status="activated", + tags=kwargs.get("tags", []), + 
key=kwargs.get("key", ""), + embedding=kwargs.get("embedding", []), + usage=[], + sources=kwargs.get("sources", []), + user_name=kwargs.get("user_name", ""), + background=kwargs.get("background", ""), + confidence=0.99, + type=kwargs.get("type", ""), + ), + ) From 7b0f2f4d1489c1a7a87c2f78572f096c581c32e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Tue, 9 Dec 2025 19:57:01 +0800 Subject: [PATCH 38/40] add retrieve query --- src/memos/mem_feedback/feedback.py | 43 +++++++++++++++++++++++++----- src/memos/mem_feedback/utils.py | 6 ++--- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/memos/mem_feedback/feedback.py b/src/memos/mem_feedback/feedback.py index 6ff2e0df5..ebe7b9f24 100644 --- a/src/memos/mem_feedback/feedback.py +++ b/src/memos/mem_feedback/feedback.py @@ -277,15 +277,30 @@ def semantics_feedback( user_name: str, memory_item: TextualMemoryItem, current_memories: list[TextualMemoryItem], - fact_history: str, + history_str: str, + chat_history_list: list, ): + """Modify memory at the semantic level""" lang = detect_lang("".join(memory_item.memory)) template = FEEDBACK_PROMPT_DICT["compare"][lang] if current_memories == []: - current_memories = self._retrieve( + # retrieve feedback + feedback_retrieved = self._retrieve( memory_item.memory, info={"user_id": user_id}, user_name=user_name ) + # retrieve question + last_user_index = max(i for i, d in enumerate(chat_history_list) if d["role"] == "user") + last_qa = " ".join([item["content"] for item in chat_history_list[last_user_index:]]) + supplementary_retrieved = self._retrieve( + last_qa, info={"user_id": user_id}, user_name=user_name + ) + ids = [] + for item in feedback_retrieved + supplementary_retrieved: + if item.id not in ids: + ids.append(item.id) + current_memories.append(item) + if not current_memories: operations = [{"operation": "ADD"}] else: @@ -301,7 +316,7 @@ def semantics_feedback( prompt = template.format( current_memories=current_memories_str, new_facts=memory_item.memory, - chat_history=fact_history, + chat_history=history_str, ) future = executor.submit(self._get_llm_response, prompt) @@ -381,7 +396,7 @@ def _feedback_memory( feedback_content = kwargs.get("feedback_content", "") chat_history_lis = [f"""{msg["role"]}: {msg["content"]}""" for msg in chat_history[-4:]] - fact_history = "\n".join(chat_history_lis) + f"\nuser feedback: \n{feedback_content}" + history_str = "\n".join(chat_history_lis) + f"\nuser feedback: \n{feedback_content}" retrieved_memories = [ self.graph_store.get_node(_id, user_name=user_name) for _id in retrieved_memory_ids @@ -403,7 +418,13 @@ def _feedback_memory( with ContextThreadPoolExecutor(max_workers=3) as ex: futures = { ex.submit( - self.semantics_feedback, user_id, user_name, mem, current_memories, fact_history + self.semantics_feedback, + user_id, + user_name, + mem, + current_memories, + history_str, + chat_history, ): i for i, mem in enumerate(feedback_memories) } @@ -543,7 +564,17 @@ def correct_item(data): return None dehallu_res = [correct_item(item) for item in operations] - return [item for item in dehallu_res if item] + llm_operations = [item for item in dehallu_res if item] + + # Update takes precedence over add + has_update = any(item.get("operation").lower() == "update" for item in llm_operations) + if has_update: + filtered_items = [ + item for item in llm_operations if item.get("operation").lower() != "add" + ] + return filtered_items + else: + return llm_operations def 
_generate_answer( self, chat_history: list[MessageDict], feedback_content: str, corrected_answer: bool diff --git a/src/memos/mem_feedback/utils.py b/src/memos/mem_feedback/utils.py index 24817a1d3..ae0e0ba80 100644 --- a/src/memos/mem_feedback/utils.py +++ b/src/memos/mem_feedback/utils.py @@ -48,13 +48,13 @@ def calculate_similarity(text1: str, text2: str) -> float: similarity = calculate_similarity(old_text, new_text) change_ratio = 1 - similarity - if old_len < 50: + if old_len < 200: return change_ratio < 0.5 else: - return change_ratio < 0.15 + return change_ratio < 0.2 -def split_into_chunks(memories: list[TextualMemoryItem], max_tokens_per_chunk=500): +def split_into_chunks(memories: list[TextualMemoryItem], max_tokens_per_chunk: int = 500): chunks = [] current_chunk = [] current_tokens = 0 From 005a5bb42e1cd2983d070d63011fdba5146c8a88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Wed, 10 Dec 2025 14:47:26 +0800 Subject: [PATCH 39/40] add retrieve queries --- src/memos/mem_feedback/feedback.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/memos/mem_feedback/feedback.py b/src/memos/mem_feedback/feedback.py index ebe7b9f24..c8e081937 100644 --- a/src/memos/mem_feedback/feedback.py +++ b/src/memos/mem_feedback/feedback.py @@ -23,6 +23,7 @@ MemoryManager, extract_working_binding_ids, ) +from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import StopwordManager if TYPE_CHECKING: @@ -77,6 +78,7 @@ def __init__(self, config: MemFeedbackConfig): }, is_reorganize=self.is_reorganize, ) + self.stopword_manager = StopwordManager self.searcher: Searcher = None self.reranker = None self.DB_IDX_READY = False @@ -260,7 +262,6 @@ def _del_working_binding(self, user_name, mem_items: list[TextualMemoryItem]) -> for mid in delete_ids: try: - print("del", mid) self.graph_store.delete_node(mid, user_name=user_name) logger.info( @@ -482,8 +483,6 @@ def _vec_query(self, new_memories_embedding: list[float], user_name=None): self.graph_store.get_node(item["id"], user_name=user_name) for item in retrieved_ids ] - for item in current_memories: - print(item["id"], item["metadata"]["memory_type"], item["metadata"]["status"]) if not retrieved_ids: logger.info( f"[Feedback Core: _vec_query] No similar memories found for embedding query for user {user_name}."
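Note on the retrieval changes above: PATCH 38 widens recall before the compare prompt by running two searches, one seeded with the feedback fact and one with the last user turn, then unioning the hits, and it makes UPDATE operations suppress ADDs. Below is a minimal, self-contained sketch of those two rules, not part of the patch; MemHit and both helper names are hypothetical stand-ins for TextualMemoryItem and the inline logic in semantics_feedback / standard_operations.

from dataclasses import dataclass


@dataclass
class MemHit:
    # Hypothetical stand-in for TextualMemoryItem: only the fields the merge needs.
    id: str
    memory: str


def merge_retrievals(feedback_hits: list[MemHit], question_hits: list[MemHit]) -> list[MemHit]:
    """Union both retrieval passes; the first occurrence of an id wins, order is preserved."""
    merged: list[MemHit] = []
    seen: set[str] = set()
    for item in feedback_hits + question_hits:
        if item.id not in seen:
            seen.add(item.id)
            merged.append(item)
    return merged


def resolve_operations(operations: list[dict]) -> list[dict]:
    """UPDATE beats ADD: if any UPDATE is present, drop the ADDs, since the
    update already carries the corrected fact and an ADD would duplicate it."""
    has_update = any(op.get("operation", "").lower() == "update" for op in operations)
    if has_update:
        return [op for op in operations if op.get("operation", "").lower() != "add"]
    return operations

The dedup keeps the feedback-seeded hits first, so when the two passes overlap, the ranking from the feedback query dominates what the compare prompt sees.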
From d69e7f4cbffa6174210551fc7aaee1d31f1b3baa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Wed, 10 Dec 2025 21:14:33 +0800 Subject: [PATCH 40/40] patch info filter --- src/memos/api/handlers/add_handler.py | 1 + src/memos/api/product_models.py | 13 ++++ src/memos/mem_feedback/feedback.py | 69 +++++++++++++++----- src/memos/mem_feedback/utils.py | 10 ++- src/memos/mem_scheduler/general_scheduler.py | 1 + src/memos/multi_mem_cube/single_cube.py | 1 + 6 files changed, 76 insertions(+), 19 deletions(-) diff --git a/src/memos/api/handlers/add_handler.py b/src/memos/api/handlers/add_handler.py index 2758c9e32..3cdbedabf 100644 --- a/src/memos/api/handlers/add_handler.py +++ b/src/memos/api/handlers/add_handler.py @@ -89,6 +89,7 @@ def _check_messages(messages: MessageList) -> None: feedback_content=feedback_content, writable_cube_ids=add_req.writable_cube_ids, async_mode=add_req.async_mode, + info=add_req.info, ) process_record = cube_view.feedback_memories(feedback_req) diff --git a/src/memos/api/product_models.py b/src/memos/api/product_models.py index 06cc29729..d583f3e1f 100644 --- a/src/memos/api/product_models.py +++ b/src/memos/api/product_models.py @@ -684,6 +684,19 @@ class APIFeedbackRequest(BaseRequest): "async", description="feedback mode: sync or async" ) corrected_answer: bool = Field(False, description="Whether need return corrected answer") + info: dict[str, Any] | None = Field( + None, + description=( + "Additional metadata for the add request. " + "All keys can be used as filters in search. " + "Example: " + "{'agent_id': 'xxxxxx', " + "'app_id': 'xxxx', " + "'source_type': 'web', " + "'source_url': 'https://www.baidu.com', " + "'source_content': 'West Lake is the most famous scenic spot in Hangzhou'}." 
+ ), + ) # ==== mem_cube_id is NOT enabled==== mem_cube_id: str | None = Field( None, diff --git a/src/memos/mem_feedback/feedback.py b/src/memos/mem_feedback/feedback.py index c8e081937..3d650c17b 100644 --- a/src/memos/mem_feedback/feedback.py +++ b/src/memos/mem_feedback/feedback.py @@ -3,7 +3,7 @@ import json from datetime import datetime -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from tenacity import retry, stop_after_attempt, wait_exponential @@ -280,27 +280,28 @@ def semantics_feedback( current_memories: list[TextualMemoryItem], history_str: str, chat_history_list: list, + info: dict, ): """Modify memory at the semantic level""" lang = detect_lang("".join(memory_item.memory)) template = FEEDBACK_PROMPT_DICT["compare"][lang] if current_memories == []: # retrieve feedback - feedback_retrieved = self._retrieve( - memory_item.memory, info={"user_id": user_id}, user_name=user_name - ) + feedback_retrieved = self._retrieve(memory_item.memory, info=info, user_name=user_name) # retrieve question last_user_index = max(i for i, d in enumerate(chat_history_list) if d["role"] == "user") last_qa = " ".join([item["content"] for item in chat_history_list[last_user_index:]]) - supplementary_retrieved = self._retrieve( - last_qa, info={"user_id": user_id}, user_name=user_name - ) + supplementary_retrieved = self._retrieve(last_qa, info=info, user_name=user_name) ids = [] for item in feedback_retrieved + supplementary_retrieved: if item.id not in ids: ids.append(item.id) current_memories.append(item) + include_keys = ["agent_id", "app_id"] + current_memories = [ + item for item in current_memories if self._info_comparison(item, info, include_keys) + ] if not current_memories: operations = [{"operation": "ADD"}] @@ -336,7 +337,6 @@ def semantics_feedback( operations = self.standard_operations(all_operations, current_memories) - # TODO based on the operation, change memory_item memory info ; change source info logger.info(f"[Feedback memory operations]: {operations!s}") if not operations: @@ -395,6 +395,7 @@ def _feedback_memory( retrieved_memory_ids = kwargs.get("retrieved_memory_ids") or [] chat_history = kwargs.get("chat_history", []) feedback_content = kwargs.get("feedback_content", "") + info = kwargs.get("info", {}) chat_history_lis = [f"""{msg["role"]}: {msg["content"]}""" for msg in chat_history[-4:]] history_str = "\n".join(chat_history_lis) + f"\nuser feedback: \n{feedback_content}" @@ -426,6 +427,7 @@ def _feedback_memory( current_memories, history_str, chat_history, + info, ): i for i, mem in enumerate(feedback_memories) } @@ -450,6 +452,17 @@ def _feedback_memory( } } + def _info_comparison(self, memory: TextualMemoryItem, _info: dict, include_keys: list) -> bool: + if not _info and not memory.metadata.info: + return True + + record = [] + for key in include_keys: + info_v = _info.get(key) + mem_v = memory.metadata.info.get(key, None) + record.append(info_v == mem_v) + return all(record) + def _retrieve(self, query: str, info=None, user_name=None): """Retrieve memory items""" retrieved_mems = self.searcher.search( @@ -622,13 +635,20 @@ def _doc_filter(self, doc_scope: str, memories: list[TextualMemoryItem]): filter_memories = [mem for mem in memories if mem.id in inscope_ids] return filter_memories - def process_keyword_replace(self, user_id: str, user_name: str, kwp_judge: dict | None = None): + def process_keyword_replace( + self, user_id: str, user_name: str, kwp_judge: dict | None = None, info: dict | None = None + ): """ - memory keyword replace process + 
Memory keyword replace process """ + info = info or {} doc_scope = kwp_judge.get("doc_scope", "NONE") original_word = kwp_judge.get("original") target_word = kwp_judge.get("target") + include_keys = ["agent_id", "app_id"] + + mem_info = {key: info[key] for key in info if key in include_keys} + filter_dict = {f"info.{key}": info[key] for key in mem_info} if self.DB_IDX_READY: # retrieve @@ -639,22 +659,26 @@ def process_keyword_replace(self, user_id: str, user_name: str, kwp_judge: dict must_part = f"{' & '.join(queries)}" if len(queries) > 1 else queries[0] retrieved_ids = self.graph_store.seach_by_keywords_tfidf( - [must_part], user_name=user_name + [must_part], user_name=user_name, filter=filter_dict ) if len(retrieved_ids) < 1: retrieved_ids = self.graph_store.search_by_fulltext( - queries, top_k=100, user_name=user_name + queries, top_k=100, user_name=user_name, filter=filter_dict ) else: retrieved_ids = self.graph_store.seach_by_keywords_like( - f"%{original_word}%", user_name=user_name + f"%{original_word}%", user_name=user_name, filter=filter_dict ) - # filter by doc scope mem_data = [ self.graph_store.get_node(item["id"], user_name=user_name) for item in retrieved_ids ] retrieved_memories = [TextualMemoryItem(**item) for item in mem_data] + retrieved_memories = [ + item + for item in retrieved_memories + if self._info_comparison(item, mem_info, include_keys) + ] if doc_scope != "NONE": retrieved_memories = self._doc_filter(doc_scope, retrieved_memories) @@ -701,7 +725,7 @@ def process_keyword_replace(self, user_id: str, user_name: str, kwp_judge: dict update_results.append(result) except Exception as e: mem_id = future_to_info[future][0] - self.logger.error( + logger.error( f"[Feedback Core DB] Exception during update operation for memory {mem_id}: {e}" ) @@ -713,6 +737,7 @@ def process_feedback_core( user_name: str, chat_history: list[MessageDict], feedback_content: str, + info: dict | None = None, **kwargs, ) -> dict: """ @@ -734,7 +759,11 @@ def check_validity(item): try: feedback_time = kwargs.get("feedback_time") or datetime.now().isoformat() session_id = kwargs.get("session_id") - info = {"user_id": user_id, "user_name": user_name, "session_id": session_id} + if not info: + info = {"user_id": user_id, "user_name": user_name, "session_id": session_id} + else: + info.update({"user_id": user_id, "user_name": user_name, "session_id": session_id}) + logger.info( f"[Feedback Core: process_feedback_core] Starting memory feedback process for user {user_name}" ) @@ -746,7 +775,9 @@ def check_validity(item): and kwp_judge.get("original", "NONE") != "NONE" and kwp_judge.get("target", "NONE") != "NONE" ): - return self.process_keyword_replace(user_id, user_name, kwp_judge=kwp_judge) + return self.process_keyword_replace( + user_id, user_name, kwp_judge=kwp_judge, info=info + ) # llm update memory if not chat_history: @@ -801,6 +832,7 @@ def check_validity(item): sources=[{"type": "chat"}], background=background, type="fine", + info=info, ) feedback_memories.append(mem_item) @@ -810,6 +842,7 @@ def check_validity(item): feedback_memories, chat_history=chat_history, feedback_content=feedback_content, + info=info, **kwargs, ) logger.info( @@ -827,6 +860,7 @@ def process_feedback( user_name: str, chat_history: list[MessageDict], feedback_content: str, + info: dict[str, Any] | None = None, **kwargs, ): """ @@ -856,6 +890,7 @@ def process_feedback( user_name, chat_history, feedback_content, + info, **kwargs, ) done, pending = concurrent.futures.wait([answer_future, core_future], timeout=30) 
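Note on the info scoping introduced in this hunk: a memory stays in scope only when the request's info and the memory's stored metadata info agree on the include keys (agent_id, app_id), and the same constraint is pushed down into the graph-store keyword searches via a filter dict keyed by "info.<key>". A simplified, dict-based sketch of the in-memory check follows; it is deliberately a bit more defensive than _info_comparison above, since it also tolerates a missing info dict on either side.

INCLUDE_KEYS = ["agent_id", "app_id"]


def info_matches(mem_info: dict | None, request_info: dict | None) -> bool:
    # Both sides empty: nothing to scope on, so the memory passes.
    if not request_info and not mem_info:
        return True
    mem_info, request_info = mem_info or {}, request_info or {}
    # Every include key must carry the same value on both sides (missing == missing is a match).
    return all(request_info.get(k) == mem_info.get(k) for k in INCLUDE_KEYS)


# Only the first memory survives a request scoped to agent "a1" / app "web".
mems = [{"agent_id": "a1", "app_id": "web"}, {"agent_id": "a2", "app_id": "web"}]
scoped = [m for m in mems if info_matches(m, {"agent_id": "a1", "app_id": "web"})]
assert scoped == [mems[0]]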
diff --git a/src/memos/mem_feedback/utils.py b/src/memos/mem_feedback/utils.py index ae0e0ba80..0033d85b4 100644 --- a/src/memos/mem_feedback/utils.py +++ b/src/memos/mem_feedback/utils.py @@ -88,11 +88,16 @@ def split_into_chunks(memories: list[TextualMemoryItem], max_tokens_per_chunk: i def make_mem_item(text: str, **kwargs) -> TextualMemoryItem: """Build a minimal TextualMemoryItem.""" + info = kwargs.get("info", {}) + info_ = info.copy() + user_id = info_.pop("user_id", "") + session_id = info_.pop("session_id", "") + return TextualMemoryItem( memory=text, metadata=TreeNodeTextualMemoryMetadata( - user_id=kwargs.get("user_id", ""), - session_id=kwargs.get("session_id", ""), + user_id=user_id, + session_id=session_id, memory_type="LongTermMemory", status="activated", tags=kwargs.get("tags", []), @@ -104,5 +109,6 @@ def make_mem_item(text: str, **kwargs) -> TextualMemoryItem: background=kwargs.get("background", ""), confidence=0.99, type=kwargs.get("type", ""), + info=info_, ), ) diff --git a/src/memos/mem_scheduler/general_scheduler.py b/src/memos/mem_scheduler/general_scheduler.py index 8f3eccecf..a9502c394 100644 --- a/src/memos/mem_scheduler/general_scheduler.py +++ b/src/memos/mem_scheduler/general_scheduler.py @@ -604,6 +604,7 @@ def _mem_feedback_message_consumer(self, messages: list[ScheduleMessageItem]) -> feedback_content=feedback_data.get("feedback_content"), feedback_time=feedback_data.get("feedback_time"), task_id=task_id, + info=feedback_data.get("info", None), ) logger.info( diff --git a/src/memos/multi_mem_cube/single_cube.py b/src/memos/multi_mem_cube/single_cube.py index f0157952b..71a34beb4 100644 --- a/src/memos/multi_mem_cube/single_cube.py +++ b/src/memos/multi_mem_cube/single_cube.py @@ -183,6 +183,7 @@ def feedback_memories(self, feedback_req: APIFeedbackRequest) -> dict[str, Any]: async_mode=feedback_req.async_mode, corrected_answer=feedback_req.corrected_answer, task_id=feedback_req.task_id, + info=feedback_req.info, ) self.logger.info(f"Feedback memories result: {feedback_result}") return feedback_result
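Note on make_mem_item's new info handling in the final patch: identity fields are popped out of the caller's info dict into dedicated metadata fields, and whatever remains (agent_id, app_id, source_* and any other caller-supplied keys) is stored verbatim so later searches can filter on it. A small sketch of that pop-and-store pattern; split_info is a hypothetical helper written for illustration, not patch code.

def split_info(info: dict | None) -> tuple[str, str, dict]:
    info_ = (info or {}).copy()  # copy first: never mutate the caller's dict
    user_id = info_.pop("user_id", "")
    session_id = info_.pop("session_id", "")
    # Identity fields become first-class metadata; the residual dict is stored as-is.
    return user_id, session_id, info_


user_id, session_id, residual = split_info(
    {"user_id": "u1", "session_id": "s1", "agent_id": "a1", "source_type": "web"}
)
assert (user_id, session_id) == ("u1", "s1")
assert residual == {"agent_id": "a1", "source_type": "web"}

Copying before popping matters here: process_feedback_core reuses the same info dict for retrieval scoping and for the scheduler message, so mutating it inside make_mem_item would silently strip user_id from those later consumers.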