|
2 | 2 | import hashlib |
3 | 3 | import json |
4 | 4 | import re |
| 5 | +from datetime import timedelta |
5 | 6 | from pathlib import Path |
6 | 7 | from typing import List, Optional |
7 | 8 |
|
@@ -200,20 +201,23 @@ def _extract_request_message(self, request: str) -> Optional[dict]: |
200 | 201 |
|
201 | 202 | def _create_hash_key(self, message: str, provider: str) -> str: |
202 | 203 | """Creates a hash key from the message and includes the provider""" |
203 | | - # Try to extract the path from the message. Most of the time it is at the top of the message. |
204 | | - # The pattern was generated using ChatGPT. Should match common occurrences like: |
| 204 | + # Try to extract the path from the FIM message. The path appears in FIM requests in these formats: |
205 | 205 | # folder/testing_file.py |
206 | 206 | # Path: file3.py |
207 | | - pattern = r"(?:[a-zA-Z]:\\|\/)?(?:[^\s\/]+\/)*[^\s\/]+\.[^\s\/]+" |
208 | | - match = re.search(pattern, message) |
209 | | - # Copilot is the only provider that has an easy path to extract. |
210 | | - # Other providers are harder to extract. This part needs to be revisited; for the moment, |
211 | | - # hashing the entire request message. |
212 | | - if match is None or provider != "copilot": |
213 | | - logger.warning("No path found in message or not copilot. Creating hash from message.") |
| 207 | + pattern = r"^#.*?\b([a-zA-Z0-9_\-\/]+\.\w+)\b" |
| 208 | + matches = re.findall(pattern, message, re.MULTILINE) |
| 209 | + # If no path is found, hash the entire prompt message. |
| 210 | + if not matches: |
| 211 | + logger.warning("No path found in messages. Creating hash cache from message.") |
214 | 212 | message_to_hash = f"{message}-{provider}" |
215 | 213 | else: |
216 | | - message_to_hash = f"{match.group(0)}-{provider}" |
| 214 | + # Copilot puts the path at the top of the file. Continue providers contain |
| 215 | + # several paths; the one in which the FIM is triggered is the last one. |
| 216 | + if provider == "copilot": |
| 217 | + filepath = matches[0] |
| 218 | + else: |
| 219 | + filepath = matches[-1] |
| 220 | + message_to_hash = f"{filepath}-{provider}" |
217 | 221 |
|
218 | 222 | logger.debug(f"Message to hash: {message_to_hash}") |
219 | 223 | hashed_content = hashlib.sha256(message_to_hash.encode("utf-8")).hexdigest() |
@@ -247,7 +251,10 @@ def _should_record_context(self, context: Optional[PipelineContext]) -> bool: |
247 | 251 |
|
248 | 252 | elapsed_seconds = (context.input_request.timestamp - old_timestamp).total_seconds() |
249 | 253 | if elapsed_seconds < Config.get_config().max_fim_hash_lifetime: |
250 | | - logger.info(f"Skipping context recording. Elapsed time: {elapsed_seconds} seconds.") |
| 254 | + logger.info( |
| 255 | + f"Skipping DB context recording. " |
| 256 | + f"Elapsed time since last FIM cache: {timedelta(seconds=elapsed_seconds)}." |
| 257 | + ) |
251 | 258 | return False |
252 | 259 |
|
253 | 260 | async def record_context(self, context: Optional[PipelineContext]) -> None: |
|
0 commit comments