|
1291 | 1291 | " end: str = Field(..., description=\"The end position of the trim.\")\n", |
1292 | 1292 | "\n", |
1293 | 1293 | "llm_task_trim_content = f\"\"\"\n", |
1294 | | - " You will be given the full text of a question, extracted from a markdown file by line numbers.\n", |
| 1294 | + " You will be given the full text of a question, extracted from a markdown file using line numbers.\n", |
| 1295 | + " Assuming the extracted text is correct, only the start of the first line and the end of the last line may contain unwanted text.\n", |
1295 | 1296 | " The first and last lines may contain unwanted text, such as:\n", |
1296 | 1297 | " - Question numbering (e.g. \"1.\", \"2.\", \"(a)\", \"(b)\", \"i.\", \"ii.\", etc.)\n", |
1297 | 1298 | " - Text from the previous or next question.\n", |
| 1299 | + " We want to remove this unwanted text.\n", |
1298 | 1300 | "\n", |
1299 | 1301 | " Focus only on the actual stem (content) of the question.\n", |
1300 | 1302 | "\n", |
1301 | 1303 | " Your task is to, using the full question as guidance:\n", |
1302 | | - " - From the first line, identify the exact substring where the stem begins, without the unwanted text, and put it in `start`.\n", |
1303 | | - " - From the last line, identify the exact substring where the stem ends, without the unwanted text, and put it in `end`.\n", |
1304 | | - " - Ensure that the substrings are taken verbatim from the original text, so they can be located precisely in Python code.\n", |
1305 | | - " - Try to output as little as possible.\n", |
1306 | | - "\n", |
1307 | | - " We assume that the middle of the stem is always correct, so only the start and end may need trimming.\n", |
| 1304 | + " - The first and last line of the stem may be the same if the stem is only one line.\n", |
| 1305 | + " - From the first line, identify the exact substring where the wanted text begins, and put it in `start`.\n", |
| 1306 | + " - From the last line, identify the exact substring where the wanted text ends and put it in `end`.\n", |
| 1307 | + " - These two substrings will be used to find the start and end index using regex afterwards, so try to use as few words as possible.\n", |
| 1308 | + " - Overlapping between start and end is allowed.\n", |
1308 | 1309 | "\n", |
1309 | 1310 | " Example #1:\n", |
1310 | 1311 | " first line: \"1. A man is going up hill at 1m/s\"\n", |
|
1315 | 1316 | " \"\"\"\n", |
1316 | 1317 | "\n", |
1317 | 1318 | "llm_task_trim_part = f\"\"\"\n", |
1318 | | - " You will be given the full text of a question, extracted from a markdown file by line numbers.\n", |
| 1319 | + " You will be given the full text of a question, extracted from a markdown file using line numbers.\n", |
| 1320 | + " Assuming the extracted text is correct, only the start of the first line and the end of the last line may contain unwanted text.\n", |
1319 | 1321 | " The first and last lines may contain unwanted text, such as:\n", |
1320 | 1322 | " - Question numbering (e.g. \"1.\", \"2.\", \"(a)\", \"(b)\", \"i.\", \"ii.\", etc.)\n", |
1321 | 1323 | " - Text from the previous or next question.\n", |
| 1324 | + " We want to remove this unwanted text.\n", |
1322 | 1325 | "\n", |
1323 | 1326 | " Focus only on one sub-question (part) of the question, specified later.\n", |
1324 | 1327 | "\n", |
1325 | 1328 | " Your task is to, using the full question as guidance:\n", |
1326 | | - " - Identify the exact substring where the sub-question begins, without the unwanted text, and put it in `start`.\n", |
1327 | | - " - Identify the exact substring where the sub-question ends, without the unwanted text, and put it in `end`.\n", |
1328 | | - " - Ensure that the substrings are taken verbatim from the original text, so they can be located precisely in Python code.\n", |
1329 | | - " - Try to output as little as possible.\n", |
1330 | | - "\n", |
1331 | | - " We assume that the middle of the sub-question is always correct, so only the start and end may need trimming.\n", |
| 1329 | + " - The first and last line of the sub-question may be the same if the sub-question is only one line.\n", |
| 1330 | + " - From the first line, identify the exact substring where the wanted text begins, and put it in `start`.\n", |
| 1331 | + " - From the last line, identify the exact substring where the wanted text ends and put it in `end`.\n", |
| 1332 | + " - These two substrings will be used to find the start and end index using regex afterwards, so try to use as few words as possible.\n", |
| 1333 | + " - Overlapping between start and end is allowed.\n", |
1332 | 1334 | "\n", |
1333 | 1335 | " Example #1:\n", |
1334 | 1336 | " first line: \"answer the following question: (a) what is his speed?\"\n", |
|
1339 | 1341 | " \"\"\"\n", |
1340 | 1342 | "\n", |
1341 | 1343 | "llm_task_trim_part_solution = f\"\"\"\n", |
1342 | | - " You will be given the full text of a question, extracted from a markdown file by line numbers.\n", |
| 1344 | + " You will be given the full text of a question, extracted from a markdown file using line numbers.\n", |
| 1345 | + " Assuming the extracted text is correct, only the start of the first line and the end of the last line may contain unwanted text.\n", |
1343 | 1346 | " The first and last lines may contain unwanted text, such as:\n", |
1344 | 1347 | " - Question numbering (e.g. \"1.\", \"2.\", \"(a)\", \"(b)\", \"i.\", \"ii.\", etc.)\n", |
1345 | 1348 | " - Text from the previous or next question.\n", |
| 1349 | + " We want to remove this unwanted text.\n", |
1346 | 1350 | "\n", |
1347 | | - " Focus only on one part-solution of a sub-question of the question, specified later.\n", |
| 1351 | + " Focus only on one part-solution of a sub-question (part) of the question, specified later.\n", |
1348 | 1352 | "\n", |
1349 | 1353 | " Your task is to, using the full question as guidance:\n", |
1350 | | - " - Identify the exact substring where the part-solution begins, without the unwanted text, and put it in `start`.\n", |
1351 | | - " - Identify the exact substring where the part-solution ends, without the unwanted text, and put it in `end`.\n", |
1352 | | - " - Ensure that the substrings are taken verbatim from the original text, so they can be located precisely in Python code.\n", |
1353 | | - " - Try to output as little as possible.\n", |
1354 | | - "\n", |
1355 | | - " We assume that the middle of the part-solution is always correct, so only the start and end may need trimming.\n", |
| 1354 | + " - The first and last line of the part-solution may be the same if the part-solution is only one line.\n", |
| 1355 | + " - From the first line, identify the exact substring where the wanted text begins, and put it in `start`.\n", |
| 1356 | + " - From the last line, identify the exact substring where the wanted text ends and put it in `end`.\n", |
| 1357 | + " - These two substrings will be searched for verbatim to find the start and end index afterwards, so copy them exactly from the text and try to use as few words as possible.\n", |
| 1358 | + " - Overlapping between start and end is allowed.\n", |
1356 | 1359 | "\n", |
1357 | 1360 | " Example #1:\n", |
1358 | 1361 | " first line: \"A: (a) 2 + 3 = 5\"\n", |
|
1384 | 1387 | " Full question:\n", |
1385 | 1388 | " {question}\n", |
1386 | 1389 | "\n", |
1387 | | - " Stem (content) of the question:\n", |
| 1390 | + " Stem (content) of the question to extract from:\n", |
1388 | 1391 | " {content_text}\n", |
1389 | 1392 | "\n", |
1390 | 1393 | " Return the JSON now.\n", |
|
1428 | 1431 | " Full question:\n", |
1429 | 1432 | " {question}\n", |
1430 | 1433 | "\n", |
1431 | | - " specific sub-question (part) of the question:\n", |
| 1434 | + " Specific sub-question (part) of the question to extract from:\n", |
1432 | 1435 | " {part_text}\n", |
1433 | 1436 | "\n", |
1434 | 1437 | " Return the JSON now.\n", |
|
1472 | 1475 | " Full question:\n", |
1473 | 1476 | " {question}\n", |
1474 | 1477 | "\n", |
1475 | | - " Specific part-solution of the question:\n", |
| 1478 | + " Specific part-solution of the question to extract from:\n", |
1476 | 1479 | " {solution_text}\n", |
1477 | 1480 | "\n", |
1478 | 1481 | " Return the JSON now.\n", |
|
1484 | 1487 | "\n", |
1485 | 1488 | " try:\n", |
1486 | 1489 | " parsed_output = solution_parser.parse(response.content)\n", |
1487 | | - " start = solution_text.index(parsed_output.start)\n", |
1488 | | - " end = solution_text.index(parsed_output.end) + len(parsed_output.end)\n", |
| 1490 | + " start = solution_text.index(parsed_output.start.replace('\\\\\\\\', '\\\\'))\n", |
| 1491 | + " end = solution_text.index(parsed_output.end.replace('\\\\\\\\', '\\\\')) + len(parsed_output.end)\n", |
1489 | 1492 | " print(f\"Successfully trimmed part-solution for question {question_number}, part {part_number}.\")\n", |
1490 | 1493 | "\n", |
1491 | 1494 | " return improve_trim(solution_text, start, end)\n", |
1492 | 1495 | " except Exception as e:\n", |
1493 | 1496 | " print(f\"Error parsing LLM response as JSON for trimming solution part for question {question_number}, part {part_number}\")\n", |
| 1497 | + " print(response.content)\n", |
1494 | 1498 | " print(f\"Retrying... Attempt No.{attempt_idx + 1}\")\n", |
1495 | 1499 | " time.sleep(2)\n", |
1496 | 1500 | "\n", |
|
0 commit comments