|
541 | 541 | " \"\"\"\n", |
542 | 542 | "\n", |
543 | 543 | "# Prompt for the LLM to extract questions.\n", |
544 | | - "def seperate_questions_prompt(parser: PydanticOutputParser[AllQuestionsModelLines], doc_page_content: list[str]) -> str: #, previous_repsonse: str = \"\", improvements: list[str] = \"\") -> str:\n", |
| 544 | + "def seperate_questions_prompt(parser: PydanticOutputParser[AllQuestionsModelLines], doc_page_content: list[Markdown]) -> str: #, previous_repsonse: str = \"\", improvements: list[str] = \"\") -> str:\n", |
545 | 545 | "\n", |
546 | 546 | " feedback = \"\"\n", |
547 | 547 | " # if previous_repsonse:\n", |
|
624 | 624 | " questions: list[QuestionModel] = Field(..., description=\"A list of questions.\")\n", |
625 | 625 | "\n", |
626 | 626 | "\n", |
627 | | - "def extract_questions(allQuestionsModel: AllQuestionsModelLines, doc_page_content: list[str]) -> AllQuestionsModel:\n", |
| 627 | + "def extract_questions(allQuestionsModel: AllQuestionsModelLines, doc_page_content: list[Markdown]) -> AllQuestionsModel:\n", |
628 | 628 | " \"\"\"\n", |
629 | 629 | " Extracts questions from the AllQuestions model and returns a list of Question objects.\n", |
630 | 630 | " \"\"\"\n", |
|
635 | 635 | " questions = []\n", |
636 | 636 | "\n", |
637 | 637 | " for question in allQuestionsModel.questions:\n", |
638 | | - " question_content = \"\\n\".join(doc_page_content[question.question_content_start:question.question_content_end+1])\n", |
639 | | - " solution_content = \"\\n\".join(doc_page_content[question.solution_content_start:question.solution_content_end+1])\n", |
| 638 | + " question_content = classes_to_markdown(doc_page_content[question.question_content_start:question.question_content_end+1])\n", |
| 639 | + " solution_content = classes_to_markdown(doc_page_content[question.solution_content_start:question.solution_content_end+1])\n", |
640 | 640 | " # Important: duplicates are removed below, so the image list will be wrong if two identical images are used, although this should not be possible.\n", |
641 | 641 | " images = list(set(extract_images(question_content) + extract_images(solution_content)))\n", |
642 | 642 | "\n", |
|
774 | 774 | "metadata": {}, |
775 | 775 | "outputs": [], |
776 | 776 | "source": [ |
777 | | - "def llm_extract_questions_lines(markdown: list[str]) -> dict:\n", |
| 777 | + "def llm_extract_questions_lines(markdown: list[Markdown]) -> dict:\n", |
778 | 778 | " print(\"Begining to seperate the questions from the markdown content...\")\n", |
779 | 779 | " \n", |
780 | 780 | " # Initialise the parser for the output.\n", |
|
879 | 879 | "metadata": {}, |
880 | 880 | "outputs": [], |
881 | 881 | "source": [ |
882 | | - "def convert_set_question_lines_to_set_question(set_question_lines: Set_Question_Lines, question_content: list[str], images: list[str] = []) -> Set_Question:\n", |
| 882 | + "def convert_set_question_lines_to_set_question(set_question_lines: Set_Question_Lines, question_content: list[Markdown], images: list[str] = []) -> Set_Question:\n", |
883 | 883 | " \"\"\"\n", |
884 | 884 | " Convert Set_Question_Lines to Set_Question.\n", |
885 | 885 | " \"\"\"\n", |
886 | 886 | " return Set_Question(\n", |
887 | 887 | " title=set_question_lines.title,\n", |
888 | | - " content=\"\\n\".join(question_content[set_question_lines.content_start:set_question_lines.content_end + 1]),\n", |
889 | | - " parts=[\"\\n\".join(question_content[part.part_start:part.part_end + 1]) for part in set_question_lines.parts],\n", |
| 888 | + " content=classes_to_markdown(question_content[set_question_lines.content_start:set_question_lines.content_end + 1]),\n", |
| 889 | + " parts=[classes_to_markdown(question_content[part.part_start:part.part_end + 1]) for part in set_question_lines.parts],\n", |
890 | 890 | " images=images\n", |
891 | 891 | " )\n", |
892 | 892 | "\n", |
893 | | - "def convert_set_solution_lines_to_set_solution(set_solution_lines: list[Set_Solution_Part_Lines], solution_content: list[str]) -> Set_Solution:\n", |
| 893 | + "def convert_set_solution_lines_to_set_solution(set_solution_lines: list[Set_Solution_Part_Lines], solution_content: list[Markdown]) -> Set_Solution:\n", |
894 | 894 | " \"\"\"\n", |
895 | 895 | " Convert Set_Solution_Part_Lines to Set_Solution.\n", |
896 | 896 | " \"\"\"\n", |
897 | 897 | " return Set_Solution(\n", |
898 | 898 | " parts_solutions=[\n", |
899 | | - " \"\\n\".join(solution_content[part.part_solution_start:part.part_solution_end + 1])\n", |
| 899 | + " classes_to_markdown(solution_content[part.part_solution_start:part.part_solution_end + 1])\n", |
900 | 900 | " for part in set_solution_lines\n", |
901 | 901 | " ]\n", |
902 | 902 | " )\n" |
|
972 | 972 | " # Initialize the output parser with the Set_Question_Lines schema.\n", |
973 | 973 | " question_parser = PydanticOutputParser(pydantic_object=Set_Question_Lines)\n", |
974 | 974 | "\n", |
975 | | - " question_input: list[str] = question[\"question_content\"].splitlines()\n", |
| 975 | + " question_input: list[Markdown] = markdown_to_classes(question[\"question_content\"])\n", |
976 | 976 | " solution_input: str = question[\"solution_content\"]\n", |
977 | 977 | " all_images = question[\"images\"]\n", |
978 | 978 | "\n", |
|
1023 | 1023 | " part_idx, part = part_data\n", |
1024 | 1024 | " solution_parser = PydanticOutputParser(pydantic_object=Set_Solution_Part_Lines)\n", |
1025 | 1025 | "\n", |
1026 | | - " target_solution_input: list[str] = solution_input.splitlines()\n", |
| 1026 | + " target_solution_input: list[Markdown] = markdown_to_classes(solution_input)\n", |
1027 | 1027 | "\n", |
1028 | 1028 | " # Prompt for the LLM to extract the solution part.\n", |
1029 | 1029 | " # Use the full solution content and the part to extract the specific solution.\n", |
|
1071 | 1071 | "\n", |
1072 | 1072 | " solutions_parts = convert_set_solution_lines_to_set_solution(\n", |
1073 | 1073 | " solutions_parts, \n", |
1074 | | - " solution_input.splitlines()\n", |
| 1074 | + " markdown_to_classes(solution_input)\n", |
1075 | 1075 | " )\n", |
1076 | 1076 | "\n", |
1077 | 1077 | " # set_solution = Set_Solution(parts_solutions=solutions_parts)\n", |
|
1166 | 1166 | " If parsing fails, returns None.\n", |
1167 | 1167 | " \"\"\"\n", |
1168 | 1168 | "\n", |
1169 | | - " md_content_lines = md_content.splitlines()\n", |
| 1169 | + " md_content_lines = markdown_to_classes(md_content)\n", |
1170 | 1170 | "\n", |
1171 | 1171 | " # corrected_md_content = correct_mistakes_in_markdown(md_content)\n", |
1172 | 1172 | " # print(\"Markdown content corrected for spelling, grammar, and structure.\")\n", |
|
0 commit comments