Commit 76cbea1

removed evaluations

1 parent 9d27886 commit 76cbea1

4 files changed: +72 -29 lines changed

conversion2025/assumptions.txt

Lines changed: 4 additions & 1 deletion
@@ -3,4 +3,7 @@ assumptions:
 - the set of question contains the questions AND solutions
 - parts are only 1 level deep (i.e. no Q1, part a), i)
 - individual questions and solutions are seperatable by using just lines
-- all parts are explicitly enumerated
+- all parts are explicitly enumerated
+
+
+parts needs to be ordered

conversion2025/mathpix_to_llm_with_lines_to_api.ipynb

Lines changed: 15 additions & 14 deletions
@@ -316,11 +316,12 @@
 " api_key=os.environ[\"OPENAI_API_KEY\"],\n",
 " )\n",
 "\n",
-"# Uses gpt-4.1-mini:\n",
+"# Uses gpt-5-mini:\n",
 "# - more intelligent\n",
 "llm_mini = ChatOpenAI(\n",
-" model=\"gpt-4.1-mini\",\n",
+" model=\"gpt-5-mini\",\n",
 " api_key=os.environ[\"OPENAI_API_KEY\"],\n",
+" reasoning_effort=\"minimal\"\n",
 " )"
 ]
 },
@@ -472,19 +473,19 @@
 " \"\"\"\n",
 "\n",
 "# Prompt for the LLM to extract questions.\n",
-"def seperate_questions_prompt(parser: PydanticOutputParser[AllQuestionsModelLines], doc_page_content: list[str], previous_repsonse: str = \"\", improvements: list[str] = \"\") -> str:\n",
+"def seperate_questions_prompt(parser: PydanticOutputParser[AllQuestionsModelLines], doc_page_content: list[str]) -> str: #, previous_repsonse: str = \"\", improvements: list[str] = \"\") -> str:\n",
 "\n",
 " feedback = \"\"\n",
-" if previous_repsonse:\n",
-" feedback = f\"\"\"\n",
+" # if previous_repsonse:\n",
+" # feedback = f\"\"\"\n",
 " \n",
-" Previous output:\n",
-" {previous_repsonse}\n",
+" # Previous output:\n",
+" # {previous_repsonse}\n",
 "\n",
-" Improvements:\n",
-" {improvements}\n",
+" # Improvements:\n",
+" # {improvements}\n",
 "\n",
-" \"\"\"\n",
+" # \"\"\"\n",
 "\n",
 " return f\"\"\"\n",
 " Your task is to extract a JSON with the following structure exactly, ready to be parsed by a pydantic model:\n",
@@ -716,14 +717,14 @@
 "\n",
 " for attempt_idx in range(3):\n",
 " try:\n",
-" response = llm_mini.invoke(seperate_questions_prompt(parser, markdown, previous_response, improvements))\n",
+" response = llm_mini.invoke(seperate_questions_prompt(parser, markdown)) #, previous_response, improvements))\n",
 " parsed_response = parser.parse(response.content)\n",
 " questions_dict = extract_questions(parsed_response, markdown)\n",
 " print(questions_dict.model_dump_json())\n",
 "\n",
-" evaluation = evaluate_questions_separation(parsed_output=questions_dict, markdown=markdown)\n",
-"\n",
-" if all(e.well_separated for e in evaluation):\n",
+" # evaluation = evaluate_questions_separation(parsed_output=questions_dict, markdown=markdown)\n",
+" # if all(e.well_separated for e in evaluation):\n",
+" if True:\n",
 " print(\"Question separation was successful.\")\n",
 " return questions_dict.model_dump()\n",
 " else:\n",

conversion2025/testing.ipynb

Lines changed: 31 additions & 0 deletions
@@ -119,6 +119,37 @@
 "result = extract_images(\"1 images here, ![yap](text.png)\")\n",
 "print(result) # Output: ['text.png']"
 ]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "7",
+"metadata": {},
+"outputs": [],
+"source": [
+"import os\n",
+"from dotenv import load_dotenv\n",
+"from langchain_openai import ChatOpenAI\n",
+"\n",
+"load_dotenv()\n",
+"\n",
+"# Uses gpt-5-mini:\n",
+"# - more intelligent\n",
+"llm_mini = ChatOpenAI(\n",
+" model=\"gpt-5-mini\",\n",
+" api_key=os.environ[\"OPENAI_API_KEY\"],\n",
+" reasoning_effort=\"high\"\n",
+" )\n",
+"prompt = \"how many letters are in this prompt, only return the number.\"\n",
+"prompt = \"return and only return the prompt exactly\"\n",
+"prompt = \"waeuifgiufaiu liaisofeoidob ofbea df kdb vboae beoihffewafne nod In this prompt, where does the first p occur, using 0 indexing? only return the answer\"\n",
+"\n",
+"response = llm_mini.invoke(prompt).content\n",
+"\n",
+"print(list(prompt).index(\"p\"))\n",
+"print(response)\n",
+"# print(len(response) == int(response))"
+]
 }
 ],
 "metadata": {
