lambda-feedback
diff --git a/‎conversion2025/assumptions.txt‎
Lines changed: 1 addition & 0 deletions b/‎conversion2025/assumptions.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎conversion2025/line_parser.lua‎
Lines changed: 48 additions & 0 deletions b/‎conversion2025/line_parser.lua‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎conversion2025/mathpix_to_llm_with_lines_to_api.ipynb‎
Lines changed: 87 additions & 19 deletions b/‎conversion2025/mathpix_to_llm_with_lines_to_api.ipynb‎
Lines changed: 87 additions & 19 deletions
@@ -4,6 +4,7 @@ assumptions:
     - parts are only 1 level deep (i.e. no Q1, part a), i)
     - individual questions and solutions are separable by using just lines
     - all parts are explicitly enumerated
+    - Chunky Independent Maths are separated (otherwise Mathpix will not be able to separate them)
 
 
 parts need to be ordered
@@ -0,0 +1,48 @@
+-- custom parser that reads a markdown file line by line, keeping display math blocks together
+local pandoc = require("pandoc")
+
+--- Pandoc custom reader: one paragraph per non-blank line; `$$` blocks stay whole.
+-- @param input source text handed over by pandoc; coerced with `tostring`
+-- @param reader_opts reader options (not used by this reader)
+-- @return `pandoc.Pandoc` document built from the collected blocks
+function Reader(input, reader_opts)
+    local blocks = {}
+    local in_math, math_buffer = false, {}
+
+    local function add_text(line)
+        -- blank lines produce no block
+        if not line:match("^%s*$") then
+            blocks[#blocks + 1] = pandoc.Para{pandoc.Str(line)}
+        end
+    end
+
+    local function add_math(content)
+        -- the `$$` delimiter lines remain part of the math text
+        blocks[#blocks + 1] = pandoc.Para{pandoc.Math("DisplayMath", content)}
+    end
+
+    for line in (tostring(input) .. "\n"):gmatch("(.-)\n") do
+        local _, delimiters = line:gsub("%$%$", "")
+        if in_math then
+            math_buffer[#math_buffer + 1] = line
+            if delimiters > 0 then
+                add_math(table.concat(math_buffer, "\n"))
+                math_buffer, in_math = {}, false
+            end
+        elseif delimiters == 0 then
+            add_text(line)
+        elseif delimiters % 2 == 0 then
+            -- `$$ ... $$` opened and closed on one line: already a complete block
+            add_math(line)
+        else
+            in_math, math_buffer = true, {line}
+        end
+    end
+
+    -- unterminated `$$`: keep the buffered lines as text instead of dropping them
+    for _, line in ipairs(math_buffer) do
+        add_text(line)
+    end
+
+    return pandoc.Pandoc(blocks)
+end
@@ -381,7 +381,7 @@
    "id": "16",
    "metadata": {},
    "source": [
-    "# Extract Questions"
+    "# Transform into markdown"
    ]
   },
   {
@@ -390,6 +390,74 @@
    "id": "17",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "# intermediate representation of the markdown\n",
+    "class Markdown():\n",
+    "    def __init__(self, content):\n",
+    "        self.content = content\n",
+    "\n",
+    "class DisplayMath(Markdown):\n",
+    "    content = \"\"\n",
+    "    \n",
+    "    def __init__(self, content):\n",
+    "        super().__init__(content)\n",
+    "\n",
+    "    def __str__(self):\n",
+    "        return f\"$$\\n{self.content}\\n$$\"\n",
+    "    \n",
+    "    def __repr__(self):\n",
+    "        return f\"DisplayMath({self.content!r})\"\n",
+    "\n",
+    "class RegularText(Markdown):\n",
+    "    def __init__(self, content):\n",
+    "        super().__init__(content)\n",
+    "\n",
+    "    def __str__(self):\n",
+    "        return self.content\n",
+    "\n",
+    "    def __repr__(self):\n",
+    "        return f\"RegularText({self.content!r})\"\n",
+    "\n",
+    "\n",
+    "def markdown_to_classes(markdown: str) -> list[Markdown]:\n",
+    "    lines = markdown.split(\"\\n\")\n",
+    "    ret = []\n",
+    "    math_buffer = []\n",
+    "    displayMath = False\n",
+    "    for line in lines:\n",
+    "        if line == \"$$\":\n",
+    "            displayMath = not displayMath\n",
+    "            if not displayMath:\n",
+    "                ret.append(DisplayMath(\"\\n\".join(math_buffer)))\n",
+    "                math_buffer = []\n",
+    "        else:\n",
+    "            if displayMath:\n",
+    "                math_buffer.append(line)\n",
+    "            else:\n",
+    "                ret.append(RegularText(line))\n",
+    "    return ret\n",
+    "\n",
+    "def classes_to_markdown(classes: list[Markdown]) -> str:\n",
+    "    lines = []\n",
+    "    for c in classes:\n",
+    "        lines.append(str(c))\n",
+    "    return \"\\n\".join(lines)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "18",
+   "metadata": {},
+   "source": [
+    "# Extract Questions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "19",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "#define initial question model\n",
     "class QuestionModelLines(BaseModel):\n",
@@ -506,7 +574,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "18",
+   "id": "20",
    "metadata": {},
    "source": [
     "# extracting images from content"
@@ -515,7 +583,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "19",
+   "id": "21",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -531,7 +599,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "20",
+   "id": "22",
    "metadata": {},
    "source": [
     "# extracting questions form the problem sheet"
@@ -540,7 +608,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "21",
+   "id": "23",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -593,7 +661,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "22",
+   "id": "24",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -702,7 +770,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "23",
+   "id": "25",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -739,7 +807,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "24",
+   "id": "26",
    "metadata": {},
    "source": [
     "# Extract question parts and solutions"
@@ -748,7 +816,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "25",
+   "id": "27",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -807,7 +875,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "26",
+   "id": "28",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -837,7 +905,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "27",
+   "id": "29",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1034,7 +1102,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "28",
+   "id": "30",
    "metadata": {},
    "source": [
     "# remove the duplicated text for single part questions"
@@ -1043,7 +1111,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "29",
+   "id": "31",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1082,7 +1150,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "30",
+   "id": "32",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1115,7 +1183,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "31",
+   "id": "33",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1124,7 +1192,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "32",
+   "id": "34",
    "metadata": {},
    "source": [
     "# Displaying questions"
@@ -1133,7 +1201,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "33",
+   "id": "35",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1160,7 +1228,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "34",
+   "id": "36",
    "metadata": {},
    "source": [
     "# in2lambda to JSON"
@@ -1169,7 +1237,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "35",
+   "id": "37",
    "metadata": {},
    "outputs": [],
    "source": [