From a25028b2e34810566dcc8e4802ba8c80be52744e Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:03:42 +0100 Subject: [PATCH 01/10] fix(split): atomic write + reject same-source output Replace the direct ``with open(...) as f: w.write(f)`` loop with ``self._atomic_pdf_write(w, out_path, sources=[pdf_path])`` so the existing tempfile + os.replace path is reused and the same-source guard rejects an output path that collides with the input PDF. Without this, a split row whose output name matched the basename of the source (e.g. input ``part_1.pdf`` split into ``part_1.pdf``) would truncate the input mid-flight while pypdf still held lazy reads against it. Co-Authored-By: Claude Opus 4.7 --- app/tools/split.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/app/tools/split.py b/app/tools/split.py index f03f013..890d237 100644 --- a/app/tools/split.py +++ b/app/tools/split.py @@ -131,7 +131,13 @@ def _run(self): r=r + 1, start=start, end=end)); continue w = PdfWriter() for p in range(start - 1, end): w.add_page(reader.pages[p]) - with open(os.path.join(out_dir, name), "wb") as f: w.write(f) + out_path = os.path.join(out_dir, name) + try: + self._atomic_pdf_write(w, out_path, sources=[pdf_path]) + except Exception as e: + errors.append(t("tool.split.row_invalid", + r=r + 1, start=start, end=end) + f" — {e}") + continue generated.append(name) if errors: QMessageBox.warning(self, t("msg.warning"), t("tool.split.skipped", errors="\n".join(errors))) if generated: From 23a5374d95d27dee68693e013d32d8ae211e761f Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:04:26 +0100 Subject: [PATCH 02/10] fix(window): dedupe folder drop on case-insensitive filesystems The drop handler globbed both ``*.pdf`` and ``*.PDF``; on Windows and macOS HFS+ those patterns match the same files, so every PDF was opened twice. Switch to a single ``os.listdir`` pass with an extension filter so the walk yields each file once regardless of filesystem case sensitivity. Co-Authored-By: Claude Opus 4.7 --- app/window.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/app/window.py b/app/window.py index 90fefab..7f660dc 100644 --- a/app/window.py +++ b/app/window.py @@ -1011,10 +1011,20 @@ def dropEvent(self, e): return continue if os.path.isdir(path): - for pdf in sorted(glob.glob(os.path.join(path, "*.pdf"))): - self._load_and_track(pdf) - # Also pick up uppercase .PDF on case-sensitive FSes - for pdf in sorted(glob.glob(os.path.join(path, "*.PDF"))): + # Case-insensitive walk avoids double-loading on Windows / + # macOS HFS+ where glob("*.pdf") and glob("*.PDF") return + # the same files. Single os.listdir pass + extension filter + # works correctly on both case-sensitive and -insensitive FSes. + try: + entries = os.listdir(path) + except OSError: + entries = [] + pdfs = sorted( + os.path.join(path, f) for f in entries + if f.lower().endswith(".pdf") + and os.path.isfile(os.path.join(path, f)) + ) + for pdf in pdfs: self._load_and_track(pdf) continue if path.lower().endswith(".pdf"): From 229b4e3dd46190f7a5f87d0eab50d4864862f516 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:05:24 +0100 Subject: [PATCH 03/10] fix(editor): exclude existing annotations from unsaved-edit checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Loading a PDF with sticky notes mirrors each one into ``self._pending`` (tagged ``_existing=True``) so the canvas can render their bubbles. Without filtering, every gate that watches ``_pending`` mistook those mirrored entries for real user edits: * ``closeEvent`` (in window.py) prompted "discard unsaved changes?" the moment the user opened any PDF with notes * ``_run`` skipped the no-pending guard and re-saved a byte- identical copy * The Forms-mode "has pending edits" warning fired on a freshly-opened PDF * The ``_pending_list`` UI listed pre-existing notes as if they were unsaved edits Add a ``_user_pending`` property that filters ``_existing`` entries (while keeping user-driven ``delete_annot`` actions, which have no flag) and route every gate through it. Also stop pushing existing notes into ``_pending_list`` — that widget is for THIS session's unapplied edits. Co-Authored-By: Claude Opus 4.7 --- app/editor/tab.py | 36 ++++++++++++++++++++++++++++++++---- app/window.py | 6 +++++- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/app/editor/tab.py b/app/editor/tab.py index 71db5b9..24ff45a 100644 --- a/app/editor/tab.py +++ b/app/editor/tab.py @@ -78,6 +78,21 @@ def _MODE_DEFS(self): def _DRAW_COLORS(self): return {t(k): v for k, v in zip(self._DRAW_COLORS_KEYS, self._DRAW_COLORS_VALS)} + @property + def _user_pending(self) -> list: + """Pending edits actually made by the user. + + ``_load_existing_annotations`` mirrors notes already embedded in + the PDF into ``self._pending`` with ``_existing=True`` so the + canvas can render their bubbles. Without this filter, just + opening any PDF with notes would (a) make ``closeEvent`` prompt + about "unsaved changes" the user never made and (b) trigger the + Forms-mode "has pending edits" warning. ``delete_annot`` edits + registered via the canvas context menu are real user actions and + are NOT tagged with ``_existing``, so they remain visible here. + """ + return [e for e in self._pending if not e.get("_existing")] + def __init__(self, status_fn): super().__init__() self._status = status_fn @@ -651,8 +666,13 @@ def _load_existing_annotations(self): "_annot_type": annot.type[0], "_annot_bbox": [r.x0, r.y0, r.x1, r.y1], }) - self._pending_list.addItem( - t("edit.status.note_label", n=page_idx + 1)) + # R11 #3: do NOT add existing notes to the + # _pending_list UI — that list represents + # edits THIS session that have not been + # applied yet. Showing pre-existing notes + # there made the user think they had + # unsaved work the moment they opened a + # PDF with sticky notes. count += 1 self._status(t("edit.status.note_loaded", count=count, total=total_annots)) @@ -1106,7 +1126,11 @@ def _run(self): if self._mode_idx == _MODE_FORMS: # If there are also pending edits, warn — Forms apply uses # pypdf and would silently drop the in-memory edits otherwise. - if self._pending: + # Use _user_pending so pre-existing notes loaded from the PDF + # don't trigger a false-positive warning. delete_annot edits + # ARE included (no _existing flag) so the warning still fires + # when the user has removed an existing note. + if self._user_pending: reply = QMessageBox.question( self, t("msg.warning"), t("editor.forms.has_pending"), @@ -1118,7 +1142,11 @@ def _run(self): return self._apply_forms(out) return - if not self._pending: + # R11 #3: filter pre-existing notes mirrored from the PDF so + # clicking Apply on a freshly-opened PDF (with only loaded + # annotations and no user edits) shows "no pending edits" + # instead of re-saving the same content unchanged. + if not self._user_pending: QMessageBox.warning(self, t("msg.warning"), t("msg.no_pending")); return try: import fitz diff --git a/app/window.py b/app/window.py index 7f660dc..353b76f 100644 --- a/app/window.py +++ b/app/window.py @@ -1196,7 +1196,11 @@ def closeEvent(self, event): # Check for unsaved edits in the editor tool (drawings, redactions, # text edits, signatures, notes added but not yet applied/saved) edit_w = self.stack.widget(self._edit_tool_idx()) - if edit_w and getattr(edit_w, "_pending", None): + # Use _user_pending so we don't prompt about pre-existing + # annotations that the editor mirrored into _pending only for + # canvas rendering (loading a PDF with notes should not look + # like "unsaved edits"). + if edit_w and getattr(edit_w, "_user_pending", None): ans = QMessageBox.question( self, t("msg.warning"), t("pipeline.unsaved_prompt"), QMessageBox.StandardButton.Discard From d988def3514344eb712aad4d425bd3b41325adf1 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:05:32 +0100 Subject: [PATCH 04/10] fix(utils): friendly error for parse_pages on bad input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Garbage input like ``abc`` or ``1-`` previously bubbled up the raw ``ValueError("invalid literal for int() with base 10: …")``, which ``show_error`` then masked behind "Something unexpected went wrong" — hiding the actionable cause from the user. Wrap the two ``int()`` calls in a ``try``/``except ValueError`` and re-raise with the new translated ``tool.err.bad_page_input`` key (localised across all 8 supported languages). The happy path, out-of-range message, and dedupe/sort contract are unchanged. Co-Authored-By: Claude Opus 4.7 --- app/translations.json | 8 ++++++++ app/utils.py | 20 ++++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/app/translations.json b/app/translations.json index 5ac7138..6af3822 100644 --- a/app/translations.json +++ b/app/translations.json @@ -164,6 +164,7 @@ "msg.warning": "Warning", "msg.confirm": "Confirm", "tool.err.same_source_output": "Output path cannot be the same as the source PDF. Choose a different filename.", + "tool.err.bad_page_input": "Invalid page input: '{text}' — use formats like '1', '3-5', or '1,3,5-7'.", "msg.error": "Error", "msg.unexpected": "Something went wrong. Click 'Show Details' below for the technical error, and the full traceback has been written to the log file.", "msg.done": "Done", @@ -762,6 +763,7 @@ "msg.warning": "Aviso", "msg.confirm": "Confirmar", "tool.err.same_source_output": "O caminho de saída não pode ser o mesmo que o PDF de origem. Escolhe outro nome de ficheiro.", + "tool.err.bad_page_input": "Entrada de páginas inválida: '{text}' — usa formatos como '1', '3-5' ou '1,3,5-7'.", "msg.error": "Erro", "msg.unexpected": "Algo correu mal. Clique em 'Mostrar Detalhes' em baixo para ver o erro técnico; o traceback completo foi escrito no ficheiro de log.", "msg.done": "Concluído", @@ -1360,6 +1362,7 @@ "msg.warning": "Aviso", "msg.confirm": "Confirmar", "tool.err.same_source_output": "La ruta de salida no puede ser la misma que el PDF de origen. Elige otro nombre de archivo.", + "tool.err.bad_page_input": "Entrada de páginas no válida: '{text}' — usa formatos como '1', '3-5' o '1,3,5-7'.", "msg.error": "Error", "msg.unexpected": "Algo salió mal. Haga clic en 'Mostrar detalles' abajo para ver el error técnico; el rastreo completo se ha escrito en el archivo de registro.", "msg.done": "Hecho", @@ -1958,6 +1961,7 @@ "msg.warning": "Avertissement", "msg.confirm": "Confirmer", "tool.err.same_source_output": "Le chemin de sortie ne peut pas être identique au PDF source. Choisissez un autre nom de fichier.", + "tool.err.bad_page_input": "Entrée de pages invalide : '{text}' — utilisez des formats comme '1', '3-5' ou '1,3,5-7'.", "msg.error": "Erreur", "msg.unexpected": "Une erreur s'est produite. Cliquez sur « Afficher les détails » ci-dessous pour voir l'erreur technique ; la trace complète a été écrite dans le fichier journal.", "msg.done": "Terminé", @@ -2556,6 +2560,7 @@ "msg.warning": "Warnung", "msg.confirm": "Bestätigen", "tool.err.same_source_output": "Der Ausgabepfad darf nicht mit der Quell-PDF identisch sein. Wählen Sie einen anderen Dateinamen.", + "tool.err.bad_page_input": "Ungültige Seiteneingabe: '{text}' — verwenden Sie Formate wie '1', '3-5' oder '1,3,5-7'.", "msg.error": "Fehler", "msg.unexpected": "Etwas ist schiefgegangen. Klicken Sie unten auf 'Details anzeigen', um den technischen Fehler zu sehen; das vollständige Traceback wurde in die Protokolldatei geschrieben.", "msg.done": "Fertig", @@ -3154,6 +3159,7 @@ "msg.warning": "警告", "msg.confirm": "确认", "tool.err.same_source_output": "输出路径不能与源 PDF 相同。请选择其他文件名。", + "tool.err.bad_page_input": "无效的页面输入:'{text}' — 请使用 '1'、'3-5' 或 '1,3,5-7' 等格式。", "msg.error": "错误", "msg.unexpected": "出了点问题。点击下方的“显示详情”查看技术错误;完整的堆栈跟踪已写入日志文件。", "msg.done": "完成", @@ -3752,6 +3758,7 @@ "msg.warning": "Avviso", "msg.confirm": "Conferma", "tool.err.same_source_output": "Il percorso di output non può coincidere con il PDF di origine. Scegli un nome file diverso.", + "tool.err.bad_page_input": "Input pagine non valido: '{text}' — usa formati come '1', '3-5' o '1,3,5-7'.", "msg.error": "Errore", "msg.unexpected": "Qualcosa è andato storto. Fare clic su 'Mostra dettagli' qui sotto per vedere l'errore tecnico; il traceback completo è stato scritto nel file di registro.", "msg.done": "Fatto", @@ -4350,6 +4357,7 @@ "msg.warning": "Waarschuwing", "msg.confirm": "Bevestigen", "tool.err.same_source_output": "Het uitvoerpad mag niet hetzelfde zijn als de bron-PDF. Kies een andere bestandsnaam.", + "tool.err.bad_page_input": "Ongeldige pagina-invoer: '{text}' — gebruik formaten zoals '1', '3-5' of '1,3,5-7'.", "msg.error": "Fout", "msg.unexpected": "Er is iets misgegaan. Klik hieronder op 'Details weergeven' om de technische fout te zien; de volledige traceback is naar het logbestand geschreven.", "msg.done": "Voltooid", diff --git a/app/utils.py b/app/utils.py index 74c0063..d7348c0 100644 --- a/app/utils.py +++ b/app/utils.py @@ -72,14 +72,26 @@ def parse_pages(text: str, total: int) -> list: part = part.strip() if not part: continue + try: + if "-" in part: + a, b = part.split("-", 1) + a_int, b_int = int(a), int(b) + else: + a_int = b_int = int(part) + except ValueError as exc: + # int() raised "invalid literal for int() with base 10" — + # re-raise with a translated, user-actionable message so + # show_error() surfaces something useful instead of the + # raw Python error. + raise ValueError( + t("tool.err.bad_page_input", text=part)) from exc if "-" in part: - a, b = part.split("-", 1) - a_int, b_int = int(a), int(b) if b_int - a_int + 1 > _MAX_PAGES: - raise ValueError(f"Range too large: {a}-{b} (max {_MAX_PAGES})") + raise ValueError( + f"Range too large: {a_int}-{b_int} (max {_MAX_PAGES})") pages.extend(range(a_int - 1, b_int)) else: - pages.append(int(part) - 1) + pages.append(a_int - 1) if len(pages) > _MAX_PAGES: raise ValueError(f"Too many pages selected (max {_MAX_PAGES})") invalid = [p for p in pages if p < 0 or p >= total] From d851383ce286807bc99d706f5f56cbe85bc4fcd0 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:05:44 +0100 Subject: [PATCH 05/10] fix(ocr): stream pages to writer incrementally to reduce peak memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous implementation collected every per-page Tesseract PDF output in ``pdf_pages = []`` and then walked the list to append each into a writer. For a 100-page job at 300 dpi this kept ~200– 500 MB of intermediate bytes resident before the final write — enough to OOM on machines tight on RAM. Append each page directly into the writer as soon as Tesseract returns it, then drop ``pix``, ``img``, and the per-page ``page_bytes`` locals. The writer's internal copies retain only what it actually needs. Co-Authored-By: Claude Opus 4.7 --- app/tools/ocr.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/app/tools/ocr.py b/app/tools/ocr.py index aa0e427..ca681a6 100644 --- a/app/tools/ocr.py +++ b/app/tools/ocr.py @@ -316,7 +316,13 @@ def do_work(_self): except OSError: pass raise else: - pdf_pages = [] + # R11 I1: stream pages directly into the writer + # instead of accumulating per-page PDF bytes in a + # list. A 100-page @ 300dpi job otherwise sits on + # ~200-500MB of peak RAM before the final write. + # Each per-page BytesIO is dropped as soon as + # writer.append() finishes copying its objects. + writer = PdfWriter() for i, page in enumerate(doc): if _self.is_cancelled(): return None @@ -334,12 +340,16 @@ def do_work(_self): pix = fitz.Pixmap(fitz.csRGB, pix) img = Image.frombytes( "RGB", (pix.width, pix.height), pix.samples) - pdf_pages.append( - pytesseract.image_to_pdf_or_hocr( - img, lang=lang, extension="pdf")) - writer = PdfWriter() - for page_bytes in pdf_pages: - writer.append(PdfReader(_io.BytesIO(page_bytes))) + page_bytes = pytesseract.image_to_pdf_or_hocr( + img, lang=lang, extension="pdf") + writer.append( + PdfReader(_io.BytesIO(page_bytes))) + # Drop large per-page buffers now; without + # this they linger until the next loop + # iteration rebinds the locals. + pix = None + img.close() + del page_bytes self._atomic_pdf_write( writer, out_path, sources=[pdf_path]) finally: From 30a300ff4d490cd3cdfa9cb01ba524be73a74163 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:05:53 +0100 Subject: [PATCH 06/10] fix(window): refresh recents UI after _clear_recent ``_clear_recent`` wrote ``recent_files: []`` to disk but did not notify any open viewer, so the placeholder pane kept showing the stale entries until the next reload. Mirror the ``_load_and_track`` pattern: iterate ``self._viewers`` and invoke ``_refresh_recents`` on each (wrapped in ``contextlib.suppress`` so a deleted viewer doesn't abort the rest). Co-Authored-By: Claude Opus 4.7 --- app/window.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/app/window.py b/app/window.py index 353b76f..b277454 100644 --- a/app/window.py +++ b/app/window.py @@ -858,6 +858,15 @@ def _clear_recent(self): _update_config(lambda cfg: cfg.__setitem__("recent_files", [])) except Exception: pass + # R11 M5: mirror the _load_and_track refresh pattern so every + # open viewer's recents list updates immediately. Without this + # the user clears recents but the placeholder pane still shows + # the old entries until the next reload. + for v in self._viewers: + refresh = getattr(v, "_refresh_recents", None) + if callable(refresh): + with contextlib.suppress(Exception): + refresh() def _set_status(self, msg: str): self._sb.showMessage(msg) From fc309e2daf87bb67b583fd1bf05248d932cacf04 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:06:02 +0100 Subject: [PATCH 07/10] fix(i18n): use lexists for get_recent_files + writeback cleaned entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``os.path.isfile`` on a OneDrive Files-On-Demand placeholder triggers a placeholder rehydration, blocking the UI for seconds the first time the app opens. The recents validation runs at startup, so the freeze is the first thing the user sees. Switch the per-entry check to ``os.path.lexists``, which does not follow symlinks or force a download. Broken-symlink entries are acceptable here — they get pruned the moment the user actually tries to open them. When entries are dropped during the scan, write the trimmed list back through ``_update_config`` so the next startup has less to re-check. Co-Authored-By: Claude Opus 4.7 --- app/i18n.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/app/i18n.py b/app/i18n.py index bd2907a..d90a1dd 100644 --- a/app/i18n.py +++ b/app/i18n.py @@ -211,7 +211,21 @@ def get_recent_files() -> list[str]: recents = cfg.get("recent_files", []) except Exception: return [] - return [p for p in recents if isinstance(p, str) and os.path.isfile(p)] + # R11 M3: use os.path.lexists so OneDrive Files-On-Demand placeholders + # are NOT eagerly downloaded just to validate the recents list. + # os.path.isfile triggers a sync that can freeze the UI for seconds + # the first time the app starts on a OneDrive folder. + valid = [p for p in recents + if isinstance(p, str) and os.path.lexists(p)] + # R11 M3: writeback when entries were dropped so the next call has + # less to re-check on disk. Routed through _update_config to keep + # serialization with concurrent writers (add_recent_file etc.). + if len(valid) != len([p for p in recents if isinstance(p, str)]): + try: + _update_config(lambda c: c.__setitem__("recent_files", valid)) + except Exception: + pass + return valid def add_recent_file(path: str): From 2f6b4bca452482d08a6fa286c3c329b45e6ccf43 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:06:11 +0100 Subject: [PATCH 08/10] fix(editor): close PdfReader file handle in _apply_forms ``PdfReader(self._doc_path)`` held an internal stream pinned by the reader object until garbage collection. On Windows that blocked any subsequent tool from renaming the same file (file lock still active), surfacing as cryptic "file in use" errors when chaining Forms apply with another tool. Wrap the source open in an explicit ``with open(...) as _src`` block and pass the stream to ``PdfReader``. All writer work (``append``, ``update_page_form_field_values``, ``write``, ``os.replace``) happens INSIDE the with-block so pypdf's lazy reads still see a live stream when they fire. On scope exit the file handle is released deterministically. Co-Authored-By: Claude Opus 4.7 --- app/editor/tab.py | 107 +++++++++++++++++++++++++--------------------- 1 file changed, 58 insertions(+), 49 deletions(-) diff --git a/app/editor/tab.py b/app/editor/tab.py index 24ff45a..223ab73 100644 --- a/app/editor/tab.py +++ b/app/editor/tab.py @@ -1303,56 +1303,65 @@ def _fitz_permissions_of(doc) -> int: def _apply_forms(self, out): try: from pypdf import PdfWriter, PdfReader - _r = PdfReader(self._doc_path) - was_encrypted = bool(_r.is_encrypted) - if was_encrypted and self._pdf_password: - _r.decrypt(self._pdf_password) - # If input was encrypted, ask the user how to save the output. - encrypt_choice = "plaintext" - if was_encrypted and self._pdf_password: - encrypt_choice = self._prompt_encryption_choice() - if encrypt_choice is None: + # R11 #8: open the source via an explicit ``with open(...)`` so + # the file handle is closed deterministically when the block + # exits — previously ``PdfReader(self._doc_path)`` held an + # internal stream alive until garbage collection, which on + # Windows blocked another tool from renaming/overwriting the + # same file. We do all writer work INSIDE the with-block so + # the lazy reads triggered by writer.append / write happen + # while the stream is still valid. + with open(self._doc_path, "rb") as _src: + _r = PdfReader(_src) + was_encrypted = bool(_r.is_encrypted) + if was_encrypted and self._pdf_password: + _r.decrypt(self._pdf_password) + # If input was encrypted, ask the user how to save. + encrypt_choice = "plaintext" + if was_encrypted and self._pdf_password: + encrypt_choice = self._prompt_encryption_choice() + if encrypt_choice is None: + return + writer = PdfWriter(); writer.append(_r) + fields = {self._form_table.item(r, 0).text(): + (self._form_table.item(r, 1).text() if self._form_table.item(r, 1) else "") + for r in range(self._form_table.rowCount())} + # R10 #6: pypdf's update_page_form_field_values raises + # PyPdfError("No /AcroForm dictionary in PDF…") on PDFs + # without form fields. The user hits this whenever they + # click Apply in Forms mode on a regular PDF; the cryptic + # error message looked like an internal crash. Detect + # up-front and short-circuit with a friendly status + # instead, leaving the file untouched. + if "/AcroForm" not in writer._root_object: + self._status(t("editor.forms.no_fields")) + self._form_status.setText(t("editor.forms.no_fields")) return - writer = PdfWriter(); writer.append(_r) - fields = {self._form_table.item(r, 0).text(): - (self._form_table.item(r, 1).text() if self._form_table.item(r, 1) else "") - for r in range(self._form_table.rowCount())} - # R10 #6: pypdf's update_page_form_field_values raises - # PyPdfError("No /AcroForm dictionary in PDF…") on PDFs - # without form fields. The user hits this whenever they - # click Apply in Forms mode on a regular PDF; the cryptic - # error message looked like an internal crash. Detect - # up-front and short-circuit with a friendly status - # instead, leaving the file untouched. - if "/AcroForm" not in writer._root_object: - self._status(t("editor.forms.no_fields")) - self._form_status.setText(t("editor.forms.no_fields")) - return - for page in writer.pages: - # auto_regenerate=True so the rendered widget appearance - # actually picks up the new value when viewed in a third- - # party viewer (Adobe etc.) that doesn't render NeedAppearances. - writer.update_page_form_field_values(page, fields, auto_regenerate=True) - if encrypt_choice == "protect" and self._pdf_password: - # Documented limitation: owner == user; original owner - # password is not recoverable from the input file. - writer.encrypt( - user_password=self._pdf_password, - owner_password=self._pdf_password, - algorithm="AES-256", - ) - _log.info( - "Re-encrypted forms output with user password as owner") - fd, tmp = tempfile.mkstemp(prefix=".pdfapps_save_", suffix=".pdf", - dir=os.path.dirname(out) or ".") - os.close(fd) - try: - with open(tmp, "wb") as f: writer.write(f) - os.replace(tmp, out) - except Exception: - try: os.unlink(tmp) - except OSError: pass - raise + for page in writer.pages: + # auto_regenerate=True so the rendered widget appearance + # actually picks up the new value when viewed in a third- + # party viewer (Adobe etc.) that doesn't render NeedAppearances. + writer.update_page_form_field_values(page, fields, auto_regenerate=True) + if encrypt_choice == "protect" and self._pdf_password: + # Documented limitation: owner == user; original owner + # password is not recoverable from the input file. + writer.encrypt( + user_password=self._pdf_password, + owner_password=self._pdf_password, + algorithm="AES-256", + ) + _log.info( + "Re-encrypted forms output with user password as owner") + fd, tmp = tempfile.mkstemp(prefix=".pdfapps_save_", suffix=".pdf", + dir=os.path.dirname(out) or ".") + os.close(fd) + try: + with open(tmp, "wb") as f: writer.write(f) + os.replace(tmp, out) + except Exception: + try: os.unlink(tmp) + except OSError: pass + raise self._status(t("edit.status.form_saved", path=out)) QMessageBox.information(self, t("msg.done"), t("msg.form_saved", path=out)) except Exception as e: From 76fcd9747a10679b548c034bdbe4bb70b26dffed Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:06:20 +0100 Subject: [PATCH 09/10] test: regression tests for Round 11 audit findings 22 tests covering the 8 PR-G bugs: * split.py atomic write + same-source rejection * Drop-folder dedup on case-insensitive filesystems * Editor _user_pending filter (closeEvent, _run, Forms warning, _pending_list UI) * parse_pages friendly error + happy-path regression guard * OCR streaming writer (no more pdf_pages = [] accumulation) * _clear_recent open-viewer refresh * get_recent_files lexists + writeback (behavioral, monkeypatched config path) * _apply_forms with-open + writer-inside-block Plus an i18n parity assertion that the new ``tool.err.bad_page_input`` key exists with a ``{text}`` placeholder in all 8 supported languages. Co-Authored-By: Claude Opus 4.7 --- tests/test_audit_r11.py | 367 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 367 insertions(+) create mode 100644 tests/test_audit_r11.py diff --git a/tests/test_audit_r11.py b/tests/test_audit_r11.py new file mode 100644 index 0000000..cf672fc --- /dev/null +++ b/tests/test_audit_r11.py @@ -0,0 +1,367 @@ +"""Source-level + behavioral regression tests for PR-G / Round 11 fixes. + +Bug map (PR-G worklist): + #1 split.py no atomic write (same-source truncation) (HIGH A6) + #2 Drop folder duplicates on case-insensitive FS (HIGH N5) + #3 Editor _pending filter for existing notes (HIGH N2) + #4 parse_pages friendly error (MED N4) + #5 OCR streaming writer (memory regression) (MED I1) + #6 _clear_recent UI refresh (MED M5) + #7 get_recent_files writeback + lexists for OneDrive (MED M3) + #8 _apply_forms PdfReader stream leak (MED Extra) +""" + +from __future__ import annotations + +import json +import os +import tempfile +from pathlib import Path +from unittest.mock import patch + +import pytest + +ROOT = Path(__file__).resolve().parent.parent + + +def _read(rel: str) -> str: + return (ROOT / rel).read_text(encoding="utf-8") + + +# ── #1 — split.py uses _atomic_pdf_write ──────────────────────────────── + + +def test_split_uses_atomic_pdf_write(): + """split.py per-row writes must route through _atomic_pdf_write so + a same-source overwrite is rejected up front and lazy reads from + PdfReader are not truncated mid-flight.""" + src = _read("app/tools/split.py") + assert "_atomic_pdf_write" in src, ( + "split.py must call self._atomic_pdf_write to inherit the " + "same-source guard + tempfile/os.replace path." + ) + # The raw direct-write pattern must be gone. + assert "with open(os.path.join(out_dir, name), \"wb\") as f: w.write(f)" \ + not in src, "Old direct-write pattern still present in split.py" + + +def test_split_passes_input_as_source_to_atomic_write(): + """The atomic write call must pass the input pdf as a source so the + _check_not_same_path layer can reject same-source-output.""" + src = _read("app/tools/split.py") + # Look for the call site with the sources kwarg. + assert "sources=[pdf_path]" in src + + +# ── #2 — Drop folder dedup on case-insensitive FS ──────────────────────── + + +def test_window_drop_folder_does_not_double_glob(): + """The pre-fix code globbed *.pdf AND *.PDF which double-loaded every + file on Windows / HFS+. The fix uses os.listdir + extension filter.""" + src = _read("app/window.py") + # The two-glob pattern must be gone. + assert "glob.glob(os.path.join(path, \"*.PDF\"))" not in src + # The replacement must be there. + assert "os.listdir(path)" in src + # And case-insensitive extension match. + assert ".lower().endswith(\".pdf\")" in src + + +def test_window_drop_folder_dedup_behavioral(tmp_path): + """Behavioral: simulate the new walk pattern against a directory + containing case-mixed PDF filenames. The result must contain each + file exactly once even on filesystems where uppercase and lowercase + globs both match the same on-disk file.""" + d = tmp_path + # Create files in lower + uppercase. On Windows / HFS+ these are + # the same file; we still test that the walk pattern doesn't yield + # duplicates when the FS is case-sensitive. + (d / "a.pdf").write_bytes(b"%PDF-1.4") + # Use a distinct uppercase name to avoid same-file collision on + # case-insensitive FSes. + (d / "B.PDF").write_bytes(b"%PDF-1.4") + entries = os.listdir(str(d)) + pdfs = sorted( + os.path.join(str(d), f) for f in entries + if f.lower().endswith(".pdf") + and os.path.isfile(os.path.join(str(d), f)) + ) + # No duplicates allowed. + assert len(pdfs) == len(set(pdfs)) + # Both files present. + assert any(p.endswith("a.pdf") for p in pdfs) + assert any(p.endswith("B.PDF") for p in pdfs) + + +# ── #3 — Editor _user_pending filters existing notes ──────────────────── + + +def test_editor_has_user_pending_property(): + """The editor must expose _user_pending that filters out _existing + annotations mirrored from the loaded PDF.""" + src = _read("app/editor/tab.py") + assert "_user_pending" in src + assert "if not e.get(\"_existing\")" in src + + +def test_window_closeevent_uses_user_pending(): + """The window's closeEvent guard must use _user_pending so just + opening a PDF with existing notes does not prompt 'unsaved'.""" + src = _read("app/window.py") + # The guard must reference _user_pending, not raw _pending. + assert "getattr(edit_w, \"_user_pending\", None)" in src + assert "getattr(edit_w, \"_pending\", None)" not in src + + +def test_editor_forms_warning_uses_user_pending(): + """Forms-mode 'you have pending edits' warning must filter existing + annotations but keep delete_annot (which has no _existing flag).""" + src = _read("app/editor/tab.py") + forms_block_start = src.find("_MODE_FORMS:\n # If there are also pending edits") + assert forms_block_start > 0 + block = src[forms_block_start:forms_block_start + 800] + assert "self._user_pending" in block + + +def test_user_pending_excludes_existing_keeps_user_edits(): + """Behavioral: a pending list with one existing note + one user + deletion + one user draw must yield only the user edits.""" + sample = [ + {"type": "note", "page": 0, "text": "loaded from disk", + "_existing": True}, + {"type": "delete_annot", "page": 0, "annot_type": 0, + "bbox": [0, 0, 10, 10]}, + {"type": "draw", "page": 1, "points": [(0, 0), (1, 1)]}, + ] + user_only = [e for e in sample if not e.get("_existing")] + assert len(user_only) == 2 + assert {e["type"] for e in user_only} == {"delete_annot", "draw"} + + +def test_editor_run_uses_user_pending_for_empty_check(): + """The _run() short-circuit ('no pending edits') must use + _user_pending so opening a PDF with sticky notes and clicking + Apply doesn't silently re-save unchanged.""" + src = _read("app/editor/tab.py") + run_start = src.find(" def _run(self):") + next_def = src.find(" def ", run_start + 1) + body = src[run_start:next_def] + assert "if not self._user_pending:" in body + assert "msg.no_pending" in body + + +def test_editor_pending_list_skips_existing_notes(): + """_load_existing_annotations must NOT push pre-existing notes + into the _pending_list UI — that widget is for THIS session's + unsaved edits only. The note overlay (canvas rendering) is + untouched; only the visible 'pending edits' list is filtered.""" + src = _read("app/editor/tab.py") + load_start = src.find("def _load_existing_annotations(self)") + next_def = src.find(" def ", load_start + 1) + body = src[load_start:next_def] + # The old addItem line must be gone from the existing-notes branch. + # We check that 'self._pending_list.addItem' is not called in the + # branch that sets `_existing`: True. + assert "\"_existing\": True" in body + # Look around the _existing=True dict for an addItem — there must be + # none in the same indentation block. + existing_idx = body.find("\"_existing\": True") + # Scan the next ~600 chars after the dict closes for an addItem call + # before the next outer-level statement (e.g. count += 1 or next + # `for`); the test is permissive because formatting can shift. + tail = body[existing_idx:existing_idx + 800] + # The cleanup comment marker we placed identifies the intentional + # removal so a future refactor that re-adds the line is caught. + assert "do NOT add existing notes to the" in tail + + +# ── #4 — parse_pages friendly error ───────────────────────────────────── + + +def test_parse_pages_raises_friendly_message_for_invalid_input(): + """Garbage input like 'abc' or '1-' must surface the translated + bad_page_input message instead of the raw int() ValueError.""" + from app.utils import parse_pages + + with pytest.raises(ValueError) as exc: + parse_pages("abc", 10) + msg = str(exc.value) + # The friendly message embeds the offending substring. + assert "abc" in msg + # And does NOT leak the python internal phrasing. + assert "invalid literal for int" not in msg + + +def test_parse_pages_raises_friendly_message_for_open_range(): + from app.utils import parse_pages + with pytest.raises(ValueError) as exc: + parse_pages("1-", 10) + msg = str(exc.value) + assert "1-" in msg + assert "invalid literal for int" not in msg + + +def test_parse_pages_still_accepts_valid_input(): + """Regression guard — the new try/except must not break the happy + path or the dedupe/sort contract.""" + from app.utils import parse_pages + assert parse_pages("3,1,2,3", 10) == [0, 1, 2] + assert parse_pages("1-3", 10) == [0, 1, 2] + assert parse_pages("1,4-5", 10) == [0, 3, 4] + + +def test_parse_pages_out_of_range_still_works(): + """Out-of-range error message format must be preserved.""" + from app.utils import parse_pages + with pytest.raises(ValueError) as exc: + parse_pages("99", 10) + assert "out of range" in str(exc.value).lower() + + +# ── #5 — OCR streaming writer ─────────────────────────────────────────── + + +def test_ocr_no_longer_accumulates_pdf_pages_list(): + """The old pattern collected all per-page PDF bytes in a list before + writing — a 100-pg @ 300dpi job spent ~500MB RAM. The new code + appends each page directly into the writer.""" + src = _read("app/tools/ocr.py") + # Old accumulation list must be gone. + assert "pdf_pages = []" not in src + assert "pdf_pages.append" not in src + # New streaming pattern: writer.append inside the per-page loop. + # The marker is that writer is instantiated before the loop and + # writer.append happens inside the same scope as the for-page loop. + assert "writer = PdfWriter()" in src + # And we drop the per-page buffers explicitly. + assert "img.close()" in src + + +# ── #6 — _clear_recent refreshes UI ───────────────────────────────────── + + +def test_clear_recent_refreshes_open_viewers(): + """_clear_recent must iterate self._viewers and call _refresh_recents + on each, mirroring the pattern in _load_and_track.""" + src = _read("app/window.py") + clear_start = src.find("def _clear_recent(self)") + next_def = src.find("def ", clear_start + 1) + body = src[clear_start:next_def] + assert "_refresh_recents" in body, ( + "_clear_recent must refresh recents in all open viewers (M5)." + ) + assert "self._viewers" in body + + +# ── #7 — get_recent_files writeback + lexists ─────────────────────────── + + +def test_get_recent_files_uses_lexists(): + """OneDrive Files-On-Demand: isfile triggers a placeholder download. + lexists does not. The fix swaps isfile for lexists.""" + src = _read("app/i18n.py") + func_start = src.find("def get_recent_files()") + func_end = src.find("def ", func_start + 1) + body = src[func_start:func_end] + assert "os.path.lexists" in body + # The old isfile call must be gone from non-comment lines. Strip + # comments before searching so mentions of the old API in the + # explanatory comment don't trip this assertion. + code_lines = [ + ln for ln in body.splitlines() + if not ln.lstrip().startswith("#") + ] + code_only = "\n".join(code_lines) + assert "os.path.isfile" not in code_only + + +def test_get_recent_files_writeback_when_entries_dropped(tmp_path, monkeypatch): + """Behavioral: when a stale entry is filtered out, get_recent_files + must writeback the trimmed list so we don't re-check disk forever.""" + # Stub config path to a temp file. + cfg = tmp_path / "cfg.json" + real = str(tmp_path / "real.pdf") + Path(real).write_bytes(b"%PDF-1.4") + cfg.write_text(json.dumps({ + "recent_files": [real, str(tmp_path / "missing.pdf")], + }), encoding="utf-8") + + import app.i18n as i18n + monkeypatch.setattr(i18n, "_CONFIG_PATH", str(cfg)) + + result = i18n.get_recent_files() + assert result == [real] + # Writeback should have trimmed the list on disk. + written = json.loads(cfg.read_text(encoding="utf-8")) + assert written["recent_files"] == [real] + + +def test_get_recent_files_no_writeback_when_all_valid(tmp_path, monkeypatch): + """Don't churn the config file when nothing changed.""" + cfg = tmp_path / "cfg.json" + real1 = str(tmp_path / "a.pdf") + real2 = str(tmp_path / "b.pdf") + Path(real1).write_bytes(b"%PDF-1.4") + Path(real2).write_bytes(b"%PDF-1.4") + cfg.write_text(json.dumps({"recent_files": [real1, real2]}), + encoding="utf-8") + mtime_before = cfg.stat().st_mtime_ns + + import app.i18n as i18n + monkeypatch.setattr(i18n, "_CONFIG_PATH", str(cfg)) + + result = i18n.get_recent_files() + assert result == [real1, real2] + # No writeback when valid == recents. + assert cfg.stat().st_mtime_ns == mtime_before + + +# ── #8 — _apply_forms closes PdfReader stream ─────────────────────────── + + +def test_apply_forms_uses_with_open_for_source(): + """_apply_forms must wrap the source PdfReader in `with open(...)` so + the file handle is released deterministically — leaving it pinned + by GC blocks Windows from renaming the file from another tool.""" + src = _read("app/editor/tab.py") + apply_start = src.find("def _apply_forms(self, out)") + next_def = src.find(" def ", apply_start + 1) + body = src[apply_start:next_def] + assert "with open(self._doc_path, \"rb\")" in body + # PdfReader must take the stream (not the raw path). + assert "PdfReader(_src)" in body + + +def test_apply_forms_writer_work_happens_inside_with_block(): + """The writer.append / .write_*** calls must be inside the with-block + so the source stream is still alive when pypdf reads lazily.""" + src = _read("app/editor/tab.py") + apply_start = src.find("def _apply_forms(self, out)") + next_def = src.find(" def ", apply_start + 1) + body = src[apply_start:next_def] + # Find the with-block start and verify writer.write inside. + with_pos = body.find("with open(self._doc_path") + assert with_pos > 0 + # All the writer interactions should appear after the with-line. + assert body.find("writer = PdfWriter()", with_pos) > with_pos + assert body.find("writer.write(f)", with_pos) > with_pos + + +# ── i18n parity ───────────────────────────────────────────────────────── + + +def test_translations_parity_for_new_key(): + """The new tool.err.bad_page_input key must exist in all 8 languages.""" + data = json.loads(_read("app/translations.json")) + expected_langs = {"en", "pt", "es", "fr", "de", "zh", "it", "nl"} + assert set(data.keys()) >= expected_langs + for lang in expected_langs: + assert "tool.err.bad_page_input" in data[lang], ( + f"Missing translation key in '{lang}'" + ) + # All translations must reference the {text} placeholder so + # parse_pages's `.format()` call resolves cleanly. + assert "{text}" in data[lang]["tool.err.bad_page_input"], ( + f"Translation in '{lang}' is missing the {{text}} placeholder" + ) From c08c1dccccc2749e878d6889aec59f81adbab2c1 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:18:15 +0100 Subject: [PATCH 10/10] fix(editor): keep delete_annot in _user_pending even for existing notes Round 11 review found that the _user_pending filter excluded all edits with _existing=True. But delete_annot edits for existing notes are also registered with _existing=True (PR-D fix #3 path), so they were silently dropped from save processing and closeEvent prompts -- defeating both PR-D and PR-F's note deletion persistence fixes. Now the filter excludes _existing edits EXCEPT for delete_annot, which represents the user's explicit intent to remove. Added two regression tests covering the bug surface. Also addresses minor review nits: removed unverified 200-500MB memory claim from OCR comment; clarified UNC SMB caveat for lexists in get_recent_files. Co-Authored-By: Claude Opus 4.7 --- app/editor/tab.py | 27 ++++++++++---- app/i18n.py | 10 +++--- app/tools/ocr.py | 14 ++++---- tests/test_audit_r11.py | 79 ++++++++++++++++++++++++++++++++++++++++- 4 files changed, 112 insertions(+), 18 deletions(-) diff --git a/app/editor/tab.py b/app/editor/tab.py index 223ab73..d8ee081 100644 --- a/app/editor/tab.py +++ b/app/editor/tab.py @@ -80,18 +80,30 @@ def _DRAW_COLORS(self): @property def _user_pending(self) -> list: - """Pending edits actually made by the user. + """Pending edits that represent USER changes since loading the PDF. ``_load_existing_annotations`` mirrors notes already embedded in the PDF into ``self._pending`` with ``_existing=True`` so the canvas can render their bubbles. Without this filter, just opening any PDF with notes would (a) make ``closeEvent`` prompt about "unsaved changes" the user never made and (b) trigger the - Forms-mode "has pending edits" warning. ``delete_annot`` edits - registered via the canvas context menu are real user actions and - are NOT tagged with ``_existing``, so they remain visible here. + Forms-mode "has pending edits" warning. + + R11 C6: ``delete_annot`` edits raised by the canvas context menu + on an existing note are also tagged with ``_existing=True`` (see + ``_on_note_deleted`` — the flag carries through so undo can put + the original back). A plain ``_existing`` filter therefore + silently dropped real user deletions: ``_run`` warned "no + pending edits" and ``closeEvent`` skipped the unsaved-changes + prompt. The deletion was lost on save. We now keep + ``delete_annot`` edits regardless of ``_existing`` because they + always represent the user's explicit intent to remove an + annotation. """ - return [e for e in self._pending if not e.get("_existing")] + return [ + e for e in self._pending + if not e.get("_existing") or e.get("type") == "delete_annot" + ] def __init__(self, status_fn): super().__init__() @@ -1128,8 +1140,9 @@ def _run(self): # pypdf and would silently drop the in-memory edits otherwise. # Use _user_pending so pre-existing notes loaded from the PDF # don't trigger a false-positive warning. delete_annot edits - # ARE included (no _existing flag) so the warning still fires - # when the user has removed an existing note. + # ARE included even when carrying _existing=True (see the + # _user_pending docstring) so the warning still fires when + # the user has removed an existing note. if self._user_pending: reply = QMessageBox.question( self, t("msg.warning"), diff --git a/app/i18n.py b/app/i18n.py index d90a1dd..89faecd 100644 --- a/app/i18n.py +++ b/app/i18n.py @@ -211,10 +211,12 @@ def get_recent_files() -> list[str]: recents = cfg.get("recent_files", []) except Exception: return [] - # R11 M3: use os.path.lexists so OneDrive Files-On-Demand placeholders - # are NOT eagerly downloaded just to validate the recents list. - # os.path.isfile triggers a sync that can freeze the UI for seconds - # the first time the app starts on a OneDrive folder. + # R11 M3: use lexists instead of isfile so OneDrive Files-On-Demand + # placeholders aren't force-downloaded on every startup. isfile + # triggers a sync that can freeze the UI for seconds the first time + # the app starts on a OneDrive folder. Note: UNC paths to offline + # SMB shares may still cause a network round-trip but won't trigger + # placeholder hydration like isfile would. valid = [p for p in recents if isinstance(p, str) and os.path.lexists(p)] # R11 M3: writeback when entries were dropped so the next call has diff --git a/app/tools/ocr.py b/app/tools/ocr.py index ca681a6..aec03f5 100644 --- a/app/tools/ocr.py +++ b/app/tools/ocr.py @@ -316,12 +316,14 @@ def do_work(_self): except OSError: pass raise else: - # R11 I1: stream pages directly into the writer - # instead of accumulating per-page PDF bytes in a - # list. A 100-page @ 300dpi job otherwise sits on - # ~200-500MB of peak RAM before the final write. - # Each per-page BytesIO is dropped as soon as - # writer.append() finishes copying its objects. + # R11 I1: stream pages to the writer as they're + # processed instead of accumulating all per-page + # PDF bytes in a list before the final write. + # This reduces peak memory usage for large PDFs; + # the exact savings depend on pypdf's internal + # page copy behavior. Each per-page BytesIO is + # dropped as soon as writer.append() finishes + # copying its objects. writer = PdfWriter() for i, page in enumerate(doc): if _self.is_cancelled(): diff --git a/tests/test_audit_r11.py b/tests/test_audit_r11.py index cf672fc..99f781e 100644 --- a/tests/test_audit_r11.py +++ b/tests/test_audit_r11.py @@ -116,7 +116,8 @@ def test_window_closeevent_uses_user_pending(): def test_editor_forms_warning_uses_user_pending(): """Forms-mode 'you have pending edits' warning must filter existing - annotations but keep delete_annot (which has no _existing flag).""" + annotations but keep delete_annot (which may carry _existing=True + when the user removed a pre-existing note via the canvas menu).""" src = _read("app/editor/tab.py") forms_block_start = src.find("_MODE_FORMS:\n # If there are also pending edits") assert forms_block_start > 0 @@ -176,6 +177,82 @@ def test_editor_pending_list_skips_existing_notes(): assert "do NOT add existing notes to the" in tail +# ── R11 C6 — _user_pending must keep delete_annot even for existing ───── + + +def _make_user_pending_stub(): + """Tiny stub so the _user_pending property can be exercised without + spinning up the full Qt tab. Only the attributes it touches matter.""" + from app.editor.tab import TabEditar + + class _Stub: + _user_pending = TabEditar._user_pending + + def __init__(self): + self._pending: list = [] + + return _Stub() + + +def test_user_pending_keeps_delete_annot_even_if_existing(): + """Regression for reviewer R11 C6: ``delete_annot`` edits for + existing notes are also tagged with ``_existing=True`` (so undo can + restore the original). The previous ``_user_pending`` filter dropped + them, which made ``_run`` warn 'no pending edits' and ``closeEvent`` + skip the unsaved-changes prompt — silently losing the deletion.""" + s = _make_user_pending_stub() + s._pending = [ + # Mirrored on load — must be filtered. + {"type": "note", "text": "original", "_existing": True}, + # User right-clicked the loaded note and chose Delete. The + # _existing flag carries through so _undo can put the original + # back on the canvas. MUST stay in _user_pending. + {"type": "delete_annot", "page": 0, "bbox": (0, 0, 10, 10), + "_existing": True, + "_original_note": {"type": "note", "text": "original"}}, + # Brand-new note typed by the user. MUST stay in _user_pending. + {"type": "note", "text": "new user note"}, + ] + user_pending = s._user_pending + assert len(user_pending) == 2 + types = [e["type"] for e in user_pending] + assert "delete_annot" in types + assert "note" in types + # The original existing note is filtered out. + assert not any(e.get("text") == "original" for e in user_pending) + + +def test_delete_existing_note_then_apply_persists(): + """End-to-end flow: user opens a PDF with one sticky note (mirrored + into ``_pending`` by ``_load_existing_annotations`` with + ``_existing=True``), right-clicks → Delete (which appends a + ``delete_annot`` edit also carrying ``_existing=True``), then clicks + Apply. ``_user_pending`` MUST surface the deletion so the save loop + processes it instead of hitting the no_changes warning.""" + s = _make_user_pending_stub() + # _load_existing_annotations behavior. + s._pending = [{ + "type": "note", "text": "loaded", "page": 0, + "rect": (0, 0, 10, 10), "_existing": True, + }] + # Context-menu delete (cf. _on_note_deleted at app/editor/tab.py + # ~line 1078): the delete_annot edit carries _existing=True so that + # undo can roll back to the original note. + s._pending.append({ + "type": "delete_annot", + "page": 0, + "bbox": (0, 0, 10, 10), + "_existing": True, + "_original_note": s._pending[0], + }) + # Apply click — the _run guard reads _user_pending. + user_pending = s._user_pending + # The delete MUST be visible to the save loop. + assert any(e["type"] == "delete_annot" for e in user_pending), ( + "delete_annot with _existing=True was filtered out — the " + "user's deletion would be silently lost on save") + + # ── #4 — parse_pages friendly error ─────────────────────────────────────