From 06f3e85ce5f26b8dca3b041331eaca23ea133f68 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:37:13 +0100 Subject: [PATCH 01/12] fix: install global sys.excepthook to log uncaught Qt exceptions Uncaught exceptions raised from queued Qt slots in PyInstaller --windowed builds (console=False on Windows/macOS) were written to a stderr stream pointed at NUL/dev/null and never reached pdfapps.log. Users got a silent UI freeze with zero diagnostic trail (R7 I2). Route them through the logging framework with exc_info= so the rotating handler captures the traceback, and surface a best-effort QMessageBox so the failure is visible. KeyboardInterrupt keeps the default behaviour so Ctrl-C in console builds still exits cleanly. Co-Authored-By: Claude Opus 4.7 --- pdfapps.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/pdfapps.py b/pdfapps.py index 754a5b7..0d8edb5 100644 --- a/pdfapps.py +++ b/pdfapps.py @@ -1,5 +1,6 @@ """PDFApps – entry point.""" import argparse +import logging import sys import os @@ -28,6 +29,42 @@ from app.utils import _make_palette, setup_logging +def _excepthook(exc_type, exc_value, exc_traceback): + """Route uncaught exceptions through the logging framework so + PyInstaller --windowed builds still surface crashes in the log + file instead of silently swallowing them on closed stderr. + + Without this hook (R7 I2), tracebacks raised from queued Qt slots + in console=False builds (Windows/macOS PyInstaller --windowed) are + written to a stderr stream wired to ``NUL`` / ``/dev/null`` and + never reach ``pdfapps.log`` either. The default :data:`sys.excepthook` + only prints to stderr, so the user sees a silent "nothing happened" + while we have zero diagnostic trail. + + KeyboardInterrupt keeps the default behavior so Ctrl-C in a console + build still exits cleanly without spamming the log. + """ + if issubclass(exc_type, KeyboardInterrupt): + sys.__excepthook__(exc_type, exc_value, exc_traceback) + return + logging.getLogger(__name__).critical( + "Uncaught exception", + exc_info=(exc_type, exc_value, exc_traceback), + ) + # Best-effort: surface a user-facing dialog so the app doesn't appear + # to silently hang after a slot blew up. Guarded against the (rare) + # case where the QApplication is gone or QMessageBox itself raises. + try: + from PySide6.QtWidgets import QApplication, QMessageBox + if QApplication.instance() is not None: + QMessageBox.critical( + None, "Unexpected error", + f"{exc_type.__name__}: {exc_value}\n\nDetails logged.", + ) + except Exception: + pass + + def _load_dark_pref() -> bool: try: import json @@ -68,6 +105,10 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: def main(): setup_logging() + # Install the global excepthook AFTER setup_logging (so the rotating + # file handler is in place) and BEFORE QApplication so any crash in + # window construction is already covered. + sys.excepthook = _excepthook # Parse BEFORE QApplication so --help / --version exit cleanly # without bringing up the Qt event loop (and the splash screen). args = _parse_args(sys.argv[1:]) From 61a799ed3595b67a964f3eb185f6c56c6fda57e0 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:37:44 +0100 Subject: [PATCH 02/12] fix(i18n): backup config.json before resetting on JSON corruption A corrupt config.json was silently overwritten with {} on the next _update_config call, wiping language / dark_mode / recents with no warning to the user or support trail (R8 N1). Distinguish 'missing file' (normal first run) from 'unparseable file' and, in the latter case, copy the broken file aside to .corrupt.bak before resetting. Use contextlib.suppress so a read-only profile or permission error still lets the app start with default settings instead of failing hard. Co-Authored-By: Claude Opus 4.7 --- app/i18n.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/app/i18n.py b/app/i18n.py index 89faecd..ad41f2b 100644 --- a/app/i18n.py +++ b/app/i18n.py @@ -1,12 +1,17 @@ """PDFApps – Internationalization (i18n) module.""" +import contextlib import json import locale +import logging import os +import shutil import sys import threading from typing import Callable +_log = logging.getLogger(__name__) + _TRANSLATIONS: dict = {} _LANG: str = "en" @@ -146,13 +151,36 @@ def _update_config(mutator: Callable[[dict], None]) -> None: """ with _CONFIG_LOCK: cfg: dict = {} + corrupt = False try: with open(_CONFIG_PATH, "r", encoding="utf-8") as f: cfg = json.load(f) + except FileNotFoundError: + cfg = {} except Exception: cfg = {} + corrupt = True if not isinstance(cfg, dict): cfg = {} + corrupt = True + # R8 N1: previously a corrupt config.json was silently overwritten + # with `{}`, wiping the user's saved language / dark_mode / recents + # without warning. Snapshot the broken file before resetting so + # support can recover settings (and so we have evidence the + # corruption happened rather than a silent reset triggered by us). + if corrupt: + try: + if (os.path.isfile(_CONFIG_PATH) + and os.path.getsize(_CONFIG_PATH) > 0): + backup_path = _CONFIG_PATH + ".corrupt.bak" + with contextlib.suppress(Exception): + shutil.copy2(_CONFIG_PATH, backup_path) + _log.warning( + "config.json appeared corrupt; saved backup " + "to %s before reset", backup_path, + ) + except OSError: + pass mutator(cfg) _atomic_write_config(cfg) From 2a1b2b1d575a2ab12e5553d4126be783b1f85b43 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:40:08 +0100 Subject: [PATCH 03/12] fix(base): NFC-normalize PDF passwords before decrypt/authenticate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unicode strings flowing into pypdf PdfReader.decrypt and PyMuPDF Document.authenticate were used as-is. A password typed on macOS (NFD-leaning) and copied to a Windows machine (NFC-leaning) failed to authenticate even though the on-screen glyphs were identical (R6 C1). Add a static _nfc helper and run every cached password through it at the three entry points (prompt-and-cache, _open_reader, _open_fitz). NFC matches what Windows/Linux clipboards produce and what most PDF producers wrote pre-PDF-2.0; we still don't SASLprep (PDF 2.0 only) — that's a separate, higher-risk change. Co-Authored-By: Claude Opus 4.7 --- app/base.py | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/app/base.py b/app/base.py index 96fb35b..a54c5f0 100644 --- a/app/base.py +++ b/app/base.py @@ -6,6 +6,7 @@ import sys import tempfile import shutil +import unicodedata from typing import Iterable from PySide6.QtCore import Qt, QTimer, Signal @@ -294,6 +295,30 @@ def _resolve_output_dir(self, drop_widget, input_path: str = "") -> str: # ── encrypted-PDF helpers ────────────────────────────────────────────── + @staticmethod + def _nfc(pwd: str) -> str: + """Return the NFC-normalized form of ``pwd`` (R6 C1). + + Unicode in PDF owner/user passwords is whatever the producing + application wrote (PDF 2.0 mandates SASLprep, but the world is + full of pre-2.0 files). macOS HFS/APFS often stores filenames + and clipboard text in NFD; Windows clipboards default to NFC. + A password typed once on macOS and copied to a Windows machine + can therefore fail to authenticate even though the on-screen + glyphs are identical. Normalize to NFC on every code path that + feeds the password to pypdf/PyMuPDF so the in-memory bytes are + deterministic regardless of where the password was originally + composed. + """ + if not pwd: + return pwd + try: + return unicodedata.normalize("NFC", pwd) + except (TypeError, ValueError): + # str inputs only ever raise on absurd code points; + # falling back to the raw string is safer than crashing. + return pwd + def _maybe_prompt_password(self, path: str) -> bool: """If the PDF at `path` is encrypted, prompt the user; on success store the password on `self._pdf_password` and return True. Plain @@ -312,7 +337,7 @@ def _maybe_prompt_password(self, path: str) -> bool: if not doc.needs_pass: self._pdf_password = "" return True - if self._pdf_password and doc.authenticate(self._pdf_password): + if self._pdf_password and doc.authenticate(self._nfc(self._pdf_password)): return True finally: doc.close() @@ -320,25 +345,35 @@ def _maybe_prompt_password(self, path: str) -> bool: ok, pwd = prompt_pdf_password(path, self) if not ok: return False - self._pdf_password = pwd + self._pdf_password = self._nfc(pwd) return True def _open_reader(self, path: str): """Open a pypdf PdfReader, decrypting with the stored password if - the file is encrypted.""" + the file is encrypted. + + The cached password is NFC-normalized before being passed to + :meth:`pypdf.PdfReader.decrypt`. See :meth:`_nfc` for the + rationale (R6 C1). + """ from pypdf import PdfReader r = PdfReader(path) if r.is_encrypted and self._pdf_password: - r.decrypt(self._pdf_password) + r.decrypt(self._nfc(self._pdf_password)) return r def _open_fitz(self, path: str): """Open a PyMuPDF Document, authenticating with the stored - password if needed.""" + password if needed. + + The cached password is NFC-normalized before being passed to + :meth:`fitz.Document.authenticate`. See :meth:`_nfc` for the + rationale (R6 C1). + """ import fitz doc = fitz.open(path) if doc.needs_pass and self._pdf_password: - doc.authenticate(self._pdf_password) + doc.authenticate(self._nfc(self._pdf_password)) return doc def _clear_pdf_password(self) -> None: From ff685bae2490d3c5f2102f28b7a807d240f429af Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:40:56 +0100 Subject: [PATCH 04/12] fix(base): guard set_compact_mode lambda with shiboken6.isValid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compact-mode 'Change source' link installs a lambda that calls self.set_compact_mode(False). If the page is destroyed between the Qt click event being queued and the slot actually running, the lambda touches a dead C++ QWidget and Qt logs 'Internal C++ object already deleted' / crashes (R6 O2). PySide6 has no QPointer; gate the slot on shiboken6.isValid(self), which is already imported in this module. Cost is one isValid call per compact-mode exit — negligible. Co-Authored-By: Claude Opus 4.7 --- app/base.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/app/base.py b/app/base.py index a54c5f0..3764a27 100644 --- a/app/base.py +++ b/app/base.py @@ -207,7 +207,15 @@ def set_compact_mode(self, active: bool, path: str = "") -> None: f"background:transparent; padding:2px 4px; text-align:left; }}" f"QPushButton#compact_link:hover {{ text-decoration: underline; }}" ) - link.clicked.connect(lambda: self.set_compact_mode(False)) + # R6 O2: guard against the page being destroyed between the + # Qt click queueing and the slot actually running. PySide6 + # has no QPointer, so use shiboken6.isValid() — without it + # the lambda may touch a dead C++ QWidget and crash with + # "Internal C++ object already deleted". The lambda is the + # only callback installed here, so cost is one isValid call + # per compact-mode exit. + link.clicked.connect( + lambda: self.set_compact_mode(False) if isValid(self) else None) self._form.insertWidget(0, link) self._compact_link = link From 8c8f1da3b9b91b5c218f504a27f16116088e49d6 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:41:43 +0100 Subject: [PATCH 05/12] fix(editor): explicit tab order in _PdfPasswordDialog Without setTabOrder Qt walked the dialog's children in construction order (icon, title, password, Cancel, OK) and on some platforms also landed initial focus on the Cancel button. A keyboard-only user had to press Tab three times to reach the password field they came here to fill in (R11 G1). Anchor focus on the password field and force the cycle password -> OK -> Cancel so Enter / Tab match the expected flow. Co-Authored-By: Claude Opus 4.7 --- app/editor/dialogs.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/app/editor/dialogs.py b/app/editor/dialogs.py index 27647ee..57da0a4 100644 --- a/app/editor/dialogs.py +++ b/app/editor/dialogs.py @@ -84,6 +84,16 @@ def __init__(self, filename: str, wrong: bool = False, parent=None): btns.addWidget(ca); btns.addWidget(ok) v.addLayout(btns) + # R11 G1: without an explicit tab order Qt walks widgets in the + # construction order (icon → title → password → Cancel → OK), + # but the dialog opens with focus on Cancel via the addStretch + # quirk on some platforms. Force a predictable, accessible + # path: password field → OK → Cancel. Keyboard-only users now + # land on the field they need to fill in first. + self._edit.setFocus() + self.setTabOrder(self._edit, ok) + self.setTabOrder(ok, ca) + def password(self) -> str: return self._edit.text() From 908adfe23808682f146659caa2f5e0da5108eae2 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:42:01 +0100 Subject: [PATCH 06/12] fix(editor): use QLineEdit in _TextEditDialog to match fitz single-line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _TextEditDialog asked the user for replacement text via a QTextEdit (multi-line), but page.insert_text in PyMuPDF only supports single-line strings — any newline got rasterized as a '?' glyph in the output PDF (R11 B2). Switch to QLineEdit so the input is restricted to what the writer can actually emit; wire returnPressed to accept the dialog for consistency with the password dialog. Simpler than implementing a per-line iteration on insert_text in the editor and matches the existing _TextDialog (insert) behavior. Co-Authored-By: Claude Opus 4.7 --- app/editor/dialogs.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/app/editor/dialogs.py b/app/editor/dialogs.py index 57da0a4..a16bd05 100644 --- a/app/editor/dialogs.py +++ b/app/editor/dialogs.py @@ -122,9 +122,15 @@ def __init__(self, old_text: str, font_size: float, parent=None): lbl_new.setStyleSheet(f"color:{pri}; font-size:10pt;") v.addWidget(lbl_new) - self._edit = QTextEdit() - self._edit.setPlainText(old_text) - self._edit.setMinimumHeight(80) + # R11 B2: the previous QTextEdit accepted newlines that + # ``page.insert_text`` (PyMuPDF) cannot render — each \n got + # rasterized as a "?" glyph in the output PDF. Use QLineEdit + # so the input is restricted to what the writer can actually + # produce; Enter submits via returnPressed, matching the + # password dialog above. + self._edit = QLineEdit() + self._edit.setText(old_text) + self._edit.returnPressed.connect(self.accept) v.addWidget(self._edit) btns = QHBoxLayout(); btns.setSpacing(8); btns.addStretch() @@ -135,7 +141,7 @@ def __init__(self, old_text: str, font_size: float, parent=None): v.addLayout(btns) def new_text(self) -> str: - return self._edit.toPlainText() + return self._edit.text() class _TextDialog(QDialog): From dd6d3808fdc6c5db93aaa2becc8238b6ebb72e7a Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:44:40 +0100 Subject: [PATCH 07/12] feat(window): confirm before opening >20 PDFs dropped as folder Dropping a folder onto the main window used to silently open every .pdf inside (R10 review nit). A folder with 200 PDFs flooded the tab bar, locked the UI for several seconds while each tab loaded, and gave the user no way to back out. Show a Yes/No QMessageBox when the recursive list exceeds 20 files so the user can confirm or skip. Default is No so accidental drops on archive folders do nothing surprising. i18n: add viewer.drop_many_pdfs_confirm and widgets.drop_first_only across all 8 supported languages. The latter ships with this commit so the JSON stays parity-clean; it is consumed by the next commit (DropFileEdit multi-URL warning). Co-Authored-By: Claude Opus 4.7 --- app/translations.json | 16 ++++++++++++++++ app/window.py | 14 ++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/app/translations.json b/app/translations.json index 6af3822..d16fd06 100644 --- a/app/translations.json +++ b/app/translations.json @@ -61,6 +61,8 @@ "viewer.delete_comment": "Delete comment", "viewer.confirm_delete_comment": "Are you sure you want to delete this comment? This change is saved immediately and cannot be undone.", "viewer.drop_url_not_supported": "Online URLs are not supported. Please download the PDF first, then drag the local file here.", + "viewer.drop_many_pdfs_confirm": "Open {count} PDF files at once?", + "widgets.drop_first_only": "Only the first of {count} files was loaded.", "viewer.copy_chars": " Copy ({n} chars)", "search.placeholder": "Search in PDF...", "search.prev": "Previous match", @@ -660,6 +662,8 @@ "viewer.delete_comment": "Apagar comentário", "viewer.confirm_delete_comment": "Tens a certeza que queres apagar este comentário? Esta alteração é guardada imediatamente e não pode ser desfeita.", "viewer.drop_url_not_supported": "URLs da internet não são suportados. Descarrega primeiro o PDF e depois arrasta o ficheiro local para aqui.", + "viewer.drop_many_pdfs_confirm": "Abrir {count} ficheiros PDF de uma vez?", + "widgets.drop_first_only": "Apenas o primeiro de {count} ficheiros foi carregado.", "viewer.copy_chars": " Copiar ({n} car.)", "search.placeholder": "Pesquisar no PDF...", "search.prev": "Resultado anterior", @@ -1259,6 +1263,8 @@ "viewer.delete_comment": "Eliminar comentario", "viewer.confirm_delete_comment": "¿Seguro que quieres eliminar este comentario? Este cambio se guarda inmediatamente y no se puede deshacer.", "viewer.drop_url_not_supported": "Las URLs en línea no son compatibles. Descarga primero el PDF y luego arrastra el archivo local aquí.", + "viewer.drop_many_pdfs_confirm": "¿Abrir {count} archivos PDF a la vez?", + "widgets.drop_first_only": "Solo se cargó el primero de {count} archivos.", "viewer.copy_chars": " Copiar ({n} caract.)", "search.placeholder": "Buscar en PDF...", "search.prev": "Resultado anterior", @@ -1858,6 +1864,8 @@ "viewer.delete_comment": "Supprimer le commentaire", "viewer.confirm_delete_comment": "Voulez-vous vraiment supprimer ce commentaire ? Cette modification est enregistrée immédiatement et ne peut pas être annulée.", "viewer.drop_url_not_supported": "Les URL en ligne ne sont pas prises en charge. Téléchargez d'abord le PDF, puis faites glisser le fichier local ici.", + "viewer.drop_many_pdfs_confirm": "Ouvrir {count} fichiers PDF en une fois ?", + "widgets.drop_first_only": "Seul le premier des {count} fichiers a été chargé.", "viewer.copy_chars": " Copier ({n} caract.)", "search.placeholder": "Rechercher dans le PDF...", "search.prev": "Résultat précédent", @@ -2457,6 +2465,8 @@ "viewer.delete_comment": "Kommentar löschen", "viewer.confirm_delete_comment": "Soll dieser Kommentar wirklich gelöscht werden? Diese Änderung wird sofort gespeichert und kann nicht rückgängig gemacht werden.", "viewer.drop_url_not_supported": "Online-URLs werden nicht unterstützt. Laden Sie die PDF zuerst herunter und ziehen Sie dann die lokale Datei hierher.", + "viewer.drop_many_pdfs_confirm": "{count} PDF-Dateien gleichzeitig öffnen?", + "widgets.drop_first_only": "Nur die erste der {count} Dateien wurde geladen.", "viewer.copy_chars": " Kopieren ({n} Zeichen)", "search.placeholder": "Im PDF suchen...", "search.prev": "Vorheriges Ergebnis", @@ -3056,6 +3066,8 @@ "viewer.delete_comment": "删除批注", "viewer.confirm_delete_comment": "确定要删除此批注吗?此更改将立即保存且无法撤销。", "viewer.drop_url_not_supported": "不支持在线 URL。请先下载 PDF,然后将本地文件拖到此处。", + "viewer.drop_many_pdfs_confirm": "一次打开 {count} 个 PDF 文件?", + "widgets.drop_first_only": "仅加载了 {count} 个文件中的第一个。", "viewer.copy_chars": " 复制 ({n} 个字符)", "search.placeholder": "在 PDF 中搜索...", "search.prev": "上一个匹配", @@ -3655,6 +3667,8 @@ "viewer.delete_comment": "Elimina commento", "viewer.confirm_delete_comment": "Sei sicuro di voler eliminare questo commento? Questa modifica viene salvata immediatamente e non può essere annullata.", "viewer.drop_url_not_supported": "Gli URL online non sono supportati. Scarica prima il PDF, poi trascina il file locale qui.", + "viewer.drop_many_pdfs_confirm": "Aprire {count} file PDF contemporaneamente?", + "widgets.drop_first_only": "È stato caricato solo il primo dei {count} file.", "viewer.copy_chars": " Copia ({n} caratteri)", "search.placeholder": "Cerca nel PDF...", "search.prev": "Corrispondenza precedente", @@ -4254,6 +4268,8 @@ "viewer.delete_comment": "Opmerking verwijderen", "viewer.confirm_delete_comment": "Weet je zeker dat je deze opmerking wilt verwijderen? Deze wijziging wordt onmiddellijk opgeslagen en kan niet ongedaan worden gemaakt.", "viewer.drop_url_not_supported": "Online-URL's worden niet ondersteund. Download eerst de PDF en sleep vervolgens het lokale bestand hierheen.", + "viewer.drop_many_pdfs_confirm": "{count} PDF-bestanden tegelijk openen?", + "widgets.drop_first_only": "Alleen het eerste van {count} bestanden is geladen.", "viewer.copy_chars": " Kopiëren ({n} tekens)", "search.placeholder": "Zoeken in PDF...", "search.prev": "Vorige overeenkomst", diff --git a/app/window.py b/app/window.py index b277454..36e5027 100644 --- a/app/window.py +++ b/app/window.py @@ -1033,6 +1033,20 @@ def dropEvent(self, e): if f.lower().endswith(".pdf") and os.path.isfile(os.path.join(path, f)) ) + # R10 review nit: dropping a folder used to silently open + # every PDF inside it. A folder with 200 PDFs would + # flood the tab bar and lock the UI for several seconds + # with no escape. Confirm above a reasonable threshold + # before we start spawning tabs. + if len(pdfs) > 20: + reply = QMessageBox.question( + self, t("msg.confirm"), + t("viewer.drop_many_pdfs_confirm", count=len(pdfs)), + QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No, + QMessageBox.StandardButton.No, + ) + if reply != QMessageBox.StandardButton.Yes: + continue for pdf in pdfs: self._load_and_track(pdf) continue From 1ba9ab09e7b07fd0d2b4dfd16ed8a2796e4878e0 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:45:31 +0100 Subject: [PATCH 08/12] fix(widgets): warn when DropFileEdit receives multiple URLs DropFileEdit is a single-file widget but used to silently take only urls[0] when the user dragged multiple files in. The extra files vanished with no feedback (R11 N3). Filter to URLs that pass the extension filter first so dragging one PDF plus an unrelated screenshot still counts as 'one PDF dropped' and skips the warning. When more than one accepted file is present, surface a QMessageBox.information with the count using the new widgets.drop_first_only key (added in the previous commit). Co-Authored-By: Claude Opus 4.7 --- app/widgets.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/app/widgets.py b/app/widgets.py index dfd7ef6..1289c19 100644 --- a/app/widgets.py +++ b/app/widgets.py @@ -6,6 +6,7 @@ from PySide6.QtGui import QDragEnterEvent, QDropEvent, QColor, QPixmap, QPainter from PySide6.QtWidgets import ( QWidget, QHBoxLayout, QLabel, QPushButton, QFileDialog, QColorDialog, + QMessageBox, ) import qtawesome as qta @@ -167,8 +168,22 @@ def dropEvent(self, e: QDropEvent): self.setProperty("drag_active", "false") self.style().unpolish(self); self.style().polish(self) urls = e.mimeData().urls() - if urls and self._url_accepted(urls[0]): - self.set_path(urls[0].toLocalFile()) + # R11 N3: DropFileEdit is a single-file widget but used to + # silently accept only urls[0] when the user dragged multiple + # files in. The extra files vanished with no feedback. Count + # the urls that PASS the extension filter (so dragging one + # PDF plus an unrelated screenshot still counts as 'one PDF + # dropped' and skips the warning) and surface a friendly + # notice when more than one valid file was dropped. + accepted = [u for u in urls if self._url_accepted(u)] + if not accepted: + return + if len(accepted) > 1: + QMessageBox.information( + self, t("msg.info"), + t("widgets.drop_first_only", count=len(accepted)), + ) + self.set_path(accepted[0].toLocalFile()) def _browse(self): if self._save: From 5de78250b9995250cd639fc43cc7f90dc09c1945 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:45:58 +0100 Subject: [PATCH 09/12] fix(editor): show no_fields status when AcroForm dict has zero widgets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The R10 #6 guard rejected the 'no /AcroForm dict at all' case but missed the rarer one where /AcroForm exists with zero widget entries — e.g. files whose fields were flattened by a third-party tool but the dict was left behind. update_page_form_field_values then ran a silent no-op and the user got no feedback that nothing changed (R10 review follow-up). Reuse _r.get_fields() (already cached by pypdf when the form table was first populated) and surface the same editor.forms.no_fields status the plain-PDF branch uses. Avoids importing fitz just to count widgets and keeps the messaging consistent for the user. Co-Authored-By: Claude Opus 4.7 --- app/editor/tab.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/app/editor/tab.py b/app/editor/tab.py index d8ee081..c23b950 100644 --- a/app/editor/tab.py +++ b/app/editor/tab.py @@ -1350,6 +1350,24 @@ def _apply_forms(self, out): self._status(t("editor.forms.no_fields")) self._form_status.setText(t("editor.forms.no_fields")) return + # R10 review follow-up: an /AcroForm dict can exist with + # zero actual widgets (e.g. forms whose fields were + # flattened by a third-party tool but the dict was left + # behind). update_page_form_field_values then runs a + # silent no-op and the user gets no feedback. Surface + # the same no_fields status so the result matches the + # 'plain PDF' case above. Use the get_fields() count + # since pypdf already exposes it cheaply via the cached + # AcroForm tree — avoids importing fitz just for a + # widget count. + try: + _w_fields = _r.get_fields() or {} + except Exception: + _w_fields = {} + if not _w_fields: + self._status(t("editor.forms.no_fields")) + self._form_status.setText(t("editor.forms.no_fields")) + return for page in writer.pages: # auto_regenerate=True so the rendered widget appearance # actually picks up the new value when viewed in a third- From 21fd103d262c6c2167b2073a4d0938639d9314c2 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:47:12 +0100 Subject: [PATCH 10/12] fix(utils): allow open-ended range '3-' to mean '3 to total' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit parse_pages used to raise ValueError on inputs like '3-' because int('') failed inside the split-on-'-' branch. PR-G surfaced the friendly bad_page_input message but the underlying behaviour stayed unhelpful — most users typed '3-' expecting 'from page 3 to the end', matching pdftk's familiar 1- syntax (R7 E5 follow-up). Restructure the per-part loop so missing-side ranges fall back to 1 (left) or total (right). '-' on its own is still invalid (no bounds at all). Update the existing open-range test in test_audit_r11.py to use 'abc-' as the regression target since '1-' is now intentionally valid. Co-Authored-By: Claude Opus 4.7 --- app/utils.py | 39 +++++++++++++++++++++++++++------------ tests/test_audit_r11.py | 13 +++++++++++-- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/app/utils.py b/app/utils.py index d7348c0..5b28178 100644 --- a/app/utils.py +++ b/app/utils.py @@ -72,19 +72,34 @@ def parse_pages(text: str, total: int) -> list: part = part.strip() if not part: continue - try: - if "-" in part: - a, b = part.split("-", 1) - a_int, b_int = int(a), int(b) - else: + # R7 E5 follow-up: accept open-ended ranges so '3-' means + # 'from page 3 to the end' and '-5' means 'from page 1 to + # page 5'. Without this, '3-' raised ValueError because + # int('') failed. Reject '-' alone (no bounds at all); + # otherwise default the missing side to the document's + # extreme. Matches the pdftk-style 1- range syntax. + if "-" in part: + a, b = part.split("-", 1) + a = a.strip(); b = b.strip() + if not a and not b: + raise ValueError( + t("tool.err.bad_page_input", text=part)) + try: + a_int = int(a) if a else 1 + b_int = int(b) if b else total + except ValueError as exc: + raise ValueError( + t("tool.err.bad_page_input", text=part)) from exc + else: + try: a_int = b_int = int(part) - except ValueError as exc: - # int() raised "invalid literal for int() with base 10" — - # re-raise with a translated, user-actionable message so - # show_error() surfaces something useful instead of the - # raw Python error. - raise ValueError( - t("tool.err.bad_page_input", text=part)) from exc + except ValueError as exc: + # int() raised "invalid literal for int() with base 10" — + # re-raise with a translated, user-actionable message so + # show_error() surfaces something useful instead of the + # raw Python error. + raise ValueError( + t("tool.err.bad_page_input", text=part)) from exc if "-" in part: if b_int - a_int + 1 > _MAX_PAGES: raise ValueError( diff --git a/tests/test_audit_r11.py b/tests/test_audit_r11.py index 99f781e..88c6957 100644 --- a/tests/test_audit_r11.py +++ b/tests/test_audit_r11.py @@ -271,12 +271,21 @@ def test_parse_pages_raises_friendly_message_for_invalid_input(): def test_parse_pages_raises_friendly_message_for_open_range(): + """Open-ended ranges with non-numeric bounds (e.g. 'abc-') still + surface the translated bad_page_input message. PR-H fix #10 made + purely-numeric open ranges like '1-' valid (interpreted as + '1..total'), so the regression target moves to a still-invalid + input.""" from app.utils import parse_pages with pytest.raises(ValueError) as exc: - parse_pages("1-", 10) + parse_pages("abc-", 10) msg = str(exc.value) - assert "1-" in msg + assert "abc-" in msg assert "invalid literal for int" not in msg + # Empty range '-' must also fail (no bounds at all). + with pytest.raises(ValueError) as exc2: + parse_pages("-", 10) + assert "invalid literal for int" not in str(exc2.value) def test_parse_pages_still_accepts_valid_input(): From ef4c79409b82cefe7958f21ab41f7c9b7b5582c1 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Wed, 17 Jun 2026 20:48:29 +0100 Subject: [PATCH 11/12] test: source-level regression tests for PR-H polish fixes 16 tests covering all ten PR-H bug fixes: #1 pdfapps.py installs sys.excepthook #2 i18n backs up corrupt config.json before reset #3 base.py NFC-normalizes passwords #4 _PdfPasswordDialog explicit tab order #5 window confirms folder drop >20 PDFs #6 editor zero-widget AcroForm short-circuits #7 _TextEditDialog uses QLineEdit #8 DropFileEdit warns on multiple URLs #9 set_compact_mode lambda guarded by shiboken6.isValid #10 parse_pages accepts '3-' / '-5' open ranges, still rejects '-' Plus two i18n parity guards verifying both new translation keys (viewer.drop_many_pdfs_confirm, widgets.drop_first_only) land in all eight supported languages with the {count} placeholder intact. Co-Authored-By: Claude Opus 4.7 --- tests/test_polish_lows.py | 262 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 262 insertions(+) create mode 100644 tests/test_polish_lows.py diff --git a/tests/test_polish_lows.py b/tests/test_polish_lows.py new file mode 100644 index 0000000..3ea7537 --- /dev/null +++ b/tests/test_polish_lows.py @@ -0,0 +1,262 @@ +"""Source-level + behavioral regression tests for PR-H polish fixes. + +Bug map (PR-H worklist): + #1 Global sys.excepthook in pdfapps.py main entry (R7 I2 LOW) + #2 config.json backup before reset on corruption (R8 N1 LOW) + #3 NFC password normalization in BasePage helpers (R6 C1 LOW) + #4 Explicit tab order in _PdfPasswordDialog (R11 G1 LOW) + #5 Drop folder >20 PDFs confirmation (R10 review LOW) + #6 AcroForm with zero widgets no_fields short-circuit (R10 review LOW) + #7 _TextEditDialog single-line QLineEdit (R11 B2 LOW) + #8 DropFileEdit multi-URL warning (R11 N3 LOW) + #9 set_compact_mode lambda guarded with shiboken6.isValid (R6 O2 LOW) + #10 parse_pages accepts open-ended ranges (R7 E5 LOW) +""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +ROOT = Path(__file__).resolve().parent.parent + + +def _read(rel: str) -> str: + return (ROOT / rel).read_text(encoding="utf-8") + + +# ── #1 — sys.excepthook installed in main entry ───────────────────────── + + +def test_pdfapps_installs_global_excepthook(): + """The entry point must install sys.excepthook so uncaught + exceptions in PyInstaller --windowed builds reach pdfapps.log + instead of vanishing into a closed stderr.""" + src = _read("pdfapps.py") + assert "sys.excepthook = _excepthook" in src, ( + "main() must wire sys.excepthook to the logging hook." + ) + # KeyboardInterrupt must keep the default behaviour so Ctrl-C still + # exits cleanly without spamming the log. + assert "KeyboardInterrupt" in src + assert "sys.__excepthook__" in src + # The hook must log via the logging framework (not bare print). + assert "logging.getLogger" in src + assert "exc_info=" in src + + +# ── #2 — corrupt config.json backed up before reset ───────────────────── + + +def test_i18n_backs_up_corrupt_config_before_reset(): + """_update_config must copy a corrupt config.json aside as + .corrupt.bak before overwriting it with {}.""" + src = _read("app/i18n.py") + assert ".corrupt.bak" in src, "Backup suffix marker missing" + assert "shutil.copy2" in src, ( + "_update_config must shutil.copy2 the corrupt file aside." + ) + # The 'missing file' path must NOT trigger a backup (would log noise + # on every fresh install). + assert "FileNotFoundError" in src, ( + "FileNotFoundError handled separately so first run is silent." + ) + + +# ── #3 — NFC password normalization ───────────────────────────────────── + + +def test_base_normalizes_passwords_to_nfc(): + """All three encrypted-PDF entry points must route the password + through unicodedata.normalize('NFC', ...) so a macOS-typed (NFD) + password unlocks the same PDF on Windows (NFC).""" + src = _read("app/base.py") + assert "import unicodedata" in src + assert "unicodedata.normalize(\"NFC\"" in src + # Helper exists and is called from each entry point. + assert "def _nfc(" in src + # The three call sites all use _nfc(). + assert "self._nfc(self._pdf_password)" in src + assert "self._nfc(pwd)" in src + + +def test_nfc_helper_behavior_on_combining_characters(): + """Smoke test: a Unicode string composed in NFD (e + combining acute) + must become its NFC singleton (é) before pypdf/PyMuPDF see it.""" + import unicodedata + + nfd = "passé" # 'e' + combining acute + nfc = "passé".encode("utf-8") + normalized = unicodedata.normalize("NFC", nfd) + assert normalized != nfd, "Test input must actually differ from NFC" + assert unicodedata.is_normalized("NFC", normalized) + + +# ── #4 — _PdfPasswordDialog explicit tab order ────────────────────────── + + +def test_password_dialog_sets_tab_order(): + """The dialog must call setTabOrder(_edit, ok) and setTabOrder(ok, ca) + so keyboard users land on the password field and Tab through OK then + Cancel.""" + src = _read("app/editor/dialogs.py") + assert "self.setTabOrder(self._edit, ok)" in src + assert "self.setTabOrder(ok, ca)" in src + # Initial focus is on the password field, not on Cancel. + assert "self._edit.setFocus()" in src + + +# ── #5 — drop folder >20 PDFs confirmation ────────────────────────────── + + +def test_window_confirms_drop_of_many_pdfs(): + """Dropping a folder with more than 20 PDFs must show a confirm + QMessageBox so the user can back out before tabs flood the UI.""" + src = _read("app/window.py") + assert "viewer.drop_many_pdfs_confirm" in src + assert "len(pdfs) > 20" in src + # Default button is No so accidental drops do nothing surprising. + assert "QMessageBox.StandardButton.No" in src + + +# ── #6 — AcroForm dict but zero widgets ───────────────────────────────── + + +def test_editor_zero_widget_acroform_short_circuits(): + """When /AcroForm exists but get_fields() is empty, + _apply_forms must surface the no_fields status instead of running + update_page_form_field_values as a silent no-op.""" + src = _read("app/editor/tab.py") + # The new guard reads from _r.get_fields() and routes through the + # same no_fields message used by the missing-AcroForm branch. + assert "_r.get_fields()" in src + # Both no_fields branches reside in _apply_forms. + assert src.count("editor.forms.no_fields") >= 3, ( + "Expected three editor.forms.no_fields references (load-time + " + "missing-acroform + zero-widget branches)." + ) + + +# ── #7 — _TextEditDialog single-line QLineEdit ────────────────────────── + + +def test_text_edit_dialog_uses_qlineedit(): + """_TextEditDialog must build a QLineEdit so the user can't enter + newlines that PyMuPDF's insert_text would rasterise as '?'.""" + src = _read("app/editor/dialogs.py") + # Locate the class block. + cls_start = src.index("class _TextEditDialog") + cls_end = src.index("class ", cls_start + 1) + block = src[cls_start:cls_end] + assert "self._edit = QLineEdit()" in block, ( + "_TextEditDialog._edit must be a QLineEdit (single-line)." + ) + assert "self._edit = QTextEdit()" not in block, ( + "QTextEdit must not be used inside _TextEditDialog any more." + ) + # new_text() must return .text(), not .toPlainText(). + assert "self._edit.text()" in block + assert "self._edit.toPlainText()" not in block + + +# ── #8 — DropFileEdit multi-URL warning ───────────────────────────────── + + +def test_drop_file_edit_warns_on_multiple_urls(): + """DropFileEdit.dropEvent must surface a QMessageBox.information + when more than one accepted file is dropped on the single-file + widget.""" + src = _read("app/widgets.py") + assert "QMessageBox" in src + assert "widgets.drop_first_only" in src + assert "len(accepted) > 1" in src + + +# ── #9 — set_compact_mode lambda isValid guard ────────────────────────── + + +def test_compact_mode_lambda_uses_shiboken_isvalid(): + """The 'Change source' compact-mode link's lambda must guard + self.set_compact_mode(False) behind shiboken6.isValid(self) so a + queued click does not touch a destroyed page.""" + src = _read("app/base.py") + # Locate the lambda close to the compact_link section. + assert "link.clicked.connect(" in src + # The guard must wrap the slot. + assert "set_compact_mode(False) if isValid(self) else None" in src + + +# ── #10 — parse_pages open-ended range support ────────────────────────── + + +def test_parse_pages_accepts_open_ended_start(): + """'3-' on a 10-page document must yield pages 3..10 (0-indexed + 2..9). Previously raised ValueError.""" + from app.utils import parse_pages + + assert parse_pages("3-", 10) == [2, 3, 4, 5, 6, 7, 8, 9] + + +def test_parse_pages_accepts_open_ended_end(): + """'-5' must yield pages 1..5 (0-indexed 0..4).""" + from app.utils import parse_pages + + assert parse_pages("-5", 10) == [0, 1, 2, 3, 4] + + +def test_parse_pages_rejects_bare_dash(): + """'-' alone has no bounds at all and must still raise with the + friendly message.""" + from app.utils import parse_pages + + with pytest.raises(ValueError) as exc: + parse_pages("-", 10) + msg = str(exc.value) + # The friendly message echoes the offending substring and avoids + # the raw Python error. + assert "-" in msg + assert "invalid literal for int" not in msg + + +def test_parse_pages_open_range_combined_with_csv(): + """Open ranges must compose with CSV input — '1,3-' must yield the + full document and stay deduped/sorted.""" + from app.utils import parse_pages + + assert parse_pages("1,3-", 5) == [0, 2, 3, 4] + + +# ── i18n parity guards ───────────────────────────────────────────────── + + +def test_new_translation_keys_present_in_all_languages(): + """Both new PR-H keys must ship for all 8 supported languages + (en, pt, es, fr, de, zh, it, nl) so t() never falls back to the + raw key name in the UI.""" + raw = _read("app/translations.json") + data = json.loads(raw) + assert set(data.keys()) == {"en", "pt", "es", "fr", "de", "zh", "it", "nl"} + for key in ("viewer.drop_many_pdfs_confirm", "widgets.drop_first_only"): + missing = [lang for lang, kv in data.items() if key not in kv] + assert not missing, f"{key} missing in: {missing}" + # All language sections must share the same key count. + counts = {lang: len(kv) for lang, kv in data.items()} + assert len(set(counts.values())) == 1, ( + f"Key counts diverge across languages: {counts}" + ) + + +def test_new_translation_keys_use_count_placeholder(): + """Both new keys take a {count} placeholder; if a translator dropped + it the t() format would silently render '... files were loaded.' with + no number. Catch the regression at test time.""" + data = json.loads(_read("app/translations.json")) + for lang, kv in data.items(): + assert "{count}" in kv["viewer.drop_many_pdfs_confirm"], ( + f"{lang} viewer.drop_many_pdfs_confirm dropped {{count}}" + ) + assert "{count}" in kv["widgets.drop_first_only"], ( + f"{lang} widgets.drop_first_only dropped {{count}}" + ) From 1f3d31a67c546fdc3a77435dccb1a3a5f47cf0a8 Mon Sep 17 00:00:00 2001 From: nelsoduarte Date: Thu, 18 Jun 2026 15:36:43 +0100 Subject: [PATCH 12/12] fix(base): normalize PDF password to NFC on cache write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round 11 review caught that the previous NFC fix only normalized at read time in the BasePage helpers (_maybe_prompt_password, _open_reader, _open_fitz). The cached self._pdf_password attribute was still set raw at 2-3 sites (editor/tab.py, viewer/panel.py, plus the prompt path in base.py itself), so any tool that reads self._pdf_password directly (merge, watermark, ocr, page_numbers, convert ×8, nup, plus ~17 other call sites) still saw the un-normalized value. The fix lives in app.utils.normalize_password (single source of truth) and gets applied at every WRITE site: BasePage._maybe_prompt_password, EditorTab._load_pdf, PdfViewerPanel._open_path. BasePage._nfc is kept as a thin delegator so subclasses calling self._nfc(...) keep working and the three read-side helpers stay defensive against externally-set caches. By normalizing at write time the attribute becomes deterministic and every consumer is covered without instrumenting each tool. Also pulled in two adjacent polish items the same review flagged: B5: corrupt config.json backups now suffixed with a wall-clock timestamp (.corrupt-YYYYMMDD_HHMMSS.bak) so successive corruption events do not clobber prior evidence. F6: _TextEditDialog now collapses \r/\n in the pre-filled old_text before QLineEdit.setText, since QLineEdit silently truncates at the first newline and would hide the rest of the detected string from the user. Tests: test_polish_lows.py grows from 19 to 22 cases — three new write-site source assertions (base/editor/viewer) plus a behavioral NFD→NFC round-trip through the cache; the existing _nfc(pwd) source assertion was tightened to require all three defensive read-side calls plus the normalize_password write call. F6 is covered by two new substring assertions inside the _TextEditDialog source block. Co-Authored-By: Claude Opus 4.7 --- app/base.py | 39 ++++++------- app/editor/dialogs.py | 10 +++- app/editor/tab.py | 12 +++- app/i18n.py | 11 +++- app/utils.py | 26 +++++++++ app/viewer/panel.py | 10 +++- tests/test_polish_lows.py | 120 ++++++++++++++++++++++++++++++++++---- 7 files changed, 189 insertions(+), 39 deletions(-) diff --git a/app/base.py b/app/base.py index 3764a27..f6b509c 100644 --- a/app/base.py +++ b/app/base.py @@ -6,7 +6,6 @@ import sys import tempfile import shutil -import unicodedata from typing import Iterable from PySide6.QtCore import Qt, QTimer, Signal @@ -307,25 +306,19 @@ def _resolve_output_dir(self, drop_widget, input_path: str = "") -> str: def _nfc(pwd: str) -> str: """Return the NFC-normalized form of ``pwd`` (R6 C1). - Unicode in PDF owner/user passwords is whatever the producing - application wrote (PDF 2.0 mandates SASLprep, but the world is - full of pre-2.0 files). macOS HFS/APFS often stores filenames - and clipboard text in NFD; Windows clipboards default to NFC. - A password typed once on macOS and copied to a Windows machine - can therefore fail to authenticate even though the on-screen - glyphs are identical. Normalize to NFC on every code path that - feeds the password to pypdf/PyMuPDF so the in-memory bytes are - deterministic regardless of where the password was originally - composed. + Thin compatibility wrapper that now delegates to + :func:`app.utils.normalize_password` so the codebase has a + single source of truth for password normalisation (the R11 + review of PR-H flagged that tools reading ``self._pdf_password`` + directly bypassed the per-helper normalisation). Kept around so + BasePage subclasses calling ``self._nfc(...)`` keep working, + but new code should normalise at the WRITE site of the cache + (see PdfViewerPanel / EditorTab._load_pdf) so every consumer + — including the ~30 ``self._pdf_password`` reads across + ``tools/*.py`` — receives a deterministic value. """ - if not pwd: - return pwd - try: - return unicodedata.normalize("NFC", pwd) - except (TypeError, ValueError): - # str inputs only ever raise on absurd code points; - # falling back to the raw string is safer than crashing. - return pwd + from app.utils import normalize_password + return normalize_password(pwd) def _maybe_prompt_password(self, path: str) -> bool: """If the PDF at `path` is encrypted, prompt the user; on success @@ -349,11 +342,15 @@ def _maybe_prompt_password(self, path: str) -> bool: return True finally: doc.close() - from app.utils import prompt_pdf_password + from app.utils import prompt_pdf_password, normalize_password ok, pwd = prompt_pdf_password(path, self) if not ok: return False - self._pdf_password = self._nfc(pwd) + # NFC-normalise at WRITE time so every downstream consumer + # (tools/* that read self._pdf_password directly, plus the + # _open_reader / _open_fitz helpers) sees a deterministic + # value. R11 review C2 — see utils.normalize_password. + self._pdf_password = normalize_password(pwd) return True def _open_reader(self, path: str): diff --git a/app/editor/dialogs.py b/app/editor/dialogs.py index a16bd05..d5bec23 100644 --- a/app/editor/dialogs.py +++ b/app/editor/dialogs.py @@ -129,7 +129,15 @@ def __init__(self, old_text: str, font_size: float, parent=None): # produce; Enter submits via returnPressed, matching the # password dialog above. self._edit = QLineEdit() - self._edit.setText(old_text) + # R11 review F6: ``old_text`` can contain ``\n``/``\r`` when the + # viewer extracted text that spans multiple PDF lines. QLineEdit + # silently truncates at the first newline, which hides the + # remainder of the original content from the user. Collapse + # newlines into spaces so the whole detected string stays + # editable; PyMuPDF's writer cannot render real newlines anyway + # (see R11 B2 above). + safe_text = (old_text or "").replace("\r\n", " ").replace("\n", " ").replace("\r", " ") + self._edit.setText(safe_text) self._edit.returnPressed.connect(self.accept) v.addWidget(self._edit) diff --git a/app/editor/tab.py b/app/editor/tab.py index c23b950..9a9f741 100644 --- a/app/editor/tab.py +++ b/app/editor/tab.py @@ -16,7 +16,10 @@ import qtawesome as qta from app.constants import ACCENT, TEXT_PRI, TEXT_SEC, DESKTOP, _LQ, _LP -from app.utils import ToolHeader, ActionBar, info_lbl, _paint_bg, show_error +from app.utils import ( + ToolHeader, ActionBar, info_lbl, _paint_bg, show_error, + normalize_password, +) from app.i18n import t from app.widgets import DropFileEdit, ColorPickerButton from app.editor.canvas import PdfEditCanvas, _get_icon_cursor @@ -621,7 +624,12 @@ def _load_pdf(self, p: str): ok, pwd = prompt_pdf_password(p, self) if not ok: return - self._pdf_password = pwd + # NFC-normalise at the WRITE site so every reader of + # self._pdf_password (~10 call sites in this file plus + # ~8 tools under tools/) receives a deterministic value + # without needing per-site normalisation. See R11 review + # C2 / utils.normalize_password. + self._pdf_password = normalize_password(pwd) elif not needs_pass: self._pdf_password = "" self._doc_path = p diff --git a/app/i18n.py b/app/i18n.py index ad41f2b..93e537c 100644 --- a/app/i18n.py +++ b/app/i18n.py @@ -172,7 +172,16 @@ def _update_config(mutator: Callable[[dict], None]) -> None: try: if (os.path.isfile(_CONFIG_PATH) and os.path.getsize(_CONFIG_PATH) > 0): - backup_path = _CONFIG_PATH + ".corrupt.bak" + # R11 review B5: include a timestamp in the backup + # name so multiple corruption events don't clobber + # each other (the previous `.corrupt.bak` was a + # single slot, so the second corruption would + # overwrite the first — losing the original + # evidence support needed to recover the user's + # settings). + from datetime import datetime + ts = datetime.now().strftime("%Y%m%d_%H%M%S") + backup_path = _CONFIG_PATH + f".corrupt-{ts}.bak" with contextlib.suppress(Exception): shutil.copy2(_CONFIG_PATH, backup_path) _log.warning( diff --git a/app/utils.py b/app/utils.py index 5b28178..33aaeab 100644 --- a/app/utils.py +++ b/app/utils.py @@ -135,6 +135,32 @@ def pick_folder(parent: QWidget) -> str: return QFileDialog.getExistingDirectory(parent, t("btn.select_folder")) +def normalize_password(pwd: str) -> str: + """Return ``pwd`` normalised to Unicode NFC form (R6 C1 / R11 review C2). + + Passwords typed on macOS frequently land in NFD (decomposed) form, + while Windows clipboards produce NFC (composed) form. The on-screen + glyphs are identical but the underlying byte sequences are not, so a + password that authenticates on one OS may fail on the other. + + Normalising at the WRITE side of the cache (i.e. wherever + ``self._pdf_password = pwd`` happens) makes the cached value + deterministic and frees every downstream consumer + (``editor/tab.py``, ``viewer/panel.py``, ``tools/*``) from having to + remember to normalise on read. Returns falsy inputs unchanged so the + helper is safe to apply unconditionally. + """ + if not pwd: + return pwd + try: + import unicodedata + return unicodedata.normalize("NFC", pwd) + except (TypeError, ValueError): + # str inputs only ever raise on absurd code points; falling + # back to the raw string is safer than crashing. + return pwd + + def wipe_pdf_password(obj) -> None: """Best-effort wipe of the cached PDF password attribute on ``obj``. diff --git a/app/viewer/panel.py b/app/viewer/panel.py index 980dac5..505ee55 100644 --- a/app/viewer/panel.py +++ b/app/viewer/panel.py @@ -13,7 +13,7 @@ import qtawesome as qta from app.constants import ACCENT, TEXT_SEC, _LQ, DESKTOP -from app.utils import _paint_bg +from app.utils import _paint_bg, normalize_password from app.i18n import t from app.viewer.canvas import _SelectCanvas @@ -510,7 +510,13 @@ def load(self, path: str): if dlg.exec() != QDialog.DialogCode.Accepted: doc.close(); return if doc.authenticate(dlg.password()): - self._pdf_password = dlg.password() + # NFC-normalise at WRITE time so every consumer + # downstream (canvas.load, propagation to editor + # / tools) sees a deterministic value. R11 review + # C2: per-call-site normalisation missed the ~30 + # raw reads under tools/ — see + # utils.normalize_password. + self._pdf_password = normalize_password(dlg.password()) break wrong = True self._current_path = path diff --git a/tests/test_polish_lows.py b/tests/test_polish_lows.py index 3ea7537..86f4810 100644 --- a/tests/test_polish_lows.py +++ b/tests/test_polish_lows.py @@ -52,9 +52,16 @@ def test_pdfapps_installs_global_excepthook(): def test_i18n_backs_up_corrupt_config_before_reset(): """_update_config must copy a corrupt config.json aside as - .corrupt.bak before overwriting it with {}.""" + .corrupt-.bak before overwriting it with {}. + + R11 review B5 added the ``-`` suffix so successive + corruption events do not clobber each other; the marker we look + for therefore moves from the literal ``.corrupt.bak`` to the + ``.corrupt-`` prefix plus ``.bak`` extension. + """ src = _read("app/i18n.py") - assert ".corrupt.bak" in src, "Backup suffix marker missing" + assert ".corrupt-" in src, "Timestamped backup prefix marker missing" + assert ".bak" in src, "Backup extension marker missing" assert "shutil.copy2" in src, ( "_update_config must shutil.copy2 the corrupt file aside." ) @@ -63,6 +70,9 @@ def test_i18n_backs_up_corrupt_config_before_reset(): assert "FileNotFoundError" in src, ( "FileNotFoundError handled separately so first run is silent." ) + # The timestamp source must be wall-clock so the backups sort + # naturally and we can tell which event came first. + assert "datetime.now()" in src # ── #3 — NFC password normalization ───────────────────────────────────── @@ -70,16 +80,93 @@ def test_i18n_backs_up_corrupt_config_before_reset(): def test_base_normalizes_passwords_to_nfc(): """All three encrypted-PDF entry points must route the password - through unicodedata.normalize('NFC', ...) so a macOS-typed (NFD) - password unlocks the same PDF on Windows (NFC).""" - src = _read("app/base.py") - assert "import unicodedata" in src - assert "unicodedata.normalize(\"NFC\"" in src - # Helper exists and is called from each entry point. - assert "def _nfc(" in src - # The three call sites all use _nfc(). - assert "self._nfc(self._pdf_password)" in src - assert "self._nfc(pwd)" in src + through ``unicodedata.normalize('NFC', ...)`` so a macOS-typed (NFD) + password unlocks the same PDF on Windows (NFC). + + After the R11 review the canonical normalisation function moved to + :func:`app.utils.normalize_password` (so all the + ``self._pdf_password`` *read* sites in ``tools/*`` and + ``editor/tab.py`` are covered transitively when the cache is set); + ``BasePage._nfc`` is now a thin delegator. The actual + ``unicodedata.normalize("NFC", ...)`` call therefore lives in + ``utils.py`` rather than ``base.py``, which is what we assert here. + + After the follow-up R11 review fix, normalisation now also happens at + the WRITE site of the cache (BasePage._maybe_prompt_password) so the + ``self._pdf_password`` attribute itself is deterministic for every + downstream consumer in ``tools/*`` that reads it directly. The three + helpers keep their defensive read-side ``self._nfc(self._pdf_password)`` + so a value cached before this fix (or set externally without going + through ``normalize_password``) still authenticates correctly. + """ + base_src = _read("app/base.py") + utils_src = _read("app/utils.py") + # Normalisation primitive lives in utils.py now. + assert "import unicodedata" in utils_src + assert "unicodedata.normalize(\"NFC\"" in utils_src + assert "def normalize_password(" in utils_src + # BasePage helper still exists and still has the three read-side + # call sites (defensive — the WRITE site below covers the cache). + assert "def _nfc(" in base_src + assert base_src.count("self._nfc(self._pdf_password)") == 3, ( + "Expected three defensive read-side _nfc calls (auth probe, " + "PdfReader.decrypt, fitz.authenticate)." + ) + # WRITE-site normalisation: the prompt path must NFC-normalise + # before storing on self._pdf_password so every tool that reads + # the attribute raw (~30 sites under tools/*) is covered without + # per-call instrumentation. + assert "self._pdf_password = normalize_password(pwd)" in base_src + # And the BasePage helper must delegate to the utils primitive so + # the two paths cannot drift. + assert "normalize_password" in base_src + + +def test_editor_tab_normalizes_password_on_cache_write(): + """R11 review C2 follow-up: the editor's _load_pdf path stores the + prompted password directly on self._pdf_password. It must route + through normalize_password so tools reading the cache raw + (merge, watermark, ocr, page_numbers, convert, nup, ...) see the + same NFC string the BasePage helpers would compare against.""" + src = _read("app/editor/tab.py") + assert "self._pdf_password = normalize_password(pwd)" in src + assert "from app.utils import" in src and "normalize_password" in src + + +def test_viewer_panel_normalizes_password_on_cache_write(): + """R11 review C2 follow-up: PdfViewerPanel._open_path stores the + typed password on self._pdf_password. Same NFC-at-WRITE rule applies + — this is the value propagated to compact-mode tools by + MainWindow._on_tab_changed.""" + src = _read("app/viewer/panel.py") + assert "normalize_password(dlg.password())" in src + assert "from app.utils import" in src and "normalize_password" in src + + +def test_password_cache_round_trips_nfd_to_nfc(): + """Behavioral guard: feed an NFD-composed string through the same + write path the prompt uses and confirm the cached attribute reads + back in NFC form, matching what tools that bypass _nfc would see.""" + import unicodedata + + from app.utils import normalize_password + + nfd = unicodedata.normalize("NFD", "passé") + assert not unicodedata.is_normalized("NFC", nfd), ( + "Test fixture must actually be in NFD form." + ) + + class _Fake: + _pdf_password = "" + + obj = _Fake() + # Mirror the WRITE-site idiom used by BasePage / editor / viewer. + obj._pdf_password = normalize_password(nfd) + assert unicodedata.is_normalized("NFC", obj._pdf_password), ( + "Cache must hold NFC after going through normalize_password." + ) + # And the on-screen form must be preserved (no characters dropped). + assert obj._pdf_password == "passé" def test_nfc_helper_behavior_on_combining_characters(): @@ -159,6 +246,15 @@ def test_text_edit_dialog_uses_qlineedit(): # new_text() must return .text(), not .toPlainText(). assert "self._edit.text()" in block assert "self._edit.toPlainText()" not in block + # R11 review F6: newlines in old_text must be sanitised before + # setText (QLineEdit truncates at the first \n, so the user would + # only see the first physical line of the detected string). + assert ".replace(\"\\n\"" in block, ( + "old_text newlines must be collapsed before QLineEdit.setText." + ) + assert ".replace(\"\\r\"" in block, ( + "old_text carriage returns must also be sanitised." + ) # ── #8 — DropFileEdit multi-URL warning ─────────────────────────────────