diff --git a/app/base.py b/app/base.py index 96fb35b..f6b509c 100644 --- a/app/base.py +++ b/app/base.py @@ -206,7 +206,15 @@ def set_compact_mode(self, active: bool, path: str = "") -> None: f"background:transparent; padding:2px 4px; text-align:left; }}" f"QPushButton#compact_link:hover {{ text-decoration: underline; }}" ) - link.clicked.connect(lambda: self.set_compact_mode(False)) + # R6 O2: guard against the page being destroyed between the + # Qt click queueing and the slot actually running. PySide6 + # has no QPointer, so use shiboken6.isValid() — without it + # the lambda may touch a dead C++ QWidget and crash with + # "Internal C++ object already deleted". The lambda is the + # only callback installed here, so cost is one isValid call + # per compact-mode exit. + link.clicked.connect( + lambda: self.set_compact_mode(False) if isValid(self) else None) self._form.insertWidget(0, link) self._compact_link = link @@ -294,6 +302,24 @@ def _resolve_output_dir(self, drop_widget, input_path: str = "") -> str: # ── encrypted-PDF helpers ────────────────────────────────────────────── + @staticmethod + def _nfc(pwd: str) -> str: + """Return the NFC-normalized form of ``pwd`` (R6 C1). + + Thin compatibility wrapper that now delegates to + :func:`app.utils.normalize_password` so the codebase has a + single source of truth for password normalisation (the R11 + review of PR-H flagged that tools reading ``self._pdf_password`` + directly bypassed the per-helper normalisation). Kept around so + BasePage subclasses calling ``self._nfc(...)`` keep working, + but new code should normalise at the WRITE site of the cache + (see PdfViewerPanel / EditorTab._load_pdf) so every consumer + — including the ~30 ``self._pdf_password`` reads across + ``tools/*.py`` — receives a deterministic value. + """ + from app.utils import normalize_password + return normalize_password(pwd) + def _maybe_prompt_password(self, path: str) -> bool: """If the PDF at `path` is encrypted, prompt the user; on success store the password on `self._pdf_password` and return True. Plain @@ -312,33 +338,47 @@ def _maybe_prompt_password(self, path: str) -> bool: if not doc.needs_pass: self._pdf_password = "" return True - if self._pdf_password and doc.authenticate(self._pdf_password): + if self._pdf_password and doc.authenticate(self._nfc(self._pdf_password)): return True finally: doc.close() - from app.utils import prompt_pdf_password + from app.utils import prompt_pdf_password, normalize_password ok, pwd = prompt_pdf_password(path, self) if not ok: return False - self._pdf_password = pwd + # NFC-normalise at WRITE time so every downstream consumer + # (tools/* that read self._pdf_password directly, plus the + # _open_reader / _open_fitz helpers) sees a deterministic + # value. R11 review C2 — see utils.normalize_password. + self._pdf_password = normalize_password(pwd) return True def _open_reader(self, path: str): """Open a pypdf PdfReader, decrypting with the stored password if - the file is encrypted.""" + the file is encrypted. + + The cached password is NFC-normalized before being passed to + :meth:`pypdf.PdfReader.decrypt`. See :meth:`_nfc` for the + rationale (R6 C1). + """ from pypdf import PdfReader r = PdfReader(path) if r.is_encrypted and self._pdf_password: - r.decrypt(self._pdf_password) + r.decrypt(self._nfc(self._pdf_password)) return r def _open_fitz(self, path: str): """Open a PyMuPDF Document, authenticating with the stored - password if needed.""" + password if needed. + + The cached password is NFC-normalized before being passed to + :meth:`fitz.Document.authenticate`. See :meth:`_nfc` for the + rationale (R6 C1). + """ import fitz doc = fitz.open(path) if doc.needs_pass and self._pdf_password: - doc.authenticate(self._pdf_password) + doc.authenticate(self._nfc(self._pdf_password)) return doc def _clear_pdf_password(self) -> None: diff --git a/app/editor/dialogs.py b/app/editor/dialogs.py index 27647ee..d5bec23 100644 --- a/app/editor/dialogs.py +++ b/app/editor/dialogs.py @@ -84,6 +84,16 @@ def __init__(self, filename: str, wrong: bool = False, parent=None): btns.addWidget(ca); btns.addWidget(ok) v.addLayout(btns) + # R11 G1: without an explicit tab order Qt walks widgets in the + # construction order (icon → title → password → Cancel → OK), + # but the dialog opens with focus on Cancel via the addStretch + # quirk on some platforms. Force a predictable, accessible + # path: password field → OK → Cancel. Keyboard-only users now + # land on the field they need to fill in first. + self._edit.setFocus() + self.setTabOrder(self._edit, ok) + self.setTabOrder(ok, ca) + def password(self) -> str: return self._edit.text() @@ -112,9 +122,23 @@ def __init__(self, old_text: str, font_size: float, parent=None): lbl_new.setStyleSheet(f"color:{pri}; font-size:10pt;") v.addWidget(lbl_new) - self._edit = QTextEdit() - self._edit.setPlainText(old_text) - self._edit.setMinimumHeight(80) + # R11 B2: the previous QTextEdit accepted newlines that + # ``page.insert_text`` (PyMuPDF) cannot render — each \n got + # rasterized as a "?" glyph in the output PDF. Use QLineEdit + # so the input is restricted to what the writer can actually + # produce; Enter submits via returnPressed, matching the + # password dialog above. + self._edit = QLineEdit() + # R11 review F6: ``old_text`` can contain ``\n``/``\r`` when the + # viewer extracted text that spans multiple PDF lines. QLineEdit + # silently truncates at the first newline, which hides the + # remainder of the original content from the user. Collapse + # newlines into spaces so the whole detected string stays + # editable; PyMuPDF's writer cannot render real newlines anyway + # (see R11 B2 above). + safe_text = (old_text or "").replace("\r\n", " ").replace("\n", " ").replace("\r", " ") + self._edit.setText(safe_text) + self._edit.returnPressed.connect(self.accept) v.addWidget(self._edit) btns = QHBoxLayout(); btns.setSpacing(8); btns.addStretch() @@ -125,7 +149,7 @@ def __init__(self, old_text: str, font_size: float, parent=None): v.addLayout(btns) def new_text(self) -> str: - return self._edit.toPlainText() + return self._edit.text() class _TextDialog(QDialog): diff --git a/app/editor/tab.py b/app/editor/tab.py index d8ee081..9a9f741 100644 --- a/app/editor/tab.py +++ b/app/editor/tab.py @@ -16,7 +16,10 @@ import qtawesome as qta from app.constants import ACCENT, TEXT_PRI, TEXT_SEC, DESKTOP, _LQ, _LP -from app.utils import ToolHeader, ActionBar, info_lbl, _paint_bg, show_error +from app.utils import ( + ToolHeader, ActionBar, info_lbl, _paint_bg, show_error, + normalize_password, +) from app.i18n import t from app.widgets import DropFileEdit, ColorPickerButton from app.editor.canvas import PdfEditCanvas, _get_icon_cursor @@ -621,7 +624,12 @@ def _load_pdf(self, p: str): ok, pwd = prompt_pdf_password(p, self) if not ok: return - self._pdf_password = pwd + # NFC-normalise at the WRITE site so every reader of + # self._pdf_password (~10 call sites in this file plus + # ~8 tools under tools/) receives a deterministic value + # without needing per-site normalisation. See R11 review + # C2 / utils.normalize_password. + self._pdf_password = normalize_password(pwd) elif not needs_pass: self._pdf_password = "" self._doc_path = p @@ -1350,6 +1358,24 @@ def _apply_forms(self, out): self._status(t("editor.forms.no_fields")) self._form_status.setText(t("editor.forms.no_fields")) return + # R10 review follow-up: an /AcroForm dict can exist with + # zero actual widgets (e.g. forms whose fields were + # flattened by a third-party tool but the dict was left + # behind). update_page_form_field_values then runs a + # silent no-op and the user gets no feedback. Surface + # the same no_fields status so the result matches the + # 'plain PDF' case above. Use the get_fields() count + # since pypdf already exposes it cheaply via the cached + # AcroForm tree — avoids importing fitz just for a + # widget count. + try: + _w_fields = _r.get_fields() or {} + except Exception: + _w_fields = {} + if not _w_fields: + self._status(t("editor.forms.no_fields")) + self._form_status.setText(t("editor.forms.no_fields")) + return for page in writer.pages: # auto_regenerate=True so the rendered widget appearance # actually picks up the new value when viewed in a third- diff --git a/app/i18n.py b/app/i18n.py index 89faecd..93e537c 100644 --- a/app/i18n.py +++ b/app/i18n.py @@ -1,12 +1,17 @@ """PDFApps – Internationalization (i18n) module.""" +import contextlib import json import locale +import logging import os +import shutil import sys import threading from typing import Callable +_log = logging.getLogger(__name__) + _TRANSLATIONS: dict = {} _LANG: str = "en" @@ -146,13 +151,45 @@ def _update_config(mutator: Callable[[dict], None]) -> None: """ with _CONFIG_LOCK: cfg: dict = {} + corrupt = False try: with open(_CONFIG_PATH, "r", encoding="utf-8") as f: cfg = json.load(f) + except FileNotFoundError: + cfg = {} except Exception: cfg = {} + corrupt = True if not isinstance(cfg, dict): cfg = {} + corrupt = True + # R8 N1: previously a corrupt config.json was silently overwritten + # with `{}`, wiping the user's saved language / dark_mode / recents + # without warning. Snapshot the broken file before resetting so + # support can recover settings (and so we have evidence the + # corruption happened rather than a silent reset triggered by us). + if corrupt: + try: + if (os.path.isfile(_CONFIG_PATH) + and os.path.getsize(_CONFIG_PATH) > 0): + # R11 review B5: include a timestamp in the backup + # name so multiple corruption events don't clobber + # each other (the previous `.corrupt.bak` was a + # single slot, so the second corruption would + # overwrite the first — losing the original + # evidence support needed to recover the user's + # settings). + from datetime import datetime + ts = datetime.now().strftime("%Y%m%d_%H%M%S") + backup_path = _CONFIG_PATH + f".corrupt-{ts}.bak" + with contextlib.suppress(Exception): + shutil.copy2(_CONFIG_PATH, backup_path) + _log.warning( + "config.json appeared corrupt; saved backup " + "to %s before reset", backup_path, + ) + except OSError: + pass mutator(cfg) _atomic_write_config(cfg) diff --git a/app/translations.json b/app/translations.json index 6af3822..d16fd06 100644 --- a/app/translations.json +++ b/app/translations.json @@ -61,6 +61,8 @@ "viewer.delete_comment": "Delete comment", "viewer.confirm_delete_comment": "Are you sure you want to delete this comment? This change is saved immediately and cannot be undone.", "viewer.drop_url_not_supported": "Online URLs are not supported. Please download the PDF first, then drag the local file here.", + "viewer.drop_many_pdfs_confirm": "Open {count} PDF files at once?", + "widgets.drop_first_only": "Only the first of {count} files was loaded.", "viewer.copy_chars": " Copy ({n} chars)", "search.placeholder": "Search in PDF...", "search.prev": "Previous match", @@ -660,6 +662,8 @@ "viewer.delete_comment": "Apagar comentário", "viewer.confirm_delete_comment": "Tens a certeza que queres apagar este comentário? Esta alteração é guardada imediatamente e não pode ser desfeita.", "viewer.drop_url_not_supported": "URLs da internet não são suportados. Descarrega primeiro o PDF e depois arrasta o ficheiro local para aqui.", + "viewer.drop_many_pdfs_confirm": "Abrir {count} ficheiros PDF de uma vez?", + "widgets.drop_first_only": "Apenas o primeiro de {count} ficheiros foi carregado.", "viewer.copy_chars": " Copiar ({n} car.)", "search.placeholder": "Pesquisar no PDF...", "search.prev": "Resultado anterior", @@ -1259,6 +1263,8 @@ "viewer.delete_comment": "Eliminar comentario", "viewer.confirm_delete_comment": "¿Seguro que quieres eliminar este comentario? Este cambio se guarda inmediatamente y no se puede deshacer.", "viewer.drop_url_not_supported": "Las URLs en línea no son compatibles. Descarga primero el PDF y luego arrastra el archivo local aquí.", + "viewer.drop_many_pdfs_confirm": "¿Abrir {count} archivos PDF a la vez?", + "widgets.drop_first_only": "Solo se cargó el primero de {count} archivos.", "viewer.copy_chars": " Copiar ({n} caract.)", "search.placeholder": "Buscar en PDF...", "search.prev": "Resultado anterior", @@ -1858,6 +1864,8 @@ "viewer.delete_comment": "Supprimer le commentaire", "viewer.confirm_delete_comment": "Voulez-vous vraiment supprimer ce commentaire ? Cette modification est enregistrée immédiatement et ne peut pas être annulée.", "viewer.drop_url_not_supported": "Les URL en ligne ne sont pas prises en charge. Téléchargez d'abord le PDF, puis faites glisser le fichier local ici.", + "viewer.drop_many_pdfs_confirm": "Ouvrir {count} fichiers PDF en une fois ?", + "widgets.drop_first_only": "Seul le premier des {count} fichiers a été chargé.", "viewer.copy_chars": " Copier ({n} caract.)", "search.placeholder": "Rechercher dans le PDF...", "search.prev": "Résultat précédent", @@ -2457,6 +2465,8 @@ "viewer.delete_comment": "Kommentar löschen", "viewer.confirm_delete_comment": "Soll dieser Kommentar wirklich gelöscht werden? Diese Änderung wird sofort gespeichert und kann nicht rückgängig gemacht werden.", "viewer.drop_url_not_supported": "Online-URLs werden nicht unterstützt. Laden Sie die PDF zuerst herunter und ziehen Sie dann die lokale Datei hierher.", + "viewer.drop_many_pdfs_confirm": "{count} PDF-Dateien gleichzeitig öffnen?", + "widgets.drop_first_only": "Nur die erste der {count} Dateien wurde geladen.", "viewer.copy_chars": " Kopieren ({n} Zeichen)", "search.placeholder": "Im PDF suchen...", "search.prev": "Vorheriges Ergebnis", @@ -3056,6 +3066,8 @@ "viewer.delete_comment": "删除批注", "viewer.confirm_delete_comment": "确定要删除此批注吗?此更改将立即保存且无法撤销。", "viewer.drop_url_not_supported": "不支持在线 URL。请先下载 PDF,然后将本地文件拖到此处。", + "viewer.drop_many_pdfs_confirm": "一次打开 {count} 个 PDF 文件?", + "widgets.drop_first_only": "仅加载了 {count} 个文件中的第一个。", "viewer.copy_chars": " 复制 ({n} 个字符)", "search.placeholder": "在 PDF 中搜索...", "search.prev": "上一个匹配", @@ -3655,6 +3667,8 @@ "viewer.delete_comment": "Elimina commento", "viewer.confirm_delete_comment": "Sei sicuro di voler eliminare questo commento? Questa modifica viene salvata immediatamente e non può essere annullata.", "viewer.drop_url_not_supported": "Gli URL online non sono supportati. Scarica prima il PDF, poi trascina il file locale qui.", + "viewer.drop_many_pdfs_confirm": "Aprire {count} file PDF contemporaneamente?", + "widgets.drop_first_only": "È stato caricato solo il primo dei {count} file.", "viewer.copy_chars": " Copia ({n} caratteri)", "search.placeholder": "Cerca nel PDF...", "search.prev": "Corrispondenza precedente", @@ -4254,6 +4268,8 @@ "viewer.delete_comment": "Opmerking verwijderen", "viewer.confirm_delete_comment": "Weet je zeker dat je deze opmerking wilt verwijderen? Deze wijziging wordt onmiddellijk opgeslagen en kan niet ongedaan worden gemaakt.", "viewer.drop_url_not_supported": "Online-URL's worden niet ondersteund. Download eerst de PDF en sleep vervolgens het lokale bestand hierheen.", + "viewer.drop_many_pdfs_confirm": "{count} PDF-bestanden tegelijk openen?", + "widgets.drop_first_only": "Alleen het eerste van {count} bestanden is geladen.", "viewer.copy_chars": " Kopiëren ({n} tekens)", "search.placeholder": "Zoeken in PDF...", "search.prev": "Vorige overeenkomst", diff --git a/app/utils.py b/app/utils.py index d7348c0..33aaeab 100644 --- a/app/utils.py +++ b/app/utils.py @@ -72,19 +72,34 @@ def parse_pages(text: str, total: int) -> list: part = part.strip() if not part: continue - try: - if "-" in part: - a, b = part.split("-", 1) - a_int, b_int = int(a), int(b) - else: + # R7 E5 follow-up: accept open-ended ranges so '3-' means + # 'from page 3 to the end' and '-5' means 'from page 1 to + # page 5'. Without this, '3-' raised ValueError because + # int('') failed. Reject '-' alone (no bounds at all); + # otherwise default the missing side to the document's + # extreme. Matches the pdftk-style 1- range syntax. + if "-" in part: + a, b = part.split("-", 1) + a = a.strip(); b = b.strip() + if not a and not b: + raise ValueError( + t("tool.err.bad_page_input", text=part)) + try: + a_int = int(a) if a else 1 + b_int = int(b) if b else total + except ValueError as exc: + raise ValueError( + t("tool.err.bad_page_input", text=part)) from exc + else: + try: a_int = b_int = int(part) - except ValueError as exc: - # int() raised "invalid literal for int() with base 10" — - # re-raise with a translated, user-actionable message so - # show_error() surfaces something useful instead of the - # raw Python error. - raise ValueError( - t("tool.err.bad_page_input", text=part)) from exc + except ValueError as exc: + # int() raised "invalid literal for int() with base 10" — + # re-raise with a translated, user-actionable message so + # show_error() surfaces something useful instead of the + # raw Python error. + raise ValueError( + t("tool.err.bad_page_input", text=part)) from exc if "-" in part: if b_int - a_int + 1 > _MAX_PAGES: raise ValueError( @@ -120,6 +135,32 @@ def pick_folder(parent: QWidget) -> str: return QFileDialog.getExistingDirectory(parent, t("btn.select_folder")) +def normalize_password(pwd: str) -> str: + """Return ``pwd`` normalised to Unicode NFC form (R6 C1 / R11 review C2). + + Passwords typed on macOS frequently land in NFD (decomposed) form, + while Windows clipboards produce NFC (composed) form. The on-screen + glyphs are identical but the underlying byte sequences are not, so a + password that authenticates on one OS may fail on the other. + + Normalising at the WRITE side of the cache (i.e. wherever + ``self._pdf_password = pwd`` happens) makes the cached value + deterministic and frees every downstream consumer + (``editor/tab.py``, ``viewer/panel.py``, ``tools/*``) from having to + remember to normalise on read. Returns falsy inputs unchanged so the + helper is safe to apply unconditionally. + """ + if not pwd: + return pwd + try: + import unicodedata + return unicodedata.normalize("NFC", pwd) + except (TypeError, ValueError): + # str inputs only ever raise on absurd code points; falling + # back to the raw string is safer than crashing. + return pwd + + def wipe_pdf_password(obj) -> None: """Best-effort wipe of the cached PDF password attribute on ``obj``. diff --git a/app/viewer/panel.py b/app/viewer/panel.py index 980dac5..505ee55 100644 --- a/app/viewer/panel.py +++ b/app/viewer/panel.py @@ -13,7 +13,7 @@ import qtawesome as qta from app.constants import ACCENT, TEXT_SEC, _LQ, DESKTOP -from app.utils import _paint_bg +from app.utils import _paint_bg, normalize_password from app.i18n import t from app.viewer.canvas import _SelectCanvas @@ -510,7 +510,13 @@ def load(self, path: str): if dlg.exec() != QDialog.DialogCode.Accepted: doc.close(); return if doc.authenticate(dlg.password()): - self._pdf_password = dlg.password() + # NFC-normalise at WRITE time so every consumer + # downstream (canvas.load, propagation to editor + # / tools) sees a deterministic value. R11 review + # C2: per-call-site normalisation missed the ~30 + # raw reads under tools/ — see + # utils.normalize_password. + self._pdf_password = normalize_password(dlg.password()) break wrong = True self._current_path = path diff --git a/app/widgets.py b/app/widgets.py index dfd7ef6..1289c19 100644 --- a/app/widgets.py +++ b/app/widgets.py @@ -6,6 +6,7 @@ from PySide6.QtGui import QDragEnterEvent, QDropEvent, QColor, QPixmap, QPainter from PySide6.QtWidgets import ( QWidget, QHBoxLayout, QLabel, QPushButton, QFileDialog, QColorDialog, + QMessageBox, ) import qtawesome as qta @@ -167,8 +168,22 @@ def dropEvent(self, e: QDropEvent): self.setProperty("drag_active", "false") self.style().unpolish(self); self.style().polish(self) urls = e.mimeData().urls() - if urls and self._url_accepted(urls[0]): - self.set_path(urls[0].toLocalFile()) + # R11 N3: DropFileEdit is a single-file widget but used to + # silently accept only urls[0] when the user dragged multiple + # files in. The extra files vanished with no feedback. Count + # the urls that PASS the extension filter (so dragging one + # PDF plus an unrelated screenshot still counts as 'one PDF + # dropped' and skips the warning) and surface a friendly + # notice when more than one valid file was dropped. + accepted = [u for u in urls if self._url_accepted(u)] + if not accepted: + return + if len(accepted) > 1: + QMessageBox.information( + self, t("msg.info"), + t("widgets.drop_first_only", count=len(accepted)), + ) + self.set_path(accepted[0].toLocalFile()) def _browse(self): if self._save: diff --git a/app/window.py b/app/window.py index b277454..36e5027 100644 --- a/app/window.py +++ b/app/window.py @@ -1033,6 +1033,20 @@ def dropEvent(self, e): if f.lower().endswith(".pdf") and os.path.isfile(os.path.join(path, f)) ) + # R10 review nit: dropping a folder used to silently open + # every PDF inside it. A folder with 200 PDFs would + # flood the tab bar and lock the UI for several seconds + # with no escape. Confirm above a reasonable threshold + # before we start spawning tabs. + if len(pdfs) > 20: + reply = QMessageBox.question( + self, t("msg.confirm"), + t("viewer.drop_many_pdfs_confirm", count=len(pdfs)), + QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No, + QMessageBox.StandardButton.No, + ) + if reply != QMessageBox.StandardButton.Yes: + continue for pdf in pdfs: self._load_and_track(pdf) continue diff --git a/pdfapps.py b/pdfapps.py index 754a5b7..0d8edb5 100644 --- a/pdfapps.py +++ b/pdfapps.py @@ -1,5 +1,6 @@ """PDFApps – entry point.""" import argparse +import logging import sys import os @@ -28,6 +29,42 @@ from app.utils import _make_palette, setup_logging +def _excepthook(exc_type, exc_value, exc_traceback): + """Route uncaught exceptions through the logging framework so + PyInstaller --windowed builds still surface crashes in the log + file instead of silently swallowing them on closed stderr. + + Without this hook (R7 I2), tracebacks raised from queued Qt slots + in console=False builds (Windows/macOS PyInstaller --windowed) are + written to a stderr stream wired to ``NUL`` / ``/dev/null`` and + never reach ``pdfapps.log`` either. The default :data:`sys.excepthook` + only prints to stderr, so the user sees a silent "nothing happened" + while we have zero diagnostic trail. + + KeyboardInterrupt keeps the default behavior so Ctrl-C in a console + build still exits cleanly without spamming the log. + """ + if issubclass(exc_type, KeyboardInterrupt): + sys.__excepthook__(exc_type, exc_value, exc_traceback) + return + logging.getLogger(__name__).critical( + "Uncaught exception", + exc_info=(exc_type, exc_value, exc_traceback), + ) + # Best-effort: surface a user-facing dialog so the app doesn't appear + # to silently hang after a slot blew up. Guarded against the (rare) + # case where the QApplication is gone or QMessageBox itself raises. + try: + from PySide6.QtWidgets import QApplication, QMessageBox + if QApplication.instance() is not None: + QMessageBox.critical( + None, "Unexpected error", + f"{exc_type.__name__}: {exc_value}\n\nDetails logged.", + ) + except Exception: + pass + + def _load_dark_pref() -> bool: try: import json @@ -68,6 +105,10 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: def main(): setup_logging() + # Install the global excepthook AFTER setup_logging (so the rotating + # file handler is in place) and BEFORE QApplication so any crash in + # window construction is already covered. + sys.excepthook = _excepthook # Parse BEFORE QApplication so --help / --version exit cleanly # without bringing up the Qt event loop (and the splash screen). args = _parse_args(sys.argv[1:]) diff --git a/tests/test_audit_r11.py b/tests/test_audit_r11.py index 99f781e..88c6957 100644 --- a/tests/test_audit_r11.py +++ b/tests/test_audit_r11.py @@ -271,12 +271,21 @@ def test_parse_pages_raises_friendly_message_for_invalid_input(): def test_parse_pages_raises_friendly_message_for_open_range(): + """Open-ended ranges with non-numeric bounds (e.g. 'abc-') still + surface the translated bad_page_input message. PR-H fix #10 made + purely-numeric open ranges like '1-' valid (interpreted as + '1..total'), so the regression target moves to a still-invalid + input.""" from app.utils import parse_pages with pytest.raises(ValueError) as exc: - parse_pages("1-", 10) + parse_pages("abc-", 10) msg = str(exc.value) - assert "1-" in msg + assert "abc-" in msg assert "invalid literal for int" not in msg + # Empty range '-' must also fail (no bounds at all). + with pytest.raises(ValueError) as exc2: + parse_pages("-", 10) + assert "invalid literal for int" not in str(exc2.value) def test_parse_pages_still_accepts_valid_input(): diff --git a/tests/test_polish_lows.py b/tests/test_polish_lows.py new file mode 100644 index 0000000..86f4810 --- /dev/null +++ b/tests/test_polish_lows.py @@ -0,0 +1,358 @@ +"""Source-level + behavioral regression tests for PR-H polish fixes. + +Bug map (PR-H worklist): + #1 Global sys.excepthook in pdfapps.py main entry (R7 I2 LOW) + #2 config.json backup before reset on corruption (R8 N1 LOW) + #3 NFC password normalization in BasePage helpers (R6 C1 LOW) + #4 Explicit tab order in _PdfPasswordDialog (R11 G1 LOW) + #5 Drop folder >20 PDFs confirmation (R10 review LOW) + #6 AcroForm with zero widgets no_fields short-circuit (R10 review LOW) + #7 _TextEditDialog single-line QLineEdit (R11 B2 LOW) + #8 DropFileEdit multi-URL warning (R11 N3 LOW) + #9 set_compact_mode lambda guarded with shiboken6.isValid (R6 O2 LOW) + #10 parse_pages accepts open-ended ranges (R7 E5 LOW) +""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +ROOT = Path(__file__).resolve().parent.parent + + +def _read(rel: str) -> str: + return (ROOT / rel).read_text(encoding="utf-8") + + +# ── #1 — sys.excepthook installed in main entry ───────────────────────── + + +def test_pdfapps_installs_global_excepthook(): + """The entry point must install sys.excepthook so uncaught + exceptions in PyInstaller --windowed builds reach pdfapps.log + instead of vanishing into a closed stderr.""" + src = _read("pdfapps.py") + assert "sys.excepthook = _excepthook" in src, ( + "main() must wire sys.excepthook to the logging hook." + ) + # KeyboardInterrupt must keep the default behaviour so Ctrl-C still + # exits cleanly without spamming the log. + assert "KeyboardInterrupt" in src + assert "sys.__excepthook__" in src + # The hook must log via the logging framework (not bare print). + assert "logging.getLogger" in src + assert "exc_info=" in src + + +# ── #2 — corrupt config.json backed up before reset ───────────────────── + + +def test_i18n_backs_up_corrupt_config_before_reset(): + """_update_config must copy a corrupt config.json aside as + .corrupt-.bak before overwriting it with {}. + + R11 review B5 added the ``-`` suffix so successive + corruption events do not clobber each other; the marker we look + for therefore moves from the literal ``.corrupt.bak`` to the + ``.corrupt-`` prefix plus ``.bak`` extension. + """ + src = _read("app/i18n.py") + assert ".corrupt-" in src, "Timestamped backup prefix marker missing" + assert ".bak" in src, "Backup extension marker missing" + assert "shutil.copy2" in src, ( + "_update_config must shutil.copy2 the corrupt file aside." + ) + # The 'missing file' path must NOT trigger a backup (would log noise + # on every fresh install). + assert "FileNotFoundError" in src, ( + "FileNotFoundError handled separately so first run is silent." + ) + # The timestamp source must be wall-clock so the backups sort + # naturally and we can tell which event came first. + assert "datetime.now()" in src + + +# ── #3 — NFC password normalization ───────────────────────────────────── + + +def test_base_normalizes_passwords_to_nfc(): + """All three encrypted-PDF entry points must route the password + through ``unicodedata.normalize('NFC', ...)`` so a macOS-typed (NFD) + password unlocks the same PDF on Windows (NFC). + + After the R11 review the canonical normalisation function moved to + :func:`app.utils.normalize_password` (so all the + ``self._pdf_password`` *read* sites in ``tools/*`` and + ``editor/tab.py`` are covered transitively when the cache is set); + ``BasePage._nfc`` is now a thin delegator. The actual + ``unicodedata.normalize("NFC", ...)`` call therefore lives in + ``utils.py`` rather than ``base.py``, which is what we assert here. + + After the follow-up R11 review fix, normalisation now also happens at + the WRITE site of the cache (BasePage._maybe_prompt_password) so the + ``self._pdf_password`` attribute itself is deterministic for every + downstream consumer in ``tools/*`` that reads it directly. The three + helpers keep their defensive read-side ``self._nfc(self._pdf_password)`` + so a value cached before this fix (or set externally without going + through ``normalize_password``) still authenticates correctly. + """ + base_src = _read("app/base.py") + utils_src = _read("app/utils.py") + # Normalisation primitive lives in utils.py now. + assert "import unicodedata" in utils_src + assert "unicodedata.normalize(\"NFC\"" in utils_src + assert "def normalize_password(" in utils_src + # BasePage helper still exists and still has the three read-side + # call sites (defensive — the WRITE site below covers the cache). + assert "def _nfc(" in base_src + assert base_src.count("self._nfc(self._pdf_password)") == 3, ( + "Expected three defensive read-side _nfc calls (auth probe, " + "PdfReader.decrypt, fitz.authenticate)." + ) + # WRITE-site normalisation: the prompt path must NFC-normalise + # before storing on self._pdf_password so every tool that reads + # the attribute raw (~30 sites under tools/*) is covered without + # per-call instrumentation. + assert "self._pdf_password = normalize_password(pwd)" in base_src + # And the BasePage helper must delegate to the utils primitive so + # the two paths cannot drift. + assert "normalize_password" in base_src + + +def test_editor_tab_normalizes_password_on_cache_write(): + """R11 review C2 follow-up: the editor's _load_pdf path stores the + prompted password directly on self._pdf_password. It must route + through normalize_password so tools reading the cache raw + (merge, watermark, ocr, page_numbers, convert, nup, ...) see the + same NFC string the BasePage helpers would compare against.""" + src = _read("app/editor/tab.py") + assert "self._pdf_password = normalize_password(pwd)" in src + assert "from app.utils import" in src and "normalize_password" in src + + +def test_viewer_panel_normalizes_password_on_cache_write(): + """R11 review C2 follow-up: PdfViewerPanel._open_path stores the + typed password on self._pdf_password. Same NFC-at-WRITE rule applies + — this is the value propagated to compact-mode tools by + MainWindow._on_tab_changed.""" + src = _read("app/viewer/panel.py") + assert "normalize_password(dlg.password())" in src + assert "from app.utils import" in src and "normalize_password" in src + + +def test_password_cache_round_trips_nfd_to_nfc(): + """Behavioral guard: feed an NFD-composed string through the same + write path the prompt uses and confirm the cached attribute reads + back in NFC form, matching what tools that bypass _nfc would see.""" + import unicodedata + + from app.utils import normalize_password + + nfd = unicodedata.normalize("NFD", "passé") + assert not unicodedata.is_normalized("NFC", nfd), ( + "Test fixture must actually be in NFD form." + ) + + class _Fake: + _pdf_password = "" + + obj = _Fake() + # Mirror the WRITE-site idiom used by BasePage / editor / viewer. + obj._pdf_password = normalize_password(nfd) + assert unicodedata.is_normalized("NFC", obj._pdf_password), ( + "Cache must hold NFC after going through normalize_password." + ) + # And the on-screen form must be preserved (no characters dropped). + assert obj._pdf_password == "passé" + + +def test_nfc_helper_behavior_on_combining_characters(): + """Smoke test: a Unicode string composed in NFD (e + combining acute) + must become its NFC singleton (é) before pypdf/PyMuPDF see it.""" + import unicodedata + + nfd = "passé" # 'e' + combining acute + nfc = "passé".encode("utf-8") + normalized = unicodedata.normalize("NFC", nfd) + assert normalized != nfd, "Test input must actually differ from NFC" + assert unicodedata.is_normalized("NFC", normalized) + + +# ── #4 — _PdfPasswordDialog explicit tab order ────────────────────────── + + +def test_password_dialog_sets_tab_order(): + """The dialog must call setTabOrder(_edit, ok) and setTabOrder(ok, ca) + so keyboard users land on the password field and Tab through OK then + Cancel.""" + src = _read("app/editor/dialogs.py") + assert "self.setTabOrder(self._edit, ok)" in src + assert "self.setTabOrder(ok, ca)" in src + # Initial focus is on the password field, not on Cancel. + assert "self._edit.setFocus()" in src + + +# ── #5 — drop folder >20 PDFs confirmation ────────────────────────────── + + +def test_window_confirms_drop_of_many_pdfs(): + """Dropping a folder with more than 20 PDFs must show a confirm + QMessageBox so the user can back out before tabs flood the UI.""" + src = _read("app/window.py") + assert "viewer.drop_many_pdfs_confirm" in src + assert "len(pdfs) > 20" in src + # Default button is No so accidental drops do nothing surprising. + assert "QMessageBox.StandardButton.No" in src + + +# ── #6 — AcroForm dict but zero widgets ───────────────────────────────── + + +def test_editor_zero_widget_acroform_short_circuits(): + """When /AcroForm exists but get_fields() is empty, + _apply_forms must surface the no_fields status instead of running + update_page_form_field_values as a silent no-op.""" + src = _read("app/editor/tab.py") + # The new guard reads from _r.get_fields() and routes through the + # same no_fields message used by the missing-AcroForm branch. + assert "_r.get_fields()" in src + # Both no_fields branches reside in _apply_forms. + assert src.count("editor.forms.no_fields") >= 3, ( + "Expected three editor.forms.no_fields references (load-time + " + "missing-acroform + zero-widget branches)." + ) + + +# ── #7 — _TextEditDialog single-line QLineEdit ────────────────────────── + + +def test_text_edit_dialog_uses_qlineedit(): + """_TextEditDialog must build a QLineEdit so the user can't enter + newlines that PyMuPDF's insert_text would rasterise as '?'.""" + src = _read("app/editor/dialogs.py") + # Locate the class block. + cls_start = src.index("class _TextEditDialog") + cls_end = src.index("class ", cls_start + 1) + block = src[cls_start:cls_end] + assert "self._edit = QLineEdit()" in block, ( + "_TextEditDialog._edit must be a QLineEdit (single-line)." + ) + assert "self._edit = QTextEdit()" not in block, ( + "QTextEdit must not be used inside _TextEditDialog any more." + ) + # new_text() must return .text(), not .toPlainText(). + assert "self._edit.text()" in block + assert "self._edit.toPlainText()" not in block + # R11 review F6: newlines in old_text must be sanitised before + # setText (QLineEdit truncates at the first \n, so the user would + # only see the first physical line of the detected string). + assert ".replace(\"\\n\"" in block, ( + "old_text newlines must be collapsed before QLineEdit.setText." + ) + assert ".replace(\"\\r\"" in block, ( + "old_text carriage returns must also be sanitised." + ) + + +# ── #8 — DropFileEdit multi-URL warning ───────────────────────────────── + + +def test_drop_file_edit_warns_on_multiple_urls(): + """DropFileEdit.dropEvent must surface a QMessageBox.information + when more than one accepted file is dropped on the single-file + widget.""" + src = _read("app/widgets.py") + assert "QMessageBox" in src + assert "widgets.drop_first_only" in src + assert "len(accepted) > 1" in src + + +# ── #9 — set_compact_mode lambda isValid guard ────────────────────────── + + +def test_compact_mode_lambda_uses_shiboken_isvalid(): + """The 'Change source' compact-mode link's lambda must guard + self.set_compact_mode(False) behind shiboken6.isValid(self) so a + queued click does not touch a destroyed page.""" + src = _read("app/base.py") + # Locate the lambda close to the compact_link section. + assert "link.clicked.connect(" in src + # The guard must wrap the slot. + assert "set_compact_mode(False) if isValid(self) else None" in src + + +# ── #10 — parse_pages open-ended range support ────────────────────────── + + +def test_parse_pages_accepts_open_ended_start(): + """'3-' on a 10-page document must yield pages 3..10 (0-indexed + 2..9). Previously raised ValueError.""" + from app.utils import parse_pages + + assert parse_pages("3-", 10) == [2, 3, 4, 5, 6, 7, 8, 9] + + +def test_parse_pages_accepts_open_ended_end(): + """'-5' must yield pages 1..5 (0-indexed 0..4).""" + from app.utils import parse_pages + + assert parse_pages("-5", 10) == [0, 1, 2, 3, 4] + + +def test_parse_pages_rejects_bare_dash(): + """'-' alone has no bounds at all and must still raise with the + friendly message.""" + from app.utils import parse_pages + + with pytest.raises(ValueError) as exc: + parse_pages("-", 10) + msg = str(exc.value) + # The friendly message echoes the offending substring and avoids + # the raw Python error. + assert "-" in msg + assert "invalid literal for int" not in msg + + +def test_parse_pages_open_range_combined_with_csv(): + """Open ranges must compose with CSV input — '1,3-' must yield the + full document and stay deduped/sorted.""" + from app.utils import parse_pages + + assert parse_pages("1,3-", 5) == [0, 2, 3, 4] + + +# ── i18n parity guards ───────────────────────────────────────────────── + + +def test_new_translation_keys_present_in_all_languages(): + """Both new PR-H keys must ship for all 8 supported languages + (en, pt, es, fr, de, zh, it, nl) so t() never falls back to the + raw key name in the UI.""" + raw = _read("app/translations.json") + data = json.loads(raw) + assert set(data.keys()) == {"en", "pt", "es", "fr", "de", "zh", "it", "nl"} + for key in ("viewer.drop_many_pdfs_confirm", "widgets.drop_first_only"): + missing = [lang for lang, kv in data.items() if key not in kv] + assert not missing, f"{key} missing in: {missing}" + # All language sections must share the same key count. + counts = {lang: len(kv) for lang, kv in data.items()} + assert len(set(counts.values())) == 1, ( + f"Key counts diverge across languages: {counts}" + ) + + +def test_new_translation_keys_use_count_placeholder(): + """Both new keys take a {count} placeholder; if a translator dropped + it the t() format would silently render '... files were loaded.' with + no number. Catch the regression at test time.""" + data = json.loads(_read("app/translations.json")) + for lang, kv in data.items(): + assert "{count}" in kv["viewer.drop_many_pdfs_confirm"], ( + f"{lang} viewer.drop_many_pdfs_confirm dropped {{count}}" + ) + assert "{count}" in kv["widgets.drop_first_only"], ( + f"{lang} widgets.drop_first_only dropped {{count}}" + )