From fc818ab13921f7bb72c9824d97fe780e5f0246de Mon Sep 17 00:00:00 2001 From: Carlos Uribe Date: Sat, 24 Jan 2026 15:04:33 -0800 Subject: [PATCH 01/12] reformatted dicom receiver by splitting organization of files to a new module for better post processing --- pytheranostics/dicomtools/__init__.py | 7 + pytheranostics/dicomtools/dicom_organizer.py | 414 +++++++++++++++++++ pytheranostics/dicomtools/dicom_receiver.py | 328 +-------------- 3 files changed, 433 insertions(+), 316 deletions(-) create mode 100644 pytheranostics/dicomtools/dicom_organizer.py diff --git a/pytheranostics/dicomtools/__init__.py b/pytheranostics/dicomtools/__init__.py index 1ab4f90..16510a2 100644 --- a/pytheranostics/dicomtools/__init__.py +++ b/pytheranostics/dicomtools/__init__.py @@ -1 +1,8 @@ """DICOM utilities exposed at the package level.""" + +from .dicom_organizer import organize_folder_by_cycles, summarize_timepoints + +__all__ = [ + "organize_folder_by_cycles", + "summarize_timepoints", +] diff --git a/pytheranostics/dicomtools/dicom_organizer.py b/pytheranostics/dicomtools/dicom_organizer.py new file mode 100644 index 0000000..2f03040 --- /dev/null +++ b/pytheranostics/dicomtools/dicom_organizer.py @@ -0,0 +1,414 @@ +"""Utilities for organizing DICOM files by patient, cycle, and timepoint.""" + +import logging +from datetime import datetime, timedelta +from pathlib import Path +from typing import Dict, List, Optional + +import pydicom + +logger = logging.getLogger(__name__) + + +def _parse_dt(date_str: Optional[str], time_str: Optional[str]) -> Optional[datetime]: + """Parse common DICOM date/time fields to a datetime object. + + Parameters + ---------- + date_str : str | None + DICOM DA (YYYYMMDD) + time_str : str | None + DICOM TM (HHMMSS.frac) + + Returns + ------- + datetime | None + Parsed datetime or None if not enough info + """ + if not date_str: + return None + try: + y = int(date_str[0:4]) + m = int(date_str[4:6]) + d = int(date_str[6:8]) + if time_str: + hh = int(time_str[0:2]) if len(time_str) >= 2 else 0 + mm = int(time_str[2:4]) if len(time_str) >= 4 else 0 + ss = int(time_str[4:6]) if len(time_str) >= 6 else 0 + micro = 0 + if len(time_str) > 7 and "." in time_str: + frac = time_str.split(".")[-1] + # pad/cut to microseconds + frac = (frac + "000000")[:6] + micro = int(frac) + return datetime(y, m, d, hh, mm, ss, micro) + return datetime(y, m, d) + except Exception: + return None + + +def _series_datetime_from_any(dcm: pydicom.Dataset) -> Optional[datetime]: + """Best-effort extraction of a datetime for a DICOM series instance. + + Tries AcquisitionDate/Time first (most accurate), then SeriesDate/Time, + then ContentDate/Time, finally falls back to StudyDate/Time. 
+    """
+    # Acquisition (most accurate for actual scan time)
+    dt = _parse_dt(
+        getattr(dcm, "AcquisitionDate", None), getattr(dcm, "AcquisitionTime", None)
+    )
+    if dt:
+        return dt
+    # Series
+    dt = _parse_dt(getattr(dcm, "SeriesDate", None), getattr(dcm, "SeriesTime", None))
+    if dt:
+        return dt
+    # Content
+    dt = _parse_dt(getattr(dcm, "ContentDate", None), getattr(dcm, "ContentTime", None))
+    if dt:
+        return dt
+    # Study
+    return _parse_dt(getattr(dcm, "StudyDate", None), getattr(dcm, "StudyTime", None))
+
+
+def organize_folder_by_cycles(
+    storage_root: Path | str,
+    output_base: Path | str | None = None,
+    *,
+    cycle_gap_days: float = 15,
+    timepoint_separation_days: float = 1,
+    move: bool = True,
+    patient_id_filter: Optional[List[str]] = None,
+) -> Dict[str, Dict[str, Dict[str, List[Path]]]]:
+    """Organize a folder of DICOM files into a Patient/Cycle/Timepoint structure.
+
+    This scans ``storage_root`` recursively for ``*.dcm`` files, groups them by
+    PatientID and StudyDate, then creates folders like:
+
+        PatientID/Cycle1/tp1/CT
+        PatientID/Cycle1/tp1/SPECT
+        PatientID/Cycle1/tp1/CT/RTstruct
+
+    Behavior mirrors ``organize_by_cycles()`` but requires neither a running
+    receiver nor a metadata file; grouping is inferred from DICOM tags.
+
+    Parameters
+    ----------
+    storage_root : Path | str
+        Root directory to scan for DICOM files (searched recursively).
+    output_base : Path | str | None
+        Base directory where organized output will be created. Defaults to
+        ``storage_root`` when None.
+    cycle_gap_days : float
+        Start a new cycle if consecutive study dates differ by >= this many days.
+    timepoint_separation_days : float
+        Start a new timepoint when the datetime gap is >= this many days (can be fractional, e.g., 0.2 ≈ 4.8 hours).
+    move : bool
+        If True, move files (and prune emptied dirs opportunistically). If False, copy files.
+    patient_id_filter : list[str] | None
+        If provided, only organize these PatientIDs.
+ + Returns + ------- + dict + Mapping: {PatientID: {"CycleX": {"tpY": [Path, ...]}}} + """ + storage_root = Path(storage_root) + if output_base is None: + output_base = storage_root + output_base = Path(output_base) + + index: Dict[str, Dict[tuple, List[Path]]] = {} + rep_dt_by_series: Dict[str, Dict[tuple, List[datetime]]] = {} + + def _read_minimal(dcm_path: Path) -> Optional[pydicom.Dataset]: + try: + return pydicom.dcmread(str(dcm_path), stop_before_pixels=True, force=True) + except Exception: + return None + + candidates: set[Path] = set() + for pattern in ("*.dcm", "*.DCM"): + candidates.update(storage_root.rglob(pattern)) + + for dcm_file in sorted(candidates): + ds = _read_minimal(dcm_file) + if ds is None: + continue + + patient_id = getattr(ds, "PatientID", None) or "UNKNOWN" + if patient_id_filter and patient_id not in patient_id_filter: + continue + + dt = _series_datetime_from_any(ds) + study_date = getattr(ds, "StudyDate", None) + if dt is None: + # Last-resort fallback to file modification time to help split same-day scans + try: + dt = datetime.fromtimestamp(dcm_file.stat().st_mtime) + except Exception: + dt = None + + if not study_date: + if dt: + study_date = dt.strftime("%Y%m%d") + else: + study_date = "00000000" + + modality = getattr(ds, "Modality", None) or "UNKNOWN" + series_number = getattr(ds, "SeriesNumber", None) + try: + series_number = int(series_number) if series_number is not None else -1 + except Exception: + series_number = -1 + + key = (study_date, modality, series_number) + index.setdefault(patient_id, {}).setdefault(key, []).append(dcm_file) + + if dt is None: + try: + dt = datetime.strptime(study_date, "%Y%m%d") + except Exception: + dt = datetime(1900, 1, 1) + + rep_dt_by_series.setdefault(patient_id, {}).setdefault(key, []).append(dt) + + results: Dict[str, Dict[str, Dict[str, List[Path]]]] = {} + + for patient_id, series_map in index.items(): + series_entries: List[Dict[str, object]] = [] + for key, files in series_map.items(): + study_date, modality, series_number = key + dt_list = rep_dt_by_series.get(patient_id, {}).get(key, []) + rep_dt = min(dt_list) if dt_list else datetime(1900, 1, 1) + + # Sub-group files within this series by datetime gaps to split same series_number across timepoints + # Sort files by their datetime and split into subgroups when gap >= timepoint_separation_days + file_dts: List[tuple[Path, datetime]] = [] + for f in files: + try: + ds = pydicom.dcmread(str(f), stop_before_pixels=True, force=True) + fdt = _series_datetime_from_any(ds) + if fdt is None: + try: + fdt = datetime.fromtimestamp(f.stat().st_mtime) + except Exception: + fdt = datetime.strptime(study_date, "%Y%m%d") + except Exception: + try: + fdt = datetime.fromtimestamp(f.stat().st_mtime) + except Exception: + fdt = datetime.strptime(study_date, "%Y%m%d") + file_dts.append((f, fdt)) + + file_dts = sorted(file_dts, key=lambda x: x[1]) + + # Split into subgroups when gap >= timepoint_separation_days + subgroups: List[List[Path]] = [] + current_group: List[Path] = [] + prev_dt: Optional[datetime] = None + for f, fdt in file_dts: + if prev_dt is not None and (fdt - prev_dt) >= timedelta( + days=timepoint_separation_days + ): + # Start new subgroup + subgroups.append(current_group) + current_group = [f] + else: + current_group.append(f) + prev_dt = fdt + + if current_group: + subgroups.append(current_group) + + # Create a series_entry per subgroup + for sg_idx, sg_files in enumerate(subgroups): + sg_dts = [fdt for f, fdt in file_dts if f in sg_files] + sg_rep_dt = 
min(sg_dts) if sg_dts else rep_dt + series_entries.append( + { + "study_date": study_date, + "datetime": sg_rep_dt, + "modality": modality, + "series_number": series_number, + "files": sg_files, + } + ) + + series_entries = sorted(series_entries, key=lambda s: s["datetime"]) + if not series_entries: + continue + + patient_root = output_base / patient_id + patient_root.mkdir(parents=True, exist_ok=True) + + cycle_idx = 1 + tp_idx = 1 + prev_dt = series_entries[0]["datetime"] + src_dirs_for_cleanup: set[Path] = set() + + for i, s in enumerate(series_entries): + this_dt = s["datetime"] + if i > 0: + if (this_dt - prev_dt) >= timedelta(days=cycle_gap_days): + cycle_idx += 1 + tp_idx = 1 + elif (this_dt - prev_dt) >= timedelta(days=timepoint_separation_days): + tp_idx += 1 + + cycle_dir = patient_root / f"Cycle{cycle_idx}" / f"tp{tp_idx}" + cycle_dir.mkdir(parents=True, exist_ok=True) + + cycle_key = f"Cycle{cycle_idx}" + tp_key = f"tp{tp_idx}" + results.setdefault(patient_id, {}).setdefault(cycle_key, {}).setdefault( + tp_key, [] + ) + + modality = s["modality"] + series_number = s["series_number"] + files = s["files"] + + if modality in ["NM", "PT"]: + dest_dir = cycle_dir / "SPECT" + elif modality == "RTSTRUCT": + dest_dir = cycle_dir / "CT" / "RTstruct" + else: + dest_dir = cycle_dir / (modality or "UNKNOWN") + + dest_dir.mkdir(parents=True, exist_ok=True) + + imported = 0 + for src_file in files: + try: + target = dest_dir / src_file.name + if target.exists(): + continue + if move: + import shutil + + shutil.move(str(src_file), str(target)) + src_dirs_for_cleanup.add(src_file.parent) + else: + import shutil + + shutil.copy2(str(src_file), str(target)) + imported += 1 + except Exception: + continue + + logger.info( + f"Organized {imported} files -> {dest_dir} ({modality}, Series{series_number}, {this_dt})" + ) + + results[patient_id][cycle_key][tp_key].append(dest_dir) + + prev_dt = this_dt + + if move: + for src_dir in list(src_dirs_for_cleanup): + try: + if src_dir.exists() and not any(src_dir.iterdir()): + src_dir.rmdir() + except Exception: + pass + try: + parent1 = src_dir.parent + if parent1.exists() and not any(parent1.iterdir()): + parent1.rmdir() + except Exception: + pass + + return results + + +def summarize_timepoints( + storage_root: Path | str, + *, + patient_id_filter: Optional[List[str]] = None, +) -> Dict[str, List[Dict[str, object]]]: + """Summarize detected series (one per modality/series_number per StudyDate) for debugging. + + Scans ``storage_root`` with the same datetime extraction logic used by + ``organize_folder_by_cycles`` and returns, per patient, the ordered list of + all distinct (study_date, modality, series_number) with their representative + datetimes and gaps in hours to the previous entry. 
+ """ + storage_root = Path(storage_root) + + # Track all unique (study_date, modality, series_number) per patient with min datetime + index: Dict[str, Dict[tuple, List[datetime]]] = {} + + def _read_minimal(dcm_path: Path) -> Optional[pydicom.Dataset]: + try: + return pydicom.dcmread(str(dcm_path), stop_before_pixels=True, force=True) + except Exception: + return None + + candidates: set[Path] = set() + for pattern in ("*.dcm", "*.DCM"): + candidates.update(storage_root.rglob(pattern)) + + for dcm_file in sorted(candidates): + ds = _read_minimal(dcm_file) + if ds is None: + continue + patient_id = getattr(ds, "PatientID", None) or "UNKNOWN" + if patient_id_filter and patient_id not in patient_id_filter: + continue + + dt = _series_datetime_from_any(ds) + study_date = getattr(ds, "StudyDate", None) + if dt is None: + try: + dt = datetime.fromtimestamp(dcm_file.stat().st_mtime) + except Exception: + dt = None + + if not study_date: + study_date = dt.strftime("%Y%m%d") if dt else "00000000" + + modality = getattr(ds, "Modality", None) or "UNKNOWN" + series_number = getattr(ds, "SeriesNumber", None) + try: + series_number = int(series_number) if series_number is not None else -1 + except Exception: + series_number = -1 + + if dt is None: + try: + dt = datetime.strptime(study_date, "%Y%m%d") + except Exception: + dt = datetime(1900, 1, 1) + + key = (study_date, modality, series_number) + index.setdefault(patient_id, {}).setdefault(key, []).append(dt) + + summary: Dict[str, List[Dict[str, object]]] = {} + for patient_id, by_key in index.items(): + entries: List[Dict[str, object]] = [] + for key, dts in sorted(by_key.items(), key=lambda kv: min(kv[1])): + sd, mod, sn = key + rep_dt = min(dts) + entries.append( + { + "study_date": sd, + "modality": mod, + "series_number": sn, + "datetime": rep_dt, + } + ) + + # Compute deltas after sorting by datetime + entries = sorted(entries, key=lambda e: e["datetime"]) + prev_dt: Optional[datetime] = None + for entry in entries: + delta_hours = None + if prev_dt is not None: + delta_hours = (entry["datetime"] - prev_dt).total_seconds() / 3600.0 + entry["delta_hours"] = delta_hours + prev_dt = entry["datetime"] + + summary[patient_id] = entries + + return summary diff --git a/pytheranostics/dicomtools/dicom_receiver.py b/pytheranostics/dicomtools/dicom_receiver.py index 6f7edb7..c372687 100644 --- a/pytheranostics/dicomtools/dicom_receiver.py +++ b/pytheranostics/dicomtools/dicom_receiver.py @@ -6,7 +6,7 @@ import json import logging -from datetime import datetime, timedelta +from datetime import datetime from pathlib import Path from typing import Callable, Dict, List, Optional @@ -22,6 +22,8 @@ from pynetdicom import AE, AllStoragePresentationContexts, evt from pynetdicom.sop_class import Verification +from .dicom_organizer import organize_folder_by_cycles + # Setup logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -36,6 +38,8 @@ class DICOMReceiver: - Metadata extraction for dosimetry parameters - Support for CT, SPECT/NM, PET, and RT Structure Sets - Configurable storage paths and callbacks + + Requires pynetdicom to be installed. 
""" def __init__( @@ -163,11 +167,15 @@ def _runner(): logger.info( f"Auto-organizing cycles for patient {patient_id} after {self.auto_organize_debounce_seconds}s idle" ) - self.organize_by_cycles( - patient_id=patient_id, + organize_folder_by_cycles( + storage_root=self.storage_root, output_base=self.auto_organize_output_base, cycle_gap_days=self.auto_organize_cycle_gap_days, - timepoint_separation_days=self.auto_organize_timepoint_separation_days, + timepoint_separation_days=float( + self.auto_organize_timepoint_separation_days + ), + move=True, + patient_id_filter=[patient_id], ) except Exception as e: logger.exception(f"Auto-organize failed for {patient_id}: {e}") @@ -275,318 +283,6 @@ def _get_storage_path(self, ds: pydicom.Dataset) -> Path: path.mkdir(parents=True, exist_ok=True) return path - # -------------------------- - # Post-processing utilities - # -------------------------- - @staticmethod - def _parse_dt( - date_str: Optional[str], time_str: Optional[str] - ) -> Optional[datetime]: - """Parse common DICOM date/time fields to a datetime object. - - Parameters - ---------- - date_str : str | None - DICOM DA (YYYYMMDD) - time_str : str | None - DICOM TM (HHMMSS.frac) - - Returns - ------- - datetime | None - Parsed datetime or None if not enough info - """ - if not date_str: - return None - try: - y = int(date_str[0:4]) - m = int(date_str[4:6]) - d = int(date_str[6:8]) - if time_str: - hh = int(time_str[0:2]) if len(time_str) >= 2 else 0 - mm = int(time_str[2:4]) if len(time_str) >= 4 else 0 - ss = int(time_str[4:6]) if len(time_str) >= 6 else 0 - micro = 0 - if len(time_str) > 7 and "." in time_str: - frac = time_str.split(".")[-1] - # pad/cut to microseconds - frac = (frac + "000000")[:6] - micro = int(frac) - return datetime(y, m, d, hh, mm, ss, micro) - return datetime(y, m, d) - except Exception: - return None - - @staticmethod - def _series_datetime_from_any(dcm: pydicom.Dataset) -> Optional[datetime]: - """Best-effort extraction of a datetime for a DICOM series instance. - - Tries SeriesDate/Time, then AcquisitionDate/Time, then ContentDate/Time, - finally falls back to StudyDate/Time. - """ - # Series - dt = DICOMReceiver._parse_dt( - getattr(dcm, "SeriesDate", None), getattr(dcm, "SeriesTime", None) - ) - if dt: - return dt - # Acquisition - dt = DICOMReceiver._parse_dt( - getattr(dcm, "AcquisitionDate", None), getattr(dcm, "AcquisitionTime", None) - ) - if dt: - return dt - # Content - dt = DICOMReceiver._parse_dt( - getattr(dcm, "ContentDate", None), getattr(dcm, "ContentTime", None) - ) - if dt: - return dt - # Study - return DICOMReceiver._parse_dt( - getattr(dcm, "StudyDate", None), getattr(dcm, "StudyTime", None) - ) - - @staticmethod - def _get_any_dicom_datetime_in_path(path: Path) -> Optional[datetime]: - """Find any DICOM file in a directory and return its best-effort datetime. - - Parameters - ---------- - path : Path - Directory containing DICOM files - - Returns - ------- - datetime | None - """ - try: - for dcm_file in sorted(path.glob("*.dcm")): - try: - ds = pydicom.dcmread( - str(dcm_file), stop_before_pixels=True, force=True - ) - dt = DICOMReceiver._series_datetime_from_any(ds) - if dt: - return dt - except Exception: - continue - return None - except Exception: - return None - - def _collect_patient_series(self, patient_id: str) -> List[Dict]: - """Collect all known series for a patient across all studies. 
- - Returns list of dicts with keys: modality, series_number, series_description, - path (Path), datetime (datetime | None), study_date (str | None). - """ - series_list: List[Dict] = [] - for key, info in self.metadata.items(): - if not key.startswith(f"{patient_id}_"): - continue - study_date = info.get("patient_info", {}).get("StudyDate") - series = info.get("series", {}) - for s_key, s in series.items(): - src_path = Path(s.get("path", self.storage_root)) - # Determine a representative datetime for the series - rep_dt = self._get_any_dicom_datetime_in_path(src_path) - if rep_dt is None and study_date: - # Fallback to study_date - rep_dt = self._parse_dt( - study_date, info.get("patient_info", {}).get("StudyTime") - ) - series_list.append( - { - "modality": s.get("modality", "UNKNOWN"), - "series_number": s.get("series_number", 0), - "series_description": s.get("series_description", ""), - "path": src_path, - "datetime": rep_dt, - "study_date": study_date, - } - ) - # Filter out those without any path - return [x for x in series_list if x.get("path") is not None] - - def organize_by_cycles( - self, - patient_id: str, - output_base: Path, - cycle_gap_days: int = 15, - timepoint_separation_days: int = 1, - ) -> Dict[str, Dict[str, List[Path]]]: - """Post-process received DICOMs into Cycle/Timepoint structure. - - Creates folders like: - PatientID/Cycle1/tp1/CT/Series3 - PatientID/Cycle1/tp1/SPECT/Series5 - PatientID/Cycle1/tp2/CT/Series2 - - RTSTRUCT will be placed under the corresponding CT timepoint: - PatientID/Cycle1/tp1/CT/RTstruct/Series7 - - Parameters - ---------- - patient_id : str - Patient identifier - output_base : Path - Directory under which the new structure will be created - cycle_gap_days : int - Start a new cycle if the gap since the previous scan is >= this many days (default 15 days). 
- timepoint_separation_days : int - Start a new timepoint when acquisition date changes by this many days or more (default 1 day) - - Returns - ------- - dict - Nested dict with created directories per cycle and timepoint - """ - series_list = self._collect_patient_series(patient_id) - if not series_list: - raise ValueError(f"No series found for patient '{patient_id}'.") - - # Ensure we have datetimes; if some missing, use file mtime as last resort - for s in series_list: - if s["datetime"] is None: - try: - any_file = next(iter(sorted(s["path"].glob("*.dcm")))) - mtime = datetime.fromtimestamp(any_file.stat().st_mtime) - s["datetime"] = mtime - except StopIteration: - # No files present - skip later - s["datetime"] = None - - # Drop any without datetime ultimately - series_list = [s for s in series_list if s["datetime"] is not None] - - # Group series by StudyDate to define timepoints, so RTSTRUCT doesn't create new cycles - # Build mapping: study_date -> list[series] - tp_by_date: Dict[str, List[Dict]] = {} - for s in series_list: - sd = s.get("study_date") or s["datetime"].strftime("%Y%m%d") - tp_by_date.setdefault(sd, []).append(s) - - # Sort timepoints by study date - sorted_dates = sorted(tp_by_date.keys()) - - out: Dict[str, Dict[str, List[Path]]] = {} - patient_root = Path(output_base) / patient_id - patient_root.mkdir(parents=True, exist_ok=True) - - if not sorted_dates: - return out - - # Compute cycles from consecutive study date gaps - cycle_idx = 1 - tp_idx = 1 - prev_date_dt = datetime.strptime(sorted_dates[0], "%Y%m%d") - - for i, sd in enumerate(sorted_dates): - this_date_dt = datetime.strptime(sd, "%Y%m%d") - if i > 0: - if (this_date_dt - prev_date_dt) >= timedelta(days=cycle_gap_days): - # New cycle - cycle_idx += 1 - tp_idx = 1 - else: - # Same cycle, next timepoint (optionally collapse same-day scans if needed) - if ( - this_date_dt.date() - prev_date_dt.date() - ).days >= timepoint_separation_days: - tp_idx += 1 - - # For all series in this study date, place under tp folder - cycle_dir = patient_root / f"Cycle{cycle_idx}" / f"tp{tp_idx}" - cycle_dir.mkdir(parents=True, exist_ok=True) - - # Track source modality directories seen for cleanup after moving - src_dirs_for_cleanup: set[Path] = set() - - for s in tp_by_date[sd]: - modality = s["modality"] - # Normalize modality names for destination - if modality in ["NM", "PT"]: - modality_folder = "SPECT" - elif modality == "RTSTRUCT": - modality_folder = "CT" # RTSTRUCT under CT/RTstruct - else: - modality_folder = modality - - series_number = s.get("series_number", 0) or 0 - # Destination folders drop the Series subfolder; put instances directly under modality - if modality == "RTSTRUCT": - dest_dir = cycle_dir / "CT" / "RTstruct" - else: - dest_dir = cycle_dir / modality_folder - - dest_dir.mkdir(parents=True, exist_ok=True) - - # Copy only files belonging to this SeriesNumber - src_path: Path = s["path"] - src_dirs_for_cleanup.add(src_path) - copied = 0 - for dcm_file in src_path.glob("*.dcm"): - try: - ds = pydicom.dcmread( - str(dcm_file), stop_before_pixels=True, force=True - ) - if int(getattr(ds, "SeriesNumber", -1) or -1) == int( - series_number - ): - import shutil - - dest_file = dest_dir / dcm_file.name - if dest_file.exists(): - # Skip if already present to avoid accidental overwrite - continue - # Move instead of copy to avoid duplication - shutil.move(str(dcm_file), str(dest_file)) - copied += 1 - except Exception: - continue - logger.info( - f"Organized {copied} files -> {dest_dir} ({modality}, 
Series{int(series_number)}, {sd})" - ) - - # Record in output mapping - cycle_key = f"Cycle{cycle_idx}" - tp_key = f"tp{tp_idx}" - out.setdefault(cycle_key, {}).setdefault(tp_key, []).append(dest_dir) - - # After processing all series for this StudyDate, prune empty source directories - try: - for src_dir in src_dirs_for_cleanup: - # Remove dir if empty - try: - if src_dir.exists() and not any(src_dir.iterdir()): - src_dir.rmdir() - except Exception: - pass - # Attempt to remove parent StudyDate dir if empty - try: - study_parent = src_dir.parent - if study_parent.exists() and not any(study_parent.iterdir()): - study_parent.rmdir() - except Exception: - pass - # Attempt to remove patient dir if now empty (rare) - try: - patient_dir = study_parent.parent - if patient_dir.exists() and not any(patient_dir.iterdir()): - patient_dir.rmdir() - except Exception: - pass - except Exception: - logger.debug("Cleanup after move encountered issues; continuing.") - - prev_date_dt = this_date_dt - - logger.info( - f"Cycle/Timepoint organization complete for patient {patient_id} at {patient_root}" - ) - return out - def _handle_store(self, event): """ Handle an incoming C-STORE request. From 7869ad07c9af2e05806ca6db8e0d31222dad0fb9 Mon Sep 17 00:00:00 2001 From: Carlos Uribe Date: Sat, 24 Jan 2026 15:08:41 -0800 Subject: [PATCH 02/12] Add documentation for the dicom organization --- .../dicom_organization/dicom_organization.rst | 178 ++++++++++++++++++ docs/source/tutorials/index.rst | 3 +- 2 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 docs/source/tutorials/dicom_organization/dicom_organization.rst diff --git a/docs/source/tutorials/dicom_organization/dicom_organization.rst b/docs/source/tutorials/dicom_organization/dicom_organization.rst new file mode 100644 index 0000000..cf1025b --- /dev/null +++ b/docs/source/tutorials/dicom_organization/dicom_organization.rst @@ -0,0 +1,178 @@ +DICOM File Organization +======================= + +PyTheranostics provides utilities to organize DICOM files into a structured format suitable for dosimetry workflows. The ``dicom_organizer`` module can process folders of DICOM files and automatically organize them by patient, cycle, and timepoint. + +Overview +-------- + +The organizer handles: + +* **Multiple patients** in a single folder +* **Multiple imaging cycles** per patient (e.g., therapy cycles separated by weeks) +* **Multiple timepoints** per cycle (e.g., scans at different times during a cycle) +* **Mixed modalities** (CT, SPECT/NM, PET, RTSTRUCT) +* **Same-day acquisitions** at different times (using datetime-based splitting) + +Output structure +---------------- + +The organizer creates a hierarchical folder structure:: + + PatientID/ + Cycle1/ + tp1/ + CT/ + *.dcm + SPECT/ + *.dcm + CT/ + RTstruct/ + *.dcm + tp2/ + CT/ + SPECT/ + Cycle2/ + tp1/ + ... + +Basic Usage +----------- + +Organize all DICOM files in a folder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + from pytheranostics.dicomtools import organize_folder_by_cycles + + # Organize all patients in a folder + result = organize_folder_by_cycles( + storage_root="/path/to/dicom/files", + output_base="/path/to/organized/output", + cycle_gap_days=15, # New cycle if gap >= 15 days + timepoint_separation_days=1, # New timepoint if gap >= 1 day + move=True # Move files (False to copy) + ) + + # Result is a dict: {PatientID: {CycleX: {tpY: [Path, ...]}}} + print(result) + +Organize specific patients only +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: python
+
+    # Filter to specific patient IDs
+    result = organize_folder_by_cycles(
+        storage_root="/path/to/dicom/files",
+        output_base="/path/to/organized/output",
+        patient_id_filter=["PATIENT001", "PATIENT002"]
+    )
+
+Handle same-day acquisitions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For protocols with multiple scans on the same day (e.g., a morning CT and an afternoon SPECT+CT), use fractional days to separate timepoints based on actual acquisition times:
+
+.. code-block:: python
+
+    # Separate timepoints if acquisition times differ by >= 4.8 hours
+    result = organize_folder_by_cycles(
+        storage_root="/path/to/dicom/files",
+        output_base="/path/to/organized/output",
+        timepoint_separation_days=0.2  # 0.2 days ≈ 4.8 hours
+    )
+
+This uses ``AcquisitionDate``/``AcquisitionTime`` from the DICOM tags (or file modification time as a fallback) to split same-day scans into separate timepoints.
+
+Debugging and Inspection
+-------------------------
+
+Use ``summarize_timepoints()`` to inspect detected series before organizing:
+
+.. code-block:: python
+
+    from pytheranostics.dicomtools import summarize_timepoints
+
+    # Get summary of all detected series
+    summary = summarize_timepoints(
+        storage_root="/path/to/dicom/files",
+        patient_id_filter=["PATIENT001"]
+    )
+
+    # Summary shows: study_date, modality, series_number, datetime, and gaps
+    for patient_id, entries in summary.items():
+        print(f"\n{patient_id}:")
+        for entry in entries:
+            gap = entry['delta_hours']
+            gap_str = f"{gap:.1f}h" if gap is not None else "None"
+            print(f"  {entry['study_date']} - {entry['modality']} "
+                  f"Series{entry['series_number']} at {entry['datetime']} "
+                  f"(gap: {gap_str})")
+
+Example output::
+
+    PATIENT001:
+      20190409 - CT Series2 at 2019-04-09 11:34:57 (gap: None)
+      20190409 - NM Series5 at 2019-04-09 16:06:50 (gap: 4.5h)
+      20190409 - CT Series2 at 2019-04-09 16:26:59 (gap: 0.3h)
+      20190410 - CT Series2 at 2019-04-10 10:15:23 (gap: 17.8h)
+      20190413 - NM Series4 at 2019-04-13 14:22:10 (gap: 76.1h)
+
+This helps diagnose issues like:
+
+* Missing timepoints
+* Incorrectly merged same-day scans
+* Unexpected gaps between acquisitions
+
+Parameters Reference
+--------------------
+
+``organize_folder_by_cycles()``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+:storage_root: Root directory to scan recursively for ``.dcm`` files
+:output_base: Base directory for organized output (defaults to ``storage_root``)
+:cycle_gap_days: Gap threshold (days) to start a new cycle (default: 15)
+:timepoint_separation_days: Gap threshold (days) to start a new timepoint within a cycle (default: 1; can be fractional, e.g. 0.1 for 2.4 hours)
+:move: If ``True``, move files; if ``False``, copy files (default: ``True``)
+:patient_id_filter: List of PatientIDs to process; if ``None``, process all (default: ``None``)
+
+Returns a nested dictionary: ``{PatientID: {"CycleX": {"tpY": [Path, ...]}}}``
+
+``summarize_timepoints()``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+:storage_root: Root directory to scan for DICOM files
+:patient_id_filter: Optional list of PatientIDs to summarize
+
+Returns: ``{PatientID: [{study_date, modality, series_number, datetime, delta_hours}, ...]}``
+
+Advanced: Integration with DICOM Receiver
+------------------------------------------
+
+The organizer can be triggered automatically after receiving DICOM files via C-STORE:
+
+.. code-block:: python
+
+    from pytheranostics.dicomtools.dicom_receiver import DICOMReceiver
+
+    receiver = DICOMReceiver(
+        ae_title="PYTHERANOSTICS",
+        port=11112,
+        storage_root="/path/to/storage",
+        auto_organize=True,  # Enable auto-organize
+        auto_organize_output_base="/path/to/organized",
+        auto_organize_cycle_gap_days=15,
+        auto_organize_timepoint_separation_days=0.2,  # 4.8 hours
+        auto_organize_debounce_seconds=60  # Wait 60s after last file
+    )
+
+    receiver.start()
+
+The receiver will automatically call ``organize_folder_by_cycles()`` 60 seconds after the last DICOM file is received for each patient.
+
+See Also
+--------
+
+* :doc:`../Data_Ingestion_Examples/Data_Ingestion_Examples` - General data ingestion workflows
+* :doc:`../getting_started/project_setup_tutorial` - Initial project setup
diff --git a/docs/source/tutorials/index.rst b/docs/source/tutorials/index.rst
index 7d02989..e55dd70 100644
--- a/docs/source/tutorials/index.rst
+++ b/docs/source/tutorials/index.rst
@@ -7,7 +7,8 @@ Hands-on walkthroughs that demonstrate common PyTheranostics workflows.
    :maxdepth: 1
 
    getting_started/project_setup_tutorial
+   dicom_organization/dicom_organization
+   Data_Ingestion_Examples/Data_Ingestion_Examples
    segmentation/total_segmentator_tutorial
    SPECT2SUV/SPECT2SUV
    ROI_Mapping_Tutorial/ROI_Mapping_Tutorial
-   Data_Ingestion_Examples/Data_Ingestion_Examples

From 0bd274b15b56f89b3da6f20282f2b481dbf7b85a Mon Sep 17 00:00:00 2001
From: Carlos Uribe
Date: Sat, 24 Jan 2026 15:34:56 -0800
Subject: [PATCH 03/12] Fix init so plots can be used more easily with tx
---
 pytheranostics/plots/__init__.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/pytheranostics/plots/__init__.py b/pytheranostics/plots/__init__.py
index 3731650..297c122 100644
--- a/pytheranostics/plots/__init__.py
+++ b/pytheranostics/plots/__init__.py
@@ -1 +1,9 @@
-"""PyTheranostics package."""
+"""Plotting utilities for PyTheranostics workflows."""
+
+from .plots import ewin_montage, plot_MIP_with_mask_outlines, plot_tac_residuals
+
+__all__ = [
+    "ewin_montage",
+    "plot_MIP_with_mask_outlines",
+    "plot_tac_residuals",
+]

From f04223e5f4d5e1e68a71e1c3bbd8d5ce26b68f3a Mon Sep 17 00:00:00 2001
From: Carlos Uribe
Date: Sat, 24 Jan 2026 15:44:55 -0800
Subject: [PATCH 04/12] move handling of axes to the plot function itself
---
 pytheranostics/plots/plots.py | 66 ++++++++++++++++++++++++++++-----
 1 file changed, 58 insertions(+), 8 deletions(-)

diff --git a/pytheranostics/plots/plots.py b/pytheranostics/plots/plots.py
index 4d9307b..a77725a 100644
--- a/pytheranostics/plots/plots.py
+++ b/pytheranostics/plots/plots.py
@@ -149,20 +149,61 @@ def plot_tac_residuals(
     return None
 
 
-def plot_MIP_with_mask_outlines(ax, SPECT, masks=None, vmax=300000, label=None):
+def plot_MIP_with_mask_outlines(
+    SPECT,
+    masks=None,
+    vmax=300000,
+    label=None,
+    save_path=None,
+    dpi=300,
+    ax=None,
+    figsize=None,
+):
     """Plot Maximum Intensity Projection (MIP) of SPECT data with masks outlines.
 
     Parameters
     ----------
-    ax : _type_
-        _description_
-    SPECT : _type_
-        _description_
-    masks : _type_, optional
-        _description_, by default None
+    SPECT : numpy.ndarray
+        3D SPECT data array.
+    masks : dict, optional
+        Dictionary of masks with organ names as keys and 3D arrays as values.
+        By default None.
     vmax : int, optional
-        _description_, by default 300000
+        Maximum value for display intensity. By default 300000.
+    label : bool, optional
+        Whether to add text labels at mask centers. By default None.
+ save_path : str or Path, optional + Path to save the figure. If provided, the parent directory will be created + if it doesn't exist. By default None (no saving). + dpi : int, optional + Resolution for saved figure in dots per inch. By default 300. + ax : matplotlib.axes.Axes, optional + Matplotlib axes object to plot on. If None, creates a new figure and axes. + By default None. + figsize : tuple, optional + Figure size (width, height) in inches when creating a new figure. + If None, automatically calculates based on data limits. By default None. + + Returns + ------- + matplotlib.axes.Axes + The axes object containing the plot. """ + # Create figure and axes if not provided + if ax is None: + # Auto-calculate figsize based on the data limits to minimize white space + if figsize is None: + # Data limits that will be applied later + xlim_range = 100 - 30 # 70 units + ylim_range = 234 - 0 # 234 units + # Calculate aspect ratio and create appropriately sized figure + aspect = ylim_range / xlim_range # ~3.34 + width = 4 # Base width in inches + height = width * aspect + figsize = (width, height) + + fig, ax = plt.subplots(figsize=figsize) + plt.sca(ax) spect_mip = SPECT.max(axis=0) plt.imshow(spect_mip.T, cmap="Greys", interpolation="Gaussian", vmax=vmax, vmin=0) @@ -222,3 +263,12 @@ def plot_MIP_with_mask_outlines(ax, SPECT, masks=None, vmax=300000, label=None): plt.axis("off") plt.xticks([]) plt.yticks([]) + + # Save figure if path is provided + if save_path is not None: + save_path = Path(save_path) + # Create parent directory if it doesn't exist + save_path.parent.mkdir(parents=True, exist_ok=True) + plt.savefig(save_path, dpi=dpi, bbox_inches="tight") + + return ax From c5e7082215c79844d47e9f9069e7d29e16ec70f6 Mon Sep 17 00:00:00 2001 From: Carlos Uribe Date: Sat, 24 Jan 2026 15:57:18 -0800 Subject: [PATCH 05/12] better adjust size of figure --- pytheranostics/plots/plots.py | 84 ++++++++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 17 deletions(-) diff --git a/pytheranostics/plots/plots.py b/pytheranostics/plots/plots.py index a77725a..ebe8abe 100644 --- a/pytheranostics/plots/plots.py +++ b/pytheranostics/plots/plots.py @@ -158,6 +158,7 @@ def plot_MIP_with_mask_outlines( dpi=300, ax=None, figsize=None, + spacing=None, ): """Plot Maximum Intensity Projection (MIP) of SPECT data with masks outlines. @@ -182,36 +183,85 @@ def plot_MIP_with_mask_outlines( By default None. figsize : tuple, optional Figure size (width, height) in inches when creating a new figure. - If None, automatically calculates based on data limits. By default None. + If None, automatically calculates based on physical dimensions. By default None. + spacing : tuple, optional + Pixel spacing (x, y, z) in mm from DICOM. If provided, used to create + physically accurate aspect ratio. By default None. Returns ------- matplotlib.axes.Axes The axes object containing the plot. 
""" + plt.sca(ax) if ax is not None else None + spect_mip = SPECT.max(axis=0) + + # Calculate aspect ratio for proper physical scaling + if spacing is not None: + # spacing is (x, y, z) in mm + # For proper aspect ratio: aspect = dy/dx + data_aspect = spacing[1] / spacing[0] # y-spacing / x-spacing + else: + data_aspect = 1.0 + + # Automatically determine bounds based on data content + # Use a threshold to find where there's actual signal + threshold = vmax * 0.01 # 1% of max display value + signal_mask = spect_mip.T > threshold + + if signal_mask.any(): + # Find bounding box of signal + rows, cols = numpy.where(signal_mask) + ylim_min, ylim_max = rows.min(), rows.max() + xlim_min, xlim_max = cols.min(), cols.max() + + # Add small margin (5% on each side) + margin_x = int((xlim_max - xlim_min) * 0.05) + margin_y = int((ylim_max - ylim_min) * 0.05) + + xlim_min = max(0, xlim_min - margin_x) + xlim_max = min(spect_mip.shape[1] - 1, xlim_max + margin_x) + ylim_min = max(0, ylim_min - margin_y) + ylim_max = min(spect_mip.shape[0] - 1, ylim_max + margin_y) + else: + # Fallback to full image if no signal detected + xlim_min, xlim_max = 0, spect_mip.shape[1] - 1 + ylim_min, ylim_max = 0, spect_mip.shape[0] - 1 + # Create figure and axes if not provided if ax is None: - # Auto-calculate figsize based on the data limits to minimize white space if figsize is None: - # Data limits that will be applied later - xlim_range = 100 - 30 # 70 units - ylim_range = 234 - 0 # 234 units - # Calculate aspect ratio and create appropriately sized figure - aspect = ylim_range / xlim_range # ~3.34 - width = 4 # Base width in inches - height = width * aspect - figsize = (width, height) + if spacing is not None: + # Physical dimensions of ROI in mm + roi_width_mm = (xlim_max - xlim_min) * spacing[0] + roi_height_mm = (ylim_max - ylim_min) * spacing[1] + + # Create compact figure matching ROI aspect ratio + base_width = 3 # inches - smaller base + figsize = (base_width, base_width * roi_height_mm / roi_width_mm) + else: + # Fallback to pixel-based calculation + xlim_range = xlim_max - xlim_min + ylim_range = ylim_max - ylim_min + aspect_ratio = ylim_range / xlim_range + base_width = 3 + figsize = (base_width, base_width * aspect_ratio) fig, ax = plt.subplots(figsize=figsize) + plt.sca(ax) - plt.sca(ax) - spect_mip = SPECT.max(axis=0) - plt.imshow(spect_mip.T, cmap="Greys", interpolation="Gaussian", vmax=vmax, vmin=0) + plt.imshow( + spect_mip.T, + cmap="Greys", + interpolation="Gaussian", + vmax=vmax, + vmin=0, + aspect=data_aspect, + ) if masks is not None: for organ, mask in masks.items(): organ_lower = organ.lower() - print(organ_lower) if "peak" in organ_lower: continue else: @@ -258,8 +308,8 @@ def plot_MIP_with_mask_outlines( alpha=0.7, ) - plt.xlim(30, 100) - plt.ylim(0, 234) + plt.xlim(xlim_min, xlim_max) + plt.ylim(ylim_min, ylim_max) plt.axis("off") plt.xticks([]) plt.yticks([]) @@ -269,6 +319,6 @@ def plot_MIP_with_mask_outlines( save_path = Path(save_path) # Create parent directory if it doesn't exist save_path.parent.mkdir(parents=True, exist_ok=True) - plt.savefig(save_path, dpi=dpi, bbox_inches="tight") + plt.savefig(save_path, dpi=dpi, bbox_inches="tight", pad_inches=0) return ax From 0a6bcdcef903cb8a295cf6873dea35122cd0e59b Mon Sep 17 00:00:00 2001 From: Carlos Uribe Date: Sat, 24 Jan 2026 16:35:17 -0800 Subject: [PATCH 06/12] Add option for colorbar and units --- pytheranostics/plots/plots.py | 169 +++++++++++++++++++++++++++++++--- 1 file changed, 155 insertions(+), 14 deletions(-) diff --git 
a/pytheranostics/plots/plots.py b/pytheranostics/plots/plots.py index ebe8abe..2e28f18 100644 --- a/pytheranostics/plots/plots.py +++ b/pytheranostics/plots/plots.py @@ -1,5 +1,6 @@ """Plotting utilities for PyTheranostics workflows.""" +import json from pathlib import Path from typing import Optional @@ -7,6 +8,40 @@ import matplotlib.pyplot as plt import numpy +try: + from importlib.resources import files +except ImportError: + from importlib_resources import files + + +def _find_config_file(filename="voi_mappings_config.json", max_depth=2): + """Search upward from current directory for config file. + + Searches current directory and one parent level only to avoid + finding unrelated configs from other projects. + + Parameters + ---------- + filename : str + Name of config file to find + max_depth : int + Maximum number of parent directories to search (default: 2) + + Returns + ------- + Path or None + Path to config file if found, None otherwise + """ + current = Path.cwd() + for _ in range(max_depth): + config_path = current / filename + if config_path.exists(): + return config_path + if current.parent == current: # Reached filesystem root + break + current = current.parent + return None + def ewin_montage(img: numpy.ndarray, ewin: dict) -> None: """Create a montage of energy window images. @@ -159,6 +194,10 @@ def plot_MIP_with_mask_outlines( ax=None, figsize=None, spacing=None, + mask_colors=None, + config_path=None, + colorbar=False, + units=None, ): """Plot Maximum Intensity Projection (MIP) of SPECT data with masks outlines. @@ -187,6 +226,25 @@ def plot_MIP_with_mask_outlines( spacing : tuple, optional Pixel spacing (x, y, z) in mm from DICOM. If provided, used to create physically accurate aspect ratio. By default None. + mask_colors : dict, optional + Dictionary mapping organ names (or keywords) to colors. If a mask name matches + a key (case-insensitive substring match), that color is used. If None, uses + default color mapping. Unmapped organs use matplotlib's color cycle. + Example: {"kidney": "lime", "liver": "yellow", "lesion": "red"} + By default None. + config_path : str or Path, optional + Path to voi_mappings_config.json file. If None, searches upward from current + directory (like git) to find the config in your project root. Falls back to + package template if not found. + Example: Path("./my_project/voi_mappings_config.json") + By default None. + colorbar : bool, optional + Whether to add a colorbar showing the intensity scale. By default False. + units : str, optional + Units from DICOM header (e.g., 'BQML', 'CNTS'). Can be extracted from + DICOM tag (0054,1001). If provided, automatically determines scaling and + colorbar label. Common values: 'BQML' (Bq/ml), 'CNTS' (counts). + By default None (assumes Bq and scales to MBq). 
Returns ------- @@ -250,31 +308,114 @@ def plot_MIP_with_mask_outlines( fig, ax = plt.subplots(figsize=figsize) plt.sca(ax) - plt.imshow( - spect_mip.T, + # Determine scaling and label based on units + if units is not None: + units_upper = units.upper() + if units_upper in ["BQML", "BQ/ML"]: + scale_factor = 1e6 + colorbar_label = "MBq/ml" + elif units_upper in ["BQ", "BECQUEREL"]: + scale_factor = 1e6 + colorbar_label = "MBq" + elif units_upper in ["CNTS", "COUNTS"]: + scale_factor = 1.0 + colorbar_label = "Counts" + else: + # Unknown units, no scaling + scale_factor = 1.0 + colorbar_label = units + else: + # Default: assume Bq and convert to MBq + scale_factor = 1e6 + colorbar_label = "MBq" + + # Apply scaling + spect_mip_scaled = spect_mip / scale_factor + vmax_scaled = vmax / scale_factor + + im = plt.imshow( + spect_mip_scaled.T, cmap="Greys", interpolation="Gaussian", - vmax=vmax, + vmax=vmax_scaled, vmin=0, aspect=data_aspect, ) + # Add colorbar if requested + if colorbar: + plt.colorbar(im, ax=ax, label=colorbar_label) + + # Hardcoded fallback defaults (only used if config file is missing) + fallback_defaults = { + "kidney": "lime", + "parotid": "red", + "submandibular": "red", + "lesion": "magenta", + "liver": "yellow", + "spleen": "cyan", + "tumor": "magenta", + } + + # Determine color mapping: parameter > config file > fallback defaults + if mask_colors is not None: + # Explicit override via parameter + color_map = mask_colors + else: + # Try to load from configuration file + try: + if config_path is not None: + # User-provided config path + config_file = Path(config_path) + else: + # Search for config in project directory (upward search) + config_file = _find_config_file("voi_mappings_config.json") + + if config_file is None: + # Not found in project, use package template + config_file = files( + "pytheranostics.data.configuration_templates" + ).joinpath("voi_mappings_config.json") + + with config_file.open("r") as f: + config = json.load(f) + if "plot_colors" in config: + # Use plot_colors from config (even if empty - will trigger color cycle) + color_map = { + k: v + for k, v in config["plot_colors"].items() + if not k.startswith("_") + } + else: + # plot_colors key missing, use fallback + color_map = fallback_defaults + except (FileNotFoundError, json.JSONDecodeError, KeyError): + # Config file missing or invalid, use fallback defaults + color_map = fallback_defaults + + # Color cycle for unmapped organs + color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"] + color_index = 0 + if masks is not None: for organ, mask in masks.items(): organ_lower = organ.lower() + + # Skip organs with 'peak' in the name (background/noise) if "peak" in organ_lower: continue - else: - if "kidney" in organ_lower: - color = "lime" - elif "parotid" in organ_lower: - color = "red" - elif "submandibular" in organ_lower: - color = "red" - elif "lesion" in organ_lower: - color = "m" - else: - continue + + # Find matching color from color_map (keyword match) + color = None + for keyword, keyword_color in color_map.items(): + if keyword.lower() in organ_lower: + color = keyword_color + break + + # If no match found, use color cycle + if color is None: + color = color_cycle[color_index % len(color_cycle)] + color_index += 1 mip_mask = mask.max(axis=0) if mip_mask.shape != spect_mip.shape: From fecf18a0623362607835d24ebe357c000a052b81 Mon Sep 17 00:00:00 2001 From: Carlos Uribe Date: Sat, 24 Jan 2026 16:35:45 -0800 Subject: [PATCH 07/12] add color options to the config file and explain in the 
tutorial --- .../project_setup_tutorial.ipynb | 84 ++++++++++++++++++- .../voi_mappings_config.json | 6 +- 2 files changed, 87 insertions(+), 3 deletions(-) diff --git a/docs/source/tutorials/getting_started/project_setup_tutorial.ipynb b/docs/source/tutorials/getting_started/project_setup_tutorial.ipynb index bde2dcd..3f43f2e 100644 --- a/docs/source/tutorials/getting_started/project_setup_tutorial.ipynb +++ b/docs/source/tutorials/getting_started/project_setup_tutorial.ipynb @@ -263,6 +263,74 @@ "Let's create a custom configuration for a kidney dosimetry study:" ] }, + { + "cell_type": "markdown", + "id": "dd1729d3", + "metadata": {}, + "source": [ + "### Plot Colors Configuration\n", + "\n", + "The `voi_mappings_config.json` also includes a `plot_colors` section to customize how organs appear in MIP plots:\n", + "\n", + "**Default behavior**: Leave `plot_colors` empty `{}` to use matplotlib's automatic color cycle\n", + "\n", + "**Custom colors**: Add organ keywords mapped to colors (case-insensitive substring matching)\n", + "\n", + "Example:\n", + "```json\n", + "\"plot_colors\": {\n", + " \"kidney\": \"lime\",\n", + " \"liver\": \"#FFD700\",\n", + " \"tumor\": \"red\",\n", + " \"lesion\": \"magenta\"\n", + "}\n", + "```\n", + "\n", + "Colors can be:\n", + "- **Named colors**: `\"red\"`, `\"lime\"`, `\"cyan\"`, `\"magenta\"`, etc.\n", + "- **Hex codes**: `\"#FF5733\"`, `\"#00FF00\"`, etc.\n", + "- **Empty**: `{}` uses matplotlib's default color cycle for all organs\n", + "\n", + "The keyword matching is flexible - `\"kidney\"` will match `\"kidney_left\"`, `\"Kidney_Right\"`, `\"kidney_cyst\"`, etc." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3cbdede6", + "metadata": {}, + "outputs": [], + "source": [ + "# Example: Customize plot colors for your study\n", + "import json\n", + "\n", + "# Load the mappings config\n", + "with open(mappings_path, 'r') as f:\n", + " config = json.load(f)\n", + "\n", + "# Option 1: Use automatic colors (recommended for starting)\n", + "config['plot_colors'] = {}\n", + "\n", + "# Option 2: Set custom colors for specific organs\n", + "# Uncomment and customize as needed:\n", + "# config['plot_colors'] = {\n", + "# \"kidney\": \"lime\",\n", + "# \"liver\": \"#FFD700\", # gold\n", + "# \"spleen\": \"cyan\",\n", + "# \"tumor\": \"red\",\n", + "# \"lesion\": \"magenta\"\n", + "# }\n", + "\n", + "# Save the updated config\n", + "with open(mappings_path, 'w') as f:\n", + " json.dump(config, f, indent=2)\n", + "\n", + "print(\"Plot colors configuration:\")\n", + "print(f\" {config['plot_colors']}\")\n", + "print(\"\\nThese colors will be used by plot_MIP_with_mask_outlines()\")\n", + "print(\"Tip: Edit voi_mappings_config.json directly for permanent changes\")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -442,8 +510,12 @@ " - Combine structures (e.g., all ribs → \"Skeleton\")\n", "\n", "2. 
**voi_mappings_config.json**\n", - " - Map between naming conventions\n", - " - Unify CT (morphology) and SPECT (activity) names\n", + " - Map between naming conventions (CT ↔ SPECT)\n", + " - Unify organ names across data sources\n", + " - **NEW**: Configure plot colors for MIP visualizations\n", + " - Leave `plot_colors` empty `{}` for automatic colors\n", + " - Add custom colors: `{\"kidney\": \"lime\", \"tumor\": \"red\"}`\n", + " - Supports matplotlib color names and hex codes\n", "\n", "### ✓ Customization Options\n", "- Selective templates (`templates=['config.json']`)\n", @@ -453,12 +525,14 @@ "### ✓ Best Practices\n", "- Start with `init_project()` for new studies\n", "- Customize config files for your specific organs of interest\n", + "- Configure plot colors in `voi_mappings_config.json` for consistent visualizations\n", "- Use standardized directory structure for reproducibility\n", "- Keep configs in version control (git) alongside analysis code\n", "\n", "## Next Steps\n", "\n", "- **Tutorial**: Check out the [TotalSegmentator Tutorial](./segmentation/total_segmentator_tutorial.ipynb) to use these configs\n", + "- **Plotting**: See `plot_MIP_with_mask_outlines()` in API docs for visualization examples\n", "- **Documentation**: See API Reference for detailed function signatures\n", "- **Examples**: Browse Data Ingestion Examples for real workflows\n", "\n", @@ -510,6 +584,12 @@ "\n", "**Remember**: Always edit the configuration files to match your specific study needs before running analysis workflows!" ] + }, + { + "cell_type": "markdown", + "id": "eeff51a6", + "metadata": {}, + "source": [] } ], "metadata": { diff --git a/pytheranostics/data/configuration_templates/voi_mappings_config.json b/pytheranostics/data/configuration_templates/voi_mappings_config.json index 2e40588..870cf53 100644 --- a/pytheranostics/data/configuration_templates/voi_mappings_config.json +++ b/pytheranostics/data/configuration_templates/voi_mappings_config.json @@ -1,5 +1,5 @@ { - "_instructions": "Map VOI names between different naming conventions.", + "_instructions": "Map VOI names between different naming conventions and configure visualization colors. Edit this file to customize organ name mappings and plot appearance.", "ct_mappings": { "organ_name_in_ct": "standardized_name" @@ -7,5 +7,9 @@ "spect_mappings": { "organ_name_in_spect": "standardized_name" + }, + + "plot_colors": { + "_description": "Configure colors for organ contours in MIP plots. Add entries with organ names or keywords as keys (case-insensitive substring matching) and matplotlib color names or hex codes as values. Leave empty {} to use matplotlib's default color cycle. 
Example: 'kidney': 'lime', 'liver': '#FFD700', 'tumor': 'red'" } } From adfe488763decfb9416d2d87a92d3ca1e190ebcb Mon Sep 17 00:00:00 2001 From: Carlos Uribe Date: Sun, 25 Jan 2026 08:15:39 -0800 Subject: [PATCH 08/12] remove hardcoded organ names for mapping and move to the configuration file for easy maintenability --- .../voi_mappings_config.json | 46 ++++++++- pytheranostics/imaging_ds/cycle_loader.py | 68 ++++++++++--- .../imaging_ds/longitudinal_study.py | 95 +++++++++++++++---- 3 files changed, 178 insertions(+), 31 deletions(-) diff --git a/pytheranostics/data/configuration_templates/voi_mappings_config.json b/pytheranostics/data/configuration_templates/voi_mappings_config.json index 2e40588..5af3592 100644 --- a/pytheranostics/data/configuration_templates/voi_mappings_config.json +++ b/pytheranostics/data/configuration_templates/voi_mappings_config.json @@ -1,5 +1,38 @@ { - "_instructions": "Map VOI names between different naming conventions.", + "_instructions": "Configure VOI/organ naming conventions, mappings, and visualization settings.", + + "valid_organ_names": { + "_description": "List of valid organ/VOI names for validation. These default names are compatible with OLINDA dosimetry calculations. Users can add custom organs as needed for their workflows.", + "names": [ + "Kidney_Left", + "Kidney_Right", + "Liver", + "Spleen", + "Bladder", + "SubmandibularGland_Left", + "SubmandibularGland_Right", + "ParotidGland_Left", + "ParotidGland_Right", + "BoneMarrow", + "Skeleton", + "WholeBody", + "RemainderOfBody", + "TotalTumorBurden" + ] + }, + + "canonical_mappings": { + "_description": "Best-effort ROI name normalization for auto_map mode. Maps abbreviated/common names to canonical organ names. Used when auto_map=True is set in create_studies_with_masks.", + "mappings": { + "Kidney_L": "Kidney_Left", + "Kidney_R": "Kidney_Right", + "Parotid_L": "ParotidGland_Left", + "Parotid_R": "ParotidGland_Right", + "Submandibular_L": "SubmandibularGland_Left", + "Submandibular_R": "SubmandibularGland_Right", + "WBCT": "WholeBody" + } + }, "ct_mappings": { "organ_name_in_ct": "standardized_name" @@ -7,5 +40,16 @@ "spect_mappings": { "organ_name_in_spect": "standardized_name" + }, + + "plot_colors": { + "_description": "Color mapping for organ contours in MIP plots. Keys can be organ names or keywords (case-insensitive substring match). Any matplotlib color name or hex code is valid.", + "kidney": "lime", + "parotid": "red", + "submandibular": "red", + "lesion": "magenta", + "liver": "yellow", + "spleen": "cyan", + "tumor": "magenta" } } diff --git a/pytheranostics/imaging_ds/cycle_loader.py b/pytheranostics/imaging_ds/cycle_loader.py index 90d8a36..0dd7013 100644 --- a/pytheranostics/imaging_ds/cycle_loader.py +++ b/pytheranostics/imaging_ds/cycle_loader.py @@ -7,6 +7,7 @@ from __future__ import annotations +import json import re from pathlib import Path from typing import Dict, List, Optional, Tuple, Union @@ -224,26 +225,69 @@ def prepare_cycle_inputs( # --- New high-level orchestration API --------------------------------------------------------- +def _get_canonical_mappings() -> Dict[str, str]: + """Load canonical name mappings from config file. + + Searches for voi_mappings_config.json in order: + 1. Current directory (project-specific) + 2. One level up (project root) + 3. Package template (defaults) + + Returns + ------- + Dict[str, str] + Mapping of abbreviated/common names to canonical names. + Returns empty dict if no config found. 
+ """ + search_paths = [ + Path.cwd() / "voi_mappings_config.json", + Path.cwd().parent / "voi_mappings_config.json", + ] + + for config_path in search_paths: + if config_path.exists(): + try: + with open(config_path, "r") as f: + config = json.load(f) + if "canonical_mappings" in config: + canon_config = config["canonical_mappings"] + if isinstance(canon_config, dict): + return canon_config.get("mappings", {}) + except Exception: + continue + + # Try package template + try: + import importlib.resources as pkg_resources + + template_path = pkg_resources.files("pytheranostics.data").joinpath( + "configuration_templates/voi_mappings_config.json" + ) + with open(template_path, "r") as f: + config = json.load(f) + if "canonical_mappings" in config: + canon_config = config["canonical_mappings"] + if isinstance(canon_config, dict): + return canon_config.get("mappings", {}) + except Exception: + pass + + return {} + + def _canonical_mask_name(name: str) -> str: - """Map RTSTRUCT ROI names to canonical pyTheranostics mask names. + """Apply canonical name mappings from config. Best-effort normalization used for auto mapping. Keeps unknown names as-is. + Mappings are loaded from voi_mappings_config.json. """ # Strip modality suffixes often used in notebooks (e.g., _m for CT-based, _a for activity) base = name if base.endswith("_m") or base.endswith("_a"): base = base[:-2] - # Common synonyms/abbreviations - replacements = { - "Kidney_L": "Kidney_Left", - "Kidney_R": "Kidney_Right", - "Parotid_L": "ParotidGland_Left", - "Parotid_R": "ParotidGland_Right", - "Submandibular_L": "SubmandibularGland_Left", - "Submandibular_R": "SubmandibularGland_Right", - "WBCT": "WholeBody", - } + # Load canonical mappings from config + replacements = _get_canonical_mappings() return replacements.get(base, base) @@ -389,7 +433,7 @@ def create_studies_with_masks( apply_spect_mapping = (final_spect_mapping is not None) or auto_map def _is_valid_target(name: str) -> bool: - if name in LongitudinalStudy._VALID_ORGAN_NAMES: + if name in LongitudinalStudy._get_valid_organ_names(): return True return re.match(r"^Lesion_([1-9]\d*)$", name) is not None diff --git a/pytheranostics/imaging_ds/longitudinal_study.py b/pytheranostics/imaging_ds/longitudinal_study.py index 44edebc..a64499b 100644 --- a/pytheranostics/imaging_ds/longitudinal_study.py +++ b/pytheranostics/imaging_ds/longitudinal_study.py @@ -28,22 +28,80 @@ class LongitudinalStudy: of interest and meta-data. """ - _VALID_ORGAN_NAMES = [ - "Kidney_Left", - "Kidney_Right", - "Liver", - "Spleen", - "Bladder", - "SubmandibularGland_Left", - "SubmandibularGland_Right", - "ParotidGland_Left", - "ParotidGland_Right", - "BoneMarrow", - "Skeleton", - "WholeBody", - "RemainderOfBody", - "TotalTumorBurden", - ] + # Cached valid organ names loaded from config + _VALID_ORGAN_NAMES = None + + @classmethod + def _get_valid_organ_names(cls) -> List[str]: + """Get valid organ names from config file. + + Searches for voi_mappings_config.json in order: + 1. Current directory (project-specific config) + 2. One level up (project root) + 3. Package template (OLINDA-compatible defaults) + + Returns + ------- + List[str] + List of valid organ names. + + Raises + ------ + FileNotFoundError + If no config file can be found. + ValueError + If config file doesn't contain valid_organ_names. 
+ """ + if cls._VALID_ORGAN_NAMES is not None: + return cls._VALID_ORGAN_NAMES + + # Try project-specific configs first + search_paths = [ + Path.cwd() / "voi_mappings_config.json", + Path.cwd().parent / "voi_mappings_config.json", + ] + + for config_path in search_paths: + if config_path.exists(): + try: + with open(config_path, "r") as f: + config = json.load(f) + if "valid_organ_names" in config: + # Handle both old format (list) and new format (dict with names key) + organ_names = config["valid_organ_names"] + if isinstance(organ_names, dict): + cls._VALID_ORGAN_NAMES = organ_names.get("names", []) + else: + cls._VALID_ORGAN_NAMES = organ_names + return cls._VALID_ORGAN_NAMES + except Exception: + continue + + # Load from package template (OLINDA defaults) + try: + import importlib.resources as pkg_resources + + template_path = pkg_resources.files("pytheranostics.data").joinpath( + "configuration_templates/voi_mappings_config.json" + ) + with open(template_path, "r") as f: + config = json.load(f) + if "valid_organ_names" in config: + organ_names = config["valid_organ_names"] + if isinstance(organ_names, dict): + cls._VALID_ORGAN_NAMES = organ_names.get("names", []) + else: + cls._VALID_ORGAN_NAMES = organ_names + return cls._VALID_ORGAN_NAMES + except Exception as e: + raise FileNotFoundError( + "Could not load valid_organ_names from any config file. " + "Please ensure voi_mappings_config.json exists in your project or package." + ) from e + + raise ValueError( + "Config file found but does not contain 'valid_organ_names' section." + ) def __init__( self, @@ -196,10 +254,11 @@ def _is_valid_mask_name(mask_name: str) -> bool: """Check if a mask name is valid. Valid names are either: - - Standard organ names from _VALID_ORGAN_NAMES + - Standard organ names from config or default list - Lesion names in format 'Lesion_N' where N is a positive integer """ - if mask_name in LongitudinalStudy._VALID_ORGAN_NAMES: + valid_names = LongitudinalStudy._get_valid_organ_names() + if mask_name in valid_names: return True lesion_pattern = r"^Lesion_([1-9]\d*)$" return bool(re.match(lesion_pattern, mask_name)) From 5cf710545c04a9b49192c75e93a18e011d803324 Mon Sep 17 00:00:00 2001 From: Carlos Uribe Date: Sun, 25 Jan 2026 08:16:09 -0800 Subject: [PATCH 09/12] include new options in config iin tutorial --- .../ROI_Mapping_Tutorial.ipynb | 213 ++++++++++++++++++ 1 file changed, 213 insertions(+) diff --git a/docs/source/tutorials/ROI_Mapping_Tutorial/ROI_Mapping_Tutorial.ipynb b/docs/source/tutorials/ROI_Mapping_Tutorial/ROI_Mapping_Tutorial.ipynb index 6973dd7..68af87f 100644 --- a/docs/source/tutorials/ROI_Mapping_Tutorial/ROI_Mapping_Tutorial.ipynb +++ b/docs/source/tutorials/ROI_Mapping_Tutorial/ROI_Mapping_Tutorial.ipynb @@ -309,6 +309,219 @@ "You can add lesion or other one-off mappings using `manual_overrides`:" ] }, + { + "cell_type": "markdown", + "id": "42944b4d", + "metadata": {}, + "source": [ + "## Configuration: Valid Organ Names\n", + "\n", + "### Overview\n", + "\n", + "The `voi_mappings_config.json` file now includes a `valid_organ_names` section that defines which organ/VOI names are accepted as valid targets for mappings. This prevents invalid mappings from silently failing.\n", + "\n", + "### Why This Matters\n", + "\n", + "When you create a mapping like:\n", + "```json\n", + "\"spect_mappings\": {\n", + " \"Kidney_L_a\": \"Kidney_L\"\n", + "}\n", + "```\n", + "\n", + "The system validates that `\"Kidney_L\"` is in the list of `valid_organ_names`. 
If it's not, the mapping is rejected and the ROI stays unmapped (identity mapping). This helps catch typos and naming inconsistencies early.\n", + "\n", + "### Default Valid Organ Names (OLINDA-Compatible)\n", + "\n", + "The package template includes these OLINDA-compatible organ names by default. You can customize this list for your project:\n", + "\n", + "```json\n", + "{\n", + " \"valid_organ_names\": {\n", + " \"_description\": \"List of valid organ/VOI names for validation. These default names are compatible with OLINDA dosimetry calculations. Users can add custom organs as needed for their workflows.\",\n", + " \"names\": [\n", + " \"Kidney_Left\",\n", + " \"Kidney_Right\",\n", + " \"Liver\",\n", + " \"Spleen\",\n", + " \"Bladder\",\n", + " \"SubmandibularGland_Left\",\n", + " \"SubmandibularGland_Right\",\n", + " \"ParotidGland_Left\",\n", + " \"ParotidGland_Right\",\n", + " \"BoneMarrow\",\n", + " \"Skeleton\",\n", + " \"WholeBody\",\n", + " \"RemainderOfBody\",\n", + " \"TotalTumorBurden\"\n", + " ]\n", + " }\n", + "}\n", + "```\n", + "\n", + "### Customizing Valid Organ Names\n", + "\n", + "If your project uses different naming conventions or custom organs, add them to your project's `voi_mappings_config.json`:\n", + "\n", + "```json\n", + "{\n", + " \"valid_organ_names\": {\n", + " \"_description\": \"Custom organ names for our project\",\n", + " \"names\": [\n", + " \"Kidney_Left\",\n", + " \"Kidney_Right\",\n", + " \"kidney_cyst_left\",\n", + " \"kidney_cyst_right\",\n", + " \"MyCustomOrgan\",\n", + " \"Lesion_1\",\n", + " \"Lesion_2\"\n", + " ]\n", + " },\n", + " \"ct_mappings\": {...},\n", + " \"spect_mappings\": {...}\n", + "}\n", + "```\n", + "\n", + "### Loading Valid Organ Names in Your Code\n", + "\n", + "```python\n", + "from pytheranostics.imaging_ds import LongitudinalStudy\n", + "\n", + "# Get the current valid organ names (loads from config or uses defaults)\n", + "valid_organs = LongitudinalStudy._get_valid_organ_names()\n", + "print(\"Valid organs:\", valid_organs)\n", + "```\n", + "\n", + "The system searches for `voi_mappings_config.json` in this order:\n", + "1. **Current directory** (your notebook location)\n", + "2. **One level up** (project root)\n", + "3. **Package template** (OLINDA defaults)\n", + "\n", + "### Workflow Integration\n", + "\n", + "When you call `create_studies_with_masks()` with a mapping config:\n", + "\n", + "```python\n", + "longCT, longSPECT, inj, used = tx.imaging_ds.create_studies_with_masks(\n", + " storage_root=\"./data\",\n", + " patient_id=\"PATIENT_ID\",\n", + " cycle_no=1,\n", + " mapping_config=\"./voi_mappings_config.json\" # ← Loads valid_organ_names too\n", + ")\n", + "```\n", + "\n", + "Both the mappings AND the valid organ names are loaded from your config file. This ensures consistency across your project." + ] + }, + { + "cell_type": "markdown", + "id": "825a6f45", + "metadata": {}, + "source": [ + "## Configuration: Canonical Name Mappings\n", + "\n", + "### Overview\n", + "\n", + "The `canonical_mappings` section in `voi_mappings_config.json` defines automatic abbreviation normalization for the `auto_map=True` mode in `create_studies_with_masks()`. 
This is useful when your RTSTRUCT files use abbreviated names like `Kidney_L` but you want them automatically converted to the canonical form `Kidney_Left`.\n", + "\n", + "### How It Works\n", + "\n", + "When you set `auto_map=True`:\n", + "\n", + "```python\n", + "longCT, longSPECT, inj, used = tx.imaging_ds.create_studies_with_masks(\n", + " storage_root=\"./data\",\n", + " patient_id=\"PATIENT_ID\",\n", + " cycle_no=1,\n", + " auto_map=True # ← Enables automatic canonical name mapping\n", + ")\n", + "```\n", + "\n", + "The system:\n", + "1. Loads canonical_mappings from your config file\n", + "2. For each ROI name in your RTSTRUCT:\n", + " - Strips modality suffixes (`_m`, `_a`)\n", + " - Looks up the base name in canonical_mappings\n", + " - Maps to the canonical name if found\n", + " - Keeps the name as-is if no mapping exists\n", + "\n", + "### Default Canonical Mappings\n", + "\n", + "The package template includes these common abbreviations:\n", + "\n", + "```json\n", + "{\n", + " \"canonical_mappings\": {\n", + " \"_description\": \"Best-effort ROI name normalization for auto_map mode. Maps abbreviated/common names to canonical organ names. Used when auto_map=True is set in create_studies_with_masks.\",\n", + " \"mappings\": {\n", + " \"Kidney_L\": \"Kidney_Left\",\n", + " \"Kidney_R\": \"Kidney_Right\",\n", + " \"Parotid_L\": \"ParotidGland_Left\",\n", + " \"Parotid_R\": \"ParotidGland_Right\",\n", + " \"Submandibular_L\": \"SubmandibularGland_Left\",\n", + " \"Submandibular_R\": \"SubmandibularGland_Right\",\n", + " \"WBCT\": \"WholeBody\"\n", + " }\n", + " }\n", + "}\n", + "```\n", + "\n", + "### Customizing Canonical Mappings\n", + "\n", + "Add custom mappings to your project's `voi_mappings_config.json` for your institution's naming conventions:\n", + "\n", + "```json\n", + "{\n", + " \"canonical_mappings\": {\n", + " \"_description\": \"Custom mappings for our institution\",\n", + " \"mappings\": {\n", + " \"Kidney_L\": \"Kidney_Left\",\n", + " \"Kidney_R\": \"Kidney_Right\",\n", + " \"KL\": \"Kidney_Left\",\n", + " \"KR\": \"Kidney_Right\",\n", + " \"Parotid_L\": \"ParotidGland_Left\",\n", + " \"Parotid_R\": \"ParotidGland_Right\",\n", + " \"Liver_N\": \"Liver\",\n", + " \"Liver_C\": \"Liver\",\n", + " \"WBCT\": \"WholeBody\"\n", + " }\n", + " }\n", + "}\n", + "```\n", + "\n", + "### When to Use: auto_map vs Explicit Mapping\n", + "\n", + "| Scenario | Approach | Example |\n", + "|----------|----------|---------|\n", + "| **Known abbreviations, consistent naming** | `auto_map=True` | RTSTRUCT names are always `Kidney_L`, `Kidney_R`, etc. 
→ auto converts to canonical |\n", + "| **Modality-specific names** | Explicit `ct_mappings`/`spect_mappings` | CT has `Kidney_L_m`, SPECT has `Kidney_L_a` → different mappings per modality |\n", + "| **Complex/variable naming** | Explicit mappings in config | ROIs named inconsistently across projects → use full `ct_mappings`/`spect_mappings` |\n", + "| **Mixed approach** | `auto_map=True` + explicit overrides | Use auto_map for most, but override specific conflicting names with explicit mappings |\n", + "\n", + "### Example Workflow: Auto-Map with Suffix Stripping\n", + "\n", + "```python\n", + "# RTSTRUCT contains: Kidney_L_a, Kidney_R_a, Kidney_L_m, Kidney_R_m\n", + "\n", + "longCT, longSPECT, inj, used = tx.imaging_ds.create_studies_with_masks(\n", + " storage_root=\"./data\",\n", + " patient_id=\"PATIENT_ID\",\n", + " cycle_no=1,\n", + " auto_map=True # Enables canonical_mappings\n", + ")\n", + "\n", + "# Results (from used_mappings):\n", + "# CT: Kidney_L_m → Kidney_Left, Kidney_R_m → Kidney_Right\n", + "# SPECT: Kidney_L_a → Kidney_Left, Kidney_R_a → Kidney_Right\n", + "```\n", + "\n", + "The system:\n", + "1. Strips `_m` and `_a` suffixes → `Kidney_L`, `Kidney_R`\n", + "2. Looks up in canonical_mappings → finds `Kidney_Left`, `Kidney_Right`\n", + "3. Validates against valid_organ_names → passes\n", + "4. Applies the mapping ✓" + ] + }, { "cell_type": "code", "execution_count": null, From eee2b410301de903e9ed6f1c27a67f9a1b7050c1 Mon Sep 17 00:00:00 2001 From: Carlos Uribe Date: Sun, 25 Jan 2026 09:02:31 -0800 Subject: [PATCH 10/12] change the way the logging is being done with different levels to avoid much verbose when not needed --- docs/source/index.rst | 1 + docs/source/intro/logging.rst | 262 ++++++++++++++++++ .../imaging_ds/longitudinal_study.py | 43 +-- pytheranostics/imaging_ds/mapping_summary.py | 13 +- pytheranostics/imaging_tools/tools.py | 29 +- 5 files changed, 315 insertions(+), 33 deletions(-) create mode 100644 docs/source/intro/logging.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 7d16d8d..b0199a5 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -14,6 +14,7 @@ PyTheranostics is a comprehensive Python library for nuclear medicine image proc intro/overview intro/installation + intro/logging tutorials/getting_started/basic_usage .. toctree:: diff --git a/docs/source/intro/logging.rst b/docs/source/intro/logging.rst new file mode 100644 index 0000000..d943d37 --- /dev/null +++ b/docs/source/intro/logging.rst @@ -0,0 +1,262 @@ +.. _logging_guide: + +Logging and Verbosity Control +============================== + +PyTheranostics uses Python's standard ``logging`` module to provide configurable output verbosity. This allows you to control how much information is displayed during processing, making it suitable for both interactive notebook use and automated pipelines. + +Overview +-------- + +The logging system provides four severity levels: + +.. 
list-table:: + :widths: 15 30 55 + :header-rows: 1 + + * - Level + - When to Use + - What You'll See + * - ``DEBUG`` + - Troubleshooting and development + - Every step: individual timepoint loading, mask resampling operations, registration iterations + * - ``INFO`` + - Normal production use + - Important milestones: "Loaded 5 CT timepoints", "Applied mappings", file save operations + * - ``WARNING`` + - Default (quiet mode) + - Only important notices: mask overwrites, data quality issues, deprecated features + * - ``ERROR`` + - Critical issues only + - Fatal errors that prevent processing + +Quick Start +----------- + +Set Logging Level in Notebooks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Add this at the top of your notebook to control verbosity: + +.. code-block:: python + + import logging + + # Quiet mode (recommended for notebooks) - only warnings and errors + logging.basicConfig(level=logging.WARNING, format='%(levelname)s: %(message)s') + + # Normal mode - see important milestones + logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') + + # Verbose mode - see every step (for debugging) + logging.basicConfig(level=logging.DEBUG, format='%(levelname)s: %(message)s') + +.. note:: + Call ``basicConfig()`` **before** importing PyTheranostics modules for the settings to take effect. + +Example: Clean Notebook Output +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + import logging + logging.basicConfig(level=logging.WARNING) # Quiet mode + + from pytheranostics.imaging_ds.cycle_loader import create_studies_with_masks + + # This will now run quietly, only showing warnings/errors + longCT, longSPECT, inj, mappings = create_studies_with_masks( + storage_root="./data", + patient_id="PATIENT001", + cycle_no=1, + calibration_factor=106.0, + auto_map=True + ) + +Example: Debugging with Verbose Output +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + import logging + logging.basicConfig(level=logging.DEBUG, format='%(name)s - %(levelname)s: %(message)s') + + from pytheranostics.imaging_ds.cycle_loader import create_studies_with_masks + + # This will show detailed progress: + # - Loading timepoint 0 from CT_TP1... + # - ✓ Timepoint 0 loaded + # - Resampling Masks: Liver ... + # - etc. + longCT, longSPECT, inj, mappings = create_studies_with_masks(...) + +Affected Modules +---------------- + +The following modules use the logging system: + +Imaging & Data Loading +~~~~~~~~~~~~~~~~~~~~~~~ + +* ``pytheranostics.imaging_ds.longitudinal_study`` + + - Logs timepoint loading progress (parallel and sequential) + - Reports mask addition and overwrites + - Shows registration iterations and Jaccard indices + - Confirms file save operations + +* ``pytheranostics.imaging_ds.mapping_summary`` + + - Reports mapping statistics per timepoint + - Shows applied mappings when verbose mode enabled + - Confirms JSON export of mapping details + +* ``pytheranostics.imaging_tools.tools`` + + - Reports DICOM metadata extraction (injected activity, patient weight) + - Warns about orientation matrix corrections + - Shows mask resampling and registration progress + +Common Use Cases +---------------- + +Case 1: Interactive Notebook (Recommended) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For clean notebook output, use WARNING level: + +.. 
code-block:: python + + import logging + logging.basicConfig(level=logging.WARNING, format='%(message)s') + +This gives you a clean interface while still alerting you to important issues like: + +* Mask overwrites +* Missing DICOM metadata +* Data quality warnings + +Case 2: Production Pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For automated workflows, use INFO level with more context: + +.. code-block:: python + + import logging + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s: %(message)s', + filename='processing.log' + ) + +This logs important milestones to a file for audit trails. + +Case 3: Debugging Issues +~~~~~~~~~~~~~~~~~~~~~~~~~ + +When troubleshooting, enable DEBUG with full context: + +.. code-block:: python + + import logging + logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s - %(name)s:%(lineno)d - %(levelname)s: %(message)s' + ) + +This shows every operation with timestamps and source locations. + +Advanced Configuration +---------------------- + +Module-Specific Levels +~~~~~~~~~~~~~~~~~~~~~~~ + +You can set different log levels for different modules: + +.. code-block:: python + + import logging + + # Global default: WARNING + logging.basicConfig(level=logging.WARNING) + + # But show INFO for longitudinal study operations + logging.getLogger('pytheranostics.imaging_ds.longitudinal_study').setLevel(logging.INFO) + + # And DEBUG for mapping summary + logging.getLogger('pytheranostics.imaging_ds.mapping_summary').setLevel(logging.DEBUG) + +Custom Formatters +~~~~~~~~~~~~~~~~~ + +Customize the output format: + +.. code-block:: python + + import logging + + # Simple format for notebooks + logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') + + # Detailed format for log files + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s | %(name)-40s | %(levelname)-8s | %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + +Troubleshooting +--------------- + +"Logging settings not taking effect" +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Problem**: You set the logging level but still see too much or too little output. + +**Solution**: Make sure to call ``logging.basicConfig()`` **before** importing any PyTheranostics modules: + +.. code-block:: python + + # ✓ CORRECT ORDER + import logging + logging.basicConfig(level=logging.WARNING) + from pytheranostics.imaging_ds import LongitudinalStudy # Now uses WARNING level + + # ✗ WRONG ORDER + from pytheranostics.imaging_ds import LongitudinalStudy # Uses default level + import logging + logging.basicConfig(level=logging.WARNING) # Too late! + +"Multiple log messages appearing" +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Problem**: Each message appears multiple times or from different loggers. + +**Solution**: This happens when ``basicConfig()`` is called multiple times. In Jupyter notebooks, restart the kernel and configure logging once at the start: + +.. code-block:: python + + # In your first notebook cell: + import logging + + # Only call this once per kernel session + if not logging.getLogger().hasHandlers(): + logging.basicConfig(level=logging.INFO, format='%(message)s') + +Migration from Previous Versions +--------------------------------- + +If you're upgrading from a version that used ``print()`` statements, no code changes are required. 
The transition is automatic: + +* All previous ``print()`` output is now logged at INFO or WARNING level +* To restore previous behavior (seeing all messages), use ``logging.INFO`` +* To reduce output in notebooks, use ``logging.WARNING`` + +See Also +-------- + +* :ref:`overview` - General introduction to PyTheranostics +* :ref:`installation` - Installation instructions +* `Python Logging Documentation `_ - Official Python logging guide diff --git a/pytheranostics/imaging_ds/longitudinal_study.py b/pytheranostics/imaging_ds/longitudinal_study.py index a64499b..e5cfbdb 100644 --- a/pytheranostics/imaging_ds/longitudinal_study.py +++ b/pytheranostics/imaging_ds/longitudinal_study.py @@ -1,6 +1,7 @@ """Module for longitudinal medical imaging studies.""" import json +import logging import os import re from concurrent.futures import ThreadPoolExecutor, as_completed @@ -20,6 +21,8 @@ ) from pytheranostics.registration.phantom_to_ct import PhantomToCTBoneReg +logger = logging.getLogger(__name__) + class LongitudinalStudy: """Longitudinal Study Data Class. @@ -205,12 +208,16 @@ def from_dicom( if parallel and len(dicom_dirs) > 1: # Parallel loading for multiple timepoints - print(f"Loading {len(dicom_dirs)} {modality} timepoints in parallel...") + logger.info( + f"Loading {len(dicom_dirs)} {modality} timepoints in parallel..." + ) # Helper function for parallel execution def load_single_timepoint(args): time_id, dicom_dir = args - print(f" Loading timepoint {time_id} from {Path(dicom_dir).name}...") + logger.debug( + f" Loading timepoint {time_id} from {Path(dicom_dir).name}..." + ) return time_id, load_from_dicom_dir( dir=dicom_dir, modality=modality, @@ -230,11 +237,13 @@ def load_single_timepoint(args): time_id, (image, meta) = future.result() images[time_id] = image metadata[time_id] = meta - print(f" ✓ Timepoint {time_id} loaded") + logger.debug(f" ✓ Timepoint {time_id} loaded") else: # Sequential loading for time_id, dicom_dir in enumerate(dicom_dirs): - print(f"Loading timepoint {time_id} from {Path(dicom_dir).name}...") + logger.info( + f"Loading timepoint {time_id} from {Path(dicom_dir).name}..." + ) image, meta = load_from_dicom_dir( dir=dicom_dir, modality=modality, @@ -691,8 +700,8 @@ def add_masks_to_time_point( ) if mask_target in self.masks[time_id]: - print( - f"Warning: {mask_target} found at Time = {time_id}. It will be over-written!" + logger.warning( + f"{mask_target} found at Time = {time_id}. It will be over-written!" ) # Masks are in the right orientation and spacing, however there could be discrepancies @@ -756,8 +765,8 @@ def add_raw_masks_to_time_point( SimpleITK.GetArrayFromImage(mask_itk), axes=(1, 2, 0) ) if mask_name in self.masks[time_id]: - print( - f"Warning: {mask_name} found at Time = {time_id}. It will be over-written!" + logger.warning( + f"{mask_name} found at Time = {time_id}. It will be over-written!" ) self.masks[time_id][mask_name] = mask_array.astype(numpy.bool_) @@ -865,7 +874,7 @@ def add_bone_marrow_mask_from_phantom( phantom_skeleton_path (Path): Path to phantom Skeleton .nii file. phantom_bone_marrow_path (Path): Path to phantom Bone Marrow .nii file. """ - print( + logger.info( "Running Personalized Bone Marrow generation from XCAT Phantom. This feature is unstable. Please review the generated BoneMarrow masks." 
) @@ -882,11 +891,11 @@ def add_bone_marrow_mask_from_phantom( best_index = {time_id: 0 for time_id in self.images.keys()} for i in range(num_iterations): - print(f"Registration :: Iteration {i+1}") + logger.info(f"Registration :: Iteration {i+1}") # Loop through each time point: for time_id, ct in self.images.items(): # Register Skeleton - print( + logger.debug( f" >> Registering Phantom Skeleton to CT at time point {time_id} ..." ) RegManager = PhantomToCTBoneReg( @@ -921,14 +930,14 @@ def add_bone_marrow_mask_from_phantom( best_index[time_id] = jaccard # Calculate Index - print( + logger.debug( f" >>> Jaccard Index between Skeleton and Segmented Bone Marrow: {jaccard: 1.2f}" ) # Final Results: - print(" >>> Final Jaccard Indices:") + logger.info(" >>> Final Jaccard Indices:") for time_id in self.masks.keys(): - print(f" >>> Time point {time_id}: {best_index[time_id]}") + logger.info(f" >>> Time point {time_id}: {best_index[time_id]}") return None @@ -960,7 +969,7 @@ def save_image_to_nii_at( time_id (int): The time ID representing the time point to be saved. out_path (Path): The path to the folder where images will be written. """ - print(f"Writing Image ({name}) into nifty file.") + logger.info(f"Writing Image ({name}) into nifty file.") SimpleITK.WriteImage( image=SimpleITK.Cast(self.images[time_id], SimpleITK.sitkInt32), fileName=out_path / f"Image_{time_id}{name}.nii.gz", @@ -977,7 +986,7 @@ def save_image_to_mhd_at( time_id (int): The time ID representing the time point to be saved. out_path (Path): The path to the folder where images will be written. """ - print(f"Writing Image ({name}) into mhd file.") + logger.info(f"Writing Image ({name}) into mhd file.") SimpleITK.WriteImage( image=SimpleITK.Cast(self.images[time_id], SimpleITK.sitkInt32), fileName=os.path.join(out_path, f"{name}.mhd"), @@ -1015,7 +1024,7 @@ def save_masks_to_nii_at( ref_image=self.images[time_id], ) - print(f"Writing Masks ({mask_names}) into nifty file.") + logger.info(f"Writing Masks ({mask_names}) into nifty file.") SimpleITK.WriteImage( image=mask_image, fileName=out_path / f"Masks_{time_id}.nii.gz" diff --git a/pytheranostics/imaging_ds/mapping_summary.py b/pytheranostics/imaging_ds/mapping_summary.py index c990026..edc0980 100644 --- a/pytheranostics/imaging_ds/mapping_summary.py +++ b/pytheranostics/imaging_ds/mapping_summary.py @@ -7,9 +7,12 @@ from __future__ import annotations import json +import logging from pathlib import Path from typing import Dict, Iterable, List, Tuple +logger = logging.getLogger(__name__) + def _split_modalities( mapping: Dict[str, str], @@ -76,14 +79,14 @@ def summarize_used_mappings( unmapped_ct_n = len(parts.get("unmapped_ct", [])) unmapped_sp_n = len(parts.get("unmapped_spect", [])) msg += f" | Unmapped: CT {unmapped_ct_n}, SPECT {unmapped_sp_n}" - print(msg) + logger.info(msg) if verbose: def _print_pairs(label: str, pairs: Iterable[Tuple[str, str]]) -> None: shown = 0 for k, v in pairs: - print(f" {label}: {k} -> {v}") + logger.info(f" {label}: {k} -> {v}") shown += 1 if shown >= sample_limit: break @@ -95,10 +98,10 @@ def _print_pairs(label: str, pairs: Iterable[Tuple[str, str]]) -> None: if include_unmapped: if parts.get("unmapped_ct"): sample_ct = parts["unmapped_ct"][:sample_limit] - print(f" Unmapped CT (identity): {sample_ct}") + logger.info(f" Unmapped CT (identity): {sample_ct}") if parts.get("unmapped_spect"): sample_sp = parts["unmapped_spect"][:sample_limit] - print(f" Unmapped SPECT (identity): {sample_sp}") + logger.info(f" Unmapped SPECT (identity): 
{sample_sp}") if save_json_path is not None: out = { @@ -119,4 +122,4 @@ def _print_pairs(label: str, pairs: Iterable[Tuple[str, str]]) -> None: save_path = Path(save_json_path) with save_path.open("w") as f: json.dump(out, f, indent=2) - print(f"Saved detailed mapping summary to {save_path}") + logger.info(f"Saved detailed mapping summary to {save_path}") diff --git a/pytheranostics/imaging_tools/tools.py b/pytheranostics/imaging_tools/tools.py index 0cb3538..5f43e14 100644 --- a/pytheranostics/imaging_tools/tools.py +++ b/pytheranostics/imaging_tools/tools.py @@ -3,6 +3,7 @@ from __future__ import annotations import glob +import logging from pathlib import Path from typing import TYPE_CHECKING, Dict, List, Optional, Tuple @@ -23,6 +24,8 @@ transform_ct_mask_to_spect, ) +logger = logging.getLogger(__name__) + # TODO: Move under dicomtools, and have two sets: one generic (the current dicomtools.py) and on specific for pyTheranostic functions (containing # the code below) @@ -91,22 +94,22 @@ def load_metadata(dir: str, modality: str) -> ImagingMetadata: injected_activity > 20000 ): # Activity likely in Bq instead of MBq injected_activity /= 1e6 - print( + logger.info( f"Injected activity found in DICOM Header: {injected_activity:2.1f} MBq. Please verify." ) except AttributeError: # Sequence exists but RadionuclideTotalDose attribute is missing - print( + logger.warning( "RadiopharmaceuticalInformationSequence found but RadionuclideTotalDose is missing." ) else: # Sequence exists but is empty - this may indicate a data quality issue - print( - "Warning: RadiopharmaceuticalInformationSequence is empty. This may indicate a data quality issue." + logger.warning( + "RadiopharmaceuticalInformationSequence is empty. This may indicate a data quality issue." ) if injected_activity is None: - print("Using default injected activity: 7400 MBq") + logger.info("Using default injected activity: 7400 MBq") injected_activity = 7400.0 # Global attributes. Should be the same in all slices! @@ -394,7 +397,9 @@ def load_from_dicom_dir( ) except AttributeError: - print("No calibration factor provided, Data might not be in BQ/ML ...") + logger.warning( + "No calibration factor provided, Data might not be in BQ/ML ..." + ) # Load Meta Data using pydicom. meta = load_metadata(dir=dir, modality=modality) @@ -403,7 +408,7 @@ def load_from_dicom_dir( image = force_orthogonality(image=image) # Display Origin and Orientation. - print( + logger.debug( f"Modality: {modality} -> Origin: {image.GetOrigin()}; Direction: {image.GetDirection()}" ) @@ -452,10 +457,12 @@ def force_orthogonality(image: SimpleITK.Image) -> SimpleITK.Image: Image with orthogonal orientation vectors. """ if not are_vectors_orthogonal(image.GetDirection()): - print("Patient Orientation Vectors are NOT orthogonal. Forcing...") + logger.warning("Patient Orientation Vectors are NOT orthogonal. 
Forcing...") prev_origin = image.GetDirection() new_origin = [round(vec_element) for vec_element in prev_origin] - print(f">> Original Orientation: {prev_origin}, New Orientation: {new_origin} ") + logger.debug( + f">> Original Orientation: {prev_origin}, New Orientation: {new_origin} " + ) image.SetDirection(new_origin) else: prev_origin = image.GetDirection() @@ -586,7 +593,7 @@ def load_and_resample_RT_to_target( resampled_masks: Dict[str, SimpleITK.Image] = {} for mask_name, mask_image in ref_masks.items(): - print(f"Resampling Masks: {mask_name} ...") + logger.debug(f"Resampling Masks: {mask_name} ...") resampled_masks[mask_name] = resample_mask_to_target( mask_img=mask_image, target_img=target_img ) @@ -627,7 +634,7 @@ def load_and_register_RT_to_target( resampled_masks: Dict[str, SimpleITK.Image] = {} for mask_name, mask_image in ref_masks.items(): - print(f"Registering Masks: {mask_name} ...") + logger.debug(f"Registering Masks: {mask_name} ...") resampled_masks[mask_name] = transform_ct_mask_to_spect( mask=mask_image, spect=target_img, transform=transform ) From e1cf0122217cc0cb84e1581c6618340ecc317356 Mon Sep 17 00:00:00 2001 From: Carlos Uribe Date: Sun, 25 Jan 2026 09:05:35 -0800 Subject: [PATCH 11/12] Fix test thaat was not updated after refactor --- tests/test_longitudinal_study.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_longitudinal_study.py b/tests/test_longitudinal_study.py index fba8834..4080571 100644 --- a/tests/test_longitudinal_study.py +++ b/tests/test_longitudinal_study.py @@ -103,8 +103,10 @@ def test_init_success_minimal(self): assert len(study.images) == 2 assert len(study.meta) == 2 assert len(study.masks) == 0 - assert isinstance(study._VALID_ORGAN_NAMES, list) - assert "Liver" in study._VALID_ORGAN_NAMES + # Test that _get_valid_organ_names() returns a list and contains expected organs + valid_organs = LongitudinalStudy._get_valid_organ_names() + assert isinstance(valid_organs, list) + assert "Liver" in valid_organs assert LongitudinalStudy._is_valid_mask_name("Lesion_1") def test_init_mismatched_keys_raises_error(self): From 40fcf024d5863fd0aa93116d34871ea628b6ebe4 Mon Sep 17 00:00:00 2001 From: Carlos Uribe Date: Sun, 25 Jan 2026 10:03:33 -0800 Subject: [PATCH 12/12] create configuration path for the fits of different organs --- docs/source/index.rst | 1 + docs/source/intro/dosimetry_config.rst | 120 ++++++++++ .../dosimetry_fit_defaults.json | 33 +++ pytheranostics/dosimetry/__init__.py | 4 + pytheranostics/dosimetry/config.py | 217 ++++++++++++++++++ pytheranostics/project.py | 9 + tests/test_dosimetry_config.py | 61 +++++ 7 files changed, 445 insertions(+) create mode 100644 docs/source/intro/dosimetry_config.rst create mode 100644 pytheranostics/data/configuration_templates/dosimetry_fit_defaults.json create mode 100644 pytheranostics/dosimetry/config.py create mode 100644 tests/test_dosimetry_config.py diff --git a/docs/source/index.rst b/docs/source/index.rst index b0199a5..5a04489 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -15,6 +15,7 @@ PyTheranostics is a comprehensive Python library for nuclear medicine image proc intro/overview intro/installation intro/logging + intro/dosimetry_config tutorials/getting_started/basic_usage .. 
toctree:: diff --git a/docs/source/intro/dosimetry_config.rst b/docs/source/intro/dosimetry_config.rst new file mode 100644 index 0000000..d1d0e55 --- /dev/null +++ b/docs/source/intro/dosimetry_config.rst @@ -0,0 +1,120 @@ +Dosimetry Fit Defaults +====================== + +PyTheranostics supports a config-driven workflow for ROI dosimetry fits. +This page describes the configuration format, auto-discovery logic, +and how to use the `build_roi_fit_config` API in notebooks and scripts. + +Overview +-------- + +- Central config file controls default fit parameters for organs and lesions. +- Project-specific overrides are supported; otherwise a packaged template is used. +- Lesion ROIs are auto-discovered from `longSPECT.masks` (when enabled). + +Project Initialization +---------------------- + +When creating a new PyTheranostics project, the dosimetry config is automatically +generated alongside other project templates: + +.. code-block:: python + + from pytheranostics.project import init_project + + # Creates project directory with all config templates + init_project("./my_dosimetry_project") + +This copies `dosimetry_fit_defaults.json` to your project root so you can +**customize it without touching code**. Edit the file directly to adjust organ +defaults, BoneMarrow kinetics, lesion bounds, etc. + +Config Discovery +---------------- + +The loader searches for `dosimetry_fit_defaults.json` in this order: + +1. Current working directory +2. Parent directory of the current working directory +3. Packaged template: `pytheranostics.data/configuration_templates/dosimetry_fit_defaults.json` + +If a project file is found, it overrides the packaged template. + +Config Schema +------------- + +Example JSON (packaged template): + +.. code-block:: json + + { + "organ_defaults": { + "fit_order": 1, + "with_uptake": false, + "param_init": {"A1": 100, "A2": 0.01}, + "fixed_parameters": null, + "bounds": null, + "washout_ratio": null + }, + "organs": { + "BoneMarrow": { + "fit_order": 2, + "param_init": {"A1": 50}, + "fixed_parameters": {"A2": 0.045788, "B1": 561.376560, "B2": 0.213215}, + "with_uptake": null, + "bounds": null, + "washout_ratio": 4.656569406831483 + } + }, + "lesion_defaults": { + "fit_order": 1, + "param_init": {"A1": 700, "A2": 0.1}, + "fixed_parameters": null, + "bounds": {"A1": [0, "inf"], "A2": ["log2_over_(6.647*24)_per_hour", "inf"]}, + "with_uptake": false, + "washout_ratio": null + }, + "lesions": { + "auto_discover": true, + "pattern": "^Lesion_(\\d+)$" + } + } + +Notes: +- Bounds support special values: "inf" and "log2_over_(6.647*24)_per_hour". +- Organ overrides only replace specified fields; unspecified fields inherit defaults. + +API Usage +--------- + +.. 
code-block:: python + + import logging + from pytheranostics.dosimetry import build_roi_fit_config + + logging.basicConfig(level=logging.INFO, format='%(message)s') + + roi_config = build_roi_fit_config(longSPECT) + # roi_config["Liver"] -> {fixed_parameters, fit_order, param_init, ...} + # roi_config["Lesion_3"] -> lesion defaults applied + +Project Overrides +----------------- + +After running `initialize_project_dosimetry_config()`, edit the generated +`dosimetry_fit_defaults.json` in your project root to customize: + +- `organ_defaults`: Apply to all organs unless overridden +- `organs.`: Override specific organ parameters (e.g., BoneMarrow kinetics) +- `lesion_defaults`: Apply to all auto-discovered lesions +- `lesions.pattern`: Regex pattern for lesion ROI names +- `lesions.auto_discover`: Enable/disable automatic lesion discovery + +The loader will pick up your project file automatically. + +Validation & Best Practices +--------------------------- + +- Keep institution-specific values in project overrides, not code. +- Review bounds and fixed parameters for each organ. +- Version your overrides and document assumptions. diff --git a/pytheranostics/data/configuration_templates/dosimetry_fit_defaults.json b/pytheranostics/data/configuration_templates/dosimetry_fit_defaults.json new file mode 100644 index 0000000..ee3a5fe --- /dev/null +++ b/pytheranostics/data/configuration_templates/dosimetry_fit_defaults.json @@ -0,0 +1,33 @@ +{ + "_description": "Default dosimetry fit parameters for organs and lesions. Project-specific overrides can be provided via a dosimetry_fit_defaults.json placed in the project root.", + "organ_defaults": { + "fit_order": 1, + "with_uptake": false, + "param_init": {"A1": 100, "A2": 0.01}, + "fixed_parameters": null, + "bounds": null, + "washout_ratio": null + }, + "organs": { + "BoneMarrow": { + "fit_order": 2, + "param_init": {"A1": 50}, + "fixed_parameters": {"A2": 0.045788, "B1": 561.376560, "B2": 0.213215}, + "with_uptake": null, + "bounds": null, + "washout_ratio": 4.656569406831483 + } + }, + "lesion_defaults": { + "fit_order": 1, + "param_init": {"A1": 700, "A2": 0.1}, + "fixed_parameters": null, + "bounds": {"A1": [0, "inf"], "A2": ["log2_over_(6.647*24)_per_hour", "inf"]}, + "with_uptake": false, + "washout_ratio": null + }, + "lesions": { + "auto_discover": true, + "pattern": "^Lesion_(\\d+)$" + } +} diff --git a/pytheranostics/dosimetry/__init__.py b/pytheranostics/dosimetry/__init__.py index 9dfc220..714acee 100644 --- a/pytheranostics/dosimetry/__init__.py +++ b/pytheranostics/dosimetry/__init__.py @@ -8,4 +8,8 @@ "organ_s_dosimetry", "voxel_s_dosimetry", "bone_marrow", + "config", ] + +# Convenience re-exports for users +from .config import build_roi_fit_config # noqa: F401 diff --git a/pytheranostics/dosimetry/config.py b/pytheranostics/dosimetry/config.py new file mode 100644 index 0000000..8902658 --- /dev/null +++ b/pytheranostics/dosimetry/config.py @@ -0,0 +1,217 @@ +"""Dosimetry fit configuration loader. + +Provides a single API to build ROI fit parameter configuration from a project or +package template, merging organ defaults, organ overrides, and auto-discovered +lesions from a `LongitudinalStudy`. + +The config file is auto-discovered in the following order: +1. Current working directory: dosimetry_fit_defaults.json +2. Parent directory of CWD: dosimetry_fit_defaults.json +3. 
Package template: pytheranostics.data/configuration_templates/dosimetry_fit_defaults.json + +Notes +----- +- Bounds may include string values like "inf" and special expressions + (e.g., "log2_over_(6.647*24)_per_hour") which are parsed to numeric values. +- Lesion discovery is controlled via the config's `lesions` section. + +""" + +from __future__ import annotations + +import json +import logging +import re +from copy import deepcopy +from pathlib import Path +from typing import Dict, Optional + +logger = logging.getLogger(__name__) + + +def _parse_special_value(val): + """Parse special string values to numeric where applicable. + + Parameters + ---------- + val : any + Value from config that may be a special string. + + Returns + ------- + any + Parsed value. + """ + if isinstance(val, str): + if val == "inf": + from math import inf + + return inf + if val == "log2_over_(6.647*24)_per_hour": + import numpy as np + + return float(np.log(2) / (6.647 * 24)) + return val + + +def _parse_bounds(bounds): + """Parse bounds mapping converting special strings to numeric values. + + Parameters + ---------- + bounds : dict | None + Bounds mapping from the config. + + Returns + ------- + dict | None + Parsed bounds or None. + """ + if bounds is None: + return None + parsed = {} + for k, pair in bounds.items(): + if isinstance(pair, (list, tuple)) and len(pair) == 2: + parsed[k] = (_parse_special_value(pair[0]), _parse_special_value(pair[1])) + else: + parsed[k] = pair + return parsed + + +def _load_config(config_path: Optional[Path | str] = None) -> Dict: + """Load dosimetry fit defaults JSON from project or package template. + + Parameters + ---------- + config_path : Path | str | None + Optional explicit path to a JSON config. + + Returns + ------- + dict + The loaded configuration mapping. + """ + # Explicit path + if config_path is not None: + p = Path(config_path) + if not p.exists(): + raise FileNotFoundError(f"Dosimetry config not found: {p}") + with p.open("r") as f: + return json.load(f) + + # Project search + search_paths = [ + Path.cwd() / "dosimetry_fit_defaults.json", + Path.cwd().parent / "dosimetry_fit_defaults.json", + ] + for p in search_paths: + if p.exists(): + try: + with p.open("r") as f: + return json.load(f) + except Exception: + continue + + # Package template fallback + try: + import importlib.resources as pkg_resources + + template = pkg_resources.files("pytheranostics.data").joinpath( + "configuration_templates/dosimetry_fit_defaults.json" + ) + with open(template, "r") as f: + return json.load(f) + except Exception as e: + logger.error("Failed to load package dosimetry template: %s", e) + raise + + +def build_roi_fit_config( + longSPECT, config_path: Optional[Path | str] = None +) -> Dict[str, Dict]: + """Build ROI fit configuration for organs and lesions. + + Parameters + ---------- + longSPECT : LongitudinalStudy + The SPECT longitudinal study; used to auto-discover lesion names from masks. + config_path : Path | str | None + Optional explicit path to a JSON config. If None, auto-discovery is used. + + Returns + ------- + dict + Mapping of ROI name to fit parameter dict. 
+ """ + cfg = _load_config(config_path) + + organ_defaults = deepcopy(cfg.get("organ_defaults", {})) + lesion_defaults = deepcopy(cfg.get("lesion_defaults", {})) + + # Parse bounds for both defaults + if "bounds" in organ_defaults: + organ_defaults["bounds"] = _parse_bounds(organ_defaults.get("bounds")) + if "bounds" in lesion_defaults: + lesion_defaults["bounds"] = _parse_bounds(lesion_defaults.get("bounds")) + + # Get all masks actually available in the study + all_masks = set() + for tp_masks in getattr(longSPECT, "masks", {}).values(): + all_masks.update(tp_masks.keys()) + + # Get valid organ names from config + try: + from pytheranostics.imaging_ds.longitudinal_study import LongitudinalStudy + + valid_names = LongitudinalStudy._get_valid_organ_names() + except Exception: + valid_names = [] + + lesion_pattern_str = cfg.get("lesions", {}).get("pattern", r"^Lesion_(\\d+)$") + lesion_pattern = re.compile(lesion_pattern_str) + + # Use intersection: organs that are both valid AND actually present in masks + organ_names = [ + name + for name in valid_names + if name in all_masks and not lesion_pattern.match(name) + ] + + roi_config: Dict[str, Dict] = {} + + # Initialize all organs with defaults + for name in organ_names: + roi_config[name] = deepcopy(organ_defaults) + + # Apply explicit organ overrides + for name, override in cfg.get("organs", {}).items(): + base = roi_config.get(name, deepcopy(organ_defaults)) + merged = deepcopy(base) + for k, v in override.items(): + if k == "bounds": + merged[k] = _parse_bounds(v) + else: + merged[k] = v + roi_config[name] = merged + + # Auto-discover lesions from masks + lesions_cfg = cfg.get("lesions", {}) + if lesions_cfg.get("auto_discover", True): + discovered: set[str] = set() + for tp_masks in getattr(longSPECT, "masks", {}).values(): + for mask_name in tp_masks.keys(): + if lesion_pattern.match(mask_name): + discovered.add(mask_name) + if discovered: + logger.debug("Discovered lesions: %s", sorted(discovered)) + for lesion in sorted(discovered): + lesion_entry = deepcopy(lesion_defaults) + roi_config[lesion] = lesion_entry + + logger.info( + "Configured %d organs + %d lesions", + len([n for n in roi_config.keys() if not lesion_pattern.match(n)]), + len([n for n in roi_config.keys() if lesion_pattern.match(n)]), + ) + + return roi_config diff --git a/pytheranostics/project.py b/pytheranostics/project.py index 0d26102..f07d5c4 100644 --- a/pytheranostics/project.py +++ b/pytheranostics/project.py @@ -90,6 +90,7 @@ def init_project( available_templates = { "total_seg_config.json": "TotalSegmentator ROI filtering/renaming/combining", "voi_mappings_config.json": "VOI name mappings for CT/SPECT analysis", + "dosimetry_fit_defaults.json": "Dosimetry fit parameters for organs and lesions", } if templates is None: @@ -176,6 +177,13 @@ def init_project( - `ct_mappings`: Morphology-based names (e.g., "Kidney_L_m") - `spect_mappings`: Activity-based names (e.g., "Kidney_L_a") +### dosimetry_fit_defaults.json +Configure default fit parameters for dosimetry calculations: +- `organ_defaults`: Parameters applied to all organs +- `organs`: Override specific organ kinetics (e.g., BoneMarrow) +- `lesion_defaults`: Parameters for auto-discovered lesions +- `lesions.pattern`: Regex pattern for lesion ROI names + ## Getting Started ```python @@ -256,6 +264,7 @@ def list_templates() -> dict: return { "total_seg_config.json": "TotalSegmentator ROI filtering/renaming/combining", "voi_mappings_config.json": "VOI name mappings for CT/SPECT analysis", + 
"dosimetry_fit_defaults.json": "Dosimetry fit parameters for organs and lesions", } diff --git a/tests/test_dosimetry_config.py b/tests/test_dosimetry_config.py new file mode 100644 index 0000000..799a129 --- /dev/null +++ b/tests/test_dosimetry_config.py @@ -0,0 +1,61 @@ +from typing import Dict + +from pytheranostics.dosimetry.config import build_roi_fit_config + + +class DummyLongitudinalStudy: + def __init__(self, masks: Dict[int, Dict[str, object]]): + self.masks = masks + + +def test_build_roi_fit_config_merges_organs_and_lesions(): + # Minimal masks with two timepoints, includes lesions + masks = { + 0: {"Liver": object(), "Lesion_1": object()}, + 1: {"Kidney_Left": object(), "Lesion_2": object()}, + } + longSPECT = DummyLongitudinalStudy(masks=masks) + + roi_cfg = build_roi_fit_config(longSPECT) + + # Organs: expect defaults applied + assert "Liver" in roi_cfg + assert "Kidney_Left" in roi_cfg + assert "fit_order" in roi_cfg["Liver"] + assert "param_init" in roi_cfg["Liver"] + + # Lesions: auto-discovered and have lesion defaults applied + for lesion in ("Lesion_1", "Lesion_2"): + assert lesion in roi_cfg + assert "param_init" in roi_cfg[lesion] + assert "fit_order" in roi_cfg[lesion] + + # No unexpected keys + assert all(isinstance(v, dict) for v in roi_cfg.values()) + + +def test_build_roi_fit_config_respects_pattern_toggle(): + # Custom masks with non-standard lesion naming + masks = { + 0: {"Liver": object(), "Tumor_01": object()}, + } + longSPECT = DummyLongitudinalStudy(masks=masks) + + # Use explicit config with custom pattern and disable auto-discovery + import json + from pathlib import Path + from tempfile import TemporaryDirectory + + with TemporaryDirectory() as tmpdir: + cfg_path = Path(tmpdir) / "dosimetry_fit_defaults.json" + cfg = { + "organ_defaults": {"fit_order": 1, "param_init": {"A1": 10, "A2": 0.01}}, + "lesion_defaults": {"fit_order": 1, "param_init": {"A1": 100, "A2": 0.1}}, + "lesions": {"auto_discover": False, "pattern": "^Tumor_(\\d+)$"}, + } + cfg_path.write_text(json.dumps(cfg)) + + roi_cfg = build_roi_fit_config(longSPECT, config_path=cfg_path) + assert "Liver" in roi_cfg + # auto_discover disabled; Tumor_01 should not be present + assert "Tumor_01" not in roi_cfg