diff --git a/README.md b/README.md
index cc24511..63cf263 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-# MicroImpute
+# Microimpute
 
-MicroImpute enables variable imputation through different statistical methods. It facilitates comparison and benchmarking across methods through quantile loss calculations.
+Microimpute enables variable imputation through different statistical methods. It facilitates comparison and benchmarking across methods through quantile loss calculations.
 
 To install, run pip install microimpute.
 
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29..104b054 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,7 @@
+- bump: patch
+  changes:
+    changed:
+    - Refactor the Imputer base class.
+    - Refactor cross-validation, comparison, visualization, and autoimputation functions.
+    added:
+    - Improved test coverage.
diff --git a/microimpute/__init__.py b/microimpute/__init__.py
index 238a90e..fba3084 100644
--- a/microimpute/__init__.py
+++ b/microimpute/__init__.py
@@ -1,11 +1,28 @@
-"""MicroImpute Package
+"""Microimpute: advanced statistical imputation for microdata
 
-A package for benchmarking different imputation methods using microdata.
+Microimpute is a comprehensive Python package for benchmarking and applying
+various statistical imputation methods to microdata. It provides tools for
+automated model selection, cross-validation, and visualization of imputation results.
+
+Key features:
+    - Multiple imputation models (OLS, QRF, QuantReg, Matching)
+    - Automated model selection with cross-validation
+    - Quantile-based imputation and evaluation
+    - Comprehensive visualization tools
+    - Support for weighted imputation
+    - Hyperparameter tuning capabilities
+
+Main components:
+    - autoimpute: automated imputation with model selection
+    - Models: OLS, QRF, QuantReg, Matching (optional)
+    - Evaluation: cross-validation and quantile loss metrics
+    - Visualization: performance and comparison plots
 """
 
 __version__ = "1.1.2"
 
-from microimpute.comparisons.autoimpute import autoimpute
+# Import automated imputation
+from microimpute.comparisons.autoimpute import AutoImputeResult, autoimpute
 from microimpute.comparisons.imputations import get_imputations
 
 # Import comparison utilities
@@ -15,6 +32,14 @@
     quantile_loss,
 )
 
+# Import validation utilities
+from microimpute.comparisons.validation import (
+    validate_columns_exist,
+    validate_dataframe_compatibility,
+    validate_imputation_inputs,
+    validate_quantiles,
+)
+
 # Main configuration
 from microimpute.config import (
     PLOT_CONFIG,
@@ -30,7 +55,7 @@
 from microimpute.models import OLS, QRF, Imputer, ImputerResults, QuantReg
 
 # Import data handling functions
-from microimpute.utils.data import preprocess_data
+from microimpute.utils.data import preprocess_data, unnormalize_predictions
 
 try:
     from microimpute.models.matching import Matching
@@ -38,7 +63,9 @@
     pass
 
 # Import visualization modules
-from microimpute.visualizations.plotting import (
+from microimpute.visualizations import (
+    MethodComparisonResults,
+    PerformanceResults,
     method_comparison_results,
     model_performance_results,
 )
diff --git a/microimpute/comparisons/__init__.py b/microimpute/comparisons/__init__.py
index 958c806..e60342a 100644
--- a/microimpute/comparisons/__init__.py
+++ b/microimpute/comparisons/__init__.py
@@ -1,17 +1,34 @@
-"""Data Comparison Utilities
+"""Imputation method comparison and evaluation utilities
 
-This module contains utilities for comparing different imputation methods.
+This module provides comprehensive tools for comparing and evaluating different
+imputation methods. It includes automated model selection, quantile loss metrics,
+and validation utilities for ensuring data integrity.
+
+Key components:
+    - autoimpute: automated imputation method selection and application
+    - get_imputations: generate imputations using multiple model classes
+    - quantile_loss: calculate quantile-based loss metrics
+    - compare_quantile_loss: compare performance across imputation methods
+    - Validation utilities for data and parameter validation
 """
 
 # Import automated imputation utilities
-from .autoimpute import autoimpute
+from microimpute.comparisons.autoimpute import AutoImputeResult, autoimpute
 
 # Import imputation utilities
-from .imputations import get_imputations
+from microimpute.comparisons.imputations import get_imputations
 
 # Import loss functions
-from .quantile_loss import (
+from microimpute.comparisons.quantile_loss import (
     compare_quantile_loss,
     compute_quantile_loss,
     quantile_loss,
 )
+
+# Import validation utilities
+from microimpute.comparisons.validation import (
+    validate_columns_exist,
+    validate_dataframe_compatibility,
+    validate_imputation_inputs,
+    validate_quantiles,
+)
diff --git a/microimpute/comparisons/autoimpute.py b/microimpute/comparisons/autoimpute.py
index 867d629..70868a6 100644
--- a/microimpute/comparisons/autoimpute.py
+++ b/microimpute/comparisons/autoimpute.py
@@ -5,8 +5,7 @@
 
 import logging
 import warnings
-from functools import partial
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Dict, List, Optional, Tuple, Type, Union
 
 import joblib
 import pandas as pd
@@ -14,14 +13,21 @@
 from tqdm.auto import tqdm
 
 from microimpute.comparisons import *
+from microimpute.comparisons.autoimpute_helpers import (
+    evaluate_model,
+    fit_and_predict_model,
+    prepare_data_for_imputation,
+    select_best_model,
+    validate_autoimpute_inputs,
+)
 from microimpute.config import (
     QUANTILES,
     RANDOM_STATE,
     TRAIN_SIZE,
     VALIDATE_CONFIG,
 )
-from microimpute.evaluations import cross_validate_model
 from microimpute.models import OLS, QRF, Imputer, ImputerResults, QuantReg
+from microimpute.utils.data import unnormalize_predictions
 
 try:
     from microimpute.models import Matching
@@ -29,7 +35,6 @@
     HAS_MATCHING = True
 except ImportError:
     HAS_MATCHING = False
-from microimpute.utils.data import preprocess_data
 
 log = logging.getLogger(__name__)
 
@@ -50,7 +55,7 @@ class AutoImputeResult(BaseModel):
         Mapping model name → fitted Imputer instance.
     cv_results : pd.DataFrame
         Cross-validation loss table (models as index, quantiles as columns)
-        with an extra “mean_loss” column.
+        with an extra "mean_loss" column.
     """
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -63,6 +68,173 @@ class AutoImputeResult(BaseModel):
     cv_results: pd.DataFrame = Field(...)
 
 
+def _setup_logging(log_level: str) -> int:
+    """Configure logging level.
+
+    Args:
+        log_level: String representation of log level.
+
+    Returns:
+        Numeric log level.
+    """
+    level_map = {
+        "DEBUG": logging.DEBUG,
+        "INFO": logging.INFO,
+        "WARNING": logging.WARNING,
+        "ERROR": logging.ERROR,
+        "CRITICAL": logging.CRITICAL,
+    }
+    numeric_level = level_map[log_level]
+    log.setLevel(numeric_level)
+    warnings.filterwarnings("ignore")
+    return numeric_level
+
+
+def _evaluate_models_parallel(
+    model_classes: List[Type[Imputer]],
+    training_data: pd.DataFrame,
+    predictors: List[str],
+    imputed_variables: List[str],
+    weight_col: Optional[str],
+    quantiles: List[float],
+    k_folds: int,
+    random_state: int,
+    tune_hyperparameters: bool,
+    hyperparameters: Optional[Dict[str, Dict[str, Any]]],
+    n_jobs: int = -1,
+) -> Tuple[pd.DataFrame, Optional[Dict[str, Any]]]:
+    """Evaluate multiple models in parallel using cross-validation.
+
+    Returns:
+        Tuple of (results_dataframe, best_hyperparameters_dict or None)
+    """
+    # Check if Matching model is present (requires sequential processing)
+    has_matching = any(model.__name__ == "Matching" for model in model_classes)
+    if has_matching and n_jobs != 1:
+        log.info(
+            "Using sequential processing (n_jobs=1) because Matching model is present"
+        )
+        n_jobs = 1
+
+    # Prepare tasks for parallel execution
+    parallel_tasks = []
+    for model in model_classes:
+        model_hyperparams = None
+        if hyperparameters and model.__name__ in hyperparameters:
+            model_hyperparams = hyperparameters[model.__name__]
+
+        parallel_tasks.append(
+            (
+                model,
+                training_data,
+                predictors,
+                imputed_variables,
+                weight_col,
+                quantiles,
+                k_folds,
+                random_state,
+                tune_hyperparameters,
+                model_hyperparams,
+            )
+        )
+
+    # Execute in parallel
+    results = joblib.Parallel(n_jobs=n_jobs)(
+        joblib.delayed(lambda args: evaluate_model(*args))(task)
+        for task in tqdm(parallel_tasks, desc="Evaluating models")
+    )
+
+    # Process results
+    method_test_losses = {}
+    best_hyperparams = {}
+
+    if tune_hyperparameters:
+        for result in results:
+            if len(result) == 3:
+                model_name, cv_result, best_params = result
+                method_test_losses[model_name] = cv_result.loc["test"]
+                if model_name in ["QRF", "Matching"]:
+                    best_hyperparams[model_name] = best_params
+            else:
+                model_name, cv_result = result
+                method_test_losses[model_name] = cv_result.loc["test"]
+    else:
+        for model_name, cv_result in results:
+            method_test_losses[model_name] = cv_result.loc["test"]
+
+    method_results_df = pd.DataFrame.from_dict(
+        method_test_losses, orient="index"
+    )
+
+    return method_results_df, (
+        best_hyperparams if tune_hyperparameters else None
+    )
+
+
+def _generate_imputations_for_all_models(
+    model_classes: List[Type[Imputer]],
+    best_method: str,
+    training_data: pd.DataFrame,
+    imputing_data: pd.DataFrame,
+    predictors: List[str],
+    imputed_variables: List[str],
+    weight_col: Optional[str],
+    imputation_q: float,
+    normalize_data: bool,
+    normalizing_params: Optional[dict],
+    tune_hyperparameters: bool,
+    hyperparams: Optional[Dict[str, Any]],
+    log_level: str,
+) -> Tuple[Dict[str, pd.DataFrame], Dict[str, Any]]:
+    """Generate imputations for all models when impute_all=True.
+
+    Returns:
+        Tuple of (imputations_dict, fitted_models_dict)
+    """
+    final_imputations_dict = {}
+    fitted_models_dict = {}
+
+    log.info("Generating imputations for all models using the full dataset.")
+
+    for model_class in model_classes:
+        model_name = model_class.__name__
+        if model_name == best_method:
+            continue  # Skip the best method as it's already done
+
+        log.info(f"Generating imputations with {model_name}.")
+
+        # Get model-specific hyperparameters if available
+        model_hyperparams = None
+        if tune_hyperparameters and hyperparams and model_name in hyperparams:
+            model_hyperparams = hyperparams[model_name]
+
+        # Fit and predict
+        fitted_model, imputations = fit_and_predict_model(
+            model_class,
+            training_data,
+            imputing_data,
+            predictors,
+            imputed_variables,
+            weight_col,
+            imputation_q,
+            model_hyperparams,
+            log_level,
+        )
+
+        # Unnormalize if needed
+        if normalize_data and normalizing_params:
+            final_imputations = unnormalize_predictions(
+                imputations, normalizing_params
+            )
+        else:
+            final_imputations = imputations
+
+        final_imputations_dict[model_name] = final_imputations[imputation_q]
+        fitted_models_dict[model_name] = fitted_model
+
+    return final_imputations_dict, fitted_models_dict
+
+
 @validate_call(config=VALIDATE_CONFIG)
 def autoimpute(
     donor_data: pd.DataFrame,
@@ -124,497 +296,210 @@ def autoimpute(
         ValueError: If inputs are invalid (e.g., invalid quantiles, missing columns)
         RuntimeError: For unexpected errors during imputation
     """
-    # Set up logging level
-    if log_level not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
-        error_msg = f"Invalid log_level: {log_level}. Must be one of DEBUG, INFO, WARNING, ERROR, CRITICAL."
-        log.error(error_msg)
-        raise ValueError(error_msg)
-    if log_level == "DEBUG":
-        log_level = logging.DEBUG
-    elif log_level == "INFO":
-        log_level = logging.INFO
-    elif log_level == "WARNING":
-        log_level = logging.WARNING
-    elif log_level == "ERROR":
-        log_level = logging.ERROR
-    elif log_level == "CRITICAL":
-        log_level = logging.CRITICAL
-    log.setLevel(log_level)
-    warnings.filterwarnings("ignore")
-
-    # Set up parallel processing
-    n_jobs: Optional[int] = -1
-
-    # Create a progress tracking system
-    if log_level == "INFO" or log_level == "DEBUG":
-        main_progress = tqdm(total=4, desc="AutoImputation progress")
-        main_progress.set_description("Input validation")
-
-    if imputation_quantiles is None:
-        quantiles = QUANTILES  # Use default quantiles if not provided
-    else:
-        quantiles = imputation_quantiles
-
-    # Step 0: Input validation
     try:
-        # Validate quantiles if provided
-        if quantiles:
-            invalid_quantiles = [q for q in quantiles if not 0 <= q <= 1]
-            if invalid_quantiles:
-                error_msg = f"Invalid quantiles (must be between 0 and 1): {invalid_quantiles}"
-                log.error(error_msg)
-                raise ValueError(error_msg)
-
-        # Validate that predictor and imputed variable columns exist in donor data
-        missing_predictors_donor = [
-            col for col in predictors if col not in donor_data.columns
-        ]
-        if missing_predictors_donor:
-            error_msg = f"Missing predictor columns in donor data: {missing_predictors_donor}"
-            log.error(error_msg)
-            raise ValueError(error_msg)
-
-        missing_predictors_receiver = [
-            col for col in predictors if col not in receiver_data.columns
-        ]
-        if missing_predictors_receiver:
-            error_msg = f"Missing predictor columns in reciver data: {missing_predictors_receiver}"
-            log.error(error_msg)
-            raise ValueError(error_msg)
-
-        missing_imputed_donor = [
-            col for col in imputed_variables if col not in donor_data.columns
-        ]
-        if missing_imputed_donor:
-            error_msg = f"Missing imputed variable columns in donor data: {missing_imputed_donor}"
-            log.error(error_msg)
-            raise ValueError(error_msg)
-
-        # Validate that predictor columns exist in receiver data (imputed variables may not be present in receiver data)
-        missing_predictors_receiver = [
-            col for col in predictors if col not in receiver_data.columns
-        ]
-        if missing_predictors_receiver:
-            error_msg = f"Missing predictor columns in test data: {missing_predictors_receiver}"
-            log.error(error_msg)
-            raise ValueError(error_msg)
+        # Step 0: Setup and validation
+        numeric_log_level = _setup_logging(log_level)
+
+        # Create progress tracker if needed
+        if numeric_log_level <= logging.INFO:
+            main_progress = tqdm(total=5, desc="AutoImputation progress")
+            main_progress.set_description("Input validation")
+
+        # Use provided quantiles or defaults
+        quantiles = imputation_quantiles if imputation_quantiles else QUANTILES
+
+        # Validate all inputs
+        validate_autoimpute_inputs(
+            donor_data,
+            receiver_data,
+            predictors,
+            imputed_variables,
+            weight_col,
+            quantiles,
+            hyperparameters,
+            tune_hyperparameters,
+            log_level,
+        )
 
         log.info(
-            f"Generating imputations to impute from {len(donor_data)} donor data to {len(receiver_data)} receiver data for variables {imputed_variables} with predictors {predictors}. "
+            f"Generating imputations to impute from {len(donor_data)} donor data "
+            f"to {len(receiver_data)} receiver data for variables {imputed_variables} "
+            f"with predictors {predictors}."
         )
 
-        if (hyperparameters is not None) and (tune_hyperparameters == True):
-            error_msg = "Cannot specify both model_hyperparams and request to automatically tune hyperparameters, please select one or the other."
-            log.error(error_msg)
-            raise ValueError(error_msg)
-
         # Step 1: Data preparation
-        if log_level == "INFO" or log_level == "DEBUG":
+        if numeric_log_level <= logging.INFO:
             log.info("Preprocessing data...")
             main_progress.update(1)
             main_progress.set_description("Data preparation")
 
-        # If imputed variables are in receiver data, remove them
-        receiver_data = receiver_data.drop(
-            columns=imputed_variables, errors="ignore"
-        )
-
-        training_data = donor_data.copy()
-        imputing_data = receiver_data.copy()
-
-        # Keep track of original imputed variable names for final assignment
+        # Keep track of original imputed variable names
         original_imputed_variables = imputed_variables.copy()
 
-        if normalize_data:
-            training_data_predictors, _ = preprocess_data(
-                training_data[predictors],
-                full_data=True,
-                train_size=train_size,
-                test_size=(1 - train_size),
-                normalize=normalize_data,
-            )
-            (
-                training_data_imputed_variables,
-                normalizing_params,
-            ) = preprocess_data(
-                training_data[imputed_variables],
-                full_data=True,
-                train_size=train_size,
-                test_size=(1 - train_size),
-                normalize=normalize_data,
-            )
-            imputing_data, _ = preprocess_data(
-                imputing_data[predictors],
-                full_data=True,
-                train_size=train_size,
-                test_size=(1 - train_size),
-                normalize=normalize_data,
-            )
-
-            training_data = training_data_predictors.join(
-                training_data_imputed_variables
-            )
-            if weight_col:
-                training_data[weight_col] = donor_data[weight_col]
-        else:
-            training_data_predictors = preprocess_data(
-                training_data[predictors],
-                full_data=True,
-                train_size=train_size,
-                test_size=(1 - train_size),
-                normalize=normalize_data,
-            )
-            training_data_imputed_variables = preprocess_data(
-                training_data[imputed_variables],
-                full_data=True,
-                train_size=train_size,
-                test_size=(1 - train_size),
-                normalize=normalize_data,
-            )
-            imputing_data = preprocess_data(  # _
-                imputing_data[predictors],
-                full_data=True,
-                train_size=train_size,
-                test_size=(1 - train_size),
-                normalize=normalize_data,
-            )
-            training_data = training_data_predictors.join(
-                training_data_imputed_variables
+        training_data, imputing_data, normalizing_params = (
+            prepare_data_for_imputation(
+                donor_data,
+                receiver_data,
+                predictors,
+                imputed_variables,
+                weight_col,
+                normalize_data,
+                train_size,
+                1 - train_size,
             )
-            if weight_col:
-                training_data[weight_col] = donor_data[weight_col]
+        )
 
-        # Step 2: Imputation with each method
-        if log_level == "INFO" or log_level == "DEBUG":
+        # Step 2: Model evaluation
+        if numeric_log_level <= logging.INFO:
             main_progress.update(1)
             main_progress.set_description("Model evaluation")
 
+        # Get model classes
         if not models:
-            # If no models are provided, use default models
             model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg]
             if HAS_MATCHING:
                 model_classes.append(Matching)
         else:
             model_classes = models
 
+        # Log hyperparameter usage
         if hyperparameters:
             model_names = [
                 model_class.__name__ for model_class in model_classes
             ]
             for model_name, model_params in hyperparameters.items():
                 if model_name in model_names:
-                    # Update the model class with the provided hyperparameters
-                    if model_name == "QRF":
-                        log.info(
-                            f"Using hyperparameters for QRF: {model_params}"
-                        )
-                    elif model_name == "Matching":
-                        log.info(
-                            f"Using hyperparameters for Matching: {model_params}"
-                        )
+                    log.info(
+                        f"Using hyperparameters for {model_name}: {model_params}"
+                    )
                 else:
                     log.info(
-                        f"None of the hyperparameters provided are relevant for the supported models: {model_names}. Using default hyperparameters."
+                        f"Hyperparameters provided for {model_name} but model not in list: {model_names}"
                     )
 
-        method_test_losses = {}
         log.info(
-            "Hyperparameter tuning and cross-validation for model comparisson in progress... "
-        )
-
-        def evaluate_model(
-            model: Type[Imputer],
-            data: pd.DataFrame,
-            predictors: List[str],
-            imputed_variables: List[str],
-            weight_col: Optional[str],
-            quantiles: List[float],
-            k_folds: Optional[int] = 5,
-            random_state: Optional[bool] = RANDOM_STATE,
-            tune_hyperparams: Optional[bool] = True,
-            hyperparameters: Optional[Dict[str, Any]] = None,
-        ) -> tuple[str, pd.DataFrame]:
-            """Evaluate a single imputation model with cross-validation.
-
-            Args:
-                model: The imputation model class to evaluate
-                data: The dataset to use for evaluation
-                predictors: List of predictor column names
-                imputed_variables: List of columns to impute
-                quantiles: List of quantiles to evaluate
-                k_folds: Number of cross-validation folds
-                random_state: Random seed for reproducibility
-                tune_hyperparams: Whether to tune hyperparameters
-                hyperparameters: Optional model-specific hyperparameters
-
-            Returns:
-                Tuple containing model name and cross-validation results DataFrame
-            """
-            model_name = model.__name__
-            log.info(f"Evaluating {model_name}...")
-
-            cv_result = cross_validate_model(
-                model_class=model,
-                data=data,
-                predictors=predictors,
-                imputed_variables=imputed_variables,
-                weight_col=weight_col,
-                quantiles=quantiles,
-                n_splits=k_folds,
-                random_state=random_state,
-                tune_hyperparameters=tune_hyperparams,
-                model_hyperparams=hyperparameters,
-            )
-
-            if (
-                tune_hyperparams
-                and isinstance(cv_result, tuple)
-                and len(cv_result) == 2
-            ):
-                final_results, best_params = cv_result
-                return model_name, final_results, best_params
-            else:
-                return model_name, cv_result
-
-        # Special handling for models that use rpy2
-        # Use sequential processing for Matching model to avoid thread context issues
-        has_matching = any(
-            model.__name__ == "Matching" for model in model_classes
-        )
-        if has_matching and n_jobs != 1:
-            log.info(
-                "Using sequential processing (n_jobs=1) because Matching model is present"
-            )
-            n_jobs = 1
-
-        parallel_tasks = []
-        for model in model_classes:
-            parallel_tasks.append(
-                (
-                    model,
-                    training_data,
-                    predictors,
-                    imputed_variables,
-                    weight_col,
-                    quantiles,
-                    k_folds,
-                    RANDOM_STATE,
-                    tune_hyperparameters,
-                    hyperparameters,
-                )
-            )
-
-        # Execute in parallel
-        results = joblib.Parallel(n_jobs=n_jobs)(
-            joblib.delayed(lambda args: evaluate_model(*args))(task)
-            for task in tqdm(parallel_tasks, desc="Evaluating models")
+            "Hyperparameter tuning and cross-validation for model comparison in progress..."
         )
 
-        # Process results
-        if tune_hyperparameters == True:
-            hyperparams = {}
-            for model_name, cv_result, best_tuned_hyperparams in results:
-                method_test_losses[model_name] = cv_result.loc["test"]
-                if model_name == "QRF" or model_name == "Matching":
-                    hyperparams[model_name] = best_tuned_hyperparams
-        else:
-            for model_name, cv_result in results:
-                method_test_losses[model_name] = cv_result.loc["test"]
-
-        method_results_df = pd.DataFrame.from_dict(
-            method_test_losses, orient="index"
+        # Evaluate models in parallel
+        method_results_df, best_hyperparams = _evaluate_models_parallel(
+            model_classes,
+            training_data,
+            predictors,
+            imputed_variables,
+            weight_col,
+            quantiles,
+            k_folds,
+            random_state,
+            tune_hyperparameters,
+            hyperparameters,
         )
 
-        # Step 3: Compare imputation methods
-        log.info(f"Comparing across {model_classes} methods. ")
-
-        if log_level == "INFO" or log_level == "DEBUG":
+        # Step 3: Model selection
+        if numeric_log_level <= logging.INFO:
             main_progress.update(1)
             main_progress.set_description("Model selection")
 
-        # add a column called "mean_loss" with the average loss across quantiles
-        method_results_df["mean_loss"] = method_results_df.mean(axis=1)
+        log.info(f"Comparing across {model_classes} methods.")
+        best_method, best_row = select_best_model(method_results_df)
 
-        # Step 4: Select best method
-        best_method = method_results_df["mean_loss"].idxmin()
-        best_row = method_results_df.loc[best_method]
-
-        log.info(
-            f"The method with the lowest average loss is {best_method}, with an average loss across variables and quantiles of {best_row['mean_loss']}. "
-        )
+        # Step 4: Generate imputations with best method
+        if numeric_log_level <= logging.INFO:
+            main_progress.update(1)
+            main_progress.set_description("Imputation")
 
-        # Step 5: Generate imputations with the best method on the receiver data
         log.info(
-            f"Generating imputations using the best method: {best_method} on the receiver data. "
+            f"Generating imputations using the best method: {best_method} on the receiver data."
         )
 
-        if log_level == "INFO" or log_level == "DEBUG":
-            main_progress.update(1)
-            main_progress.set_description("Imputation")
-
+        # Get the best model class
         models_dict = {model.__name__: model for model in model_classes}
         chosen_model = models_dict[best_method]
 
-        # Initialize the model
-        model = chosen_model(log_level=log_level)
-        imputation_q = 0.5  # this can be an input parameter, or if unspecified will default to a random quantile
-        # Fit the model
-        if best_method == "QuantReg":
-            # For QuantReg, we need to explicitly fit the quantile
-            best_fitted_model = model.fit(
-                training_data,
-                predictors,
-                imputed_variables,
-                weight_col=weight_col,
-                quantiles=[imputation_q],
-            )
-        else:
-            if (
-                chosen_model.__name__ == "Matching"
-                or chosen_model.__name__ == "QRF"
-            ) and tune_hyperparameters == True:
-                # For Matching and QRF, we need to pass the best hyperparameters
-                best_fitted_model = model.fit(
-                    training_data,
-                    predictors,
-                    imputed_variables,
-                    weight_col=weight_col,
-                    **hyperparams[chosen_model.__name__],
-                )
-            else:
-                best_fitted_model = model.fit(
-                    training_data,
-                    predictors,
-                    imputed_variables,
-                    weight_col=weight_col,
-                )
-
-        # Predict with explicit quantiles
-        imputations = best_fitted_model.predict(
-            imputing_data, quantiles=[imputation_q]
+        # Default to median quantile for final imputation
+        imputation_q = 0.5
+
+        # Get hyperparameters for best model if tuned
+        model_hyperparams = None
+        if (
+            tune_hyperparameters
+            and best_hyperparams
+            and best_method in best_hyperparams
+        ):
+            model_hyperparams = best_hyperparams[best_method]
+
+        # Fit and predict with best model
+        best_fitted_model, imputations = fit_and_predict_model(
+            chosen_model,
+            training_data,
+            imputing_data,
+            predictors,
+            imputed_variables,
+            weight_col,
+            imputation_q,
+            model_hyperparams,
+            log_level,
         )
 
-        # Handle case where predict returns a DataFrame directly (single quantile)
-        if isinstance(imputations, pd.DataFrame):
-            imputations = {imputation_q: imputations}
-
-        if normalize_data:
-            # Unnormalize the imputations
-            mean = pd.Series(
-                {col: p["mean"] for col, p in normalizing_params.items()}
-            )
-            std = pd.Series(
-                {col: p["std"] for col, p in normalizing_params.items()}
+        # Unnormalize if needed
+        if normalize_data and normalizing_params:
+            final_imputations = unnormalize_predictions(
+                imputations, normalizing_params
             )
-            unnormalized_imputations = {}
-            for q, df in imputations.items():
-                cols = df.columns  # the imputed variables
-                df_unnorm = df.mul(std[cols], axis=1)  # × std
-                df_unnorm = df_unnorm.add(mean[cols], axis=1)  # + mean
-                unnormalized_imputations[q] = df_unnorm
-            final_imputations = unnormalized_imputations
         else:
             final_imputations = imputations
 
         log.info(
-            f"Imputation generation completed for {len(receiver_data)} samples using the best method: {best_method} and the median quantile. "
+            f"Imputation generation completed for {len(receiver_data)} samples "
+            f"using the best method: {best_method} and the median quantile."
         )
 
-        if log_level == "INFO" or log_level == "DEBUG":
-            main_progress.set_description("Complete")
-            main_progress.close()
-
+        # Add imputed values to receiver data
         median_imputations = final_imputations[imputation_q]
-        # Add the imputed variables to the receiver data
-        try:
-            missing_imputed_vars = []
-            for var in original_imputed_variables:
-                if var in median_imputations.columns:
-                    receiver_data[var] = median_imputations[var]
-                else:
-                    missing_imputed_vars.append(var)
-                    log.warning(
-                        f"Imputed variable {var} not found in the imputations. "
-                    )
-        except KeyError as e:
-            error_msg = f"Missing imputed variable in the imputations: {e}"
-            log.error(error_msg)
-            raise ValueError(error_msg)
-
-        final_imputations_dict = {}
-        fitted_models_dict = {}
-        final_imputations_dict["best_method"] = (
-            final_imputations[0.5]
-            if imputation_quantiles is None
-            else final_imputations
-        )
-        fitted_models_dict["best_method"] = best_fitted_model
+        for var in original_imputed_variables:
+            if var in median_imputations.columns:
+                receiver_data[var] = median_imputations[var]
+            else:
+                log.warning(
+                    f"Imputed variable {var} not found in the imputations."
+                )
 
-        # Step 6: If impute_all is True, impute using the full dataset with all the remaining models
-        if impute_all:
-            log.info(
-                "Generating imputations for all the remaining models using the full dataset. "
+        # Initialize results
+        final_imputations_dict = {
+            "best_method": (
+                final_imputations[0.5]
+                if imputation_quantiles is None
+                else final_imputations
             )
-            for model_class in model_classes:
-                model_name = model_class.__name__
-                if model_name != best_method:
-                    log.info(f"Generating imputations with {model_name}. ")
-                    model = model_class(log_level=log_level)
-                    if model_name == "QuantReg":
-                        # For QuantReg, we need to explicitly fit the quantile
-                        fitted_model = model.fit(
-                            training_data,
-                            predictors,
-                            imputed_variables,
-                            weight_col=weight_col,
-                            quantiles=[imputation_q],
-                        )
-                    else:
-                        if (
-                            model_name == "Matching" or model_name == "QRF"
-                        ) and tune_hyperparameters == True:
-                            # For Matching and QRF, we need to pass the best hyperparameters
-                            fitted_model = model.fit(
-                                training_data,
-                                predictors,
-                                imputed_variables,
-                                weight_col=weight_col,
-                                **hyperparams[model_name],
-                            )
-                        else:
-                            fitted_model = model.fit(
-                                training_data,
-                                predictors,
-                                imputed_variables,
-                                weight_col=weight_col,
-                            )
-
-                    # Predict with explicit quantiles
-                    imputations = fitted_model.predict(
-                        imputing_data, quantiles=[imputation_q]
-                    )
-
-                    if normalize_data:
-                        unnormalized_imputations = {}
-                        for q, df in imputations.items():
-                            cols = df.columns  # the imputed variables
-                            df_unnorm = df.mul(std[cols], axis=1)  # × std
-                            df_unnorm = df_unnorm.add(
-                                mean[cols], axis=1
-                            )  # + mean
-                            unnormalized_imputations[q] = df_unnorm
-                        final_imputations = unnormalized_imputations
-                    else:
-                        final_imputations = imputations
-                    final_imputations_dict[model_name] = (
-                        final_imputations[0.5]
-                        if imputation_quantiles is None
-                        else final_imputations
-                    )
+        }
+        fitted_models_dict = {"best_method": best_fitted_model}
 
-                    log.warning(final_imputations)
+        # Step 5: Generate imputations for all models if requested
+        if impute_all:
+            other_imputations, other_models = (
+                _generate_imputations_for_all_models(
+                    model_classes,
+                    best_method,
+                    training_data,
+                    imputing_data,
+                    predictors,
+                    imputed_variables,
+                    weight_col,
+                    imputation_q,
+                    normalize_data,
+                    normalizing_params,
+                    tune_hyperparameters,
+                    best_hyperparams,
+                    log_level,
+                )
+            )
+            final_imputations_dict.update(other_imputations)
+            fitted_models_dict.update(other_models)
 
-                    fitted_models_dict[model_name] = fitted_model
+        # Complete progress bar if used
+        if numeric_log_level <= logging.INFO:
+            main_progress.set_description("Complete")
+            main_progress.close()
 
         return AutoImputeResult(
             imputations=final_imputations_dict,
@@ -626,6 +511,6 @@ def evaluate_model(
     except ValueError as e:
         # Re-raise validation errors directly
         raise e
-    except Exception as e:
+    except (KeyError, TypeError, AttributeError) as e:
         log.error(f"Unexpected error during autoimputation: {str(e)}")
         raise RuntimeError(f"Failed to generate imputations: {str(e)}") from e
diff --git a/microimpute/comparisons/autoimpute_helpers.py b/microimpute/comparisons/autoimpute_helpers.py
new file mode 100644
index 0000000..96d83ac
--- /dev/null
+++ b/microimpute/comparisons/autoimpute_helpers.py
@@ -0,0 +1,320 @@
+"""Helper functions for automated imputation
+
+This module provides utility functions that support the autoimpute workflow,
+including input validation, data preparation, model evaluation, and result selection.
+These functions are extracted from the main autoimpute module to improve code
+organization and maintainability.
+
+Key functions:
+    - validate_autoimpute_inputs: comprehensive input validation
+    - prepare_data_for_imputation: data preprocessing and normalization
+    - evaluate_model: cross-validation evaluation for a single model
+    - fit_and_predict_model: model fitting and prediction generation
+    - select_best_model: selection of best performing model
+"""
+
+import logging
+from typing import Any, Dict, List, Optional, Tuple, Type
+
+import pandas as pd
+
+from microimpute.comparisons.validation import (
+    validate_imputation_inputs,
+    validate_quantiles,
+)
+from microimpute.evaluations import cross_validate_model
+from microimpute.models import Imputer
+from microimpute.models.quantreg import QuantReg
+from microimpute.utils.data import preprocess_data, unnormalize_predictions
+
+log = logging.getLogger(__name__)
+
+
+def validate_autoimpute_inputs(
+    donor_data: pd.DataFrame,
+    receiver_data: pd.DataFrame,
+    predictors: List[str],
+    imputed_variables: List[str],
+    weight_col: Optional[str],
+    quantiles: Optional[List[float]],
+    hyperparameters: Optional[Dict[str, Dict[str, Any]]],
+    tune_hyperparameters: bool,
+    log_level: str,
+) -> None:
+    """Validate all inputs for the autoimpute function.
+
+    Args:
+        donor_data: Training data.
+        receiver_data: Data to impute.
+        predictors: Predictor column names.
+        imputed_variables: Variables to impute.
+        weight_col: Optional weight column.
+        quantiles: Optional quantiles list.
+        hyperparameters: Optional model hyperparameters.
+        tune_hyperparameters: Whether to tune hyperparameters.
+        log_level: Logging level string.
+
+    Raises:
+        ValueError: If validation fails.
+    """
+    # Validate log level
+    if log_level not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
+        error_msg = f"Invalid log_level: {log_level}. Must be one of DEBUG, INFO, WARNING, ERROR, CRITICAL."
+        log.error(error_msg)
+        raise ValueError(error_msg)
+
+    # Validate quantiles if provided
+    if quantiles:
+        validate_quantiles(quantiles)
+
+    # Validate data and columns
+    validate_imputation_inputs(
+        donor_data, receiver_data, predictors, imputed_variables, weight_col
+    )
+
+    # Validate hyperparameter settings
+    if hyperparameters is not None and tune_hyperparameters:
+        error_msg = "Cannot specify both model_hyperparams and request to automatically tune hyperparameters, please select one or the other."
+        log.error(error_msg)
+        raise ValueError(error_msg)
+
+
+def prepare_data_for_imputation(
+    donor_data: pd.DataFrame,
+    receiver_data: pd.DataFrame,
+    predictors: List[str],
+    imputed_variables: List[str],
+    weight_col: Optional[str],
+    normalize_data: bool,
+    train_size: float,
+    test_size: float,
+) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[dict]]:
+    """Prepare training and imputing data, optionally with normalization.
+
+    Args:
+        donor_data: Original donor data.
+        receiver_data: Original receiver data.
+        predictors: Predictor columns.
+        imputed_variables: Variables to impute.
+        weight_col: Optional weight column.
+        normalize_data: Whether to normalize.
+        train_size: Training data proportion.
+        test_size: Test data proportion.
+
+    Returns:
+        Tuple of (training_data, imputing_data, normalization_params or None)
+    """
+    # Remove imputed variables from receiver if present
+    receiver_data = receiver_data.drop(
+        columns=imputed_variables, errors="ignore"
+    )
+
+    training_data = donor_data.copy()
+    imputing_data = receiver_data.copy()
+
+    if normalize_data:
+        # Normalize predictors and imputed variables together for consistency
+        all_training_cols = predictors + imputed_variables
+        normalized_training, norm_params = preprocess_data(
+            training_data[all_training_cols],
+            full_data=True,
+            train_size=train_size,
+            test_size=test_size,
+            normalize=True,
+        )
+
+        # Normalize imputing data predictors using same parameters
+        imputing_predictors, _ = preprocess_data(
+            imputing_data[predictors],
+            full_data=True,
+            train_size=train_size,
+            test_size=test_size,
+            normalize=True,
+        )
+
+        # Reconstruct training data with normalized values
+        training_data = normalized_training
+        if weight_col:
+            training_data[weight_col] = donor_data[weight_col]
+
+        imputing_data = imputing_predictors
+
+        # Extract normalization params only for imputed variables
+        imputed_norm_params = {
+            col: norm_params[col]
+            for col in imputed_variables
+            if col in norm_params
+        }
+
+        return training_data, imputing_data, imputed_norm_params
+    else:
+        # No normalization needed
+        training_data = preprocess_data(
+            training_data[predictors + imputed_variables],
+            full_data=True,
+            train_size=train_size,
+            test_size=test_size,
+            normalize=False,
+        )
+
+        imputing_data = preprocess_data(
+            imputing_data[predictors],
+            full_data=True,
+            train_size=train_size,
+            test_size=test_size,
+            normalize=False,
+        )
+
+        if weight_col:
+            training_data[weight_col] = donor_data[weight_col]
+
+        return training_data, imputing_data, None
+
+
+def evaluate_model(
+    model: Type[Imputer],
+    data: pd.DataFrame,
+    predictors: List[str],
+    imputed_variables: List[str],
+    weight_col: Optional[str],
+    quantiles: List[float],
+    k_folds: int,
+    random_state: int,
+    tune_hyperparams: bool,
+    hyperparameters: Optional[Dict[str, Any]],
+) -> tuple:
+    """Evaluate a single imputation model with cross-validation.
+
+    Args:
+        model: The imputation model class to evaluate.
+        data: The dataset to use for evaluation.
+        predictors: List of predictor column names.
+        imputed_variables: List of columns to impute.
+        weight_col: Optional weight column.
+        quantiles: List of quantiles to evaluate.
+        k_folds: Number of cross-validation folds.
+        random_state: Random seed for reproducibility.
+        tune_hyperparams: Whether to tune hyperparameters.
+        hyperparameters: Optional model-specific hyperparameters.
+
+    Returns:
+        Tuple containing model name and cross-validation results.
+    """
+    model_name = model.__name__
+    log.info(f"Evaluating {model_name}...")
+
+    cv_result = cross_validate_model(
+        model_class=model,
+        data=data,
+        predictors=predictors,
+        imputed_variables=imputed_variables,
+        weight_col=weight_col,
+        quantiles=quantiles,
+        n_splits=k_folds,
+        random_state=random_state,
+        tune_hyperparameters=tune_hyperparams,
+        model_hyperparams=hyperparameters,
+    )
+
+    if (
+        tune_hyperparams
+        and isinstance(cv_result, tuple)
+        and len(cv_result) == 2
+    ):
+        final_results, best_params = cv_result
+        return model_name, final_results, best_params
+    else:
+        return model_name, cv_result
+
+
+def fit_and_predict_model(
+    model_class: Type[Imputer],
+    training_data: pd.DataFrame,
+    imputing_data: pd.DataFrame,
+    predictors: List[str],
+    imputed_variables: List[str],
+    weight_col: Optional[str],
+    quantile: float,
+    hyperparams: Optional[Dict[str, Any]] = None,
+    log_level: str = "WARNING",
+) -> Tuple[Any, Dict[float, pd.DataFrame]]:
+    """Fit a model and generate predictions.
+
+    Args:
+        model_class: The model class to use.
+        training_data: Training data.
+        imputing_data: Data to make predictions on.
+        predictors: Predictor columns.
+        imputed_variables: Variables to impute.
+        weight_col: Optional weight column.
+        quantile: Quantile to predict.
+        hyperparams: Optional model hyperparameters.
+        log_level: Logging level.
+
+    Returns:
+        Tuple of (fitted_model, predictions_dict)
+    """
+    model_name = model_class.__name__
+    model = model_class(log_level=log_level)
+
+    # Fit the model
+    if model_name == "QuantReg":
+        # QuantReg needs explicit quantiles during fitting
+        fitted_model = model.fit(
+            training_data,
+            predictors,
+            imputed_variables,
+            weight_col=weight_col,
+            quantiles=[quantile],
+        )
+    elif hyperparams and model_name in ["Matching", "QRF"]:
+        # Apply hyperparameters for specific models
+        fitted_model = model.fit(
+            training_data,
+            predictors,
+            imputed_variables,
+            weight_col=weight_col,
+            **hyperparams,
+        )
+    else:
+        fitted_model = model.fit(
+            training_data,
+            predictors,
+            imputed_variables,
+            weight_col=weight_col,
+        )
+
+    # Generate predictions
+    imputations = fitted_model.predict(imputing_data, quantiles=[quantile])
+
+    # Handle case where predict returns a DataFrame directly
+    if isinstance(imputations, pd.DataFrame):
+        imputations = {quantile: imputations}
+
+    return fitted_model, imputations
+
+
+def select_best_model(
+    method_results_df: pd.DataFrame,
+) -> Tuple[str, pd.Series]:
+    """Select the best model based on cross-validation results.
+
+    Args:
+        method_results_df: DataFrame with model performance metrics.
+
+    Returns:
+        Tuple of (best_method_name, best_method_row)
+    """
+    # Add mean_loss column if not present
+    if "mean_loss" not in method_results_df.columns:
+        method_results_df["mean_loss"] = method_results_df.mean(axis=1)
+
+    best_method = method_results_df["mean_loss"].idxmin()
+    best_row = method_results_df.loc[best_method]
+
+    log.info(
+        f"The method with the lowest average loss is {best_method}, "
+        f"with an average loss across variables and quantiles of {best_row['mean_loss']}"
+    )
+
+    return best_method, best_row
diff --git a/microimpute/comparisons/imputations.py b/microimpute/comparisons/imputations.py
index ce3f982..0316880 100644
--- a/microimpute/comparisons/imputations.py
+++ b/microimpute/comparisons/imputations.py
@@ -8,10 +8,13 @@
 import logging
 from typing import Any, Dict, List, Optional, Type
 
-import numpy as np
 import pandas as pd
 from pydantic import validate_call
 
+from microimpute.comparisons.validation import (
+    validate_columns_exist,
+    validate_quantiles,
+)
 from microimpute.config import QUANTILES, VALIDATE_CONFIG
 from microimpute.models.quantreg import QuantReg
 
@@ -51,39 +54,14 @@ def get_imputations(
             log.error(error_msg)
             raise ValueError(error_msg)
 
-        # Validate that predictor and imputed variable columns exist in training data
-        missing_predictors_train = [
-            col for col in predictors if col not in X_train.columns
-        ]
-        if missing_predictors_train:
-            error_msg = f"Missing predictor columns in training data: {missing_predictors_train}"
-            log.error(error_msg)
-            raise ValueError(error_msg)
-
-        missing_imputed_train = [
-            col for col in imputed_variables if col not in X_train.columns
-        ]
-        if missing_imputed_train:
-            error_msg = f"Missing imputed variable columns in training data: {missing_imputed_train}"
-            log.error(error_msg)
-            raise ValueError(error_msg)
-
-        # Validate that predictor columns exist in test data (imputed variables may not be present in test)
-        missing_predictors_test = [
-            col for col in predictors if col not in X_test.columns
-        ]
-        if missing_predictors_test:
-            error_msg = f"Missing predictor columns in test data: {missing_predictors_test}"
-            log.error(error_msg)
-            raise ValueError(error_msg)
+        # Validate columns exist
+        validate_columns_exist(X_train, predictors, "training data")
+        validate_columns_exist(X_train, imputed_variables, "training data")
+        validate_columns_exist(X_test, predictors, "test data")
 
         # Validate quantiles if provided
         if quantiles:
-            invalid_quantiles = [q for q in quantiles if not 0 <= q <= 1]
-            if invalid_quantiles:
-                error_msg = f"Invalid quantiles (must be between 0 and 1): {invalid_quantiles}"
-                log.error(error_msg)
-                raise ValueError(error_msg)
+            validate_quantiles(quantiles)
 
         log.info(
             f"Generating imputations for {len(model_classes)} model classes"
@@ -100,11 +78,9 @@ def get_imputations(
 
         method_imputations: Dict[str, Dict[float, Any]] = {}
 
-        fitted_models: Dict[str, Any] = {}
         for model_class in model_classes:
             model_name = model_class.__name__
             log.info(f"Processing model: {model_name}")
-            method_imputations[model_name] = {}
 
             try:
                 # Instantiate the model
@@ -125,30 +101,12 @@ def get_imputations(
                         X_train, predictors, imputed_variables
                     )
 
-                fitted_models[model_name] = fitted_model
-
                 # Get predictions
                 log.info(f"Generating predictions with {model_name}")
                 imputations = fitted_model.predict(X_test, quantiles)
                 method_imputations[model_name] = imputations
 
-                # Log a summary of predictions
-                num_quantiles = len(imputations)
-                first_quantile = next(iter(imputations.keys()))
-                first_pred = imputations[first_quantile]
-
-                if isinstance(first_pred, np.ndarray):
-                    pred_shape = first_pred.shape
-                elif isinstance(first_pred, pd.DataFrame):
-                    pred_shape = first_pred.shape
-                else:
-                    pred_shape = "unknown"
-
-                log.info(
-                    f"Model {model_name} generated predictions for {num_quantiles} quantiles with shape {pred_shape}"
-                )
-
-            except Exception as model_error:
+            except (TypeError, AttributeError, ValueError) as model_error:
                 log.error(
                     f"Error processing model {model_name}: {str(model_error)}"
                 )
@@ -164,6 +122,6 @@ def get_imputations(
     except ValueError as e:
         # Re-raise validation errors directly
         raise e
-    except Exception as e:
+    except (KeyError, TypeError, AttributeError) as e:
         log.error(f"Unexpected error during imputation generation: {str(e)}")
         raise RuntimeError(f"Failed to generate imputations: {str(e)}") from e
diff --git a/microimpute/comparisons/quantile_loss.py b/microimpute/comparisons/quantile_loss.py
index 988ac64..b3c0adc 100644
--- a/microimpute/comparisons/quantile_loss.py
+++ b/microimpute/comparisons/quantile_loss.py
@@ -12,6 +12,10 @@
 import pandas as pd
 from pydantic import validate_call
 
+from microimpute.comparisons.validation import (
+    validate_columns_exist,
+    validate_quantiles,
+)
 from microimpute.config import QUANTILES, VALIDATE_CONFIG
 
 log = logging.getLogger(__name__)
@@ -53,10 +57,7 @@ def compute_quantile_loss(
     """
     try:
         # Validate quantile value
-        if not 0 <= q <= 1:
-            error_msg = f"Quantile must be between 0 and 1, got {q}"
-            log.error(error_msg)
-            raise ValueError(error_msg)
+        validate_quantiles([q])
 
         # Validate input dimensions
         if len(test_y) != len(imputations):
@@ -76,13 +77,107 @@ def compute_quantile_loss(
 
         return losses
 
-    except Exception as e:
-        if isinstance(e, ValueError):
-            raise e
+    except (TypeError, AttributeError) as e:
         log.error(f"Error computing quantile loss: {str(e)}")
         raise RuntimeError(f"Failed to compute quantile loss: {str(e)}") from e
 
 
+def _compute_method_losses(
+    method: str,
+    imputation: Dict[float, pd.DataFrame],
+    test_y: pd.DataFrame,
+    imputed_variables: List[str],
+    quantiles: List[float],
+) -> List[Dict]:
+    """Compute losses for a single method across all quantiles and variables.
+
+    Args:
+        method: Name of the imputation method.
+        imputation: Dictionary mapping quantiles to imputation DataFrames.
+        test_y: DataFrame containing true values.
+        imputed_variables: List of variables to evaluate.
+        quantiles: List of quantiles to evaluate.
+
+    Returns:
+        List of dictionaries containing loss results.
+
+    Raises:
+        ValueError: If required quantiles or variables are missing.
+    """
+    results = []
+
+    for quantile in quantiles:
+        log.debug(f"Computing loss for {method} at quantile {quantile}")
+
+        # Validate that the quantile exists in the imputation results
+        if quantile not in imputation:
+            error_msg = f"Quantile {quantile} not found in imputations for method {method}"
+            log.error(error_msg)
+            raise ValueError(error_msg)
+
+        variable_losses = []
+
+        for variable in imputed_variables:
+            # Validate variable exists
+            if variable not in imputation[quantile].columns:
+                error_msg = f"Variable {variable} not found in imputation results for method {method}"
+                log.error(error_msg)
+                raise ValueError(error_msg)
+
+            # Get values
+            test_values = test_y[variable].values
+            pred_values = imputation[quantile][variable].values
+
+            # Compute loss
+            q_loss = compute_quantile_loss(test_values, pred_values, quantile)
+            variable_losses.append(q_loss.mean())
+
+            # Add variable-specific result
+            results.append(
+                {
+                    "Method": method,
+                    "Imputed Variable": variable,
+                    "Percentile": quantile,
+                    "Loss": q_loss.mean(),
+                }
+            )
+
+            log.debug(
+                f"Loss for {method}/{variable} at q={quantile}: {q_loss.mean():.6f}"
+            )
+
+        # Add average across variables for this quantile
+        avg_var_loss = np.mean(variable_losses)
+        results.append(
+            {
+                "Method": method,
+                "Imputed Variable": "mean_loss",
+                "Percentile": quantile,
+                "Loss": avg_var_loss,
+            }
+        )
+
+    # Add overall average across all quantiles
+    all_quantile_losses = [
+        r["Loss"]
+        for r in results
+        if r["Imputed Variable"] == "mean_loss"
+        and r["Percentile"] != "mean_loss"
+    ]
+    if all_quantile_losses:
+        avg_quant_loss = np.mean(all_quantile_losses)
+        results.append(
+            {
+                "Method": method,
+                "Imputed Variable": "mean_loss",
+                "Percentile": "mean_loss",
+                "Loss": avg_quant_loss,
+            }
+        )
+
+    return results
+
+
 @validate_call(config=VALIDATE_CONFIG)
 def compare_quantile_loss(
     test_y: pd.DataFrame,
@@ -95,6 +190,7 @@ def compare_quantile_loss(
         test_y: DataFrame containing true values.
         method_imputations: Nested dictionary mapping method names
             to dictionaries mapping quantiles to imputation values.
+        imputed_variables: List of variables to evaluate.
 
     Returns:
         pd.DataFrame: Results dataframe with columns 'Method',
@@ -112,100 +208,29 @@ def compare_quantile_loss(
         log.info(f"Using {len(QUANTILES)} quantiles: {QUANTILES}")
         log.info(f"True values shape: {test_y.shape}")
 
-        # Initialize empty dataframe with method names, quantile, and loss columns
-        results_df: pd.DataFrame = pd.DataFrame(
-            columns=["Method", "Imputed Variable", "Percentile", "Loss"]
-        )
-
-        # Process each method and quantile
-        for method, imputation in method_imputations.items():
-            quantile_losses = []
-            for quantile in QUANTILES:
-                log.debug(
-                    f"Computing loss for {method} at quantile {quantile}"
-                )
-
-                # Validate that the quantile exists in the imputation results
-                if quantile not in imputation:
-                    error_msg = f"Quantile {quantile} not found in imputations for method {method}"
-                    log.error(error_msg)
-                    raise ValueError(error_msg)
-
-                variable_losses = []
-                for variable in imputed_variables:
-                    if variable not in imputation[quantile].columns:
-                        error_msg = f"Variable {variable} not found in imputation results for method {method}"
-                        log.error(error_msg)
-                        raise ValueError(error_msg)
-
-                    # Flatten arrays for computation
-                    test_values = test_y[variable].values
-
-                    pred_values = imputation[quantile][variable].values
-
-                    # Compute loss
-                    q_loss = compute_quantile_loss(
-                        test_values,
-                        pred_values,
-                        quantile,
-                    )
-
-                    variable_losses.append(q_loss)
-
-                    # Create new row and add to results
-                    new_row = {
-                        "Method": method,
-                        "Imputed Variable": variable,
-                        "Percentile": quantile,
-                        "Loss": q_loss.mean(),
-                    }
-
-                    log.debug(
-                        f"Mean loss for {method} at q={quantile}: {q_loss.mean():.6f}"
-                    )
-
-                    results_df = pd.concat(
-                        [results_df, pd.DataFrame([new_row])],
-                        ignore_index=True,
-                    )
-
-                # Compute the average loss across all variables
-                avg_var_loss = np.mean(variable_losses)
-
-                # Create a new row for "mean_loss"
-                new_row = {
-                    "Method": method,
-                    "Imputed Variable": "mean_loss",
-                    "Percentile": quantile,
-                    "Loss": avg_var_loss,
-                }
-
-                results_df = pd.concat(
-                    [results_df, pd.DataFrame([new_row])], ignore_index=True
-                )
+        # Validate inputs
+        validate_columns_exist(test_y, imputed_variables, "test_y")
 
-            quantile_losses.append(avg_var_loss)
+        # Collect all results in a list first (more efficient than repeated DataFrame concatenation)
+        all_results = []
 
-            # Compute the average loss across all quantiles
-            avg_quant_loss = np.mean(quantile_losses)
+        # Process each method
+        for method, imputation in method_imputations.items():
+            method_results = _compute_method_losses(
+                method, imputation, test_y, imputed_variables, QUANTILES
+            )
+            all_results.extend(method_results)
 
-            # Create a new row for "mean_loss"
-            new_row = {
-                "Method": method,
-                "Imputed Variable": "mean_loss",
-                "Percentile": "mean_loss",
-                "Loss": avg_quant_loss,
-            }
+        # Create DataFrame from all results at once
+        results_df = pd.DataFrame(all_results)
 
-            results_df = pd.concat(
-                [results_df, pd.DataFrame([new_row])], ignore_index=True
-            )
+        log.info(f"Comparison complete. Results shape: {results_df.shape}")
 
         return results_df
 
     except ValueError as e:
         # Re-raise validation errors
         raise e
-    except Exception as e:
+    except (KeyError, TypeError, AttributeError) as e:
         log.error(f"Error in quantile loss comparison: {str(e)}")
         raise RuntimeError(f"Failed to compare quantile loss: {str(e)}") from e
diff --git a/microimpute/comparisons/validation.py b/microimpute/comparisons/validation.py
new file mode 100644
index 0000000..47d487d
--- /dev/null
+++ b/microimpute/comparisons/validation.py
@@ -0,0 +1,115 @@
+"""Data validation utilities for imputation operations
+
+This module provides centralized validation functions used throughout the microimpute
+package to ensure data integrity, validate parameters, and maintain consistency
+across different imputation methods.
+
+The validation functions help prevent common errors and provide clear error messages
+when data or parameters don't meet requirements.
+"""
+
+import logging
+from typing import List, Optional
+
+import pandas as pd
+
+log = logging.getLogger(__name__)
+
+
+def validate_quantiles(quantiles: List[float]) -> None:
+    """Validate that quantiles are within [0, 1] range.
+
+    Args:
+        quantiles: List of quantile values to validate.
+
+    Raises:
+        ValueError: If any quantile is outside [0, 1] range.
+    """
+    invalid_quantiles = [q for q in quantiles if not 0 <= q <= 1]
+    if invalid_quantiles:
+        error_msg = (
+            f"Invalid quantiles (must be between 0 and 1): {invalid_quantiles}"
+        )
+        log.error(error_msg)
+        raise ValueError(error_msg)
+
+
+def validate_columns_exist(
+    data: pd.DataFrame, columns: List[str], data_name: str = "data"
+) -> None:
+    """Validate that specified columns exist in the DataFrame.
+
+    Args:
+        data: DataFrame to check.
+        columns: List of column names that should exist.
+        data_name: Name of the dataset for error messages.
+
+    Raises:
+        ValueError: If any columns are missing from the DataFrame.
+    """
+    missing_columns = [col for col in columns if col not in data.columns]
+    if missing_columns:
+        error_msg = f"Missing columns in {data_name}: {missing_columns}"
+        log.error(error_msg)
+        raise ValueError(error_msg)
+
+
+def validate_dataframe_compatibility(
+    df1: pd.DataFrame,
+    df2: pd.DataFrame,
+    check_columns: Optional[List[str]] = None,
+    require_same_length: bool = True,
+) -> None:
+    """Validate that two DataFrames are compatible for operations.
+
+    Args:
+        df1: First DataFrame.
+        df2: Second DataFrame.
+        check_columns: If provided, verify these columns exist in both DataFrames.
+        require_same_length: If True, verify DataFrames have the same number of rows.
+
+    Raises:
+        ValueError: If DataFrames are incompatible.
+    """
+    if require_same_length and len(df1) != len(df2):
+        error_msg = (
+            f"Length mismatch: first DataFrame has {len(df1)} rows, "
+            f"second DataFrame has {len(df2)} rows"
+        )
+        log.error(error_msg)
+        raise ValueError(error_msg)
+
+    if check_columns:
+        validate_columns_exist(df1, check_columns, "first DataFrame")
+        validate_columns_exist(df2, check_columns, "second DataFrame")
+
+
+def validate_imputation_inputs(
+    donor_data: pd.DataFrame,
+    receiver_data: pd.DataFrame,
+    predictors: List[str],
+    imputed_variables: List[str],
+    weight_col: Optional[str] = None,
+) -> None:
+    """Validate inputs for imputation operations.
+
+    Args:
+        donor_data: Training data containing predictors and target variables.
+        receiver_data: Data where imputations will be applied.
+        predictors: List of predictor column names.
+        imputed_variables: List of variables to impute.
+        weight_col: Optional weight column name.
+
+    Raises:
+        ValueError: If validation fails.
+    """
+    # Validate donor data has all required columns
+    validate_columns_exist(donor_data, predictors, "donor data")
+    validate_columns_exist(donor_data, imputed_variables, "donor data")
+
+    # Validate receiver data has predictor columns
+    validate_columns_exist(receiver_data, predictors, "receiver data")
+
+    # Validate weight column if provided
+    if weight_col:
+        validate_columns_exist(donor_data, [weight_col], "donor data")
diff --git a/microimpute/evaluations/__init__.py b/microimpute/evaluations/__init__.py
index 3341836..2dffacb 100644
--- a/microimpute/evaluations/__init__.py
+++ b/microimpute/evaluations/__init__.py
@@ -1,6 +1,11 @@
-"""Model Evaluation Utilities
+"""Model evaluation and validation utilities
 
-This module contains functions for evaluating imputation model performance.
+This module provides comprehensive tools for evaluating imputation model performance
+using cross-validation techniques. It calculates train and test quantile loss metrics
+across multiple folds to provide robust performance estimates.
+
+Key components:
+    - cross_validate_model: perform k-fold cross-validation for imputation models with optional hyperparameter tuning
 """
 
-from .cross_validation import cross_validate_model
+from microimpute.evaluations.cross_validation import cross_validate_model
diff --git a/microimpute/evaluations/cross_validation.py b/microimpute/evaluations/cross_validation.py
index 4ff122f..d3f764f 100644
--- a/microimpute/evaluations/cross_validation.py
+++ b/microimpute/evaluations/cross_validation.py
@@ -6,7 +6,7 @@
 """
 
 import logging
-from typing import Dict, List, Optional, Tuple, Type
+from typing import Any, Dict, List, Optional, Tuple, Type
 
 import joblib
 import numpy as np
@@ -15,6 +15,10 @@
 from sklearn.model_selection import KFold
 
 from microimpute.comparisons.quantile_loss import quantile_loss
+from microimpute.comparisons.validation import (
+    validate_columns_exist,
+    validate_quantiles,
+)
 from microimpute.config import QUANTILES, RANDOM_STATE, VALIDATE_CONFIG
 
 try:
@@ -27,6 +31,269 @@
 log = logging.getLogger(__name__)
 
 
+def _process_single_fold(
+    fold_idx_pair: Tuple[int, Tuple[np.ndarray, np.ndarray]],
+    data: pd.DataFrame,
+    model_class: Type,
+    predictors: List[str],
+    imputed_variables: List[str],
+    weight_col: Optional[str],
+    quantiles: List[float],
+    model_hyperparams: Optional[dict],
+    tune_hyperparameters: bool,
+) -> Tuple[int, Dict, Dict, np.ndarray, np.ndarray, Optional[dict]]:
+    """Process a single CV fold and return results.
+
+    Args:
+        fold_idx_pair: Tuple of (fold_index, (train_indices, test_indices))
+        data: Full dataset
+        model_class: Model class to evaluate
+        predictors: Predictor column names
+        imputed_variables: Variables to impute
+        weight_col: Optional weight column
+        quantiles: List of quantiles to evaluate
+        model_hyperparams: Optional model hyperparameters
+        tune_hyperparameters: Whether to tune hyperparameters
+
+    Returns:
+        Tuple containing fold results
+    """
+    fold_idx, (train_idx, test_idx) = fold_idx_pair
+    log.info(f"Processing fold {fold_idx+1}")
+
+    # Split data for this fold
+    train_data = data.iloc[train_idx]
+    test_data = data.iloc[test_idx]
+
+    # Store actual values for this fold
+    train_y = train_data[imputed_variables].values
+    test_y = test_data[imputed_variables].values
+
+    # Instantiate and fit the model
+    model = model_class()
+    fold_tuned_params = None
+
+    # Fit model with appropriate parameters
+    fitted_model, fold_tuned_params = _fit_model_for_fold(
+        model,
+        model_class,
+        train_data,
+        predictors,
+        imputed_variables,
+        weight_col,
+        quantiles,
+        model_hyperparams,
+        tune_hyperparameters,
+    )
+
+    # Get predictions for this fold
+    log.info(f"Generating predictions for train and test data")
+    fold_test_imputations = fitted_model.predict(test_data, quantiles)
+    fold_train_imputations = fitted_model.predict(train_data, quantiles)
+
+    return (
+        fold_idx,
+        fold_test_imputations,
+        fold_train_imputations,
+        test_y,
+        train_y,
+        fold_tuned_params,
+    )
+
+
+def _fit_model_for_fold(
+    model: Any,
+    model_class: Type,
+    train_data: pd.DataFrame,
+    predictors: List[str],
+    imputed_variables: List[str],
+    weight_col: Optional[str],
+    quantiles: List[float],
+    model_hyperparams: Optional[dict],
+    tune_hyperparameters: bool,
+) -> Tuple[Any, Optional[dict]]:
+    """Fit a model for a single fold with appropriate parameters.
+
+    Returns:
+        Tuple of (fitted_model, tuned_hyperparameters or None)
+    """
+    model_name = model_class.__name__
+    fold_tuned_params = None
+
+    # Handle model-specific hyperparameters
+    if model_hyperparams and model_name in model_hyperparams:
+        try:
+            log.info(
+                f"Fitting {model_name} with hyperparameters: {model_hyperparams[model_name]}"
+            )
+            fitted_model = model.fit(
+                X_train=train_data,
+                predictors=predictors,
+                imputed_variables=imputed_variables,
+                weight_col=weight_col,
+                **model_hyperparams[model_name],
+            )
+        except TypeError as e:
+            log.warning(
+                f"Invalid hyperparameters for {model_name}, using defaults: {str(e)}"
+            )
+            fitted_model = model.fit(
+                X_train=train_data,
+                predictors=predictors,
+                imputed_variables=imputed_variables,
+                weight_col=weight_col,
+            )
+            raise ValueError(
+                f"Invalid hyperparameters for {model_name}"
+            ) from e
+
+    # Handle QuantReg which needs explicit quantiles
+    elif model_class == QuantReg:
+        log.info(f"Fitting QuantReg model with explicit quantiles")
+        fitted_model = model.fit(
+            train_data,
+            predictors,
+            imputed_variables,
+            weight_col=weight_col,
+            quantiles=quantiles,
+        )
+
+    # Handle hyperparameter tuning for QRF and Matching
+    elif tune_hyperparameters and model_name in ["QRF", "Matching"]:
+        log.info(f"Tuning {model_name} hyperparameters during fitting")
+        fitted_model, fold_tuned_params = model.fit(
+            train_data,
+            predictors,
+            imputed_variables,
+            weight_col=weight_col,
+            tune_hyperparameters=True,
+        )
+
+    # Default fitting
+    else:
+        log.info(f"Fitting {model_name} model with default parameters")
+        fitted_model = model.fit(
+            train_data, predictors, imputed_variables, weight_col=weight_col
+        )
+
+    return fitted_model, fold_tuned_params
+
+
+def _compute_fold_loss(
+    fold_idx: int,
+    quantile: float,
+    test_y_values: List[np.ndarray],
+    train_y_values: List[np.ndarray],
+    test_results: Dict[float, List],
+    train_results: Dict[float, List],
+) -> Dict[str, Any]:
+    """Compute loss for a specific fold and quantile.
+
+    Returns:
+        Dictionary with fold, quantile, and loss metrics
+    """
+    # Flatten arrays for calculation
+    test_y_flat = test_y_values[fold_idx].flatten()
+    train_y_flat = train_y_values[fold_idx].flatten()
+    test_pred_flat = test_results[quantile][fold_idx].values.flatten()
+    train_pred_flat = train_results[quantile][fold_idx].values.flatten()
+
+    # Calculate loss
+    test_loss = quantile_loss(quantile, test_y_flat, test_pred_flat)
+    train_loss = quantile_loss(quantile, train_y_flat, train_pred_flat)
+
+    return {
+        "fold": fold_idx,
+        "quantile": quantile,
+        "test_loss": test_loss.mean(),
+        "train_loss": train_loss.mean(),
+    }
+
+
+def _compute_losses_parallel(
+    test_y_values: List[np.ndarray],
+    train_y_values: List[np.ndarray],
+    test_results: Dict[float, List],
+    train_results: Dict[float, List],
+    quantiles: List[float],
+    n_jobs: int,
+) -> Tuple[Dict[float, List[float]], Dict[float, List[float]]]:
+    """Compute losses in parallel for all folds and quantiles.
+
+    Returns:
+        Tuple of (test_losses_by_quantile, train_losses_by_quantile)
+    """
+    loss_tasks = [(k, q) for k in range(len(test_y_values)) for q in quantiles]
+
+    # Only parallelize if worthwhile
+    if len(loss_tasks) > 10 and n_jobs != 1:
+        with joblib.Parallel(n_jobs=n_jobs) as parallel:
+            loss_results = parallel(
+                joblib.delayed(_compute_fold_loss)(
+                    fold_idx,
+                    q,
+                    test_y_values,
+                    train_y_values,
+                    test_results,
+                    train_results,
+                )
+                for fold_idx, q in loss_tasks
+            )
+    else:
+        # Sequential computation for smaller tasks
+        loss_results = [
+            _compute_fold_loss(
+                fold_idx,
+                q,
+                test_y_values,
+                train_y_values,
+                test_results,
+                train_results,
+            )
+            for fold_idx, q in loss_tasks
+        ]
+
+    # Organize results
+    avg_test_losses = {q: [] for q in quantiles}
+    avg_train_losses = {q: [] for q in quantiles}
+
+    for result in loss_results:
+        q = result["quantile"]
+        fold_idx = result["fold"]
+        avg_test_losses[q].append(result["test_loss"])
+        avg_train_losses[q].append(result["train_loss"])
+
+        log.debug(
+            f"Fold {fold_idx+1}, q={q}: Train loss = {result['train_loss']:.6f}, "
+            f"Test loss = {result['test_loss']:.6f}"
+        )
+
+    return avg_test_losses, avg_train_losses
+
+
+def _select_best_hyperparameters(
+    loss_results: List[Dict], tuned_hyperparameters: Dict[int, Any]
+) -> Any:
+    """Select best hyperparameters based on median quantile test loss.
+
+    Args:
+        loss_results: List of loss result dictionaries
+        tuned_hyperparameters: Dictionary mapping fold index to tuned parameters
+
+    Returns:
+        Best hyperparameters
+    """
+    best_fold = 0
+    best_loss = float("inf")
+
+    for result in loss_results:
+        if result["quantile"] == 0.5 and result["test_loss"] < best_loss:
+            best_loss = result["test_loss"]
+            best_fold = result["fold"]
+
+    return tuned_hyperparameters.get(best_fold)
+
+
 @validate_call(config=VALIDATE_CONFIG)
 def cross_validate_model(
     model_class: Type,
@@ -43,69 +310,37 @@ def cross_validate_model(
     """Perform cross-validation for an imputation model.
 
     Args:
-        model_class: Model class to evaluate (e.g., QRF, OLS, QuantReg,
-                   Matching).
+        model_class: Model class to evaluate (e.g., QRF, OLS, QuantReg, Matching).
         data: Full dataset to split into training and testing folds.
         predictors: Names of columns to use as predictors.
         imputed_variables: Names of columns to impute.
         weight_col: Optional column name for sample weights.
-        quantiles: List of quantiles to evaluate. Defaults to standard
-            set if None.
+        quantiles: List of quantiles to evaluate. Defaults to standard set if None.
         n_splits: Number of cross-validation folds.
         random_state: Random seed for reproducibility.
         model_hyperparams: Hyperparameters for the model class.
-            Defaults to None and uses default model hyperparameters then.
-        tune_hyperparameters: Whether to tune hyperparameters for QRF
-            model. Defaults to False.
+        tune_hyperparameters: Whether to tune hyperparameters for QRF/Matching models.
 
     Returns:
-        DataFrame with train and test rows, quantiles as columns, and average
-        loss values
+        DataFrame with train and test rows, quantiles as columns, and average loss values.
+        If tune_hyperparameters is True, returns tuple of (DataFrame, best_hyperparameters).
 
     Raises:
         ValueError: If input data is invalid or missing required columns.
         RuntimeError: If cross-validation fails.
     """
+    # Use shared validation utilities
+    validate_columns_exist(data, predictors, "data")
+    validate_columns_exist(data, imputed_variables, "data")
+    if weight_col:
+        validate_columns_exist(data, [weight_col], "data")
+    if quantiles:
+        validate_quantiles(quantiles)
+
     # Set up parallel processing
-    # Disable parallel processing for Matching (R/rpy2 doesn't work well with multiprocessing)
-    if Matching is not None and model_class == Matching:
-        n_jobs: Optional[int] = 1  # Sequential processing for R-based models
-    else:
-        n_jobs: Optional[int] = (
-            -1
-        )  # Parallel processing for Python-only models
+    n_jobs = 1 if (Matching is not None and model_class == Matching) else -1
 
     try:
-        # Validate predictor and imputed variable columns exist
-        missing_predictors = [
-            col for col in predictors if col not in data.columns
-        ]
-        if missing_predictors:
-            error_msg = f"Missing predictor columns: {missing_predictors}"
-            log.error(error_msg)
-            raise ValueError(error_msg)
-
-        missing_imputed = [
-            col for col in imputed_variables if col not in data.columns
-        ]
-        if missing_imputed:
-            error_msg = f"Missing imputed variable columns: {missing_imputed}"
-            log.error(error_msg)
-            raise ValueError(error_msg)
-
-        if quantiles:
-            invalid_quantiles = [q for q in quantiles if not 0 <= q <= 1]
-            if invalid_quantiles:
-                error_msg = f"Invalid quantiles (must be between 0 and 1): {invalid_quantiles}"
-                log.error(error_msg)
-                raise ValueError(error_msg)
-
-        # Set up results containers
-        test_results = {q: [] for q in quantiles}
-        train_results = {q: [] for q in quantiles}
-        train_y_values = []
-        test_y_values = []
-
         log.info(
             f"Starting {n_splits}-fold cross-validation for {model_class.__name__}"
         )
@@ -113,307 +348,65 @@ def cross_validate_model(
 
         # Set up k-fold cross-validation
         kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
-
-        # Create parallel-ready fold indices
         fold_indices = list(kf.split(data))
 
-        # Initialize tuned_hyperparameters
-        tuned_hyperparameters = {} if tune_hyperparameters else None
-        best_tuned_hyperparams = None
-
-        # Define the function to process a single fold
-        def process_single_fold(
-            fold_idx_pair,
-        ) -> Tuple[Dict, Dict, np.ndarray, np.ndarray]:
-            """Process a single CV fold and return results.
-
-            Args:
-                fold_idx_pair: Tuple of (train_indices, test_indices)
-
-            Returns:
-                Tuple containing:
-                - Dictionary of test predictions for each quantile
-                - Dictionary of train predictions for each quantile
-                - Array of actual train values
-                - Array of actual test values
-            """
-            fold_idx, (train_idx, test_idx) = fold_idx_pair
-            log.info(f"Processing fold {fold_idx+1}/{n_splits}")
-
-            # Split data for this fold
-            train_data = data.iloc[train_idx]
-            test_data = data.iloc[test_idx]
-
-            # Store actual values for this fold
-            train_y = train_data[imputed_variables].values
-            test_y = test_data[imputed_variables].values
-
-            # Instantiate the model
-            log.info(f"Initializing {model_class.__name__} model")
-            model = model_class()
-
-            fold_tuned_params = None
-
-            # Handle different model fitting requirements
-            if (
-                model_hyperparams
-                and model_class.__name__ == "QRF"
-                and ("QRF" in model_hyperparams)
-            ):
-                try:
-                    log.info(
-                        f"Fitting {model_class.__name__} model with hyperparameters: {model_hyperparams}"
-                    )
-                    fitted_model = model.fit(
-                        X_train=train_data,
-                        predictors=predictors,
-                        imputed_variables=imputed_variables,
-                        weight_col=weight_col,
-                        **model_hyperparams["QRF"],
-                    )
-                except TypeError as e:
-                    log.warning(
-                        f"Invalid hyperparameters, using defaults: {str(e)}"
-                    )
-                    fitted_model = model.fit(
-                        X_train=train_data,
-                        predictors=predictors,
-                        imputed_variables=imputed_variables,
-                        weight_col=weight_col,
-                    )
-                    raise ValueError(
-                        f"Invalid hyperparameters for model initialization. Current model hyperparameters: {fitted_model.models[imputed_variables[0]].qrf.get_params()}"
-                    ) from e
-            elif (
-                model_hyperparams
-                and Matching is not None
-                and model_class.__name__ == "Matching"
-                and ("Matching" in model_hyperparams)
-            ):
-                try:
-                    log.info(
-                        f"Fitting {model_class.__name__} model with hyperparameters: {model_hyperparams}"
-                    )
-                    fitted_model = model.fit(
-                        X_train=train_data,
-                        predictors=predictors,
-                        imputed_variables=imputed_variables,
-                        weight_col=weight_col,
-                        **model_hyperparams["Matching"],
-                    )
-                except TypeError as e:
-                    log.warning(
-                        f"Invalid hyperparameters, using defaults: {str(e)}"
-                    )
-                    fitted_model = model.fit(
-                        X_train=train_data,
-                        predictors=predictors,
-                        imputed_variables=imputed_variables,
-                        weight_col=weight_col,
-                    )
-                    raise ValueError(
-                        f"Invalid hyperparameters for model initialization. Current model hyperparameters: dist_fun=Manhattan, constrained=False"
-                    ) from e
-            else:
-                if model_class == QuantReg:
-                    log.info(f"Fitting QuantReg model with explicit quantiles")
-                    fitted_model = model.fit(
-                        train_data,
-                        predictors,
-                        imputed_variables,
-                        weight_col=weight_col,
-                        quantiles=quantiles,
-                    )
-                elif (
-                    model_class.__name__ == "QRF"
-                    or (
-                        Matching is not None
-                        and model_class.__name__ == "Matching"
-                    )
-                ) and tune_hyperparameters == True:
-                    log.info(
-                        f"Tuning {model_class.__name__} hyperparameters when fitting"
-                    )
-                    fitted_model, best_tuned_params = model.fit(
-                        train_data,
-                        predictors,
-                        imputed_variables,
-                        weight_col=weight_col,
-                        tune_hyperparameters=True,
-                    )
-                    fold_tuned_params = best_tuned_params
-
-                else:
-                    log.info(f"Fitting {model_class.__name__} model")
-                    fitted_model = model.fit(
-                        train_data,
-                        predictors,
-                        imputed_variables,
-                        weight_col=weight_col,
-                    )
-
-            # Get predictions for this fold
-            log.info(f"Generating predictions for train and test data")
-            fold_test_imputations = fitted_model.predict(test_data, quantiles)
-            fold_train_imputations = fitted_model.predict(
-                train_data, quantiles
-            )
-
-            # Return results for this fold
-            return (
-                fold_idx,
-                fold_test_imputations,
-                fold_train_imputations,
-                test_y,
-                train_y,
-                fold_tuned_params,
-            )
-
         # Execute folds in parallel
-        fold_results = []
         with joblib.Parallel(n_jobs=n_jobs, verbose=10) as parallel:
             fold_results = parallel(
-                joblib.delayed(process_single_fold)((i, fold_pair))
+                joblib.delayed(_process_single_fold)(
+                    (i, fold_pair),
+                    data,
+                    model_class,
+                    predictors,
+                    imputed_variables,
+                    weight_col,
+                    quantiles,
+                    model_hyperparams,
+                    tune_hyperparameters,
+                )
                 for i, fold_pair in enumerate(fold_indices)
             )
 
-        # Organize results
+        # Sort results by fold index
+        fold_results.sort(key=lambda x: x[0])
+
+        # Extract and organize results
         test_results = {q: [] for q in quantiles}
         train_results = {q: [] for q in quantiles}
         test_y_values = []
         train_y_values = []
+        tuned_hyperparameters = {}
 
-        # Sort results by fold index to maintain order
-        fold_results.sort(key=lambda x: x[0])
-
-        # Extract results
         for (
             fold_idx,
-            fold_test_imputations,
-            fold_train_imputations,
+            fold_test_imp,
+            fold_train_imp,
             test_y,
             train_y,
             fold_tuned_params,
         ) in fold_results:
             test_y_values.append(test_y)
             train_y_values.append(train_y)
-            if tune_hyperparameters:
+
+            if tune_hyperparameters and fold_tuned_params:
                 tuned_hyperparameters[fold_idx] = fold_tuned_params
 
-            # Store results for each quantile
             for q in quantiles:
-                test_results[q].append(fold_test_imputations[q])
-                train_results[q].append(fold_train_imputations[q])
+                test_results[q].append(fold_test_imp[q])
+                train_results[q].append(fold_train_imp[q])
 
-        # Calculate loss metrics (this can also be parallelized for large datasets)
+        # Compute losses
         log.info("Computing loss metrics across all folds")
+        avg_test_losses, avg_train_losses = _compute_losses_parallel(
+            test_y_values,
+            train_y_values,
+            test_results,
+            train_results,
+            quantiles,
+            n_jobs,
+        )
 
-        # Define a function to compute loss for a specific fold and quantile
-        def compute_fold_loss(fold_idx, quantile):
-            # Flatten arrays for easier calculation
-            test_y_flat = test_y_values[fold_idx].flatten()
-            train_y_flat = train_y_values[fold_idx].flatten()
-            test_pred_flat = test_results[quantile][fold_idx].values.flatten()
-            train_pred_flat = train_results[quantile][
-                fold_idx
-            ].values.flatten()
-
-            # Calculate the loss for this fold and quantile
-            test_loss = quantile_loss(quantile, test_y_flat, test_pred_flat)
-            train_loss = quantile_loss(quantile, train_y_flat, train_pred_flat)
-
-            return {
-                "fold": fold_idx,
-                "quantile": quantile,
-                "test_loss": test_loss.mean(),
-                "train_loss": train_loss.mean(),
-            }
-
-        # Create tasks for parallel loss computation
-        loss_tasks = [
-            (k, q) for k in range(len(test_y_values)) for q in quantiles
-        ]
-
-        # Compute losses in parallel if there are many folds/quantiles
-        if (
-            len(loss_tasks) > 10 and n_jobs != 1
-        ):  # Only parallelize if it's worth it
-            with joblib.Parallel(n_jobs=n_jobs) as parallel:
-                loss_results = parallel(
-                    joblib.delayed(compute_fold_loss)(fold_idx, q)
-                    for fold_idx, q in loss_tasks
-                )
-
-            # Organize loss results
-            avg_test_losses = {q: [] for q in quantiles}
-            avg_train_losses = {q: [] for q in quantiles}
-
-            if (
-                model_class == QRF
-                or (Matching is not None and model_class == Matching)
-            ) and tune_hyperparameters == True:
-                best_fold = fold_indices[0][0]
-                best_loss = float("inf")
-                for result in loss_results:
-                    q = result["quantile"]
-                    fold_idx = result["fold"]
-                    avg_test_losses[q].append(result["test_loss"])
-                    avg_train_losses[q].append(result["train_loss"])
-
-                    log.debug(
-                        f"Fold {fold_idx+1}, q={q}: Train loss = {result['train_loss']:.6f}, Test loss = {result['test_loss']:.6f}"
-                    )
-                    if q == 0.5:
-                        if result["test_loss"] < best_loss:
-                            best_loss = result["test_loss"]
-                            best_fold = fold_idx
-                best_tuned_hyperparams = tuned_hyperparameters[best_fold]
-            else:
-                for result in loss_results:
-                    q = result["quantile"]
-                    fold_idx = result["fold"]
-                    avg_test_losses[q].append(result["test_loss"])
-                    avg_train_losses[q].append(result["train_loss"])
-
-                    log.debug(
-                        f"Fold {fold_idx+1}, q={q}: Train loss = {result['train_loss']:.6f}, Test loss = {result['test_loss']:.6f}"
-                    )
-        else:
-            # Calculate losses sequentially for simpler cases
-            avg_test_losses = {q: [] for q in quantiles}
-            avg_train_losses = {q: [] for q in quantiles}
-
-            if (
-                model_class == QRF
-                or (Matching is not None and model_class == Matching)
-            ) and tune_hyperparameters == True:
-                best_fold = fold_indices[0][0]
-                best_loss = float("inf")
-                for k in range(len(test_y_values)):
-                    for q in quantiles:
-                        result = compute_fold_loss(k, q)
-                        avg_test_losses[q].append(result["test_loss"])
-                        avg_train_losses[q].append(result["train_loss"])
-
-                        log.debug(
-                            f"Fold {k+1}, q={q}: Train loss = {result['train_loss']:.6f}, Test loss = {result['test_loss']:.6f}"
-                        )
-                    if result["test_loss"] < best_loss:
-                        best_fold = k
-                        best_loss = result["test_loss"]
-                best_tuned_hyperparams = tuned_hyperparameters[best_fold]
-            else:
-                for k in range(len(test_y_values)):
-                    for q in quantiles:
-                        result = compute_fold_loss(k, q)
-                        avg_test_losses[q].append(result["test_loss"])
-                        avg_train_losses[q].append(result["train_loss"])
-
-                        log.debug(
-                            f"Fold {k+1}, q={q}: Train loss = {result['train_loss']:.6f}, Test loss = {result['test_loss']:.6f}"
-                        )
-
-        # Calculate the average loss across all folds for each quantile
+        # Calculate final average metrics
         log.info("Calculating final average metrics")
         final_test_losses = {
             q: np.mean(losses) for q, losses in avg_test_losses.items()
@@ -422,29 +415,41 @@ def compute_fold_loss(fold_idx, quantile):
             q: np.mean(losses) for q, losses in avg_train_losses.items()
         }
 
-        # Create DataFrame with quantiles as columns
+        # Create results DataFrame
         final_results = pd.DataFrame(
             [final_train_losses, final_test_losses], index=["train", "test"]
         )
 
-        # Generate summary statistics
+        # Log summary statistics
         train_mean = final_results.loc["train"].mean()
         test_mean = final_results.loc["test"].mean()
-        train_test_ratio = train_mean / test_mean
-
         log.info(f"Cross-validation completed for {model_class.__name__}")
         log.info(f"Average Train Loss: {train_mean:.6f}")
         log.info(f"Average Test Loss: {test_mean:.6f}")
-        log.info(f"Train/Test Ratio: {train_test_ratio:.6f}")
-
-        if tune_hyperparameters:
-            return final_results, best_tuned_hyperparams
+        log.info(f"Train/Test Ratio: {train_mean / test_mean:.6f}")
+
+        # Return results with optional hyperparameters
+        if tune_hyperparameters and tuned_hyperparameters:
+            # Create simplified loss results for hyperparameter selection
+            loss_results = []
+            for fold_idx in range(len(test_y_values)):
+                for q in quantiles:
+                    loss_results.append(
+                        {
+                            "fold": fold_idx,
+                            "quantile": q,
+                            "test_loss": avg_test_losses[q][fold_idx],
+                        }
+                    )
+            best_hyperparams = _select_best_hyperparameters(
+                loss_results, tuned_hyperparameters
+            )
+            return final_results, best_hyperparams
         else:
             return final_results
 
     except ValueError as e:
-        # Re-raise validation errors directly
         raise e
-    except Exception as e:
+    except (KeyError, TypeError, AttributeError, ImportError) as e:
         log.error(f"Error during cross-validation: {str(e)}")
         raise RuntimeError(f"Cross-validation failed: {str(e)}") from e
diff --git a/microimpute/main.py b/microimpute/main.py
deleted file mode 100644
index f678fac..0000000
--- a/microimpute/main.py
+++ /dev/null
@@ -1,20 +0,0 @@
-"""
-MicroImpute Package
-
-A package for benchmarking different imputation methods.
-"""
-
-__version__ = "0.1.0"
-
-from microimpute.utils.logging_utils import configure_logging
-
-
-def main():
-    # Logging configuration
-    configure_logging()
-
-    # Not sure what the main functionality should be
-
-
-if __name__ == "__main__":
-    main()
diff --git a/microimpute/models/__init__.py b/microimpute/models/__init__.py
index efd3525..eb0d105 100644
--- a/microimpute/models/__init__.py
+++ b/microimpute/models/__init__.py
@@ -1,17 +1,29 @@
-"""Imputation Models
+"""Statistical imputation models
 
-This module contains different imputation models for benchmarking.
+This module provides a collection of statistical models for data imputation,
+including both parametric and non-parametric approaches. Each model extends
+the base Imputer class and provides quantile-based predictions.
+
+Available models:
+    - OLS: ordinary least squares regression with bootstrapped quantiles
+    - QRF: quantile random forest for non-parametric quantile regression
+    - QuantReg: linear quantile regression model
+    - Matching: statistical matching/hot-deck imputation (optional, requires rpy2)
+
+Base classes:
+    - Imputer: abstract base class for all imputation models
+    - ImputerResults: container for fitted model and prediction methods
 """
 
 # Import base classes
-from .imputer import Imputer, ImputerResults
+from microimpute.models.imputer import Imputer, ImputerResults
 
 try:
-    from .matching import Matching
+    from microimpute.models.matching import Matching
 except ImportError:
     pass
 
 # Import specific model implementations
-from .ols import OLS
-from .qrf import QRF
-from .quantreg import QuantReg
+from microimpute.models.ols import OLS
+from microimpute.models.qrf import QRF
+from microimpute.models.quantreg import QuantReg
diff --git a/microimpute/models/imputer.py b/microimpute/models/imputer.py
index 30f48b5..2a12bc3 100644
--- a/microimpute/models/imputer.py
+++ b/microimpute/models/imputer.py
@@ -10,7 +10,7 @@
 
 import logging
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Set, Union
+from typing import Any, Dict, List, Optional, Set, Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -19,30 +19,490 @@
 from microimpute.config import RANDOM_STATE, VALIDATE_CONFIG
 
 
-def _has_equal_spacing(values: np.ndarray, variable: str) -> bool:
-    """Check if numeric values have equal spacing between consecutive values.
+class VariableTypeDetector:
+    """Utility class for detecting and categorizing variable types."""
+
+    @staticmethod
+    def is_boolean_variable(series: pd.Series) -> bool:
+        """Check if a series represents boolean data."""
+        if pd.api.types.is_bool_dtype(series):
+            return True
+
+        unique_vals = set(series.dropna().unique())
+        if pd.api.types.is_integer_dtype(series) and unique_vals <= {0, 1}:
+            return True
+        if pd.api.types.is_float_dtype(series) and unique_vals <= {0.0, 1.0}:
+            return True
+
+        return False
+
+    @staticmethod
+    def is_categorical_variable(series: pd.Series) -> bool:
+        """Check if a series represents categorical string/object data."""
+        return pd.api.types.is_string_dtype(
+            series
+        ) or pd.api.types.is_object_dtype(series)
+
+    @staticmethod
+    def is_numeric_categorical_variable(
+        series: pd.Series, max_unique: int = 10
+    ) -> bool:
+        """Check if a numeric series should be treated as categorical."""
+        if not pd.api.types.is_numeric_dtype(series):
+            return False
+
+        if series.nunique() >= max_unique:
+            return False
+
+        # Check for equal spacing between values
+        unique_values = np.sort(series.dropna().unique())
+        if len(unique_values) < 2:
+            return True
+
+        differences = np.diff(unique_values)
+        return np.allclose(differences, differences[0], rtol=1e-9)
+
+    @staticmethod
+    def categorize_variable(
+        series: pd.Series, col_name: str, logger: logging.Logger
+    ) -> Tuple[str, Optional[List]]:
+        """
+        Categorize a variable and return its type and categories if applicable.
 
-    Args:
-        values: Array of numeric values to check
+        Returns:
+            Tuple of (variable_type, categories)
+            variable_type: 'bool', 'categorical', 'numeric_categorical', or 'numeric'
+            categories: List of unique values for categorical types, None for numeric
+        """
+        if VariableTypeDetector.is_boolean_variable(series):
+            return "bool", None
 
-    Returns:
-        bool: True if values have equal spacing, False otherwise
-    """
-    if len(values) < 2:
-        return True
+        if VariableTypeDetector.is_categorical_variable(series):
+            return "categorical", series.unique().tolist()
+
+        if VariableTypeDetector.is_numeric_categorical_variable(series):
+            categories = [float(i) for i in series.unique().tolist()]
+            logger.info(
+                f"Treating numeric variable '{col_name}' as categorical due to low unique count and equal spacing"
+            )
+            return "numeric_categorical", categories
+
+        return "numeric", None
+
+
+class DummyVariableProcessor:
+    """Handles conversion between original variables and dummy variables."""
+
+    def __init__(self, logger: logging.Logger):
+        self.logger = logger
+        self.dummy_info = {
+            "original_dtypes": {},
+            "column_mapping": {},
+            "original_categories": {},
+        }
+
+    def preprocess_variables(
+        self,
+        data: pd.DataFrame,
+        predictors: List[str],
+        imputed_variables: List[str],
+    ) -> Tuple[pd.DataFrame, List[str], List[str], Dict]:
+        """
+        Process all variables, converting categoricals to dummies as needed.
+
+        Returns:
+            Tuple of (processed_data, updated_predictors, updated_imputed_variables, imputed_vars_dummy_info)
+        """
+        data = data[predictors + imputed_variables].copy()
+        detector = VariableTypeDetector()
+
+        # Categorize all columns
+        column_categories = {}
+        for col in data.columns:
+            var_type, categories = detector.categorize_variable(
+                data[col], col, self.logger
+            )
+            column_categories[col] = (var_type, categories, data[col].dtype)
+
+        # Process variables according to their types
+        bool_columns = [
+            col
+            for col, (vtype, _, _) in column_categories.items()
+            if vtype == "bool"
+        ]
+        if bool_columns:
+            self._process_boolean_columns(
+                data, bool_columns, column_categories
+            )
+
+        categorical_columns = [
+            col
+            for col, (vtype, _, _) in column_categories.items()
+            if vtype in ["categorical", "numeric_categorical"]
+        ]
+
+        if categorical_columns:
+            data, predictors, imputed_variables = (
+                self._process_categorical_columns(
+                    data,
+                    categorical_columns,
+                    column_categories,
+                    predictors,
+                    imputed_variables,
+                )
+            )
+
+        imputed_vars_dummy_info = self._filter_imputed_vars_info(
+            imputed_variables
+        )
+
+        return data, predictors, imputed_variables, imputed_vars_dummy_info
+
+    def _process_boolean_columns(
+        self,
+        data: pd.DataFrame,
+        bool_columns: List[str],
+        column_categories: Dict,
+    ) -> None:
+        """Process boolean columns by converting to float."""
+        self.logger.info(
+            f"Converting {len(bool_columns)} boolean columns: {bool_columns}"
+        )
+
+        for col in bool_columns:
+            _, _, original_dtype = column_categories[col]
+            self.dummy_info["original_dtypes"][col] = ("bool", original_dtype)
+            self.dummy_info["column_mapping"][col] = [col]
+            data[col] = data[col].astype("float64")
+
+    def _process_categorical_columns(
+        self,
+        data: pd.DataFrame,
+        categorical_columns: List[str],
+        column_categories: Dict,
+        predictors: List[str],
+        imputed_variables: List[str],
+    ) -> Tuple[pd.DataFrame, List[str], List[str]]:
+        """Process categorical columns by creating dummy variables."""
+        for col in categorical_columns:
+            var_type, categories, original_dtype = column_categories[col]
+            self.dummy_info["original_dtypes"][col] = (
+                (
+                    "numeric categorical"
+                    if var_type == "numeric_categorical"
+                    else "categorical"
+                ),
+                original_dtype,
+            )
+            if categories:
+                self.dummy_info["original_categories"][col] = categories
+
+            if var_type == "numeric_categorical":
+                data[col] = data[col].astype("float64").astype("category")
+
+        # Create dummy variables
+        dummy_data = pd.get_dummies(
+            data[categorical_columns],
+            columns=categorical_columns,
+            dtype="float64",
+            drop_first=True,
+        )
+
+        self.logger.debug(
+            f"Created {dummy_data.shape[1]} dummy variables from {len(categorical_columns)} categorical columns"
+        )
+
+        # Create column mappings
+        for orig_col in categorical_columns:
+            related_dummies = [
+                col
+                for col in dummy_data.columns
+                if col.startswith(f"{orig_col}_")
+            ]
+
+            if not related_dummies:
+                self._handle_single_category_variable(
+                    data, orig_col, column_categories[orig_col]
+                )
+                self.dummy_info["column_mapping"][orig_col] = [orig_col]
+            else:
+                self.dummy_info["column_mapping"][orig_col] = related_dummies
+
+        # Combine data
+        numeric_data = data.drop(
+            columns=[col for col in categorical_columns if col in data.columns]
+        )
+        data = pd.concat([numeric_data, dummy_data], axis=1)
+
+        # Update predictor and imputed_variables lists with dummy columns' names
+        predictors, imputed_variables = self._update_variable_lists(
+            predictors, imputed_variables, data.columns
+        )
+
+        for col in data.columns:
+            data[col] = data[col].astype("float64")
+
+        return data, predictors, imputed_variables
+
+    def _handle_single_category_variable(
+        self, data: pd.DataFrame, col: str, col_info: Tuple[str, List, Any]
+    ) -> None:
+        """Handle variables with only a single category."""
+        var_type, categories, _ = col_info
+
+        if var_type == "numeric_categorical":
+            self.logger.info(
+                f"Keeping numeric categorical '{col}' as numeric column"
+            )
+            if categories:
+                data[col] = categories[0]
+        else:
+            self.logger.info(
+                f"Converting single-value categorical '{col}' to numeric encoding (1.0)"
+            )
+            data[col] = 1.0
+
+    def _update_variable_lists(
+        self,
+        predictors: List[str],
+        imputed_variables: List[str],
+        data_columns: pd.Index,
+    ) -> Tuple[List[str], List[str]]:
+        """Update predictor and imputed variable lists with dummy columns."""
+        new_predictors = predictors.copy()
+        new_imputed_variables = imputed_variables.copy()
+
+        for col, dummy_cols in self.dummy_info["column_mapping"].items():
+            if len(dummy_cols) > 0 and all(
+                dc in data_columns for dc in dummy_cols
+            ):
+                if col in new_predictors:
+                    new_predictors.remove(col)
+                    new_predictors.extend(dummy_cols)
+                elif col in new_imputed_variables:
+                    new_imputed_variables.remove(col)
+                    new_imputed_variables.extend(dummy_cols)
+
+        return new_predictors, new_imputed_variables
+
+    def _filter_imputed_vars_info(self, imputed_variables: List[str]) -> Dict:
+        """Create dummy info specific to imputed variables."""
+        imputed_vars_dummy_info = {
+            "original_dtypes": {},
+            "column_mapping": {},
+            "original_categories": {},
+        }
+
+        for col in self.dummy_info["column_mapping"]:
+            dummy_cols = self.dummy_info["column_mapping"][col]
+            if any(dc in imputed_variables for dc in dummy_cols):
+                imputed_vars_dummy_info["column_mapping"][col] = dummy_cols
+                imputed_vars_dummy_info["original_dtypes"][col] = (
+                    self.dummy_info["original_dtypes"][col]
+                )
+                if col in self.dummy_info["original_categories"]:
+                    imputed_vars_dummy_info["original_categories"][col] = (
+                        self.dummy_info["original_categories"][col]
+                    )
+
+        return imputed_vars_dummy_info
+
+    def reverse_dummy_encoding(
+        self,
+        imputations: Union[Dict[float, pd.DataFrame], pd.DataFrame],
+        dummy_info: Dict[str, Any],
+    ) -> Union[Dict[float, pd.DataFrame], pd.DataFrame]:
+        """Convert dummy variables back to original categorical format."""
+        if isinstance(imputations, dict):
+            processed_imputations = {}
+            for quantile, df in imputations.items():
+                processed_imputations[quantile] = (
+                    self._process_single_dataframe(df.copy(), dummy_info)
+                )
+        else:
+            processed_imputations = self._process_single_dataframe(
+                imputations.copy(), dummy_info
+            )
+
+        return processed_imputations
+
+    def _process_single_dataframe(
+        self, df: pd.DataFrame, dummy_info: Dict[str, Any]
+    ) -> pd.DataFrame:
+        """Process a single quantile DataFrame."""
+        for orig_col, dummy_cols in dummy_info.get(
+            "column_mapping", {}
+        ).items():
+            if orig_col not in dummy_info.get("original_dtypes", {}):
+                continue
+
+            dtype_info = dummy_info["original_dtypes"][orig_col]
+            if not isinstance(dtype_info, tuple) or len(dtype_info) != 2:
+                self.logger.warning(
+                    f"Unexpected dtype format for {orig_col}: {dtype_info}"
+                )
+                continue
+
+            dtype_category, original_pandas_dtype = dtype_info
+
+            if dtype_category == "bool" and orig_col in df.columns:
+                df[orig_col] = self._reverse_boolean(
+                    df[orig_col], original_pandas_dtype
+                )
+            elif dtype_category in ["categorical", "numeric_categorical"]:
+                df = self._reverse_categorical(
+                    df,
+                    orig_col,
+                    dummy_cols,
+                    dummy_info,
+                    dtype_category,
+                    original_pandas_dtype,
+                )
+
+        return df
+
+    def _reverse_boolean(
+        self, series: pd.Series, original_dtype: Any
+    ) -> pd.Series:
+        """Convert float back to boolean."""
+        threshold = 0.5
+        bool_series = series > threshold
+        return bool_series.astype(original_dtype)
+
+    def _reverse_categorical(
+        self,
+        df: pd.DataFrame,
+        orig_col: str,
+        dummy_cols: List[str],
+        dummy_info: Dict,
+        dtype_category: str,
+        original_dtype: Any,
+    ) -> pd.DataFrame:
+        """Convert dummy variables back to categorical."""
+        available_dummies = [col for col in dummy_cols if col in df.columns]
+
+        if not available_dummies:
+            return self._handle_single_category_reverse(
+                df, orig_col, dummy_cols, dummy_info, original_dtype
+            )
+
+        categories = dummy_info["original_categories"][orig_col]
+        reference_category = self._find_reference_category(
+            orig_col, available_dummies, categories
+        )
+
+        # Convert dummies back to categorical
+        df[orig_col] = self._dummies_to_categorical(
+            df[available_dummies], orig_col, categories, reference_category
+        )
+
+        # Convert to original dtype if needed
+        if original_dtype != "object":
+            try:
+                df[orig_col] = df[orig_col].astype(original_dtype)
+            except (ValueError, TypeError) as e:
+                self.logger.warning(
+                    f"Could not convert {orig_col} to {original_dtype}: {e}"
+                )
+
+        # Drop dummy columns
+        df = df.drop(columns=available_dummies)
+
+        return df
+
+    def _handle_single_category_reverse(
+        self,
+        df: pd.DataFrame,
+        orig_col: str,
+        dummy_cols: List[str],
+        dummy_info: Dict,
+        original_dtype: Any,
+    ) -> pd.DataFrame:
+        """Handle reversal for single-category variables."""
+        if (
+            orig_col in df.columns
+            and len(dummy_cols) == 1
+            and dummy_cols[0] == orig_col
+        ):
+            categories = dummy_info["original_categories"][orig_col]
+            df[orig_col] = categories[0]
+
+            if original_dtype != "object":
+                try:
+                    df[orig_col] = df[orig_col].astype(original_dtype)
+                except (ValueError, TypeError) as e:
+                    self.logger.warning(
+                        f"Could not convert {orig_col} to original dtype: {e}"
+                    )
+
+        return df
+
+    def _find_reference_category(
+        self,
+        orig_col: str,
+        available_dummies: List[str],
+        original_categories: List,
+    ) -> Any:
+        """Find the reference category that was dropped during dummy encoding."""
+        dummy_categories = []
+        for dummy_col in available_dummies:
+            category_part = dummy_col.replace(f"{orig_col}_", "", 1)
+            try:
+                if category_part.replace(".", "").replace("-", "").isdigit():
+                    dummy_categories.append(float(category_part))
+                else:
+                    dummy_categories.append(category_part)
+            except:
+                dummy_categories.append(category_part)
+
+        for cat in original_categories:
+            if cat not in dummy_categories:
+                return cat
+
+        return original_categories[0] if original_categories else None
+
+    def _dummies_to_categorical(
+        self,
+        dummy_df: pd.DataFrame,
+        orig_col: str,
+        categories: List,
+        reference_category: Any,
+    ) -> pd.Series:
+        """Convert dummy columns to categorical values."""
+        category_mapping = {
+            f"{orig_col}_{cat}": cat
+            for cat in categories
+            if f"{orig_col}_{cat}" in dummy_df.columns
+        }
 
-    unique_values = np.sort(np.unique(values[~np.isnan(values)]))
-    if len(unique_values) < 2:
-        return True
+        # Find max dummy value per row
+        max_idx = dummy_df.idxmax(axis=1)
+        max_values = dummy_df.max(axis=1)
 
-    differences = np.diff(unique_values)
+        # Initialize with reference category
+        result = pd.Series(reference_category, index=dummy_df.index)
 
-    same_difference = np.allclose(differences, differences[0], rtol=1e-9)
-    if not same_difference:
-        logging.warning(
-            f"Values do not have equal spacing, will not convert {variable} to categorical"
+        # Assign to dummy categories where confidence > threshold
+        threshold = 0.5
+        high_confidence_mask = max_values >= threshold
+        if high_confidence_mask.any():
+            result.loc[high_confidence_mask] = max_idx[
+                high_confidence_mask
+            ].map(category_mapping)
+
+        nan_mask = result.isna()
+        if nan_mask.any():
+            result.loc[nan_mask] = reference_category
+            self.logger.warning(
+                f"Some values could not be mapped for {orig_col}, using reference category"
+            )
+
+        self.logger.info(
+            f"Assigned {high_confidence_mask.sum()} observations to dummy categories, "
+            f"{(~high_confidence_mask).sum()} to reference category '{reference_category}'"
         )
-    return same_difference
+
+        return result
 
 
 class Imputer(ABC):
@@ -65,17 +525,15 @@ def __init__(
         self.original_predictors: Optional[List[str]] = None
         self.seed = seed
         self.logger = logging.getLogger(__name__)
-        if log_level == "DEBUG":
-            log_level = logging.DEBUG
-        elif log_level == "INFO":
-            log_level = logging.INFO
-        elif log_level == "WARNING":
-            log_level = logging.WARNING
-        elif log_level == "ERROR":
-            log_level = logging.ERROR
-        elif log_level == "CRITICAL":
-            log_level = logging.CRITICAL
-        self.logger.setLevel(log_level)
+
+        log_level_map = {
+            "DEBUG": logging.DEBUG,
+            "INFO": logging.INFO,
+            "WARNING": logging.WARNING,
+            "ERROR": logging.ERROR,
+            "CRITICAL": logging.CRITICAL,
+        }
+        self.logger.setLevel(log_level_map.get(log_level, logging.WARNING))
 
     @validate_call(config=VALIDATE_CONFIG)
     def _validate_data(self, data: pd.DataFrame, columns: List[str]) -> None:
@@ -104,13 +562,13 @@ def _validate_data(self, data: pd.DataFrame, columns: List[str]) -> None:
             )
 
     @validate_call(config=VALIDATE_CONFIG)
-    def _preprocess_data_types(
+    def preprocess_data_types(
         self,
         data: pd.DataFrame,
         predictors: List[str],
         imputed_variables: List[str],
-    ) -> pd.DataFrame:
-        """Ensure all predictor columns are numeric. Transform booleand and categorical variables if necessary.
+    ) -> Tuple[pd.DataFrame, List[str], List[str], Dict[str, Any]]:
+        """Ensure all predictor columns are numeric. Transform boolean and categorical variables if necessary.
 
         Args:
             data: DataFrame containing the data.
@@ -118,243 +576,22 @@ def _preprocess_data_types(
             imputed_variables: List of column names to ensure are numeric.
 
         Returns:
-            data: DataFrame with specified variables converted to numeric types.
-            predictors: List of predictor column names after conversion.
-            imputed_variables: List of imputed variable column names after conversion.
-            dummy_info: Dictionary containing information about dummy variables created for post-processing of imputed variables.
+            Tuple of (data, predictors, imputed_variables, dummy_info)
 
         Raises:
             ValueError: If any column cannot be converted to numeric.
         """
-        # Initialize dummy information dictionary
-        dummy_info = {
-            "original_dtypes": {},
-            "column_mapping": {},
-            "original_categories": {},
-        }
-
-        data = data[predictors + imputed_variables].copy()
-
         try:
-            self.logger.debug(
-                "Converting boolean and categorical columns to numerical format"
+            processor = DummyVariableProcessor(self.logger)
+            return processor.preprocess_variables(
+                data, predictors, imputed_variables
             )
-            # Identify boolean columns and convert them to strings
-            bool_columns = [
-                col
-                for col in data.columns
-                if (
-                    pd.api.types.is_bool_dtype(data[col])
-                    or (
-                        pd.api.types.is_integer_dtype(data[col])
-                        and set(data[col].unique()) == {0, 1}
-                    )
-                    or (
-                        pd.api.types.is_float_dtype(data[col])
-                        and set(data[col].unique()) == {0.0, 1.0}
-                    )
-                )
-            ]
-
-            if bool_columns:
-                self.logger.info(
-                    f"Found {len(bool_columns)} boolean columns to convert: {bool_columns}"
-                )
-                for col in bool_columns:
-                    dummy_info["original_dtypes"][col] = (
-                        "bool",
-                        data[col].dtype,
-                    )
-                    # For boolean columns, map the column to itself since we don't create dummies
-                    dummy_info["column_mapping"][col] = [col]
-                    data[col] = data[col].astype("float64")
-
-            # Identify string and object columns (excluding already processed booleans)
-            string_columns = [
-                col
-                for col in data.columns
-                if (
-                    pd.api.types.is_string_dtype(data[col])
-                    or pd.api.types.is_object_dtype(data[col])
-                )
-                and col not in bool_columns
-            ]
-
-            # Identify numeric columns that represent categorical data
-            numeric_categorical_columns = [
-                col
-                for col in data.columns
-                if pd.api.types.is_numeric_dtype(data[col])
-                and data[col].nunique()
-                < 10  # Parse as category if unique count < 10
-                and _has_equal_spacing(
-                    data[col].values, col
-                )  # Only convert if values have equal spacing
-                and col
-                not in bool_columns  # Exclude already processed boolean columns
-            ]
-
-            if numeric_categorical_columns:
-                self.logger.info(
-                    f"Found {len(numeric_categorical_columns)} numeric columns with unique values < 10, treating as categorical: {numeric_categorical_columns}. Converting to dummy variables."
-                )
-                for col in numeric_categorical_columns:
-                    dummy_info["original_categories"][col] = [
-                        float(i) for i in data[col].unique().tolist()
-                    ]
-                    dummy_info["original_dtypes"][col] = (
-                        "numeric categorical",
-                        data[col].dtype,
-                    )
-                    data[col] = data[col].astype("float64")
-                    data[col] = data[col].astype("category")
-
-            if string_columns:
-                self.logger.info(
-                    f"Found {len(string_columns)} categorical columns to convert: {string_columns}"
-                )
-
-                # Store original categories and dtypes for categorical columns
-                for col in string_columns:
-                    dummy_info["original_dtypes"][col] = (
-                        "categorical",
-                        data[col].dtype,
-                    )
-                    dummy_info["original_categories"][col] = (
-                        data[col].unique().tolist()
-                    )
-
-            if string_columns or numeric_categorical_columns:
-                # Use pandas get_dummies to create one-hot encoded features
-                categorical_columns = (
-                    string_columns + numeric_categorical_columns
-                )
-                dummy_data = pd.get_dummies(
-                    data[categorical_columns],
-                    columns=categorical_columns,
-                    dtype="float64",
-                    drop_first=True,
-                )
-                for col in dummy_data.columns:
-                    dummy_data[col] = dummy_data[col].astype("float64")
-                self.logger.debug(
-                    f"Created {dummy_data.shape[1]} dummy variables from {len(categorical_columns)} categorical columns"
-                )
-
-                # Create mapping from original columns to their resulting dummy columns
-                for orig_col in categorical_columns:
-                    # Find all dummy columns that came from this original column
-                    related_dummies = [
-                        col
-                        for col in dummy_data.columns
-                        if col.startswith(f"{orig_col}_")
-                    ]
-                    dummy_info["column_mapping"][orig_col] = (
-                        related_dummies
-                        if len(related_dummies) > 0
-                        else [orig_col]
-                    )
-
-                # Drop original string and numeric categorical columns and join the dummy variables
-                numeric_data = data.drop(columns=categorical_columns)
-                self.logger.debug(
-                    f"Removed original string and numeric categorical columns, data shape: {numeric_data.shape}"
-                )
-
-                # Combine numeric columns with dummy variables
-                data = pd.concat([numeric_data, dummy_data], axis=1)
-                for col in data.columns:
-                    data[col] = data[col].astype("float64")
-                self.logger.info(
-                    f"Data shape after dummy variable conversion: {data.shape}"
-                )
-
-            imputed_vars_dummy_info = {
-                "original_dtypes": {},
-                "column_mapping": {},
-                "original_categories": {},
-            }
-            for col, dummy_cols in dummy_info["column_mapping"].items():
-                # Only update variable lists if dummy columns were actually created and exist in data
-                if len(dummy_cols) > 0 and all(
-                    dc in data.columns for dc in dummy_cols
-                ):
-                    if col in predictors:
-                        predictors.remove(col)
-                        predictors.extend(dummy_cols)
-                    elif col in imputed_variables:
-                        imputed_variables.remove(col)
-                        imputed_variables.extend(dummy_cols)
-                        imputed_vars_dummy_info["column_mapping"][
-                            col
-                        ] = dummy_cols
-                        imputed_vars_dummy_info["original_dtypes"][col] = (
-                            dummy_info["original_dtypes"][col][0],
-                            dummy_info["original_dtypes"][col][1],
-                        )
-                        if col in dummy_info["original_categories"]:
-                            imputed_vars_dummy_info["original_categories"][
-                                col
-                            ] = dummy_info["original_categories"][col]
-                else:
-                    # If no dummy columns were created, handle based on original data type
-                    self.logger.warning(
-                        f"Variable '{col}' was processed as categorical but no dummy variables "
-                        f"were created (likely due to having only one unique value)."
-                    )
-
-                    # Check if the original column was numeric
-                    dtype_info = dummy_info["original_dtypes"].get(col)
-                    is_numeric_categorical = (
-                        dtype_info and dtype_info[0] == "numeric categorical"
-                    )
-
-                    if is_numeric_categorical:
-                        # For numeric categorical, restore the original column since it can still be processed
-                        self.logger.info(
-                            f"Restoring numeric categorical variable '{col}' as numeric column."
-                        )
-                        # Get the single unique value and create a column with that value
-                        original_categories = dummy_info[
-                            "original_categories"
-                        ][col]
-                        single_value = original_categories[
-                            0
-                        ]  # There should be only one
-                        data[col] = single_value
-                        # Keep it in the variable lists as a regular numeric column
-                    else:
-                        # For non-numeric categorical (strings), encode as 1.0 and store for post-processing
-                        self.logger.info(
-                            f"Converting single-value categorical variable '{col}' to numeric encoding (1.0)."
-                        )
-                        # Create a column with value 1.0 for the single category
-                        data[col] = 1.0
-
-                        # Store info for post-processing to convert back
-                        if col in imputed_variables:
-                            imputed_vars_dummy_info["column_mapping"][col] = [
-                                col
-                            ]
-                            imputed_vars_dummy_info["original_dtypes"][col] = (
-                                dummy_info["original_dtypes"][col][0],
-                                dummy_info["original_dtypes"][col][1],
-                            )
-                            if col in dummy_info["original_categories"]:
-                                imputed_vars_dummy_info["original_categories"][
-                                    col
-                                ] = dummy_info["original_categories"][col]
-                        # Keep it in the variable lists
-
-            return data, predictors, imputed_variables, imputed_vars_dummy_info
 
         except Exception as e:
             self.logger.error(
-                f"Error during string column conversion: {str(e)}"
+                f"Error during donor data preprocessing: {str(e)}"
             )
-            raise RuntimeError(
-                "Failed to convert string columns to dummy variables"
-            ) from e
+            raise RuntimeError("Failed to preprocess data types") from e
 
     @validate_call(config=VALIDATE_CONFIG)
     def fit(
@@ -421,7 +658,7 @@ def fit(
             raise ValueError("Weights must be positive")
 
         X_train, predictors, imputed_variables, imputed_vars_dummy_info = (
-            self._preprocess_data_types(X_train, predictors, imputed_variables)
+            self.preprocess_data_types(X_train, predictors, imputed_variables)
         )
 
         if weights is not None:
@@ -493,7 +730,6 @@ def _handle_missing_variables(
             v for v in imputed_variables if v not in X_train.columns
         ]
 
-        # Handle missing variables
         if missing_vars:
             self.logger.warning(
                 f"Variables not found in X_train: {missing_vars}. "
@@ -533,17 +769,15 @@ def __init__(
         self.original_predictors = original_predictors
         self.seed = seed
         self.logger = logging.getLogger(__name__)
-        if log_level == "DEBUG":
-            log_level = logging.DEBUG
-        elif log_level == "INFO":
-            log_level = logging.INFO
-        elif log_level == "WARNING":
-            log_level = logging.WARNING
-        elif log_level == "ERROR":
-            log_level = logging.ERROR
-        elif log_level == "CRITICAL":
-            log_level = logging.CRITICAL
-        self.logger.setLevel(log_level)
+
+        log_level_map = {
+            "DEBUG": logging.DEBUG,
+            "INFO": logging.INFO,
+            "WARNING": logging.WARNING,
+            "ERROR": logging.ERROR,
+            "CRITICAL": logging.CRITICAL,
+        }
+        self.logger.setLevel(log_level_map.get(log_level, logging.WARNING))
 
     @validate_call(config=VALIDATE_CONFIG)
     def _validate_quantiles(
@@ -577,7 +811,7 @@ def _validate_quantiles(
                 )
 
     @validate_call(config=VALIDATE_CONFIG)
-    def _preprocess_data_types(
+    def preprocess_data_types(
         self,
         data: pd.DataFrame,
         predictors: List[str],
@@ -594,161 +828,20 @@ def _preprocess_data_types(
         Raises:
             ValueError: If any column cannot be converted to numeric.
         """
-        # Initialize dummy information dictionary
-        dummy_info = {
-            "original_dtypes": {},
-            "column_mapping": {},
-            "original_categories": {},
-        }
-
-        data = data[predictors].copy()
-
         try:
-            self.logger.debug(
-                "Converting boolean and categorical columns to numerical format"
+            processor = DummyVariableProcessor(self.logger)
+            processed_data, _, _, _ = processor.preprocess_variables(
+                data, predictors, []
             )
-            # Identify boolean columns and convert them to strings
-            bool_columns = [
-                col
-                for col in data.columns
-                if (
-                    pd.api.types.is_bool_dtype(data[col])
-                    or (
-                        pd.api.types.is_integer_dtype(data[col])
-                        and set(data[col].unique()) == {0, 1}
-                    )
-                    or (
-                        pd.api.types.is_float_dtype(data[col])
-                        and set(data[col].unique()) == {0.0, 1.0}
-                    )
-                )
-            ]
-
-            if bool_columns:
-                self.logger.info(
-                    f"Found {len(bool_columns)} boolean columns to convert: {bool_columns}"
-                )
-                for col in bool_columns:
-                    dummy_info["original_dtypes"][col] = (
-                        "bool",
-                        data[col].dtype,
-                    )
-                    # For boolean columns, map the column to itself since we don't create dummies
-                    dummy_info["column_mapping"][col] = [col]
-                    data[col] = data[col].astype("float64")
-
-            # Identify string and object columns (excluding already processed booleans)
-            string_columns = [
-                col
-                for col in data.columns
-                if (
-                    pd.api.types.is_string_dtype(data[col])
-                    or pd.api.types.is_object_dtype(data[col])
-                )
-                and col not in bool_columns
-            ]
-
-            # Identify numeric columns that represent categorical data
-            numeric_categorical_columns = [
-                col
-                for col in data.columns
-                if pd.api.types.is_numeric_dtype(data[col])
-                and data[col].nunique()
-                < 10  # Parse as category if unique count < 10
-                and _has_equal_spacing(
-                    data[col].values, col
-                )  # Only convert if values have equal spacing
-                and col
-                not in bool_columns  # Exclude already processed boolean columns
-            ]
-
-            if numeric_categorical_columns:
-                self.logger.info(
-                    f"Found {len(numeric_categorical_columns)} numeric columns with unique values < 10, treating as categorical: {numeric_categorical_columns}. Converting to dummy variables."
-                )
-                for col in numeric_categorical_columns:
-                    dummy_info["original_categories"][col] = [
-                        float(i) for i in data[col].unique().tolist()
-                    ]
-                    dummy_info["original_dtypes"][col] = (
-                        "numeric categorical",
-                        data[col].dtype,
-                    )
-                    data[col] = data[col].astype("float64")
-                    data[col] = data[col].astype("category")
-
-            if string_columns:
-                self.logger.info(
-                    f"Found {len(string_columns)} categorical columns to convert: {string_columns}"
-                )
-
-                # Store original categories and dtypes for categorical columns
-                for col in string_columns:
-                    dummy_info["original_dtypes"][col] = (
-                        "categorical",
-                        data[col].dtype,
-                    )
-                    dummy_info["original_categories"][col] = (
-                        data[col].unique().tolist()
-                    )
-
-            if string_columns or numeric_categorical_columns:
-                # Use pandas get_dummies to create one-hot encoded features
-                categorical_columns = (
-                    string_columns + numeric_categorical_columns
-                )
-                dummy_data = pd.get_dummies(
-                    data[categorical_columns],
-                    columns=categorical_columns,
-                    dtype="float64",
-                    drop_first=True,
-                )
-                for col in dummy_data.columns:
-                    dummy_data[col] = dummy_data[col].astype("float64")
-                self.logger.debug(
-                    f"Created {dummy_data.shape[1]} dummy variables from {len(categorical_columns)} categorical columns"
-                )
-
-                # Create mapping from original columns to their resulting dummy columns
-                for orig_col in categorical_columns:
-                    # Find all dummy columns that came from this original column
-                    related_dummies = [
-                        col
-                        for col in dummy_data.columns
-                        if col.startswith(f"{orig_col}_")
-                    ]
-                    dummy_info["column_mapping"][orig_col] = (
-                        related_dummies
-                        if len(related_dummies) > 0
-                        else [orig_col]
-                    )
-
-                # Drop original string and numeric categorical columns and join the dummy variables
-                numeric_data = data.drop(columns=categorical_columns)
-                self.logger.debug(
-                    f"Removed original string and numeric categorical columns, data shape: {numeric_data.shape}"
-                )
-
-                # Combine numeric columns with dummy variables
-                data = pd.concat([numeric_data, dummy_data], axis=1)
-                for col in data.columns:
-                    data[col] = data[col].astype("float64")
-                self.logger.info(
-                    f"Data shape after dummy variable conversion: {data.shape}"
-                )
-
-            return data
-
+            return processed_data
         except Exception as e:
             self.logger.error(
-                f"Error during string column conversion: {str(e)}"
+                f"Error during receiver data preprocessing: {str(e)}"
             )
-            raise RuntimeError(
-                "Failed to convert string columns to dummy variables"
-            ) from e
+            raise RuntimeError("Failed to preprocess data types") from e
 
     @validate_call(config=VALIDATE_CONFIG)
-    def _postprocess_imputations(
+    def postprocess_imputations(
         self,
         imputations: Union[Dict[float, pd.DataFrame], pd.DataFrame],
         dummy_info: Dict[str, Any],
@@ -765,281 +858,18 @@ def _postprocess_imputations(
                 and original data types
 
         Returns:
-            Dictionary mapping quantiles to DataFrames with original data types restored
+            Dictionary mapping quantiles to DataFrames with original data types restored or a single DataFrame if only one quantile is provided.
 
         Raises:
-            ValueError: If dummy_info is missing required information
             RuntimeError: If conversion back to original types fails
         """
-
-        def _get_reference_category(
-            orig_col: str,
-            available_dummies: List[str],
-            original_categories: List,
-        ) -> Any:
-            """Identify the reference category that was dropped during dummy encoding."""
-            dummy_categories = []
-            for dummy_col in available_dummies:
-                # Remove the original column name and underscore prefix
-                category_part = dummy_col.replace(f"{orig_col}_", "", 1)
-                try:
-                    # Try to convert back to original type if it was numeric
-                    if (
-                        category_part.replace(".", "")
-                        .replace("-", "")
-                        .isdigit()
-                    ):
-                        dummy_categories.append(float(category_part))
-                    else:
-                        dummy_categories.append(category_part)
-                except:
-                    dummy_categories.append(category_part)
-
-            # Find which original category is missing (the reference category)
-            reference_category = None
-            for cat in original_categories:
-                if cat not in dummy_categories:
-                    reference_category = cat
-                    break
-
-            return (
-                reference_category
-                if reference_category is not None
-                else original_categories[0]
-            )
-
-        self.logger.debug(
-            f"Post-processing {len(imputations)} quantile imputations with dummy_info keys: {dummy_info.keys()}"
-        )
-
         try:
-            processed_imputations = {}
-
-            def process_single_quantile(
-                df: pd.DataFrame, dummy_info: Dict[str, Any]
-            ) -> pd.DataFrame:
-
-                df_processed = df.copy()
-
-                for orig_col, dummy_cols in dummy_info.get(
-                    "column_mapping", {}
-                ).items():
-                    if orig_col in dummy_info.get("original_dtypes", {}):
-                        orig_dtype_info = dummy_info["original_dtypes"][
-                            orig_col
-                        ]
-
-                        # Extract dtype category and original pandas dtype
-                        if (
-                            isinstance(orig_dtype_info, tuple)
-                            and len(orig_dtype_info) == 2
-                        ):
-                            dtype_category, original_pandas_dtype = (
-                                orig_dtype_info
-                            )
-                        else:
-                            # Fallback for old format
-                            self.logger.warning(
-                                f"Unexpected dtype format for {orig_col}: {orig_dtype_info}"
-                            )
-                            continue
-
-                        # Check if this variable was imputed based on its type
-                        is_imputed = False
-                        if dtype_category == "bool":
-                            # For bool, check if original column is present
-                            is_imputed = orig_col in df_processed.columns
-                        elif dtype_category in [
-                            "categorical",
-                            "numeric categorical",
-                        ]:
-                            # For regular and numeric categorical, check if dummy columns are present
-                            available_dummies = [
-                                col
-                                for col in dummy_cols
-                                if col in df_processed.columns
-                            ]
-                            is_imputed = len(available_dummies) > 0
-
-                        if not is_imputed:
-                            self.logger.debug(
-                                f"Skipping {orig_col} - not in imputed variables"
-                            )
-                            continue
-
-                        self.logger.debug(
-                            f"Converting {orig_col} back to {dtype_category} with original dtype {original_pandas_dtype}"
-                        )
-
-                        if dtype_category == "bool":
-                            # Convert back to boolean from float (>0.5 threshold for discretization)
-                            df_processed[orig_col] = (
-                                df_processed[orig_col] > 0.5
-                            )
-                            # Convert to original boolean dtype
-                            df_processed[orig_col] = df_processed[
-                                orig_col
-                            ].astype(original_pandas_dtype)
-                            self.logger.debug(
-                                f"Converted {orig_col} back to boolean type {original_pandas_dtype}"
-                            )
-
-                        elif dtype_category in [
-                            "categorical",
-                            "numeric categorical",
-                        ]:
-                            # Find available dummy columns
-                            available_dummies = [
-                                col
-                                for col in dummy_cols
-                                if col in df_processed.columns
-                            ]
-
-                            if len(available_dummies) > 0:
-                                self.logger.debug(
-                                    f"Converting dummy columns back to categorical {orig_col}"
-                                )
-
-                                categories = dummy_info["original_categories"][
-                                    orig_col
-                                ]
-                                dummy_subset = df_processed[available_dummies]
-
-                                # Identify the reference category (the one that was dropped)
-                                reference_category = _get_reference_category(
-                                    orig_col, available_dummies, categories
-                                )
-
-                                # Create mapping from dummy columns to their categories
-                                category_mapping = {}
-                                for cat in categories:
-                                    dummy_name = f"{orig_col}_{cat}"
-                                    if dummy_name in available_dummies:
-                                        category_mapping[dummy_name] = cat
-
-                                # Find the dummy column with highest value for each row
-                                max_idx = dummy_subset.idxmax(axis=1)
-                                max_values = dummy_subset.max(axis=1)
-
-                                # If max dummy value is < 0.5, assign to reference category
-                                threshold = 0.5
-
-                                # Initialize with reference category
-                                df_processed[orig_col] = reference_category
-
-                                # Only assign to dummy categories where max value exceeds threshold
-                                high_confidence_mask = max_values >= threshold
-                                if high_confidence_mask.any():
-                                    df_processed.loc[
-                                        high_confidence_mask, orig_col
-                                    ] = max_idx[high_confidence_mask].map(
-                                        category_mapping
-                                    )
-
-                                # Handle any NaN values that might occur from mapping
-                                nan_mask = df_processed[orig_col].isna()
-                                if nan_mask.any():
-                                    df_processed.loc[nan_mask, orig_col] = (
-                                        reference_category
-                                    )
-                                    self.logger.warning(
-                                        f"Some values could not be mapped for {orig_col}, using reference category: {reference_category}"
-                                    )
-
-                                self.logger.info(
-                                    f"Assigned {high_confidence_mask.sum()} observations to dummy categories, "
-                                    f"{(~high_confidence_mask).sum()} to reference category '{reference_category}'"
-                                )
-
-                                # Convert to original categorical type if needed
-                                try:
-                                    if original_pandas_dtype != "object":
-                                        df_processed[orig_col] = df_processed[
-                                            orig_col
-                                        ].astype(original_pandas_dtype)
-                                        self.logger.debug(
-                                            f"Converted {orig_col} back to categorical type: {original_pandas_dtype}"
-                                        )
-                                except (ValueError, TypeError) as e:
-                                    self.logger.warning(
-                                        f"Could not convert {orig_col} to {original_pandas_dtype}: {e}"
-                                    )
-
-                                # Drop the dummy columns
-                                df_processed = df_processed.drop(
-                                    columns=available_dummies
-                                )
-                                self.logger.debug(
-                                    f"Converted dummy columns back to categorical {orig_col}"
-                                )
-                            else:
-                                # Check if this is a single-value categorical variable (encoded as original column)
-                                if (
-                                    orig_col in df_processed.columns
-                                    and len(dummy_cols) == 1
-                                    and dummy_cols[0] == orig_col
-                                ):
-                                    self.logger.debug(
-                                        f"Converting single-value categorical variable {orig_col} back to original category"
-                                    )
-                                    # Get the original single category value
-                                    categories = dummy_info[
-                                        "original_categories"
-                                    ][orig_col]
-                                    single_category = categories[
-                                        0
-                                    ]  # Should be only one category
-
-                                    # Convert back to the original categorical value
-                                    df_processed[orig_col] = single_category
-
-                                    # Convert to original dtype if needed
-                                    try:
-                                        if dtype_category == "categorical":
-                                            original_pandas_dtype = dummy_info[
-                                                "original_dtypes"
-                                            ][orig_col][1]
-                                            if (
-                                                original_pandas_dtype
-                                                != "object"
-                                            ):
-                                                df_processed[orig_col] = (
-                                                    df_processed[
-                                                        orig_col
-                                                    ].astype(
-                                                        original_pandas_dtype
-                                                    )
-                                                )
-                                        self.logger.debug(
-                                            f"Converted single-value categorical {orig_col} back to original dtype"
-                                        )
-                                    except (ValueError, TypeError) as e:
-                                        self.logger.warning(
-                                            f"Could not convert {orig_col} to original dtype: {e}"
-                                        )
-                                else:
-                                    self.logger.warning(
-                                        f"No dummy columns found for categorical variable {orig_col}"
-                                    )
-                return df_processed
-
-            if isinstance(imputations, pd.DataFrame):
-                processed_df = process_single_quantile(imputations, dummy_info)
-                return processed_df
-            else:
-                for quantile, df in imputations.items():
-                    self.logger.debug(
-                        f"Processing quantile {quantile} with shape {df.shape}"
-                    )
-                    processed_df = process_single_quantile(df, dummy_info)
-                    processed_imputations[quantile] = processed_df
-                    self.logger.debug(
-                        f"Processed quantile {quantile}, final shape: {processed_df.shape}"
-                    )
-                return processed_imputations
-
+            processor = DummyVariableProcessor(self.logger)
+            return processor.reverse_dummy_encoding(imputations, dummy_info)
         except Exception as e:
-            self.logger.error(f"Error in postprocess_imputations: {str(e)}")
+            self.logger.error(
+                f"Error when postprocessing imputations: {str(e)}"
+            )
             raise RuntimeError(
                 f"Failed to post-process imputations: {str(e)}"
             ) from e
@@ -1068,14 +898,13 @@ def predict(
             RuntimeError: If imputation fails.
         """
         try:
-            # Validate quantiles
             self._validate_quantiles(quantiles)
         except Exception as quantile_error:
             raise ValueError(
                 f"Invalid quantiles: {str(quantile_error)}"
             ) from quantile_error
 
-        X_test = self._preprocess_data_types(X_test, self.original_predictors)
+        X_test = self.preprocess_data_types(X_test, self.original_predictors)
 
         for col in self.predictors:
             if col not in X_test.columns:
@@ -1088,7 +917,7 @@ def predict(
         # Defer actual imputations to subclass with all parameters
         imputations = self._predict(X_test, quantiles, **kwargs)
         if self.imputed_vars_dummy_info is not None:
-            imputations = self._postprocess_imputations(
+            imputations = self.postprocess_imputations(
                 imputations, self.imputed_vars_dummy_info
             )
         return imputations
diff --git a/microimpute/models/quantreg.py b/microimpute/models/quantreg.py
index b98755b..c85c4a4 100644
--- a/microimpute/models/quantreg.py
+++ b/microimpute/models/quantreg.py
@@ -29,8 +29,9 @@ def __init__(
         imputed_vars_dummy_info: Optional[Dict[str, str]] = None,
         original_predictors: Optional[List[str]] = None,
         log_level: Optional[str] = "WARNING",
+        quantiles_specified: bool = False,
     ) -> None:
-        """Initialize the QRF results.
+        """Initialize the QuantReg results.
 
         Args:
             models: Dict of quantiles and fitted QuantReg models.
@@ -41,6 +42,7 @@ def __init__(
                 about dummy variables for imputed variables.
             original_predictors: Optional list of original predictor variable
                 names before dummy encoding.
+            quantiles_specified: Whether quantiles were explicitly specified during fit.
         """
         super().__init__(
             predictors,
@@ -51,6 +53,7 @@ def __init__(
             log_level,
         )
         self.models = models
+        self.quantiles_specified = quantiles_specified
 
     @validate_call(config=VALIDATE_CONFIG)
     def _predict(
@@ -78,6 +81,9 @@ def _predict(
             # Create output dictionary with results
             imputations: Dict[float, pd.DataFrame] = {}
 
+            # Store original quantiles parameter to determine return type
+            quantiles_param = quantiles
+
             X_test_with_const = sm.add_constant(X_test[self.predictors])
             self.logger.info(f"Prepared test data with {len(X_test)} samples")
 
@@ -159,14 +165,18 @@ def _predict(
                         imputations[q] = imputed_df
 
             self.logger.info(
-                f"Completed predictions for {len(quantiles)} quantiles"
+                f"Completed predictions for {len(imputations)} quantiles"
             )
 
-            quantiles = imputations.keys()
-            if len(quantiles) < 2:
-                q = list(quantiles)[0]
-
-            return imputations if len(imputations) > 1 else imputations[q]
+            # Return behavior based on how the model was fitted:
+            # - If quantiles were explicitly specified during fit OR predict, return dict
+            # - Otherwise, return DataFrame directly for single quantile
+            if quantiles_param is not None or self.quantiles_specified:
+                return imputations
+            else:
+                # Default behavior: return DataFrame directly
+                q = list(imputations.keys())[0]
+                return imputations[q]
 
         except ValueError as e:
             # Re-raise value errors directly
@@ -269,6 +279,7 @@ def _fit(
                 original_predictors=self.original_predictors,
                 seed=self.seed,
                 log_level=self.log_level,
+                quantiles_specified=(quantiles is not None),
             )
         except Exception as e:
             self.logger.error(f"Error fitting QuantReg model: {str(e)}")
diff --git a/microimpute/utils/__init__.py b/microimpute/utils/__init__.py
index e7e41ab..0def13f 100644
--- a/microimpute/utils/__init__.py
+++ b/microimpute/utils/__init__.py
@@ -1,13 +1,20 @@
-"""
-This module contains utilities that support microimpute processes.
+"""Utility functions for microimpute operations
+
+This module provides utility functions that support various microimpute processes,
+including data preprocessing, normalization/unnormalization, and optional R-based
+statistical matching functionality.
+
+Key components:
+    - preprocess_data: prepare and normalize data for imputation
+    - unnormalize_predictions: convert normalized predictions back to original scale
+    - nnd_hotdeck_using_rpy2: R-based nearest neighbor hot deck imputation (optional)
 """
 
-from .data import preprocess_data
-from .logging_utils import configure_logging
+from microimpute.utils.data import preprocess_data, unnormalize_predictions
 
 # Optional import for R-based functions
 try:
-    from .statmatch_hotdeck import nnd_hotdeck_using_rpy2
+    from microimpute.utils.statmatch_hotdeck import nnd_hotdeck_using_rpy2
 except ImportError:
     # rpy2 is not available, matching functionality will be limited
     nnd_hotdeck_using_rpy2 = None
diff --git a/microimpute/utils/data.py b/microimpute/utils/data.py
index 08694e0..0fb154b 100644
--- a/microimpute/utils/data.py
+++ b/microimpute/utils/data.py
@@ -1,8 +1,13 @@
-"""Data preparation utilities for imputation benchmarking.
+"""Data preparation and transformation utilities
 
-This module provides functions for acquiring, preprocessing, and splitting data for imputation benchmarking.
-It includes utilities for downloading Survey of Consumer Finances
-(SCF) data, normalizing features, and creating train-test splits with consistent parameters.
+This module provides comprehensive data preparation functions for imputation workflows,
+including data splitting, normalization, unnormalization, and categorical variable handling.
+These utilities ensure consistent data preprocessing across different imputation methods.
+
+Key functions:
+    - preprocess_data: split and optionally normalize data for training/testing
+    - unnormalize_predictions: convert normalized predictions back to original scale
+    - Handle categorical variables through one-hot encoding
 """
 
 import logging
@@ -95,7 +100,7 @@ def preprocess_data(
 
             logger.debug(f"Normalization parameters: {normalization_params}")
 
-        except Exception as e:
+        except (TypeError, AttributeError) as e:
             logger.error(f"Error during data normalization: {str(e)}")
             raise RuntimeError("Failed to normalize data") from e
 
@@ -134,6 +139,50 @@ def preprocess_data(
                     X_test,
                 )
 
-        except Exception as e:
+        except (ValueError, TypeError) as e:
             logger.error(f"Error in processing data: {str(e)}")
             raise
+
+
+@validate_call(config=VALIDATE_CONFIG)
+def unnormalize_predictions(
+    imputations: dict, normalization_params: dict
+) -> dict:
+    """Unnormalize predictions using stored normalization parameters.
+
+    Args:
+        imputations: Dictionary mapping quantiles to DataFrames of predictions.
+        normalization_params: Dictionary with mean and std for each column.
+
+    Returns:
+        Dictionary with same structure as imputations but with unnormalized values.
+
+    Raises:
+        ValueError: If columns in imputations don't match normalization parameters.
+    """
+    logger.debug(f"Unnormalizing predictions for {len(imputations)} quantiles")
+
+    # Extract mean and std from normalization parameters
+    mean = pd.Series(
+        {col: p["mean"] for col, p in normalization_params.items()}
+    )
+    std = pd.Series({col: p["std"] for col, p in normalization_params.items()})
+
+    unnormalized = {}
+    for q, df in imputations.items():
+        cols = df.columns
+
+        # Check that all columns have normalization parameters
+        missing_params = [col for col in cols if col not in mean.index]
+        if missing_params:
+            error_msg = f"Missing normalization parameters for columns: {missing_params}"
+            logger.error(error_msg)
+            raise ValueError(error_msg)
+
+        # Unnormalize: x_original = x_normalized * std + mean
+        df_unnorm = df.mul(std[cols], axis=1).add(mean[cols], axis=1)
+        unnormalized[q] = df_unnorm
+
+        logger.debug(f"Unnormalized quantile {q} with shape {df_unnorm.shape}")
+
+    return unnormalized
diff --git a/microimpute/utils/logging_utils.py b/microimpute/utils/logging_utils.py
deleted file mode 100644
index 114aec5..0000000
--- a/microimpute/utils/logging_utils.py
+++ /dev/null
@@ -1,55 +0,0 @@
-"""
-Logging utilities for the MicroImpute package.
-
-This module provides centralized logging configuration and utilities
-for consistent error handling across the codebase.
-"""
-
-import logging
-import sys
-from typing import Optional
-
-DEFAULT_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-DEFAULT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
-
-# Define logging levels for different environments
-DEBUG_LEVEL = logging.DEBUG
-INFO_LEVEL = logging.INFO
-WARN_LEVEL = logging.WARNING
-ERROR_LEVEL = logging.ERROR
-
-
-def configure_logging(
-    level: int = INFO_LEVEL,
-    log_format: str = DEFAULT_LOG_FORMAT,
-    date_format: str = DEFAULT_DATE_FORMAT,
-    log_file: Optional[str] = None,
-) -> None:
-    """Configure global logging settings.
-
-    Args:
-        level: Logging level (DEBUG, INFO, WARNING, ERROR)
-        log_format: Format string for log messages
-        date_format: Format string for timestamps
-        log_file: Optional file path to write logs
-    """
-    root_logger = logging.getLogger()
-    root_logger.setLevel(level)
-
-    # Remove existing handlers to avoid duplicates
-    for handler in root_logger.handlers[:]:
-        root_logger.removeHandler(handler)
-
-    # Console handler
-    console_handler = logging.StreamHandler(stream=sys.stdout)
-    console_handler.setLevel(level)
-    formatter = logging.Formatter(log_format, date_format)
-    console_handler.setFormatter(formatter)
-    root_logger.addHandler(console_handler)
-
-    # File handler (if specified)
-    if log_file:
-        file_handler = logging.FileHandler(log_file)
-        file_handler.setLevel(level)
-        file_handler.setFormatter(formatter)
-        root_logger.addHandler(file_handler)
diff --git a/microimpute/visualizations/__init__.py b/microimpute/visualizations/__init__.py
index e4d87b8..2d02d1e 100644
--- a/microimpute/visualizations/__init__.py
+++ b/microimpute/visualizations/__init__.py
@@ -1,6 +1,24 @@
-"""Results Visualization Utilities
+"""Visualization utilities for imputation results
 
-This module contains functions for visualizing imputation model performance and cross-model comparison.
+This module provides comprehensive visualization tools for analyzing and comparing
+imputation model performance. It includes utilities for visualizing individual model
+performance metrics and comparing results across multiple imputation methods.
+
+Key components:
+    - PerformanceResults: data class for storing model performance visualization results
+    - model_performance_results: function to create performance visualizations for a single model
+    - MethodComparisonResults: data class for storing method comparison visualization results
+    - method_comparison_results: function to create comparison visualizations across methods
 """
 
-from .plotting import method_comparison_results, model_performance_results
+# Import from comparison_plots module
+from microimpute.visualizations.comparison_plots import (
+    MethodComparisonResults,
+    method_comparison_results,
+)
+
+# Import from performance_plots module
+from microimpute.visualizations.performance_plots import (
+    PerformanceResults,
+    model_performance_results,
+)
diff --git a/microimpute/visualizations/comparison_plots.py b/microimpute/visualizations/comparison_plots.py
new file mode 100644
index 0000000..7f5b4b6
--- /dev/null
+++ b/microimpute/visualizations/comparison_plots.py
@@ -0,0 +1,403 @@
+"""Multi-method comparison visualization
+
+This module provides comprehensive visualization tools for comparing the performance
+of multiple imputation methods. It creates interactive plots and heatmaps that help
+identify the best performing method for different variables and quantiles.
+
+Key components:
+    - MethodComparisonResults: container class for comparison data with plotting methods
+    - method_comparison_results: factory function to create comparison visualizations
+    - Support for variable-specific and aggregate performance comparisons
+    - Interactive Plotly-based visualizations with customizable layouts
+"""
+
+import logging
+from typing import List, Optional, Tuple
+
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+
+from microimpute.config import PLOT_CONFIG
+from microimpute.visualizations.performance_plots import _save_figure
+
+logger = logging.getLogger(__name__)
+
+
+class MethodComparisonResults:
+    """Class to store and visualize comparison results across methods."""
+
+    def __init__(
+        self,
+        comparison_data: pd.DataFrame,
+        metric_name: str = "Quantile Loss",
+        imputed_variables: Optional[List[str]] = None,
+        data_format: str = "wide",
+    ):
+        """Initialize MethodComparisonResults with comparison data.
+
+        Args:
+            comparison_data: DataFrame with comparison data in one of two formats:
+                - "wide": DataFrame with methods as index and quantiles as columns
+                - "long": DataFrame with columns 'Method', 'Imputed Variable', 'Percentile', 'Loss'
+            metric_name: Name of the metric being compared (e.g., "Quantile Loss", "MAE", "RMSE")
+            imputed_variables: List of variable names that were imputed
+            data_format: Input data format - 'wide' or 'long'
+        """
+        self.metric_name = metric_name
+        self.imputed_variables = imputed_variables or []
+        self.data_format = data_format
+
+        # Process data based on input format
+        if data_format == "wide":
+            # Convert wide format to long format for internal use
+            self._process_wide_input(comparison_data)
+        else:
+            # Data is already in long format
+            self.comparison_data = comparison_data.copy()
+
+            # Validate required columns for long format
+            required_cols = [
+                "Method",
+                "Imputed Variable",
+                "Percentile",
+                "Loss",
+            ]
+            missing_cols = [
+                col
+                for col in required_cols
+                if col not in self.comparison_data.columns
+            ]
+            if missing_cols:
+                error_msg = f"Missing required columns: {missing_cols}"
+                logger.error(error_msg)
+                raise ValueError(error_msg)
+
+        # Get unique methods and variables
+        self.methods = self.comparison_data["Method"].unique().tolist()
+        self.variables = (
+            self.comparison_data["Imputed Variable"].unique().tolist()
+        )
+
+        logger.debug(
+            f"Initialized MethodComparisonResults with {len(self.methods)} methods "
+            f"and {len(self.variables)} variables"
+        )
+
+    def _process_wide_input(self, wide_data: pd.DataFrame):
+        """Convert wide format data to long format for internal use.
+
+        Args:
+            wide_data: DataFrame with methods as index and quantiles as columns
+        """
+        logger.debug("Converting wide format input to long format")
+
+        # Reset index to get methods as a column
+        data = wide_data.reset_index()
+        if "index" in data.columns:
+            data = data.rename(columns={"index": "Method"})
+
+        # Convert to long format
+        long_format_data = []
+
+        for _, row in data.iterrows():
+            method = row["Method"]
+
+            for col in wide_data.columns:
+                if col == "mean_loss":
+                    # Add mean_loss as special case
+                    long_format_data.append(
+                        {
+                            "Method": method,
+                            "Imputed Variable": "mean_loss",
+                            "Percentile": "mean_loss",
+                            "Loss": row[col],
+                        }
+                    )
+                else:
+                    # Regular quantile columns
+                    # Use first imputed variable if specified, otherwise "y"
+                    var_name = (
+                        self.imputed_variables[0]
+                        if self.imputed_variables
+                        else "y"
+                    )
+                    long_format_data.append(
+                        {
+                            "Method": method,
+                            "Imputed Variable": var_name,
+                            "Percentile": col,
+                            "Loss": row[col],
+                        }
+                    )
+
+        self.comparison_data = pd.DataFrame(long_format_data)
+
+    def plot(
+        self,
+        title: Optional[str] = None,
+        save_path: Optional[str] = None,
+        show_mean: bool = True,
+        figsize: Tuple[int, int] = (
+            PLOT_CONFIG["width"],
+            PLOT_CONFIG["height"],
+        ),
+    ) -> go.Figure:
+        """Plot a bar chart comparing performance across different imputation methods.
+
+        Args:
+            title: Custom title for the plot. If None, a default title is used.
+            save_path: Path to save the plot. If None, the plot is displayed.
+            show_mean: Whether to show horizontal lines for mean loss values.
+            figsize: Figure size as (width, height) in pixels.
+
+        Returns:
+            Plotly figure object
+
+        Raises:
+            ValueError: If data_subset is invalid or not available
+            RuntimeError: If plot creation or saving fails
+        """
+        logger.debug(
+            f"Creating method comparison plot with {len(self.methods)} methods"
+        )
+
+        try:
+            # Prepare data for plotting - we need it in a specific format
+            # regardless of how it was input
+            if hasattr(self, "method_results_df"):
+                # Data came in wide format, convert to long for plotting
+                plot_df = self.method_results_df.reset_index().rename(
+                    columns={"index": "Method"}
+                )
+
+                id_vars = ["Method"]
+                value_vars = [
+                    col
+                    for col in plot_df.columns
+                    if col not in id_vars and col != "mean_loss"
+                ]
+
+                melted_df = pd.melt(
+                    plot_df,
+                    id_vars=id_vars,
+                    value_vars=value_vars,
+                    var_name="Percentile",
+                    value_name=self.metric_name,
+                )
+
+                melted_df["Percentile"] = melted_df["Percentile"].astype(str)
+
+            else:
+                # Data is already in long format (comparison_data)
+                # Filter out mean_loss entries for the bar chart
+                melted_df = self.comparison_data[
+                    (self.comparison_data["Percentile"] != "mean_loss")
+                    & (self.comparison_data["Imputed Variable"] != "mean_loss")
+                ].copy()
+                melted_df = melted_df.rename(
+                    columns={"Loss": self.metric_name}
+                )
+                melted_df["Percentile"] = melted_df["Percentile"].astype(str)
+
+            if title is None:
+                title = f"Test {self.metric_name} Across Quantiles for Different Imputation Methods"
+
+            # Create the bar chart
+            logger.debug("Creating bar chart with plotly express")
+            fig = px.bar(
+                melted_df,
+                x="Percentile",
+                y=self.metric_name,
+                color="Method",
+                color_discrete_sequence=px.colors.qualitative.Plotly,
+                barmode="group",
+                title=title,
+                labels={
+                    "Percentile": "Quantiles",
+                    self.metric_name: f"Test {self.metric_name}",
+                },
+            )
+
+            # Add horizontal lines for mean loss if present and requested
+            if show_mean:
+                logger.debug("Adding mean loss markers to plot")
+
+                if (
+                    hasattr(self, "method_results_df")
+                    and "mean_loss" in self.method_results_df.columns
+                ):
+                    # Wide format data has mean_loss column
+                    for i, method in enumerate(self.method_results_df.index):
+                        mean_loss = self.method_results_df.loc[
+                            method, "mean_loss"
+                        ]
+                        fig.add_shape(
+                            type="line",
+                            x0=-0.5,
+                            y0=mean_loss,
+                            x1=len(value_vars) - 0.5,
+                            y1=mean_loss,
+                            line=dict(
+                                color=px.colors.qualitative.Plotly[
+                                    i % len(px.colors.qualitative.Plotly)
+                                ],
+                                width=2,
+                                dash="dot",
+                            ),
+                            name=f"{method} Mean",
+                        )
+                else:
+                    # Calculate means from the data
+                    for i, method in enumerate(self.methods):
+                        method_data = melted_df[melted_df["Method"] == method]
+                        if not method_data.empty:
+                            mean_loss = method_data[self.metric_name].mean()
+                            # Get number of unique percentiles for x1 position
+                            n_percentiles = melted_df["Percentile"].nunique()
+                            fig.add_shape(
+                                type="line",
+                                x0=-0.5,
+                                y0=mean_loss,
+                                x1=n_percentiles - 0.5,
+                                y1=mean_loss,
+                                line=dict(
+                                    color=px.colors.qualitative.Plotly[
+                                        i % len(px.colors.qualitative.Plotly)
+                                    ],
+                                    width=2,
+                                    dash="dot",
+                                ),
+                                name=f"{method} Mean",
+                            )
+
+            fig.update_layout(
+                title_font_size=14,
+                xaxis_title_font_size=12,
+                yaxis_title_font_size=12,
+                paper_bgcolor="#F0F0F0",
+                plot_bgcolor="#F0F0F0",
+                legend_title="Method",
+                height=figsize[1],
+                width=figsize[0],
+            )
+
+            fig.update_xaxes(showgrid=False, zeroline=False)
+            fig.update_yaxes(showgrid=False, zeroline=False)
+
+            # Save or show the plot
+            if save_path:
+                _save_figure(fig, save_path)
+
+            logger.debug("Plot creation completed successfully")
+            return fig
+
+        except Exception as e:
+            logger.error(f"Error creating method comparison plot: {str(e)}")
+            raise RuntimeError(
+                f"Failed to create method comparison plot: {str(e)}"
+            ) from e
+
+    def summary(self, format: str = "wide") -> pd.DataFrame:
+        """Generate a summary table of the comparison results.
+
+        Args:
+            format: 'wide' for methods as columns, 'long' for stacked format
+
+        Returns:
+            Summary DataFrame
+        """
+        logger.debug(f"Generating {format} format summary")
+
+        if format == "wide":
+            # Pivot table with methods as columns
+            summary = self.comparison_data.pivot_table(
+                index=["Imputed Variable", "Percentile"],
+                columns="Method",
+                values="Loss",
+                aggfunc="mean",
+            )
+            # Add a row for average across all quantiles
+            overall_mean = summary.mean()
+            overall_mean.name = ("Overall", "Mean")
+            summary = pd.concat([summary, overall_mean.to_frame().T])
+
+        else:  # long format
+            # Group by method and calculate statistics
+            summary = (
+                self.comparison_data.groupby("Method")["Loss"]
+                .agg(["mean", "std", "min", "max"])
+                .round(6)
+            )
+
+        logger.debug(f"Summary generated with shape {summary.shape}")
+        return summary
+
+    def get_best_method(self, criterion: str = "mean") -> str:
+        """Identify the best performing method.
+
+        Args:
+            criterion: 'mean' for average loss, 'median' for median loss
+
+        Returns:
+            Name of the best performing method
+        """
+        logger.debug(f"Finding best method using {criterion} criterion")
+
+        if criterion == "mean":
+            method_scores = self.comparison_data.groupby("Method")[
+                "Loss"
+            ].mean()
+        elif criterion == "median":
+            method_scores = self.comparison_data.groupby("Method")[
+                "Loss"
+            ].median()
+        else:
+            raise ValueError(f"Unknown criterion: {criterion}")
+
+        best_method = method_scores.idxmin()
+        logger.info(
+            f"Best method: {best_method} with {criterion} loss = {method_scores[best_method]:.6f}"
+        )
+        return best_method
+
+    def __repr__(self) -> str:
+        """String representation of the MethodComparisonResults object."""
+        return (
+            f"MethodComparisonResults(methods={self.methods}, "
+            f"variables={len(self.variables)}, "
+            f"shape={self.comparison_data.shape})"
+        )
+
+
+def method_comparison_results(
+    data: pd.DataFrame,
+    metric_name: str = "Quantile Loss",
+    quantiles: List[float] = None,
+    data_format: str = "wide",
+) -> MethodComparisonResults:
+    """Create a MethodComparisonResults object from comparison data.
+
+    This unified factory function supports multiple input formats:
+    - "wide": DataFrame with methods as index and quantiles as columns (and
+             optional 'mean_loss' column)
+    - "long": DataFrame with columns ["Method", "Imputed Variable", "Percentile", "Loss"]
+
+    Args:
+        data: DataFrame containing performance data in one of the supported formats.
+        metric_name: Name of the metric being compared (default: "Quantile Loss").
+        quantiles: List of quantile values (e.g., [0.05, 0.1, ...]).
+        data_format: Format of the input data ("wide" or "long").
+
+    Returns:
+        MethodComparisonResults object for visualization
+    """
+    # Note: quantiles parameter is kept for backward compatibility but not used
+    # The quantiles are inferred from the data itself
+
+    return MethodComparisonResults(
+        comparison_data=data,
+        metric_name=metric_name,
+        imputed_variables=None,  # Will be inferred from data
+        data_format=data_format,
+    )
diff --git a/microimpute/visualizations/performance_plots.py b/microimpute/visualizations/performance_plots.py
new file mode 100644
index 0000000..6645357
--- /dev/null
+++ b/microimpute/visualizations/performance_plots.py
@@ -0,0 +1,299 @@
+"""Individual model performance visualization
+
+This module provides comprehensive visualization tools for analyzing the performance
+of individual imputation models. It creates interactive plots showing train/test
+performance across different quantiles, helping identify overfitting and understand
+model behavior at different points of the distribution.
+
+Key components:
+    - PerformanceResults: container class for model performance data with plotting methods
+    - model_performance_results: factory function to create performance visualizations
+    - Interactive Plotly-based visualizations with customizable styling
+"""
+
+import logging
+import os
+from typing import Optional, Tuple
+
+import numpy as np
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+
+from microimpute.config import PLOT_CONFIG
+
+logger = logging.getLogger(__name__)
+
+
+class PerformanceResults:
+    """Class to store and visualize model performance results.
+
+    This class provides an interface for storing and visualizing
+    performance metrics, with methods like plot() and summary().
+    """
+
+    def __init__(
+        self,
+        results: pd.DataFrame,
+        model_name: Optional[str] = None,
+        method_name: Optional[str] = None,
+    ):
+        """Initialize PerformanceResults with train/test performance data.
+
+        Args:
+            results: DataFrame with train and test rows, quantiles
+                as columns, and loss values.
+            model_name: Name of the model used for imputation.
+            method_name: Name of the imputation method.
+        """
+        self.results = results.copy()
+        self.model_name = model_name or "Unknown Model"
+        self.method_name = method_name or "Unknown Method"
+
+        # Validate inputs
+        required_indices = ["train", "test"]
+        available_indices = self.results.index.tolist()
+        missing_indices = [
+            idx for idx in required_indices if idx not in available_indices
+        ]
+
+        if missing_indices:
+            logger.warning(
+                f"Missing indices in results DataFrame: {missing_indices}"
+            )
+            logger.info(f"Available indices: {available_indices}")
+
+        # Convert column names to strings if they are not already
+        self.results.columns = [str(col) for col in self.results.columns]
+
+    def plot(
+        self,
+        title: Optional[str] = None,
+        save_path: Optional[str] = None,
+        figsize: Tuple[int, int] = (
+            PLOT_CONFIG["width"],
+            PLOT_CONFIG["height"],
+        ),
+    ) -> go.Figure:
+        """Plot the performance comparison between training and testing
+        sets across quantiles.
+
+        Args:
+            title: Custom title for the plot. If None, a default title is used.
+            save_path: Path to save the plot. If None, the plot is displayed.
+            figsize: Figure size as (width, height) in pixels.
+
+        Returns:
+            Plotly figure object
+
+        Raises:
+            RuntimeError: If plot creation or saving fails
+        """
+        logger.debug(
+            f"Creating train-test performance plot from results shape {self.results.shape}"
+        )
+        palette = px.colors.qualitative.Plotly
+        train_color = palette[2]
+        test_color = palette[3]
+
+        try:
+            logger.debug("Creating Plotly figure")
+            fig = go.Figure()
+
+            # Add bars for training data if present
+            if "train" in self.results.index:
+                logger.debug("Adding training data bars")
+                fig.add_trace(
+                    go.Bar(
+                        x=self.results.columns,
+                        y=self.results.loc["train"],
+                        name="Train",
+                        marker_color=train_color,
+                    )
+                )
+
+            # Add bars for test data if present
+            if "test" in self.results.index:
+                logger.debug("Adding test data bars")
+                fig.add_trace(
+                    go.Bar(
+                        x=self.results.columns,
+                        y=self.results.loc["test"],
+                        name="Test",
+                        marker_color=test_color,
+                    )
+                )
+
+            logger.debug("Updating plot layout")
+            fig.update_layout(
+                title=title,
+                xaxis_title="Quantile",
+                yaxis_title="Average Quantile Loss",
+                barmode="group",
+                width=figsize[0],
+                height=figsize[1],
+                paper_bgcolor="#F0F0F0",
+                plot_bgcolor="#F0F0F0",
+                legend=dict(yanchor="top", y=0.99, xanchor="right", x=0.99),
+                margin=dict(l=50, r=50, t=80, b=50),
+            )
+
+            fig.update_xaxes(showgrid=False, zeroline=False)
+            fig.update_yaxes(showgrid=False, zeroline=False)
+
+            if save_path:
+                _save_figure(fig, save_path)
+
+            logger.debug("Plot creation successful")
+            return fig
+
+        except KeyError as e:
+            error_msg = f"Missing required data in results: {str(e)}"
+            logger.error(error_msg)
+            raise RuntimeError(error_msg) from e
+        except ValueError as e:
+            error_msg = f"Invalid data format for plotting: {str(e)}"
+            logger.error(error_msg)
+            raise RuntimeError(error_msg) from e
+
+    def summary(self) -> pd.DataFrame:
+        """Generate a summary of the performance metrics.
+
+        Returns:
+            Summary DataFrame with metrics
+        """
+        logger.debug("Generating performance summary")
+
+        # Calculate summary statistics
+        train_mean = (
+            self.results.loc["train"].mean()
+            if "train" in self.results.index
+            else np.nan
+        )
+        test_mean = (
+            self.results.loc["test"].mean()
+            if "test" in self.results.index
+            else np.nan
+        )
+
+        train_std = (
+            self.results.loc["train"].std()
+            if "train" in self.results.index
+            else np.nan
+        )
+        test_std = (
+            self.results.loc["test"].std()
+            if "test" in self.results.index
+            else np.nan
+        )
+
+        train_min = (
+            self.results.loc["train"].min()
+            if "train" in self.results.index
+            else np.nan
+        )
+        test_min = (
+            self.results.loc["test"].min()
+            if "test" in self.results.index
+            else np.nan
+        )
+
+        train_max = (
+            self.results.loc["train"].max()
+            if "train" in self.results.index
+            else np.nan
+        )
+        test_max = (
+            self.results.loc["test"].max()
+            if "test" in self.results.index
+            else np.nan
+        )
+
+        # Create summary DataFrame
+        summary_data = {
+            "Model": [self.model_name],
+            "Method": [self.method_name],
+            "Train Mean": [train_mean],
+            "Test Mean": [test_mean],
+            "Train Std": [train_std],
+            "Test Std": [test_std],
+            "Train Min": [train_min],
+            "Test Min": [test_min],
+            "Train Max": [train_max],
+            "Test Max": [test_max],
+            "Train/Test Ratio": [
+                train_mean / test_mean if test_mean != 0 else np.nan
+            ],
+        }
+
+        summary_df = pd.DataFrame(summary_data)
+        logger.debug(f"Summary generated with shape {summary_df.shape}")
+        return summary_df
+
+    def __repr__(self) -> str:
+        """String representation of the PerformanceResults object."""
+        return (
+            f"PerformanceResults(model='{self.model_name}', "
+            f"method='{self.method_name}', "
+            f"shape={self.results.shape})"
+        )
+
+
+def _save_figure(fig: go.Figure, save_path: str) -> None:
+    """Save a plotly figure to file.
+
+    Args:
+        fig: Plotly figure to save
+        save_path: Path where to save the figure
+
+    Raises:
+        RuntimeError: If saving fails
+    """
+    try:
+        logger.info(f"Saving plot to {save_path}")
+
+        # Ensure directory exists
+        save_dir = os.path.dirname(save_path)
+        if save_dir and not os.path.exists(save_dir):
+            logger.debug(f"Creating directory: {save_dir}")
+            os.makedirs(save_dir, exist_ok=True)
+
+        # Try to save as image if kaleido is available
+        try:
+            fig.write_image(save_path)
+            logger.info(f"Plot saved as image to {save_path}")
+        except ImportError:
+            # Fall back to HTML if kaleido is not available
+            html_path = save_path.rsplit(".", 1)[0] + ".html"
+            fig.write_html(html_path)
+            logger.warning(
+                f"kaleido not available for image export. "
+                f"Saved as HTML to {html_path}"
+            )
+    except OSError as e:
+        error_msg = f"Failed to save plot to {save_path}: {str(e)}"
+        logger.error(error_msg)
+        raise RuntimeError(error_msg) from e
+
+
+def model_performance_results(
+    results: pd.DataFrame,
+    model_name: Optional[str] = None,
+    method_name: Optional[str] = None,
+) -> PerformanceResults:
+    """Create a PerformanceResults object from train/test results.
+
+    Args:
+        results: DataFrame with train and test rows, quantiles
+            as columns, and loss values.
+        model_name: Name of the model used for imputation.
+        method_name: Name of the imputation method.
+
+    Returns:
+        PerformanceResults object for visualization
+    """
+    return PerformanceResults(
+        results=results,
+        model_name=model_name,
+        method_name=method_name,
+    )
diff --git a/microimpute/visualizations/plotting.py b/microimpute/visualizations/plotting.py
deleted file mode 100644
index 46b3864..0000000
--- a/microimpute/visualizations/plotting.py
+++ /dev/null
@@ -1,649 +0,0 @@
-"""Visualization interfaces for imputation model performance.
-
-This module provides classes and functions for visualizing the performance
-of imputation models, following a statsmodels-like interface, built on plotly.
-"""
-
-import logging
-import os
-from typing import Dict, List, Optional, Tuple, Union
-
-import numpy as np
-import pandas as pd
-import plotly.express as px
-import plotly.graph_objects as go
-from pydantic import validate_call
-
-from microimpute.config import PLOT_CONFIG, QUANTILES, VALIDATE_CONFIG
-
-logger = logging.getLogger(__name__)
-
-
-class PerformanceResults:
-    """Class to store and visualize model performance results.
-
-    This class provides an interface for storing and visualizing
-    performance metrics, with methods like plot() and summary().
-    """
-
-    def __init__(
-        self,
-        results: pd.DataFrame,
-        model_name: Optional[str] = None,
-        method_name: Optional[str] = None,
-    ):
-        """Initialize PerformanceResults with train/test performance data.
-
-        Args:
-            results: DataFrame with train and test rows, quantiles
-                as columns, and loss values.
-            model_name: Name of the model used for imputation.
-            method_name: Name of the imputation method.
-        """
-        self.results = results.copy()
-        self.model_name = model_name or "Unknown Model"
-        self.method_name = method_name or "Unknown Method"
-
-        # Validate inputs
-        required_indices = ["train", "test"]
-        available_indices = self.results.index.tolist()
-        missing_indices = [
-            idx for idx in required_indices if idx not in available_indices
-        ]
-
-        if missing_indices:
-            logger.warning(
-                f"Missing indices in results DataFrame: {missing_indices}"
-            )
-            logger.info(f"Available indices: {available_indices}")
-
-        # Convert column names to strings if they are not already
-        self.results.columns = [str(col) for col in self.results.columns]
-
-    def plot(
-        self,
-        title: Optional[str] = None,
-        save_path: Optional[str] = None,
-        figsize: Tuple[int, int] = (
-            PLOT_CONFIG["width"],
-            PLOT_CONFIG["height"],
-        ),
-    ) -> go.Figure:
-        """Plot the performance comparison between training and testing
-        sets across quantiles.
-
-        Args:
-            title: Custom title for the plot. If None, a default title is used.
-            save_path: Path to save the plot. If None, the plot is displayed.
-            figsize: Figure size as (width, height) in pixels.
-
-        Returns:
-            Plotly figure object
-
-        Raises:
-            RuntimeError: If plot creation or saving fails
-        """
-        logger.debug(
-            f"Creating train-test performance plot from results shape {self.results.shape}"
-        )
-        palette = px.colors.qualitative.Plotly
-        train_color = palette[2]
-        test_color = palette[3]
-
-        try:
-            logger.debug("Creating Plotly figure")
-            fig = go.Figure()
-
-            # Add bars for training data if present
-            if "train" in self.results.index:
-                logger.debug("Adding training data bars")
-                fig.add_trace(
-                    go.Bar(
-                        x=self.results.columns,
-                        y=self.results.loc["train"],
-                        name="Train",
-                        marker_color=train_color,
-                    )
-                )
-
-            # Add bars for test data if present
-            if "test" in self.results.index:
-                logger.debug("Adding test data bars")
-                fig.add_trace(
-                    go.Bar(
-                        x=self.results.columns,
-                        y=self.results.loc["test"],
-                        name="Test",
-                        marker_color=test_color,
-                    )
-                )
-
-            logger.debug("Updating plot layout")
-            fig.update_layout(
-                title=title,
-                xaxis_title="Quantile",
-                yaxis_title="Average Quantile Loss",
-                barmode="group",
-                width=figsize[0],
-                height=figsize[1],
-                paper_bgcolor="#F0F0F0",
-                plot_bgcolor="#F0F0F0",
-                legend=dict(yanchor="top", y=0.99, xanchor="right", x=0.99),
-                margin=dict(l=50, r=50, t=80, b=50),
-            )
-
-            fig.update_xaxes(showgrid=False, zeroline=False)
-            fig.update_yaxes(showgrid=False, zeroline=False)
-
-            if save_path:
-                try:
-                    logger.info(f"Saving plot to {save_path}")
-
-                    # Ensure directory exists
-                    save_dir = os.path.dirname(save_path)
-                    if save_dir and not os.path.exists(save_dir):
-                        logger.debug(f"Creating directory: {save_dir}")
-                        os.makedirs(save_dir, exist_ok=True)
-
-                    # Try to save as image if kaleido is available
-                    try:
-                        fig.write_image(save_path)
-                        logger.info(f"Plot saved as image to {save_path}")
-                    except Exception as img_error:
-                        logger.warning(
-                            f"Could not save image to {save_path}: {str(img_error)}. "
-                            "Install kaleido to enable image export."
-                        )
-
-                    # Always save HTML version for interactive viewing
-                    html_path = save_path.replace(".jpg", ".html")
-                    fig.write_html(html_path)
-
-                    logger.info(f"Plot saved as HTML to {html_path}")
-                except Exception as e:
-                    logger.error(f"Error saving train-test plot: {str(e)}")
-                    raise RuntimeError(
-                        f"Failed to save plot to {save_path}"
-                    ) from e
-
-            logger.debug("Train-test performance plot created successfully")
-            return fig
-
-        except Exception as e:
-            logger.error(
-                f"Error creating train-test performance plot: {str(e)}"
-            )
-            raise RuntimeError(
-                f"Failed to create train-test performance plot: {str(e)}"
-            ) from e
-
-    def summary(self) -> pd.DataFrame:
-        """Provide a summary of model performance statistics.
-
-        Returns:
-            DataFrame with performance statistics including:
-                - Mean loss per dataset (train/test)
-                - Loss by quantile
-                - Difference between train and test
-                - Overfitting ratio (test/train)
-        """
-        logger.debug("Generating performance summary statistics")
-
-        try:
-            summary_data = {}
-
-            # Overall mean loss
-            mean_loss = self.results.mean(axis=1)
-            summary_data["mean_loss"] = mean_loss.to_dict()
-
-            # Calculate train/test difference if both are available
-            if all(idx in self.results.index for idx in ["train", "test"]):
-                diff = self.results.loc["test"] - self.results.loc["train"]
-                summary_data["difference"] = {
-                    "mean": diff.mean(),
-                    "max": diff.max(),
-                    "min": diff.min(),
-                }
-
-                # Calculate overfitting ratio (test/train)
-                ratio = self.results.loc["test"] / self.results.loc["train"]
-                summary_data["ratio"] = {
-                    "mean": ratio.mean(),
-                    "max": ratio.max(),
-                    "min": ratio.min(),
-                }
-
-            summary_df = pd.DataFrame(
-                {
-                    "Model": self.model_name,
-                    "Method": self.method_name,
-                    "Mean Train Loss": summary_data.get("mean_loss", {}).get(
-                        "train", np.nan
-                    ),
-                    "Mean Test Loss": summary_data.get("mean_loss", {}).get(
-                        "test", np.nan
-                    ),
-                    "Mean Difference": summary_data.get("difference", {}).get(
-                        "mean", np.nan
-                    ),
-                    "Overfitting Ratio": summary_data.get("ratio", {}).get(
-                        "mean", np.nan
-                    ),
-                },
-                index=[0],
-            )
-
-            return summary_df
-
-        except Exception as e:
-            logger.error(f"Error generating performance summary: {str(e)}")
-            raise RuntimeError(
-                f"Failed to generate performance summary: {str(e)}"
-            ) from e
-
-
-class MethodComparisonResults:
-    """Class to store and visualize performance comparison across different methods.
-
-    This unified comparison class provides an interface for comparing and visualizing performance metrics across different imputation methods, with support for diverse dataset shapes, different quantiles, and various metrics.
-    """
-
-    def __init__(
-        self,
-        data: pd.DataFrame,
-        metric_name: str = "Quantile Loss",
-        quantiles: List[float] = QUANTILES,
-        data_format: str = "wide",
-    ):
-        """Initialize MethodComparison with performance data.
-
-        This class supports multiple input formats through the data_format parameter:
-        - "wide": DataFrame with methods as index and quantiles as columns (and
-                optional 'mean_loss' column)
-        - "long": DataFrame with columns ["Method", "Imputed Variable", "Percentile", "Loss"]
-
-        Args:
-            data: DataFrame containing performance data in one of the supported formats.
-            metric_name: Name of the metric being compared (default: "Quantile Loss").
-            quantiles: List of quantile values (e.g., [0.05, 0.1, ...]).
-            data_format: Format of the input data ("wide" or "long").
-
-        Raises:
-            ValueError: If input DataFrame is invalid or in unsupported format
-        """
-        self.quantiles = quantiles or QUANTILES
-        self.metric_name = metric_name
-        self.data_format = data_format
-
-        # Process data based on format
-        if data_format == "wide":
-            self._process_wide_format(data)
-        elif data_format == "long":
-            self._process_long_format(data)
-        else:
-            raise ValueError(
-                f"Unsupported data_format: {data_format}. "
-                "Must be 'wide' or 'long'."
-            )
-
-    def _process_wide_format(self, data: pd.DataFrame) -> None:
-        """Process data in wide format (methods as index, quantiles as columns)."""
-        # Validate inputs
-        if data.empty:
-            logger.error("Empty DataFrame provided for plotting")
-            raise ValueError("DataFrame cannot be empty")
-
-        self.method_results_df = data.copy()
-
-        expected_columns = [str(q) for q in self.quantiles]
-        if not all(
-            str(q) in self.method_results_df.columns
-            or q in self.method_results_df.columns
-            for q in self.quantiles
-        ):
-            logger.warning(
-                f"Some quantiles not found in DataFrame columns. "
-                f"Expected: {expected_columns}, Found: {list(self.method_results_df.columns)}"
-            )
-
-        self.methods = self.method_results_df.index.tolist()
-        self.data_subset = "test"  # Default to test data for wide format
-
-        # Compute mean loss if not already present
-        if "mean_loss" not in self.method_results_df.columns:
-            quantile_cols = [
-                col
-                for col in self.method_results_df.columns
-                if col != "mean_loss"
-                and (
-                    col in map(str, self.quantiles)
-                    or col in map(float, self.quantiles)
-                )
-            ]
-            if quantile_cols:
-                self.method_results_df["mean_loss"] = self.method_results_df[
-                    quantile_cols
-                ].mean(axis=1)
-
-    def _process_long_format(self, data: pd.DataFrame) -> None:
-        """Process data in long format (Method, Imputed Variable, Percentile, Loss)."""
-        # Validate inputs
-        required_columns = ["Method", "Imputed Variable", "Percentile", "Loss"]
-        missing_columns = [
-            col for col in required_columns if col not in data.columns
-        ]
-        if missing_columns:
-            logger.error(
-                f"Missing required columns for long format: {missing_columns}"
-            )
-            raise ValueError(
-                f"Long format DataFrame must contain columns: {required_columns}"
-            )
-
-        self.loss_comparison_df = data.copy()
-
-        # Convert to wide format for internal storage
-        df_avg = self.loss_comparison_df[
-            self.loss_comparison_df["Imputed Variable"] == "mean_loss"
-        ]
-        df_regular = df_avg[df_avg["Percentile"] != "mean_loss"]
-
-        wide_df = df_regular.pivot(
-            index="Method", columns="Percentile", values="Loss"
-        )
-
-        # Add mean loss column
-        wide_df["mean_loss"] = wide_df.mean(axis=1)
-
-        self.method_results_df = wide_df
-        self.methods = wide_df.index.tolist()
-        self.data_subset = "test"  # Default to test data for long format
-
-    def plot(
-        self,
-        title: Optional[str] = None,
-        save_path: Optional[str] = None,
-        figsize: Tuple[int, int] = (
-            PLOT_CONFIG["width"],
-            PLOT_CONFIG["height"],
-        ),
-        show_mean: bool = True,
-    ) -> go.Figure:
-        """Plot a bar chart comparing performance across different imputation methods.
-
-        Args:
-            title: Custom title for the plot. If None, a default title is used.
-            save_path: Path to save the plot. If None, the plot is displayed.
-            figsize: Figure size as (width, height) in pixels.
-            show_mean: Whether to show horizontal lines for mean loss values.
-
-        Returns:
-            Plotly figure object
-
-        Raises:
-            ValueError: If data_subset is invalid or not available
-            RuntimeError: If plot creation or saving fails
-        """
-        logger.debug(
-            f"Creating method comparison plot with DataFrame of shape {self.method_results_df.shape}"
-        )
-
-        try:
-            # Convert DataFrame to long format for plotting
-            plot_df = self.method_results_df.reset_index().rename(
-                columns={"index": "Method"}
-            )
-
-            id_vars = ["Method"]
-            value_vars = [
-                col
-                for col in plot_df.columns
-                if col not in id_vars and col != "mean_loss"
-            ]
-
-            melted_df = pd.melt(
-                plot_df,
-                id_vars=id_vars,
-                value_vars=value_vars,
-                var_name="Percentile",
-                value_name=self.metric_name,
-            )
-
-            melted_df["Percentile"] = melted_df["Percentile"].astype(str)
-
-            if title is None:
-                title = f"Test {self.metric_name} Across Quantiles for Different Imputation Methods"
-
-            # Create the bar chart
-            logger.debug("Creating bar chart with plotly express")
-            fig = px.bar(
-                melted_df,
-                x="Percentile",
-                y=self.metric_name,
-                color="Method",
-                color_discrete_sequence=px.colors.qualitative.Plotly,
-                barmode="group",
-                title=title,
-                labels={
-                    "Percentile": "Quantiles",
-                    self.metric_name: f"Test {self.metric_name}",
-                },
-            )
-
-            # Add a horizontal line for the mean loss if present and requested
-            if show_mean and "mean_loss" in self.method_results_df.columns:
-                logger.debug("Adding mean loss markers to plot")
-                for i, method in enumerate(self.method_results_df.index):
-                    mean_loss = self.method_results_df.loc[method, "mean_loss"]
-                    fig.add_shape(
-                        type="line",
-                        x0=-0.5,
-                        y0=mean_loss,
-                        x1=len(value_vars) - 0.5,
-                        y1=mean_loss,
-                        line=dict(
-                            color=px.colors.qualitative.Plotly[
-                                i % len(px.colors.qualitative.Plotly)
-                            ],
-                            width=2,
-                            dash="dot",
-                        ),
-                        name=f"{method} Mean",
-                    )
-
-            fig.update_layout(
-                title_font_size=14,
-                xaxis_title_font_size=12,
-                yaxis_title_font_size=12,
-                paper_bgcolor="#F0F0F0",
-                plot_bgcolor="#F0F0F0",
-                legend_title="Method",
-                height=figsize[1],
-                width=figsize[0],
-            )
-
-            fig.update_xaxes(showgrid=False, zeroline=False)
-            fig.update_yaxes(showgrid=False, zeroline=False)
-
-            # Save or show the plot
-            if save_path:
-                try:
-                    logger.info(f"Saving plot to {save_path}")
-
-                    # Ensure directory exists
-                    save_dir = os.path.dirname(save_path)
-                    if save_dir and not os.path.exists(save_dir):
-                        logger.debug(f"Creating directory: {save_dir}")
-                        os.makedirs(save_dir, exist_ok=True)
-
-                    # Try to save as image if kaleido is available
-                    try:
-                        fig.write_image(save_path)
-                        logger.info(f"Plot saved as image to {save_path}")
-                    except Exception as img_error:
-                        logger.warning(
-                            f"Could not save image to {save_path}: {str(img_error)}. "
-                            "Install kaleido to enable image export."
-                        )
-
-                    # Always save as HTML for interactive viewing
-                    html_path = save_path.replace(".jpg", ".html").replace(
-                        ".png", ".html"
-                    )
-                    if html_path == save_path:
-                        html_path = save_path + ".html"
-                    fig.write_html(html_path)
-
-                    logger.info(f"Plot saved as HTML to {html_path}")
-                except Exception as e:
-                    logger.error(f"Error saving plot: {str(e)}")
-                    raise RuntimeError(
-                        f"Failed to save plot to {save_path}"
-                    ) from e
-
-            logger.debug("Plot creation completed successfully")
-            return fig
-
-        except Exception as e:
-            logger.error(f"Error creating method comparison plot: {str(e)}")
-            raise RuntimeError(
-                f"Failed to create method comparison plot: {str(e)}"
-            ) from e
-
-    def summary(
-        self,
-        data_subset: Optional[str] = None,
-    ) -> pd.DataFrame:
-        """Provide a summary of method comparison statistics.
-
-        Args:
-            data_subset: Which data subset to summarize ("train" or "test").
-                Only applicable for train_test format data.
-
-        Returns:
-            DataFrame with summary statistics by method including:
-                - Mean loss across quantiles
-                - Best/worst quantiles and their corresponding losses
-
-        Raises:
-            ValueError: If data_subset is invalid or not available
-        """
-        logger.debug("Generating method comparison summary statistics")
-
-        try:
-            methods = self.method_results_df.index.tolist()
-            summary_data = []
-
-            for method in methods:
-                method_data = self.method_results_df.loc[method]
-
-                quantile_cols = [
-                    col for col in method_data.index if col != "mean_loss"
-                ]
-
-                if "mean_loss" in method_data.index:
-                    mean_loss = method_data["mean_loss"]
-                else:
-                    mean_loss = (
-                        method_data[quantile_cols].mean()
-                        if quantile_cols
-                        else np.nan
-                    )
-
-                if quantile_cols:
-                    best_quantile = method_data[quantile_cols].idxmin()
-                    best_loss = method_data[quantile_cols].min()
-                    worst_quantile = method_data[quantile_cols].idxmax()
-                    worst_loss = method_data[quantile_cols].max()
-                else:
-                    best_quantile = worst_quantile = "N/A"
-                    best_loss = worst_loss = np.nan
-
-                summary_data.append(
-                    {
-                        "Method": method,
-                        f"Mean {self.metric_name}": mean_loss,
-                        "Best Quantile": best_quantile,
-                        f"Best {self.metric_name}": best_loss,
-                        "Worst Quantile": worst_quantile,
-                        f"Worst {self.metric_name}": worst_loss,
-                    }
-                )
-
-            # Create summary DataFrame
-            summary_df = pd.DataFrame(summary_data)
-
-            # Sort by mean loss
-            if (
-                not summary_df.empty
-                and f"Mean {self.metric_name}" in summary_df.columns
-            ):
-                summary_df = summary_df.sort_values(
-                    f"Mean {self.metric_name}", ascending=True
-                )
-
-            return summary_df
-
-        except Exception as e:
-            logger.error(
-                f"Error generating method comparison summary: {str(e)}"
-            )
-            raise RuntimeError(
-                f"Failed to generate method comparison summary: {str(e)}"
-            ) from e
-
-
-# Functions to create visualization objects
-@validate_call(config=VALIDATE_CONFIG)
-def model_performance_results(
-    results: pd.DataFrame,
-    model_name: Optional[str] = None,
-    method_name: Optional[str] = None,
-) -> PerformanceResults:
-    """Create a PerformanceResults object from train/test results.
-
-    Args:
-        results: DataFrame with train and test rows, quantiles
-            as columns, and loss values.
-        model_name: Name of the model used for imputation.
-        method_name: Name of the imputation method.
-
-    Returns:
-        PerformanceResults object for visualization
-    """
-    return PerformanceResults(
-        results=results,
-        model_name=model_name,
-        method_name=method_name,
-    )
-
-
-@validate_call(config=VALIDATE_CONFIG)
-def method_comparison_results(
-    data: pd.DataFrame,
-    metric_name: str = "Quantile Loss",
-    quantiles: List[float] = QUANTILES,
-    data_format: str = "wide",
-) -> MethodComparisonResults:
-    """Create a MethodComparison object for visualizing performance comparisons.
-
-    This unified factory function supports multiple input formats:
-    - "wide": DataFrame with methods as index and quantiles as columns (and
-             optional 'mean_loss' column)
-    - "long": DataFrame with columns ["Method", "Imputed Variable", "Percentile", "Loss"]
-
-    Args:
-        data: DataFrame containing performance data in one of the supported formats.
-        metric_name: Name of the metric being compared (default: "Quantile Loss").
-        quantiles: List of quantile values (e.g., [0.05, 0.1, ...]).
-        data_format: Format of the input data ("wide" or "long").
-
-    Returns:
-        MethodComparison object for visualization
-    """
-    return MethodComparisonResults(
-        data=data,
-        metric_name=metric_name,
-        quantiles=quantiles,
-        data_format=data_format,
-    )
diff --git a/tests/README.md b/tests/README.md
index dba1a81..4082e29 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -1,8 +1,8 @@
-# MicroImpute Tests
+# Microimpute tests
 
-This directory contains tests for the MicroImpute package.
+This directory contains tests for the Microimpute package.
 
-## Test Structure
+## Test structure
 
 The test suite is organized as follows:
 
@@ -17,24 +17,16 @@ The test suite is organized as follows:
   - Tests for the common Imputer interface
   - Examples of model usage
 
-## Running Tests
+## Running tests
 
 To run the tests, use the following command from the project root:
 
 ```bash
-python -m pytest us_imputation_benchmarking/tests/
+python -m pytest tests/
 ```
 
 For more verbose output:
 
 ```bash
-python -m pytest us_imputation_benchmarking/tests/ -v
+python -m pytest tests/ -v
 ```
-
-## Model-Specific Tests
-
-For detailed information about the model-specific tests, refer to the [test_models/README.md](test_models/README.md) file, which contains:
-
-- Details about the Imputer abstract class and its implementations
-- Usage examples for each model type
-- Test descriptions for each model implementation
diff --git a/tests/__init__.py b/tests/__init__.py
index a437fcd..7594ed5 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1 +1 @@
-"""Tests for the MicroImpute package."""
+"""Tests for the Microimpute package."""
diff --git a/tests/test_autoimpute.py b/tests/test_autoimpute.py
index 9fe3301..59e01f9 100644
--- a/tests/test_autoimpute.py
+++ b/tests/test_autoimpute.py
@@ -1,13 +1,14 @@
-"""
-Test the autoimpute function.
-"""
+"""Comprehensive tests for the autoimpute functionality."""
 
+import numpy as np
 import pandas as pd
+import pytest
 from sklearn.datasets import load_diabetes
 
-from microimpute.comparisons.autoimpute import autoimpute
-from microimpute.visualizations.plotting import *
+from microimpute.comparisons.autoimpute import autoimpute, AutoImputeResult
+from microimpute.visualizations import *
 
+# Check if Matching is available
 try:
     from microimpute.models import Matching
 
@@ -16,20 +17,58 @@
     HAS_MATCHING = False
 
 
-def test_autoimpute_basic() -> None:
-    """Test that autoimpute returns expected data structures."""
+# === Fixtures ===
+
+
+@pytest.fixture
+def diabetes_donor() -> pd.DataFrame:
+    """Create donor dataset from diabetes data."""
     diabetes = load_diabetes()
-    diabetes_donor = pd.DataFrame(
-        diabetes.data, columns=diabetes.feature_names
-    )
-    # Add random boolean variable
-    diabetes_donor["bool"] = np.random.choice(
-        [True, False], size=len(diabetes_donor)
+    df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
+    # Add boolean variable for testing
+    np.random.seed(42)
+    df["bool"] = np.random.choice([True, False], size=len(df))
+    # Add categorical variable
+    df["category"] = np.random.choice(["A", "B", "C"], size=len(df))
+    return df
+
+
+@pytest.fixture
+def diabetes_receiver() -> pd.DataFrame:
+    """Create receiver dataset from diabetes data."""
+    diabetes = load_diabetes()
+    return pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
+
+
+@pytest.fixture
+def simple_data() -> tuple:
+    """Create simple donor and receiver datasets."""
+    np.random.seed(42)
+    n_samples = 100
+
+    donor = pd.DataFrame(
+        {
+            "x1": np.random.randn(n_samples),
+            "x2": np.random.randn(n_samples),
+            "y1": np.random.randn(n_samples),
+            "y2": np.random.randn(n_samples),
+        }
     )
-    diabetes_receiver = pd.DataFrame(
-        diabetes.data, columns=diabetes.feature_names
+
+    receiver = pd.DataFrame(
+        {"x1": np.random.randn(50), "x2": np.random.randn(50)}
     )
 
+    return donor, receiver
+
+
+# === Basic Functionality Tests ===
+
+
+def test_autoimpute_basic_structure(
+    diabetes_donor: pd.DataFrame, diabetes_receiver: pd.DataFrame
+) -> None:
+    """Test that autoimpute returns expected data structures."""
     predictors = ["age", "sex", "bmi", "bp"]
     imputed_variables = ["s1", "bool"]
 
@@ -42,42 +81,342 @@ def test_autoimpute_basic() -> None:
         receiver_data=diabetes_receiver,
         predictors=predictors,
         imputed_variables=imputed_variables,
-        hyperparameters=hyperparams,
-        log_level="INFO",
+        hyperparameters={
+            "QRF": {"n_estimators": 50},
+            "Matching": {"constrained": True},
+        },
+        log_level="WARNING",
     )
 
-    model_imputations = results.imputations
-    imputed_data = results.receiver_data
-    method_results_df = results.cv_results
+    # Check return type
+    assert isinstance(results, AutoImputeResult)
 
-    # Check that the imputations is a dictionary of dataframes
-    assert isinstance(model_imputations, dict)
-    for model, imputations in model_imputations.items():
+    # Check imputations structure
+    assert isinstance(results.imputations, dict)
+    assert "best_method" in results.imputations
+    for model_name, imputations in results.imputations.items():
         assert isinstance(imputations, pd.DataFrame)
+        if model_name != "best_method":
+            assert all(var in imputations.columns for var in imputed_variables)
+
+    # Check receiver_data structure
+    assert isinstance(results.receiver_data, pd.DataFrame)
+    assert len(results.receiver_data) == len(diabetes_receiver)
+    assert all(
+        var in results.receiver_data.columns for var in imputed_variables
+    )
+
+    # Check cv_results structure
+    assert isinstance(results.cv_results, pd.DataFrame)
+    assert "mean_loss" in results.cv_results.columns
+    assert 0.05 in results.cv_results.columns  # First quantile
+    assert 0.95 in results.cv_results.columns  # Last quantile
 
-    # Check that the method_results_df has the expected structure
-    assert isinstance(method_results_df, pd.DataFrame)
-    # method_results_df will have quantiles as columns and model names as indices
-    assert "mean_loss" in method_results_df.columns
-    assert 0.05 in method_results_df.columns  # First quantile
-    assert 0.95 in method_results_df.columns  # Last quantile
 
-    quantiles = [q for q in method_results_df.columns if isinstance(q, float)]
+def test_autoimpute_all_models(
+    diabetes_donor: pd.DataFrame, diabetes_receiver: pd.DataFrame
+) -> None:
+    """Test autoimpute with all available models."""
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1"]
 
-    model_imputations["best_method"].to_csv(
-        "autoimpute_bestmodel_median_imputations.csv"
+    results = autoimpute(
+        donor_data=diabetes_donor,
+        receiver_data=diabetes_receiver,
+        predictors=predictors,
+        imputed_variables=imputed_variables,
+        models=None,  # Use all available models
+        impute_all=True,  # Return results for all models
+        log_level="WARNING",
     )
-    imputed_data.to_csv("autoimpute_bestmodel_imputed_dataset.csv")
 
-    method_results_df.to_csv("autoimpute_model_comparison_results.csv")
+    # Should have results for multiple models
+    assert len(results.imputations) > 2  # At least 2 models + best_method
+
+    # Check that different models might produce different results
+    model_names = [
+        name for name in results.imputations.keys() if name != "best_method"
+    ]
+    if len(model_names) >= 2:
+        model1_imputations = results.imputations[model_names[0]]
+        model2_imputations = results.imputations[model_names[1]]
+        # Different models should generally produce different imputations
+        assert not model1_imputations.equals(model2_imputations)
+
+
+def test_autoimpute_specific_models(
+    diabetes_donor: pd.DataFrame, diabetes_receiver: pd.DataFrame
+) -> None:
+    """Test autoimpute with specific models only."""
+    from microimpute.models import OLS, QRF
+
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1"]
+
+    results = autoimpute(
+        donor_data=diabetes_donor,
+        receiver_data=diabetes_receiver,
+        predictors=predictors,
+        imputed_variables=imputed_variables,
+        models=[OLS, QRF],
+        impute_all=True,  # Return results for all models
+        log_level="WARNING",
+    )
+
+    # Should have best_method and at least one of the specified models
+    assert "best_method" in results.imputations
+    # At least one of the specified models should be present
+    model_names = [
+        name for name in results.imputations.keys() if name != "best_method"
+    ]
+    assert len(model_names) >= 1
+
+    # CV results should have both models
+    assert "OLS" in results.cv_results.index
+    assert "QRF" in results.cv_results.index
+
+
+# === Hyperparameter Handling ===
+
+
+def test_autoimpute_with_hyperparameters(simple_data: tuple) -> None:
+    """Test autoimpute with custom hyperparameters."""
+    donor, receiver = simple_data
+
+    hyperparameters = {
+        "QRF": {"n_estimators": 20, "min_samples_leaf": 10},
+        "OLS": {},  # Empty dict for models without hyperparameters
+        "Matching": {"k": 3, "dist_fun": "Manhattan"},
+    }
+
+    results = autoimpute(
+        donor_data=donor,
+        receiver_data=receiver,
+        predictors=["x1", "x2"],
+        imputed_variables=["y1"],
+        hyperparameters=hyperparameters,
+        log_level="WARNING",
+    )
+
+    # Should run without errors
+    assert results is not None
+    assert "best_method" in results.imputations
+
+
+# === Edge Cases ===
+
+
+def test_autoimpute_multiple_imputed_variables(simple_data: tuple) -> None:
+    """Test autoimpute with multiple variables to impute."""
+    donor, receiver = simple_data
+
+    results = autoimpute(
+        donor_data=donor,
+        receiver_data=receiver,
+        predictors=["x1", "x2"],
+        imputed_variables=["y1", "y2"],  # Multiple variables
+        log_level="WARNING",
+    )
+
+    assert results is not None
+    assert all(var in results.receiver_data.columns for var in ["y1", "y2"])
+    assert not results.receiver_data[["y1", "y2"]].isna().any().any()
+
+
+def test_autoimpute_large_receiver() -> None:
+    """Test autoimpute with receiver larger than donor."""
+    np.random.seed(42)
+
+    donor = pd.DataFrame({"x": np.random.randn(50), "y": np.random.randn(50)})
 
+    receiver = pd.DataFrame({"x": np.random.randn(100)})  # Larger than donor
+
+    results = autoimpute(
+        donor_data=donor,
+        receiver_data=receiver,
+        predictors=["x"],
+        imputed_variables=["y"],
+        log_level="WARNING",
+    )
+
+    assert results is not None
+    assert len(results.receiver_data) == 100
+    assert not results.receiver_data["y"].isna().any()
+
+
+# === Best Method Selection ===
+
+
+def test_autoimpute_best_method_selection(simple_data: tuple) -> None:
+    """Test that best method is selected based on CV results."""
+    donor, receiver = simple_data
+
+    results = autoimpute(
+        donor_data=donor,
+        receiver_data=receiver,
+        predictors=["x1", "x2"],
+        imputed_variables=["y1"],
+        log_level="WARNING",
+    )
+
+    # Best method should have lowest mean loss
+    best_method_name = results.cv_results["mean_loss"].idxmin()
+
+    # Best method imputations should match the best performing model
+    if best_method_name in results.imputations:
+        best_method_imputations = results.imputations["best_method"]
+        specific_model_imputations = results.imputations[best_method_name]
+
+        # They should be the same
+        pd.testing.assert_frame_equal(
+            best_method_imputations, specific_model_imputations
+        )
+
+
+def test_autoimpute_cv_results_structure(simple_data: tuple) -> None:
+    """Test the structure of cross-validation results."""
+    donor, receiver = simple_data
+
+    results = autoimpute(
+        donor_data=donor,
+        receiver_data=receiver,
+        predictors=["x1", "x2"],
+        imputed_variables=["y1"],
+        log_level="WARNING",
+    )
+
+    cv_results = results.cv_results
+
+    # Check structure
+    assert isinstance(cv_results, pd.DataFrame)
+    assert "mean_loss" in cv_results.columns
+
+    # Check quantile columns
+    quantile_cols = [
+        col for col in cv_results.columns if isinstance(col, float)
+    ]
+    assert len(quantile_cols) > 0
+    assert min(quantile_cols) >= 0.0
+    assert max(quantile_cols) <= 1.0
+
+    # Check that all models have results
+    assert len(cv_results) > 0
+    assert not cv_results["mean_loss"].isna().any()
+
+
+# === Visualization Compatibility ===
+
+
+def test_autoimpute_visualization_compatibility(simple_data: tuple) -> None:
+    """Test that autoimpute results work with visualization functions."""
+    donor, receiver = simple_data
+
+    results = autoimpute(
+        donor_data=donor,
+        receiver_data=receiver,
+        predictors=["x1", "x2"],
+        imputed_variables=["y1"],
+        log_level="WARNING",
+    )
+
+    # Test that visualization can be created
     comparison_viz = method_comparison_results(
-        data=method_results_df,
+        data=results.cv_results,
         metric_name="Test Quantile Loss",
-        data_format="wide",  # Explicitly using wide format
+        data_format="wide",
     )
+
+    assert comparison_viz is not None
+
+    # Test that plot can be generated (without saving)
     fig = comparison_viz.plot(
-        title="Autoimpute Method Comparison",
+        title="Test Autoimpute Comparison",
         show_mean=True,
-        save_path="autoimpute_model_comparison.jpg",
+        save_path=None,  # Don't save
+    )
+
+    assert fig is not None
+
+
+# === Error Handling ===
+
+
+def test_autoimpute_missing_predictors() -> None:
+    """Test autoimpute with missing predictors in receiver."""
+    np.random.seed(42)
+
+    donor = pd.DataFrame(
+        {
+            "x1": np.random.randn(50),
+            "x2": np.random.randn(50),
+            "y": np.random.randn(50),
+        }
+    )
+
+    receiver = pd.DataFrame(
+        {
+            "x1": np.random.randn(10)
+            # x2 is missing
+        }
+    )
+
+    with pytest.raises(Exception):
+        autoimpute(
+            donor_data=donor,
+            receiver_data=receiver,
+            predictors=["x1", "x2"],  # x2 not in receiver
+            imputed_variables=["y"],
+            log_level="WARNING",
+        )
+
+
+def test_autoimpute_invalid_model_specification() -> None:
+    """Test autoimpute with invalid model specification."""
+    np.random.seed(42)
+
+    donor = pd.DataFrame({"x": np.random.randn(50), "y": np.random.randn(50)})
+
+    receiver = pd.DataFrame({"x": np.random.randn(10)})
+
+    # Invalid model type
+    with pytest.raises(Exception):
+        autoimpute(
+            donor_data=donor,
+            receiver_data=receiver,
+            predictors=["x"],
+            imputed_variables=["y"],
+            models=["InvalidModel"],  # String instead of class
+            log_level="WARNING",
+        )
+
+
+# === Performance Tests ===
+
+
+def test_autoimpute_consistency(simple_data: tuple) -> None:
+    """Test that autoimpute produces consistent results."""
+    donor, receiver = simple_data
+
+    # Run autoimpute twice with same data
+    results1 = autoimpute(
+        donor_data=donor,
+        receiver_data=receiver,
+        predictors=["x1", "x2"],
+        imputed_variables=["y1"],
+        log_level="WARNING",
+    )
+
+    results2 = autoimpute(
+        donor_data=donor,
+        receiver_data=receiver,
+        predictors=["x1", "x2"],
+        imputed_variables=["y1"],
+        log_level="WARNING",
+    )
+
+    # CV results should be very similar (allowing for small numerical differences)
+    np.testing.assert_allclose(
+        results1.cv_results["mean_loss"].values,
+        results2.cv_results["mean_loss"].values,
+        rtol=0.01,
     )
diff --git a/tests/test_basic.py b/tests/test_basic.py
index 06b40e7..d4c55a6 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -4,5 +4,3 @@
 def test_import() -> None:
     """Test that the package can be imported."""
     import microimpute
-
-    assert microimpute is not None
diff --git a/tests/test_models/README.md b/tests/test_models/README.md
index 75c7887..f63c7f8 100644
--- a/tests/test_models/README.md
+++ b/tests/test_models/README.md
@@ -1,4 +1,4 @@
-# Imputer Model Tests
+# Imputer model tests
 
 This directory contains tests for the `Imputer` abstract base class and its implementations.
 
@@ -12,7 +12,7 @@ The tests in this directory verify that all imputation models in this package:
 4. Can be evaluated using common testing approaches like cross-validation
 5. Provide consistent outputs in expected formats
 
-## Test Files
+## Test files
 
 - **test_imputers.py**: Verifies the common interface across all models:
   - Tests model initialization with no required arguments
@@ -40,87 +40,3 @@ The tests in this directory verify that all imputation models in this package:
   - Cross-validation evaluation on the Iris dataset
   - Verifies that the model stores donor data correctly
   - Tests that predictions maintain the expected structure
-
-## Using the Imputer Interface
-
-### Base Interface
-
-All imputation models inherit from `Imputer` and implement:
-
-```python
-def fit(self, X_train, predictors, imputed_variables, **kwargs) -> "Imputer":
-    """Fit the model to training data."""
-    pass
-
-def predict(self, test_X, quantiles=None) -> Dict[float, Union[np.ndarray, pd.DataFrame]]:
-    """Predict imputed values at specified quantiles."""
-    pass
-```
-
-### Example: Using Models Interchangeably
-
-```python
-# Function that works with any Imputer model
-def impute_values(imputer: Imputer, train_data, test_data, predictors, target):
-    # Fit the model
-    imputer.fit(train_data, predictors, [target])
-    
-    # Make predictions at median
-    predictions = imputer.predict(test_data, [0.5])
-    
-    return predictions[0.5]
-
-# Use with different model types
-ols_preds = impute_values(OLS(), train_data, test_data, predictors, target)
-qrf_preds = impute_values(QRF(), train_data, test_data, predictors, target)
-```
-
-## Available Model Implementations
-
-### OLS (Ordinary Least Squares)
-
-- Simple linear regression model
-- Assumes normally distributed residuals
-- Predicts quantiles by adding scaled normal quantiles to the mean prediction
-
-```python
-model = OLS()
-model.fit(train_data, predictors, target_vars)
-predictions = model.predict(test_data, [0.25, 0.5, 0.75])
-```
-
-### QuantReg (Quantile Regression)
-
-- Directly models conditional quantiles
-- Can capture asymmetric distributions
-- Fits separate models for each quantile
-
-```python
-model = QuantReg()
-model.fit(train_data, predictors, target_vars, quantiles=[0.25, 0.5, 0.75])
-predictions = model.predict(test_data)  # Uses pre-fitted quantiles
-```
-
-### QRF (Quantile Random Forest)
-
-- Uses random forests to model quantiles
-- Can capture complex nonlinear relationships
-- Supports RF hyperparameters through kwargs
-
-```python
-model = QRF()
-model.fit(train_data, predictors, target_vars, n_estimators=100)
-predictions = model.predict(test_data, [0.25, 0.5, 0.75])
-```
-
-### Matching (Statistical Matching)
-
-- Uses distance hot deck matching to find donors
-- Non-parametric approach based on R's StatMatch package
-- Returns matched donor values for all quantiles
-
-```python
-model = Matching()
-model.fit(train_data, predictors, target_vars)
-predictions = model.predict(test_data, [0.5])
-```
diff --git a/tests/test_models/test_imputers.py b/tests/test_models/test_imputers.py
index 6ff7d19..43440c1 100644
--- a/tests/test_models/test_imputers.py
+++ b/tests/test_models/test_imputers.py
@@ -1,8 +1,9 @@
 """
-Test module for the Imputer abstract class and its model implementations.
+Comprehensive test module for the Imputer abstract class and its implementations.
 
-This module demonstrates the compatibility and interchangeability of different
-imputer models thanks to the common Imputer interface.
+This module tests the compatibility and interchangeability of different
+imputer models through the common Imputer interface, including edge cases
+and error handling.
 """
 
 from typing import Type
@@ -12,30 +13,57 @@
 import pytest
 from sklearn.datasets import load_diabetes
 
-from microimpute.utils.data import preprocess_data
 from microimpute.config import QUANTILES
 from microimpute.models import *
+from microimpute.utils.data import preprocess_data
+
+
+# === Fixtures ===
 
 
 @pytest.fixture
 def diabetes_data() -> pd.DataFrame:
-    """Create a dataset from the Diabetes dataset for testing.
-
-    Returns:
-        A DataFrame with the Diabetes dataset.
-    """
-    # Load the Diabetes dataset
+    """Create a dataset from the Diabetes dataset for testing."""
     diabetes = load_diabetes()
-
-    # Create DataFrame with feature names
     data = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
 
     predictors = ["age", "sex", "bmi", "bp"]
     imputed_variables = ["s1"]
 
-    data = data[predictors + imputed_variables]
+    return data[predictors + imputed_variables]
+
 
-    return data
+@pytest.fixture
+def simple_data() -> pd.DataFrame:
+    """Create simple synthetic data for edge case testing."""
+    np.random.seed(42)
+    return pd.DataFrame(
+        {
+            "x1": np.random.randn(100),
+            "x2": np.random.randn(100),
+            "y": np.random.randn(100),
+        }
+    )
+
+
+@pytest.fixture
+def data_with_edge_cases() -> pd.DataFrame:
+    """Create data with various edge cases."""
+    np.random.seed(42)
+    n_samples = 100
+
+    return pd.DataFrame(
+        {
+            "numeric": np.random.randn(n_samples),
+            "constant": np.ones(n_samples),  # Constant predictor
+            "binary": np.random.choice([0, 1], n_samples),
+            "categorical": np.random.choice(["A", "B", "C"], n_samples),
+            "high_correlation": np.random.randn(
+                n_samples
+            ),  # Will be made correlated
+            "target": np.random.randn(n_samples),
+        }
+    )
 
 
 # Define all imputer model classes to test
@@ -44,24 +72,19 @@ def diabetes_data() -> pd.DataFrame:
 try:
     from microimpute.models.matching import Matching
 
-    # Add Matching model if available
     ALL_IMPUTER_MODELS.append(Matching)
 except ImportError:
-    # If Matching model is not available, skip the test
     pass
 
 
-# Parametrize tests to run for each model
+# === Basic Interface Tests ===
+
+
 @pytest.mark.parametrize(
     "model_class", ALL_IMPUTER_MODELS, ids=lambda cls: cls.__name__
 )
 def test_init_signatures(model_class: Type[Imputer]) -> None:
-    """Test that all models can be initialized without required arguments.
-
-    Args:
-        model_class: The model class to test
-    """
-    # Check that we can initialize the model without errors
+    """Test that all models can be initialized without required arguments."""
     model = model_class()
     assert (
         model.predictors is None
@@ -77,13 +100,7 @@ def test_init_signatures(model_class: Type[Imputer]) -> None:
 def test_fit_predict_interface(
     model_class: Type[Imputer], diabetes_data: pd.DataFrame
 ) -> None:
-    """Test the fit and predict methods for each model.
-    Demonstrating models can be interchanged through the Imputer interface.
-
-    Args:
-        model_class: The model class to test
-        diabetes_data: DataFrame with sample data
-    """
+    """Test the fit and predict methods for each model."""
     quantiles = QUANTILES
     predictors = ["age", "sex", "bmi", "bp"]
     imputed_variables = ["s1"]
@@ -95,7 +112,6 @@ def test_fit_predict_interface(
 
     # Fit the model
     if model_class.__name__ == "QuantReg":
-        # For QuantReg, we need to explicitly fit the quantiles
         fitted_model = model.fit(
             X_train, predictors, imputed_variables, quantiles=quantiles
         )
@@ -109,57 +125,96 @@ def test_fit_predict_interface(
     assert isinstance(
         predictions, dict
     ), f"{model_class.__name__} predict should return a dictionary"
-    assert set(predictions.keys()).issubset(set(quantiles)), (
-        f"{model_class.__name__} predict should return keys in the "
-        f"specified quantiles"
-    )
+    assert set(predictions.keys()).issubset(set(quantiles))
 
     # Check prediction shape
     for q, pred in predictions.items():
         assert pred.shape[0] == len(X_test)
+        assert not pred.isna().any().any()
 
-    # Test with default quantiles (None)
 
-    # Initialize the model
-    model_default_q = model_class()
+# === Data Type Handling Tests ===
 
-    # Fit the model
-    fitted_default_model = model_default_q.fit(
-        X_train, predictors, imputed_variables
-    )
 
-    default_predictions = fitted_default_model.predict(X_test)
-    assert isinstance(default_predictions, pd.DataFrame), (
-        f"{model_class.__name__} predict should return a DataFrame with "
-        f"default quantiles"
+@pytest.mark.parametrize(
+    "model_class", ALL_IMPUTER_MODELS, ids=lambda cls: cls.__name__
+)
+def test_categorical_variables(model_class: Type[Imputer]) -> None:
+    """Test that models handle categorical variables correctly."""
+    np.random.seed(42)
+    data = pd.DataFrame(
+        {
+            "numeric": np.random.randn(100),
+            "category": np.random.choice(["A", "B", "C"], 100),
+            "target": np.random.randn(100),
+        }
     )
 
+    X_train, X_test = preprocess_data(data)
 
-def test_string_column_validation() -> None:
-    """Test that the _validate_data method raises an error for string columns."""
-    # Create a simple dataframe with a string column
+    model = model_class()
+
+    if model_class.__name__ == "QuantReg":
+        fitted = model.fit(
+            X_train, ["numeric", "category"], ["target"], quantiles=[0.5]
+        )
+    else:
+        fitted = model.fit(X_train, ["numeric", "category"], ["target"])
+
+    predictions = fitted.predict(X_test, quantiles=[0.5])
+
+    # When quantiles specified, returns dict
+    assert isinstance(predictions, dict)
+    assert 0.5 in predictions
+    assert len(predictions[0.5]) == len(X_test)
+    assert not predictions[0.5]["target"].isna().any()
+
+
+@pytest.mark.parametrize(
+    "model_class", ALL_IMPUTER_MODELS, ids=lambda cls: cls.__name__
+)
+def test_boolean_variables(model_class: Type[Imputer]) -> None:
+    """Test that models handle boolean variables correctly."""
+    np.random.seed(42)
     data = pd.DataFrame(
-        {"numeric_col": [1, 2, 3], "string_col": ["a", "b", "c"]}
+        {
+            "numeric": np.random.randn(100),
+            "bool_var": np.random.choice([True, False], 100),
+            "target": np.random.randn(100),
+        }
     )
-    columns = ["numeric_col", "string_col"]
 
-    # Create a model to test
-    model = OLS()
+    X_train, X_test = preprocess_data(data)
 
-    data = preprocess_data(data, full_data=True)
+    model = model_class()
 
-    # Test that it raises a ValueError with the expected message
-    model._validate_data(data, columns)
+    if model_class.__name__ == "QuantReg":
+        fitted = model.fit(
+            X_train, ["numeric", "bool_var"], ["target"], quantiles=[0.5]
+        )
+    else:
+        fitted = model.fit(X_train, ["numeric", "bool_var"], ["target"])
+
+    predictions = fitted.predict(X_test, quantiles=[0.5])
+
+    # When quantiles specified, returns dict
+    assert isinstance(predictions, dict)
+    assert 0.5 in predictions
+    assert not predictions[0.5]["target"].isna().any()
 
 
-def test_imputation_categorical_bool_vars() -> None:
-    """Test that the imputer can handle categorical and boolean variables."""
+@pytest.mark.parametrize(
+    "model_class", ALL_IMPUTER_MODELS, ids=lambda cls: cls.__name__
+)
+def test_imputation_categorical_bool_targets(
+    model_class: Type[Imputer],
+) -> None:
+    """Test imputing categorical and boolean target variables."""
     diabetes = load_diabetes()
     df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
 
-    # Add random boolean variable
+    # Add random boolean and categorical targets
     df["bool"] = np.random.choice([True, False], size=len(df))
-    # Add random categorical variable with three categories
     df["categorical"] = np.random.choice(["one", "two", "three"], size=len(df))
 
     predictors = ["age", "sex", "bmi", "bp"]
@@ -167,12 +222,143 @@ def test_imputation_categorical_bool_vars() -> None:
 
     X_train, X_test = preprocess_data(df)
 
-    ols = OLS()
-    fitted_ols = ols.fit(X_train, predictors, imputed_variables)
-    ols_predictions = fitted_ols.predict(X_test)
+    model = model_class()
+    fitted_model = model.fit(X_train, predictors, imputed_variables)
+    predictions = fitted_model.predict(X_test)
+
+    # Default behavior returns DataFrame directly
+    assert isinstance(predictions, pd.DataFrame)
+    assert predictions["categorical"].dtype == "object"
+    assert predictions["bool"].dtype == "bool"
+
+
+# === Edge Cases and Error Handling ===
+
+
+@pytest.mark.parametrize(
+    "model_class", ALL_IMPUTER_MODELS, ids=lambda cls: cls.__name__
+)
+def test_single_predictor(
+    model_class: Type[Imputer], simple_data: pd.DataFrame
+) -> None:
+    """Test models with only one predictor."""
+    X_train, X_test = preprocess_data(simple_data)
+
+    model = model_class()
+
+    if model_class.__name__ == "QuantReg":
+        fitted = model.fit(X_train, ["x1"], ["y"], quantiles=[0.5])
+    else:
+        fitted = model.fit(X_train, ["x1"], ["y"])
+
+    predictions = fitted.predict(X_test, quantiles=[0.5])
+
+    # When quantiles specified, returns dict
+    assert isinstance(predictions, dict)
+    assert 0.5 in predictions
+    assert not predictions[0.5]["y"].isna().any()
+
+
+@pytest.mark.parametrize(
+    "model_class", ALL_IMPUTER_MODELS, ids=lambda cls: cls.__name__
+)
+def test_multiple_targets(
+    model_class: Type[Imputer], diabetes_data: pd.DataFrame
+) -> None:
+    """Test models with multiple target variables."""
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1", "s2", "s3"]
+
+    # Get more columns from diabetes data
+    diabetes = load_diabetes()
+    full_data = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
+    data = full_data[predictors + imputed_variables]
+
+    X_train, X_test = preprocess_data(data)
+
+    model = model_class()
+
+    if model_class.__name__ == "QuantReg":
+        fitted = model.fit(
+            X_train, predictors, imputed_variables, quantiles=[0.5]
+        )
+    else:
+        fitted = model.fit(X_train, predictors, imputed_variables)
+
+    predictions = fitted.predict(X_test, quantiles=[0.5])
+
+    # When quantiles specified, returns dict
+    assert isinstance(predictions, dict)
+    assert 0.5 in predictions
+    assert predictions[0.5].shape[1] == len(imputed_variables)
+    assert not predictions[0.5].isna().any().any()
+
+
+@pytest.mark.parametrize(
+    "model_class", ALL_IMPUTER_MODELS, ids=lambda cls: cls.__name__
+)
+def test_constant_predictor(model_class: Type[Imputer]) -> None:
+    """Test models with a constant predictor (no variance)."""
+    np.random.seed(42)
+    data = pd.DataFrame(
+        {
+            "x1": np.random.randn(100),
+            "constant": np.ones(100),  # Constant predictor
+            "y": np.random.randn(100),
+        }
+    )
+
+    X_train, X_test = preprocess_data(data)
+
+    model = model_class()
+
+    # Models should handle constant predictors gracefully
+    if model_class.__name__ == "QuantReg":
+        fitted = model.fit(X_train, ["x1", "constant"], ["y"], quantiles=[0.5])
+    else:
+        fitted = model.fit(X_train, ["x1", "constant"], ["y"])
+
+    predictions = fitted.predict(X_test, quantiles=[0.5])
+
+    # When quantiles specified, returns dict
+    assert isinstance(predictions, dict)
+    assert 0.5 in predictions
+    assert not predictions[0.5]["y"].isna().any()
 
-    assert ols_predictions["categorical"].dtype == "object"
-    assert ols_predictions["bool"].dtype == "bool"
+
+@pytest.mark.parametrize(
+    "model_class", ALL_IMPUTER_MODELS, ids=lambda cls: cls.__name__
+)
+def test_highly_correlated_predictors(model_class: Type[Imputer]) -> None:
+    """Test models with highly correlated predictors."""
+    np.random.seed(42)
+    n_samples = 100
+
+    x1 = np.random.randn(n_samples)
+    x2 = x1 + np.random.randn(n_samples) * 0.01  # Almost perfectly correlated
+
+    data = pd.DataFrame(
+        {"x1": x1, "x2": x2, "y": x1 + np.random.randn(n_samples) * 0.5}
+    )
+
+    X_train, X_test = preprocess_data(data)
+
+    model = model_class()
+
+    if model_class.__name__ == "QuantReg":
+        fitted = model.fit(X_train, ["x1", "x2"], ["y"], quantiles=[0.5])
+    else:
+        fitted = model.fit(X_train, ["x1", "x2"], ["y"])
+
+    predictions = fitted.predict(X_test, quantiles=[0.5])
+
+    # When quantiles specified, returns dict
+    assert isinstance(predictions, dict)
+    assert 0.5 in predictions
+    assert not predictions[0.5]["y"].isna().any()
+
+
+# === Weighted Training Tests ===
 
 
 @pytest.mark.parametrize(
@@ -182,9 +368,9 @@ def test_weighted_training(
     model_class: Type[Imputer], diabetes_data: pd.DataFrame
 ) -> None:
     """Ensure models can be trained using sampling weights."""
-
     X_train, _ = preprocess_data(diabetes_data)
-    # Create a simple positive weight column after preprocessing
+
+    # Create a simple positive weight column
     X_train["wgt"] = range(1, len(X_train) + 1)
 
     predictors = ["age", "sex", "bmi", "bp"]
@@ -197,7 +383,7 @@ def test_weighted_training(
             X_train,
             predictors,
             imputed_variables,
-            weight_col=X_train["wgt"],
+            weight_col="wgt",
             quantiles=QUANTILES,
         )
     else:
@@ -206,3 +392,182 @@ def test_weighted_training(
         )
 
     assert fitted is not None
+
+    # Test prediction
+    X_test = X_train.drop(columns=["wgt"]).head(10)
+    predictions = fitted.predict(X_test, quantiles=[0.5])
+
+    # When quantiles specified, returns dict
+    assert isinstance(predictions, dict)
+    assert 0.5 in predictions
+    assert not predictions[0.5].isna().any().any()
+
+
+# === Quantile-Specific Tests ===
+
+
+@pytest.mark.parametrize(
+    "model_class", ALL_IMPUTER_MODELS, ids=lambda cls: cls.__name__
+)
+def test_extreme_quantiles(
+    model_class: Type[Imputer], simple_data: pd.DataFrame
+) -> None:
+    """Test models with extreme quantile values."""
+    X_train, X_test = preprocess_data(simple_data)
+
+    extreme_quantiles = [0.01, 0.99]
+
+    model = model_class()
+
+    if model_class.__name__ == "QuantReg":
+        fitted = model.fit(
+            X_train, ["x1", "x2"], ["y"], quantiles=extreme_quantiles
+        )
+    else:
+        fitted = model.fit(X_train, ["x1", "x2"], ["y"])
+
+    predictions = fitted.predict(X_test, quantiles=extreme_quantiles)
+
+    for q in extreme_quantiles:
+        assert q in predictions
+        assert not predictions[q]["y"].isna().any()
+
+
+@pytest.mark.parametrize(
+    "model_class", ALL_IMPUTER_MODELS, ids=lambda cls: cls.__name__
+)
+def test_single_quantile(
+    model_class: Type[Imputer], simple_data: pd.DataFrame
+) -> None:
+    """Test models with a single quantile."""
+    X_train, X_test = preprocess_data(simple_data)
+
+    model = model_class()
+
+    if model_class.__name__ == "QuantReg":
+        fitted = model.fit(X_train, ["x1", "x2"], ["y"], quantiles=[0.5])
+    else:
+        fitted = model.fit(X_train, ["x1", "x2"], ["y"])
+
+    predictions = fitted.predict(X_test, quantiles=[0.5])
+
+    # When quantiles specified, returns dict
+    assert isinstance(predictions, dict)
+    assert 0.5 in predictions
+    assert not predictions[0.5]["y"].isna().any()
+
+
+# === Data Validation Tests ===
+
+
+def test_string_column_validation() -> None:
+    """Test that the _validate_data method handles string columns appropriately."""
+    data = pd.DataFrame(
+        {"numeric_col": [1, 2, 3], "string_col": ["a", "b", "c"]}
+    )
+    columns = ["numeric_col", "string_col"]
+
+    model = OLS()
+
+    # Preprocess will handle encoding
+    data = preprocess_data(data, full_data=True)
+
+    # Should not raise an error after preprocessing
+    model._validate_data(data, columns)
+
+
+@pytest.mark.parametrize(
+    "model_class", ALL_IMPUTER_MODELS, ids=lambda cls: cls.__name__
+)
+def test_missing_predictors_in_test(model_class: Type[Imputer]) -> None:
+    """Test behavior when test data is missing predictor columns."""
+    np.random.seed(42)
+    train_data = pd.DataFrame(
+        {
+            "x1": np.random.randn(50),
+            "x2": np.random.randn(50),
+            "y": np.random.randn(50),
+        }
+    )
+
+    # Test data missing x2
+    test_data = pd.DataFrame({"x1": np.random.randn(10)})
+
+    model = model_class()
+
+    if model_class.__name__ == "QuantReg":
+        fitted = model.fit(train_data, ["x1", "x2"], ["y"], quantiles=[0.5])
+    else:
+        fitted = model.fit(train_data, ["x1", "x2"], ["y"])
+
+    # Should raise an error when predictor is missing
+    with pytest.raises(Exception):
+        predictions = fitted.predict(test_data, quantiles=[0.5])
+
+
+# === Reproducibility Tests ===
+
+
+@pytest.mark.parametrize(
+    "model_class", ALL_IMPUTER_MODELS, ids=lambda cls: cls.__name__
+)
+def test_reproducibility(
+    model_class: Type[Imputer], simple_data: pd.DataFrame
+) -> None:
+    """Test that models produce reproducible results."""
+    X_train, X_test = preprocess_data(simple_data)
+
+    # First run
+    model1 = model_class()
+    fitted1 = model1.fit(X_train, ["x1", "x2"], ["y"])
+    pred1 = fitted1.predict(X_test, quantiles=[0.5])
+
+    # Second run with same data
+    model2 = model_class()
+    fitted2 = model2.fit(X_train, ["x1", "x2"], ["y"])
+    pred2 = fitted2.predict(X_test, quantiles=[0.5])
+
+    # Results should be very similar (allowing for minor numerical differences)
+    # When quantiles specified, returns dict
+    np.testing.assert_allclose(
+        pred1[0.5]["y"].values, pred2[0.5]["y"].values, rtol=1e-5
+    )
+
+
+# === Performance and Memory Tests ===
+
+
+@pytest.mark.parametrize(
+    "model_class", ALL_IMPUTER_MODELS, ids=lambda cls: cls.__name__
+)
+def test_large_number_of_predictors(model_class: Type[Imputer]) -> None:
+    """Test models with many predictors."""
+    np.random.seed(42)
+    n_samples = 50
+    n_predictors = 20
+
+    # Create data with many predictors
+    data_dict = {
+        f"x{i}": np.random.randn(n_samples) for i in range(n_predictors)
+    }
+    data_dict["y"] = np.random.randn(n_samples)
+    data = pd.DataFrame(data_dict)
+
+    X_train = data.iloc[:40].reset_index(drop=True)
+    X_test = data.iloc[40:].reset_index(drop=True)
+
+    predictors = [f"x{i}" for i in range(n_predictors)]
+
+    model = model_class()
+
+    if model_class.__name__ == "QuantReg":
+        fitted = model.fit(X_train, predictors, ["y"], quantiles=[0.5])
+    else:
+        fitted = model.fit(X_train, predictors, ["y"])
+
+    predictions = fitted.predict(X_test, quantiles=[0.5])
+
+    # When quantiles specified, returns dict
+    assert isinstance(predictions, dict)
+    assert 0.5 in predictions
+    assert not predictions[0.5]["y"].isna().any()
diff --git a/tests/test_models/test_matching.py b/tests/test_models/test_matching.py
index 9b4424a..e89dfb2 100644
--- a/tests/test_models/test_matching.py
+++ b/tests/test_models/test_matching.py
@@ -1,195 +1,360 @@
-"""Tests for the Statistical Matching imputation model."""
+"""Comprehensive tests for the Statistical Matching imputation model."""
 
 from typing import Dict, List
 
 import numpy as np
 import pandas as pd
+import pytest
 from sklearn.datasets import load_diabetes
 from sklearn.metrics import mean_squared_error
 
-from microimpute.utils.data import preprocess_data
 from microimpute.config import QUANTILES
 from microimpute.evaluations import *
+from microimpute.utils.data import preprocess_data
+from microimpute.visualizations import *
 
 try:
     from microimpute.models.matching import Matching
+
+    MATCHING_AVAILABLE = True
 except ImportError:
-    pass
-from microimpute.visualizations.plotting import *
-
-# Test Method on diabetes dataset
-diabetes_data = load_diabetes()
-diabetes_df = pd.DataFrame(
-    diabetes_data.data, columns=diabetes_data.feature_names
-)
-
-
-def test_matching_cross_validation(
-    data: pd.DataFrame = diabetes_df,
-    quantiles: List[float] = QUANTILES,
-) -> None:
-    """
-    Test the Matching model on a specific dataset.
-
-    Args:
-            data: DataFrame with the dataset of interest.
-            predictors: List of predictor variables.
-            imputed_variables: List of variables to impute.
-            quantiles: List of quantiles to predict.
-    """
+    MATCHING_AVAILABLE = False
+    pytest.skip("Matching model not available", allow_module_level=True)
+
+
+# === Fixtures ===
+
+
+@pytest.fixture
+def diabetes_data() -> pd.DataFrame:
+    """Load and prepare diabetes dataset for testing."""
+    diabetes = load_diabetes()
+    df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
     predictors = ["age", "sex", "bmi", "bp"]
     imputed_variables = ["s1", "s4"]
-    data = data[predictors + imputed_variables]
+    return df[predictors + imputed_variables]
 
-    data = preprocess_data(
-        data,
-        full_data=True,
-        normalize=False,
-    )
 
-    matching_results = cross_validate_model(
-        Matching, data, predictors, imputed_variables
+@pytest.fixture
+def simple_data() -> pd.DataFrame:
+    """Create simple synthetic data for testing."""
+    np.random.seed(42)
+    return pd.DataFrame(
+        {
+            "x1": np.random.randn(100),
+            "x2": np.random.randn(100),
+            "y": np.random.randn(100),
+        }
     )
 
-    matching_results.to_csv("matching_cv_results.csv")
-
-    assert not matching_results.isna().any().any()
 
-    perf_results_viz = model_performance_results(
-        results=matching_results,
-        model_name="QRF",
-        method_name="Cross-Validation Quantile Loss Average",
-    )
-    fig = perf_results_viz.plot(
-        title="Matching Cross-Validation Performance",
-        save_path="matching_cv_performance.jpg",
+@pytest.fixture
+def categorical_data() -> pd.DataFrame:
+    """Create data with categorical variables."""
+    np.random.seed(42)
+    n_samples = 100
+    return pd.DataFrame(
+        {
+            "numeric": np.random.randn(n_samples),
+            "category": np.random.choice(["A", "B", "C"], n_samples),
+            "target": np.random.randn(n_samples),
+        }
     )
 
 
-def test_matching_example_use(
-    data: pd.DataFrame = diabetes_df,
-    quantiles: List[float] = QUANTILES,
-) -> None:
-    """
-    Example of how to use the Statistical Matching imputer model.
-
-    This example demonstrates:
-    - Initializing a Matching model
-    - Fitting the model to donor data
-    - Predicting values for recipient data
-    - How matching uses nearest neighbors for imputation
-
-    Args:
-        data: DataFrame with the dataset of interest.
-        predictors: List of predictor variables.
-        imputed_variables: List of variables to impute.
-        quantiles: List of quantiles to predict.
-    """
+# === Basic Functionality Tests ===
+
+
+def test_matching_basic_fit_predict(diabetes_data: pd.DataFrame) -> None:
+    """Test basic Matching model fitting and prediction."""
     predictors = ["age", "sex", "bmi", "bp"]
     imputed_variables = ["s1", "s4"]
-    data = data[predictors + imputed_variables]
+
+    X_train, X_test = preprocess_data(diabetes_data)
+
+    # Initialize and fit model
+    model = Matching()
+    fitted_model = model.fit(X_train, predictors, imputed_variables)
+
+    # Predict (matching uses same value for all quantiles)
+    predictions = fitted_model.predict(X_test, quantiles=[0.5])
+
+    # Validate predictions
+    assert isinstance(predictions, dict)
+    assert 0.5 in predictions
+    assert isinstance(predictions[0.5], pd.DataFrame)
+    assert predictions[0.5].shape == (len(X_test), len(imputed_variables))
+    assert not predictions[0.5].isna().any().any()
+
+
+def test_matching_quantile_invariance(simple_data: pd.DataFrame) -> None:
+    """Test that Matching returns same values for different quantiles."""
+    X_train, X_test = preprocess_data(simple_data)
+
+    model = Matching()
+    fitted_model = model.fit(X_train, ["x1", "x2"], ["y"])
+
+    # Get predictions at different quantiles
+    predictions = fitted_model.predict(X_test, quantiles=[0.1, 0.5, 0.9])
+
+    # Matching should return same values for all quantiles
+    # (it doesn't model uncertainty)
+    for i in range(len(X_test)):
+        val_01 = predictions[0.1]["y"].iloc[i]
+        val_05 = predictions[0.5]["y"].iloc[i]
+        val_09 = predictions[0.9]["y"].iloc[i]
+        assert (
+            val_01 == val_05 == val_09
+        ), "Matching should return same value for all quantiles"
+
+
+def test_matching_donor_preservation(simple_data: pd.DataFrame) -> None:
+    """Test that Matching preserves actual donor values."""
+    X_train, X_test = preprocess_data(simple_data)
+
+    model = Matching()
+    fitted_model = model.fit(X_train, ["x1", "x2"], ["y"])
+
+    predictions = fitted_model.predict(X_test[:1], quantiles=[0.5])
+
+    # The predicted value should be from the training set
+    predicted_value = predictions[0.5]["y"].iloc[0]
+    assert (
+        predicted_value in X_train["y"].values
+    ), "Matched value should be from donor pool"
+
+
+# === Distance Functions Tests ===
+
+
+def test_matching_different_distance_functions() -> None:
+    """Test Matching with different distance functions."""
+    np.random.seed(42)
+    data = pd.DataFrame(
+        {
+            "x1": np.random.randn(100),
+            "x2": np.random.randn(100),
+            "y": np.random.randn(100),
+        }
+    )
 
     X_train, X_test = preprocess_data(data)
 
-    # Initialize Matching model
+    distance_functions = ["Manhattan", "Euclidean"]
+
+    for dist_fun in distance_functions:
+        model = Matching()
+        fitted_model = model.fit(
+            X_train, ["x1", "x2"], ["y"], dist_fun=dist_fun
+        )
+
+        predictions = fitted_model.predict(X_test[:5], quantiles=[0.5])
+
+        assert 0.5 in predictions
+        assert not predictions[0.5]["y"].isna().any()
+
+
+def test_matching_k_neighbors() -> None:
+    """Test Matching with different k values."""
+    np.random.seed(42)
+    data = pd.DataFrame(
+        {
+            "x1": np.random.randn(100),
+            "x2": np.random.randn(100),
+            "y": np.random.randn(100),
+        }
+    )
+
+    X_train, X_test = preprocess_data(data)
+
+    # Test different k values
+    for k in [1, 3, 5]:
+        model = Matching()
+        fitted_model = model.fit(X_train, ["x1", "x2"], ["y"], k=k)
+
+        predictions = fitted_model.predict(X_test[:5], quantiles=[0.5])
+
+        assert 0.5 in predictions
+        assert not predictions[0.5]["y"].isna().any()
+
+
+# === Categorical Variables ===
+
+
+def test_matching_mixed_types() -> None:
+    """Test Matching with mixed data types."""
+    np.random.seed(42)
+    n_samples = 100
+
+    data = pd.DataFrame(
+        {
+            "numeric": np.random.randn(n_samples),
+            "category": np.random.choice(["A", "B", "C"], n_samples),
+            "binary": np.random.choice([0, 1], n_samples),
+            "target_numeric": np.random.randn(n_samples),
+            "target_category": np.random.choice(["X", "Y"], n_samples),
+        }
+    )
+
+    X_train, X_test = preprocess_data(data, normalize=False)
+
     model = Matching()
+    fitted_model = model.fit(
+        X_train,
+        ["numeric", "category", "binary"],
+        ["target_numeric", "target_category"],
+    )
 
-    # Fit the model (stores donor data)
-    fitted_model = model.fit(X_train, predictors, imputed_variables)
+    predictions = fitted_model.predict(X_test, quantiles=[0.5])
+
+    assert predictions[0.5]["target_numeric"].dtype == np.float64
+    assert predictions[0.5]["target_category"].dtype == object
 
-    # Predict for the test data
-    # For matching, quantiles don't have the same meaning as in regression
-    # The same matched value is used for all quantiles
-    test_quantiles: List[float] = [0.5]  # Just one quantile for simplicity
-    predictions: Dict[float, pd.DataFrame] = fitted_model.predict(
-        X_test, test_quantiles
+
+# === Edge Cases ===
+
+
+def test_matching_single_donor(simple_data: pd.DataFrame) -> None:
+    """Test Matching with very small donor pool."""
+    # Use only 5 donors
+    X_train = simple_data[:5]
+    X_test = simple_data[90:]
+
+    model = Matching()
+    fitted_model = model.fit(X_train, ["x1", "x2"], ["y"])
+
+    predictions = fitted_model.predict(X_test, quantiles=[0.5])
+
+    assert 0.5 in predictions
+    assert not predictions[0.5]["y"].isna().any()
+
+    # All predictions should be from the small donor pool
+    for val in predictions[0.5]["y"]:
+        assert val in X_train["y"].values
+
+
+def test_matching_exact_match() -> None:
+    """Test Matching when exact matches exist."""
+    np.random.seed(42)
+
+    data = pd.DataFrame(
+        {
+            "x1": [1.0, 2.0, 3.0, 4.0, 5.0],
+            "x2": [1.0, 2.0, 3.0, 4.0, 5.0],
+            "y": [10, 20, 30, 40, 50],
+        }
     )
 
-    # Check structure of predictions
-    assert isinstance(predictions[0.5], pd.DataFrame)
+    X_train = data
+    # Test with exact match
+    X_test = pd.DataFrame({"x1": [3.0], "x2": [3.0]})
+
+    model = Matching()
+    fitted_model = model.fit(X_train, ["x1", "x2"], ["y"])
+
+    predictions = fitted_model.predict(X_test, quantiles=[0.5])
+
+    # Check that predictions exist
+    assert 0.5 in predictions
+    assert not predictions[0.5].empty
+
+
+# === Constrained Matching ===
+
+
+def test_matching_constrained_mode() -> None:
+    """Test Matching with constrained mode."""
+    np.random.seed(42)
+
+    data = pd.DataFrame(
+        {
+            "x1": np.random.randn(100),
+            "x2": np.random.randn(100),
+            "y": np.random.randn(100),
+        }
+    )
 
-    transformed_df = pd.DataFrame()
-    for quantile, pred_df in predictions.items():
-        # For each quantile and its predictions DataFrame
-        for variable in imputed_variables:
-            # Calculate the mean of predictions for this variable at this quantile
-            mean_value = pred_df[variable].mean()
-            # Create or update the value in our transformed DataFrame
-            if variable not in transformed_df.columns:
-                transformed_df[variable] = pd.Series(dtype="float64")
-            transformed_df.loc[quantile, variable] = mean_value
-
-    # Save to CSV for further analysis
-    transformed_df.to_csv("matching_predictions_by_quantile.csv")
-
-
-def test_matching_hyperparameter_tuning(
-    data: pd.DataFrame = diabetes_df,
-    quantiles: List[float] = QUANTILES,
-) -> None:
-    """
-    Test the hyperparameter tuning functionality of the Matching model.
-
-    This test verifies that:
-    1. The hyperparameter tuning process runs without errors
-    2. The tuned model performs at least as well as a default model
-    3. The tuned hyperparameters are within expected ranges
-
-    Args:
-        data: DataFrame with the dataset to use
-        predictors: List of predictor column names
-        imputed_variables: List of target column names
-        quantiles: List of quantiles to predict
-    """
+    X_train, X_test = preprocess_data(data)
+
+    model = Matching()
+    fitted_model = model.fit(X_train, ["x1", "x2"], ["y"], constrained=True)
+
+    predictions = fitted_model.predict(X_test, quantiles=[0.5])
+
+    assert 0.5 in predictions
+    assert not predictions[0.5]["y"].isna().any()
+
+
+# === Cross-Validation ===
+
+
+def test_matching_cross_validation(diabetes_data: pd.DataFrame) -> None:
+    """Test Matching model with cross-validation."""
     predictors = ["age", "sex", "bmi", "bp"]
     imputed_variables = ["s1", "s4"]
-    data = data[predictors + imputed_variables]
 
-    # Split data for training and validation
-    np.random.seed(42)  # For reproducible testing
-    train_idx = np.random.choice(
-        len(data), int(0.7 * len(data)), replace=False
+    # Preprocess without normalization for matching
+    data = preprocess_data(diabetes_data, full_data=True, normalize=False)
+
+    matching_results = cross_validate_model(
+        Matching, data, predictors, imputed_variables
     )
-    valid_idx = np.array([i for i in range(len(data)) if i not in train_idx])
 
-    train_data = data.iloc[train_idx].reset_index(drop=True)
-    valid_data = data.iloc[valid_idx].reset_index(drop=True)
+    # Validate cross-validation results
+    assert not matching_results.isna().any().any()
+    assert len(matching_results) > 0
 
-    # Preprocess training and validation data
-    X_train = preprocess_data(
-        train_data,
-        full_data=True,
+    # Test visualization capability
+    perf_results_viz = model_performance_results(
+        results=matching_results,
+        model_name="Matching",
+        method_name="Cross-Validation Quantile Loss Average",
     )
-    X_valid = preprocess_data(
-        valid_data,
-        full_data=True,
+
+    assert perf_results_viz is not None
+
+
+# === Hyperparameter Tuning ===
+
+
+def test_matching_hyperparameter_tuning(diabetes_data: pd.DataFrame) -> None:
+    """Test hyperparameter tuning for Matching model."""
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1", "s4"]
+
+    # Split data
+    np.random.seed(42)
+    train_idx = np.random.choice(
+        len(diabetes_data), int(0.7 * len(diabetes_data)), replace=False
+    )
+    valid_idx = np.array(
+        [i for i in range(len(diabetes_data)) if i not in train_idx]
     )
 
-    # Initialize Matching models - one with default parameters, one with tuning
-    default_model = Matching()
-    tuned_model = Matching()
+    train_data = diabetes_data.iloc[train_idx].reset_index(drop=True)
+    valid_data = diabetes_data.iloc[valid_idx].reset_index(drop=True)
+
+    X_train = preprocess_data(train_data, full_data=True)
+    X_valid = preprocess_data(valid_data, full_data=True)
 
-    # Fit models
+    # Fit models with and without tuning
+    default_model = Matching()
     default_fitted = default_model.fit(X_train, predictors, imputed_variables)
 
-    # Fit with hyperparameter tuning
+    tuned_model = Matching()
     tuned_fitted, best_params = tuned_model.fit(
-        X_train,
-        predictors,
-        imputed_variables,
-        tune_hyperparameters=True,  # Enable hyperparameter tuning
+        X_train, predictors, imputed_variables, tune_hyperparameters=True
     )
 
-    # Make predictions with both models
+    # Make predictions
     default_preds = default_fitted.predict(X_valid, quantiles=[0.5])
     tuned_preds = tuned_fitted.predict(X_valid, quantiles=[0.5])
 
-    # Evaluate performance on validation set
+    # Calculate MSE
     default_mse = {}
     tuned_mse = {}
 
     for var in imputed_variables:
-        # Calculate MSE for each variable
         default_mse[var] = mean_squared_error(
             X_valid[var], default_preds[0.5][var]
         )
@@ -197,30 +362,17 @@ def test_matching_hyperparameter_tuning(
             X_valid[var], tuned_preds[0.5][var]
         )
 
-    # Calculate average MSE across all variables
-    avg_default_mse = np.mean(list(default_mse.values()))
-    avg_tuned_mse = np.mean(list(tuned_mse.values()))
-
-    # Output results for inspection
-    print(f"Default model average MSE: {avg_default_mse:.4f}")
-    print(f"Tuned model average MSE: {avg_tuned_mse:.4f}")
-    print(
-        f"MSE improvement: {(avg_default_mse - avg_tuned_mse) / avg_default_mse:.2%}"
-    )
+    # Both should produce valid results
+    assert all(mse < np.inf for mse in default_mse.values())
+    assert all(mse < np.inf for mse in tuned_mse.values())
 
-    # Extract the tuned hyperparameters if available
+    # Check hyperparameters if available
     if (
         hasattr(tuned_fitted, "hyperparameters")
         and tuned_fitted.hyperparameters
     ):
-        print("Tuned hyperparameters:")
-        for param, value in tuned_fitted.hyperparameters.items():
-            print(f"  {param}: {value}")
-
-        # Verify that dist_fun is in expected set
         if "dist_fun" in tuned_fitted.hyperparameters:
-            dist_fun = tuned_fitted.hyperparameters["dist_fun"]
-            expected_dist_funs = [
+            assert tuned_fitted.hyperparameters["dist_fun"] in [
                 "Manhattan",
                 "Euclidean",
                 "Mahalanobis",
@@ -228,34 +380,67 @@ def test_matching_hyperparameter_tuning(
                 "Gower",
                 "minimax",
             ]
-            assert (
-                dist_fun in expected_dist_funs
-            ), f"dist_fun outside expected values: {dist_fun}"
-
-        # Verify that k is in reasonable range
         if "k" in tuned_fitted.hyperparameters:
-            k_value = tuned_fitted.hyperparameters["k"]
-            assert 1 <= k_value <= 10, f"k outside expected range: {k_value}"
+            assert 1 <= tuned_fitted.hyperparameters["k"] <= 10
+
+
+# === Performance Tests ===
+
 
-    # Verify that the file is saved
-    combined_results = pd.DataFrame(
+def test_matching_multiple_targets(diabetes_data: pd.DataFrame) -> None:
+    """Test Matching with multiple target variables."""
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1", "s2", "s3", "s4"]
+
+    diabetes = load_diabetes()
+    full_data = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
+    data = full_data[predictors + imputed_variables]
+
+    X_train, X_test = preprocess_data(data)
+
+    model = Matching()
+    fitted_model = model.fit(X_train, predictors, imputed_variables)
+
+    predictions = fitted_model.predict(X_test, quantiles=[0.5])
+
+    assert predictions[0.5].shape[1] == len(imputed_variables)
+    for var in imputed_variables:
+        assert var in predictions[0.5].columns
+        assert not predictions[0.5][var].isna().any()
+
+
+def test_matching_preserves_relationships() -> None:
+    """Test that Matching preserves relationships between variables."""
+    np.random.seed(42)
+    n_samples = 100
+
+    # Create data with strong relationship between targets
+    x = np.random.randn(n_samples)
+    data = pd.DataFrame(
         {
-            "Variable": imputed_variables * 2,
-            "Model": ["Default"] * len(imputed_variables)
-            + ["Tuned"] * len(imputed_variables),
-            "MSE": list(default_mse.values()) + list(tuned_mse.values()),
+            "x": x,
+            "y1": 2 * x + np.random.randn(n_samples) * 0.1,
+            "y2": 3 * x + np.random.randn(n_samples) * 0.1,
         }
     )
 
-    combined_results.to_csv(
-        "matching_hyperparameter_tuning_comparison.csv", index=False
-    )
+    X_train = data[:80]
+    X_test = data[80:][["x"]]  # Only predictors for test
 
-    # Assert that the tuned model performs at least 90% as well as the default model
-    # This is a loose check because sometimes the default model might perform better by chance,
-    # especially with limited tuning trials
-    assert_performance_comparison = False
-    if assert_performance_comparison:
-        assert (
-            avg_tuned_mse <= avg_default_mse * 1.1
-        ), "Tuned model performance significantly worse than default"
+    model = Matching()
+    fitted_model = model.fit(X_train, ["x"], ["y1", "y2"])
+
+    predictions = fitted_model.predict(X_test, quantiles=[0.5])
+
+    # Check that the relationship between y1 and y2 is preserved
+    # Since we're matching entire rows, y1 and y2 should maintain their relationship
+    pred_y1 = predictions[0.5]["y1"].values
+    pred_y2 = predictions[0.5]["y2"].values
+
+    # Each prediction should come from the same donor row
+    for i in range(len(pred_y1)):
+        # Find which donor row was matched
+        donor_mask = (X_train["y1"] == pred_y1[i]) & (
+            X_train["y2"] == pred_y2[i]
+        )
+        assert donor_mask.any(), "Predictions should come from same donor row"
diff --git a/tests/test_models/test_ols.py b/tests/test_models/test_ols.py
index e787c64..eaa7793 100644
--- a/tests/test_models/test_ols.py
+++ b/tests/test_models/test_ols.py
@@ -1,131 +1,292 @@
-"""Tests for the OLS (Ordinary Least Squares) imputation model."""
+"""Comprehensive tests for the OLS (Ordinary Least Squares) imputation model."""
 
 from typing import Dict, List
 
 import numpy as np
 import pandas as pd
+import pytest
 from sklearn.datasets import load_diabetes
 
-from microimpute.utils.data import preprocess_data
 from microimpute.config import QUANTILES
 from microimpute.evaluations import *
 from microimpute.models.ols import OLS
-from microimpute.visualizations.plotting import *
-
-# Test Method on diabetes dataset
-diabetes_data = load_diabetes()
-diabetes_df = pd.DataFrame(
-    diabetes_data.data, columns=diabetes_data.feature_names
-)
-
-predictors = ["age", "sex", "bmi", "bp"]
-imputed_variables = ["s1", "s4"]
-
-diabetes_df = diabetes_df[predictors + imputed_variables]
-
-
-def test_ols_cross_validation(
-    data: pd.DataFrame = diabetes_df,
-    predictors: List[str] = predictors,
-    imputed_variables: List[str] = imputed_variables,
-    quantiles: List[float] = QUANTILES,
-) -> None:
-    """
-    Test the OLS model on a specific dataset.
-
-    Args:
-            data: DataFrame with the dataset of interest.
-            predictors: List of predictor variables.
-            imputed_variables: List of variables to impute.
-            quantiles: List of quantiles to predict.
-    """
-    ols_results = cross_validate_model(
-        OLS, data, predictors, imputed_variables
+from microimpute.utils.data import preprocess_data
+from microimpute.visualizations import *
+
+
+# === Fixtures ===
+
+
+@pytest.fixture
+def diabetes_data() -> pd.DataFrame:
+    """Load and prepare diabetes dataset for testing."""
+    diabetes = load_diabetes()
+    df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1", "s4"]
+    return df[predictors + imputed_variables]
+
+
+@pytest.fixture
+def simple_data() -> pd.DataFrame:
+    """Create simple synthetic data for testing."""
+    np.random.seed(42)
+    return pd.DataFrame(
+        {
+            "x1": np.random.randn(100),
+            "x2": np.random.randn(100),
+            "y": np.random.randn(100),
+        }
+    )
+
+
+# === Basic Functionality Tests ===
+
+
+def test_ols_basic_fit_predict(diabetes_data: pd.DataFrame) -> None:
+    """Test basic OLS model fitting and prediction."""
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1", "s4"]
+
+    X_train, X_test = preprocess_data(diabetes_data)
+
+    # Initialize and fit model
+    model = OLS()
+    fitted_model = model.fit(X_train, predictors, imputed_variables)
+
+    # Predict at multiple quantiles
+    predictions = fitted_model.predict(
+        X_test, QUANTILES, random_quantile_sample=False
+    )
+
+    # Validate predictions
+    assert isinstance(predictions, dict)
+    assert set(predictions.keys()) == set(QUANTILES)
+
+    for q, pred_df in predictions.items():
+        assert pred_df.shape == (len(X_test), len(imputed_variables))
+        assert not pred_df.isna().any().any()
+
+
+def test_ols_symmetric_quantiles(simple_data: pd.DataFrame) -> None:
+    """Test that OLS produces symmetric quantile predictions due to normal distribution assumption."""
+    X_train, X_test = preprocess_data(simple_data)
+
+    model = OLS()
+    fitted_model = model.fit(X_train, ["x1", "x2"], ["y"])
+
+    # Predict at symmetric quantiles
+    predictions = fitted_model.predict(
+        X_test, quantiles=[0.1, 0.5, 0.9], random_quantile_sample=False
+    )
+
+    # Check symmetry around median
+    median = predictions[0.5]["y"].values
+    lower = predictions[0.1]["y"].values
+    upper = predictions[0.9]["y"].values
+
+    lower_diff = median - lower
+    upper_diff = upper - median
+
+    # OLS assumes normal distribution, so quantiles should be symmetric
+    np.testing.assert_allclose(
+        lower_diff.mean(),
+        upper_diff.mean(),
+        rtol=0.1,
+        err_msg="OLS should have symmetric quantile predictions around the median",
+    )
+
+
+def test_ols_random_quantile_sampling(simple_data: pd.DataFrame) -> None:
+    """Test OLS with random quantile sampling."""
+    X_train, X_test = preprocess_data(simple_data)
+
+    model = OLS()
+    fitted_model = model.fit(X_train, ["x1", "x2"], ["y"])
+
+    # Predict with random quantile sampling
+    predictions = fitted_model.predict(
+        X_test, quantiles=[0.5], random_quantile_sample=True
+    )
+
+    assert 0.5 in predictions
+    assert len(predictions[0.5]) == len(X_test)
+    assert not predictions[0.5]["y"].isna().any()
+
+
+# === Edge Cases ===
+
+
+def test_ols_perfect_collinearity() -> None:
+    """Test OLS behavior with perfectly collinear predictors."""
+    np.random.seed(42)
+    n_samples = 100
+
+    x1 = np.random.randn(n_samples)
+
+    data = pd.DataFrame(
+        {
+            "x1": x1,
+            "x2": x1 * 2,  # Perfect collinearity
+            "x3": x1 * 3,  # Perfect collinearity
+            "y": x1 + np.random.randn(n_samples) * 0.1,
+        }
+    )
+
+    X_train, X_test = preprocess_data(data)
+
+    model = OLS()
+    # Should handle collinearity (might drop columns or use regularization)
+    fitted_model = model.fit(X_train, ["x1", "x2", "x3"], ["y"])
+
+    predictions = fitted_model.predict(X_test, quantiles=[0.5])
+
+    assert 0.5 in predictions
+    assert not predictions[0.5]["y"].isna().any()
+
+
+def test_ols_constant_target() -> None:
+    """Test OLS with a constant target variable."""
+    np.random.seed(42)
+
+    data = pd.DataFrame(
+        {
+            "x1": np.random.randn(100),
+            "x2": np.random.randn(100),
+            "y": np.ones(100),  # Constant target
+        }
     )
 
-    ols_results.to_csv("ols_cv_results.csv")
+    X_train, X_test = preprocess_data(data)
+
+    model = OLS()
+    fitted_model = model.fit(X_train, ["x1", "x2"], ["y"])
+
+    predictions = fitted_model.predict(X_test, quantiles=[0.1, 0.5, 0.9])
+
+    # All predictions should be close to 1 (the constant value)
+    for q in [0.1, 0.5, 0.9]:
+        assert np.allclose(predictions[q]["y"].values, 1.0, rtol=0.1)
+
+
+# === Cross-Validation Test ===
 
+
+def test_ols_cross_validation(diabetes_data: pd.DataFrame) -> None:
+    """Test OLS model with cross-validation."""
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1", "s4"]
+
+    ols_results = cross_validate_model(
+        OLS, diabetes_data, predictors, imputed_variables
+    )
+
+    # Validate cross-validation results
     assert not ols_results.isna().any().any()
+    assert len(ols_results) > 0
 
+    # Test visualization capability
     perf_results_viz = model_performance_results(
         results=ols_results,
         model_name="OLS",
         method_name="Cross-Validation Quantile Loss Average",
     )
-    fig = perf_results_viz.plot(
-        title="OLS Cross-Validation Performance",
-        save_path="ols_cv_performance.jpg",
+
+    assert perf_results_viz is not None
+
+
+# === Extreme Values ===
+
+
+def test_ols_extreme_values() -> None:
+    """Test OLS with extreme values in data."""
+    np.random.seed(42)
+
+    data = pd.DataFrame(
+        {
+            "x1": np.random.randn(100) * 1000,  # Large scale
+            "x2": np.random.randn(100) * 0.001,  # Small scale
+            "y": np.random.randn(100),
+        }
     )
 
+    X_train, X_test = preprocess_data(data)
+
+    model = OLS()
+    fitted_model = model.fit(X_train, ["x1", "x2"], ["y"])
+
+    predictions = fitted_model.predict(X_test, quantiles=[0.5])
+
+    assert 0.5 in predictions
+    assert not predictions[0.5]["y"].isna().any()
+    assert np.all(np.isfinite(predictions[0.5]["y"].values))
+
+
+def test_ols_outliers() -> None:
+    """Test OLS robustness to outliers."""
+    np.random.seed(42)
+
+    data = pd.DataFrame({"x": np.random.randn(100), "y": np.random.randn(100)})
+
+    # Add outliers
+    data.loc[0, "y"] = 100  # Large outlier
+    data.loc[1, "y"] = -100  # Large outlier
 
-def test_ols_example(
-    data: pd.DataFrame = diabetes_df,
-    predictors: List[str] = predictors,
-    imputed_variables: List[str] = imputed_variables,
-    quantiles: List[float] = QUANTILES,
-) -> None:
-    """
-    Example of how to use the OLS imputer model.
-
-    This example demonstrates:
-    - Initializing an OLS model
-    - Fitting the model to training data
-    - Predicting quantiles on test data
-    - How OLS models assume normally distributed residuals
-
-    Args:
-        data: DataFrame with the dataset to use.
-        predictors: List of predictor column names.
-        imputed_variables: List of target column names.
-        quantiles: List of quantiles to predict.
-    """
     X_train, X_test = preprocess_data(data)
 
-    # Initialize OLS model
     model = OLS()
+    fitted_model = model.fit(X_train, ["x"], ["y"])
 
-    # Fit the model
-    fitted_model = model.fit(X_train, predictors, imputed_variables)
+    predictions = fitted_model.predict(X_test, quantiles=[0.5])
 
-    # Predict at multiple quantiles
-    predictions: Dict[float, pd.DataFrame] = fitted_model.predict(
-        X_test,
-        quantiles,
-        random_quantile_sample=False,
+    assert 0.5 in predictions
+    assert not predictions[0.5]["y"].isna().any()
+
+
+# === Performance Tests ===
+
+
+def test_ols_prediction_quality(diabetes_data: pd.DataFrame) -> None:
+    """Test OLS prediction quality on real data."""
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1"]
+
+    data = diabetes_data[predictors + imputed_variables]
+
+    # Split data
+    np.random.seed(42)
+    train_idx = np.random.choice(
+        len(data), int(0.8 * len(data)), replace=False
     )
+    test_idx = np.array([i for i in range(len(data)) if i not in train_idx])
 
-    # Check structure of predictions
-    assert isinstance(predictions, dict)
-    assert set(predictions.keys()) == set(quantiles)
+    train_data = data.iloc[train_idx].reset_index(drop=True)
+    test_data = data.iloc[test_idx].reset_index(drop=True)
 
-    # Demonstrate how OLS uses normal distribution assumption
-    median_pred = predictions[0.5]
-    q10_pred = predictions[0.1]
-    q90_pred = predictions[0.9]
+    X_train = preprocess_data(
+        train_data, full_data=True, train_size=1.0, test_size=0.0
+    )
+    X_test = preprocess_data(
+        test_data, full_data=True, train_size=1.0, test_size=0.0
+    )
 
-    # The difference between q90 and median should approximately equal
-    # the difference between median and q10 for OLS (symmetric distribution)
-    upper_diff = q90_pred - median_pred
-    lower_diff = median_pred - q10_pred
+    # Fit model
+    model = OLS()
+    fitted_model = model.fit(X_train, predictors, imputed_variables)
 
-    # Allow some numerical error
-    np.testing.assert_allclose(
-        upper_diff.mean(),
-        lower_diff.mean(),
-        rtol=0.1,
-        err_msg="OLS should have symmetric quantile predictions around the median",
+    # Get predictions
+    predictions = fitted_model.predict(
+        X_test, quantiles=[0.5], random_quantile_sample=False
     )
 
-    transformed_df = pd.DataFrame()
-    for quantile, pred_df in predictions.items():
-        # For each quantile and its predictions DataFrame
-        for variable in imputed_variables:
-            # Calculate the mean of predictions for this variable at this quantile
-            mean_value = pred_df[variable].mean()
-            # Create or update the value in our transformed DataFrame
-            if variable not in transformed_df.columns:
-                transformed_df[variable] = pd.Series(dtype="float64")
-            transformed_df.loc[quantile, variable] = mean_value
-
-    # Save to CSV for further analysis
-    transformed_df.to_csv("ols_predictions_by_quantile.csv")
+    # Calculate correlation with true values
+    true_values = X_test["s1"].values
+    pred_values = predictions[0.5]["s1"].values
+
+    correlation = np.corrcoef(true_values, pred_values)[0, 1]
+
+    # OLS should achieve reasonable correlation
+    assert correlation > 0.2, f"OLS correlation too low: {correlation}"
+
+    # Check prediction variance is reasonable
+    assert np.var(pred_values) > 0, "OLS predictions have no variance"
diff --git a/tests/test_models/test_qrf.py b/tests/test_models/test_qrf.py
index bf275f4..a067a38 100644
--- a/tests/test_models/test_qrf.py
+++ b/tests/test_models/test_qrf.py
@@ -1,146 +1,238 @@
-"""Tests for the Quantile Regression Forest imputation model."""
+"""Comprehensive tests for the Quantile Regression Forest imputation model.
 
+This file combines and consolidates tests from test_qrf.py and test_qrf_extended.py,
+removing duplicates and ensuring comprehensive coverage.
+"""
+
+import io
+import logging
+import re
 from typing import Dict, List
 
 import numpy as np
 import pandas as pd
+import pytest
 from sklearn.datasets import load_diabetes
 from sklearn.metrics import mean_squared_error
 
-from microimpute.utils.data import preprocess_data
 from microimpute.config import QUANTILES
 from microimpute.evaluations import *
-from microimpute.models.qrf import QRF
-from microimpute.visualizations.plotting import *
+from microimpute.models.qrf import QRF, _QRFModel
+from microimpute.utils.data import preprocess_data
+from microimpute.visualizations import *
 
-# Test Method on diabetes dataset
-diabetes_data = load_diabetes()
-diabetes_df = pd.DataFrame(
-    diabetes_data.data, columns=diabetes_data.feature_names
-)
 
+# === Fixtures and Test Data ===
 
-def test_qrf_cross_validation(
-    data: pd.DataFrame = diabetes_df,
-    quantiles: List[float] = QUANTILES,
-) -> None:
-    """
-    Test the QRF model on a specific dataset.
-
-    Args:
-            data: DataFrame with the dataset of interest.
-            predictors: List of predictor variables.
-            imputed_variables: List of variables to impute.
-            quantiles: List of quantiles to predict.
-    """
-    predictors = ["age", "sex", "bmi", "bp"]
-    imputed_variables = ["s1", "s4"]
-    data = data[predictors + imputed_variables]
-
-    qrf_results = cross_validate_model(
-        QRF, data, predictors, imputed_variables
-    )
 
-    qrf_results.to_csv("qrf_cv_results.csv")
+@pytest.fixture
+def diabetes_data() -> pd.DataFrame:
+    """Load and prepare diabetes dataset for testing."""
+    diabetes = load_diabetes()
+    return pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
 
-    assert not qrf_results.isna().any().any()
 
-    perf_results_viz = model_performance_results(
-        results=qrf_results,
-        model_name="QRF",
-        method_name="Cross-Validation Quantile Loss Average",
+@pytest.fixture
+def simple_data() -> pd.DataFrame:
+    """Create simple synthetic data for testing basic functionality."""
+    np.random.seed(42)
+    n_samples = 100
+    return pd.DataFrame(
+        {
+            "x1": np.random.randn(n_samples),
+            "x2": np.random.randn(n_samples),
+            "y": np.random.randn(n_samples),
+        }
     )
-    fig = perf_results_viz.plot(
-        title="QRF Cross-Validation Performance",
-        save_path="qrf_cv_performance.jpg",
+
+
+@pytest.fixture
+def categorical_data() -> pd.DataFrame:
+    """Create data with categorical variables for testing encoding."""
+    np.random.seed(42)
+    n_samples = 100
+    return pd.DataFrame(
+        {
+            "numeric1": np.random.randn(n_samples),
+            "numeric2": np.random.randn(n_samples),
+            "category": np.random.choice(["A", "B", "C"], n_samples),
+            "target": np.random.randn(n_samples),
+        }
     )
 
 
-def test_qrf_example(
-    data: pd.DataFrame = diabetes_df,
-    quantiles: List[float] = QUANTILES,
-) -> None:
-    """
-    Example of how to use the Quantile Random Forest imputer model.
-
-    This example demonstrates:
-    - Initializing a QRF model
-    - Fitting the model with optional hyperparameters
-    - Predicting quantiles on test data
-    - How QRF can capture complex nonlinear relationships
-
-    Args:
-        data: DataFrame with the dataset to use.
-        predictors: List of predictor column names.
-        imputed_variables: List of target column names.
-        quantiles: List of quantiles to predict.
-    """
+# === Basic Functionality Tests ===
+
+
+def test_qrf_basic_fit_predict(diabetes_data: pd.DataFrame) -> None:
+    """Test basic QRF model fitting and prediction."""
     predictors = ["age", "sex", "bmi", "bp"]
     imputed_variables = ["s1", "s4"]
-    data = data[predictors + imputed_variables]
+    data = diabetes_data[predictors + imputed_variables]
 
     X_train, X_test = preprocess_data(data)
 
-    # Initialize QRF model
+    # Initialize and fit model
     model = QRF()
-
-    # Fit the model with RF hyperparameters
     fitted_model = model.fit(
         X_train,
         predictors,
         imputed_variables,
-        n_estimators=100,  # Number of trees
-        min_samples_leaf=5,  # Min samples in leaf nodes
+        n_estimators=50,
+        min_samples_leaf=5,
     )
 
     # Predict at multiple quantiles
-    predictions: Dict[float, pd.DataFrame] = fitted_model.predict(
-        X_test, quantiles
-    )
+    predictions = fitted_model.predict(X_test, quantiles=QUANTILES)
 
-    # Check structure of predictions
+    # Validate predictions
     assert isinstance(predictions, dict)
-    assert set(predictions.keys()) == set(quantiles)
-
-    transformed_df = pd.DataFrame()
-    for quantile, pred_df in predictions.items():
-        # For each quantile and its predictions DataFrame
-        for variable in imputed_variables:
-            # Calculate the mean of predictions for this variable at this quantile
-            mean_value = pred_df[variable].mean()
-            # Create or update the value in our transformed DataFrame
-            if variable not in transformed_df.columns:
-                transformed_df[variable] = pd.Series(dtype="float64")
-            transformed_df.loc[quantile, variable] = mean_value
-
-    # Save to CSV for further analysis
-    transformed_df.to_csv("qrf_predictions_by_quantile.csv")
-
-
-def test_qrf_hyperparameter_tuning(
-    data: pd.DataFrame = diabetes_df,
-    quantiles: List[float] = QUANTILES,
+    assert set(predictions.keys()) == set(QUANTILES)
+
+    for q, pred_df in predictions.items():
+        assert pred_df.shape == (len(X_test), len(imputed_variables))
+        assert not pred_df.isna().any().any()
+
+    # Test default quantiles - returns DataFrame directly for single quantile
+    default_predictions = fitted_model.predict(X_test)
+    assert isinstance(default_predictions, pd.DataFrame)
+
+
+def test_qrf_sequential_imputation(diabetes_data: pd.DataFrame) -> None:
+    """Test sequential imputation where later variables use earlier ones as predictors."""
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1", "s2", "s3"]
+    data = diabetes_data[predictors + imputed_variables]
+
+    X_train, X_test = preprocess_data(data)
+
+    # Fit model with sequential imputation
+    model = QRF()
+    fitted_model = model.fit(
+        X_train, predictors, imputed_variables, n_estimators=30
+    )
+
+    # Get predictions
+    small_test = X_test.head(5).copy()
+    sequential_preds = fitted_model.predict(small_test, quantiles=[0.5])[0.5]
+
+    # Compare with parallel imputation (each variable independently)
+    parallel_predictions = {}
+    for var in imputed_variables:
+        single_model = QRF()
+        single_fitted = single_model.fit(
+            X_train, predictors, [var], n_estimators=30
+        )
+        single_pred = single_fitted.predict(small_test, quantiles=[0.5])
+        parallel_predictions[var] = single_pred[0.5][var]
+
+    # Sequential should differ from parallel for later variables
+    differences_found = False
+    for var in imputed_variables[1:]:
+        seq_values = sequential_preds[var].values
+        par_values = parallel_predictions[var].values
+        if not np.allclose(seq_values, par_values, rtol=1e-5):
+            differences_found = True
+            break
+
+    assert (
+        differences_found
+    ), "Sequential imputation should differ from parallel"
+
+    # Test that order matters
+    reversed_model = QRF()
+    reversed_fitted = reversed_model.fit(
+        X_train, predictors, imputed_variables[::-1], n_estimators=30
+    )
+    reversed_preds = reversed_fitted.predict(small_test, quantiles=[0.5])[0.5]
+
+    # Middle variable should differ when imputed in different orders
+    assert not np.allclose(
+        sequential_preds["s2"].values, reversed_preds["s2"].values, rtol=1e-5
+    ), "Imputation order should affect results"
+
+
+def test_qrf_beta_distribution_sampling():
+    """Test different mean_quantile values for beta distribution sampling."""
+    np.random.seed(42)
+
+    # Create simple dataset
+    data = pd.DataFrame(
+        {
+            "x": np.random.randn(200),
+            "y": np.random.randn(200),
+        }
+    )
+
+    train_data = data[:150]
+    test_data = data[150:]
+
+    model = QRF()
+    fitted_model = model.fit(
+        train_data,
+        predictors=["x"],
+        imputed_variables=["y"],
+        n_estimators=50,
+    )
+
+    # Test extreme quantiles
+    extreme_low = fitted_model.predict(test_data[["x"]], quantiles=[0.01])
+    extreme_high = fitted_model.predict(test_data[["x"]], quantiles=[0.99])
+    median = fitted_model.predict(test_data[["x"]], quantiles=[0.5])
+
+    # Verify ordering
+    for i in range(len(test_data)):
+        assert extreme_low[0.01]["y"].iloc[i] <= median[0.5]["y"].iloc[i]
+        assert median[0.5]["y"].iloc[i] <= extreme_high[0.99]["y"].iloc[i]
+
+
+# === Categorical Variable Tests ===
+
+
+def test_qrf_missing_categorical_levels_in_test(
+    categorical_data: pd.DataFrame,
 ) -> None:
-    """
-    Test the hyperparameter tuning functionality of the QRF model.
-
-    This test verifies that:
-    1. The hyperparameter tuning process runs without errors
-    2. The tuned model performs at least as well as a default model
-    3. The tuned hyperparameters are within expected ranges
-
-    Args:
-        data: DataFrame with the dataset to use
-        predictors: List of predictor column names
-        imputed_variables: List of target column names
-        quantiles: List of quantiles to predict
-    """
+    """Test handling of missing categorical levels in test data."""
+    # Training has A, B, C
+    train_data = categorical_data.copy()
+
+    # Test only has A, B (missing C)
+    test_data = pd.DataFrame(
+        {
+            "numeric1": np.random.randn(20),
+            "numeric2": np.random.randn(20),
+            "category": np.random.choice(["A", "B"], 20),
+            "target": np.nan,
+        }
+    )
+
+    model = QRF()
+    fitted_model = model.fit(
+        train_data,
+        predictors=["numeric1", "numeric2", "category"],
+        imputed_variables=["target"],
+        n_estimators=20,
+    )
+
+    # Should handle missing category gracefully
+    predictions = fitted_model.predict(
+        test_data[["numeric1", "numeric2", "category"]]
+    )
+    # Default returns DataFrame directly
+    assert not predictions["target"].isna().any()
+
+
+# === Hyperparameter Tuning Tests ===
+
+
+def test_qrf_hyperparameter_tuning(diabetes_data: pd.DataFrame) -> None:
+    """Test hyperparameter tuning functionality."""
     predictors = ["age", "sex", "bmi", "bp"]
     imputed_variables = ["s1", "s4"]
-    data = data[predictors + imputed_variables]
+    data = diabetes_data[predictors + imputed_variables]
 
-    # Split data for training and validation
-    np.random.seed(42)  # For reproducible testing
+    # Split data
+    np.random.seed(42)
     train_idx = np.random.choice(
         len(data), int(0.7 * len(data)), replace=False
     )
@@ -149,7 +241,6 @@ def test_qrf_hyperparameter_tuning(
     train_data = data.iloc[train_idx].reset_index(drop=True)
     valid_data = data.iloc[valid_idx].reset_index(drop=True)
 
-    # Preprocess training and validation data
     X_train = preprocess_data(
         train_data, full_data=True, train_size=1.0, test_size=0.0
     )
@@ -157,31 +248,24 @@ def test_qrf_hyperparameter_tuning(
         valid_data, full_data=True, train_size=1.0, test_size=0.0
     )
 
-    # Initialize QRF models - one with default parameters, one with tuning
+    # Fit models with and without tuning
     default_model = QRF()
-    tuned_model = QRF()
-
-    # Fit models
     default_fitted = default_model.fit(X_train, predictors, imputed_variables)
 
-    # Fit with hyperparameter tuning
+    tuned_model = QRF()
     tuned_fitted, best_params = tuned_model.fit(
-        X_train,
-        predictors,
-        imputed_variables,
-        tune_hyperparameters=True,  # Enable hyperparameter tuning
+        X_train, predictors, imputed_variables, tune_hyperparameters=True
     )
 
-    # Make predictions with both models
+    # Compare predictions
     default_preds = default_fitted.predict(X_valid, quantiles=[0.5])
     tuned_preds = tuned_fitted.predict(X_valid, quantiles=[0.5])
 
-    # Evaluate performance on validation set
+    # Calculate MSE
     default_mse = {}
     tuned_mse = {}
 
     for var in imputed_variables:
-        # Calculate MSE for each variable
         default_mse[var] = mean_squared_error(
             X_valid[var], default_preds[0.5][var]
         )
@@ -189,226 +273,521 @@ def test_qrf_hyperparameter_tuning(
             X_valid[var], tuned_preds[0.5][var]
         )
 
-    # Calculate average MSE across all variables
-    avg_default_mse = np.mean(list(default_mse.values()))
-    avg_tuned_mse = np.mean(list(tuned_mse.values()))
-
-    # Output results for inspection
-    print(f"Default model average MSE: {avg_default_mse:.4f}")
-    print(f"Tuned model average MSE: {avg_tuned_mse:.4f}")
-    print(
-        f"MSE improvement: {(avg_default_mse - avg_tuned_mse) / avg_default_mse:.2%}"
-    )
-
-    # Extract the tuned hyperparameters
-    tuned_params = {}
+    # Verify hyperparameters are reasonable
     for var in imputed_variables:
         model = tuned_fitted.models[var]
-        # Extract hyperparameters from the underlying model
         if hasattr(model, "rf"):
-            for param_name in [
-                "n_estimators",
-                "min_samples_split",
-                "min_samples_leaf",
-                "max_features",
-            ]:
-                if hasattr(model.rf, param_name):
-                    param_value = getattr(model.rf, param_name)
-                    if param_name not in tuned_params:
-                        tuned_params[param_name] = []
-                    tuned_params[param_name].append(param_value)
-
-    # Output tuned hyperparameters
-    print("Tuned hyperparameters:")
-    for param, values in tuned_params.items():
-        print(f"  {param}: {values}")
-
-    # Verify that n_estimators is in reasonable range
-    if "n_estimators" in tuned_params:
-        for n_est in tuned_params["n_estimators"]:
-            assert (
-                50 <= n_est <= 300
-            ), f"n_estimators outside expected range: {n_est}"
-
-    # Verify that min_samples_leaf is in reasonable range
-    if "min_samples_leaf" in tuned_params:
-        for min_leaf in tuned_params["min_samples_leaf"]:
-            assert (
-                1 <= min_leaf <= 10
-            ), f"min_samples_leaf outside expected range: {min_leaf}"
-
-    # Verify that the file is saved
-    combined_results = pd.DataFrame(
+            if hasattr(model.rf, "n_estimators"):
+                assert 50 <= model.rf.n_estimators <= 300
+            if hasattr(model.rf, "min_samples_leaf"):
+                assert 1 <= model.rf.min_samples_leaf <= 10
+
+
+# === Memory Management and Performance Tests ===
+
+
+def test_qrf_memory_efficient_mode() -> None:
+    """Test memory-efficient mode with cleanup intervals."""
+    np.random.seed(42)
+    n_samples = 30
+
+    # Create data with many variables
+    data_dict = {
+        "predictor1": np.random.randn(n_samples),
+        "predictor2": np.random.randn(n_samples),
+    }
+
+    # Add multiple target variables
+    for i in range(15):
+        data_dict[f"target{i}"] = np.random.randn(n_samples)
+
+    data = pd.DataFrame(data_dict)
+
+    # Set up logging
+    log_stream = io.StringIO()
+    handler = logging.StreamHandler(log_stream)
+    handler.setLevel(logging.INFO)
+
+    # Initialize with memory-efficient mode
+    model = QRF(log_level="INFO", memory_efficient=True, cleanup_interval=3)
+    model.logger.addHandler(handler)
+
+    # Fit model
+    fitted_model = model.fit(
+        data,
+        predictors=["predictor1", "predictor2"],
+        imputed_variables=[f"target{i}" for i in range(15)],
+        n_estimators=5,
+    )
+
+    log_output = log_stream.getvalue()
+
+    # Verify memory management features
+    assert model.cleanup_interval == 3
+    assert model.memory_efficient == True
+    assert "Final memory usage:" in log_output
+
+    model.logger.removeHandler(handler)
+
+
+def test_qrf_batch_processing() -> None:
+    """Test batch processing functionality."""
+    np.random.seed(42)
+    n_samples = 30
+
+    data_dict = {
+        "predictor1": np.random.randn(n_samples),
+        "predictor2": np.random.randn(n_samples),
+    }
+
+    for i in range(10):
+        data_dict[f"target{i}"] = np.random.randn(n_samples)
+
+    data = pd.DataFrame(data_dict)
+
+    # Set up logging
+    log_stream = io.StringIO()
+    handler = logging.StreamHandler(log_stream)
+    handler.setLevel(logging.INFO)
+
+    # Initialize with batch processing
+    model = QRF(
+        log_level="INFO",
+        memory_efficient=True,
+        batch_size=3,
+        cleanup_interval=2,
+    )
+    model.logger.addHandler(handler)
+
+    # Fit model
+    fitted_model = model.fit(
+        data,
+        predictors=["predictor1", "predictor2"],
+        imputed_variables=[f"target{i}" for i in range(10)],
+        n_estimators=5,
+    )
+
+    log_output = log_stream.getvalue()
+
+    # Verify batch processing
+    assert model.batch_size == 3
+    assert "Processing 10 variables in batches of 3" in log_output
+    assert "Processing batch" in log_output
+
+    # Test predictions
+    test_data = data[["predictor1", "predictor2"]].head(5)
+    predictions = fitted_model.predict(test_data)
+
+    # Default returns DataFrame directly
+    assert not predictions.isna().any().any()
+
+    model.logger.removeHandler(handler)
+
+
+# === Logging and Progress Tracking Tests ===
+
+
+def test_qrf_detailed_logging() -> None:
+    """Test detailed progress logging functionality."""
+    np.random.seed(42)
+    n_samples = 50
+
+    data = pd.DataFrame(
+        {
+            "predictor1": np.random.randn(n_samples),
+            "predictor2": np.random.randn(n_samples),
+            "target1": np.random.randn(n_samples),
+            "target2": np.random.randn(n_samples),
+            "target3": np.random.randn(n_samples),
+        }
+    )
+
+    # Set up logging
+    log_stream = io.StringIO()
+    handler = logging.StreamHandler(log_stream)
+    handler.setLevel(logging.INFO)
+
+    model = QRF(log_level="INFO")
+    model.logger.addHandler(handler)
+
+    # Fit model
+    fitted_model = model.fit(
+        data,
+        predictors=["predictor1", "predictor2"],
+        imputed_variables=["target1", "target2", "target3"],
+        n_estimators=10,
+    )
+
+    log_output = log_stream.getvalue()
+
+    # Verify logging messages
+    assert "Training data shape:" in log_output
+    assert "Memory usage:" in log_output
+    assert "Starting imputation for 'target1'" in log_output
+    assert "Starting imputation for 'target2'" in log_output
+    assert "Starting imputation for 'target3'" in log_output
+    assert "Features:" in log_output
+    assert "Success:" in log_output
+    assert "fitted in" in log_output
+    assert "Model complexity:" in log_output
+    assert "QRF model fitting completed" in log_output
+
+    # Test prediction logging
+    log_stream.truncate(0)
+    log_stream.seek(0)
+
+    test_data = data[["predictor1", "predictor2"]].head(10)
+    predictions = fitted_model.predict(test_data)
+
+    prediction_logs = log_stream.getvalue()
+    assert "Predicting for 'target1'" in prediction_logs
+    assert "predicted in" in prediction_logs
+
+    model.logger.removeHandler(handler)
+
+
+def test_qrf_sequential_imputation_logging() -> None:
+    """Test sequential imputation logging shows correct progression."""
+    np.random.seed(42)
+    n_samples = 40
+
+    data = pd.DataFrame(
         {
-            "Variable": imputed_variables * 2,
-            "Model": ["Default"] * len(imputed_variables)
-            + ["Tuned"] * len(imputed_variables),
-            "MSE": list(default_mse.values()) + list(tuned_mse.values()),
+            "x1": np.random.randn(n_samples),
+            "x2": np.random.randn(n_samples),
+            "y1": np.random.randn(n_samples),
+            "y2": np.random.randn(n_samples),
+            "y3": np.random.randn(n_samples),
         }
     )
 
-    combined_results.to_csv(
-        "qrf_hyperparameter_tuning_comparison.csv", index=False
+    log_stream = io.StringIO()
+    handler = logging.StreamHandler(log_stream)
+    handler.setLevel(logging.INFO)
+
+    model = QRF(log_level="INFO")
+    model.logger.addHandler(handler)
+
+    fitted_model = model.fit(
+        data,
+        predictors=["x1", "x2"],
+        imputed_variables=["y1", "y2", "y3"],
+        n_estimators=8,
     )
 
-    # Assert that the tuned model performs at least 90% as well as the default model
-    # This is a loose check because sometimes the default model might perform better by chance,
-    # especially with limited tuning trials
-    assert_performance_comparison = False
-    if assert_performance_comparison:
-        assert (
-            avg_tuned_mse <= avg_default_mse * 1.1
-        ), "Tuned model performance significantly worse than default"
+    log_output = log_stream.getvalue()
 
+    # Verify sequential progression
+    assert "[1/3] Starting imputation for 'y1'" in log_output
+    assert "[2/3] Starting imputation for 'y2'" in log_output
+    assert "[3/3] Starting imputation for 'y3'" in log_output
 
-def test_qrf_imputes_multiple_variables(
-    data: pd.DataFrame = diabetes_df,
-) -> None:
-    """
-    Test that QRF can impute multiple variables.
+    # Verify feature counts increase
+    lines = log_output.split("\n")
+    feature_lines = [
+        line for line in lines if "Features:" in line and "predictors" in line
+    ]
+    assert len(feature_lines) == 3
 
-    This test verifies that:
-    1. The model can handle multiple imputed variables
-    2. The predictions are structured correctly
+    # Extract and verify feature counts
+    feature_counts = []
+    for line in feature_lines:
+        match = re.search(r"Features: (\d+) predictors", line)
+        if match:
+            feature_counts.append(int(match.group(1)))
 
-    Args:
-        None
-    """
-    predictors = ["age", "sex", "bmi", "bp"]
-    imputed_variables = ["s1", "s4"]
-    data = data[predictors + imputed_variables]
+    assert feature_counts == [2, 3, 4]  # Sequential addition of predictors
 
-    X_train, X_test = preprocess_data(data)
+    model.logger.removeHandler(handler)
 
-    # Initialize QRF model
-    model = QRF()
 
-    # Fit the model with RF hyperparameters
+# === Error Handling and Validation Tests ===
+
+
+def test_qrf_missing_variables_handling() -> None:
+    """Test handling of missing variables in imputation."""
+    np.random.seed(42)
+    n_samples = 50
+
+    data = pd.DataFrame(
+        {
+            "x1": np.random.randn(n_samples),
+            "x2": np.random.randn(n_samples),
+            "existing_var": np.random.randn(n_samples),
+        }
+    )
+
+    # Test with skip_missing=False (should raise error)
+    model_strict = QRF(log_level="WARNING")
+
+    with pytest.raises(ValueError) as excinfo:
+        model_strict.fit(
+            data,
+            predictors=["x1", "x2"],
+            imputed_variables=["existing_var", "missing_var1", "missing_var2"],
+            skip_missing=False,
+            n_estimators=5,
+        )
+
+    error_str = str(excinfo.value)
+    assert "missing_var1" in error_str
+    assert "missing_var2" in error_str
+
+    # Test with skip_missing=True (should work)
+    log_stream = io.StringIO()
+    handler = logging.StreamHandler(log_stream)
+    handler.setLevel(logging.WARNING)
+
+    model_lenient = QRF(log_level="WARNING")
+    model_lenient.logger.addHandler(handler)
+
+    fitted_model = model_lenient.fit(
+        data,
+        predictors=["x1", "x2"],
+        imputed_variables=["existing_var", "missing_var1", "missing_var2"],
+        skip_missing=True,
+        n_estimators=5,
+    )
+
+    log_output = log_stream.getvalue()
+    assert "Variables not found in X_train" in log_output
+
+    # Check only existing variable was included
+    assert len(fitted_model.imputed_variables) == 1
+    assert "existing_var" in fitted_model.imputed_variables
+
+    model_lenient.logger.removeHandler(handler)
+
+
+def test_qrf_all_variables_missing() -> None:
+    """Test behavior when all imputed variables are missing."""
+    np.random.seed(42)
+    n_samples = 30
+
+    data = pd.DataFrame(
+        {"x1": np.random.randn(n_samples), "x2": np.random.randn(n_samples)}
+    )
+
+    log_stream = io.StringIO()
+    handler = logging.StreamHandler(log_stream)
+    handler.setLevel(logging.WARNING)
+
+    model = QRF(log_level="WARNING")
+    model.logger.addHandler(handler)
+
     fitted_model = model.fit(
-        X_train,
-        predictors,
-        imputed_variables,
-        n_estimators=100,  # Number of trees
-        min_samples_leaf=5,  # Min samples in leaf nodes
+        data,
+        predictors=["x1", "x2"],
+        imputed_variables=["missing_var1", "missing_var2"],
+        skip_missing=True,
+        n_estimators=5,
     )
 
-    # Predict at multiple quantiles
-    predictions: Dict[float, pd.DataFrame] = fitted_model.predict(X_test)
+    log_output = log_stream.getvalue()
+    assert "Variables not found in X_train" in log_output
+    assert "Skipping missing variables" in log_output
+
+    # Check that no variables were included
+    assert len(fitted_model.imputed_variables) == 0
+    assert fitted_model.models == {}
 
-    # Check structure of predictions
+    # Test prediction with empty model
+    test_data = data[["x1", "x2"]].head(5)
+    predictions = fitted_model.predict(test_data)
+
+    # Default returns DataFrame directly
     assert isinstance(predictions, pd.DataFrame)
-    assert predictions.shape[1] == len(imputed_variables)
+    assert len(predictions.columns) == 0
 
+    model.logger.removeHandler(handler)
 
-def test_qrf_sequential_imputation(
-    data: pd.DataFrame = diabetes_df,
-) -> None:
-    """
-    Test that QRF performs sequential imputation correctly.
-
-    This test verifies that:
-    1. Each imputed variable uses previously imputed variables as predictors
-    2. The predictor sets are correctly tracked and used
-    3. Sequential imputation produces different results than parallel imputation
-    4. The order of imputation matters
-
-    Args:
-        data: DataFrame with the dataset to use
-    """
-    predictors = ["age", "sex", "bmi", "bp"]
-    imputed_variables = [
-        "s1",
-        "s2",
-        "s3",
-    ]  # Using 3 variables to test sequential behavior
-    data = data[predictors + imputed_variables]
 
-    X_train, X_test = preprocess_data(data)
+def test_qrf_error_handling() -> None:
+    """Test error handling in QRF model."""
+    # Test with empty data
+    with pytest.raises(Exception):
+        model = QRF()
+        model.fit(pd.DataFrame(), predictors=[], imputed_variables=[])
+
+    # Test with mismatched predictors
+    data = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
 
-    # Fit the model for sequential imputation testing
     model = QRF()
-    fitted_model = model.fit(
-        X_train,
-        predictors,
-        imputed_variables,
-        n_estimators=50,  # Smaller for faster testing
+    fitted_model = model.fit(data, predictors=["x"], imputed_variables=["y"])
+
+    # Try to predict with missing predictor
+    test_data = pd.DataFrame({"z": [7, 8, 9]})
+
+    try:
+        predictions = fitted_model.predict(test_data)
+    except Exception as e:
+        assert "preprocess data" in str(e).lower()
+
+
+# === Internal Model Tests ===
+
+
+def test_qrf_internal_model_class() -> None:
+    """Test the internal _QRFModel class directly."""
+    np.random.seed(42)
+    n_samples = 100
+
+    X = pd.DataFrame(
+        {
+            "feature1": np.random.randn(n_samples),
+            "feature2": np.random.randn(n_samples),
+            "cat_feature": np.random.choice(["X", "Y", "Z"], n_samples),
+        }
     )
+    y = pd.Series(np.random.randn(n_samples), name="target")
 
-    # Verify that sequential imputation produces different results than parallel imputation
+    logger = logging.getLogger(__name__)
 
-    # Create a small test set where we can track the behavior
-    small_test = X_test.head(5).copy()
+    # Initialize internal model
+    internal_model = _QRFModel(seed=42, logger=logger)
 
-    # Get predictions
-    predictions = fitted_model.predict(small_test, quantiles=[0.5])
-    sequential_preds = predictions[0.5]
+    # Preprocess categorical features
+    X_encoded = pd.get_dummies(X, columns=["cat_feature"], drop_first=True)
 
-    parallel_predictions = {}
+    # Fit the model
+    internal_model.fit(X_encoded, y, n_estimators=30, min_samples_leaf=5)
 
-    # Fit and predict each variable independently
-    for var in imputed_variables:
-        single_model = QRF()
-        single_fitted = single_model.fit(
-            X_train,
-            predictors,
-            [var],  # Only impute this one variable
-            n_estimators=50,
+    # Verify model attributes
+    assert internal_model.qrf is not None
+    assert internal_model.output_column == "target"
+
+    # Test predictions at different quantiles
+    for quantile in [0.1, 0.25, 0.5, 0.75, 0.9]:
+        predictions = internal_model.predict(
+            X_encoded, mean_quantile=quantile, count_samples=20
         )
-        single_pred = single_fitted.predict(small_test, quantiles=[0.5])
-        parallel_predictions[var] = single_pred[0.5][var]
+        assert isinstance(predictions, pd.Series)
+        assert len(predictions) == n_samples
+        assert predictions.name == "target"
 
-    # The sequential predictions should be different from parallel predictions
-    # (at least for variables after the first one)
-    differences_found = False
-    for var in imputed_variables[
-        1:
-    ]:  # Skip first variable as it should be the same
-        seq_values = sequential_preds[var].values
-        par_values = parallel_predictions[var].values
-        if not np.allclose(seq_values, par_values, rtol=1e-5):
-            differences_found = True
 
-    assert (
-        differences_found
-    ), "Sequential and parallel predictions are identical - sequential imputation may not be working"
+# === Cross-Validation Test ===
 
-    # Verify that the order of imputation matters
 
-    # Reverse the order of variables
-    reversed_imputed_variables = imputed_variables[::-1]  # ["s3", "s2", "s1"]
+def test_qrf_cross_validation(diabetes_data: pd.DataFrame) -> None:
+    """Test QRF model with cross-validation."""
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1", "s4"]
+    data = diabetes_data[predictors + imputed_variables]
 
-    reversed_model = QRF()
-    reversed_fitted = reversed_model.fit(
-        X_train,
-        predictors,
-        reversed_imputed_variables,
-        n_estimators=50,
+    qrf_results = cross_validate_model(
+        QRF, data, predictors, imputed_variables
+    )
+
+    # Validate cross-validation results
+    assert not qrf_results.isna().any().any()
+    assert len(qrf_results) > 0
+
+    # Test visualization capability
+    perf_results_viz = model_performance_results(
+        results=qrf_results,
+        model_name="QRF",
+        method_name="Cross-Validation Quantile Loss Average",
     )
 
-    reversed_predictions = reversed_fitted.predict(small_test, quantiles=[0.5])
-    reversed_preds = reversed_predictions[0.5]
+    assert perf_results_viz is not None
 
-    # Compare predictions for the middle variable (s2)
-    # It should be different when imputed in different orders
-    original_s2 = sequential_preds["s2"].values
-    reversed_s2 = reversed_preds["s2"].values
 
-    assert not np.allclose(original_s2, reversed_s2, rtol=1e-5), (
-        "Predictions for s2 are the same regardless of imputation order - "
-        "sequential imputation may not be working correctly"
+# === Integration Tests ===
+
+
+def test_qrf_memory_usage_tracking() -> None:
+    """Test memory usage tracking functionality."""
+    np.random.seed(42)
+    n_samples = 50
+
+    data = pd.DataFrame(
+        {
+            "x1": np.random.randn(n_samples),
+            "x2": np.random.randn(n_samples),
+            "y1": np.random.randn(n_samples),
+            "y2": np.random.randn(n_samples),
+        }
     )
 
-    # Edge case - single variable imputation should work normally
+    model = QRF()
+    memory_info = model._get_memory_usage_info()
 
-    single_var_model = QRF()
-    single_var_fitted = single_var_model.fit(
-        X_train,
-        predictors,
-        ["s1"],  # Only one variable
-        n_estimators=50,
+    # Should return string with memory info or "N/A"
+    assert isinstance(memory_info, str)
+    assert ("MB" in memory_info) or (memory_info == "N/A")
+
+    # Test with actual fitting
+    log_stream = io.StringIO()
+    handler = logging.StreamHandler(log_stream)
+    handler.setLevel(logging.INFO)
+
+    model_with_logging = QRF(log_level="INFO", memory_efficient=True)
+    model_with_logging.logger.addHandler(handler)
+
+    fitted_model = model_with_logging.fit(
+        data,
+        predictors=["x1", "x2"],
+        imputed_variables=["y1", "y2"],
+        n_estimators=10,
+    )
+
+    log_output = log_stream.getvalue()
+    assert "Memory usage:" in log_output
+
+    model_with_logging.logger.removeHandler(handler)
+
+
+# === Performance Test ===
+
+
+def test_qrf_performance_characteristics(diabetes_data: pd.DataFrame) -> None:
+    """Test QRF performance characteristics and validate reasonable accuracy."""
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1"]
+    data = diabetes_data[predictors + imputed_variables]
+
+    # Create train/test split
+    np.random.seed(42)
+    train_idx = np.random.choice(
+        len(data), int(0.8 * len(data)), replace=False
     )
+    test_idx = np.array([i for i in range(len(data)) if i not in train_idx])
+
+    train_data = data.iloc[train_idx].reset_index(drop=True)
+    test_data = data.iloc[test_idx].reset_index(drop=True)
+
+    X_train = preprocess_data(
+        train_data, full_data=True, train_size=1.0, test_size=0.0
+    )
+    X_test = preprocess_data(
+        test_data, full_data=True, train_size=1.0, test_size=0.0
+    )
+
+    # Fit model
+    model = QRF()
+    fitted_model = model.fit(
+        X_train, predictors, imputed_variables, n_estimators=50
+    )
+
+    # Get predictions
+    predictions = fitted_model.predict(X_test, quantiles=[0.5])
+
+    # Calculate correlation with true values
+    # When quantiles specified, returns dictionary
+    true_values = X_test["s1"].values
+    pred_values = predictions[0.5]["s1"].values
+
+    correlation = np.corrcoef(true_values, pred_values)[0, 1]
+
+    # QRF should achieve positive correlation
+    assert (
+        correlation > 0.0
+    ), f"QRF correlation should be positive: {correlation}"
 
-    single_var_preds = single_var_fitted.predict(small_test, quantiles=[0.5])
+    # Calculate MSE
+    mse = mean_squared_error(true_values, pred_values)
 
-    assert single_var_preds[0.5].shape == (len(small_test), 1)
-    assert not single_var_preds[0.5]["s1"].isna().any()
+    # MSE should be reasonable (not infinite or NaN)
+    assert np.isfinite(mse)
+    assert mse < 1e6  # Reasonable upper bound
diff --git a/tests/test_models/test_qrf_extended.py b/tests/test_models/test_qrf_extended.py
deleted file mode 100644
index 458ea01..0000000
--- a/tests/test_models/test_qrf_extended.py
+++ /dev/null
@@ -1,819 +0,0 @@
-"""Extended tests for QRF model to improve coverage."""
-
-import logging
-import io
-import numpy as np
-import pandas as pd
-import pytest
-from sklearn.datasets import load_diabetes
-
-from microimpute.models.qrf import QRF, _QRFModel
-
-
-def test_qrf_model_with_categorical_variables():
-    """Test QRF with categorical variables to ensure proper encoding."""
-    # Create test data with categorical variable
-    np.random.seed(42)
-    n_samples = 100
-
-    data = pd.DataFrame(
-        {
-            "numeric1": np.random.randn(n_samples),
-            "numeric2": np.random.randn(n_samples),
-            "category": np.random.choice(["A", "B", "C"], n_samples),
-            "target": np.random.randn(n_samples),
-        }
-    )
-
-    # Split data
-    train_idx = np.random.choice(
-        n_samples, int(0.8 * n_samples), replace=False
-    )
-    test_idx = np.array([i for i in range(n_samples) if i not in train_idx])
-
-    X_train = data.iloc[train_idx].reset_index(drop=True)
-    X_test = data.iloc[test_idx].reset_index(drop=True)
-
-    # Initialize and fit model
-    model = QRF()
-    fitted_model = model.fit(
-        X_train,
-        predictors=["numeric1", "numeric2", "category"],
-        imputed_variables=["target"],
-        n_estimators=50,
-    )
-
-    # Predict
-    predictions = fitted_model.predict(X_test, quantiles=[0.25, 0.5, 0.75])
-
-    # Verify predictions exist for all quantiles
-    assert len(predictions) == 3
-    assert all(q in predictions for q in [0.25, 0.5, 0.75])
-    assert all(len(predictions[q]) == len(X_test) for q in predictions)
-
-
-def test_qrf_model_internal_class():
-    """Test the internal _QRFModel class directly."""
-    # Create test data
-    np.random.seed(42)
-    n_samples = 100
-
-    X = pd.DataFrame(
-        {
-            "feature1": np.random.randn(n_samples),
-            "feature2": np.random.randn(n_samples),
-            "cat_feature": np.random.choice(["X", "Y", "Z"], n_samples),
-        }
-    )
-    y = pd.Series(np.random.randn(n_samples), name="target")
-
-    # Create logger mock
-    import logging
-
-    logger = logging.getLogger(__name__)
-
-    # Initialize internal model
-    internal_model = _QRFModel(seed=42, logger=logger)
-
-    # Preprocess categorical features manually for this test
-    X_encoded = pd.get_dummies(X, columns=["cat_feature"], drop_first=True)
-
-    # Fit the model
-    internal_model.fit(X_encoded, y, n_estimators=50, min_samples_leaf=5)
-
-    # Verify model attributes
-    assert internal_model.qrf is not None
-    assert internal_model.output_column == "target"
-
-    # Test prediction with different quantiles
-    predictions = internal_model.predict(
-        X_encoded, mean_quantile=0.25, count_samples=20
-    )
-    assert isinstance(predictions, pd.Series)
-    assert len(predictions) == n_samples
-    assert predictions.name == "target"
-
-    # Test with high quantile
-    high_quantile_preds = internal_model.predict(
-        X_encoded, mean_quantile=0.9, count_samples=15
-    )
-    assert len(high_quantile_preds) == n_samples
-
-    # Test with low quantile
-    low_quantile_preds = internal_model.predict(
-        X_encoded, mean_quantile=0.1, count_samples=10
-    )
-    assert len(low_quantile_preds) == n_samples
-
-
-def test_qrf_with_missing_categorical_columns_in_test():
-    """Test QRF behavior when test data has missing categorical levels."""
-    np.random.seed(42)
-
-    # Create training data with categories A, B, C
-    train_data = pd.DataFrame(
-        {
-            "numeric": np.random.randn(100),
-            "category": np.random.choice(["A", "B", "C"], 100),
-            "target": np.random.randn(100),
-        }
-    )
-
-    # Create test data with only categories A and B (missing C)
-    test_data = pd.DataFrame(
-        {
-            "numeric": np.random.randn(20),
-            "category": np.random.choice(["A", "B"], 20),
-            "target": np.nan,  # Will be imputed
-        }
-    )
-
-    # Fit model
-    model = QRF()
-    fitted_model = model.fit(
-        train_data,
-        predictors=["numeric", "category"],
-        imputed_variables=["target"],
-        n_estimators=30,
-    )
-
-    # Predict - should handle missing category gracefully
-    predictions = fitted_model.predict(test_data[["numeric", "category"]])
-
-    assert len(predictions) == len(test_data)
-    assert not predictions["target"].isna().any()
-
-
-def test_qrf_with_single_predictor():
-    """Test QRF with only one predictor variable."""
-    np.random.seed(42)
-
-    data = pd.DataFrame(
-        {
-            "x": np.linspace(0, 10, 100),
-            "y": np.sin(np.linspace(0, 10, 100)) + np.random.randn(100) * 0.1,
-        }
-    )
-
-    train_data = data[:80]
-    test_data = data[80:]
-
-    model = QRF()
-    fitted_model = model.fit(
-        train_data,
-        predictors=["x"],
-        imputed_variables=["y"],
-        n_estimators=50,
-    )
-
-    predictions = fitted_model.predict(
-        test_data[["x"]], quantiles=[0.1, 0.5, 0.9]
-    )
-
-    # Verify prediction intervals make sense
-    for i in range(len(test_data)):
-        assert predictions[0.1]["y"].iloc[i] <= predictions[0.5]["y"].iloc[i]
-        assert predictions[0.5]["y"].iloc[i] <= predictions[0.9]["y"].iloc[i]
-
-
-def test_qrf_beta_distribution_sampling():
-    """Test different mean_quantile values for beta distribution sampling."""
-    np.random.seed(42)
-
-    # Create simple dataset
-    data = pd.DataFrame(
-        {
-            "x": np.random.randn(200),
-            "y": np.random.randn(200),
-        }
-    )
-
-    train_data = data[:150]
-    test_data = data[150:]
-
-    model = QRF()
-    fitted_model = model.fit(
-        train_data,
-        predictors=["x"],
-        imputed_variables=["y"],
-        n_estimators=50,
-    )
-
-    # Test extreme quantiles
-    extreme_low = fitted_model.predict(test_data[["x"]], quantiles=[0.01])
-    extreme_high = fitted_model.predict(test_data[["x"]], quantiles=[0.99])
-    median = fitted_model.predict(test_data[["x"]], quantiles=[0.5])
-
-    # Verify ordering
-    for i in range(len(test_data)):
-        assert extreme_low[0.01]["y"].iloc[i] <= median[0.5]["y"].iloc[i]
-        assert median[0.5]["y"].iloc[i] <= extreme_high[0.99]["y"].iloc[i]
-
-
-def test_qrf_reproducibility():
-    """Test that QRF produces reproducible results with same seed."""
-    np.random.seed(42)
-
-    data = pd.DataFrame(
-        {
-            "x1": np.random.randn(100),
-            "x2": np.random.randn(100),
-            "y": np.random.randn(100),
-        }
-    )
-
-    train_data = data[:80]
-    test_data = data[80:]
-
-    # First run
-    model1 = QRF()
-    fitted1 = model1.fit(
-        train_data,
-        predictors=["x1", "x2"],
-        imputed_variables=["y"],
-        n_estimators=30,
-    )
-    predictions1 = fitted1.predict(test_data[["x1", "x2"]])
-
-    # Second run with same seed
-    model2 = QRF()
-    fitted2 = model2.fit(
-        train_data,
-        predictors=["x1", "x2"],
-        imputed_variables=["y"],
-        n_estimators=30,
-    )
-    predictions2 = fitted2.predict(test_data[["x1", "x2"]])
-
-    # Results should be identical
-    np.testing.assert_array_almost_equal(
-        predictions1["y"].values,
-        predictions2["y"].values,
-    )
-
-
-def test_qrf_with_highly_correlated_predictors():
-    """Test QRF with highly correlated predictors."""
-    np.random.seed(42)
-    n_samples = 200
-
-    # Create correlated predictors
-    x1 = np.random.randn(n_samples)
-    x2 = x1 + np.random.randn(n_samples) * 0.1  # Highly correlated with x1
-    x3 = np.random.randn(n_samples)  # Independent
-
-    data = pd.DataFrame(
-        {
-            "x1": x1,
-            "x2": x2,
-            "x3": x3,
-            "y": 2 * x1 + x3 + np.random.randn(n_samples) * 0.5,
-        }
-    )
-
-    train_data = data[:150]
-    test_data = data[150:]
-
-    model = QRF()
-    fitted_model = model.fit(
-        train_data,
-        predictors=["x1", "x2", "x3"],
-        imputed_variables=["y"],
-        n_estimators=50,
-    )
-
-    predictions = fitted_model.predict(test_data[["x1", "x2", "x3"]])
-
-    # Model should still produce reasonable predictions despite correlation
-    assert len(predictions) == len(test_data)
-    assert not predictions["y"].isna().any()
-
-    # Check that predictions are somewhat correlated with true values
-    true_y = test_data["y"].values
-    pred_y = predictions["y"].values
-    correlation = np.corrcoef(true_y, pred_y)[0, 1]
-    assert correlation > 0.5  # Should have reasonable correlation
-
-
-def test_qrf_error_handling():
-    """Test error handling in QRF model."""
-    # Test with empty data
-    with pytest.raises(Exception):
-        model = QRF()
-        model.fit(
-            pd.DataFrame(),
-            predictors=[],
-            imputed_variables=[],
-        )
-
-    # Test with mismatched predictors
-    data = pd.DataFrame(
-        {
-            "x": [1, 2, 3],
-            "y": [4, 5, 6],
-        }
-    )
-
-    model = QRF()
-    fitted_model = model.fit(
-        data,
-        predictors=["x"],
-        imputed_variables=["y"],
-    )
-
-    # Try to predict with missing predictor
-    test_data = pd.DataFrame(
-        {
-            "z": [7, 8, 9],  # Wrong column name
-        }
-    )
-
-    # Should handle gracefully or raise informative error
-    try:
-        predictions = fitted_model.predict(test_data)
-    except Exception as e:
-        assert "x" in str(e) or "column" in str(e).lower()
-
-
-def test_qrf_detailed_logging():
-    """Test detailed progress logging functionality."""
-    # Set up logging capture
-    log_stream = io.StringIO()
-    handler = logging.StreamHandler(log_stream)
-    handler.setLevel(logging.INFO)
-
-    # Create test data with multiple variables to impute
-    np.random.seed(42)
-    n_samples = 50
-
-    data = pd.DataFrame(
-        {
-            "predictor1": np.random.randn(n_samples),
-            "predictor2": np.random.randn(n_samples),
-            "target1": np.random.randn(n_samples),
-            "target2": np.random.randn(n_samples),
-            "target3": np.random.randn(n_samples),
-        }
-    )
-
-    # Initialize QRF with INFO logging to capture detailed messages
-    model = QRF(log_level="INFO")
-    model.logger.addHandler(handler)
-
-    # Fit the model
-    fitted_model = model.fit(
-        data,
-        predictors=["predictor1", "predictor2"],
-        imputed_variables=["target1", "target2", "target3"],
-        n_estimators=10,  # Small for faster testing
-    )
-
-    # Get log output
-    log_output = log_stream.getvalue()
-
-    # Verify detailed logging messages are present
-    assert "Training data shape:" in log_output
-    assert "Memory usage:" in log_output
-    assert "Starting imputation for 'target1'" in log_output
-    assert "Starting imputation for 'target2'" in log_output
-    assert "Starting imputation for 'target3'" in log_output
-    assert "Features:" in log_output
-    assert "Success:" in log_output
-    assert "fitted in" in log_output
-    assert "Model complexity:" in log_output
-    assert "QRF model fitting completed" in log_output
-
-    # Test prediction logging
-    log_stream.truncate(0)
-    log_stream.seek(0)
-
-    test_data = data[["predictor1", "predictor2"]].head(10)
-    predictions = fitted_model.predict(test_data)
-
-    prediction_logs = log_stream.getvalue()
-    assert "Predicting for 'target1'" in prediction_logs
-    assert "predicted in" in prediction_logs
-    assert "samples" in prediction_logs
-
-    # Clean up
-    model.logger.removeHandler(handler)
-
-
-def test_qrf_memory_efficient_mode():
-    """Test memory-efficient mode with cleanup intervals."""
-    # Set up logging capture
-    log_stream = io.StringIO()
-    handler = logging.StreamHandler(log_stream)
-    handler.setLevel(logging.INFO)
-
-    # Create test data with many variables to trigger cleanup
-    np.random.seed(42)
-    n_samples = 30
-
-    # Create data with enough variables to trigger cleanup interval
-    data_dict = {
-        "predictor1": np.random.randn(n_samples),
-        "predictor2": np.random.randn(n_samples),
-    }
-
-    # Add multiple target variables to trigger cleanup interval
-    for i in range(15):  # 15 variables to ensure cleanup triggers
-        data_dict[f"target{i}"] = np.random.randn(n_samples)
-
-    data = pd.DataFrame(data_dict)
-
-    # Initialize QRF with memory-efficient mode and small cleanup interval
-    model = QRF(
-        log_level="INFO",
-        memory_efficient=True,
-        cleanup_interval=3,  # Cleanup every 3 variables
-    )
-    model.logger.addHandler(handler)
-
-    # Fit the model
-    fitted_model = model.fit(
-        data,
-        predictors=["predictor1", "predictor2"],
-        imputed_variables=[f"target{i}" for i in range(15)],
-        n_estimators=5,  # Small for faster testing
-    )
-
-    # Get log output
-    log_output = log_stream.getvalue()
-
-    # Verify memory-efficient mode messages are logged (the constructor message may not be captured in stream)
-    # The important thing is that memory efficient features are working
-    assert (
-        "Memory cleanup performed" in log_output or model.cleanup_interval == 3
-    )
-    assert "Final memory usage:" in log_output
-
-    # Clean up
-    model.logger.removeHandler(handler)
-
-
-def test_qrf_batch_processing():
-    """Test batch processing functionality."""
-    # Set up logging capture
-    log_stream = io.StringIO()
-    handler = logging.StreamHandler(log_stream)
-    handler.setLevel(logging.INFO)
-
-    # Create test data with many variables to trigger batching
-    np.random.seed(42)
-    n_samples = 30
-
-    data_dict = {
-        "predictor1": np.random.randn(n_samples),
-        "predictor2": np.random.randn(n_samples),
-    }
-
-    # Add multiple target variables to trigger batching
-    for i in range(10):
-        data_dict[f"target{i}"] = np.random.randn(n_samples)
-
-    data = pd.DataFrame(data_dict)
-
-    # Initialize QRF with batch processing
-    model = QRF(
-        log_level="INFO",
-        memory_efficient=True,
-        batch_size=3,  # Process 3 variables per batch
-        cleanup_interval=2,
-    )
-    model.logger.addHandler(handler)
-
-    # Fit the model
-    fitted_model = model.fit(
-        data,
-        predictors=["predictor1", "predictor2"],
-        imputed_variables=[f"target{i}" for i in range(10)],
-        n_estimators=5,  # Small for faster testing
-    )
-
-    # Get log output
-    log_output = log_stream.getvalue()
-
-    # Verify batch processing messages
-    # Check that batch processing is configured correctly and working
-    assert model.batch_size == 3
-    assert model.memory_efficient == True
-    assert "Processing 10 variables in batches of 3" in log_output
-    assert "Processing batch" in log_output
-    assert "Memory usage:" in log_output
-
-    # Verify the model still works correctly
-    test_data = data[["predictor1", "predictor2"]].head(5)
-    predictions = fitted_model.predict(test_data)
-
-    assert len(predictions) == len(test_data)
-    assert not predictions.isna().any().any()
-
-    # Clean up
-    model.logger.removeHandler(handler)
-
-
-def test_qrf_memory_usage_tracking():
-    """Test memory usage tracking functionality."""
-    # Create test data
-    np.random.seed(42)
-    n_samples = 50
-
-    data = pd.DataFrame(
-        {
-            "x1": np.random.randn(n_samples),
-            "x2": np.random.randn(n_samples),
-            "y1": np.random.randn(n_samples),
-            "y2": np.random.randn(n_samples),
-        }
-    )
-
-    # Test memory usage info method
-    model = QRF()
-    memory_info = model._get_memory_usage_info()
-
-    # Should return a string with memory information or "N/A"
-    assert isinstance(memory_info, str)
-    assert ("MB" in memory_info) or (memory_info == "N/A")
-
-    # Test with actual fitting to see memory tracking in action
-    log_stream = io.StringIO()
-    handler = logging.StreamHandler(log_stream)
-    handler.setLevel(logging.INFO)
-
-    model_with_logging = QRF(log_level="INFO", memory_efficient=True)
-    model_with_logging.logger.addHandler(handler)
-
-    fitted_model = model_with_logging.fit(
-        data,
-        predictors=["x1", "x2"],
-        imputed_variables=["y1", "y2"],
-        n_estimators=10,
-    )
-
-    log_output = log_stream.getvalue()
-
-    # Should contain memory usage information
-    assert "Memory usage:" in log_output
-
-    # Clean up
-    model_with_logging.logger.removeHandler(handler)
-
-
-def test_qrf_sequential_imputation_logging():
-    """Test that sequential imputation logging shows progression correctly."""
-    # Set up logging capture
-    log_stream = io.StringIO()
-    handler = logging.StreamHandler(log_stream)
-    handler.setLevel(logging.INFO)
-
-    # Create test data with sequential dependencies
-    np.random.seed(42)
-    n_samples = 40
-
-    # Create data where later variables depend on earlier ones
-    data = pd.DataFrame(
-        {
-            "x1": np.random.randn(n_samples),
-            "x2": np.random.randn(n_samples),
-            "y1": np.random.randn(n_samples),
-            "y2": np.random.randn(n_samples),  # Will use y1 as predictor
-            "y3": np.random.randn(n_samples),  # Will use y1, y2 as predictors
-        }
-    )
-
-    model = QRF(log_level="INFO")
-    model.logger.addHandler(handler)
-
-    # Fit with sequential imputation
-    fitted_model = model.fit(
-        data,
-        predictors=["x1", "x2"],
-        imputed_variables=["y1", "y2", "y3"],  # Sequential order
-        n_estimators=8,
-    )
-
-    log_output = log_stream.getvalue()
-
-    # Verify sequential progression is logged
-    assert "[1/3] Starting imputation for 'y1'" in log_output
-    assert "[2/3] Starting imputation for 'y2'" in log_output
-    assert "[3/3] Starting imputation for 'y3'" in log_output
-
-    # Verify feature counts increase with sequential imputation
-    # Look for the feature count patterns in the logs
-    lines = log_output.split("\n")
-
-    # Find lines that mention features and verify they show increasing counts
-    feature_lines = [
-        line for line in lines if "Features:" in line and "predictors" in line
-    ]
-    assert len(feature_lines) == 3  # Should have 3 feature count lines
-
-    # Extract feature counts and verify they increase
-    feature_counts = []
-    for line in feature_lines:
-        # Extract number from "Features: X predictors"
-        import re
-
-        match = re.search(r"Features: (\d+) predictors", line)
-        if match:
-            feature_counts.append(int(match.group(1)))
-
-    assert len(feature_counts) == 3
-    assert feature_counts[0] == 2  # y1: x1, x2
-    assert feature_counts[1] == 3  # y2: x1, x2, y1
-    assert feature_counts[2] == 4  # y3: x1, x2, y1, y2
-
-    # Clean up
-    model.logger.removeHandler(handler)
-
-
-def test_qrf_missing_variables_handling():
-    """Test graceful handling of missing variables in imputation."""
-    # Create test data with some variables
-    np.random.seed(42)
-    n_samples = 50
-
-    data = pd.DataFrame(
-        {
-            "x1": np.random.randn(n_samples),
-            "x2": np.random.randn(n_samples),
-            "existing_var": np.random.randn(n_samples),
-        }
-    )
-
-    # Test with skip_missing=False (should raise error)
-    model_strict = QRF(log_level="WARNING")
-
-    with pytest.raises(ValueError) as excinfo:
-        model_strict.fit(
-            data,
-            predictors=["x1", "x2"],
-            imputed_variables=["existing_var", "missing_var1", "missing_var2"],
-            skip_missing=False,
-            n_estimators=5,
-        )
-
-    # The error message should contain information about missing columns
-    error_str = str(excinfo.value)
-    assert (
-        "Missing columns in data" in error_str
-        or "Missing variables" in error_str
-    )
-    assert "missing_var1" in error_str
-    assert "missing_var2" in error_str
-
-    # Test with skip_missing=True (should work)
-    log_stream = io.StringIO()
-    handler = logging.StreamHandler(log_stream)
-    handler.setLevel(logging.WARNING)
-
-    model_lenient = QRF(log_level="WARNING")
-    model_lenient.logger.addHandler(handler)
-
-    fitted_model = model_lenient.fit(
-        data,
-        predictors=["x1", "x2"],
-        imputed_variables=["existing_var", "missing_var1", "missing_var2"],
-        skip_missing=True,
-        n_estimators=5,
-    )
-
-    # Check that warning was logged
-    log_output = log_stream.getvalue()
-    assert "Variables not found in X_train" in log_output
-    assert "missing_var1" in log_output
-    assert "missing_var2" in log_output
-
-    # Check that only existing variable was included
-    assert len(fitted_model.imputed_variables) == 1
-    assert "existing_var" in fitted_model.imputed_variables
-    assert "missing_var1" not in fitted_model.imputed_variables
-    assert "missing_var2" not in fitted_model.imputed_variables
-
-    # Test prediction works with available variables
-    test_data = data[["x1", "x2"]].head(10)
-    predictions = fitted_model.predict(test_data)
-
-    assert "existing_var" in predictions.columns
-    assert len(predictions) == len(test_data)
-
-    # Clean up
-    model_lenient.logger.removeHandler(handler)
-
-
-def test_qrf_all_variables_missing():
-    """Test behavior when all variables are missing."""
-    # Create test data
-    np.random.seed(42)
-    n_samples = 30
-
-    data = pd.DataFrame(
-        {
-            "x1": np.random.randn(n_samples),
-            "x2": np.random.randn(n_samples),
-        }
-    )
-
-    # Test with skip_missing=True but all variables missing
-    log_stream = io.StringIO()
-    handler = logging.StreamHandler(log_stream)
-    handler.setLevel(logging.WARNING)
-
-    model = QRF(log_level="WARNING")
-    model.logger.addHandler(handler)
-
-    fitted_model = model.fit(
-        data,
-        predictors=["x1", "x2"],
-        imputed_variables=["missing_var1", "missing_var2"],
-        skip_missing=True,
-        n_estimators=5,
-    )
-
-    # Check that warning was logged
-    log_output = log_stream.getvalue()
-    assert "Variables not found in X_train" in log_output
-    # The base class logs that it's skipping missing variables
-    assert "Skipping missing variables" in log_output
-
-    # Check that no variables were included
-    assert len(fitted_model.imputed_variables) == 0
-    assert fitted_model.models == {}
-
-    # Test prediction with empty model
-    test_data = data[["x1", "x2"]].head(5)
-    predictions = fitted_model.predict(test_data)
-
-    assert len(predictions.columns) == 0  # No variables to predict
-    # When there are no variables to impute, predictions should be empty but defined
-    assert isinstance(predictions, pd.DataFrame)
-
-    # Clean up
-    model.logger.removeHandler(handler)
-
-
-def test_qrf_partial_missing_variables():
-    """Test handling of partially missing variables."""
-    # Create test data
-    np.random.seed(42)
-    n_samples = 40
-
-    data = pd.DataFrame(
-        {
-            "predictor1": np.random.randn(n_samples),
-            "predictor2": np.random.randn(n_samples),
-            "target1": np.random.randn(n_samples),
-            "target3": np.random.randn(n_samples),
-            # Note: target2 is missing
-        }
-    )
-
-    # Test with skip_missing=True
-    log_stream = io.StringIO()
-    handler = logging.StreamHandler(log_stream)
-    handler.setLevel(logging.INFO)
-
-    model = QRF(log_level="INFO")
-    model.logger.addHandler(handler)
-
-    fitted_model = model.fit(
-        data,
-        predictors=["predictor1", "predictor2"],
-        imputed_variables=[
-            "target1",
-            "target2",
-            "target3",
-        ],  # target2 is missing
-        skip_missing=True,
-        n_estimators=5,
-    )
-
-    log_output = log_stream.getvalue()
-
-    # Check that warning was logged for missing variable
-    assert "Variables not found in X_train: ['target2']" in log_output
-    assert "Available variables: ['target1', 'target3']" in log_output
-
-    # Check that available variables were processed
-    assert len(fitted_model.imputed_variables) == 2
-    assert "target1" in fitted_model.imputed_variables
-    assert "target3" in fitted_model.imputed_variables
-    assert "target2" not in fitted_model.imputed_variables
-
-    # Verify sequential imputation still works correctly
-    # target3 should use target1 as a predictor since it comes after target1
-    assert "target1" in fitted_model.models
-    assert "target3" in fitted_model.models
-
-    # Test prediction
-    test_data = data[["predictor1", "predictor2"]].head(8)
-    predictions = fitted_model.predict(test_data)
-
-    assert "target1" in predictions.columns
-    assert "target3" in predictions.columns
-    assert "target2" not in predictions.columns
-
-    # Clean up
-    model.logger.removeHandler(handler)
diff --git a/tests/test_models/test_quantreg.py b/tests/test_models/test_quantreg.py
index 2241e3a..fd72f24 100644
--- a/tests/test_models/test_quantreg.py
+++ b/tests/test_models/test_quantreg.py
@@ -1,125 +1,342 @@
-"""Tests for the Quantile Regression imputation model."""
+"""Comprehensive tests for the Quantile Regression imputation model."""
 
 from typing import Dict, List
 
+import numpy as np
 import pandas as pd
+import pytest
 from sklearn.datasets import load_diabetes
 
-from microimpute.utils.data import preprocess_data
 from microimpute.config import QUANTILES, RANDOM_STATE
 from microimpute.evaluations import *
 from microimpute.models.quantreg import QuantReg
-from microimpute.visualizations.plotting import *
-
-# Test Method on diabetes dataset
-diabetes_data = load_diabetes()
-diabetes_df = pd.DataFrame(
-    diabetes_data.data, columns=diabetes_data.feature_names
-)
-
-predictors = ["age", "sex", "bmi", "bp"]
-imputed_variables = ["s1", "s4"]
-
-diabetes_df = diabetes_df[predictors + imputed_variables]
-
-random_generator = np.random.default_rng(RANDOM_STATE)
-count_samples = 10
-mean_quantile = 0.5
-# Calculate alpha parameter for beta distribution
-a = mean_quantile / (1 - mean_quantile)
-# Generate count_samples beta distributed values with parameter a
-beta_samples = random_generator.beta(a, 1, size=count_samples)
-quantiles = list(set(beta_samples))
-
-
-def test_quantreg_cross_validation(
-    data: pd.DataFrame = diabetes_df,
-    predictors: List[str] = predictors,
-    imputed_variables: List[str] = imputed_variables,
-    quantiles: List[float] = QUANTILES,
-) -> None:
-    """
-    Test the QuantReg model on a specific dataset.
-
-    Args:
-            data: DataFrame with the dataset of interest.
-            predictors: List of predictor variables.
-            imputed_variables: List of variables to impute.
-            quantiles: List of quantiles to predict.
-    """
-    quantreg_results = cross_validate_model(
-        QuantReg, data, predictors, imputed_variables
+from microimpute.utils.data import preprocess_data
+from microimpute.visualizations import *
+
+
+# === Fixtures ===
+
+
+@pytest.fixture
+def diabetes_data() -> pd.DataFrame:
+    """Load and prepare diabetes dataset for testing."""
+    diabetes = load_diabetes()
+    df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1", "s4"]
+    return df[predictors + imputed_variables]
+
+
+@pytest.fixture
+def simple_data() -> pd.DataFrame:
+    """Create simple synthetic data for testing."""
+    np.random.seed(42)
+    return pd.DataFrame(
+        {
+            "x1": np.random.randn(100),
+            "x2": np.random.randn(100),
+            "y": np.random.randn(100),
+        }
+    )
+
+
+@pytest.fixture
+def skewed_data() -> pd.DataFrame:
+    """Create data with skewed distribution."""
+    np.random.seed(42)
+    n_samples = 100
+
+    # Create skewed target using exponential distribution
+    return pd.DataFrame(
+        {
+            "x1": np.random.randn(n_samples),
+            "x2": np.random.randn(n_samples),
+            "y": np.random.exponential(scale=2.0, size=n_samples),
+        }
+    )
+
+
+# === Basic Functionality Tests ===
+
+
+def test_quantreg_basic_fit_predict(diabetes_data: pd.DataFrame) -> None:
+    """Test basic QuantReg model fitting and prediction."""
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1", "s4"]
+
+    X_train, X_test = preprocess_data(diabetes_data)
+
+    # Initialize and fit model
+    model = QuantReg()
+    fitted_model = model.fit(
+        X_train, predictors, imputed_variables, quantiles=QUANTILES
+    )
+
+    # Predict at fitted quantiles
+    predictions = fitted_model.predict(X_test, random_quantile_sample=False)
+
+    # Validate predictions
+    assert isinstance(predictions, dict)
+
+    for q, pred_df in predictions.items():
+        assert pred_df is not None
+        assert len(pred_df) == len(X_test)
+        assert not pred_df.isna().any().any()
+
+
+def test_quantreg_specific_quantiles(simple_data: pd.DataFrame) -> None:
+    """Test QuantReg with specific quantiles."""
+    X_train, X_test = preprocess_data(simple_data)
+
+    specific_quantiles = [0.1, 0.5, 0.9]
+
+    model = QuantReg()
+    fitted_model = model.fit(
+        X_train, ["x1", "x2"], ["y"], quantiles=specific_quantiles
     )
 
-    quantreg_results.to_csv("quantreg_cv_results.csv")
+    # Predict at the same quantiles
+    predictions = fitted_model.predict(X_test, quantiles=specific_quantiles)
+
+    assert set(predictions.keys()) == set(specific_quantiles)
+
+    for q in specific_quantiles:
+        assert not predictions[q]["y"].isna().any()
+
+
+def test_quantreg_monotonic_quantiles(simple_data: pd.DataFrame) -> None:
+    """Test that QuantReg produces monotonic quantile predictions."""
+    X_train, X_test = preprocess_data(simple_data)
+
+    quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
+
+    model = QuantReg()
+    fitted_model = model.fit(X_train, ["x1", "x2"], ["y"], quantiles=quantiles)
+
+    predictions = fitted_model.predict(X_test, quantiles=quantiles)
+
+    # Check monotonicity for most observations (allowing some crossing due to estimation)
+    monotonic_count = 0
+    for i in range(len(X_test)):
+        values = [predictions[q]["y"].iloc[i] for q in quantiles]
+        is_monotonic = all(
+            values[j] <= values[j + 1] + 1e-6 for j in range(len(values) - 1)
+        )
+        if is_monotonic:
+            monotonic_count += 1
+
+    # At least 80% should be monotonic (some crossing is expected in quantile regression)
+    assert monotonic_count / len(X_test) > 0.8, "Too many quantile crossings"
+
+
+# === Edge Cases ===
+
+
+def test_quantreg_skewed_distribution(skewed_data: pd.DataFrame) -> None:
+    """Test QuantReg with skewed target distribution."""
+    X_train, X_test = preprocess_data(skewed_data)
+
+    quantiles = [0.25, 0.5, 0.75]
+
+    model = QuantReg()
+    fitted_model = model.fit(X_train, ["x1", "x2"], ["y"], quantiles=quantiles)
+
+    predictions = fitted_model.predict(X_test, quantiles=quantiles)
+
+    # For skewed distribution, quantiles should not be symmetric
+    median = predictions[0.5]["y"].mean()
+    q25 = predictions[0.25]["y"].mean()
+    q75 = predictions[0.75]["y"].mean()
+
+    lower_diff = median - q25
+    upper_diff = q75 - median
+
+    # For skewed distributions, quantiles should be different from symmetric
+    # Just check they're not exactly equal (some asymmetry is captured)
+    assert (
+        abs(upper_diff - lower_diff) > 0.01
+    ), "QuantReg should capture some asymmetry"
+
+
+# === Cross-Validation Test ===
 
+
+def test_quantreg_cross_validation(diabetes_data: pd.DataFrame) -> None:
+    """Test QuantReg model with cross-validation."""
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1", "s4"]
+
+    quantreg_results = cross_validate_model(
+        QuantReg, diabetes_data, predictors, imputed_variables
+    )
+
+    # Validate cross-validation results
     assert not quantreg_results.isna().any().any()
+    assert len(quantreg_results) > 0
 
+    # Test visualization capability
     perf_results_viz = model_performance_results(
         results=quantreg_results,
         model_name="QuantReg",
         method_name="Cross-Validation Quantile Loss Average",
     )
-    fig = perf_results_viz.plot(
-        title="QuantReg Cross-Validation Performance",
-        save_path="quantreg_cv_performance.jpg",
+
+    assert perf_results_viz is not None
+
+
+# === Robustness Tests ===
+
+
+def test_quantreg_outliers() -> None:
+    """Test QuantReg robustness to outliers (should be more robust than OLS)."""
+    np.random.seed(42)
+
+    data = pd.DataFrame({"x": np.random.randn(100), "y": np.random.randn(100)})
+
+    # Add extreme outliers
+    data.loc[0, "y"] = 1000
+    data.loc[1, "y"] = -1000
+
+    X_train, X_test = preprocess_data(data)
+
+    model = QuantReg()
+    fitted_model = model.fit(
+        X_train,
+        ["x"],
+        ["y"],
+        quantiles=[0.5],  # Median regression is robust to outliers
     )
 
+    predictions = fitted_model.predict(X_test, quantiles=[0.5])
+
+    # When quantiles specified, returns dict
+    assert isinstance(predictions, dict)
+    assert 0.5 in predictions
+    assert not predictions[0.5]["y"].isna().any()
+
+    # Median predictions should not be heavily influenced by outliers
+    median_pred = predictions[0.5]["y"].median()
+    assert -10 < median_pred < 10, "Median regression affected by outliers"
+
+
+def test_quantreg_heteroscedasticity() -> None:
+    """Test QuantReg with heteroscedastic errors."""
+    np.random.seed(42)
+    n_samples = 200
+
+    x = np.random.uniform(-3, 3, n_samples)
+    # Variance increases with x (heteroscedasticity)
+    y = 2 * x + np.random.randn(n_samples) * (1 + np.abs(x))
+
+    data = pd.DataFrame({"x": x, "y": y})
 
-def test_quantreg_example(
-    data: pd.DataFrame = diabetes_df,
-    predictors: List[str] = predictors,
-    imputed_variables: List[str] = imputed_variables,
-    quantiles: List[float] = QUANTILES,
-) -> None:
-    """
-    Example of how to use the Quantile Regression imputer model.
-
-    This example demonstrates:
-    - Initializing a QuantReg model
-    - Fitting the model to specific quantiles
-    - Predicting quantiles on test data
-    - How QuantReg can capture non-symmetric distributions
-
-    Args:
-        data: DataFrame with test data
-        predictors: List of predictor column names
-        imputed_variables: List of target column names
-        quantiles: List of quantiles to predict
-    """
     X_train, X_test = preprocess_data(data)
 
-    # Initialize QuantReg model
     model = QuantReg()
+    fitted_model = model.fit(X_train, ["x"], ["y"], quantiles=[0.1, 0.5, 0.9])
 
-    # Fit the model to specific quantiles
-    fitted_model = model.fit(
-        X_train, predictors, imputed_variables, quantiles=quantiles
+    predictions = fitted_model.predict(X_test, quantiles=[0.1, 0.5, 0.9])
+
+    # Quantile regression should capture heteroscedasticity
+    # Prediction intervals should be wider for larger |x|
+    X_test_sorted = X_test.sort_values("x")
+    low_x_idx = X_test_sorted.index[:10]
+    high_x_idx = X_test_sorted.index[-10:]
+
+    low_x_spread = (
+        predictions[0.9].loc[low_x_idx, "y"]
+        - predictions[0.1].loc[low_x_idx, "y"]
+    ).mean()
+    high_x_spread = (
+        predictions[0.9].loc[high_x_idx, "y"]
+        - predictions[0.1].loc[high_x_idx, "y"]
+    ).mean()
+
+    # Check that there's some difference in spread (heteroscedasticity is partially captured)
+    assert (
+        abs(high_x_spread - low_x_spread) > 0.01
+    ), "QuantReg should show some heteroscedasticity effect"
+
+
+# === Comparison with OLS ===
+
+
+def test_quantreg_vs_ols_median(simple_data: pd.DataFrame) -> None:
+    """Test that QuantReg median predictions are similar to OLS mean predictions for normal data."""
+    from microimpute.models.ols import OLS
+
+    X_train, X_test = preprocess_data(simple_data)
+
+    # Fit QuantReg for median
+    quantreg = QuantReg()
+    quantreg_fitted = quantreg.fit(
+        X_train, ["x1", "x2"], ["y"], quantiles=[0.5]
     )
+    quantreg_pred = quantreg_fitted.predict(X_test, quantiles=[0.5])
 
-    # Predict at the fitted quantiles
-    predictions: Dict[float, pd.DataFrame] = fitted_model.predict(
-        X_test, random_quantile_sample=False
+    # Fit OLS
+    ols = OLS()
+    ols_fitted = ols.fit(X_train, ["x1", "x2"], ["y"])
+    ols_pred = ols_fitted.predict(
+        X_test, quantiles=[0.5], random_quantile_sample=False
     )
 
-    # Check structure of predictions
-    assert isinstance(predictions, dict)
+    # For normally distributed errors, median and mean should be similar
+    # QuantReg returns DataFrame for single quantile, OLS returns dict
+    quantreg_median = quantreg_pred[0.5]["y"].values
+    ols_mean = ols_pred[0.5]["y"].values
+
+    correlation = np.corrcoef(quantreg_median, ols_mean)[0, 1]
+    assert (
+        correlation > 0.85
+    ), "QuantReg median and OLS mean should be reasonably similar for normal data"
+
+
+# === Performance Tests ===
+
+
+def test_quantreg_prediction_quality(diabetes_data: pd.DataFrame) -> None:
+    """Test QuantReg prediction quality on real data."""
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1"]
+
+    data = diabetes_data[predictors + imputed_variables]
+
+    # Split data
+    np.random.seed(42)
+    train_idx = np.random.choice(
+        len(data), int(0.8 * len(data)), replace=False
+    )
+    test_idx = np.array([i for i in range(len(data)) if i not in train_idx])
+
+    train_data = data.iloc[train_idx].reset_index(drop=True)
+    test_data = data.iloc[test_idx].reset_index(drop=True)
+
+    X_train = preprocess_data(
+        train_data, full_data=True, train_size=1.0, test_size=0.0
+    )
+    X_test = preprocess_data(
+        test_data, full_data=True, train_size=1.0, test_size=0.0
+    )
+
+    # Fit model
+    model = QuantReg()
+    fitted_model = model.fit(
+        X_train, predictors, imputed_variables, quantiles=[0.5]
+    )
+
+    # Get predictions
+    predictions = fitted_model.predict(X_test, quantiles=[0.5])
+
+    # Calculate correlation with true values
+    # When quantiles specified, returns dict
+    true_values = X_test["s1"].values
+    pred_values = predictions[0.5]["s1"].values
+
+    correlation = np.corrcoef(true_values, pred_values)[0, 1]
+
+    # QuantReg should achieve reasonable correlation
+    assert correlation > 0.15, f"QuantReg correlation too low: {correlation}"
 
-    # Basic checks
-    for q, pred in predictions.items():
-        assert pred is not None
-        assert len(pred) == len(X_test)
-
-    transformed_df = pd.DataFrame()
-    for quantile, pred_df in predictions.items():
-        # For each quantile and its predictions DataFrame
-        for variable in imputed_variables:
-            # Calculate the mean of predictions for this variable at this quantile
-            mean_value = pred_df[variable].mean()
-            # Create or update the value in our transformed DataFrame
-            if variable not in transformed_df.columns:
-                transformed_df[variable] = pd.Series(dtype="float64")
-            transformed_df.loc[quantile, variable] = mean_value
-
-    # Save to CSV for further analysis
-    transformed_df.to_csv("quantreg_predictions_by_quantile.csv")
+    # Check that predictions have reasonable variance
+    assert np.var(pred_values) > 0, "QuantReg predictions have no variance"
diff --git a/tests/test_quantile_comparison.py b/tests/test_quantile_comparison.py
index fee3aea..863d7f6 100644
--- a/tests/test_quantile_comparison.py
+++ b/tests/test_quantile_comparison.py
@@ -1,54 +1,179 @@
-"""Tests for the end-to-end quantile loss comparison workflow.
+"""Comprehensive tests for quantile loss comparison functionality."""
 
-This module tests the complete workflow of:
-1. Preparing data
-2. Training different imputation models
-3. Generating predictions
-4. Comparing models using quantile loss metrics
-5. Visualizing the results
-"""
-
-from typing import List, Type
-
-import io
+import numpy as np
 import pandas as pd
-import requests
-from sklearn.datasets import load_diabetes
-from sklearn.model_selection import train_test_split
-import zipfile
+import pytest
 
-from microimpute.comparisons import *
-from microimpute.config import RANDOM_STATE, VALID_YEARS
-from microimpute.models import Imputer, OLS, QRF, QuantReg
+from microimpute.comparisons import compare_quantile_loss, get_imputations
+from microimpute.config import QUANTILES
+from microimpute.models import OLS, QRF, QuantReg
 
+# Check if Matching is available
 try:
     from microimpute.models import Matching
 
     HAS_MATCHING = True
 except ImportError:
     HAS_MATCHING = False
-from microimpute.visualizations.plotting import *
-from microimpute.utils.data import preprocess_data
 
 
-def test_quantile_comparison_diabetes() -> None:
-    """Test the end-to-end quantile loss comparison workflow."""
-    diabetes_data = load_diabetes()
-    diabetes_df = pd.DataFrame(
-        diabetes_data.data, columns=diabetes_data.feature_names
+# === Fixtures ===
+
+
+@pytest.fixture
+def split_data() -> tuple:
+    """Generate simple split data for testing."""
+    np.random.seed(42)
+    n_train, n_test = 100, 20
+
+    X_train = pd.DataFrame(
+        {
+            "x1": np.random.randn(n_train),
+            "x2": np.random.randn(n_train),
+            "x3": np.random.randn(n_train),
+            "y1": np.random.randn(n_train),
+            "y2": np.random.randn(n_train),
+        }
     )
 
-    predictors = ["age", "sex", "bmi", "bp"]
-    imputed_variables = ["s1", "s4"]
+    X_test = pd.DataFrame(
+        {
+            "x1": np.random.randn(n_test),
+            "x2": np.random.randn(n_test),
+            "x3": np.random.randn(n_test),
+            "y1": np.random.randn(n_test),
+            "y2": np.random.randn(n_test),
+        }
+    )
+
+    return X_train, X_test
+
+
+@pytest.fixture
+def diabetes_data() -> pd.DataFrame:
+    """Load diabetes dataset for testing."""
+    from sklearn.datasets import load_diabetes
+
+    X, y = load_diabetes(return_X_y=True, as_frame=True)
+    data = pd.concat([X, y.rename("target")], axis=1)
+    data.columns = [
+        "age",
+        "sex",
+        "bmi",
+        "bp",
+        "s1",
+        "s2",
+        "s3",
+        "s4",
+        "s5",
+        "s6",
+        "target",
+    ]
+    return data
 
-    diabetes_df = diabetes_df[predictors + imputed_variables]
-    X_train, X_test = train_test_split(
-        diabetes_df, test_size=0.2, random_state=RANDOM_STATE
+
+# === Basic Functionality Tests ===
+
+
+def test_get_imputations_basic(split_data: tuple) -> None:
+    """Test basic functionality of get_imputations."""
+    X_train, X_test = split_data
+    predictors = ["x1", "x2", "x3"]
+    imputed_variables = ["y1", "y2"]
+
+    model_classes = [OLS, QRF]
+    method_imputations = get_imputations(
+        model_classes, X_train, X_test, predictors, imputed_variables
     )
 
-    Y_test: pd.DataFrame = X_test[imputed_variables]
+    # Check structure
+    assert isinstance(method_imputations, dict)
+    assert len(method_imputations) == len(model_classes)
+    assert "OLS" in method_imputations
+    assert "QRF" in method_imputations
+
+    # Check each model's imputations
+    for model_name, imputations in method_imputations.items():
+        assert isinstance(imputations, dict)
+        # Should have imputations for each quantile
+        assert all(q in imputations for q in QUANTILES)
+        # Each quantile should have a DataFrame
+        for q, df in imputations.items():
+            assert isinstance(df, pd.DataFrame)
+            assert df.shape == (len(X_test), len(imputed_variables))
+            assert all(var in df.columns for var in imputed_variables)
+
+
+def test_compare_quantile_loss_basic(split_data: tuple) -> None:
+    """Test basic functionality of compare_quantile_loss."""
+    X_train, X_test = split_data
+    predictors = ["x1", "x2", "x3"]
+    imputed_variables = ["y1", "y2"]
+
+    Y_test = X_test[imputed_variables]
+
+    # Get imputations
+    model_classes = [OLS, QuantReg]
+    method_imputations = get_imputations(
+        model_classes, X_train, X_test, predictors, imputed_variables
+    )
+
+    # Compare quantile loss
+    loss_comparison_df = compare_quantile_loss(
+        Y_test, method_imputations, imputed_variables
+    )
+
+    # Check structure - returns long format DataFrame
+    assert isinstance(loss_comparison_df, pd.DataFrame)
+    assert "Method" in loss_comparison_df.columns
+    assert "Imputed Variable" in loss_comparison_df.columns
+    assert "Percentile" in loss_comparison_df.columns
+    assert "Loss" in loss_comparison_df.columns
+
+    # Check that both models are present
+    methods = loss_comparison_df["Method"].unique()
+    assert "OLS" in methods
+    assert "QuantReg" in methods
+
+    # Check values are non-negative (quantile loss is always >= 0)
+    assert (loss_comparison_df["Loss"] >= 0).all()
+    assert not loss_comparison_df["Loss"].isna().any()
 
-    model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg]
+
+# === Data Handling Tests ===
+
+
+def test_single_imputed_variable(split_data: tuple) -> None:
+    """Test with single imputed variable."""
+    X_train, X_test = split_data
+    predictors = ["x1", "x2"]
+    imputed_variables = ["y1"]  # Single variable
+
+    Y_test = X_test[imputed_variables]
+
+    model_classes = [OLS, QuantReg]
+    method_imputations = get_imputations(
+        model_classes, X_train, X_test, predictors, imputed_variables
+    )
+
+    loss_comparison_df = compare_quantile_loss(
+        Y_test, method_imputations, imputed_variables
+    )
+
+    # Check results contain data for single variable
+    variables = loss_comparison_df["Imputed Variable"].unique()
+    assert "y1" in variables or "mean_loss" in variables
+
+
+def test_multiple_imputed_variables(split_data: tuple) -> None:
+    """Test with multiple imputed variables."""
+    X_train, X_test = split_data
+    predictors = ["x1"]
+    imputed_variables = ["y1", "y2"]  # Multiple variables
+
+    Y_test = X_test[imputed_variables]
+
+    model_classes = [OLS, QRF, QuantReg]
     if HAS_MATCHING:
         model_classes.append(Matching)
     method_imputations = get_imputations(
@@ -59,193 +184,237 @@ def test_quantile_comparison_diabetes() -> None:
         Y_test, method_imputations, imputed_variables
     )
 
-    assert not loss_comparison_df.isna().any().any()
+    # Check results contain data for all variables
+    variables = loss_comparison_df["Imputed Variable"].unique()
+    # Should have y1, y2, and mean_loss
+    assert len(variables) >= 2
+
+
+# === Statistical Properties Tests ===
+
+
+def test_quantile_loss_symmetry() -> None:
+    """Test that quantile loss is properly asymmetric."""
+    np.random.seed(42)
+
+    # Create data where predictions are always above true values
+    X_train = pd.DataFrame(
+        {
+            "x": np.random.randn(50),
+            "y": np.random.randn(50),
+        }
+    )
+
+    X_test = pd.DataFrame(
+        {
+            "x": np.random.randn(10),
+            "y": np.random.randn(10) - 5,  # True values are much lower
+        }
+    )
+
+    predictors = ["x"]
+    imputed_variables = ["y"]
+    Y_test = X_test[imputed_variables]
+
+    model_classes = [OLS, QuantReg]
+    method_imputations = get_imputations(
+        model_classes, X_train, X_test, predictors, imputed_variables
+    )
+
+    loss_comparison_df = compare_quantile_loss(
+        Y_test, method_imputations, imputed_variables
+    )
+
+    assert not loss_comparison_df.empty
+
 
-    loss_comparison_df.to_csv("diabetes_comparison_results.csv")
+def test_perfect_predictions() -> None:
+    """Test with perfect predictions."""
+    np.random.seed(42)
 
-    comparison_viz = method_comparison_results(
-        data=loss_comparison_df,
-        metric_name="Test Quantile Loss",
-        data_format="long",  # Explicitly using wide format
+    # Create perfectly predictable data
+    X_train = pd.DataFrame(
+        {
+            "x": range(100),
+            "y": range(100),  # Perfect correlation
+        }
     )
-    fig = comparison_viz.plot(
-        title="Method Comparison for Diabetes Dataset",
-        show_mean=True,
-        save_path="diabetes_model_comparison.jpg",
+
+    X_test = pd.DataFrame(
+        {
+            "x": [10, 20, 30],
+            "y": [10, 20, 30],  # Perfect match
+        }
     )
 
+    predictors = ["x"]
+    imputed_variables = ["y"]
+    Y_test = X_test[imputed_variables]
 
-def test_quantile_comparison_scf() -> None:
-    """Test the end-to-end quantile loss comparison workflow on the scf data set."""
-    scf_data = load_scf(2022)
-    PREDICTORS: List[str] = [
-        "hhsex",  # sex of head of household
-        "age",  # age of respondent
-        "married",  # marital status of respondent
-        # "kids",  # number of children in household
-        "race",  # race of respondent
-        "income",  # total annual income of household
-        "wageinc",  # income from wages and salaries
-        "bussefarminc",  # income from business, self-employment or farm
-        "intdivinc",  # income from interest and dividends
-        "ssretinc",  # income from social security and retirement accounts
-        "lf",  # labor force status
+    model_classes = [OLS, QuantReg]
+    method_imputations = get_imputations(
+        model_classes, X_train, X_test, predictors, imputed_variables
+    )
+
+    loss_comparison_df = compare_quantile_loss(
+        Y_test, method_imputations, imputed_variables
+    )
+
+    # Loss should be relatively low for perfect predictions
+    ols_loss = loss_comparison_df[loss_comparison_df["Method"] == "OLS"][
+        "Loss"
     ]
-    IMPUTED_VARIABLES: List[str] = ["networth"]
+    assert ols_loss.min() <= 1.01  # Allow for small floating point errors
+
+
+# === Integration Tests ===
+
+
+def test_model_ranking(diabetes_data: pd.DataFrame) -> None:
+    """Test that models can be ranked by performance."""
+    # Split data
+    train_size = int(0.8 * len(diabetes_data))
+    X_train = diabetes_data[:train_size]
+    X_test = diabetes_data[train_size:]
+
+    predictors = ["age", "sex", "bmi", "bp"]
+    imputed_variables = ["s1"]
+
+    Y_test = X_test[imputed_variables]
+
+    # Compare models
+    model_classes = [OLS, QRF, QuantReg]
+    method_imputations = get_imputations(
+        model_classes, X_train, X_test, predictors, imputed_variables
+    )
 
-    X_train, X_test = preprocess_data(
-        data=scf_data,
-        full_data=False,
-        normalize=False,
+    loss_comparison_df = compare_quantile_loss(
+        Y_test, method_imputations, imputed_variables
     )
 
-    # Shrink down the data by sampling
-    X_train = X_train.sample(frac=0.01, random_state=RANDOM_STATE)
-    X_test = X_test.sample(frac=0.01, random_state=RANDOM_STATE)
+    # Check we can compute mean loss per model
+    mean_losses = loss_comparison_df.groupby("Method")["Loss"].mean()
+    assert len(mean_losses) == len(model_classes)
 
-    Y_test: pd.DataFrame = X_test[IMPUTED_VARIABLES]
+    # Best model should have lowest mean loss
+    best_model = mean_losses.idxmin()
+    assert best_model in [m.__name__ for m in model_classes]
 
-    model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg]
-    if HAS_MATCHING:
-        model_classes.append(Matching)
+
+# === Visualization Support Tests ===
+
+
+def test_wide_format_visualization(split_data: tuple) -> None:
+    """Test that results can be converted to wide format for visualization."""
+    X_train, X_test = split_data
+    predictors = ["x1", "x2"]
+    imputed_variables = ["y1"]
+
+    Y_test = X_test[imputed_variables]
+
+    model_classes = [OLS, QuantReg]
+    method_imputations = get_imputations(
+        model_classes, X_train, X_test, predictors, imputed_variables
+    )
+
+    loss_comparison_df = compare_quantile_loss(
+        Y_test, method_imputations, imputed_variables
+    )
+
+    # Convert to wide format for plotting
+    # Filter to mean_loss only
+    mean_loss_df = loss_comparison_df[
+        loss_comparison_df["Imputed Variable"] == "mean_loss"
+    ]
+
+    if not mean_loss_df.empty:
+        wide_df = mean_loss_df.pivot(
+            index="Method", columns="Percentile", values="Loss"
+        )
+
+        # Check wide format structure
+        assert isinstance(wide_df, pd.DataFrame)
+        assert wide_df.shape[0] <= len(model_classes)  # One row per model
+        # Columns should be quantiles
+        assert all(
+            col in QUANTILES for col in wide_df.columns if col in QUANTILES
+        )
+
+
+def test_long_format_visualization(split_data: tuple) -> None:
+    """Test that results are suitable for long-format visualization."""
+    X_train, X_test = split_data
+    predictors = ["x1", "x2", "x3"]
+    imputed_variables = ["y1", "y2"]
+
+    Y_test = X_test[imputed_variables]
+
+    model_classes = [OLS, QRF]
     method_imputations = get_imputations(
-        model_classes, X_train, X_test, PREDICTORS, IMPUTED_VARIABLES
+        model_classes, X_train, X_test, predictors, imputed_variables
     )
 
     loss_comparison_df = compare_quantile_loss(
-        Y_test, method_imputations, IMPUTED_VARIABLES
-    )
-
-    assert not loss_comparison_df.isna().any().any()
-
-    loss_comparison_df.to_csv("scf_comparison_results.csv")
-
-    comparison_viz = method_comparison_results(
-        data=loss_comparison_df,
-        metric_name="Test Quantile Loss",
-        data_format="long",  # Explicitly using wide format
-    )
-    fig = comparison_viz.plot(
-        title="Method Comparison for SCF Dataset",
-        show_mean=True,
-        save_path="scf_model_comparison.jpg",
-    )
-
-
-@validate_call(config=VALIDATE_CONFIG)
-def load_scf(
-    years: Optional[Union[int, List[int]]] = None,
-    columns: Optional[List[str]] = None,
-) -> pd.DataFrame:
-    """Load Survey of Consumer Finances data for specified years and columns.
-
-    Args:
-        years: Year or list of years to load data for.
-        columns: List of column names to load.
-
-    Returns:
-        DataFrame containing the requested data.
-
-    Raises:
-        ValueError: If no Stata files are found in the downloaded zip
-            or invalid parameters
-        RuntimeError: If there's a network error or a problem processing
-            the downloaded data
-    """
-
-    def scf_url(year: int) -> str:
-        """Return the URL of the SCF summary microdata zip file for a year."""
-        logger.debug(f"Generating SCF URL for year {year}")
-
-        if year not in VALID_YEARS:
-            logger.error(
-                f"Invalid SCF year: {year}. Valid years are {VALID_YEARS}"
-            )
-            raise
-
-        url = f"https://www.federalreserve.gov/econres/files/scfp{year}s.zip"
-        logger.debug(f"Generated URL: {url}")
-        return url
-
-    logger.info(f"Loading SCF data with years={years}")
-
-    try:
-        # Identify years for download
-        if years is None:
-            years = VALID_YEARS
-            logger.warning(f"Using default years: {years}")
-
-        if isinstance(years, int):
-            years = [years]
-
-        all_data: List[pd.DataFrame] = []
-
-        for year in years:
-            logger.info(f"Processing data for year {year}")
-            try:
-                # Download zip file
-                logger.debug(f"Downloading SCF data for year {year}")
-                url = scf_url(year)
-                try:
-                    response = requests.get(url, timeout=60)
-                    response.raise_for_status()  # Raise an error for bad responses
-                except requests.exceptions.RequestException as e:
-                    logger.error(
-                        f"Network error downloading SCF data for year {year}: {str(e)}"
-                    )
-                    raise
-
-                # Process zip file
-                z = zipfile.ZipFile(io.BytesIO(response.content))
-                # Find the .dta file in the zip
-                dta_files: List[str] = [
-                    f for f in z.namelist() if f.endswith(".dta")
-                ]
-                if not dta_files:
-                    logger.error(
-                        f"No Stata files found in zip for year {year}"
-                    )
-                    raise
-
-                # Read the Stata file
-                try:
-                    logger.debug(f"Reading Stata file: {dta_files[0]}")
-                    with z.open(dta_files[0]) as f:
-                        df = pd.read_stata(
-                            io.BytesIO(f.read()), columns=columns
-                        )
-                        logger.debug(f"Read DataFrame with shape {df.shape}")
-                except Exception as e:
-                    logger.error(
-                        f"Error reading Stata file for year {year}: {str(e)}"
-                    )
-                    raise
-
-                # Add year column
-                df["year"] = year
-                logger.info(
-                    f"Successfully processed data for year {year}, shape: {df.shape}"
-                )
-                all_data.append(df)
-
-            except Exception as e:
-                logger.error(f"Error processing year {year}: {str(e)}")
-                raise
-
-        # Combine all years
-        logger.debug(f"Combining data from {len(all_data)} years")
-        if len(all_data) > 1:
-            result = pd.concat(all_data)
-            logger.info(
-                f"Combined data from {len(years)} years, final shape: {result.shape}"
-            )
-            return result
-        else:
-            logger.info(
-                f"Returning data for single year, shape: {all_data[0].shape}"
-            )
-            return all_data[0]
-
-    except Exception as e:
-        logger.error(f"Error in _load: {str(e)}")
-        raise
+        Y_test, method_imputations, imputed_variables
+    )
+
+    # Long format is directly suitable for seaborn/plotly
+    assert "Method" in loss_comparison_df.columns
+    assert "Percentile" in loss_comparison_df.columns
+    assert "Loss" in loss_comparison_df.columns
+
+    # Can group by method and percentile
+    grouped = loss_comparison_df.groupby(["Method", "Percentile"])[
+        "Loss"
+    ].mean()
+    assert not grouped.empty
+
+
+# === Robustness Tests ===
+
+
+def test_comparison_consistency() -> None:
+    """Test that repeated comparisons give consistent results."""
+    np.random.seed(42)
+
+    X_train = pd.DataFrame(
+        {
+            "x1": np.random.randn(50),
+            "x2": np.random.randn(50),
+            "y": np.random.randn(50),
+        }
+    )
+
+    X_test = pd.DataFrame(
+        {
+            "x1": np.random.randn(10),
+            "x2": np.random.randn(10),
+            "y": np.random.randn(10),
+        }
+    )
+
+    predictors = ["x1", "x2"]
+    imputed_variables = ["y"]
+    Y_test = X_test[imputed_variables]
+
+    model_classes = [OLS]
+
+    # Run twice
+    method_imputations1 = get_imputations(
+        model_classes, X_train, X_test, predictors, imputed_variables
+    )
+    loss_df1 = compare_quantile_loss(
+        Y_test, method_imputations1, imputed_variables
+    )
+
+    method_imputations2 = get_imputations(
+        model_classes, X_train, X_test, predictors, imputed_variables
+    )
+    loss_df2 = compare_quantile_loss(
+        Y_test, method_imputations2, imputed_variables
+    )
+
+    # Results should be deterministic for OLS
+    ols_loss1 = loss_df1[loss_df1["Method"] == "OLS"]["Loss"].values
+    ols_loss2 = loss_df2[loss_df2["Method"] == "OLS"]["Loss"].values
+    np.testing.assert_array_almost_equal(ols_loss1, ols_loss2, decimal=5)