diff --git a/comfy/sampler_helpers.py b/comfy/sampler_helpers.py index e46971afb778..e158e8a84634 100644 --- a/comfy/sampler_helpers.py +++ b/comfy/sampler_helpers.py @@ -122,20 +122,21 @@ def estimate_memory(model, noise_shape, conds): minimum_memory_required = model.model.memory_required([noise_shape[0]] + list(noise_shape[1:]), cond_shapes=cond_shapes_min) return memory_required, minimum_memory_required -def prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None): +def prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, skip_load_model=False): executor = comfy.patcher_extension.WrapperExecutor.new_executor( _prepare_sampling, comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.PREPARE_SAMPLING, model_options, is_model_options=True) ) - return executor.execute(model, noise_shape, conds, model_options=model_options) + return executor.execute(model, noise_shape, conds, model_options=model_options, skip_load_model=skip_load_model) -def _prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None): +def _prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, skip_load_model=False): real_model: BaseModel = None models, inference_memory = get_additional_models(conds, model.model_dtype()) models += get_additional_models_from_model_options(model_options) models += model.get_nested_additional_models() # TODO: does this require inference_memory update? memory_required, minimum_memory_required = estimate_memory(model, noise_shape, conds) - comfy.model_management.load_models_gpu([model] + models, memory_required=memory_required + inference_memory, minimum_memory_required=minimum_memory_required + inference_memory) + models_list = [model] if not skip_load_model else [] + comfy.model_management.load_models_gpu(models_list + models, memory_required=memory_required + inference_memory, minimum_memory_required=minimum_memory_required + inference_memory) real_model = model.model return real_model, conds, models diff --git a/comfy_extras/nodes_dataset.py b/comfy_extras/nodes_dataset.py index 4789d7d53046..513aecf3a941 100644 --- a/comfy_extras/nodes_dataset.py +++ b/comfy_extras/nodes_dataset.py @@ -1125,6 +1125,99 @@ def _group_process(cls, texts): # ========== Training Dataset Nodes ========== +class ResolutionBucket(io.ComfyNode): + """Bucket latents and conditions by resolution for efficient batch training.""" + + @classmethod + def define_schema(cls): + return io.Schema( + node_id="ResolutionBucket", + display_name="Resolution Bucket", + category="dataset", + is_experimental=True, + is_input_list=True, + inputs=[ + io.Latent.Input( + "latents", + tooltip="List of latent dicts to bucket by resolution.", + ), + io.Conditioning.Input( + "conditioning", + tooltip="List of conditioning lists (must match latents length).", + ), + ], + outputs=[ + io.Latent.Output( + display_name="latents", + is_output_list=True, + tooltip="List of batched latent dicts, one per resolution bucket.", + ), + io.Conditioning.Output( + display_name="conditioning", + is_output_list=True, + tooltip="List of condition lists, one per resolution bucket.", + ), + ], + ) + + @classmethod + def execute(cls, latents, conditioning): + # latents: list[{"samples": tensor}] where tensor is (B, C, H, W), typically B=1 + # conditioning: list[list[cond]] + + # Validate lengths match + if len(latents) != len(conditioning): + raise ValueError( + f"Number of latents ({len(latents)}) does not match number of conditions ({len(conditioning)})." + ) + + # Flatten latents and conditions to individual samples + flat_latents = [] # list of (C, H, W) tensors + flat_conditions = [] # list of condition lists + + for latent_dict, cond in zip(latents, conditioning): + samples = latent_dict["samples"] # (B, C, H, W) + batch_size = samples.shape[0] + + # cond is a list of conditions with length == batch_size + for i in range(batch_size): + flat_latents.append(samples[i]) # (C, H, W) + flat_conditions.append(cond[i]) # single condition + + # Group by resolution (H, W) + buckets = {} # (H, W) -> {"latents": list, "conditions": list} + + for latent, cond in zip(flat_latents, flat_conditions): + # latent shape is (..., H, W) (B, C, H, W) or (B, T, C, H ,W) + h, w = latent.shape[-2], latent.shape[-1] + key = (h, w) + + if key not in buckets: + buckets[key] = {"latents": [], "conditions": []} + + buckets[key]["latents"].append(latent) + buckets[key]["conditions"].append(cond) + + # Convert buckets to output format + output_latents = [] # list[{"samples": tensor}] where tensor is (Bi, ..., H, W) + output_conditions = [] # list[list[cond]] where each inner list has Bi conditions + + for (h, w), bucket_data in buckets.items(): + # Stack latents into batch: list of (..., H, W) -> (Bi, ..., H, W) + stacked_latents = torch.stack(bucket_data["latents"], dim=0) + output_latents.append({"samples": stacked_latents}) + + # Conditions stay as list of condition lists + output_conditions.append(bucket_data["conditions"]) + + logging.info( + f"Resolution bucket ({h}x{w}): {len(bucket_data['latents'])} samples" + ) + + logging.info(f"Created {len(buckets)} resolution buckets from {len(flat_latents)} samples") + return io.NodeOutput(output_latents, output_conditions) + + class MakeTrainingDataset(io.ComfyNode): """Encode images with VAE and texts with CLIP to create a training dataset.""" @@ -1373,7 +1466,7 @@ def execute(cls, folder_name): shard_path = os.path.join(dataset_dir, shard_file) with open(shard_path, "rb") as f: - shard_data = torch.load(f, weights_only=True) + shard_data = torch.load(f) all_latents.extend(shard_data["latents"]) all_conditioning.extend(shard_data["conditioning"]) @@ -1425,6 +1518,7 @@ async def get_node_list(self) -> list[type[io.ComfyNode]]: MakeTrainingDataset, SaveTrainingDataset, LoadTrainingDataset, + ResolutionBucket, ] diff --git a/comfy_extras/nodes_post_processing.py b/comfy_extras/nodes_post_processing.py index 34c388a5ad69..ca2cdeb50546 100644 --- a/comfy_extras/nodes_post_processing.py +++ b/comfy_extras/nodes_post_processing.py @@ -221,6 +221,7 @@ def define_schema(cls): io.Image.Input("image"), io.Combo.Input("upscale_method", options=cls.upscale_methods), io.Float.Input("megapixels", default=1.0, min=0.01, max=16.0, step=0.01), + io.Int.Input("resolution_steps", default=1, min=1, max=256), ], outputs=[ io.Image.Output(), @@ -228,15 +229,15 @@ def define_schema(cls): ) @classmethod - def execute(cls, image, upscale_method, megapixels) -> io.NodeOutput: + def execute(cls, image, upscale_method, megapixels, resolution_steps) -> io.NodeOutput: samples = image.movedim(-1,1) - total = int(megapixels * 1024 * 1024) + total = megapixels * 1024 * 1024 scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2])) - width = round(samples.shape[3] * scale_by) - height = round(samples.shape[2] * scale_by) + width = round(samples.shape[3] * scale_by / resolution_steps) * resolution_steps + height = round(samples.shape[2] * scale_by / resolution_steps) * resolution_steps - s = comfy.utils.common_upscale(samples, width, height, upscale_method, "disabled") + s = comfy.utils.common_upscale(samples, int(width), int(height), upscale_method, "disabled") s = s.movedim(1,-1) return io.NodeOutput(s) diff --git a/comfy_extras/nodes_train.py b/comfy_extras/nodes_train.py index 19b8baaf4b89..88bc8c8e8b4c 100644 --- a/comfy_extras/nodes_train.py +++ b/comfy_extras/nodes_train.py @@ -10,6 +10,7 @@ from typing_extensions import override import comfy.samplers +import comfy.sampler_helpers import comfy.sd import comfy.utils import comfy.model_management @@ -21,6 +22,68 @@ from comfy.utils import ProgressBar +class TrainGuider(comfy_extras.nodes_custom_sampler.Guider_Basic): + """ + CFGGuider with modifications for training specific logic + """ + def outer_sample( + self, + noise, + latent_image, + sampler, + sigmas, + denoise_mask=None, + callback=None, + disable_pbar=False, + seed=None, + latent_shapes=None, + ): + self.inner_model, self.conds, self.loaded_models = ( + comfy.sampler_helpers.prepare_sampling( + self.model_patcher, + noise.shape, + self.conds, + self.model_options, + skip_load_model=True, # skip load model as we manage it in TrainLoraNode.execute() + ) + ) + device = self.model_patcher.load_device + + if denoise_mask is not None: + denoise_mask = comfy.sampler_helpers.prepare_mask( + denoise_mask, noise.shape, device + ) + + noise = noise.to(device) + latent_image = latent_image.to(device) + sigmas = sigmas.to(device) + comfy.samplers.cast_to_load_options( + self.model_options, device=device, dtype=self.model_patcher.model_dtype() + ) + + try: + self.model_patcher.pre_run() + output = self.inner_sample( + noise, + latent_image, + device, + sampler, + sigmas, + denoise_mask, + callback, + disable_pbar, + seed, + latent_shapes=latent_shapes, + ) + finally: + self.model_patcher.cleanup() + + comfy.sampler_helpers.cleanup_models(self.conds, self.loaded_models) + del self.inner_model + del self.loaded_models + return output + + def make_batch_extra_option_dict(d, indicies, full_size=None): new_dict = {} for k, v in d.items(): @@ -65,6 +128,7 @@ def __init__( seed=0, training_dtype=torch.bfloat16, real_dataset=None, + bucket_latents=None, ): self.loss_fn = loss_fn self.optimizer = optimizer @@ -75,6 +139,28 @@ def __init__( self.seed = seed self.training_dtype = training_dtype self.real_dataset: list[torch.Tensor] | None = real_dataset + # Bucket mode data + self.bucket_latents: list[torch.Tensor] | None = ( + bucket_latents # list of (Bi, C, Hi, Wi) + ) + # Precompute bucket offsets and weights for sampling + if bucket_latents is not None: + self._init_bucket_data(bucket_latents) + else: + self.bucket_offsets = None + self.bucket_weights = None + self.num_images = None + + def _init_bucket_data(self, bucket_latents): + """Initialize bucket offsets and weights for sampling.""" + self.bucket_offsets = [0] + bucket_sizes = [] + for lat in bucket_latents: + bucket_sizes.append(lat.shape[0]) + self.bucket_offsets.append(self.bucket_offsets[-1] + lat.shape[0]) + self.num_images = self.bucket_offsets[-1] + # Weights for sampling buckets proportional to their size + self.bucket_weights = torch.tensor(bucket_sizes, dtype=torch.float32) def fwd_bwd( self, @@ -115,6 +201,108 @@ def fwd_bwd( bwd_loss.backward() return loss + def _generate_batch_sigmas(self, model_wrap, batch_size, device): + """Generate random sigma values for a batch.""" + batch_sigmas = [ + model_wrap.inner_model.model_sampling.percent_to_sigma( + torch.rand((1,)).item() + ) + for _ in range(batch_size) + ] + return torch.tensor(batch_sigmas).to(device) + + def _train_step_bucket_mode(self, model_wrap, cond, extra_args, noisegen, latent_image, pbar): + """Execute one training step in bucket mode.""" + # Sample bucket (weighted by size), then sample batch from bucket + bucket_idx = torch.multinomial(self.bucket_weights, 1).item() + bucket_latent = self.bucket_latents[bucket_idx] # (Bi, C, Hi, Wi) + bucket_size = bucket_latent.shape[0] + bucket_offset = self.bucket_offsets[bucket_idx] + + # Sample indices from this bucket (use all if bucket_size < batch_size) + actual_batch_size = min(self.batch_size, bucket_size) + relative_indices = torch.randperm(bucket_size)[:actual_batch_size].tolist() + # Convert to absolute indices for fwd_bwd (cond is flattened, use absolute index) + absolute_indices = [bucket_offset + idx for idx in relative_indices] + + batch_latent = bucket_latent[relative_indices].to(latent_image) # (actual_batch_size, C, H, W) + batch_noise = noisegen.generate_noise({"samples": batch_latent}).to( + batch_latent.device + ) + batch_sigmas = self._generate_batch_sigmas(model_wrap, actual_batch_size, batch_latent.device) + + loss = self.fwd_bwd( + model_wrap, + batch_sigmas, + batch_noise, + batch_latent, + cond, # Use flattened cond with absolute indices + absolute_indices, + extra_args, + self.num_images, + bwd=True, + ) + if self.loss_callback: + self.loss_callback(loss.item()) + pbar.set_postfix({"loss": f"{loss.item():.4f}", "bucket": bucket_idx}) + + def _train_step_standard_mode(self, model_wrap, cond, extra_args, noisegen, latent_image, dataset_size, pbar): + """Execute one training step in standard (non-bucket, non-multi-res) mode.""" + indicies = torch.randperm(dataset_size)[: self.batch_size].tolist() + batch_latent = torch.stack([latent_image[i] for i in indicies]) + batch_noise = noisegen.generate_noise({"samples": batch_latent}).to( + batch_latent.device + ) + batch_sigmas = self._generate_batch_sigmas(model_wrap, min(self.batch_size, dataset_size), batch_latent.device) + + loss = self.fwd_bwd( + model_wrap, + batch_sigmas, + batch_noise, + batch_latent, + cond, + indicies, + extra_args, + dataset_size, + bwd=True, + ) + if self.loss_callback: + self.loss_callback(loss.item()) + pbar.set_postfix({"loss": f"{loss.item():.4f}"}) + + def _train_step_multires_mode(self, model_wrap, cond, extra_args, noisegen, latent_image, dataset_size, pbar): + """Execute one training step in multi-resolution mode (real_dataset is set).""" + indicies = torch.randperm(dataset_size)[: self.batch_size].tolist() + total_loss = 0 + for index in indicies: + single_latent = self.real_dataset[index].to(latent_image) + batch_noise = noisegen.generate_noise( + {"samples": single_latent} + ).to(single_latent.device) + batch_sigmas = ( + model_wrap.inner_model.model_sampling.percent_to_sigma( + torch.rand((1,)).item() + ) + ) + batch_sigmas = torch.tensor([batch_sigmas]).to(single_latent.device) + loss = self.fwd_bwd( + model_wrap, + batch_sigmas, + batch_noise, + single_latent, + cond, + [index], + extra_args, + dataset_size, + bwd=False, + ) + total_loss += loss + total_loss = total_loss / self.grad_acc / len(indicies) + total_loss.backward() + if self.loss_callback: + self.loss_callback(total_loss.item()) + pbar.set_postfix({"loss": f"{total_loss.item():.4f}"}) + def sample( self, model_wrap, @@ -142,70 +330,18 @@ def sample( noisegen = comfy_extras.nodes_custom_sampler.Noise_RandomNoise( self.seed + i * 1000 ) - indicies = torch.randperm(dataset_size)[: self.batch_size].tolist() - if self.real_dataset is None: - batch_latent = torch.stack([latent_image[i] for i in indicies]) - batch_noise = noisegen.generate_noise({"samples": batch_latent}).to( - batch_latent.device - ) - batch_sigmas = [ - model_wrap.inner_model.model_sampling.percent_to_sigma( - torch.rand((1,)).item() - ) - for _ in range(min(self.batch_size, dataset_size)) - ] - batch_sigmas = torch.tensor(batch_sigmas).to(batch_latent.device) - - loss = self.fwd_bwd( - model_wrap, - batch_sigmas, - batch_noise, - batch_latent, - cond, - indicies, - extra_args, - dataset_size, - bwd=True, - ) - if self.loss_callback: - self.loss_callback(loss.item()) - pbar.set_postfix({"loss": f"{loss.item():.4f}"}) + if self.bucket_latents is not None: + self._train_step_bucket_mode(model_wrap, cond, extra_args, noisegen, latent_image, pbar) + elif self.real_dataset is None: + self._train_step_standard_mode(model_wrap, cond, extra_args, noisegen, latent_image, dataset_size, pbar) else: - total_loss = 0 - for index in indicies: - single_latent = self.real_dataset[index].to(latent_image) - batch_noise = noisegen.generate_noise( - {"samples": single_latent} - ).to(single_latent.device) - batch_sigmas = ( - model_wrap.inner_model.model_sampling.percent_to_sigma( - torch.rand((1,)).item() - ) - ) - batch_sigmas = torch.tensor([batch_sigmas]).to(single_latent.device) - loss = self.fwd_bwd( - model_wrap, - batch_sigmas, - batch_noise, - single_latent, - cond, - [index], - extra_args, - dataset_size, - bwd=False, - ) - total_loss += loss - total_loss = total_loss / self.grad_acc / len(indicies) - total_loss.backward() - if self.loss_callback: - self.loss_callback(total_loss.item()) - pbar.set_postfix({"loss": f"{total_loss.item():.4f}"}) + self._train_step_multires_mode(model_wrap, cond, extra_args, noisegen, latent_image, dataset_size, pbar) if (i + 1) % self.grad_acc == 0: self.optimizer.step() self.optimizer.zero_grad() - ui_pbar.update(1) + ui_pbar.update(1) torch.cuda.empty_cache() return torch.zeros_like(latent_image) @@ -283,6 +419,364 @@ def unpatch(m): del m.org_forward +def _process_latents_bucket_mode(latents): + """Process latents for bucket mode training. + + Args: + latents: list[{"samples": tensor}] where each tensor is (Bi, C, Hi, Wi) + + Returns: + list of latent tensors + """ + bucket_latents = [] + for latent_dict in latents: + bucket_latents.append(latent_dict["samples"]) # (Bi, C, Hi, Wi) + return bucket_latents + + +def _process_latents_standard_mode(latents): + """Process latents for standard (non-bucket) mode training. + + Args: + latents: list of latent dicts or single latent dict + + Returns: + Processed latents (tensor or list of tensors) + """ + if len(latents) == 1: + return latents[0]["samples"] # Single latent dict + + latent_list = [] + for latent in latents: + latent = latent["samples"] + bs = latent.shape[0] + if bs != 1: + for sub_latent in latent: + latent_list.append(sub_latent[None]) + else: + latent_list.append(latent) + return latent_list + + +def _process_conditioning(positive): + """Process conditioning - either single list or list of lists. + + Args: + positive: list of conditioning + + Returns: + Flattened conditioning list + """ + if len(positive) == 1: + return positive[0] # Single conditioning list + + # Multiple conditioning lists - flatten + flat_positive = [] + for cond in positive: + if isinstance(cond, list): + flat_positive.extend(cond) + else: + flat_positive.append(cond) + return flat_positive + + +def _prepare_latents_and_count(latents, dtype, bucket_mode): + """Convert latents to dtype and compute image counts. + + Args: + latents: Latents (tensor, list of tensors, or bucket list) + dtype: Target dtype + bucket_mode: Whether bucket mode is enabled + + Returns: + tuple: (processed_latents, num_images, multi_res) + """ + if bucket_mode: + # In bucket mode, latents is list of tensors (Bi, C, Hi, Wi) + latents = [t.to(dtype) for t in latents] + num_buckets = len(latents) + num_images = sum(t.shape[0] for t in latents) + multi_res = False # Not using multi_res path in bucket mode + + logging.info(f"Bucket mode: {num_buckets} buckets, {num_images} total samples") + for i, lat in enumerate(latents): + logging.info(f" Bucket {i}: shape {lat.shape}") + return latents, num_images, multi_res + + # Non-bucket mode + if isinstance(latents, list): + all_shapes = set() + latents = [t.to(dtype) for t in latents] + for latent in latents: + all_shapes.add(latent.shape) + logging.info(f"Latent shapes: {all_shapes}") + if len(all_shapes) > 1: + multi_res = True + else: + multi_res = False + latents = torch.cat(latents, dim=0) + num_images = len(latents) + elif isinstance(latents, torch.Tensor): + latents = latents.to(dtype) + num_images = latents.shape[0] + multi_res = False + else: + logging.error(f"Invalid latents type: {type(latents)}") + num_images = 0 + multi_res = False + + return latents, num_images, multi_res + + +def _validate_and_expand_conditioning(positive, num_images, bucket_mode): + """Validate conditioning count matches image count, expand if needed. + + Args: + positive: Conditioning list + num_images: Number of images + bucket_mode: Whether bucket mode is enabled + + Returns: + Validated/expanded conditioning list + + Raises: + ValueError: If conditioning count doesn't match image count + """ + if bucket_mode: + return positive # Skip validation in bucket mode + + logging.info(f"Total Images: {num_images}, Total Captions: {len(positive)}") + if len(positive) == 1 and num_images > 1: + return positive * num_images + elif len(positive) != num_images: + raise ValueError( + f"Number of positive conditions ({len(positive)}) does not match number of images ({num_images})." + ) + return positive + + +def _load_existing_lora(existing_lora): + """Load existing LoRA weights if provided. + + Args: + existing_lora: LoRA filename or "[None]" + + Returns: + tuple: (existing_weights dict, existing_steps int) + """ + if existing_lora == "[None]": + return {}, 0 + + lora_path = folder_paths.get_full_path_or_raise("loras", existing_lora) + # Extract steps from filename like "trained_lora_10_steps_20250225_203716" + existing_steps = int(existing_lora.split("_steps_")[0].split("_")[-1]) + existing_weights = {} + if lora_path: + existing_weights = comfy.utils.load_torch_file(lora_path) + return existing_weights, existing_steps + + +def _create_weight_adapter( + module, module_name, existing_weights, algorithm, lora_dtype, rank +): + """Create a weight adapter for a module with weight. + + Args: + module: The module to create adapter for + module_name: Name of the module + existing_weights: Dict of existing LoRA weights + algorithm: Algorithm name for new adapters + lora_dtype: dtype for LoRA weights + rank: Rank for new LoRA adapters + + Returns: + tuple: (train_adapter, lora_params dict) + """ + key = f"{module_name}.weight" + shape = module.weight.shape + lora_params = {} + + if len(shape) >= 2: + alpha = float(existing_weights.get(f"{key}.alpha", 1.0)) + dora_scale = existing_weights.get(f"{key}.dora_scale", None) + + # Try to load existing adapter + existing_adapter = None + for adapter_cls in adapters: + existing_adapter = adapter_cls.load( + module_name, existing_weights, alpha, dora_scale + ) + if existing_adapter is not None: + break + + if existing_adapter is None: + adapter_cls = adapter_maps[algorithm] + + if existing_adapter is not None: + train_adapter = existing_adapter.to_train().to(lora_dtype) + else: + # Use LoRA with alpha=1.0 by default + train_adapter = adapter_cls.create_train( + module.weight, rank=rank, alpha=1.0 + ).to(lora_dtype) + + for name, parameter in train_adapter.named_parameters(): + lora_params[f"{module_name}.{name}"] = parameter + + return train_adapter.train().requires_grad_(True), lora_params + else: + # 1D weight - use BiasDiff + diff = torch.nn.Parameter( + torch.zeros(module.weight.shape, dtype=lora_dtype, requires_grad=True) + ) + diff_module = BiasDiff(diff).train().requires_grad_(True) + lora_params[f"{module_name}.diff"] = diff + return diff_module, lora_params + + +def _create_bias_adapter(module, module_name, lora_dtype): + """Create a bias adapter for a module with bias. + + Args: + module: The module with bias + module_name: Name of the module + lora_dtype: dtype for LoRA weights + + Returns: + tuple: (bias_module, lora_params dict) + """ + bias = torch.nn.Parameter( + torch.zeros(module.bias.shape, dtype=lora_dtype, requires_grad=True) + ) + bias_module = BiasDiff(bias).train().requires_grad_(True) + lora_params = {f"{module_name}.diff_b": bias} + return bias_module, lora_params + + +def _setup_lora_adapters(mp, existing_weights, algorithm, lora_dtype, rank): + """Setup all LoRA adapters on the model. + + Args: + mp: Model patcher + existing_weights: Dict of existing LoRA weights + algorithm: Algorithm name for new adapters + lora_dtype: dtype for LoRA weights + rank: Rank for new LoRA adapters + + Returns: + tuple: (lora_sd dict, all_weight_adapters list) + """ + lora_sd = {} + all_weight_adapters = [] + + for n, m in mp.model.named_modules(): + if hasattr(m, "weight_function"): + if m.weight is not None: + adapter, params = _create_weight_adapter( + m, n, existing_weights, algorithm, lora_dtype, rank + ) + lora_sd.update(params) + key = f"{n}.weight" + mp.add_weight_wrapper(key, adapter) + all_weight_adapters.append(adapter) + + if hasattr(m, "bias") and m.bias is not None: + bias_adapter, bias_params = _create_bias_adapter(m, n, lora_dtype) + lora_sd.update(bias_params) + key = f"{n}.bias" + mp.add_weight_wrapper(key, bias_adapter) + all_weight_adapters.append(bias_adapter) + + return lora_sd, all_weight_adapters + + +def _create_optimizer(optimizer_name, parameters, learning_rate): + """Create optimizer based on name. + + Args: + optimizer_name: Name of optimizer ("Adam", "AdamW", "SGD", "RMSprop") + parameters: Parameters to optimize + learning_rate: Learning rate + + Returns: + Optimizer instance + """ + if optimizer_name == "Adam": + return torch.optim.Adam(parameters, lr=learning_rate) + elif optimizer_name == "AdamW": + return torch.optim.AdamW(parameters, lr=learning_rate) + elif optimizer_name == "SGD": + return torch.optim.SGD(parameters, lr=learning_rate) + elif optimizer_name == "RMSprop": + return torch.optim.RMSprop(parameters, lr=learning_rate) + + +def _create_loss_function(loss_function_name): + """Create loss function based on name. + + Args: + loss_function_name: Name of loss function ("MSE", "L1", "Huber", "SmoothL1") + + Returns: + Loss function instance + """ + if loss_function_name == "MSE": + return torch.nn.MSELoss() + elif loss_function_name == "L1": + return torch.nn.L1Loss() + elif loss_function_name == "Huber": + return torch.nn.HuberLoss() + elif loss_function_name == "SmoothL1": + return torch.nn.SmoothL1Loss() + + +def _run_training_loop( + guider, train_sampler, latents, num_images, seed, bucket_mode, multi_res +): + """Execute the training loop. + + Args: + guider: The guider object + train_sampler: The training sampler + latents: Latent tensors + num_images: Number of images + seed: Random seed + bucket_mode: Whether bucket mode is enabled + multi_res: Whether multi-resolution mode is enabled + """ + sigmas = torch.tensor(range(num_images)) + noise = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(seed) + + if bucket_mode: + # Use first bucket's first latent as dummy for guider + dummy_latent = latents[0][:1].repeat(num_images, 1, 1, 1) + guider.sample( + noise.generate_noise({"samples": dummy_latent}), + dummy_latent, + train_sampler, + sigmas, + seed=noise.seed, + ) + elif multi_res: + # use first latent as dummy latent if multi_res + latents = latents[0].repeat(num_images, 1, 1, 1) + guider.sample( + noise.generate_noise({"samples": latents}), + latents, + train_sampler, + sigmas, + seed=noise.seed, + ) + else: + guider.sample( + noise.generate_noise({"samples": latents}), + latents, + train_sampler, + sigmas, + seed=noise.seed, + ) + + class TrainLoraNode(io.ComfyNode): @classmethod def define_schema(cls): @@ -385,6 +879,11 @@ def define_schema(cls): default="[None]", tooltip="The existing LoRA to append to. Set to None for new LoRA.", ), + io.Boolean.Input( + "bucket_mode", + default=False, + tooltip="Enable resolution bucket mode. When enabled, expects pre-bucketed latents from ResolutionBucket node.", + ), ], outputs=[ io.Model.Output( @@ -419,6 +918,7 @@ def execute( algorithm, gradient_checkpointing, existing_lora, + bucket_mode, ): # Extract scalars from lists (due to is_input_list=True) model = model[0] @@ -427,215 +927,125 @@ def execute( grad_accumulation_steps = grad_accumulation_steps[0] learning_rate = learning_rate[0] rank = rank[0] - optimizer = optimizer[0] - loss_function = loss_function[0] + optimizer_name = optimizer[0] + loss_function_name = loss_function[0] seed = seed[0] training_dtype = training_dtype[0] lora_dtype = lora_dtype[0] algorithm = algorithm[0] gradient_checkpointing = gradient_checkpointing[0] existing_lora = existing_lora[0] + bucket_mode = bucket_mode[0] - # Handle latents - either single dict or list of dicts - if len(latents) == 1: - latents = latents[0]["samples"] # Single latent dict - else: - latent_list = [] - for latent in latents: - latent = latent["samples"] - bs = latent.shape[0] - if bs != 1: - for sub_latent in latent: - latent_list.append(sub_latent[None]) - else: - latent_list.append(latent) - latents = latent_list - - # Handle conditioning - either single list or list of lists - if len(positive) == 1: - positive = positive[0] # Single conditioning list + # Process latents based on mode + if bucket_mode: + latents = _process_latents_bucket_mode(latents) else: - # Multiple conditioning lists - flatten - flat_positive = [] - for cond in positive: - if isinstance(cond, list): - flat_positive.extend(cond) - else: - flat_positive.append(cond) - positive = flat_positive + latents = _process_latents_standard_mode(latents) + # Process conditioning + positive = _process_conditioning(positive) + + # Setup model and dtype mp = model.clone() dtype = node_helpers.string_to_torch_dtype(training_dtype) lora_dtype = node_helpers.string_to_torch_dtype(lora_dtype) mp.set_model_compute_dtype(dtype) - # latents here can be list of different size latent or one large batch - if isinstance(latents, list): - all_shapes = set() - latents = [t.to(dtype) for t in latents] - for latent in latents: - all_shapes.add(latent.shape) - logging.info(f"Latent shapes: {all_shapes}") - if len(all_shapes) > 1: - multi_res = True - else: - multi_res = False - latents = torch.cat(latents, dim=0) - num_images = len(latents) - elif isinstance(latents, torch.Tensor): - latents = latents.to(dtype) - num_images = latents.shape[0] - else: - logging.error(f"Invalid latents type: {type(latents)}") - - logging.info(f"Total Images: {num_images}, Total Captions: {len(positive)}") - if len(positive) == 1 and num_images > 1: - positive = positive * num_images - elif len(positive) != num_images: - raise ValueError( - f"Number of positive conditions ({len(positive)}) does not match number of images ({num_images})." - ) + # Prepare latents and compute counts + latents, num_images, multi_res = _prepare_latents_and_count( + latents, dtype, bucket_mode + ) + + # Validate and expand conditioning + positive = _validate_and_expand_conditioning(positive, num_images, bucket_mode) with torch.inference_mode(False): - lora_sd = {} - generator = torch.Generator() - generator.manual_seed(seed) + # Setup models for training + mp.model.requires_grad_(False) # Load existing LoRA weights if provided - existing_weights = {} - existing_steps = 0 - if existing_lora != "[None]": - lora_path = folder_paths.get_full_path_or_raise("loras", existing_lora) - # Extract steps from filename like "trained_lora_10_steps_20250225_203716" - existing_steps = int(existing_lora.split("_steps_")[0].split("_")[-1]) - if lora_path: - existing_weights = comfy.utils.load_torch_file(lora_path) - - all_weight_adapters = [] - for n, m in mp.model.named_modules(): - if hasattr(m, "weight_function"): - if m.weight is not None: - key = "{}.weight".format(n) - shape = m.weight.shape - if len(shape) >= 2: - alpha = float(existing_weights.get(f"{key}.alpha", 1.0)) - dora_scale = existing_weights.get(f"{key}.dora_scale", None) - for adapter_cls in adapters: - existing_adapter = adapter_cls.load( - n, existing_weights, alpha, dora_scale - ) - if existing_adapter is not None: - break - else: - existing_adapter = None - adapter_cls = adapter_maps[algorithm] - - if existing_adapter is not None: - train_adapter = existing_adapter.to_train().to( - lora_dtype - ) - else: - # Use LoRA with alpha=1.0 by default - train_adapter = adapter_cls.create_train( - m.weight, rank=rank, alpha=1.0 - ).to(lora_dtype) - for name, parameter in train_adapter.named_parameters(): - lora_sd[f"{n}.{name}"] = parameter - - mp.add_weight_wrapper(key, train_adapter) - all_weight_adapters.append(train_adapter) - else: - diff = torch.nn.Parameter( - torch.zeros( - m.weight.shape, dtype=lora_dtype, requires_grad=True - ) - ) - diff_module = BiasDiff(diff) - mp.add_weight_wrapper(key, BiasDiff(diff)) - all_weight_adapters.append(diff_module) - lora_sd["{}.diff".format(n)] = diff - if hasattr(m, "bias") and m.bias is not None: - key = "{}.bias".format(n) - bias = torch.nn.Parameter( - torch.zeros( - m.bias.shape, dtype=lora_dtype, requires_grad=True - ) - ) - bias_module = BiasDiff(bias) - lora_sd["{}.diff_b".format(n)] = bias - mp.add_weight_wrapper(key, BiasDiff(bias)) - all_weight_adapters.append(bias_module) - - if optimizer == "Adam": - optimizer = torch.optim.Adam(lora_sd.values(), lr=learning_rate) - elif optimizer == "AdamW": - optimizer = torch.optim.AdamW(lora_sd.values(), lr=learning_rate) - elif optimizer == "SGD": - optimizer = torch.optim.SGD(lora_sd.values(), lr=learning_rate) - elif optimizer == "RMSprop": - optimizer = torch.optim.RMSprop(lora_sd.values(), lr=learning_rate) - - # Setup loss function based on selection - if loss_function == "MSE": - criterion = torch.nn.MSELoss() - elif loss_function == "L1": - criterion = torch.nn.L1Loss() - elif loss_function == "Huber": - criterion = torch.nn.HuberLoss() - elif loss_function == "SmoothL1": - criterion = torch.nn.SmoothL1Loss() - - # setup models + existing_weights, existing_steps = _load_existing_lora(existing_lora) + + # Setup LoRA adapters + lora_sd, all_weight_adapters = _setup_lora_adapters( + mp, existing_weights, algorithm, lora_dtype, rank + ) + + # Create optimizer and loss function + optimizer = _create_optimizer( + optimizer_name, lora_sd.values(), learning_rate + ) + criterion = _create_loss_function(loss_function_name) + + # Setup gradient checkpointing if gradient_checkpointing: for m in find_all_highest_child_module_with_forward( mp.model.diffusion_model ): patch(m) - mp.model.requires_grad_(False) + + torch.cuda.empty_cache() + # With force_full_load=False we should be able to have offloading + # But for offloading in training we need custom AutoGrad hooks for fwd/bwd comfy.model_management.load_models_gpu( [mp], memory_required=1e20, force_full_load=True ) + torch.cuda.empty_cache() - # Setup sampler and guider like in test script + # Setup loss tracking loss_map = {"loss": []} def loss_callback(loss): loss_map["loss"].append(loss) - train_sampler = TrainSampler( - criterion, - optimizer, - loss_callback=loss_callback, - batch_size=batch_size, - grad_acc=grad_accumulation_steps, - total_steps=steps * grad_accumulation_steps, - seed=seed, - training_dtype=dtype, - real_dataset=latents if multi_res else None, - ) - guider = comfy_extras.nodes_custom_sampler.Guider_Basic(mp) - guider.set_conds(positive) # Set conditioning from input + # Create sampler + if bucket_mode: + train_sampler = TrainSampler( + criterion, + optimizer, + loss_callback=loss_callback, + batch_size=batch_size, + grad_acc=grad_accumulation_steps, + total_steps=steps * grad_accumulation_steps, + seed=seed, + training_dtype=dtype, + bucket_latents=latents, + ) + else: + train_sampler = TrainSampler( + criterion, + optimizer, + loss_callback=loss_callback, + batch_size=batch_size, + grad_acc=grad_accumulation_steps, + total_steps=steps * grad_accumulation_steps, + seed=seed, + training_dtype=dtype, + real_dataset=latents if multi_res else None, + ) - # Training loop + # Setup guider + guider = TrainGuider(mp) + guider.set_conds(positive) + + # Run training loop try: - # Generate dummy sigmas and noise - sigmas = torch.tensor(range(num_images)) - noise = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(seed) - if multi_res: - # use first latent as dummy latent if multi_res - latents = latents[0].repeat((num_images,) + ((1,) * (latents[0].ndim - 1))) - guider.sample( - noise.generate_noise({"samples": latents}), - latents, + _run_training_loop( + guider, train_sampler, - sigmas, - seed=noise.seed, + latents, + num_images, + seed, + bucket_mode, + multi_res, ) finally: for m in mp.model.modules(): unpatch(m) del train_sampler, optimizer + # Finalize adapters for adapter in all_weight_adapters: adapter.requires_grad_(False) @@ -645,7 +1055,7 @@ def loss_callback(loss): return io.NodeOutput(mp, lora_sd, loss_map, steps + existing_steps) -class LoraModelLoader(io.ComfyNode): +class LoraModelLoader(io.ComfyNode):# @classmethod def define_schema(cls): return io.Schema(