
Commit 791e30f

Fix nan issue when quantizing fp16 tensor. (#11213)
1 parent e2a800e commit 791e30f


comfy/quant_ops.py

Lines changed: 4 additions & 1 deletion
@@ -399,7 +399,10 @@ def quantize(cls, tensor, scale=None, dtype=torch.float8_e4m3fn, stochastic_roun
         orig_dtype = tensor.dtype

         if isinstance(scale, str) and scale == "recalculate":
-            scale = torch.amax(tensor.abs()) / torch.finfo(dtype).max
+            scale = torch.amax(tensor.abs()).to(dtype=torch.float32) / torch.finfo(dtype).max
+            if tensor.dtype not in [torch.float32, torch.bfloat16]:  # Prevent scale from being too small
+                tensor_info = torch.finfo(tensor.dtype)
+                scale = (1.0 / torch.clamp((1.0 / scale), min=tensor_info.min, max=tensor_info.max))

         if scale is not None:
             if not isinstance(scale, torch.Tensor):
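
Why this fixes the NaN: in the old code the whole scale computation stays in the tensor's dtype, so for an fp16 tensor with very small values the scale can underflow to zero and the later division by it produces inf/NaN. The new code computes the scale in float32 and then clamps its reciprocal into the range representable in the tensor's dtype. The following is a standalone sketch of the failure mode and the fix, not the repository's quantize() method; the fp8 target dtype and the example values are assumptions chosen to trigger underflow.

# Standalone sketch: why tiny fp16 values used to yield NaN, and how the
# float32 + clamp change avoids it.
import torch

fp8 = torch.float8_e4m3fn
tensor = torch.full((4,), 1e-6, dtype=torch.float16)  # tiny fp16 values

# Old behaviour: amax and the division stay in fp16, so the scale underflows
# to 0 and tensor / scale becomes inf (NaN after further arithmetic).
old_scale = torch.amax(tensor.abs()) / torch.finfo(fp8).max
print(old_scale)             # tensor(0., dtype=torch.float16)
print(tensor / old_scale)    # inf

# New behaviour: compute the scale in float32, then clamp 1/scale to the
# range representable in the tensor's dtype so the scale cannot get too small.
scale = torch.amax(tensor.abs()).to(dtype=torch.float32) / torch.finfo(fp8).max
tensor_info = torch.finfo(tensor.dtype)
scale = 1.0 / torch.clamp(1.0 / scale, min=tensor_info.min, max=tensor_info.max)
print(scale)                      # ~1.526e-05, i.e. 1 / 65504
print((tensor / scale).to(fp8))   # finite fp8 values, no NaN

With the clamp, the smallest scale an fp16 tensor can receive is 1/65504, the boundary at which its reciprocal still fits in fp16.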
