Skip to content

Commit 4c11750

Browse files
committed
update
1 parent 3150cea commit 4c11750

File tree

1 file changed

+24
-11
lines changed

1 file changed

+24
-11
lines changed

src/compressed_tensors/quantization/quant_scheme.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -142,44 +142,57 @@ def is_preset_scheme(name: str) -> bool:
142142
)
143143
)
144144

145-
MXFP4 = dict(
145+
146+
NVFP4 = dict(
146147
weights=QuantizationArgs(
147148
num_bits=4,
148149
type=QuantizationType.FLOAT,
149-
strategy=QuantizationStrategy.GROUP,
150+
strategy=QuantizationStrategy.TENSOR_GROUP,
150151
symmetric=True,
151152
dynamic=False,
152-
group_size=32,
153+
group_size=16,
153154
),
154155
input_activations=QuantizationArgs(
156+
num_bits=4,
157+
type=QuantizationType.FLOAT,
158+
strategy=QuantizationStrategy.TENSOR_GROUP,
159+
symmetric=True,
160+
dynamic=DynamicType.LOCAL,
161+
group_size=16,
162+
),
163+
)
164+
165+
MXFP4A16 = dict(
166+
weights=QuantizationArgs(
155167
num_bits=4,
156168
type=QuantizationType.FLOAT,
157169
strategy=QuantizationStrategy.GROUP,
158-
dynamic=True,
159170
symmetric=True,
171+
dynamic=False,
160172
group_size=32,
161-
),
173+
)
162174
)
163175

164-
NVFP4 = dict(
176+
MXFP4 = dict(
165177
weights=QuantizationArgs(
166178
num_bits=4,
167179
type=QuantizationType.FLOAT,
168-
strategy=QuantizationStrategy.TENSOR_GROUP,
180+
strategy=QuantizationStrategy.GROUP,
169181
symmetric=True,
170182
dynamic=False,
171-
group_size=16,
183+
group_size=32,
172184
),
173185
input_activations=QuantizationArgs(
174186
num_bits=4,
175187
type=QuantizationType.FLOAT,
176-
strategy=QuantizationStrategy.TENSOR_GROUP,
188+
strategy=QuantizationStrategy.GROUP,
189+
dynamic=True,
177190
symmetric=True,
178-
dynamic=DynamicType.LOCAL,
179-
group_size=16,
191+
group_size=32,
180192
),
181193
)
182194

195+
183196
# 8 bit integer weights and 8 bit activations quantization
184197
INT8_W8A8 = dict(
185198
weights=QuantizationArgs(

0 commit comments

Comments
 (0)