
Commit 2effbbb

Fix Falcon3-7b WOQ TP=3 with int4 checkpoint (#3402)
* Falcon3-7b WOQ TP=3 with int4 checkpoint
* Add assert
* Split mha according to kv_heads
* Fix bug with old deepspeed
* Fix run_accuracy_with_deepspeed.py
1 parent 136c1d0 commit 2effbbb

File tree: 2 files changed, +69 −51 lines

examples/cpu/llm/inference/distributed/run_accuracy_with_deepspeed.py

Lines changed: 32 additions & 28 deletions
@@ -224,25 +224,35 @@ def get_int_from_env(env_keys, default):
 TokenSequence = Union[List[int], torch.LongTensor, torch.Tensor, BatchEncoding]
 
 tp_grain_size = 64
-if args.ipex_weight_only_quantization and args.low_precision_checkpoint != "":
-    pathname = args.low_precision_checkpoint
-    assert os.path.exists(pathname), f"Checkpoint file does not exist: {pathname}"
-    if os.path.isdir(pathname):
-        try:
-            with open(pathname + "/config.json") as f:
-                quant_model_config = json.load(f)
-            tp_grain_size = int(
-                quant_model_config["quantization_config"]["group_size"]
-            )
-        except Exception as e:
-            print("Failed to get group_size from config.json")
-    elif args.group_size > 0:
-        tp_grain_size = args.group_size
-    else:
-        print(
-            "Warning: cannot get group_size from config.json or --group-size, "
-            "using default value 64 for tp_grain_size"
-        )
+ds_init_inf_kwargs = {}
+# Need to check if this attr is available. Old DeepSpeed does not have it.
+if "tp_grain_size" in dir(deepspeed.inference.config.DeepSpeedTPConfig()):
+    if args.ipex_weight_only_quantization and args.low_precision_checkpoint != "":
+        pathname = args.low_precision_checkpoint
+        assert os.path.exists(pathname), f"Checkpoint file does not exist: {pathname}"
+        if os.path.isdir(pathname):
+            try:
+                with open(pathname + "/config.json") as f:
+                    quant_model_config = json.load(f)
+                tp_grain_size = int(
+                    quant_model_config["quantization_config"]["group_size"]
+                )
+            except Exception as e:
+                print("Failed to get group_size from config.json")
+        elif args.group_size > 0:
+            tp_grain_size = args.group_size
+        else:
+            print(
+                "Warning: cannot get group_size from config.json or --group-size, "
+                "using default value 64 for tp_grain_size"
+            )
+    ds_init_inf_kwargs.update(
+        {
+            "tensor_parallel": deepspeed.inference.config.DeepSpeedTPConfig(
+                tp_grain_size=tp_grain_size
+            )
+        }
+    )
 
 
 class HuggingFaceModel(BaseLM):
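
For reference, a condensed, hedged sketch of how the script derives tp_grain_size from an INT4/WOQ checkpoint directory, factored into a helper; the helper name read_tp_grain_size is invented here, but the config.json layout and the fallback of 64 come from the diff above:

    import json
    import os

    def read_tp_grain_size(checkpoint_dir, default=64):
        # The quantized checkpoint directory is expected to carry a config.json
        # whose "quantization_config" section holds the WOQ group size.
        cfg_path = os.path.join(checkpoint_dir, "config.json")
        try:
            with open(cfg_path) as f:
                cfg = json.load(f)
            return int(cfg["quantization_config"]["group_size"])
        except Exception:
            # Fall back to the default grain size when config.json is missing or malformed.
            print("Failed to get group_size from config.json")
            return default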
@@ -420,9 +430,7 @@ def write_checkpoints_json():
             base_dir=repo_root,
             dtype=infer_dtype,
             checkpoint=checkpoints_json,
-            tensor_parallel=deepspeed.inference.config.DeepSpeedTPConfig(
-                tp_grain_size=tp_grain_size
-            ),
+            **ds_init_inf_kwargs,
         )
 
         self.model = self.model.module
@@ -1495,9 +1503,7 @@ def write_checkpoints_json():
             base_dir=repo_root,
             dtype=infer_dtype,
             checkpoint=checkpoints_json,
-            tensor_parallel=deepspeed.inference.config.DeepSpeedTPConfig(
-                tp_grain_size=tp_grain_size
-            ),
+            **ds_init_inf_kwargs,
         )
 
         self._model = self._model.module
@@ -2263,9 +2269,7 @@ def write_checkpoints_json():
             base_dir=repo_root,
             dtype=infer_dtype,
             checkpoint=checkpoints_json,
-            tensor_parallel=deepspeed.inference.config.DeepSpeedTPConfig(
-                tp_grain_size=tp_grain_size
-            ),
+            **ds_init_inf_kwargs,
         )
 
         self.model = self.model.module
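
The three call sites above now splat ds_init_inf_kwargs instead of always passing tensor_parallel, so the script keeps working on older DeepSpeed builds whose DeepSpeedTPConfig has no tp_grain_size field. A minimal, hedged sketch of the pattern (the surrounding model and checkpoint arguments are omitted):

    import deepspeed

    tp_grain_size = 64
    ds_init_inf_kwargs = {}

    # Older DeepSpeed releases do not expose tp_grain_size on DeepSpeedTPConfig,
    # so only pass tensor_parallel when the field actually exists.
    if "tp_grain_size" in dir(deepspeed.inference.config.DeepSpeedTPConfig()):
        ds_init_inf_kwargs["tensor_parallel"] = deepspeed.inference.config.DeepSpeedTPConfig(
            tp_grain_size=tp_grain_size
        )

    # model = deepspeed.init_inference(model, dtype=..., checkpoint=..., **ds_init_inf_kwargs)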

examples/cpu/llm/inference/distributed/run_generation_with_deepspeed.py

Lines changed: 37 additions & 23 deletions
@@ -486,35 +486,41 @@ def write_checkpoints_json():
 dist.barrier()
 
 tp_grain_size = 64
-if args.ipex_weight_only_quantization and args.low_precision_checkpoint != "":
-    pathname = args.low_precision_checkpoint
-    assert os.path.exists(pathname), f"Checkpoint file does not exist: {pathname}"
-    if os.path.isdir(pathname):
-        try:
-            with open(pathname + "/config.json") as f:
-                quant_model_config = json.load(f)
-            tp_grain_size = int(
-                quant_model_config["quantization_config"]["group_size"]
-            )
-        except Exception as e:
-            print("Failed to get group_size from config.json")
-    elif args.group_size > 0:
-        tp_grain_size = args.group_size
-    else:
-        print(
-            "Warning: cannot get group_size from config.json or --group-size, "
-            "using default value 64 for tp_grain_size"
-        )
+# Need to check if this attr is available. Old DeepSpeed does not have it.
+if "tp_grain_size" in dir(deepspeed.inference.config.DeepSpeedTPConfig()):
+    if args.ipex_weight_only_quantization and args.low_precision_checkpoint != "":
+        pathname = args.low_precision_checkpoint
+        assert os.path.exists(pathname), f"Checkpoint file does not exist: {pathname}"
+        if os.path.isdir(pathname):
+            try:
+                with open(pathname + "/config.json") as f:
+                    quant_model_config = json.load(f)
+                tp_grain_size = int(
+                    quant_model_config["quantization_config"]["group_size"]
+                )
+            except Exception as e:
+                print("Failed to get group_size from config.json")
+        elif args.group_size > 0:
+            tp_grain_size = args.group_size
+        else:
+            print(
+                "Warning: cannot get group_size from config.json or --group-size, "
+                "using default value 64 for tp_grain_size"
+            )
+    kwargs.update(
+        {
+            "tensor_parallel": deepspeed.inference.config.DeepSpeedTPConfig(
+                tp_grain_size=tp_grain_size
+            )
+        }
+    )
 
 model = deepspeed.init_inference(
     model,
     mp_size=world_size,
     base_dir=repo_root,
     dtype=infer_dtype,
     checkpoint=checkpoints_json,
-    tensor_parallel=deepspeed.inference.config.DeepSpeedTPConfig(
-        tp_grain_size=tp_grain_size
-    ),
     **kwargs,
 )
 

@@ -659,6 +665,8 @@ def write_checkpoints_json():
 )
 if low_precision_checkpoint is not None:
     num_heads = model.config.num_attention_heads
+    if hasattr(model.config, "num_key_value_heads"):
+        num_heads = model.config.num_key_value_heads
     rank = local_rank
 
     mha_layers_split_by_N = [
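
This is the "split mha according to kv_heads" part of the fix: for grouped-query-attention models the checkpoint tensors are sharded along KV heads rather than query heads. A toy, hedged illustration (the 12/4 head counts are assumed for a Falcon3-7B-like config, not taken from the diff):

    from types import SimpleNamespace

    # Stand-in for model.config of a GQA model; values are illustrative only.
    config = SimpleNamespace(num_attention_heads=12, num_key_value_heads=4)

    num_heads = config.num_attention_heads
    if hasattr(config, "num_key_value_heads"):
        # Prefer the KV-head count when the config defines it, as the fix does.
        num_heads = config.num_key_value_heads
    print(num_heads)  # 4 -> tensors are split along KV-head boundaries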
@@ -712,7 +720,13 @@ def write_checkpoints_json():
             # awq qweight: [K, N // 8]
             # awq scales: [K // G, N]
             # awq qzeros: [K // G, N // 8]
-            dim = data.shape[-1] // head_range[-1]
+            if data.shape[-1] % head_range[-1] == 0:
+                dim = data.shape[-1] // head_range[-1]
+            else:
+                assert data.shape[-1] % world_size == 0
+                dim = data.shape[-1] // world_size
+                q_head_start = local_rank
+                q_head_end = local_rank + 1
             low_precision_checkpoint_dict[key] = data[
                 :, q_head_start * dim : q_head_end * dim
             ]
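
The new branch handles checkpoints whose packed N dimension is not an even multiple of head_range[-1], as can happen for Falcon3-7b int4 with TP=3, by falling back to an even per-rank split. A runnable toy example of that slice arithmetic with assumed shapes (96 columns, 3 ranks, a head count of 5 that does not divide 96):

    import torch

    world_size = 3
    local_rank = 1

    # Toy stand-in for an AWQ-packed tensor whose last (N) dimension is 96.
    data = torch.arange(4 * 96).reshape(4, 96)
    heads_total = 5  # assumed head_range[-1] that does not divide 96 evenly

    if data.shape[-1] % heads_total == 0:
        dim = data.shape[-1] // heads_total
        q_head_start, q_head_end = 0, heads_total  # real head range comes from head_range
    else:
        # Fallback from the diff: shard the packed dimension evenly across TP ranks.
        assert data.shape[-1] % world_size == 0
        dim = data.shape[-1] // world_size         # 96 // 3 = 32 columns per rank
        q_head_start, q_head_end = local_rank, local_rank + 1

    shard = data[:, q_head_start * dim : q_head_end * dim]
    print(shard.shape)  # torch.Size([4, 32]) on rank 1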
