# Install the Mixture-of-Prompts head (as a submodule of `model`).
# Assigning an nn.Module to an attribute registers it as a submodule, so its
# parameters are picked up by model.parameters() / the optimizer.
if getattr(args, "prompt_mixture", False) and args.prompt_len > 0 and args.prompt_bank_size > 0:
    # Hidden size: prefer the config value, fall back to the input-embedding width.
    hid = int(getattr(model.config, "hidden_size", None) or model.get_input_embeddings().weight.shape[1])
    # Hoist gate options once; reused for both head construction and logging.
    tk = int(getattr(args, "prompt_top_k", 0) or 0)
    use_attn_gate = bool(getattr(args, "prompt_gate_attention", False))
    model.prompt_mixture_head = PromptMixtureHead(
        hidden_size=hid,
        bank_size=int(args.prompt_bank_size),
        prompt_len=int(args.prompt_len),
        gate_hidden=int(args.prompt_gate_hidden),
        top_k=tk,
        attn_gate=use_attn_gate,
        attn_heads=int(getattr(args, "prompt_gate_heads", 8)),
        attn_dropout=float(getattr(args, "prompt_gate_attn_dropout", 0.0)),
    )
    mode = "attn" if use_attn_gate else "mlp"
    if tk > 0:
        print(f"[PromptMix] enabled K={args.prompt_bank_size} P={args.prompt_len} hidden={hid} top_k={tk} gate={mode}")
    else:
        print(f"[PromptMix] enabled K={args.prompt_bank_size} P={args.prompt_len} hidden={hid} soft-all gate={mode}")
else:
    # Disabled: set the attribute explicitly so downstream code can test for it
    # without hasattr checks. (Original line was truncated to `Non` — fixed.)
    model.prompt_mixture_head = None
# NOTE(review): this section only installs `model.prompt_mixture_head` as an
# attribute/submodule — it is not visible here where the head is consumed in the
# forward pass. Confirm where prompt_mixture_head is actually used during
# training and inference (e.g. a patched forward or input-embedding hook).