Commit d38f032

fix hf modules read/write conflicts by multi processors (#4188)
Parent: b591b28

File tree: 2 files changed (+39, −1)

  lmdeploy/pytorch/engine/model_agent.py
  lmdeploy/pytorch/utils.py

lmdeploy/pytorch/engine/model_agent.py (2 additions, 1 deletion)

@@ -28,7 +28,7 @@
 from ..spec_decode import build_spec_agent
 from ..strategies import build_strategy_factory
 from ..strategies.base.model_agent import ExtraInputs, ExtraOutputs, StoppingCriteria
-from ..utils import get_gpu_memory
+from ..utils import get_gpu_memory, monkey_patch_hf_modules_cache
 from ..weight_loader.model_weight_loader import ModelWeightLoader, load_model_weights
 from .cache_engine import CacheEngine, StateCacheEngine
 from .guided_process import GuidedDecodingManager

@@ -325,6 +325,7 @@ def __init__(
         self.model_config = model_config
         self.cache_config = cache_config
         # use raw tokenizer
+        monkey_patch_hf_modules_cache()
         self.tokenizer = Tokenizer(model_path).model.model

         self._pre_in_que = None
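Why the call site matters: each model-agent worker process builds its own tokenizer, and a `trust_remote_code` load is what populates HF_MODULES_CACHE, so the patch must run before that first load. A minimal sketch of the per-process pattern this commit establishes (the AutoTokenizer call and model path are illustrative, not part of the diff):

import os

from transformers import AutoTokenizer

from lmdeploy.pytorch.utils import monkey_patch_hf_modules_cache

# Redirect this process's dynamic-module cache before any remote-code load,
# so downloaded modeling files land in a PID-specific directory.
monkey_patch_hf_modules_cache()
assert f'modules_pid_{os.getpid()}' in os.environ['HF_MODULES_CACHE']

# Illustrative remote-code load; without the patch, e.g. 8 worker processes
# would race to copy the same files into one shared HF_MODULES_CACHE.
tokenizer = AutoTokenizer.from_pretrained('path/to/remote-code-model', trust_remote_code=True)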

lmdeploy/pytorch/utils.py (37 additions, 0 deletions)

@@ -142,3 +142,40 @@ def _reduce_modelconfig(mc: ModelConfig):
                ' lead to a later error. If remote code is not needed'
                ' remove `--trust-remote-code`',
                exc_info=e)
+
+
+def monkey_patch_hf_modules_cache():
+    """Monkey patch HF_MODULES_CACHE to a temporary directory per process.
+    This avoids conflicts when multiple processes try to read/write the same
+    HF_MODULES_CACHE directory, especially in multi-GPU setups.
+
+    Modified from: https://github.com/InternLM/xtuner/blob/main/xtuner/v1/utils/misc.py
+    """
+    import os
+
+    import transformers
+    from huggingface_hub import constants
+
+    # When using `remote_code` in HF components like the tokenizer or config
+    # (e.g., `AutoConfig.from_pretrained(hf_model_path, trust_remote_code=True)`),
+    # the remote code under hf_model_path is copied into HF_MODULES_CACHE.
+    # On multi-GPU machines (e.g., 8 GPUs), simultaneous read/write operations
+    # by multiple processes on this shared directory can cause conflicts.
+    # Therefore, we set HF_MODULES_CACHE to a per-process temporary directory.
+    HF_PATCH_MODULES_CACHE_PREFIX = 'modules_pid_'
+    modules_cache = os.path.join(constants.HF_HOME, f'{HF_PATCH_MODULES_CACHE_PREFIX}{os.getpid()}')
+    os.environ['HF_MODULES_CACHE'] = modules_cache
+
+    transformers.utils.hub.HF_MODULES_CACHE = modules_cache
+
+    # During import, Python creates a new name HF_MODULES_CACHE in the namespace
+    # of the dynamic_module_utils module, binding it to the object referenced by
+    # transformers.utils.HF_MODULES_CACHE at that moment.
+    # Hence, we also need to set transformers.dynamic_module_utils.HF_MODULES_CACHE
+    # to the new modules_cache.
+
+    transformers.dynamic_module_utils.HF_MODULES_CACHE = modules_cache
+    transformers.utils.HF_MODULES_CACHE = modules_cache
+
+    logger.info(f'Set HF_MODULES_CACHE to {modules_cache} for current process {os.getpid()}')
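The name-binding comment is the subtle part: `from .utils import HF_MODULES_CACHE` copies a reference into the importer's namespace at import time, so rebinding the source attribute afterwards leaves that copy stale. A self-contained illustration of the pitfall (the module objects here are stand-ins built for the demo, not the real transformers modules):

import types

# Stand-in for transformers.utils, which defines the constant.
hf_utils = types.ModuleType('hf_utils')
hf_utils.HF_MODULES_CACHE = '/shared/modules'

# Stand-in for `from .utils import HF_MODULES_CACHE` inside
# dynamic_module_utils: the name binds to the object itself.
dyn_utils = types.ModuleType('dyn_utils')
dyn_utils.HF_MODULES_CACHE = hf_utils.HF_MODULES_CACHE

# Rebinding only the source leaves the importer's copy stale...
hf_utils.HF_MODULES_CACHE = '/per-process/modules_pid_1234'
print(dyn_utils.HF_MODULES_CACHE)  # /shared/modules  (stale)

# ...which is why the patch rebinds both module attributes explicitly.
dyn_utils.HF_MODULES_CACHE = hf_utils.HF_MODULES_CACHE
print(dyn_utils.HF_MODULES_CACHE)  # /per-process/modules_pid_1234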
