From fdc5df6f54854ba437429fa0ff721939259364ad Mon Sep 17 00:00:00 2001 From: Zhe Zhang <2631992879@qq.com> Date: Wed, 19 Feb 2025 16:05:02 +0800 Subject: [PATCH] use device param in load_model method (#13037) --- vllm/worker/model_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index c7814f17..78cc352b 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -1107,7 +1107,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]): def load_model(self) -> None: logger.info("Starting to load model %s...", self.model_config.model) - with DeviceMemoryProfiler() as m: + with DeviceMemoryProfiler(self.device) as m: self.model = get_model(vllm_config=self.vllm_config) self.model_memory_usage = m.consumed_memory