diff --git a/app/models/qwen.py b/app/models/qwen.py index 219af6f..4b3de92 100644 --- a/app/models/qwen.py +++ b/app/models/qwen.py @@ -25,7 +25,7 @@ class Qwen(BaseModel): self.model = AutoModelForCausalLM.from_pretrained( model_path, torch_dtype=torch.float16, - # quantization_config=quantization_config, + quantization_config=quantization_config, device_map="auto", # low_cpu_mem_usage=True )