From ae8ac815acc96100faa58c8c183dfc1781e9bcfa Mon Sep 17 00:00:00 2001 From: eugr Date: Sat, 9 May 2026 13:45:15 -0700 Subject: [PATCH] Adjusted Qwen3.5-397B recipe to fix OOM issue and lower memory requirements --- recipes/qwen3.5-397b-int4-autoround.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/recipes/qwen3.5-397b-int4-autoround.yaml b/recipes/qwen3.5-397b-int4-autoround.yaml index c688bd1..246e909 100644 --- a/recipes/qwen3.5-397b-int4-autoround.yaml +++ b/recipes/qwen3.5-397b-int4-autoround.yaml @@ -18,9 +18,7 @@ container: vllm-node-tf5 build_args: - --tf5 -# Mod required to fix ROPE syntax error mods: - # - mods/fix-qwen3.5-autoround - mods/fix-qwen3.5-chat-template #- mods/gpu-mem-util-gb # - mods/drop-caches @@ -36,7 +34,6 @@ defaults: # Environment variables env: - # PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:True" VLLM_MARLIN_USE_ATOMIC_ADD: 1 # The vLLM serve command template