Adjusted Qwen3.5-397B recipe to fix OOM issue and lower memory requirements

2026-05-09 13:45:15 -07:00
parent 83a680c87b
commit ae8ac815ac
1 changed file with 0 additions and 3 deletions
--- a/recipes/qwen3.5-397b-int4-autoround.yaml
+++ b/recipes/qwen3.5-397b-int4-autoround.yaml
@@ -18,9 +18,7 @@ container: vllm-node-tf5
 build_args:
  - --tf5

-# Mod required to fix ROPE syntax error
 mods:
-  # - mods/fix-qwen3.5-autoround
  - mods/fix-qwen3.5-chat-template
  #- mods/gpu-mem-util-gb
  # - mods/drop-caches
@@ -36,7 +34,6 @@ defaults:

 # Environment variables
 env: 
-  #  PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:True" 
  VLLM_MARLIN_USE_ATOMIC_ADD: 1

 # The vLLM serve command template