From 2d2df4bb0aa0f0b975126892447c9e99900b1a81 Mon Sep 17 00:00:00 2001 From: HaimKortovich Date: Mon, 11 May 2026 11:36:59 -0500 Subject: [PATCH] use defaults --- clusters/k3s-dgx/nim-service/qwen.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml index 0f1b79f..872b8e9 100644 --- a/clusters/k3s-dgx/nim-service/qwen.yaml +++ b/clusters/k3s-dgx/nim-service/qwen.yaml @@ -64,10 +64,6 @@ spec: - --max-num-seqs - "254" # ↑ Allow more concurrent sequences - --enable-prefix-caching - - --dtype - - "float" # Explicit FP8 encoding - - --quantization - - "fp8" # Explicit quantization backend authSecret: hf-api-secret storage: sharedMemorySizeLimit: 64Gi