From 08106b6693d9ff33a4c4fd8f7e6fd3eb3a256c2a Mon Sep 17 00:00:00 2001 From: HaimKortovich Date: Mon, 11 May 2026 11:34:39 -0500 Subject: [PATCH] use correct dtype --- clusters/k3s-dgx/nim-service/qwen.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml index f08ef2d..082a330 100644 --- a/clusters/k3s-dgx/nim-service/qwen.yaml +++ b/clusters/k3s-dgx/nim-service/qwen.yaml @@ -65,7 +65,7 @@ spec: - "254" # ↑ Allow more concurrent sequences - --enable-prefix-caching - --dtype - - "float8" # Explicit FP8 encoding + - "bfloat16" # 16-bit compute dtype ("float" would mean FP32); FP8 is selected by --quantization below - --quantization - "fbgemm-fp8" # Explicit quantization backend - --sv2-transformer-bindings # Skip SA checks (small speedup)