From 855cce3c5403484653bd4348edaef14d588cbef5 Mon Sep 17 00:00:00 2001 From: HaimKortovich Date: Mon, 11 May 2026 11:32:56 -0500 Subject: [PATCH] fix list --- clusters/k3s-dgx/nim-service/qwen.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml index 61fae86..71c84bb 100644 --- a/clusters/k3s-dgx/nim-service/qwen.yaml +++ b/clusters/k3s-dgx/nim-service/qwen.yaml @@ -63,8 +63,8 @@ spec: - "131072" # ↑ 32K→128K — larger decode batches = more tokens/sec - --max-num-seqs - "254" # ↑ Allow more concurrent sequences - --enable-prefix-caching - --dtype + - --enable-prefix-caching + - --dtype - "float8" # Explicit FP8 encoding --quantization - "fbgemm-fp8" # Explicit quantization backend