From cb56f3838d4fad95c9c9380e2673eafe9a511ee4 Mon Sep 17 00:00:00 2001
From: HaimKortovich
Date: Tue, 5 May 2026 15:21:10 -0500
Subject: [PATCH] no lb

---
Reviewer notes (this area is ignored by `git am`; not part of the commit):
  * Moves the `router` stanza (empty scheduler/route/gateway mappings) from
    the end of `spec` to directly below `replicas`.
  * Removes the explicit `image`, `securityContext` and `env` keys from the
    `main` container.
  * Adds an explicit `httpGet` liveness check (`/health`, port 8000, HTTPS)
    and raises `initialDelaySeconds` from 30 to 120.
  * `replicas` stays at 1; the comment above it now says so.

 .../apps/huihui-granite-inference.yaml | 21 +++++++++----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/clusters/k3s-dgx/apps/huihui-granite-inference.yaml b/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
index d0f98f3..5c47f2f 100644
--- a/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
+++ b/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
@@ -6,16 +6,15 @@ spec:
   model:
     uri: hf://huihui-ai/Huihui-granite-4.1-30b-abliterated
     name: huihui-ai/Huihui-granite-4.1-30b-abliterated
+  # Single replica: nothing to balance across yet; raise replicas to scale out
   replicas: 1
+  router:
+    scheduler: {}  # Default scheduler with default load balancing
+    route: {}
+    gateway: {}
   template:
     containers:
       - name: main
-        image: quay.io/pierdipi/vllm-cpu:latest
-        securityContext:
-          runAsNonRoot: false
-        env:
-          - name: VLLM_LOGGING_LEVEL
-            value: DEBUG
         resources:
           limits:
             cpu: "4"
@@ -26,11 +25,11 @@ spec:
             memory: 8Gi
             nvidia.com/gpu: "1"
         livenessProbe:
-          initialDelaySeconds: 30
+          httpGet:
+            path: /health
+            port: 8000
+            scheme: HTTPS
+          initialDelaySeconds: 120
           periodSeconds: 30
           timeoutSeconds: 30
           failureThreshold: 5
-  router:
-    gateway: {}
-    route: {}
-    scheduler: {}