From a092b6ffa51912d6d9f02c130d280c86be35d6ec Mon Sep 17 00:00:00 2001
From: HaimKortovich
Date: Thu, 7 May 2026 16:16:21 -0500
Subject: [PATCH] set probes

---
Review notes (git am ignores the text between the separator above and the
diffstat below):

* Purpose: set VLLM_CACHE_ROOT to /model-store/vllm-cache and add liveness,
  readiness and startup probes, each an HTTP GET of /health on port 8000.
* Each probe is wrapped in the `enabled` / `probe` structure that the NVIDIA
  NIM Operator NIMService spec uses for livenessProbe, readinessProbe and
  startupProbe; a bare `httpGet` directly under those keys is not part of
  that schema. NOTE(review): the resource `kind` is not visible in this diff;
  this assumes qwen.yaml declares a NIMService. Confirm before applying.
* Probe timing values are unchanged from the original submission.
* NOTE(review): leading whitespace in the hunks was reconstructed assuming
  2-space nesting under `spec:`; verify the context lines against the real
  file. The commit id on the first line and the post-image blob id on the
  `index` line predate this revision; regenerate the patch with
  `git format-patch` once the change is committed.

 clusters/k3s-dgx/nim-service/qwen.yaml | 36 +++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml
index 03dc7df..f07e0f7 100644
--- a/clusters/k3s-dgx/nim-service/qwen.yaml
+++ b/clusters/k3s-dgx/nim-service/qwen.yaml
@@ -52,7 +52,9 @@ spec:
     repository: vllm/vllm-openai
     tag: v0.19.1-cu130
     pullPolicy: IfNotPresent
-
+  env:
+    - name: VLLM_CACHE_ROOT
+      value: /model-store/vllm-cache
   command:
     - python3
   args:
@@ -91,6 +93,38 @@ spec:
   resources:
     limits:
       nvidia.com/gpu: 1
+  livenessProbe:
+    enabled: true
+    probe:
+      httpGet:
+        path: /health
+        port: 8000
+      initialDelaySeconds: 120
+      periodSeconds: 30
+      timeoutSeconds: 10
+      failureThreshold: 10
+
+  readinessProbe:
+    enabled: true
+    probe:
+      httpGet:
+        path: /health
+        port: 8000
+      initialDelaySeconds: 30
+      periodSeconds: 15
+      timeoutSeconds: 10
+      failureThreshold: 20
+
+  startupProbe:
+    enabled: true
+    probe:
+      httpGet:
+        path: /health
+        port: 8000
+      initialDelaySeconds: 10
+      periodSeconds: 20
+      timeoutSeconds: 10
+      failureThreshold: 60
   expose:
     service:
       type: ClusterIP