From 778083b9f647a1b63692192f2531ed5da88f477b Mon Sep 17 00:00:00 2001
From: HaimKortovich
Date: Tue, 5 May 2026 15:13:59 -0500
Subject: [PATCH] use correctly

---
 .../apps/huihui-granite-inference.yaml        | 54 +++++++++++++------
 1 file changed, 37 insertions(+), 17 deletions(-)

diff --git a/clusters/k3s-dgx/apps/huihui-granite-inference.yaml b/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
index d661648..114be8e 100644
--- a/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
+++ b/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
@@ -2,21 +2,41 @@ apiVersion: serving.kserve.io/v1alpha1
 kind: LLMInferenceService
 metadata:
   name: huihui-granite
-  namespace: kserve
 spec:
-  predictor:
-    model:
-      modelFormat:
-        name: huggingface
-      args:
-        - --model_name=huihui-granite
-      storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
-      resources:
-        limits:
-          cpu: "4"
-          memory: 16Gi
-          nvidia.com/gpu: "1"
-        requests:
-          cpu: "2"
-          memory: 8Gi
-          nvidia.com/gpu: "1"
+  model:
+    # LLMInferenceService reads the model source from `uri` and the served
+    # model name from `name` (replacing the InferenceService-style
+    # storageUri / --model_name pair).
+    uri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
+    name: huihui-granite
+  replicas: 1
+  template:
+    containers:
+      - name: main
+        # NOTE(review): the image name suggests a CPU-only vLLM build on a
+        # floating tag, yet this container requests nvidia.com/gpu --
+        # confirm the intended image and pin it to a fixed tag or digest.
+        image: quay.io/pierdipi/vllm-cpu:latest
+        securityContext:
+          runAsNonRoot: false
+        env:
+          - name: VLLM_LOGGING_LEVEL
+            value: DEBUG
+        resources:
+          limits:
+            cpu: "4"
+            memory: 16Gi
+            nvidia.com/gpu: "1"
+          requests:
+            cpu: "2"
+            memory: 8Gi
+            nvidia.com/gpu: "1"
+        livenessProbe:
+          initialDelaySeconds: 30
+          periodSeconds: 30
+          timeoutSeconds: 30
+          failureThreshold: 5
+  router:
+    gateway: {}
+    route: {}
+    scheduler: {}