use correctly

This commit is contained in:
2026-05-05 15:13:59 -05:00
parent 87f98fbca6
commit 778083b9f6

View File

@@ -2,21 +2,38 @@ apiVersion: serving.kserve.io/v1alpha1
kind: LLMInferenceService kind: LLMInferenceService
metadata: metadata:
name: huihui-granite name: huihui-granite
namespace: kserve
spec: spec:
predictor: model:
model: modelFormat:
modelFormat: name: huggingface
name: huggingface storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
args: replicas: 1
- --model_name=huihui-granite template:
storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated" containers:
resources: - name: main
limits: image: quay.io/pierdipi/vllm-cpu:latest
cpu: "4" args:
memory: 16Gi - --model_name=huihui-granite
nvidia.com/gpu: "1" securityContext:
requests: runAsNonRoot: false
cpu: "2" env:
memory: 8Gi - name: VLLM_LOGGING_LEVEL
nvidia.com/gpu: "1" value: DEBUG
resources:
limits:
cpu: "4"
memory: 16Gi
nvidia.com/gpu: "1"
requests:
cpu: "2"
memory: 8Gi
nvidia.com/gpu: "1"
livenessProbe:
initialDelaySeconds: 30
periodSeconds: 30
timeoutSeconds: 30
failureThreshold: 5
router:
gateway: {}
route: {}
scheduler: {}