use correctly

This commit is contained in:
2026-05-05 15:13:59 -05:00
parent 87f98fbca6
commit 778083b9f6

View File

@@ -2,15 +2,23 @@ apiVersion: serving.kserve.io/v1alpha1
kind: LLMInferenceService kind: LLMInferenceService
metadata: metadata:
name: huihui-granite name: huihui-granite
namespace: kserve
spec: spec:
predictor:
model: model:
modelFormat: modelFormat:
name: huggingface name: huggingface
storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
replicas: 1
template:
containers:
- name: main
image: quay.io/pierdipi/vllm-cpu:latest
args: args:
- --model_name=huihui-granite - --model_name=huihui-granite
storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated" securityContext:
runAsNonRoot: false
env:
- name: VLLM_LOGGING_LEVEL
value: DEBUG
resources: resources:
limits: limits:
cpu: "4" cpu: "4"
@@ -20,3 +28,12 @@ spec:
cpu: "2" cpu: "2"
memory: 8Gi memory: 8Gi
nvidia.com/gpu: "1" nvidia.com/gpu: "1"
livenessProbe:
initialDelaySeconds: 30
periodSeconds: 30
timeoutSeconds: 30
failureThreshold: 5
router:
gateway: {}
route: {}
scheduler: {}