use correctly

This commit is contained in:
2026-05-05 15:13:59 -05:00
parent 87f98fbca6
commit 778083b9f6

View File

@@ -2,21 +2,38 @@ apiVersion: serving.kserve.io/v1alpha1
# Manifest header: resource kind, metadata and the opening of `spec`.
# NOTE(review): this view has lost all indentation. `name` and `namespace`
# presumably nest under `metadata:` -- confirm against the real manifest.
kind: LLMInferenceService
metadata:
# Resource name of the service.
name: huihui-granite
# Namespace the service is created in.
namespace: kserve
# NOTE(review): the keys that follow presumably nest under `spec:` -- confirm.
spec:
# `predictor` stanza: model format, server argument, model source and
# resource limits/requests.
# NOTE(review): this looks like the pre-commit (removed) side of the hunk, but
# the +/- markers are missing from this view -- TODO confirm.
# NOTE(review): the predictor/modelFormat/storageUri layout resembles the
# KServe InferenceService schema rather than LLMInferenceService -- confirm.
predictor:
model:
modelFormat:
name: huggingface
args:
# Presumably the name the model server exposes the model under -- confirm.
- --model_name=huihui-granite
# Model source given as an `hf://` URI.
storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
# NOTE(review): with indentation stripped, the cpu/memory/gpu keys below repeat
# at the same level; they presumably belong to `limits:` and `requests:`
# respectively.
resources:
limits:
cpu: "4"
memory: 16Gi
nvidia.com/gpu: "1"
requests:
cpu: "2"
memory: 8Gi
nvidia.com/gpu: "1"
# Spec body: model source, replica count, container template and router.
# NOTE(review): this looks like the post-commit (added) side of the hunk, but
# the +/- markers and indentation are missing from this view -- TODO confirm.
# NOTE(review): LLMInferenceService `spec.model` is documented with `uri` and
# an optional `name`; `modelFormat` / `storageUri` look like InferenceService
# fields -- verify the CRD accepts them.
model:
modelFormat:
name: huggingface
# Model source given as an `hf://` URI.
storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
replicas: 1
template:
containers:
- name: main
# NOTE(review): the image name says this is a CPU build and `:latest` is a
# floating tag, yet the resources below request a GPU -- confirm the intended
# image and pin a tag or digest.
image: quay.io/pierdipi/vllm-cpu:latest
args:
# NOTE(review): vLLM's documented flag is `--served-model-name`; confirm that
# the image's entrypoint understands `--model_name`, and that setting `args`
# here does not drop arguments supplied by a base configuration.
- --model_name=huihui-granite
securityContext:
# Does not enforce a non-root user for the container.
runAsNonRoot: false
env:
# Sets the VLLM_LOGGING_LEVEL environment variable to DEBUG.
- name: VLLM_LOGGING_LEVEL
value: DEBUG
# NOTE(review): with indentation stripped, the cpu/memory/gpu keys below repeat
# at the same level; they presumably belong to `limits:` and `requests:`
# respectively.
resources:
limits:
cpu: "4"
memory: 16Gi
nvidia.com/gpu: "1"
requests:
cpu: "2"
memory: 8Gi
nvidia.com/gpu: "1"
# Only timing fields are set (30 s initial delay, period and timeout; 5
# failures tolerated).
# NOTE(review): no probe handler (httpGet/exec/tcpSocket) is visible here;
# presumably it is supplied elsewhere -- confirm.
livenessProbe:
initialDelaySeconds: 30
periodSeconds: 30
timeoutSeconds: 30
failureThreshold: 5
# gateway, route and scheduler are each given as an empty mapping.
# NOTE(review): presumably this asks for default/managed resources -- confirm.
router:
gateway: {}
route: {}
scheduler: {}