Files
edge-gitops/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
2026-05-05 15:13:59 -05:00

40 lines
929 B
YAML

---
# KServe LLMInferenceService serving the Huihui abliterated Granite model
# from Hugging Face via a vLLM container.
apiVersion: serving.kserve.io/v1alpha1
kind: LLMInferenceService
metadata:
  name: huihui-granite
spec:
  model:
    modelFormat:
      name: huggingface
    # Model weights pulled directly from the Hugging Face Hub.
    storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
  replicas: 1
  template:
    containers:
      - name: main
        # NOTE(review): this is a CPU build of vLLM, yet the resources below
        # request an NVIDIA GPU — confirm whether a GPU-enabled image (or
        # CPU-only resources) was intended.
        image: quay.io/pierdipi/vllm-cpu:latest
        args:
          - --model_name=huihui-granite
        securityContext:
          # NOTE(review): running as root is a security risk; confirm the
          # image actually requires it before keeping runAsNonRoot: false.
          runAsNonRoot: false
        env:
          - name: VLLM_LOGGING_LEVEL
            value: "DEBUG"
        resources:
          limits:
            cpu: "4"
            memory: 16Gi
            nvidia.com/gpu: "1"
          requests:
            cpu: "2"
            memory: 8Gi
            nvidia.com/gpu: "1"
        # NOTE(review): only probe timings are set — no httpGet/exec/tcpSocket
        # handler. Presumably the KServe controller injects one; verify,
        # since a bare probe is rejected by the core Pod API.
        livenessProbe:
          initialDelaySeconds: 30
          periodSeconds: 30
          timeoutSeconds: 30
          failureThreshold: 5
  router:
    # Empty objects request the controller's default gateway, route,
    # and scheduler configuration.
    gateway: {}
    route: {}
    scheduler: {}