# Listing metadata from the file viewer: 40 lines, 929 B, YAML
---
# KServe LLMInferenceService manifest for the "huihui-granite" deployment.
#
# NOTE(review): the nesting below was reconstructed from a flattened listing
# (indentation was lost and stray "|" separator lines were interleaved).
# Keys and values are unchanged; confirm the hierarchy against the original
# file before applying.
apiVersion: serving.kserve.io/v1alpha1
kind: LLMInferenceService
metadata:
  name: huihui-granite
spec:
  # Model source.
  # NOTE(review): upstream LLMInferenceService examples appear to use
  # `model.uri` / `model.name`; verify that `modelFormat` / `storageUri`
  # are accepted by the installed CRD version.
  model:
    modelFormat:
      name: huggingface
    storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"

  replicas: 1

  # Pod template overrides for the serving workload.
  template:
    containers:
      - name: main
        # NOTE(review): the image name says "vllm-cpu" while the container
        # requests one nvidia.com/gpu below — confirm which is intended.
        # The floating `latest` tag also makes rollouts non-reproducible;
        # consider pinning a version or digest.
        image: quay.io/pierdipi/vllm-cpu:latest
        args:
          - --model_name=huihui-granite
        securityContext:
          # Disables the non-root check, so the image may run as root.
          runAsNonRoot: false
        env:
          - name: VLLM_LOGGING_LEVEL
            value: DEBUG
        resources:
          limits:
            cpu: "4"
            memory: 16Gi
            nvidia.com/gpu: "1"
          requests:
            cpu: "2"
            memory: 8Gi
            nvidia.com/gpu: "1"
        # Only timing fields are set here; no probe handler (httpGet, exec,
        # ...) is visible — presumably supplied by a KServe default/preset.
        # TODO confirm.
        livenessProbe:
          initialDelaySeconds: 30
          periodSeconds: 30
          timeoutSeconds: 30
          failureThreshold: 5

  # Empty objects: no explicit gateway, route, or scheduler settings are
  # given in this manifest.
  router:
    gateway: {}
    route: {}
    scheduler: {}