use correctly
This commit is contained in:
@@ -2,15 +2,23 @@ apiVersion: serving.kserve.io/v1alpha1
|
||||
kind: LLMInferenceService
|
||||
metadata:
|
||||
name: huihui-granite
|
||||
namespace: kserve
|
||||
spec:
|
||||
predictor:
|
||||
model:
|
||||
modelFormat:
|
||||
name: huggingface
|
||||
storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
|
||||
replicas: 1
|
||||
template:
|
||||
containers:
|
||||
- name: main
|
||||
image: quay.io/pierdipi/vllm-cpu:latest
|
||||
args:
|
||||
- --model_name=huihui-granite
|
||||
storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
|
||||
securityContext:
|
||||
runAsNonRoot: false
|
||||
env:
|
||||
- name: VLLM_LOGGING_LEVEL
|
||||
value: DEBUG
|
||||
resources:
|
||||
limits:
|
||||
cpu: "4"
|
||||
@@ -20,3 +28,12 @@ spec:
|
||||
cpu: "2"
|
||||
memory: 8Gi
|
||||
nvidia.com/gpu: "1"
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
timeoutSeconds: 30
|
||||
failureThreshold: 5
|
||||
router:
|
||||
gateway: {}
|
||||
route: {}
|
||||
scheduler: {}
|
||||
|
||||
Reference in New Issue
Block a user