This commit is contained in:
2026-05-05 15:21:10 -05:00
parent 0bef50c896
commit cb56f3838d

View File

@@ -6,16 +6,15 @@ spec:
# Model reference: where to fetch it from (hf:// URI) and its name.
model:
  uri: hf://huihui-ai/Huihui-granite-4.1-30b-abliterated
  name: huihui-ai/Huihui-granite-4.1-30b-abliterated
# Single replica; increase this count to balance load across several.
replicas: 1
# NOTE(review): `router` is defined a second time further down this listing.
# Duplicate mapping keys are invalid YAML (most parsers silently keep the
# last one) - presumably one copy is the pre-change side of the diff; keep
# exactly one.
router:
  # Default scheduler with default load balancing
  scheduler: {}
  route: {}
  gateway: {}
# Pod template: a single container named `main`.
template:
  containers:
    - name: main
      # NOTE(review): `latest` is a mutable tag; consider pinning a version
      # or digest so rollouts are reproducible.
      image: quay.io/pierdipi/vllm-cpu:latest
      securityContext:
        # Does not require a non-root user, so the container may run as root.
        # NOTE(review): confirm the image actually needs this.
        runAsNonRoot: false
      env:
        # Verbose logging for vLLM.
        - name: VLLM_LOGGING_LEVEL
          value: DEBUG
      resources:
        limits:
          cpu: "4"
@@ -26,11 +25,11 @@ spec:
# NOTE(review): tail of a resources mapping whose parent key lies in the
# lines elided between the two diff hunks - presumably `requests` or
# `limits`; confirm against the full file.
memory: 8Gi
# NOTE(review): one GPU is requested although the container image is named
# `vllm-cpu` - confirm which of the two is intended.
nvidia.com/gpu: "1"
# Liveness check: HTTPS GET /health on port 8000.
# `initialDelaySeconds` was listed twice (30, then 120). Duplicate keys are
# invalid YAML and most parsers keep the last one, so only the later value
# (120) is retained here.
livenessProbe:
  httpGet:
    path: /health
    port: 8000
    scheme: HTTPS
  # Wait 120 s after container start before the first probe.
  initialDelaySeconds: 120
  # Probe every 30 s; each probe may take up to 30 s.
  periodSeconds: 30
  timeoutSeconds: 30
  # Treat the container as failed after 5 consecutive failed probes.
  failureThreshold: 5
# Router components, each left at its default (empty) configuration.
# NOTE(review): `router` also appears earlier in this listing. Duplicate
# mapping keys are invalid YAML (most parsers silently keep the last one) -
# presumably the earlier copy is the pre-change side of the diff; keep
# exactly one.
router:
  gateway: {}
  route: {}
  scheduler: {}