no lb
This commit is contained in:
@@ -6,16 +6,15 @@ spec:
|
|||||||
model:
|
model:
|
||||||
uri: hf://huihui-ai/Huihui-granite-4.1-30b-abliterated
|
uri: hf://huihui-ai/Huihui-granite-4.1-30b-abliterated
|
||||||
name: huihui-ai/Huihui-granite-4.1-30b-abliterated
|
name: huihui-ai/Huihui-granite-4.1-30b-abliterated
|
||||||
|
# Three replicas for load balancing
|
||||||
replicas: 1
|
replicas: 1
|
||||||
|
router:
|
||||||
|
scheduler: { } # Default scheduler with default load balancing
|
||||||
|
route: { }
|
||||||
|
gateway: { }
|
||||||
template:
|
template:
|
||||||
containers:
|
containers:
|
||||||
- name: main
|
- name: main
|
||||||
image: quay.io/pierdipi/vllm-cpu:latest
|
|
||||||
securityContext:
|
|
||||||
runAsNonRoot: false
|
|
||||||
env:
|
|
||||||
- name: VLLM_LOGGING_LEVEL
|
|
||||||
value: DEBUG
|
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
cpu: "4"
|
cpu: "4"
|
||||||
@@ -26,11 +25,11 @@ spec:
|
|||||||
memory: 8Gi
|
memory: 8Gi
|
||||||
nvidia.com/gpu: "1"
|
nvidia.com/gpu: "1"
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
initialDelaySeconds: 30
|
httpGet:
|
||||||
|
path: /health
|
||||||
|
port: 8000
|
||||||
|
scheme: HTTPS
|
||||||
|
initialDelaySeconds: 120
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
timeoutSeconds: 30
|
timeoutSeconds: 30
|
||||||
failureThreshold: 5
|
failureThreshold: 5
|
||||||
router:
|
|
||||||
gateway: {}
|
|
||||||
route: {}
|
|
||||||
scheduler: {}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user