---
# KServe manifest that serves the Hugging Face model
# "huihui-ai/Huihui-granite-4.1-30b-abliterated" under the model name
# "huihui-granite" in the "kserve" namespace.
#
# NOTE(review): this file previously declared `kind: LLMInferenceService`.
# The `spec.predictor.model` layout used below is the InferenceService
# (serving.kserve.io/v1beta1) schema, so the kind has been set to
# InferenceService to match the apiVersion and spec. If an
# LLMInferenceService was really intended, the apiVersion and the whole
# spec need to be rewritten for that CRD instead - confirm against the
# CRDs installed in the target cluster.
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: huihui-granite
  namespace: kserve
spec:
  predictor:
    model:
      # Selects the serving runtime by model format.
      modelFormat:
        name: huggingface
      # Extra command-line arguments handed to the model server container.
      args:
        - --model_name=huihui-granite
      # Model weights are pulled from the Hugging Face Hub (hf:// scheme).
      storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
      # CPU and GPU counts are quoted so they stay strings.
      # NOTE(review): 16Gi of memory and a single GPU may be tight for a
      # model labelled "30b" - TODO confirm sizing against the real
      # footprint of the weights before deploying.
      resources:
        limits:
          cpu: "4"
          memory: 16Gi
          nvidia.com/gpu: "1"
        requests:
          cpu: "2"
          memory: 8Gi
          nvidia.com/gpu: "1"