init commit
This commit is contained in:
22
clusters/k3s-dgx/apps/huihui-granite-inference.yaml
Normal file
22
clusters/k3s-dgx/apps/huihui-granite-inference.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
---
# KServe InferenceService "huihui-granite", deployed into the "kserve" namespace.
# Declares a single predictor that serves one Hugging Face-format model on one GPU.
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: huihui-granite
  namespace: kserve
spec:
  predictor:
    model:
      # Model format used to pick the serving runtime.
      modelFormat:
        name: huggingface
      # Extra command-line arguments handed to the model server container.
      # NOTE(review): presumably this is the name clients use in request
      # paths/payloads; confirm against the runtime's documentation.
      args:
        - --model_name=huihui-granite
      # Where the model artifacts are fetched from (hf:// scheme + repo id).
      storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
      # Container resources: requests are half of the CPU/memory limits;
      # exactly one GPU is both requested and set as the limit.
      # NOTE(review): the repo name suggests a ~30B-parameter model; verify
      # that 8Gi/16Gi of host memory and a single GPU are sufficient for
      # loading it on the target node.
      resources:
        limits:
          cpu: "4"
          memory: 16Gi
          nvidia.com/gpu: "1"
        requests:
          cpu: "2"
          memory: 8Gi
          nvidia.com/gpu: "1"
|
||||
Reference in New Issue
Block a user