diff --git a/clusters/k3s-dgx/apps/huihui-granite-inference.yaml b/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
index 6ff11bf..c108c76 100644
--- a/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
+++ b/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
@@ -1,18 +1,22 @@
-apiVersion: "serving.kserve.io/v1beta1"
-kind: "InferenceService"
+apiVersion: serving.kserve.io/v1beta1
+kind: InferenceService
 metadata:
-  name: "flower-sample"
+  name: huihui-granite
   namespace: kserve
 spec:
   predictor:
     model:
       modelFormat:
-        name: tensorflow
-      storageUri: "gs://kfserving-examples/models/tensorflow/flowers"
+        name: huggingface
+      args:
+        - --model_name=huihui-granite
+      storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
       resources:
-        requests:
-          cpu: "100m"
-          memory: "256Mi"
         limits:
-          cpu: "1"
-          memory: "1Gi"
+          cpu: "4"
+          memory: 16Gi
+          nvidia.com/gpu: "1"
+        requests:
+          cpu: "2"
+          memory: 8Gi
+          nvidia.com/gpu: "1"
diff --git a/clusters/k3s-dgx/infrastructure/kustomization.yaml b/clusters/k3s-dgx/infrastructure/kustomization.yaml
index e2059c0..d3fa31b 100644
--- a/clusters/k3s-dgx/infrastructure/kustomization.yaml
+++ b/clusters/k3s-dgx/infrastructure/kustomization.yaml
@@ -3,4 +3,4 @@ kind: Kustomization
 resources:
   - cert-manager.yaml
   - envoy-gateway-system.yaml
-  # - envoy-gateway-class.yaml
+  - envoy-gateway-class.yaml
diff --git a/clusters/k3s-dgx/kustomization.yaml b/clusters/k3s-dgx/kustomization.yaml
index 818c75d..b5c8777 100644
--- a/clusters/k3s-dgx/kustomization.yaml
+++ b/clusters/k3s-dgx/kustomization.yaml
@@ -4,5 +4,5 @@ resources:
   - flux-system
   - gpu-support
   - infrastructure
-  # - kserve
+  - kserve
   # - apps