---
# KServe model-serving resource for the Huihui Granite model, deployed into
# the `kserve` namespace with HuggingFace-format weights pulled from the Hub.
#
# NOTE(review): `kind: LLMInferenceService` (apiVersion v1alpha1) is paired
# here with a `spec.predictor.model` body, which is the *InferenceService*
# v1beta1 schema. The LLMInferenceService CRD uses a different spec layout
# (e.g. `spec.model.uri`). Confirm against the CRDs installed in the target
# cluster — if the v1beta1 predictor schema is intended, the kind/apiVersion
# should likely be `InferenceService` / `serving.kserve.io/v1beta1`.
apiVersion: serving.kserve.io/v1alpha1
kind: LLMInferenceService
metadata:
  name: huihui-granite
  namespace: kserve
spec:
  predictor:
    model:
      # Serve with KServe's HuggingFace runtime.
      modelFormat:
        name: huggingface
      # Name the served model as exposed on the inference endpoint.
      args:
        - --model_name=huihui-granite
      # Weights are fetched from the HuggingFace Hub at startup.
      storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
      resources:
        # GPU request and limit must be equal — nvidia.com/gpu is not
        # overcommittable. Count is quoted: extended-resource quantities
        # are strings in the Kubernetes API.
        limits:
          cpu: "4"
          memory: 16Gi
          nvidia.com/gpu: "1"
        requests:
          cpu: "2"
          memory: 8Gi
          nvidia.com/gpu: "1"