diff --git a/clusters/k3s-dgx/kustomization.yaml b/clusters/k3s-dgx/kustomization.yaml
index eb40073..dbb8875 100644
--- a/clusters/k3s-dgx/kustomization.yaml
+++ b/clusters/k3s-dgx/kustomization.yaml
@@ -5,5 +5,6 @@ resources:
   - gpu-support
   - nim-operator
   - infrastructure
+  - nim-service
   # - kserve
   # - apps
diff --git a/clusters/k3s-dgx/nim-service/kustomization.yaml b/clusters/k3s-dgx/nim-service/kustomization.yaml
new file mode 100644
index 0000000..f632f54
--- /dev/null
+++ b/clusters/k3s-dgx/nim-service/kustomization.yaml
@@ -0,0 +1,8 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+# Keep in sync with namespace.yaml and the metadata.namespace of the NIM
+# resources: kustomize rewrites both to whatever is set here.
+namespace: nim-service
+resources:
+  - namespace.yaml
+  - llama-instruct-1b.yaml
diff --git a/clusters/k3s-dgx/nim-service/llama-instruct-1b.yaml b/clusters/k3s-dgx/nim-service/llama-instruct-1b.yaml
new file mode 100644
index 0000000..7b37660
--- /dev/null
+++ b/clusters/k3s-dgx/nim-service/llama-instruct-1b.yaml
@@ -0,0 +1,45 @@
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMCache
+metadata:
+  name: meta-llama-3-2-1b-instruct
+  namespace: nim-service
+spec:
+  source:
+    ngc:
+      modelPuller: nvcr.io/nim/meta/llama-3.2-1b-instruct:1.12.0
+      pullSecret: ngc-secret
+      authSecret: ngc-api-secret
+      model:
+        engine: tensorrt_llm
+        tensorParallelism: "1"
+  storage:
+    pvc:
+      create: true
+      size: "50Gi"
+      volumeAccessMode: ReadWriteOnce
+---
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMService
+metadata:
+  name: meta-llama-3-2-1b-instruct
+  namespace: nim-service
+spec:
+  image:
+    repository: nvcr.io/nim/meta/llama-3.2-1b-instruct
+    tag: "1.12.0"
+    pullPolicy: IfNotPresent
+    pullSecrets:
+      - ngc-secret
+  authSecret: ngc-api-secret
+  storage:
+    nimCache:
+      name: meta-llama-3-2-1b-instruct
+      profile: ''
+  replicas: 1
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+  expose:
+    service:
+      type: ClusterIP
+      port: 8000
diff --git a/clusters/k3s-dgx/nim-service/namespace.yaml b/clusters/k3s-dgx/nim-service/namespace.yaml
new file mode 100644
index 0000000..4e48deb
--- /dev/null
+++ b/clusters/k3s-dgx/nim-service/namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: nim-service