use newer model

This commit is contained in:
2026-05-06 12:52:03 -05:00
parent 314e730995
commit 5e57329067

View File

@@ -1,21 +1,21 @@
apiVersion: apps.nvidia.com/v1alpha1 apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache kind: NIMCache
metadata: metadata:
name: meta-llama-3-2-1b-instruct name: meta-llama-3-1-8b-instruct
namespace: nim-service namespace: nim-service
spec: spec:
source: source:
ngc: ngc:
modelPuller: nvcr.io/nim/meta/llama-3.2-1b-instruct:1.12.0 modelPuller: nvcr.io/nim/meta/llama-3_1-8b-instruct:2.0.3
pullSecret: ngc-secret pullSecret: ngc-secret
authSecret: ngc-api-secret authSecret: ngc-api-secret
model: model:
engine: tensorrt_llm engine: "vllm"
tensorParallelism: "1" tensorParallelism: "1"
storage: storage:
pvc: pvc:
create: true create: true
size: "50Gi" size: "100Gi"
volumeAccessMode: ReadWriteOnce volumeAccessMode: ReadWriteOnce
--- ---
apiVersion: apps.nvidia.com/v1alpha1 apiVersion: apps.nvidia.com/v1alpha1
@@ -25,16 +25,16 @@ metadata:
namespace: nim-service namespace: nim-service
spec: spec:
image: image:
repository: nvcr.io/nim/meta/llama-3.2-1b-instruct repository: nvcr.io/nim/meta/llama-3_1-8b-instruct
tag: "1.12.0" tag: "2.0.3"
pullPolicy: IfNotPresent pullPolicy: IfNotPresent
pullSecrets: pullSecrets:
- ngc-secret - ngc-secret
authSecret: ngc-api-secret authSecret: ngc-api-secret
storage: storage:
nimCache: nimCache:
name: meta-llama-3-2-1b-instruct name: meta-llama-3-1-8b-instruct
profile: '' readOnly: true
replicas: 1 replicas: 1
resources: resources:
limits: limits: