diff --git a/clusters/k3s-dgx/nim-service/llama.yaml b/clusters/k3s-dgx/nim-service/llama.yaml
index 7b37660..64b44e8 100644
--- a/clusters/k3s-dgx/nim-service/llama.yaml
+++ b/clusters/k3s-dgx/nim-service/llama.yaml
@@ -1,21 +1,21 @@
 apiVersion: apps.nvidia.com/v1alpha1
 kind: NIMCache
 metadata:
-  name: meta-llama-3-2-1b-instruct
+  name: meta-llama-3-1-8b-instruct
   namespace: nim-service
 spec:
   source:
     ngc:
-      modelPuller: nvcr.io/nim/meta/llama-3.2-1b-instruct:1.12.0
+      modelPuller: nvcr.io/nim/meta/llama-3_1-8b-instruct:2.0.3
       pullSecret: ngc-secret
       authSecret: ngc-api-secret
       model:
-        engine: tensorrt_llm
+        engine: "vllm"
         tensorParallelism: "1"
   storage:
     pvc:
       create: true
-      size: "50Gi"
+      size: "100Gi"
       volumeAccessMode: ReadWriteOnce
 ---
 apiVersion: apps.nvidia.com/v1alpha1
@@ -25,16 +25,16 @@ metadata:
   namespace: nim-service
 spec:
   image:
-    repository: nvcr.io/nim/meta/llama-3.2-1b-instruct
-    tag: "1.12.0"
+    repository: nvcr.io/nim/meta/llama-3_1-8b-instruct
+    tag: "2.0.3"
     pullPolicy: IfNotPresent
     pullSecrets:
       - ngc-secret
   authSecret: ngc-api-secret
   storage:
     nimCache:
-      name: meta-llama-3-2-1b-instruct
-      profile: ''
+      name: meta-llama-3-1-8b-instruct
+      readOnly: true
   replicas: 1
   resources:
     limits: