use newer model
This commit is contained in:
@@ -1,21 +1,21 @@
|
|||||||
apiVersion: apps.nvidia.com/v1alpha1
|
apiVersion: apps.nvidia.com/v1alpha1
|
||||||
kind: NIMCache
|
kind: NIMCache
|
||||||
metadata:
|
metadata:
|
||||||
name: meta-llama-3-2-1b-instruct
|
name: meta-llama-3-1-8b-instruct
|
||||||
namespace: nim-service
|
namespace: nim-service
|
||||||
spec:
|
spec:
|
||||||
source:
|
source:
|
||||||
ngc:
|
ngc:
|
||||||
modelPuller: nvcr.io/nim/meta/llama-3.2-1b-instruct:1.12.0
|
modelPuller: nvcr.io/nim/meta/llama-3_1-8b-instruct:2.0.3
|
||||||
pullSecret: ngc-secret
|
pullSecret: ngc-secret
|
||||||
authSecret: ngc-api-secret
|
authSecret: ngc-api-secret
|
||||||
model:
|
model:
|
||||||
engine: tensorrt_llm
|
engine: "vllm"
|
||||||
tensorParallelism: "1"
|
tensorParallelism: "1"
|
||||||
storage:
|
storage:
|
||||||
pvc:
|
pvc:
|
||||||
create: true
|
create: true
|
||||||
size: "50Gi"
|
size: "100Gi"
|
||||||
volumeAccessMode: ReadWriteOnce
|
volumeAccessMode: ReadWriteOnce
|
||||||
---
|
---
|
||||||
apiVersion: apps.nvidia.com/v1alpha1
|
apiVersion: apps.nvidia.com/v1alpha1
|
||||||
@@ -25,16 +25,16 @@ metadata:
|
|||||||
namespace: nim-service
|
namespace: nim-service
|
||||||
spec:
|
spec:
|
||||||
image:
|
image:
|
||||||
repository: nvcr.io/nim/meta/llama-3.2-1b-instruct
|
repository: nvcr.io/nim/meta/llama-3_1-8b-instruct
|
||||||
tag: "1.12.0"
|
tag: "2.0.3"
|
||||||
pullPolicy: IfNotPresent
|
pullPolicy: IfNotPresent
|
||||||
pullSecrets:
|
pullSecrets:
|
||||||
- ngc-secret
|
- ngc-secret
|
||||||
authSecret: ngc-api-secret
|
authSecret: ngc-api-secret
|
||||||
storage:
|
storage:
|
||||||
nimCache:
|
nimCache:
|
||||||
name: meta-llama-3-2-1b-instruct
|
name: meta-llama-3-1-8b-instruct
|
||||||
profile: ''
|
readOnly: true
|
||||||
replicas: 1
|
replicas: 1
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
|
|||||||
Reference in New Issue
Block a user