enable cache reuse
This commit is contained in:
@@ -45,6 +45,8 @@ spec:
|
|||||||
value: "true"
|
value: "true"
|
||||||
- name: NIM_MAX_MODEL_LEN
|
- name: NIM_MAX_MODEL_LEN
|
||||||
value: "32768"
|
value: "32768"
|
||||||
|
- name: NIM_ENABLE_KV_CACHE_REUSE
|
||||||
|
value: "true"
|
||||||
image:
|
image:
|
||||||
repository: nvcr.io/nim/qwen/qwen3-32b-dgx-spark
|
repository: nvcr.io/nim/qwen/qwen3-32b-dgx-spark
|
||||||
tag: "1.1.0-variant"
|
tag: "1.1.0-variant"
|
||||||
|
|||||||
Reference in New Issue
Block a user