Added prefix caching (`--enable-prefix-caching`) to the Nemotron recipe

This commit is contained in:
Eugene Rakhmatulin
2026-02-10 18:25:01 -08:00
parent 6d3f5dfd5c
commit c6b245cfe8

View File

@@ -43,5 +43,7 @@ command: |
--reasoning-parser-plugin nano_v3_reasoning_parser.py \
--reasoning-parser nano_v3 \
--kv-cache-dtype fp8 \
--enable-prefix-caching \
--attention-backend flashinfer \
--load-format fastsafetensors \
--gpu-memory-utilization {gpu_memory_utilization}