From c6b245cfe8b92e4c4d7a397cf308a934f4aac2a6 Mon Sep 17 00:00:00 2001
From: Eugene Rakhmatulin
Date: Tue, 10 Feb 2026 18:25:01 -0800
Subject: [PATCH] Added prefix caching to nemotron recipe

---
 recipes/nemotron-3-nano-nvfp4.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/recipes/nemotron-3-nano-nvfp4.yaml b/recipes/nemotron-3-nano-nvfp4.yaml
index 15cf910..eaeb161 100644
--- a/recipes/nemotron-3-nano-nvfp4.yaml
+++ b/recipes/nemotron-3-nano-nvfp4.yaml
@@ -43,5 +43,7 @@ command: |
     --reasoning-parser-plugin nano_v3_reasoning_parser.py \
     --reasoning-parser nano_v3 \
     --kv-cache-dtype fp8 \
+    --enable-prefix-caching \
+    --attention-backend flashinfer \
     --load-format fastsafetensors \
     --gpu-memory-utilization {gpu_memory_utilization}