diff --git a/recipes/nemotron-3-nano-nvfp4.yaml b/recipes/nemotron-3-nano-nvfp4.yaml index 15cf910..eaeb161 100644 --- a/recipes/nemotron-3-nano-nvfp4.yaml +++ b/recipes/nemotron-3-nano-nvfp4.yaml @@ -43,5 +43,7 @@ command: | --reasoning-parser-plugin nano_v3_reasoning_parser.py \ --reasoning-parser nano_v3 \ --kv-cache-dtype fp8 \ + --enable-prefix-caching \ + --attention-backend flashinfer \ --load-format fastsafetensors \ --gpu-memory-utilization {gpu_memory_utilization}