map flashinfer/torch/triton cache directories by default

This commit is contained in:
Eugene Rakhmatulin
2026-02-10 16:36:02 -08:00
parent b990a1b8ac
commit 6d3f5dfd5c
2 changed files with 51 additions and 0 deletions

View File

@@ -164,6 +164,17 @@ Don't do it every time you rebuild, because it will slow down compilation times.
For periodic maintenance, I recommend using a filter: `docker builder prune --filter until=72h`
### 2026-02-10
#### Cache Directory Mounting
`launch-cluster.sh` now automatically mounts the default cache directories into the container to improve cold start times:
- `~/.cache/vllm`
- `~/.cache/flashinfer`
- `~/.triton`
To disable this behavior (clean start), use the `--no-cache-dirs` flag.
### 2026-02-09
- Migrated to a new base image with PyTorch 2.10 compiled with Spark support. With this change, building from wheels is no longer the recommended approach - please use a source build instead.
@@ -732,6 +743,7 @@ You can override the auto-detected values if needed:
| `--nccl-debug` | NCCL debug level (e.g., INFO, WARN). Defaults to INFO if flag is present but value is omitted. |
| `--check-config` | Check configuration and auto-detection without launching. |
| `--solo` | Solo mode: skip autodetection, launch only on current node, do not launch Ray cluster |
| `--no-cache-dirs` | Do not mount default cache directories (~/.cache/vllm, ~/.cache/flashinfer, ~/.triton). |
| `--launch-script` | Path to bash script to execute in the container (from examples/ directory or absolute path). If launch script is specified, action should be omitted. |
| `-d` | Run in daemon mode (detached). |

View File

@@ -31,6 +31,7 @@ SCRIPT_DIR="$(dirname "$(realpath "$0")")"
ACTIONS_ARG=""
SOLO_MODE="false"
MOUNT_CACHE_DIRS="true"
# Function to print usage
usage() {
@@ -46,6 +47,7 @@ usage() {
echo " --launch-script Path to bash script to execute in the container (from examples/ directory or absolute path). If launch script is specified, action should be omitted."
echo " --check-config Check configuration and auto-detection without launching"
echo " --solo Solo mode: skip autodetection, launch only on current node, do not launch Ray cluster"
echo " --no-cache-dirs Do not mount default cache directories (~/.cache/vllm, ~/.cache/flashinfer, ~/.triton)"
echo " -d Daemon mode (only for 'start' action)"
echo " action start | stop | status | exec (Default: start). Not compatible with --launch-script."
echo " command Command to run (only for 'exec' action). Not compatible with --launch-script."
@@ -77,6 +79,7 @@ while [[ "$#" -gt 0 ]]; do
;;
--check-config) CHECK_CONFIG="true" ;;
--solo) SOLO_MODE="true" ;;
--no-cache-dirs) MOUNT_CACHE_DIRS="false" ;;
-d) DAEMON_MODE="true" ;;
-h|--help) usage ;;
start|stop|status)
@@ -127,6 +130,22 @@ if [[ -n "$NCCL_DEBUG_VAL" ]]; then
esac
fi
# Register the default cache mounts unless --no-cache-dirs was given.
# CACHE_DIRS_TO_CREATE collects the host-side paths so they can be created
# before the containers start; it stays empty when mounting is disabled.
CACHE_DIRS_TO_CREATE=()
if [[ "$MOUNT_CACHE_DIRS" == "true" ]]; then
  # Paths relative to the home directory, in order: vLLM, FlashInfer, Triton.
  # Each host path $HOME/<rel> is bind-mounted at /root/<rel> in the container.
  for cache_rel in .cache/vllm .cache/flashinfer .triton; do
    DOCKER_ARGS+=" -v $HOME/$cache_rel:/root/$cache_rel"
    CACHE_DIRS_TO_CREATE+=("$HOME/$cache_rel")
  done
  # Do not leave the loop variable behind in the script's global scope.
  unset -v cache_rel
fi
# Resolve launch script path if specified
if [[ -n "$LAUNCH_SCRIPT_PATH" ]]; then
# Check if it's an absolute path or relative path that exists
@@ -276,6 +295,12 @@ if [[ "$CHECK_CONFIG" == "true" ]]; then
echo " Image Name: $IMAGE_NAME"
echo " ETH Interface: $ETH_IF"
echo " IB Interface: $IB_IF"
echo " Docker Args: $DOCKER_ARGS"
# Report which host cache directories will be mounted, or that mounting
# has been turned off (any value other than "true" counts as disabled).
case "$MOUNT_CACHE_DIRS" in
  true)
    echo " Mounting Cache Dirs: ${CACHE_DIRS_TO_CREATE[*]}"
    ;;
  *)
    echo " Mounting Cache Dirs: (Disabled)"
    ;;
esac
exit 0
fi
@@ -508,6 +533,13 @@ start_cluster() {
# Start Head Node
echo "Starting Head Node on $HEAD_IP..."
# Ensure cache dirs exist on head
# A single `mkdir -p` call creates every collected path (and any missing
# parents) without a loop variable that would otherwise leak out of this
# function into the script's global scope. `--` stops option parsing so a
# path can never be mistaken for a flag. The length check keeps the
# empty-list case a no-op, since `mkdir` with no operands is an error.
if [[ "$MOUNT_CACHE_DIRS" == "true" && ${#CACHE_DIRS_TO_CREATE[@]} -gt 0 ]]; then
  mkdir -p -- "${CACHE_DIRS_TO_CREATE[@]}"
fi
local head_cmd_args=()
if [[ "$SOLO_MODE" == "true" ]]; then
if [[ ${#MOD_PATHS[@]} -gt 0 ]]; then
@@ -534,6 +566,13 @@ start_cluster() {
for worker in "${PEER_NODES[@]}"; do
echo "Starting Worker Node on $worker..."
# Ensure cache dirs exist on worker
# NOTE(review): the paths in CACHE_DIRS_TO_CREATE are whatever was collected
# on the launching host; this assumes the worker uses the same paths - confirm.
if [[ "$MOUNT_CACHE_DIRS" == "true" && ${#CACHE_DIRS_TO_CREATE[@]} -gt 0 ]]; then
  # The command string is parsed a second time by the remote shell, so each
  # path is shell-quoted with %q; otherwise a path containing spaces or shell
  # metacharacters would be split or interpreted on the worker.
  # `local` keeps the helper string from leaking out of this function.
  local dirs_str
  printf -v dirs_str '%q ' "${CACHE_DIRS_TO_CREATE[@]}"
  # `--` stops option parsing so a path can never be taken as a mkdir flag.
  ssh "$worker" "mkdir -p -- $dirs_str"
fi
local docker_run_cmd="docker run -d --privileged --gpus all --rm --ipc=host --network host --name $CONTAINER_NAME $DOCKER_ARGS $IMAGE_NAME"
if [[ ${#MOD_PATHS[@]} -gt 0 ]]; then