map flashinfer/torch/triton cache directories by default
This commit is contained in:
12
README.md
12
README.md
@@ -164,6 +164,17 @@ Don't do it every time you rebuild, because it will slow down compilation times.
|
|||||||
|
|
||||||
For periodic maintenance, I recommend using a filter: `docker builder prune --filter until=72h`
|
For periodic maintenance, I recommend using a filter: `docker builder prune --filter until=72h`
|
||||||
|
|
||||||
|
### 2026-02-10
|
||||||
|
|
||||||
|
#### Cache Directory Mounting
|
||||||
|
|
||||||
|
`launch-cluster.sh` now automatically mounts the default cache directories from the host into the container to improve cold-start times:
|
||||||
|
- `~/.cache/vllm`
|
||||||
|
- `~/.cache/flashinfer`
|
||||||
|
- `~/.triton`
|
||||||
|
|
||||||
|
To disable this behavior (for a clean start), use the `--no-cache-dirs` flag.
|
||||||
|
|
||||||
### 2026-02-09
|
### 2026-02-09
|
||||||
|
|
||||||
- Migrated to a new base image with PyTorch 2.10 compiled with Spark support. With this change, wheels build is no longer a recommended way - please use a source build instead.
|
- Migrated to a new base image with PyTorch 2.10 compiled with Spark support. With this change, wheels build is no longer a recommended way - please use a source build instead.
|
||||||
@@ -732,6 +743,7 @@ You can override the auto-detected values if needed:
|
|||||||
| `--nccl-debug` | NCCL debug level (e.g., INFO, WARN). Defaults to INFO if flag is present but value is omitted. |
|
| `--nccl-debug` | NCCL debug level (e.g., INFO, WARN). Defaults to INFO if flag is present but value is omitted. |
|
||||||
| `--check-config` | Check configuration and auto-detection without launching. |
|
| `--check-config` | Check configuration and auto-detection without launching. |
|
||||||
| `--solo` | Solo mode: skip autodetection, launch only on current node, do not launch Ray cluster |
|
| `--solo` | Solo mode: skip autodetection, launch only on current node, do not launch Ray cluster |
|
||||||
|
| `--no-cache-dirs` | Do not mount the default cache directories (`~/.cache/vllm`, `~/.cache/flashinfer`, `~/.triton`). |
|
||||||
| `--launch-script` | Path to bash script to execute in the container (from examples/ directory or absolute path). If launch script is specified, action should be omitted. |
|
| `--launch-script` | Path to bash script to execute in the container (from examples/ directory or absolute path). If launch script is specified, action should be omitted. |
|
||||||
| `-d` | Run in daemon mode (detached). |
|
| `-d` | Run in daemon mode (detached). |
|
||||||
|
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ SCRIPT_DIR="$(dirname "$(realpath "$0")")"
|
|||||||
|
|
||||||
ACTIONS_ARG=""
|
ACTIONS_ARG=""
|
||||||
SOLO_MODE="false"
|
SOLO_MODE="false"
|
||||||
|
MOUNT_CACHE_DIRS="true"
|
||||||
|
|
||||||
# Function to print usage
|
# Function to print usage
|
||||||
usage() {
|
usage() {
|
||||||
@@ -46,6 +47,7 @@ usage() {
|
|||||||
echo " --launch-script Path to bash script to execute in the container (from examples/ directory or absolute path). If launch script is specified, action should be omitted."
|
echo " --launch-script Path to bash script to execute in the container (from examples/ directory or absolute path). If launch script is specified, action should be omitted."
|
||||||
echo " --check-config Check configuration and auto-detection without launching"
|
echo " --check-config Check configuration and auto-detection without launching"
|
||||||
echo " --solo Solo mode: skip autodetection, launch only on current node, do not launch Ray cluster"
|
echo " --solo Solo mode: skip autodetection, launch only on current node, do not launch Ray cluster"
|
||||||
|
echo " --no-cache-dirs Do not mount default cache directories (~/.cache/vllm, ~/.cache/flashinfer, ~/.triton)"
|
||||||
echo " -d Daemon mode (only for 'start' action)"
|
echo " -d Daemon mode (only for 'start' action)"
|
||||||
echo " action start | stop | status | exec (Default: start). Not compatible with --launch-script."
|
echo " action start | stop | status | exec (Default: start). Not compatible with --launch-script."
|
||||||
echo " command Command to run (only for 'exec' action). Not compatible with --launch-script."
|
echo " command Command to run (only for 'exec' action). Not compatible with --launch-script."
|
||||||
@@ -77,6 +79,7 @@ while [[ "$#" -gt 0 ]]; do
|
|||||||
;;
|
;;
|
||||||
--check-config) CHECK_CONFIG="true" ;;
|
--check-config) CHECK_CONFIG="true" ;;
|
||||||
--solo) SOLO_MODE="true" ;;
|
--solo) SOLO_MODE="true" ;;
|
||||||
|
--no-cache-dirs) MOUNT_CACHE_DIRS="false" ;;
|
||||||
-d) DAEMON_MODE="true" ;;
|
-d) DAEMON_MODE="true" ;;
|
||||||
-h|--help) usage ;;
|
-h|--help) usage ;;
|
||||||
start|stop|status)
|
start|stop|status)
|
||||||
@@ -127,6 +130,22 @@ if [[ -n "$NCCL_DEBUG_VAL" ]]; then
|
|||||||
esac
|
esac
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Add cache dirs if requested
|
||||||
|
CACHE_DIRS_TO_CREATE=()
|
||||||
|
if [[ "$MOUNT_CACHE_DIRS" == "true" ]]; then
|
||||||
|
# vLLM Cache
|
||||||
|
DOCKER_ARGS="$DOCKER_ARGS -v $HOME/.cache/vllm:/root/.cache/vllm"
|
||||||
|
CACHE_DIRS_TO_CREATE+=("$HOME/.cache/vllm")
|
||||||
|
|
||||||
|
# FlashInfer Cache
|
||||||
|
DOCKER_ARGS="$DOCKER_ARGS -v $HOME/.cache/flashinfer:/root/.cache/flashinfer"
|
||||||
|
CACHE_DIRS_TO_CREATE+=("$HOME/.cache/flashinfer")
|
||||||
|
|
||||||
|
# Triton Cache
|
||||||
|
DOCKER_ARGS="$DOCKER_ARGS -v $HOME/.triton:/root/.triton"
|
||||||
|
CACHE_DIRS_TO_CREATE+=("$HOME/.triton")
|
||||||
|
fi
|
||||||
|
|
||||||
# Resolve launch script path if specified
|
# Resolve launch script path if specified
|
||||||
if [[ -n "$LAUNCH_SCRIPT_PATH" ]]; then
|
if [[ -n "$LAUNCH_SCRIPT_PATH" ]]; then
|
||||||
# Check if it's an absolute path or relative path that exists
|
# Check if it's an absolute path or relative path that exists
|
||||||
@@ -276,6 +295,12 @@ if [[ "$CHECK_CONFIG" == "true" ]]; then
|
|||||||
echo " Image Name: $IMAGE_NAME"
|
echo " Image Name: $IMAGE_NAME"
|
||||||
echo " ETH Interface: $ETH_IF"
|
echo " ETH Interface: $ETH_IF"
|
||||||
echo " IB Interface: $IB_IF"
|
echo " IB Interface: $IB_IF"
|
||||||
|
echo " Docker Args: $DOCKER_ARGS"
|
||||||
|
if [[ "$MOUNT_CACHE_DIRS" == "true" ]]; then
|
||||||
|
echo " Mounting Cache Dirs: ${CACHE_DIRS_TO_CREATE[*]}"
|
||||||
|
else
|
||||||
|
echo " Mounting Cache Dirs: (Disabled)"
|
||||||
|
fi
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -508,6 +533,13 @@ start_cluster() {
|
|||||||
# Start Head Node
|
# Start Head Node
|
||||||
echo "Starting Head Node on $HEAD_IP..."
|
echo "Starting Head Node on $HEAD_IP..."
|
||||||
|
|
||||||
|
# Ensure cache dirs exist on head
|
||||||
|
if [[ "$MOUNT_CACHE_DIRS" == "true" ]]; then
|
||||||
|
for dir in "${CACHE_DIRS_TO_CREATE[@]}"; do
|
||||||
|
mkdir -p "$dir"
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
local head_cmd_args=()
|
local head_cmd_args=()
|
||||||
if [[ "$SOLO_MODE" == "true" ]]; then
|
if [[ "$SOLO_MODE" == "true" ]]; then
|
||||||
if [[ ${#MOD_PATHS[@]} -gt 0 ]]; then
|
if [[ ${#MOD_PATHS[@]} -gt 0 ]]; then
|
||||||
@@ -534,6 +566,13 @@ start_cluster() {
|
|||||||
for worker in "${PEER_NODES[@]}"; do
|
for worker in "${PEER_NODES[@]}"; do
|
||||||
echo "Starting Worker Node on $worker..."
|
echo "Starting Worker Node on $worker..."
|
||||||
|
|
||||||
|
# Ensure cache dirs exist on worker
|
||||||
|
if [[ "$MOUNT_CACHE_DIRS" == "true" ]]; then
|
||||||
|
# Create string of dirs to create
|
||||||
|
dirs_str="${CACHE_DIRS_TO_CREATE[*]}"
|
||||||
|
ssh "$worker" "mkdir -p $dirs_str"
|
||||||
|
fi
|
||||||
|
|
||||||
local docker_run_cmd="docker run -d --privileged --gpus all --rm --ipc=host --network host --name $CONTAINER_NAME $DOCKER_ARGS $IMAGE_NAME"
|
local docker_run_cmd="docker run -d --privileged --gpus all --rm --ipc=host --network host --name $CONTAINER_NAME $DOCKER_ARGS $IMAGE_NAME"
|
||||||
|
|
||||||
if [[ ${#MOD_PATHS[@]} -gt 0 ]]; then
|
if [[ ${#MOD_PATHS[@]} -gt 0 ]]; then
|
||||||
|
|||||||
Reference in New Issue
Block a user