diff --git a/Dockerfile b/Dockerfile index 02835e7..4e224f0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -90,7 +90,7 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \ echo "Cache hit: Fetching flashinfer updates..." && \ cd flashinfer && \ git fetch origin && \ - git fetch origin --tags && \ + git fetch origin --tags --force && \ (git checkout --detach origin/${FLASHINFER_REF} 2>/dev/null || git checkout ${FLASHINFER_REF}) && \ git submodule update --init --recursive && \ git clean -fdx && \ @@ -153,7 +153,7 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \ echo "Cache hit: Fetching updates..." && \ cd vllm && \ git fetch origin && \ - git fetch origin --tags && \ + git fetch origin --tags --force && \ (git checkout --detach origin/${VLLM_REF} 2>/dev/null || git checkout ${VLLM_REF}) && \ git submodule update --init --recursive && \ git clean -fdx && \ diff --git a/hf-download.sh b/hf-download.sh index 4c4b4c2..c4c8d9f 100755 --- a/hf-download.sh +++ b/hf-download.sh @@ -1,7 +1,7 @@ #!/bin/bash set -e -HUB_PATH="$HOME/.cache/huggingface/hub" +HUB_PATH="${HF_HOME:-$HOME/.cache/huggingface}/hub" # Default values COPY_HOSTS=() @@ -42,7 +42,7 @@ copy_model_to_host() { local host_copy_start host_copy_end host_copy_time host_copy_start=$(date +%s) - if rsync -av --progress "$model_dir" "${SSH_USER}@${host}:$HUB_PATH/"; then + if rsync -av --mkpath --progress "$model_dir" "${SSH_USER}@${host}:$HUB_PATH/"; then host_copy_end=$(date +%s) host_copy_time=$((host_copy_end - host_copy_start)) printf "Copy to %s completed in %02d:%02d:%02d\n" "$host" $((host_copy_time/3600)) $((host_copy_time%3600/60)) $((host_copy_time%60)) diff --git a/launch-cluster.sh b/launch-cluster.sh index 2fe36dc..c5276f4 100755 --- a/launch-cluster.sh +++ b/launch-cluster.sh @@ -3,8 +3,9 @@ # Default Configuration IMAGE_NAME="vllm-node" DEFAULT_CONTAINER_NAME="vllm_node" +HF_CACHE_DIR="${HF_HOME:-$HOME/.cache/huggingface}" # Modify these if you want to pass additional docker args or set VLLM_SPARK_EXTRA_DOCKER_ARGS variable -DOCKER_ARGS="-e NCCL_IGNORE_CPU_AFFINITY=1 -v $HOME/.cache/huggingface:/root/.cache/huggingface" +DOCKER_ARGS="-e NCCL_IGNORE_CPU_AFFINITY=1 -v $HF_CACHE_DIR:/root/.cache/huggingface" # Append additional arguments from environment variable if [[ -n "$VLLM_SPARK_EXTRA_DOCKER_ARGS" ]]; then diff --git a/mods/fix-qwen3-next-autoround/run.sh b/mods/fix-qwen3-next-autoround/run.sh new file mode 100644 index 0000000..8e99055 --- /dev/null +++ b/mods/fix-qwen3-next-autoround/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash +set -e + +echo "Reverting PR #35156" +if curl -L https://patch-diff.githubusercontent.com/raw/vllm-project/vllm/pull/35156.diff | patch -p1 -R -d /usr/local/lib/python3.12/dist-packages; then + echo " OK" +else + echo " Patch can't be reversed, skipping" +fi