Applied new fastsafetensors fix to mxfp4 build; disabled wheel builds by default
This commit is contained in:
@@ -40,7 +40,7 @@ RUN mkdir -p tiktoken_encodings && \
|
||||
wget -O tiktoken_encodings/o200k_base.tiktoken "https://openaipublic.blob.core.windows.net/encodings/o200k_base.tiktoken" && \
|
||||
wget -O tiktoken_encodings/cl100k_base.tiktoken "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken"
|
||||
|
||||
# Cache TEMPORARY PATCH for fastsafetensors loading in cluster setup - tracking https://github.com/foundation-model-stack/fastsafetensors/issues/36
|
||||
# Cache TEMPORARY PATCH for fastsafetensors loading in cluster setup - tracking https://github.com/vllm-project/vllm/issues/34180
|
||||
# COPY fastsafetensors.patch .
|
||||
|
||||
# Install fastsafetensors
|
||||
@@ -66,9 +66,13 @@ RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
||||
--extra-index-url https://wheels.vllm.ai/nightly/cu130; \
|
||||
fi
|
||||
|
||||
# Apply TEMPORARY PATCH for fastsafetensors loading in cluster setup - tracking https://github.com/foundation-model-stack/fastsafetensors/issues/36
|
||||
# Apply TEMPORARY PATCH for fastsafetensors loading in cluster setup - tracking https://github.com/vllm-project/vllm/issues/34180
|
||||
# Apply in site-packages
|
||||
# RUN patch -p1 -d /usr/local/lib/python3.12/dist-packages < ${VLLM_BASE_DIR}/fastsafetensors.patch
|
||||
# RUN if patch -p1 --dry-run --reverse -d /usr/local/lib/python3.12/dist-packages < ${VLLM_BASE_DIR}/fastsafetensors.patch &>/dev/null; then \
|
||||
# echo "PR #34180 is already applied"; \
|
||||
# else \
|
||||
# patch -p1 -d /usr/local/lib/python3.12/dist-packages < ${VLLM_BASE_DIR}/fastsafetensors.patch; \
|
||||
# fi
|
||||
|
||||
ARG FLASHINFER_PRE=""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user