Added optional flashinfer packages, using pre-release flashinfer

This commit is contained in:
Eugene Rakhmatulin
2025-12-20 22:56:40 -08:00
parent 76988e0c75
commit d5d85aaac7

View File

@@ -18,6 +18,7 @@ ENV MAKEFLAGS="-j${BUILD_JOBS}"
ENV PIP_CACHE_DIR=/root/.cache/pip
ENV UV_CACHE_DIR=/root/.cache/uv
ENV UV_SYSTEM_PYTHON=1
ENV UV_LINK_MODE=copy
# Install minimal runtime dependencies (NCCL, Python)
# Note: "devel" tools like cmake/gcc are NOT installed here to save space
@@ -60,6 +61,12 @@ RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
# Apply in site-packages
RUN patch -p1 -d /usr/local/lib/python3.12/dist-packages < ${VLLM_BASE_DIR}/fastsafetensors.patch
# Install flashinfer helper packages
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
uv pip install --system --break-system-packages flashinfer-python -U --no-deps --pre --index-url https://flashinfer.ai/whl && \
uv pip install --system --break-system-packages flashinfer-cubin --pre --index-url https://flashinfer.ai/whl && \
uv pip install --system --break-system-packages flashinfer-jit-cache --pre --index-url https://flashinfer.ai/whl/cu130
# Setup Env for Runtime
ENV TORCH_CUDA_ARCH_LIST=12.1a
ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas