Added optional flashinfer packages, using pre-release flashinfer
This commit is contained in:
@@ -18,6 +18,7 @@ ENV MAKEFLAGS="-j${BUILD_JOBS}"
|
|||||||
ENV PIP_CACHE_DIR=/root/.cache/pip
|
ENV PIP_CACHE_DIR=/root/.cache/pip
|
||||||
ENV UV_CACHE_DIR=/root/.cache/uv
|
ENV UV_CACHE_DIR=/root/.cache/uv
|
||||||
ENV UV_SYSTEM_PYTHON=1
|
ENV UV_SYSTEM_PYTHON=1
|
||||||
|
ENV UV_LINK_MODE=copy
|
||||||
|
|
||||||
# Install minimal runtime dependencies (NCCL, Python)
|
# Install minimal runtime dependencies (NCCL, Python)
|
||||||
# Note: "devel" tools like cmake/gcc are NOT installed here to save space
|
# Note: "devel" tools like cmake/gcc are NOT installed here to save space
|
||||||
@@ -60,6 +61,12 @@ RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
|||||||
# Apply in site-packages
|
# Apply in site-packages
|
||||||
RUN patch -p1 -d /usr/local/lib/python3.12/dist-packages < ${VLLM_BASE_DIR}/fastsafetensors.patch
|
RUN patch -p1 -d /usr/local/lib/python3.12/dist-packages < ${VLLM_BASE_DIR}/fastsafetensors.patch
|
||||||
|
|
||||||
|
# Install flashinfer helper packages
|
||||||
|
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
||||||
|
uv pip install --system --break-system-packages flashinfer-python -U --no-deps --pre --index-url https://flashinfer.ai/whl && \
|
||||||
|
uv pip install --system --break-system-packages flashinfer-cubin --pre --index-url https://flashinfer.ai/whl && \
|
||||||
|
uv pip install --system --break-system-packages flashinfer-jit-cache --pre --index-url https://flashinfer.ai/whl/cu130
|
||||||
|
|
||||||
# Setup Env for Runtime
|
# Setup Env for Runtime
|
||||||
ENV TORCH_CUDA_ARCH_LIST=12.1a
|
ENV TORCH_CUDA_ARCH_LIST=12.1a
|
||||||
ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
|
ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
|
||||||
|
|||||||
Reference in New Issue
Block a user