Keep the builder's wheels out of the runtime image layers.

Why: the wheels used to be COPY'd from the builder stage and installed by the
next RUN. Deleting them at the end of that RUN does not shrink the image,
because the COPY layer still contains them. This patch bind-mounts the wheels
from the builder stage for the duration of the install step instead, so they
are never written to any layer of the final image.

Also in this patch:
  * Dockerfile.wheels names its first stage "base" and installs Ray and
    apache-tvm-ffi together with fastsafetensors; the separate
    "Final extra deps" step is removed.
  * A disabled (commented-out) package-cleanup step is recorded in Dockerfile
    and Dockerfile.wheels, with duplicate package names removed.
  * Touched files end with a newline; no blank line is added at end of file.

Note: "index" header lines are omitted on purpose; they are not needed to
apply the patch.

diff --git a/Dockerfile b/Dockerfile
--- a/Dockerfile
+++ b/Dockerfile
@@ -223,7 +223,9 @@ RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
-# Copy artifacts from Builder Stage
-COPY --from=builder /workspace/wheels /workspace/wheels
+# Install artifacts from Builder Stage.
+# The wheels are bind-mounted from the builder for this step only, so they
+# never land in a layer of the final image.
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
+    --mount=type=bind,from=builder,source=/workspace/wheels,target=/workspace/wheels \
     uv pip install /workspace/wheels/*.whl
 
 # Setup Env for Runtime
 ENV TORCH_CUDA_ARCH_LIST=12.1a
@@ -239,3 +241,22 @@ RUN chmod +x $VLLM_BASE_DIR/run-cluster-node.sh
 # Final extra deps
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
     uv pip install ray[default] fastsafetensors "apache-tvm-ffi<0.2"
+
+# Cleanup
+# NOTE(review): intentionally left disabled. Files added by an earlier layer
+# (presumably these come from the base image) still ship with the image after
+# an uninstall in a later layer, so this would not shrink it unless the image
+# is flattened. Also confirm none of these are runtime deps before enabling.
+# RUN uv pip uninstall absl-py apex argon2-cffi \
+#     argon2-cffi-bindings arrow asttokens astunparse async-lru audioread babel beautifulsoup4 \
+#     black bleach comm contourpy cycler datasets debugpy decorator defusedxml dllist dm-tree \
+#     execnet executing expecttest fastjsonschema fonttools fqdn gast hypothesis \
+#     ipykernel ipython ipython_pygments_lexers isoduration isort jedi joblib jupyter-events \
+#     jupyter-lsp jupyter_client jupyter_core jupyter_server jupyter_server_terminals jupyterlab \
+#     jupyterlab_code_formatter jupyterlab_pygments jupyterlab_server \
+#     jupyterlab_tensorboard_pro jupytext kiwisolver matplotlib matplotlib-inline \
+#     mistune ml_dtypes mock nbclient nbconvert nbformat nest-asyncio notebook notebook_shim \
+#     opt_einsum optree outlines_core overrides pandas pandocfilters parso pexpect polygraphy pooch \
+#     pyarrow pycocotools pytest-flakefinder pytest-rerunfailures pytest-shard pytest-xdist \
+#     scikit-learn scipy Send2Trash soundfile soupsieve soxr spin stack-data \
+#     wcwidth webcolors xdoctest Werkzeug
diff --git a/Dockerfile.mxfp4 b/Dockerfile.mxfp4
--- a/Dockerfile.mxfp4
+++ b/Dockerfile.mxfp4
@@ -258,7 +258,9 @@ RUN mkdir -p tiktoken_encodings && \
 # No need to copy source code, as it's already in the site-packages
-COPY --from=builder /workspace/wheels /workspace/wheels
+# The wheels are bind-mounted from the builder for this step only, so they
+# never land in a layer of the final image.
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
+    --mount=type=bind,from=builder,source=/workspace/wheels,target=/workspace/wheels \
     uv pip install /workspace/wheels/*.whl
 
 # Setup Env for Runtime
 ENV TORCH_CUDA_ARCH_LIST="12.0;12.1"
diff --git a/Dockerfile.wheels b/Dockerfile.wheels
--- a/Dockerfile.wheels
+++ b/Dockerfile.wheels
@@ -1,6 +1,9 @@
 # syntax=docker/dockerfile:1.6
 
-FROM nvcr.io/nvidia/pytorch:26.01-py3
+# =========================================================
+# STAGE 1: Base Image (Installs Dependencies)
+# =========================================================
+FROM nvcr.io/nvidia/pytorch:26.01-py3 AS base
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PIP_BREAK_SYSTEM_PACKAGES=1
@@ -42,7 +45,7 @@ COPY fastsafetensors.patch .
 
-# Install fastsafetensors
+# Install fastsafetensors and the extra deps (Ray, apache-tvm-ffi)
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
-    uv pip install -U fastsafetensors
+    uv pip install -U fastsafetensors "ray[default]" "apache-tvm-ffi<0.2"
 
 # --- VLLM SOURCE CACHE BUSTER ---
 # Change THIS argument to force a fresh git clone and rebuild of vLLM
@@ -92,7 +95,21 @@ ENV TIKTOKEN_ENCODINGS_BASE=$VLLM_BASE_DIR/tiktoken_encodings
 COPY run-cluster-node.sh $VLLM_BASE_DIR/
 RUN chmod +x $VLLM_BASE_DIR/run-cluster-node.sh
 
-# Final extra deps
-RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
-    uv pip install ray[default] fastsafetensors "apache-tvm-ffi<0.2"
-
+# Cleanup unneeded packages to reduce image size
+# NOTE(review): intentionally left disabled. Files added by an earlier layer
+# (presumably these come from the base image) still ship with the image after
+# an uninstall in a later layer, so this would not shrink it unless the image
+# is flattened. Also confirm none of these are runtime deps before enabling.
+# RUN uv pip uninstall absl-py apex argon2-cffi \
+#     argon2-cffi-bindings arrow asttokens astunparse async-lru audioread babel beautifulsoup4 \
+#     black bleach comm contourpy cycler datasets debugpy decorator defusedxml dllist dm-tree \
+#     execnet executing expecttest fastjsonschema fonttools fqdn gast hypothesis \
+#     ipykernel ipython ipython_pygments_lexers isoduration isort jedi joblib jupyter-events \
+#     jupyter-lsp jupyter_client jupyter_core jupyter_server jupyter_server_terminals jupyterlab \
+#     jupyterlab_code_formatter jupyterlab_pygments jupyterlab_server \
+#     jupyterlab_tensorboard_pro jupytext kiwisolver matplotlib matplotlib-inline \
+#     mistune ml_dtypes mock nbclient nbconvert nbformat nest-asyncio notebook notebook_shim \
+#     opt_einsum optree outlines_core overrides pandas pandocfilters parso pexpect polygraphy pooch \
+#     pyarrow pycocotools pytest-flakefinder pytest-rerunfailures pytest-shard pytest-xdist \
+#     scikit-learn scipy Send2Trash soundfile soupsieve soxr spin stack-data \
+#     wcwidth webcolors xdoctest Werkzeug