Reduced MXFP4 container size

This commit is contained in:
Eugene Rakhmatulin
2026-01-30 15:18:42 -08:00
parent be19675980
commit 57c890b10c

View File

@@ -256,12 +256,9 @@ RUN mkdir -p tiktoken_encodings && \
     wget -O tiktoken_encodings/cl100k_base.tiktoken "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken"
 
 # Copy artifacts from Builder Stage
-# We copy the python packages and executables
-# No need to copy source code, as it's already in the site-packages
-COPY --from=builder /workspace/wheels /workspace/wheels
-RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
-    uv pip install /workspace/wheels/*.whl && \
-    rm -rf /workspace/wheels
+RUN --mount=type=bind,from=builder,source=/workspace/wheels,target=/mount/wheels \
+    --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
+    uv pip install /mount/wheels/*.whl
 
 # Setup Env for Runtime
 ENV TORCH_CUDA_ARCH_LIST="12.0;12.1"