Reduced MXFP4 container size
This commit is contained in:
@@ -256,12 +256,9 @@ RUN mkdir -p tiktoken_encodings && \
|
|||||||
wget -O tiktoken_encodings/cl100k_base.tiktoken "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken"
|
wget -O tiktoken_encodings/cl100k_base.tiktoken "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken"
|
||||||
|
|
||||||
# Copy artifacts from Builder Stage
|
# Copy artifacts from Builder Stage
|
||||||
# We copy the python packages and executables
|
RUN --mount=type=bind,from=builder,source=/workspace/wheels,target=/mount/wheels \
|
||||||
# No need to copy source code, as it's already in the site-packages
|
--mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
||||||
COPY --from=builder /workspace/wheels /workspace/wheels
|
uv pip install /mount/wheels/*.whl
|
||||||
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
|
||||||
uv pip install /workspace/wheels/*.whl && \
|
|
||||||
rm -rf /workspace/wheels
|
|
||||||
|
|
||||||
# Setup Env for Runtime
|
# Setup Env for Runtime
|
||||||
ENV TORCH_CUDA_ARCH_LIST="12.0;12.1"
|
ENV TORCH_CUDA_ARCH_LIST="12.0;12.1"
|
||||||
|
|||||||
Reference in New Issue
Block a user