Merge branch 'main' into pytorch-base
This commit is contained in:
@@ -256,12 +256,9 @@ RUN mkdir -p tiktoken_encodings && \
|
||||
wget -O tiktoken_encodings/cl100k_base.tiktoken "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken"
|
||||
|
||||
# Copy artifacts from Builder Stage
|
||||
# We copy the python packages and executables
|
||||
# No need to copy source code, as it's already in the site-packages
|
||||
COPY --from=builder /workspace/wheels /workspace/wheels
|
||||
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
||||
uv pip install /workspace/wheels/*.whl && \
|
||||
rm -rf /workspace/wheels
|
||||
RUN --mount=type=bind,from=builder,source=/workspace/wheels,target=/mount/wheels \
|
||||
--mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
||||
uv pip install /mount/wheels/*.whl
|
||||
|
||||
# Setup Env for Runtime
|
||||
ENV TORCH_CUDA_ARCH_LIST="12.0;12.1"
|
||||
|
||||
@@ -144,24 +144,22 @@ for i in "${!MOD_PATHS[@]}"; do
|
||||
done
|
||||
|
||||
# --- Auto-Detection Logic ---
|
||||
|
||||
# Source autodiscover module
|
||||
source "$(dirname "$0")/autodiscover.sh"
|
||||
|
||||
# Perform auto-detection
|
||||
detect_interfaces || exit 1
|
||||
|
||||
if [[ "$SOLO_MODE" == "true" ]]; then
|
||||
if [[ -n "$NODES_ARG" ]]; then
|
||||
echo "Error: --solo is incompatible with -n/--nodes."
|
||||
exit 1
|
||||
fi
|
||||
# Solo mode: skip node detection, just get local IP
|
||||
detect_local_ip || exit 1
|
||||
LOCAL_IP="127.0.0.1"
|
||||
NODES_ARG="$LOCAL_IP"
|
||||
PEER_NODES=()
|
||||
echo "Solo mode enabled. Skipping node detection."
|
||||
else
|
||||
# Perform auto-detection
|
||||
detect_interfaces || exit 1
|
||||
detect_nodes || exit 1
|
||||
fi
|
||||
|
||||
@@ -173,8 +171,11 @@ fi
|
||||
# Split nodes into array
|
||||
IFS=',' read -r -a ALL_NODES <<< "$NODES_ARG"
|
||||
|
||||
# Detect Head IP (Local IP)
|
||||
detect_local_ip || exit 1
|
||||
if [[ "$SOLO_MODE" != "true" ]]; then
|
||||
# Detect Head IP (Local IP)
|
||||
detect_local_ip || exit 1
|
||||
fi
|
||||
|
||||
HEAD_IP="$LOCAL_IP"
|
||||
|
||||
# Verify HEAD_IP is in ALL_NODES
|
||||
|
||||
Reference in New Issue
Block a user