More robust wheels check before download

This commit is contained in:
Eugene Rakhmatulin
2026-03-05 17:06:57 -08:00
parent 5f8f988d91
commit 5346372f14

View File

@@ -66,7 +66,12 @@ copy_to_host() {
# try_download_wheels TAG PREFIX # try_download_wheels TAG PREFIX
# Downloads wheels matching PREFIX*.whl from a GitHub release. # Downloads wheels matching PREFIX*.whl from a GitHub release.
# Skips files that are already present and up to date (by remote updated_at vs local mtime). # Skip conditions (either is sufficient):
# 1. Commit hash in release name matches .wheels/.{PREFIX}-commit (primary check).
# 2. All local wheels are newer than the latest GitHub asset (freshly built).
# Only downloads a file when it is missing locally or the remote asset is newer
# than the local copy, AND the above skip conditions are not met.
# After a successful download, persists the release commit hash to .wheels/.{PREFIX}-commit.
# Returns 0 if all matching wheels are now available, 1 on any error. # Returns 0 if all matching wheels are now available, 1 on any error.
try_download_wheels() { try_download_wheels() {
local TAG="$1" local TAG="$1"
@@ -92,7 +97,7 @@ try_download_wheels() {
local DOWNLOAD_LIST local DOWNLOAD_LIST
DOWNLOAD_LIST=$(echo "$RELEASE_JSON" | python3 -c ' DOWNLOAD_LIST=$(echo "$RELEASE_JSON" | python3 -c '
import json, sys, os import json, sys, os, re
from datetime import datetime, timezone from datetime import datetime, timezone
wheels_dir, prefix = sys.argv[1], sys.argv[2] wheels_dir, prefix = sys.argv[1], sys.argv[2]
@@ -104,6 +109,31 @@ if not assets:
print("No assets found matching prefix: " + prefix, file=sys.stderr) print("No assets found matching prefix: " + prefix, file=sys.stderr)
sys.exit(1) sys.exit(1)
# Extract commit hash from the release name:
# FlashInfer: "Prebuilt FlashInfer Wheels (0.6.5-124a2d32-d20260305) - DGX Spark Only"
# vLLM: "Prebuilt vLLM Wheels (0.16.1rc1.dev296+ga73af584f.d20260305.cu131) - DGX Spark only"
release_name = data.get("name", "")
commit_hash = None
if prefix.startswith("flashinfer"):
m = re.search(r"\([\d.]+\w*-([0-9a-f]{6,})-d\d{8}\)", release_name, re.IGNORECASE)
if m:
commit_hash = m.group(1)
else:
m = re.search(r"\+g([0-9a-f]{6,})\.", release_name, re.IGNORECASE)
if m:
commit_hash = m.group(1)
# Compare against the locally stored commit hash
commit_file = os.path.join(wheels_dir, "." + prefix + "-commit")
local_commit = None
if os.path.exists(commit_file):
with open(commit_file) as f:
local_commit = f.read().strip()
if commit_hash and local_commit and local_commit[:len(commit_hash)] == commit_hash:
print("Commit hash matches (" + commit_hash + ") — wheels are up to date.", file=sys.stderr)
sys.exit(0)
newest_remote_ts = max( newest_remote_ts = max(
datetime.strptime(a["updated_at"], "%Y-%m-%dT%H:%M:%SZ") datetime.strptime(a["updated_at"], "%Y-%m-%dT%H:%M:%SZ")
.replace(tzinfo=timezone.utc).timestamp() .replace(tzinfo=timezone.utc).timestamp()
@@ -119,12 +149,19 @@ local_wheels = [
if local_wheels and all(os.path.getmtime(p) >= newest_remote_ts for p in local_wheels): if local_wheels and all(os.path.getmtime(p) >= newest_remote_ts for p in local_wheels):
sys.exit(0) sys.exit(0)
downloads = []
for a in assets: for a in assets:
local_path = os.path.join(wheels_dir, a["name"]) local_path = os.path.join(wheels_dir, a["name"])
remote_ts = datetime.strptime(a["updated_at"], "%Y-%m-%dT%H:%M:%SZ") \ remote_ts = datetime.strptime(a["updated_at"], "%Y-%m-%dT%H:%M:%SZ") \
.replace(tzinfo=timezone.utc).timestamp() .replace(tzinfo=timezone.utc).timestamp()
if not os.path.exists(local_path) or remote_ts > os.path.getmtime(local_path): if not os.path.exists(local_path) or remote_ts > os.path.getmtime(local_path):
print(a["browser_download_url"] + " " + a["name"]) downloads.append(a["browser_download_url"] + " " + a["name"])
if downloads:
if commit_hash:
print("#commit:" + commit_hash)
for d in downloads:
print(d)
' "$WHEELS_DIR" "$PREFIX") || return 1 ' "$WHEELS_DIR" "$PREFIX") || return 1
if [ -z "$DOWNLOAD_LIST" ]; then if [ -z "$DOWNLOAD_LIST" ]; then
@@ -132,6 +169,22 @@ for a in assets:
return 0 return 0
fi fi
# Parse the optional '#commit:HASH' sentinel emitted by the Python script
local REMOTE_COMMIT=""
local DOWNLOAD_ENTRIES=""
while IFS= read -r LINE; do
if [[ "$LINE" == "#commit:"* ]]; then
REMOTE_COMMIT="${LINE#"#commit:"}"
elif [[ -n "$LINE" ]]; then
DOWNLOAD_ENTRIES+="$LINE"$'\n'
fi
done <<< "$DOWNLOAD_LIST"
if [ -z "$DOWNLOAD_ENTRIES" ]; then
echo "All $PREFIX wheels are up to date — skipping download."
return 0
fi
# Back up existing wheels so we never leave a mix of old and new on failure # Back up existing wheels so we never leave a mix of old and new on failure
local DL_BACKUP="$WHEELS_DIR/.backup-download-${PREFIX}" local DL_BACKUP="$WHEELS_DIR/.backup-download-${PREFIX}"
rm -rf "$DL_BACKUP" && mkdir -p "$DL_BACKUP" rm -rf "$DL_BACKUP" && mkdir -p "$DL_BACKUP"
@@ -162,9 +215,13 @@ for a in assets:
rm -rf "$DL_BACKUP" rm -rf "$DL_BACKUP"
return 1 return 1
fi fi
done <<< "$DOWNLOAD_LIST" done <<< "$DOWNLOAD_ENTRIES"
rm -rf "$DL_BACKUP" rm -rf "$DL_BACKUP"
if [ -n "$REMOTE_COMMIT" ]; then
echo "$REMOTE_COMMIT" > "$WHEELS_DIR/.${PREFIX}-commit"
echo "Recorded $PREFIX commit hash: $REMOTE_COMMIT"
fi
return 0 return 0
} }