Another fix for the Qwen mod, as the slow PR was reverted in main
This commit is contained in:
@@ -4,15 +4,16 @@ set -e
|
|||||||
echo "Patching Qwen3-Coder-Next crashing on start"
|
echo "Patching Qwen3-Coder-Next crashing on start"
|
||||||
patch -p1 -d /usr/local/lib/python3.12/dist-packages < fix_crash.diff || echo "Patch is not applicable, skipping"
|
patch -p1 -d /usr/local/lib/python3.12/dist-packages < fix_crash.diff || echo "Patch is not applicable, skipping"
|
||||||
|
|
||||||
# echo "Reverting PR #34279 that causes slowness"
|
# Restoring this one because the PR has been reverted in main
|
||||||
# patch -p1 -R -d /usr/local/lib/python3.12/dist-packages < fix_slowness.diff || echo "Reversing PR #34279 failed, skipping"
|
echo "Reverting PR #34279 that causes slowness"
|
||||||
|
patch -p1 -R -d /usr/local/lib/python3.12/dist-packages < fix_slowness.diff || echo "Can't revert PR #34279, skipping as it was reverted in recent commits"
|
||||||
|
|
||||||
if grep -q "Cast to int64 to prevent overflow in stride" /usr/local/lib/python3.12/dist-packages/vllm/model_executor/layers/fused_moe/fused_moe.py; then
|
# if grep -q "Cast to int64 to prevent overflow in stride" /usr/local/lib/python3.12/dist-packages/vllm/model_executor/layers/fused_moe/fused_moe.py; then
|
||||||
echo "PR #34507 already applied, skipping."
|
# echo "PR #34507 already applied, skipping."
|
||||||
else
|
# else
|
||||||
echo "Applying PR #34507 for slowness fix..."
|
# echo "Applying PR #34507 for slowness fix..."
|
||||||
curl -L https://patch-diff.githubusercontent.com/raw/vllm-project/vllm/pull/34507.diff | patch -p1 -d /usr/local/lib/python3.12/dist-packages
|
# curl -L https://patch-diff.githubusercontent.com/raw/vllm-project/vllm/pull/34507.diff | patch -p1 -d /usr/local/lib/python3.12/dist-packages
|
||||||
fi
|
# fi
|
||||||
|
|
||||||
echo "Fixing Triton allocator bug"
|
echo "Fixing Triton allocator bug"
|
||||||
cp _triton* /usr/local/lib/python3.12/dist-packages/
|
cp _triton* /usr/local/lib/python3.12/dist-packages/
|
||||||
|
|||||||
Reference in New Issue
Block a user