From ef07046d5135b4b8f05aa2b09d4fc09b47d66b9d Mon Sep 17 00:00:00 2001
From: Eugene Rakhmatulin
Date: Tue, 17 Feb 2026 12:45:17 -0800
Subject: [PATCH] Now using an opened PR for glm-4.7-flash crash fix in the mod

---
Review note: the hunk below was revised after this patch was generated, so the
post-image blob id on the "index" line no longer matches the resulting file.

 mods/fix-glm-4.7-flash-AWQ/run.sh | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/mods/fix-glm-4.7-flash-AWQ/run.sh b/mods/fix-glm-4.7-flash-AWQ/run.sh
index e696e8e..4d7ba09 100644
--- a/mods/fix-glm-4.7-flash-AWQ/run.sh
+++ b/mods/fix-glm-4.7-flash-AWQ/run.sh
@@ -3,6 +3,18 @@ set -e
 echo "--- Applying GLM 4.7 AWQ speed patch..."
 patch -p1 -d / < glm47_flash.patch
 echo "=== OK"
-echo "--- Applying vLLM crash patch..."
-patch -p1 -d /usr/local/lib/python3.12/dist-packages < glm47_vllm_bug.patch || echo "=== Patch is not applicable, skipping"
+echo "--- Applying vLLM crash patch (34695)..."
+# Apply upstream vLLM PR 34695 unless its marker line is already present.
+SITE_PACKAGES=/usr/local/lib/python3.12/dist-packages
+MLA_FILE="$SITE_PACKAGES/vllm/model_executor/layers/attention/mla_attention.py"
+PR_DIFF_URL=https://patch-diff.githubusercontent.com/raw/vllm-project/vllm/pull/34695.diff
+# A missing file makes grep fail, so that case falls through to patching too.
+if grep -qF 'and hasattr(self.kv_b_proj, "weight")' "$MLA_FILE" 2>/dev/null; then
+    echo "=== PR 34695 is already applied, skipping"
+elif ! pr_diff=$(curl -fsSL "$PR_DIFF_URL"); then
+    # -f keeps HTTP error pages from being fed to patch as if they were a diff.
+    echo "=== Warning: Failed to download PR 34695, continuing..."
+elif ! printf '%s\n' "$pr_diff" | patch -p1 -d "$SITE_PACKAGES"; then
+    echo "=== Warning: Failed to apply PR 34695, continuing..."
+fi
 echo "=== OK"