Files
spark-vllm-docker/mods/fix-qwen3-coder-next/_triton_alloc_setup.py
2026-02-12 15:56:32 -08:00

10 lines
257 B
Python

# Best-effort patch: replace Triton's NullAllocator with an allocator backed
# by PyTorch's CUDA caching allocator, presumably so Triton-internal scratch
# buffers come from the same pool as PyTorch tensors — TODO confirm intent
# against the surrounding mod (fix-qwen3-coder-next).
try:
    import triton.runtime._allocation as _alloc
    import torch

    def _torch_caching_alloc(size, alignment, stream):
        """Allocate ``size`` bytes on ``stream`` via torch's caching allocator.

        ``alignment`` is accepted to match the allocator call signature but is
        not forwarded; torch's allocator chooses its own alignment.
        """
        return torch.cuda.caching_allocator_alloc(size, stream=stream)

    # Patch at the class level; staticmethod keeps the plain
    # (size, alignment, stream) signature with no implicit instance arg.
    _alloc.NullAllocator.__call__ = staticmethod(_torch_caching_alloc)
except (ImportError, AttributeError):
    # Triton absent, or this private module/class was moved or renamed in a
    # newer release — leave Triton's default allocation behavior untouched.
    # (Previously `except Exception`, which also hid unexpected errors.)
    pass