[ROCM] Core Functionality for AMD (#1983)

* This PR adds a third-party backend for Triton that works on AMD * This exposes a lot of the work that has been done in our [fork](https://github.com/ROCmSoftwarePlatform/triton) * Most unit tests in `test_core.py` pass * It skips some unit tests for various reasons * We plan to follow up with more PRs improving functionality and performance in the future --------- Co-authored-by: Philippe Tillet <phil@openai.com>
2026-04-05 03:01:17 -04:00 · 2023-10-16 15:06:07 -05:00
parent 833c9b985f
commit 09ba348f87
17 changed files with 264 additions and 377 deletions
--- a/python/triton/runtime/jit.py
+++ b/python/triton/runtime/jit.py
@@ -383,20 +383,20 @@ def {self.fn.__name__}({args_signature}grid=None, num_warps=None, num_ctas=1, nu
        device_type = self._conclude_device_type(device_types, {pinned_memory_flags})

    device_backend = None
-    if device_type not in ['cuda', 'hip']:
+    if device_type not in ['cuda']:
        device_backend = get_backend(device_type)
        if device_backend is None:
            raise ValueError('Cannot find backend for ' + device_type)

    if device is None:
-        if device_type in ['cuda', 'hip']:
+        if device_type in ['cuda']:
            device = get_current_device()
            set_current_device(device)
        else:
            device = device_backend.get_current_device()
            device_backend.set_current_device(device)
    if stream is None and not warmup:
-        if device_type in ['cuda', 'hip']:
+        if device_type in ['cuda']:
            stream = get_cuda_stream(device)
        else:
            stream = device_backend.get_stream()