diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 3b37abda35..b823c454c3 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -590,6 +590,8 @@ jobs: run: test/external/process_replay/reset.py - name: openpilot compile3 0.11.0 driving_vision run: BENCHMARK_LOG=openpilot_0_11_0_vision PYTHONPATH="." ASSERT_MIN_STEP_TIME=17 DEV=QCOM FLOAT16=1 IMAGE=1 NOLOCALS=1 taskset -c 4-7 python3 examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/v0.11.0/selfdrive/modeld/models/driving_vision.onnx + - name: IR3 openpilot compile3 0.11.0 driving_vision + run: BENCHMARK_LOG=ir3_openpilot_0_11_0_vision PYTHONPATH="." ASSERT_MIN_STEP_TIME=17 DEV=QCOM QCOM_IR3=1 FLOAT16=1 IMAGE=1 NOLOCALS=1 taskset -c 4-7 python3 examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/v0.11.0/selfdrive/modeld/models/driving_vision.onnx - name: openpilot compile3 0.11.0 driving_policy run: BENCHMARK_LOG=openpilot_0_11_0_policy PYTHONPATH="." ASSERT_MIN_STEP_TIME=3 DEV=QCOM FLOAT16=1 IMAGE=1 NOLOCALS=1 taskset -c 4-7 python3 examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/v0.11.0/selfdrive/modeld/models/driving_policy.onnx - name: openpilot compile3 0.11.0 dmonitoring diff --git a/tinygrad/runtime/ops_qcom.py b/tinygrad/runtime/ops_qcom.py index c97d63a9d9..154b69e9ba 100644 --- a/tinygrad/runtime/ops_qcom.py +++ b/tinygrad/runtime/ops_qcom.py @@ -202,7 +202,8 @@ class QCOMArgsState(HCQArgsState): ubos = [b for i,b in enumerate(bufs) for _,dt in prg.buf_dtypes[i] if not isinstance(dt, ImageDType)] uavs = [(dt,b) for i,b in enumerate(bufs) for _,dt in prg.buf_dtypes[i] if isinstance(dt, ImageDType)] - ibos, texs = uavs[:prg.ibo_cnt], uavs[prg.ibo_cnt:] + # NIR can reorder images to different texture slots + ibos, texs = uavs[:prg.ibo_cnt], [uavs[prg.ibo_cnt + (prg.tex_to_image[i] if prg.NIR else i)] for i in range(prg.tex_cnt)] for cnst_val,cnst_off,cnst_sz in prg.consts_info: to_mv(cast(int, self.buf.va_addr) + cnst_off, cnst_sz)[:] = cnst_val.to_bytes(cnst_sz, byteorder='little') @@ -243,6 +244,7 @@ class QCOMProgram(HCQProgram): # see https://elixir.bootlin.com/mesa/mesa-25.3.0/source/src/freedreno/ir3/ir3_shader.h#L525 # and https://elixir.bootlin.com/mesa/mesa-25.3.0/source/src/freedreno/ir3/ir3_compiler_nir.c#L5389 self.samp_cnt, self.tex_cnt, self.ibo_cnt = (nt:=v.image_mapping.num_tex), nt, v.num_uavs - nt + self.tex_to_image = v.image_mapping.tex_to_image[:] # IR3 outputs a sampler for every texture (https://elixir.bootlin.com/mesa/mesa-25.3.0/source/src/freedreno/ir3/ir3_compiler_nir.c#L1714) self.samplers = [qreg.a6xx_tex_samp_0(wrap_s=(clamp_mode:=mesa.A6XX_TEX_CLAMP_TO_BORDER), wrap_t=clamp_mode, wrap_r=clamp_mode), qreg.a6xx_tex_samp_1(unnorm_coords=True, cubemapseamlessfiltoff=True), 0, 0] * self.samp_cnt diff --git a/tinygrad/runtime/support/c.py b/tinygrad/runtime/support/c.py index dddaaa90b5..a4f092d8f8 100644 --- a/tinygrad/runtime/support/c.py +++ b/tinygrad/runtime/support/c.py @@ -36,6 +36,8 @@ if TYPE_CHECKING: @overload def __getitem__(self: Array[_SimpleCData[V], Any], key: int) -> V: ... @overload + def __getitem__(self: Array[T, Any], key: slice) -> list[T]: ... + @overload def __getitem__(self: Array[T, Any], key: int) -> T: ... def __getitem__(self, key) -> Any: ... @overload