nv: minimal hevc (#13502)

* nv: minimal hevc * validate * not needed * tralin * var * cpu * fxi * desc * move * cleanup
2026-01-06 21:53:53 -05:00 · 2025-11-30 16:46:55 +03:00
parent fd373fea7a
commit 455dd88236
23 changed files with 7417 additions and 37 deletions
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -318,6 +318,8 @@ jobs:
    # TODO: too slow
    # - name: Fuzz Padded Tensor Core GEMM (PTX)
    #   run: NV=1 NV_PTX=1 M_START=12 M_STOP=20 M_STEP=1 N_START=6 N_STOP=10 N_STEP=1 K_START=28 K_STOP=36 K_STEP=1 HALF=1 TC_OPT=2 python3 ./extra/gemm/fuzz_matmul.py
+    - name: HEVC Decode Benchmark
+      run: VALIDATE=1 MAX_FRAMES=100 NV=1 PYTHONPATH=. python3 extra/hevc/decode.py
    - name: Train MNIST
      run: time PYTHONPATH=. NV=1 TARGET_EVAL_ACC_PCT=96.0 python3 examples/beautiful_mnist.py | tee beautiful_mnist.txt
    # TODO: too slow
--- a/extra/hevc/.gitignore
+++ b/extra/hevc/.gitignore
@@ -0,0 +1 @@
+out/
--- a/extra/hevc/decode.py
+++ b/extra/hevc/decode.py
@@ -0,0 +1,71 @@
+import argparse, os, hashlib
+from tinygrad.helpers import getenv, DEBUG, round_up, Timing, tqdm, fetch
+from extra.hevc.hevc import parse_hevc_file_headers, untile_nv12, to_bgr, nv_gpu
+from tinygrad import Tensor, dtypes, Device, Variable
+
+if __name__ == "__main__":
+  # Driver: parse an HEVC elementary stream on the host, upload each picture's bitstream to the NV device,
+  # decode it there, then either compare against pickled reference frames (VALIDATE=1) or write PNG files.
+  parser = argparse.ArgumentParser()
+  parser.add_argument("--input_file", type=str, default="")
+  parser.add_argument("--output_dir", type=str, default="extra/hevc/out")
+  args = parser.parse_args()
+
+  validate = getenv("VALIDATE", 0)
+  os.makedirs(args.output_dir, exist_ok=True)
+
+  if args.input_file == "":
+    url = "https://github.com/haraschax/filedump/raw/09a497959f7fa6fd8dba501a25f2cdb3a41ecb12/comma_video.hevc"
+    hevc_tensor = Tensor.from_url(url, device="CPU")
+  else:
+    hevc_tensor = Tensor.empty(os.stat(args.input_file).st_size, dtype=dtypes.uint8, device=f"disk:{args.input_file}").to("CPU")
+
+  dat = bytes(hevc_tensor.data())
+  # the hash only selects which reference set to validate against
+  dat_hash = hashlib.md5(dat).hexdigest()
+
+  with Timing("prep infos: "):
+    # NOTE(review): dat_nv is never read afterwards; confirm whether this transfer is still wanted
+    dat_nv = hevc_tensor.to("NV")
+    opaque, frame_info, w, h, luma_w, luma_h, chroma_off = parse_hevc_file_headers(dat)
+
+  frame_info = frame_info[:getenv("MAX_FRAMES", len(frame_info))]
+
+  # move all needed data to gpu
+  all_slices = []
+  with Timing("prep slices to gpu: "):
+    opaque_nv = opaque.to("NV").contiguous().realize()
+
+    for offset, sz, *_ in frame_info:
+      all_slices.append(hevc_tensor[offset:offset+sz].to("NV").contiguous().realize())
+
+    Device.default.synchronize()
+
+  out_image_size = luma_h + (luma_h + 1) // 2, round_up(luma_w, 64)
+  # default=0 keeps an empty frame list (e.g. MAX_FRAMES=0) from raising in max()
+  max_hist = max((history_sz for _, _, _, history_sz, _ in frame_info), default=0)
+  pos = Variable("pos", 0, max_hist + 1)
+
+  history = []
+  out_images = []
+  with Timing("decoding whole file: ", on_exit=(lambda et: f", {len(frame_info)} frames, {len(frame_info)/(et/1e9):.2f} fps")):
+    for i, (_, _, frame_pos, history_sz, is_hist) in enumerate(frame_info):
+      # keep only as many previously decoded reference frames as the parser recorded for this picture
+      history = history[-history_sz:] if history_sz > 0 else []
+
+      outimg = all_slices[i].decode_hevc_frame(pos.bind(frame_pos), out_image_size, opaque_nv[i], history).realize()
+      out_images.append(outimg)
+      if is_hist: history.append(outimg)
+
+    Device.default.synchronize()
+
+  if validate:
+    import pickle
+    # NOTE(security): pickle.load executes code embedded in the file; only use reference sets from a trusted source
+    if dat_hash == "b813bfdbec194fd17fdf0e3ceb8cea1c":
+      url = "https://github.com/nimlgen/hevc_validate_set/raw/refs/heads/main/decoded_frames_b813bfdbec194fd17fdf0e3ceb8cea1c.pkl"
+      with fetch(url).open("rb") as f: decoded_frames = pickle.load(f)
+    else:
+      with open(f"extra/hevc/decoded_frames_{dat_hash}.pkl", "rb") as f: decoded_frames = pickle.load(f)
+  else: import cv2
+
+  # enumerate() has no length, so the total is passed explicitly for the progress bar
+  for i, img in tqdm(enumerate(out_images), total=len(out_images)):
+    if validate:
+      # frames missing from (or empty in) the reference set are skipped, not treated as failures
+      if i < len(decoded_frames) and len(decoded_frames[i]) > 0:
+        img = untile_nv12(img, h, w, luma_w, chroma_off).realize()
+        assert img.data() == decoded_frames[i], f"Frame {i} does not match reference decoder!"
+        print(f"Frame {i} matches reference decoder!")
+    else:
+      img = to_bgr(img, h, w, luma_w, chroma_off).realize()
+      cv2.imwrite(f"{args.output_dir}/out_frame_{i:04d}.png", img.numpy())
--- a/extra/hevc/hevc.py
+++ b/extra/hevc/hevc.py
@@ -0,0 +1,449 @@
+import dataclasses, enum, argparse, os, itertools, time, ctypes
+from typing import Any
+from tinygrad import Tensor, dtypes, Device, TinyJit
+from tinygrad.helpers import DEBUG, round_up, ceildiv, Timing, prod
+from tinygrad.runtime.autogen import avcodec, nv_570 as nv_gpu
+
+class BitReader:
+  """MSB-first bit reader over a bytes object, with the Exp-Golomb decoders used by the HEVC syntax parsers."""
+  def __init__(self, data:bytes):
+    # reader: byte iterator over data; bits/current_bits: buffered, not yet consumed bits and how many there are
+    # read_bits: number of bits pulled out of data so far; total: size of data in bits
+    self.reader, self.current_bits, self.bits, self.read_bits, self.total = iter(data), 0, 0, 0, len(data) * 8
+  def empty(self): return self.read_bits == self.total and self.current_bits == 0
+  def peak_bits(self, n):
+    """Return the next n bits as an int without consuming them. Running past the end of data raises StopIteration."""
+    while self.current_bits < n:
+      self.bits = (self.bits << 8) | next(self.reader)
+      self.current_bits += 8
+      self.read_bits += 8
+    return (self.bits >> (self.current_bits - n)) & ((1 << n) - 1)
+  def _next_bits(self, n):
+    val = self.peak_bits(n)
+    # drop the n bits that were just returned from the buffer
+    self.bits &= (1 << (self.current_bits - n)) - 1
+    self.current_bits -= n
+    return val
+
+  def u(self, n): return self._next_bits(n)
+
+  # 9.2 Parsing process for 0-th order Exp-Golomb codes
+  def ue_v(self):
+    # count the zero bits before the terminating 1, then read that many suffix bits
+    leading_zero_bits = 0
+    while self.u(1) == 0: leading_zero_bits += 1
+    return (1 << leading_zero_bits) - 1 + self.u(leading_zero_bits)
+
+  # 9.2.2 Mapping process for signed Exp-Golomb codes
+  def se_v(self):
+    # codeNum k maps to (-1)^(k+1) * Ceil(k / 2), i.e. 0, 1, -1, 2, -2, ...
+    # (`-1 ** x` parses as `-(1 ** x)`, so the sign is picked explicitly from the parity of k)
+    k = self.ue_v()
+    return (k + 1) // 2 if k & 1 else -(k // 2)
+
+# 7.3.1.1 General NAL unit syntax
+def _hevc_get_rbsp(dat:bytes, off=0) -> bytes:
+  """Return dat[off:] with the trailing 0x03 of every 00 00 03 sequence removed. off is expected to be non-negative."""
+  # bytes.replace scans left to right over non-overlapping matches, exactly like a byte-by-byte loop would,
+  # but runs in linear time instead of re-copying the output for every appended byte
+  return bytes(dat[off:]).replace(b'\x00\x00\x03', b'\x00\x00')
+
+class HevcSlice:
+  """Base class with the syntax-structure parsers shared by the SPS and slice segment header classes."""
+  # 7.3.3 Profile, tier and level syntax
+  def profile_tier_level(self, r:BitReader, enable:bool, max_sub_layers:int):
+    assert enable and max_sub_layers == 0, "no sublayers supported"
+    # the 88 bits ahead of general_level_idc are kept as one opaque value, they are not decoded
+    self._notimpl_profile_tier_level = r.u(88)
+    self.general_level_idc = r.u(8)
+
+  # 7.3.7 Short-term reference picture set syntax
+  def st_ref_pic_set(self, r:BitReader, stRpsIdx:int, num_short_term_ref_pic_sets:int=0, sps=None):
+    """
+    Consume one st_ref_pic_set() structure from r.
+
+    Only num_negative_pics / num_positive_pics of an explicitly coded set are stored on self; every other
+    syntax element is read to advance the bit position and then dropped.
+    When the set is predicted from another one, the size of the reference set is taken from `sps`.
+    """
+    inter_ref_pic_set_prediction_flag = r.u(1) if stRpsIdx != 0 else 0
+
+    if inter_ref_pic_set_prediction_flag:
+      # without `sps` there is nothing to take the reference set size from; fail with a clear message
+      assert sps is not None, "inter_ref_pic_set_prediction without an sps not implemented"
+      if stRpsIdx == num_short_term_ref_pic_sets: r.ue_v() # delta_idx_minus1
+      r.u(1) # delta_rps_sign
+      r.ue_v() # abs_delta_rps_minus1
+
+      # NOTE(review): uses the counts stored on sps (the last explicitly coded set), not those of RefRpsIdx
+      for _ in range(sps.num_negative_pics + sps.num_positive_pics + 1):
+        # used_by_curr_pic_flag; use_delta_flag follows only when it is 0
+        if not r.u(1): r.u(1)
+    else:
+      self.num_negative_pics = r.ue_v()
+      self.num_positive_pics = r.ue_v()
+      # each entry is delta_poc_sX_minus1 followed by used_by_curr_pic_sX_flag
+      for _ in range(self.num_negative_pics): r.ue_v(), r.u(1)
+      for _ in range(self.num_positive_pics): r.ue_v(), r.u(1)
+
+# 7.3.2.2 Sequence parameter set RBSP syntax
+class SPS(HevcSlice):
+  """Sequence parameter set. __init__ consumes the fields from r in bitstream order and stores them as attributes."""
+  def __init__(self, r:BitReader):
+    self.sps_video_parameter_set_id = r.u(4)
+    self.sps_max_sub_layers_minus1 = r.u(3)
+    self.sps_temporal_id_nesting_flag = r.u(1)
+
+    # asserts that sps_max_sub_layers_minus1 is 0 (sub-layers are not supported)
+    self.profile_tier_level(r, True, self.sps_max_sub_layers_minus1)
+
+    self.sps_seq_parameter_set_id = r.ue_v()
+    self.chroma_format_idc = r.ue_v()
+    self.separate_colour_plane_flag = r.u(1) if self.chroma_format_idc == 3 else 0
+    self.pic_width_in_luma_samples = r.ue_v()
+    self.pic_height_in_luma_samples = r.ue_v()
+    self.conformance_window_flag = r.u(1)
+
+    # the four offsets default to 0 when no conformance window is signalled
+    if self.conformance_window_flag:
+      self.conf_win_left_offset = r.ue_v()
+      self.conf_win_right_offset = r.ue_v()
+      self.conf_win_top_offset = r.ue_v()
+      self.conf_win_bottom_offset = r.ue_v()
+    else: self.conf_win_left_offset = self.conf_win_right_offset = self.conf_win_top_offset = self.conf_win_bottom_offset = 0
+
+    # stored as actual bit depths: the coded value plus 8
+    self.bit_depth_luma = r.ue_v() + 8
+    self.bit_depth_chroma = r.ue_v() + 8
+    self.log2_max_pic_order_cnt_lsb_minus4 = r.ue_v()
+    self.sps_sub_layer_ordering_info_present_flag = r.u(1)
+    self.sps_max_dec_pic_buffering, self.sps_max_num_reorder_pics, self.sps_max_latency_increase_plus1 = [], [], []
+    for i in range((0 if self.sps_sub_layer_ordering_info_present_flag else self.sps_max_sub_layers_minus1), self.sps_max_sub_layers_minus1 + 1):
+      self.sps_max_dec_pic_buffering.append(r.ue_v() + 1)
+      self.sps_max_num_reorder_pics.append(r.ue_v())
+      self.sps_max_latency_increase_plus1.append(r.ue_v())
+    # block sizes are stored as absolute log2 values: the max is the min plus the coded difference
+    self.log2_min_luma_coding_block_size = r.ue_v() + 3
+    self.log2_max_luma_coding_block_size = self.log2_min_luma_coding_block_size + r.ue_v()
+    self.log2_min_transform_block_size = r.ue_v() + 2
+    self.log2_max_transform_block_size = self.log2_min_transform_block_size + r.ue_v()
+    self.max_transform_hierarchy_depth_inter = r.ue_v()
+    self.max_transform_hierarchy_depth_intra = r.ue_v()
+    if scaling_list_enabled_flag := r.u(1):
+      if sps_scaling_list_data_present_flag := r.u(1): assert False, "scaling_list_data parsing not implemented"
+    self.amp_enabled_flag = r.u(1)
+    self.sample_adaptive_offset_enabled_flag = r.u(1)
+    self.pcm_enabled_flag = r.u(1)
+    assert self.pcm_enabled_flag == 0, "pcm not implemented"
+    self.num_short_term_ref_pic_sets = r.ue_v()
+    # NOTE(review): no sps is passed here, so st_ref_pic_set cannot handle a set that is predicted from another set
+    for i in range(self.num_short_term_ref_pic_sets):
+      self.st_ref_pic_set(r, i, self.num_short_term_ref_pic_sets)
+    self.long_term_ref_pics_present_flag = r.u(1)
+    if self.long_term_ref_pics_present_flag: assert False, "long_term_ref_pics parsing not implemented"
+    self.sps_temporal_mvp_enabled_flag = r.u(1)
+    self.strong_intra_smoothing_enabled_flag = r.u(1)
+    # parsing stops here; anything after strong_intra_smoothing_enabled_flag in the RBSP is not read
+
+# 7.3.2.3 Picture parameter set RBSP syntax
+class PPS(HevcSlice):
+  """Picture parameter set. __init__ consumes the fields from r in bitstream order and stores them as attributes."""
+  def __init__(self, r:BitReader):
+    self.pps_pic_parameter_set_id = r.ue_v()
+    self.pps_seq_parameter_set_id = r.ue_v()
+    self.dependent_slice_segments_enabled_flag = r.u(1)
+    self.output_flag_present_flag = r.u(1)
+    self.num_extra_slice_header_bits = r.u(3)
+    self.sign_data_hiding_enabled_flag = r.u(1)
+    self.cabac_init_present_flag = r.u(1)
+    # stored as actual counts: the coded value plus 1
+    self.num_ref_idx_l0_default_active = r.ue_v() + 1
+    self.num_ref_idx_l1_default_active = r.ue_v() + 1
+    # stored as the coded signed value plus 26
+    self.init_qp = r.se_v() + 26
+    self.constrained_intra_pred_flag = r.u(1)
+    self.transform_skip_enabled_flag = r.u(1)
+    self.cu_qp_delta_enabled_flag = r.u(1)
+    # diff_cu_qp_delta_depth only exists as an attribute when cu_qp_delta_enabled_flag is set
+    if self.cu_qp_delta_enabled_flag: self.diff_cu_qp_delta_depth = r.ue_v()
+
+    self.pps_cb_qp_offset = r.se_v()
+    self.pps_cr_qp_offset = r.se_v()
+    self.pps_slice_chroma_qp_offsets_present_flag = r.u(1)
+    self.weighted_pred_flag = r.u(1)
+    self.weighted_bipred_flag = r.u(1)
+    self.transquant_bypass_enabled_flag = r.u(1)
+    self.tiles_enabled_flag = r.u(1)
+    self.entropy_coding_sync_enabled_flag = r.u(1)
+    # the tile attributes below only exist when tiles_enabled_flag is set
+    if self.tiles_enabled_flag:
+      self.num_tile_columns_minus1 = r.ue_v()
+      self.num_tile_rows_minus1 = r.ue_v()
+      self.uniform_spacing_flag = r.u(1)
+      self.column_width_minus1, self.row_height_minus1 = [], []
+      # explicit sizes are coded for all but the last column / row
+      if not self.uniform_spacing_flag:
+        for i in range(self.num_tile_columns_minus1): self.column_width_minus1.append(r.ue_v())
+        for i in range(self.num_tile_rows_minus1): self.row_height_minus1.append(r.ue_v())
+      self.loop_filter_across_tiles_enabled_flag = r.u(1)
+    self.loop_filter_across_slices_enabled_flag = r.u(1)
+    self.deblocking_filter_control_present_flag = r.u(1)
+    if self.deblocking_filter_control_present_flag: assert False, "deblocking_filter parsing not implemented"
+    self.scaling_list_data_present_flag = r.u(1)
+    if self.scaling_list_data_present_flag: assert False, "scaling_list_data parsing not implemented"
+    self.lists_modification_present_flag = r.u(1)
+    self.log2_parallel_merge_level = r.ue_v() + 2
+    # parsing stops here; anything after log2_parallel_merge_level in the RBSP is not read
+
+# 7.3.6 Slice segment header syntax
+class SliceSegment(HevcSlice):
+  """
+  Leading part of a slice segment header.
+
+  Besides the parsed syntax elements, the header records bit positions (counted from the start of the data
+  handed to r): sw_skip_start / sw_skip_end and short_term_ref_pics_in_slice_start / _end.
+  For a dependent slice segment only the fields up to slice_segment_address are set.
+  """
+  def __init__(self, r:BitReader, nal_unit_type:int, sps:SPS, pps:PPS):
+    self.first_slice_segment_in_pic_flag = r.u(1)
+    if nal_unit_type >= avcodec.HEVC_NAL_BLA_W_LP and nal_unit_type <= avcodec.HEVC_NAL_RSV_IRAP_VCL23:
+      self.no_output_of_prior_pics_flag = r.u(1)
+    self.slice_pic_parameter_set_id = r.ue_v()
+
+    # the default goes first so that a flag read from the bitstream below is not overwritten
+    self.dependent_slice_segment_flag = 0
+    if not self.first_slice_segment_in_pic_flag:
+      if pps.dependent_slice_segments_enabled_flag:
+        self.dependent_slice_segment_flag = r.u(1)
+      # slice_segment_address is a fixed-length u(v) field of Ceil(Log2(PicSizeInCtbsY)) bits, not Exp-Golomb
+      ctb_size = 1 << sps.log2_max_luma_coding_block_size
+      pic_size_in_ctbs = ceildiv(sps.pic_width_in_luma_samples, ctb_size) * ceildiv(sps.pic_height_in_luma_samples, ctb_size)
+      self.slice_segment_address = r.u((pic_size_in_ctbs - 1).bit_length())
+
+    if not self.dependent_slice_segment_flag:
+      r.u(pps.num_extra_slice_header_bits) # extra bits ignored
+      self.slice_type = r.ue_v()
+
+      # r.read_bits - r.current_bits is the number of bits consumed so far
+      self.sw_skip_start = r.read_bits - r.current_bits
+      self.pic_output_flag = r.u(1) if pps.output_flag_present_flag else 0
+      self.colour_plane_id = r.u(2) if sps.separate_colour_plane_flag else 0
+
+      if nal_unit_type not in {avcodec.HEVC_NAL_IDR_W_RADL, avcodec.HEVC_NAL_IDR_N_LP}:
+        self.slice_pic_order_cnt_lsb = r.u(sps.log2_max_pic_order_cnt_lsb_minus4 + 4)
+
+        self.short_term_ref_pic_set_sps_flag = r.u(1)
+        if not self.short_term_ref_pic_set_sps_flag:
+          self.short_term_ref_pics_in_slice_start = r.read_bits - r.current_bits
+          # in a slice header stRpsIdx equals num_short_term_ref_pic_sets; passing both lets
+          # st_ref_pic_set consume delta_idx_minus1 when the set is predicted from an SPS set
+          self.st_ref_pic_set(r, sps.num_short_term_ref_pic_sets, sps.num_short_term_ref_pic_sets, sps=sps)
+          self.short_term_ref_pics_in_slice_end = r.read_bits - r.current_bits
+        elif sps.num_short_term_ref_pic_sets > 1: assert False, "short_term_ref_pic_set parsing not implemented"
+
+        if sps.long_term_ref_pics_present_flag: assert False, "long_term_ref_pics parsing not implemented"
+
+        self.sw_skip_end = r.read_bits - r.current_bits
+        self.slice_temporal_mvp_enabled_flag = r.u(1) if sps.sps_temporal_mvp_enabled_flag else 0
+      else: self.slice_pic_order_cnt_lsb, self.sw_skip_end = 0, self.sw_skip_start
+
+      if sps.sample_adaptive_offset_enabled_flag:
+        r.u(1) # slice_sao_luma_flag
+        ChromaArrayType = sps.chroma_format_idc if sps.separate_colour_plane_flag == 0 else 0
+        if ChromaArrayType != 0: r.u(1) # slice_sao_chroma_flag
+
+      # both P and B slices carry num_ref_idx_active_override_flag; only B slices carry the l1 count
+      if self.slice_type in {avcodec.HEVC_SLICE_P, avcodec.HEVC_SLICE_B}:
+        if r.u(1): # num_ref_idx_active_override_flag
+          r.ue_v() # num_ref_idx_l0_active_minus1
+          if self.slice_type == avcodec.HEVC_SLICE_B: r.ue_v() # num_ref_idx_l1_active_minus1
+
+def fill_sps_into_dev_context(device_ctx, sps:SPS):
+  """Copy the SPS values that device_ctx mirrors; each one is stored under the same attribute name."""
+  mirrored_fields = (
+    "chroma_format_idc", "pic_width_in_luma_samples", "pic_height_in_luma_samples",
+    "bit_depth_luma", "bit_depth_chroma", "log2_max_pic_order_cnt_lsb_minus4",
+    "log2_min_luma_coding_block_size", "log2_max_luma_coding_block_size",
+    "log2_min_transform_block_size", "log2_max_transform_block_size",
+    "amp_enabled_flag", "pcm_enabled_flag", "sample_adaptive_offset_enabled_flag",
+    "sps_temporal_mvp_enabled_flag", "strong_intra_smoothing_enabled_flag",
+  )
+  for field in mirrored_fields: setattr(device_ctx, field, getattr(sps, field))
+
+def fill_pps_into_dev_context(device_ctx, pps:PPS):
+  """Copy the PPS values that device_ctx mirrors; each one is stored under the same attribute name."""
+  mirrored_fields = (
+    "sign_data_hiding_enabled_flag", "cabac_init_present_flag",
+    "num_ref_idx_l0_default_active", "num_ref_idx_l1_default_active", "init_qp",
+    "cu_qp_delta_enabled_flag", "diff_cu_qp_delta_depth", "pps_cb_qp_offset", "pps_cr_qp_offset",
+    "pps_slice_chroma_qp_offsets_present_flag", "weighted_pred_flag", "weighted_bipred_flag",
+    "transquant_bypass_enabled_flag", "tiles_enabled_flag", "entropy_coding_sync_enabled_flag",
+    "loop_filter_across_slices_enabled_flag", "deblocking_filter_control_present_flag",
+    "scaling_list_data_present_flag", "lists_modification_present_flag", "log2_parallel_merge_level",
+    "loop_filter_across_tiles_enabled_flag",
+  )
+  # these two are only set on the PPS when the corresponding feature is enabled, so they fall back to 0
+  zero_when_absent = {"diff_cu_qp_delta_depth", "loop_filter_across_tiles_enabled_flag"}
+  for field in mirrored_fields:
+    value = getattr(pps, field, 0) if field in zero_when_absent else getattr(pps, field)
+    setattr(device_ctx, field, value)
+
+def parse_hevc_file_headers(dat:bytes, device="NV"):
+  """
+  Walk the NAL units of an HEVC byte stream and build one device picture context per picture.
+
+  Returns a 7-tuple (opaque, res, w, h, luma_w, luma_h, chroma_off):
+    opaque     - Tensor on `device` of shape (number of pictures, 0x300) holding the serialized contexts
+    res        - one tuple per picture: (offset of its first NAL unit in dat, summed length of its NAL units,
+                 curr_pic_idx, number of history entries when it was flushed, is_hist)
+    w, h       - picture size with the conformance window offsets subtracted
+    luma_w/h   - pic_width_in_luma_samples / pic_height_in_luma_samples of the last SPS seen
+    chroma_off - both luma dimensions rounded up to 64 and multiplied
+
+  NOTE(review): `sps` and `pps` only get bound when an SPS / PPS NAL unit is seen, so a stream whose
+  slices come first (or that has no parameter sets) fails with a NameError.
+  """
+  res = []
+  # NOTE(review): starting at 1 presumably skips the leading zero of a 4-byte 00 00 00 01 start code - confirm
+  nal_unit_start = 1
+  # (pic_idx, poc, unused) of the pictures currently tracked as references; the third element is always None here
+  history:list[tuple[int, int, int]] = []
+  device_ctx = nv_gpu.nvdec_hevc_pic_s(gptimer_timeout_value=92720000, tileformat=1, sw_start_code_e=1, pattern_id=2)
+  # ((hdr, nal_unit_type), nal_unit_start, nal_unit_len) for each slice NAL unit of the picture being collected
+  nal_infos = []
+  ctx_bytes = bytes()
+  # every serialized picture context is padded to this many bytes
+  align_ctx_bytes_size = 0x300
+
+  def _flush_picture():
+    # Turn the slice NAL units collected in nal_infos into one serialized context plus one `res` entry,
+    # then update the reference history. Does nothing when no slices are pending.
+    nonlocal res, history, device_ctx, nal_infos, ctx_bytes, align_ctx_bytes_size
+
+    if not len(nal_infos): return
+
+    # the header of the first slice segment describes the whole picture
+    hdr, nal_unit_type = nal_infos[0][0]
+    assert all(nal_unit_type == x[0][1] for x in nal_infos), "all NAL units in a picture must be of the same type"
+
+    # lowest of the 16 picture indices that no history entry uses (chosen before an IDR clears the history)
+    device_ctx.curr_pic_idx = next(i for i in range(16) if all(d[0] != i for d in history))
+
+    if nal_unit_type in {avcodec.HEVC_NAL_IDR_W_RADL, avcodec.HEVC_NAL_IDR_N_LP}:
+      history = []
+
+    device_ctx.num_ref_frames = len(history)
+    device_ctx.IDR_picture_flag = int(nal_unit_type in {avcodec.HEVC_NAL_IDR_W_RADL, avcodec.HEVC_NAL_IDR_N_LP})
+    device_ctx.RAP_picture_flag = int(nal_unit_type >= avcodec.HEVC_NAL_BLA_W_LP and nal_unit_type <= avcodec.HEVC_NAL_RSV_IRAP_VCL23)
+    device_ctx.RefDiffPicOrderCnts=(ctypes.c_int16 * 16)()
+    device_ctx.colMvBuffersize = (round_up(sps.pic_width_in_luma_samples, 64) * round_up(sps.pic_height_in_luma_samples, 64) // 16) // 256
+    device_ctx.framestride=(ctypes.c_uint32 * 2)(round_up(sps.pic_width_in_luma_samples, 64), round_up(sps.pic_width_in_luma_samples, 64))
+    device_ctx.sw_hdr_skip_length = hdr.sw_skip_end - hdr.sw_skip_start
+    # NOTE(review): the constant 9 is not explained anywhere in this file - confirm what it accounts for
+    device_ctx.num_bits_short_term_ref_pics_in_slice = max(0, device_ctx.sw_hdr_skip_length - 9)
+    device_ctx.stream_len = sum(x[2] for x in nal_infos)
+
+    if pps.tiles_enabled_flag:
+      device_ctx.num_tile_columns = pps.num_tile_columns_minus1 + 1
+      device_ctx.num_tile_rows = pps.num_tile_rows_minus1 + 1
+
+    device_ctx.num_short_term_ref_pic_sets = sps.num_short_term_ref_pic_sets
+
+    # buffer offsets are stored shifted right by 8; each one adds a per-row constant times the rounded-up height
+    luma_h_rounded = round_up(sps.pic_height_in_luma_samples, 64)
+    device_ctx.HevcSaoBufferOffset = (608 * luma_h_rounded) >> 8
+    device_ctx.HevcBsdCtrlOffset = ((device_ctx.HevcSaoBufferOffset<<8) + 4864 * luma_h_rounded) >> 8
+
+    device_ctx.v1.hevc_main10_444_ext.HevcFltAboveOffset = ((device_ctx.HevcBsdCtrlOffset<<8) + 152 * luma_h_rounded) >> 8
+    device_ctx.v1.hevc_main10_444_ext.HevcSaoAboveOffset = ((device_ctx.v1.hevc_main10_444_ext.HevcFltAboveOffset<<8) + 2000 * luma_h_rounded) >> 8
+    device_ctx.v3.HevcSliceEdgeOffset = device_ctx.v1.hevc_main10_444_ext.HevcSaoAboveOffset
+
+    # split the references into those whose poc is not larger than the current one (before) and the rest (after),
+    # each ordered by distance to the current poc
+    before_list, after_list = [], []
+    for pic_idx, poc, _ in history:
+      device_ctx.RefDiffPicOrderCnts[pic_idx] = hdr.slice_pic_order_cnt_lsb - poc
+      if hdr.slice_pic_order_cnt_lsb < poc: after_list.append((poc - hdr.slice_pic_order_cnt_lsb, pic_idx))
+      else: before_list.append((hdr.slice_pic_order_cnt_lsb - poc, pic_idx))
+    before_list.sort()
+    after_list.sort()
+
+    device_ctx.initreflistidxl0 = (ctypes.c_uint8 * 16)(*[idx for _,idx in before_list + after_list])
+    if hdr.slice_type == avcodec.HEVC_SLICE_B: device_ctx.initreflistidxl1 = (ctypes.c_uint8 * 16)(*[idx for _,idx in after_list + before_list])
+
+    locl_ctx_bytes = bytes(device_ctx)
+    locl_ctx_bytes += bytes(0x200 - len(locl_ctx_bytes)) # pad to 512 bytes
+
+    pic_width_in_ctbs = ceildiv(sps.pic_width_in_luma_samples, (1 << sps.log2_max_luma_coding_block_size))
+    pic_height_in_ctbs = ceildiv(sps.pic_height_in_luma_samples, (1 << sps.log2_max_luma_coding_block_size))
+    # append tile sizes 0x200
+    if pps.tiles_enabled_flag and pps.uniform_spacing_flag:
+      assert device_ctx.num_tile_columns == 1 and device_ctx.num_tile_rows == 1, "not implemented: uniform spacing with multiple tiles"
+      locl_ctx_bytes += pic_width_in_ctbs.to_bytes(2, "little") + pic_height_in_ctbs.to_bytes(2, "little")
+    else:
+      if pps.tiles_enabled_flag and not getattr(pps, 'uniform_spacing_flag', 0):
+        column_width = [cw_minus1 + 1 for cw_minus1 in pps.column_width_minus1[0:pps.num_tile_columns_minus1]]
+        row_height = [rh_minus1 + 1 for rh_minus1 in pps.row_height_minus1[0:pps.num_tile_rows_minus1]]
+      else:
+        column_width = []
+        row_height = []
+
+      # the last column / row takes whatever is left of the picture
+      column_width.append(pic_width_in_ctbs - sum(column_width))
+      row_height.append(pic_height_in_ctbs - sum(row_height))
+
+      # one (width, height) pair of little-endian 16-bit values per tile
+      for c in column_width:
+        for r in row_height: locl_ctx_bytes += c.to_bytes(2, "little") + r.to_bytes(2, "little")
+
+    # NOTE(review): luma_size and chroma_size are computed but never used
+    luma_size = round_up(sps.pic_width_in_luma_samples, 64) * round_up(sps.pic_height_in_luma_samples, 64)
+    chroma_size = round_up(sps.pic_width_in_luma_samples, 64) * round_up((sps.pic_height_in_luma_samples + 1) // 2, 64)
+    # pictures of these NAL unit types are kept as references for later pictures
+    is_hist = nal_unit_type in {avcodec.HEVC_NAL_TRAIL_R, avcodec.HEVC_NAL_IDR_N_LP, avcodec.HEVC_NAL_IDR_W_RADL}
+
+    res.append((nal_infos[0][1], device_ctx.stream_len, device_ctx.curr_pic_idx, len(history), is_hist))
+
+    locl_ctx_bytes += (align_ctx_bytes_size - len(locl_ctx_bytes)) * b'\x00'
+    ctx_bytes += locl_ctx_bytes
+
+    # same NAL unit type set as is_hist above
+    if nal_unit_type in {avcodec.HEVC_NAL_TRAIL_R, avcodec.HEVC_NAL_IDR_N_LP, avcodec.HEVC_NAL_IDR_W_RADL}:
+      history.append((device_ctx.curr_pic_idx, hdr.slice_pic_order_cnt_lsb, None))
+
+    if len(history) >= sps.sps_max_dec_pic_buffering[0]:
+      # remove the oldest poc
+      history.pop(0)
+
+    nal_infos = []
+
+  # NOTE(review): cnt is never used
+  cnt = 0
+  while nal_unit_start < len(dat):
+    assert dat[nal_unit_start:nal_unit_start+3] == b"\x00\x00\x01", "NAL unit start code not found"
+
+    # the unit runs up to the next 00 00 01 pattern, or to the end of the data
+    pos = dat.find(b"\x00\x00\x01", nal_unit_start + 3)
+    nal_unit_len = (pos if pos != -1 else len(dat)) - nal_unit_start
+
+    # 7.3.1.1 General NAL unit syntax
+    nal_unit_type = (dat[nal_unit_start+3] >> 1) & 0x3F
+    # payload after the 3-byte start code and the 2-byte NAL unit header
+    slice_dat = dat[nal_unit_start+5:nal_unit_start+nal_unit_len]
+
+    if nal_unit_type == avcodec.HEVC_NAL_SPS:
+      sps = SPS(BitReader(_hevc_get_rbsp(slice_dat)))
+      fill_sps_into_dev_context(device_ctx, sps)
+    elif nal_unit_type == avcodec.HEVC_NAL_PPS:
+      pps = PPS(BitReader(_hevc_get_rbsp(slice_dat)))
+      fill_pps_into_dev_context(device_ctx, pps)
+    elif nal_unit_type in {avcodec.HEVC_NAL_IDR_N_LP, avcodec.HEVC_NAL_IDR_W_RADL, avcodec.HEVC_NAL_TRAIL_R, avcodec.HEVC_NAL_TRAIL_N}:
+      # the slice header is parsed from the raw payload (no _hevc_get_rbsp), so its bit positions refer to slice_dat
+      hdr = SliceSegment(BitReader(slice_dat), nal_unit_type, sps, pps)
+
+      # a first slice segment starts a new picture: emit the one collected so far
+      if hdr.first_slice_segment_in_pic_flag == 1: _flush_picture()
+      nal_infos.append(((hdr, nal_unit_type), nal_unit_start, nal_unit_len))
+
+    # all other NAL unit types are skipped
+    nal_unit_start += nal_unit_len
+  _flush_picture()
+
+  # NOTE(review): the factor 2 presumably is the chroma subsampling unit of 4:2:0 content - confirm for other formats
+  w = sps.pic_width_in_luma_samples - 2 * (sps.conf_win_left_offset + sps.conf_win_right_offset)
+  h = sps.pic_height_in_luma_samples - 2 * (sps.conf_win_top_offset  + sps.conf_win_bottom_offset)
+  chroma_off = round_up(sps.pic_width_in_luma_samples, 64) * round_up(sps.pic_height_in_luma_samples, 64)
+  opaque = Tensor(ctx_bytes, device=device).reshape(len(res), align_ctx_bytes_size)
+  return opaque, res, w, h, sps.pic_width_in_luma_samples, sps.pic_height_in_luma_samples, chroma_off
+
+def _addr_table(h, w, w_aligned):
+  """Return a flat index tensor with h*w entries (row-major): the position of pixel (y, x) in a surface tiled in 64x8 GOBs."""
+  gob_w, gob_h, gobs_per_block = 64, 8, 2
+  gob_size = gob_w * gob_h
+
+  col = Tensor.arange(w, dtype=dtypes.uint32).reshape(1, w)
+  row = Tensor.arange(h, dtype=dtypes.uint32).reshape(h, 1)
+
+  # which GOB the pixel falls into, and where that GOB starts
+  gob_col, gob_row = col // gob_w, row // gob_h
+  block_row, row_in_block = gob_row // gobs_per_block, gob_row % gobs_per_block
+  gobs_per_line = w_aligned // gob_w
+  gob_start = ((block_row * gobs_per_line + gob_col) * gobs_per_block + row_in_block) * gob_size
+
+  # position inside the GOB: the low bits of the in-GOB x and y coordinates are interleaved
+  in_x, in_y = col % gob_w, row % gob_h
+  in_gob = (in_x & 0x0F) | ((in_y & 0x03) << 4) | ((in_x & 0x10) << 2) | ((in_y & 0x04) << 5) | ((in_x & 0x20) << 3)
+  return (gob_start + in_gob).reshape(-1)
+
+def nv12_to_bgr_from_planes(luma: Tensor, chroma: Tensor, h: int, w: int) -> Tensor:
+  """Convert a luma plane and an interleaved half-resolution chroma plane into an (h, w, 3) uint8 tensor in B, G, R order."""
+  def _upsample_2x(plane: Tensor) -> Tensor:
+    # repeat every chroma sample over a 2x2 block of pixels
+    return plane.reshape(h // 2, 1, w // 2, 1).expand(h // 2, 2, w // 2, 2).reshape(h, w)
+
+  def _clamp_u8(channel: Tensor) -> Tensor: return channel.maximum(0.0).minimum(255.0)
+
+  interleaved = chroma.reshape(h // 2, w // 2, 2).cast(dtypes.float32)
+
+  # remove the offsets: 16 for luma, 128 for both chroma components
+  y_off = luma.reshape(h, w).cast(dtypes.float32) - 16.0
+  u_off = _upsample_2x(interleaved[..., 0]) - 128.0
+  v_off = _upsample_2x(interleaved[..., 1]) - 128.0
+
+  red = 1.1643835616438356 * y_off + 1.5960267857142858 * v_off
+  green = 1.1643835616438356 * y_off - 0.39176229009491365 * u_off - 0.8129676472377708 * v_off
+  blue = 1.1643835616438356 * y_off + 2.017232142857143  * u_off
+
+  return Tensor.stack([_clamp_u8(blue), _clamp_u8(green), _clamp_u8(red)], dim=2).cast(dtypes.uint8)
+
+def untile_nv12(src:Tensor, h:int, w:int, luma_w:int, chroma_off:int) -> Tensor:
+  """Gather the h x w luma rows and the (h + 1) // 2 chroma rows (starting at chroma_off) out of the tiled frame, concatenated."""
+  flat, stride = src.reshape(-1), round_up(luma_w, 64)
+  y_plane = flat[_addr_table(h, w, stride)]
+  uv_plane = flat[chroma_off:][_addr_table((h + 1) // 2, w, stride)]
+  return y_plane.cat(uv_plane).realize()
+
+def to_bgr(tensor:Tensor, h:int, w:int, luma_w:int, chroma_off:int) -> Tensor:
+  """Gather the luma and chroma planes out of the tiled frame and convert them with nv12_to_bgr_from_planes."""
+  flat, stride = tensor.reshape(-1), round_up(luma_w, 64)
+  y_plane = flat[_addr_table(h, w, stride)]
+  uv_plane = flat[chroma_off:][_addr_table((h + 1) // 2, w, stride)]
+  return nv12_to_bgr_from_planes(y_plane, uv_plane, h, w).realize()
--- a/extra/nv_gpu_driver/clc9b0.h
+++ b/extra/nv_gpu_driver/clc9b0.h
@@ -0,0 +1,603 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef clc9b0_h_
+#define clc9b0_h_
+
+#include "nvtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NVC9B0_VIDEO_DECODER                                                       (0x0000C9B0)
+
+#define NVC9B0_NOP                                                              (0x00000100)
+#define NVC9B0_NOP_PARAMETER                                                    31:0
+#define NVC9B0_PM_TRIGGER                                                       (0x00000140)
+#define NVC9B0_PM_TRIGGER_V                                                     31:0
+#define NVC9B0_SET_APPLICATION_ID                                               (0x00000200)
+#define NVC9B0_SET_APPLICATION_ID_ID                                            31:0
+#define NVC9B0_SET_APPLICATION_ID_ID_MPEG12                                     (0x00000001)
+#define NVC9B0_SET_APPLICATION_ID_ID_VC1                                        (0x00000002)
+#define NVC9B0_SET_APPLICATION_ID_ID_H264                                       (0x00000003)
+#define NVC9B0_SET_APPLICATION_ID_ID_MPEG4                                      (0x00000004)
+#define NVC9B0_SET_APPLICATION_ID_ID_VP8                                        (0x00000005)
+#define NVC9B0_SET_APPLICATION_ID_ID_CTR64                                      (0x00000006)
+#define NVC9B0_SET_APPLICATION_ID_ID_HEVC                                       (0x00000007)
+#define NVC9B0_SET_APPLICATION_ID_ID_NEW_H264                                   (0x00000008)
+#define NVC9B0_SET_APPLICATION_ID_ID_VP9                                        (0x00000009)
+#define NVC9B0_SET_APPLICATION_ID_ID_PASS1                                      (0x0000000A)
+#define NVC9B0_SET_APPLICATION_ID_ID_HEVC_PARSER                                (0x0000000C)
+#define NVC9B0_SET_APPLICATION_ID_ID_UCODE_TEST                                 (0x0000000D)
+#define NVC9B0_SET_APPLICATION_ID_ID_HWDRM_PR_DECRYPTAUDIO                      (0x0000000E)
+#define NVC9B0_SET_APPLICATION_ID_ID_HWDRM_PR_DECRYPTAUDIOMULTIPLE              (0x0000000F)
+#define NVC9B0_SET_APPLICATION_ID_ID_HWDRM_PR_PREPROCESSENCRYPTEDDATA           (0x00000010)
+#define NVC9B0_SET_APPLICATION_ID_ID_VP9_WITH_PARSER                            (0x00000011)
+#define NVC9B0_SET_APPLICATION_ID_ID_AVD                                        (0x00000012)
+#define NVC9B0_SET_APPLICATION_ID_ID_HW_DRM_PR4_DECRYPTCONTENTMULTIPLE          (0x00000013)
+#define NVC9B0_SET_APPLICATION_ID_ID_DHKE                                       (0x00000020)
+#define NVC9B0_SET_WATCHDOG_TIMER                                               (0x00000204)
+#define NVC9B0_SET_WATCHDOG_TIMER_TIMER                                         31:0
+#define NVC9B0_SEMAPHORE_A                                                      (0x00000240)
+#define NVC9B0_SEMAPHORE_A_UPPER                                                7:0
+#define NVC9B0_SEMAPHORE_B                                                      (0x00000244)
+#define NVC9B0_SEMAPHORE_B_LOWER                                                31:0
+#define NVC9B0_SEMAPHORE_C                                                      (0x00000248)
+#define NVC9B0_SEMAPHORE_C_PAYLOAD                                              31:0
+#define NVC9B0_CTX_SAVE_AREA                                                    (0x0000024C)
+#define NVC9B0_CTX_SAVE_AREA_OFFSET                                             31:0
+#define NVC9B0_CTX_SWITCH                                                       (0x00000250)
+#define NVC9B0_CTX_SWITCH_OP                                                    1:0
+#define NVC9B0_CTX_SWITCH_OP_CTX_UPDATE                                         (0x00000000)
+#define NVC9B0_CTX_SWITCH_OP_CTX_SAVE                                           (0x00000001)
+#define NVC9B0_CTX_SWITCH_OP_CTX_RESTORE                                        (0x00000002)
+#define NVC9B0_CTX_SWITCH_OP_CTX_FORCERESTORE                                   (0x00000003)
+#define NVC9B0_CTX_SWITCH_CTXID_VALID                                           2:2
+#define NVC9B0_CTX_SWITCH_CTXID_VALID_FALSE                                     (0x00000000)
+#define NVC9B0_CTX_SWITCH_CTXID_VALID_TRUE                                      (0x00000001)
+#define NVC9B0_CTX_SWITCH_RESERVED0                                             7:3
+#define NVC9B0_CTX_SWITCH_CTX_ID                                                23:8
+#define NVC9B0_CTX_SWITCH_RESERVED1                                             31:24
+#define NVC9B0_SET_SEMAPHORE_PAYLOAD_LOWER                                      (0x00000254)
+#define NVC9B0_SET_SEMAPHORE_PAYLOAD_LOWER_PAYLOAD_LOWER                        31:0
+#define NVC9B0_SET_SEMAPHORE_PAYLOAD_UPPER                                      (0x00000258)
+#define NVC9B0_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD_UPPER                        31:0
+#define NVC9B0_SET_MONITORED_FENCE_SIGNAL_ADDRESS_BASE_A                        (0x0000025C)
+#define NVC9B0_SET_MONITORED_FENCE_SIGNAL_ADDRESS_BASE_A_LOWER                  31:0
+#define NVC9B0_SET_MONITORED_FENCE_SIGNAL_ADDRESS_BASE_B                        (0x00000260)
+#define NVC9B0_SET_MONITORED_FENCE_SIGNAL_ADDRESS_BASE_B_UPPER                  31:0
+#define NVC9B0_EXECUTE                                                          (0x00000300)
+#define NVC9B0_EXECUTE_NOTIFY                                                   0:0
+#define NVC9B0_EXECUTE_NOTIFY_DISABLE                                           (0x00000000)
+#define NVC9B0_EXECUTE_NOTIFY_ENABLE                                            (0x00000001)
+#define NVC9B0_EXECUTE_NOTIFY_ON                                                1:1
+#define NVC9B0_EXECUTE_NOTIFY_ON_END                                            (0x00000000)
+#define NVC9B0_EXECUTE_NOTIFY_ON_BEGIN                                          (0x00000001)
+#define NVC9B0_EXECUTE_PREDICATION                                              2:2
+#define NVC9B0_EXECUTE_PREDICATION_DISABLE                                      (0x00000000)
+#define NVC9B0_EXECUTE_PREDICATION_ENABLE                                       (0x00000001)
+#define NVC9B0_EXECUTE_PREDICATION_OP                                           3:3
+#define NVC9B0_EXECUTE_PREDICATION_OP_EQUAL_ZERO                                (0x00000000)
+#define NVC9B0_EXECUTE_PREDICATION_OP_NOT_EQUAL_ZERO                            (0x00000001)
+#define NVC9B0_EXECUTE_AWAKEN                                                   8:8
+#define NVC9B0_EXECUTE_AWAKEN_DISABLE                                           (0x00000000)
+#define NVC9B0_EXECUTE_AWAKEN_ENABLE                                            (0x00000001)
+#define NVC9B0_SEMAPHORE_D                                                      (0x00000304)
+#define NVC9B0_SEMAPHORE_D_STRUCTURE_SIZE                                       1:0
+#define NVC9B0_SEMAPHORE_D_STRUCTURE_SIZE_ONE                                   (0x00000000)
+#define NVC9B0_SEMAPHORE_D_STRUCTURE_SIZE_FOUR                                  (0x00000001)
+#define NVC9B0_SEMAPHORE_D_STRUCTURE_SIZE_TWO                                   (0x00000002)
+#define NVC9B0_SEMAPHORE_D_AWAKEN_ENABLE                                        8:8
+#define NVC9B0_SEMAPHORE_D_AWAKEN_ENABLE_FALSE                                  (0x00000000)
+#define NVC9B0_SEMAPHORE_D_AWAKEN_ENABLE_TRUE                                   (0x00000001)
+#define NVC9B0_SEMAPHORE_D_OPERATION                                            17:16
+#define NVC9B0_SEMAPHORE_D_OPERATION_RELEASE                                    (0x00000000)
+#define NVC9B0_SEMAPHORE_D_OPERATION_RESERVED_0                                 (0x00000001)
+#define NVC9B0_SEMAPHORE_D_OPERATION_RESERVED_1                                 (0x00000002)
+#define NVC9B0_SEMAPHORE_D_OPERATION_TRAP                                       (0x00000003)
+#define NVC9B0_SEMAPHORE_D_FLUSH_DISABLE                                        21:21
+#define NVC9B0_SEMAPHORE_D_FLUSH_DISABLE_FALSE                                  (0x00000000)
+#define NVC9B0_SEMAPHORE_D_FLUSH_DISABLE_TRUE                                   (0x00000001)
+#define NVC9B0_SEMAPHORE_D_TRAP_TYPE                                            23:22
+#define NVC9B0_SEMAPHORE_D_TRAP_TYPE_UNCONDITIONAL                              (0x00000000)
+#define NVC9B0_SEMAPHORE_D_TRAP_TYPE_CONDITIONAL                                (0x00000001)
+#define NVC9B0_SEMAPHORE_D_TRAP_TYPE_CONDITIONAL_EXT                            (0x00000002)
+#define NVC9B0_SEMAPHORE_D_PAYLOAD_SIZE                                         24:24
+#define NVC9B0_SEMAPHORE_D_PAYLOAD_SIZE_32BIT                                   (0x00000000)
+#define NVC9B0_SEMAPHORE_D_PAYLOAD_SIZE_64BIT                                   (0x00000001)
+#define NVC9B0_SET_PREDICATION_OFFSET_UPPER                                     (0x00000308)
+#define NVC9B0_SET_PREDICATION_OFFSET_UPPER_OFFSET                              7:0
+#define NVC9B0_SET_PREDICATION_OFFSET_LOWER                                     (0x0000030C)
+#define NVC9B0_SET_PREDICATION_OFFSET_LOWER_OFFSET                              31:0
+#define NVC9B0_SET_AUXILIARY_DATA_BUFFER                                        (0x00000310)
+#define NVC9B0_SET_AUXILIARY_DATA_BUFFER_OFFSET                                 31:0
+#define NVC9B0_SET_CONTROL_PARAMS                                               (0x00000400)
+#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE                                    3:0
+#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_MPEG1                              (0x00000000)
+#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_MPEG2                              (0x00000001)
+#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_VC1                                (0x00000002)
+#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_H264                               (0x00000003)
+#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_MPEG4                              (0x00000004)
+#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_DIVX3                              (0x00000004)
+#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_VP8                                (0x00000005)
+#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_HEVC                               (0x00000007)
+#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_VP9                                (0x00000009)
+#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_AV1                                (0x0000000A)
+#define NVC9B0_SET_CONTROL_PARAMS_GPTIMER_ON                                    4:4
+#define NVC9B0_SET_CONTROL_PARAMS_RET_ERROR                                     5:5
+#define NVC9B0_SET_CONTROL_PARAMS_ERR_CONCEAL_ON                                6:6
+#define NVC9B0_SET_CONTROL_PARAMS_ERROR_FRM_IDX                                 12:7
+#define NVC9B0_SET_CONTROL_PARAMS_MBTIMER_ON                                    13:13
+#define NVC9B0_SET_CONTROL_PARAMS_EC_INTRA_FRAME_USING_PSLC                     14:14
+#define NVC9B0_SET_CONTROL_PARAMS_IGNORE_SOME_FIELDS_CRC_CHECK                  15:15
+#define NVC9B0_SET_CONTROL_PARAMS_EVENT_TRACE_LOGGING_ON                        16:16
+#define NVC9B0_SET_CONTROL_PARAMS_ALL_INTRA_FRAME                               17:17
+#define NVC9B0_SET_CONTROL_PARAMS_TESTRUN_ENV                                   19:18
+#define NVC9B0_SET_CONTROL_PARAMS_TESTRUN_ENV_TRACE3D_RUN                       (0x00000000)
+#define NVC9B0_SET_CONTROL_PARAMS_TESTRUN_ENV_PROD_RUN                          (0x00000001)
+#define NVC9B0_SET_CONTROL_PARAMS_HINT_DUMP_EN                                  20:20
+#define NVC9B0_SET_CONTROL_PARAMS_RESERVED                                      25:21
+#define NVC9B0_SET_CONTROL_PARAMS_NVDECSIM_SKIP_SCP                             26:26
+#define NVC9B0_SET_CONTROL_PARAMS_ENABLE_ENCRYPT                                27:27
+#define NVC9B0_SET_CONTROL_PARAMS_ENCRYPTMODE                                   31:28
+#define NVC9B0_SET_DRV_PIC_SETUP_OFFSET                                         (0x00000404)
+#define NVC9B0_SET_DRV_PIC_SETUP_OFFSET_OFFSET                                  31:0
+#define NVC9B0_SET_IN_BUF_BASE_OFFSET                                           (0x00000408)
+#define NVC9B0_SET_IN_BUF_BASE_OFFSET_OFFSET                                    31:0
+#define NVC9B0_SET_PICTURE_INDEX                                                (0x0000040C)
+#define NVC9B0_SET_PICTURE_INDEX_INDEX                                          31:0
+#define NVC9B0_SET_SLICE_OFFSETS_BUF_OFFSET                                     (0x00000410)
+#define NVC9B0_SET_SLICE_OFFSETS_BUF_OFFSET_OFFSET                              31:0
+#define NVC9B0_SET_COLOC_DATA_OFFSET                                            (0x00000414)
+#define NVC9B0_SET_COLOC_DATA_OFFSET_OFFSET                                     31:0
+#define NVC9B0_SET_HISTORY_OFFSET                                               (0x00000418)
+#define NVC9B0_SET_HISTORY_OFFSET_OFFSET                                        31:0
+#define NVC9B0_SET_DISPLAY_BUF_SIZE                                             (0x0000041C)
+#define NVC9B0_SET_DISPLAY_BUF_SIZE_SIZE                                        31:0
+#define NVC9B0_SET_HISTOGRAM_OFFSET                                             (0x00000420)
+#define NVC9B0_SET_HISTOGRAM_OFFSET_OFFSET                                      31:0
+#define NVC9B0_SET_NVDEC_STATUS_OFFSET                                          (0x00000424)
+#define NVC9B0_SET_NVDEC_STATUS_OFFSET_OFFSET                                   31:0
+#define NVC9B0_SET_DISPLAY_BUF_LUMA_OFFSET                                      (0x00000428)
+#define NVC9B0_SET_DISPLAY_BUF_LUMA_OFFSET_OFFSET                               31:0
+#define NVC9B0_SET_DISPLAY_BUF_CHROMA_OFFSET                                    (0x0000042C)
+#define NVC9B0_SET_DISPLAY_BUF_CHROMA_OFFSET_OFFSET                             31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET0                                         (0x00000430)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET0_OFFSET                                  31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET1                                         (0x00000434)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET1_OFFSET                                  31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET2                                         (0x00000438)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET2_OFFSET                                  31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET3                                         (0x0000043C)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET3_OFFSET                                  31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET4                                         (0x00000440)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET4_OFFSET                                  31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET5                                         (0x00000444)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET5_OFFSET                                  31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET6                                         (0x00000448)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET6_OFFSET                                  31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET7                                         (0x0000044C)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET7_OFFSET                                  31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET8                                         (0x00000450)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET8_OFFSET                                  31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET9                                         (0x00000454)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET9_OFFSET                                  31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET10                                        (0x00000458)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET10_OFFSET                                 31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET11                                        (0x0000045C)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET11_OFFSET                                 31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET12                                        (0x00000460)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET12_OFFSET                                 31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET13                                        (0x00000464)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET13_OFFSET                                 31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET14                                        (0x00000468)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET14_OFFSET                                 31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET15                                        (0x0000046C)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET15_OFFSET                                 31:0
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET16                                        (0x00000470)
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET16_OFFSET                                 31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET0                                       (0x00000474)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET0_OFFSET                                31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET1                                       (0x00000478)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET1_OFFSET                                31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET2                                       (0x0000047C)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET2_OFFSET                                31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET3                                       (0x00000480)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET3_OFFSET                                31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET4                                       (0x00000484)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET4_OFFSET                                31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET5                                       (0x00000488)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET5_OFFSET                                31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET6                                       (0x0000048C)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET6_OFFSET                                31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET7                                       (0x00000490)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET7_OFFSET                                31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET8                                       (0x00000494)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET8_OFFSET                                31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET9                                       (0x00000498)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET9_OFFSET                                31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET10                                      (0x0000049C)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET10_OFFSET                               31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET11                                      (0x000004A0)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET11_OFFSET                               31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET12                                      (0x000004A4)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET12_OFFSET                               31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET13                                      (0x000004A8)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET13_OFFSET                               31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET14                                      (0x000004AC)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET14_OFFSET                               31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET15                                      (0x000004B0)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET15_OFFSET                               31:0
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET16                                      (0x000004B4)
+#define NVC9B0_SET_PICTURE_CHROMA_OFFSET16_OFFSET                               31:0
+#define NVC9B0_SET_PIC_SCRATCH_BUF_OFFSET                                       (0x000004B8)
+#define NVC9B0_SET_PIC_SCRATCH_BUF_OFFSET_OFFSET                                31:0
+#define NVC9B0_SET_EXTERNAL_MVBUFFER_OFFSET                                     (0x000004BC)
+#define NVC9B0_SET_EXTERNAL_MVBUFFER_OFFSET_OFFSET                              31:0
+#define NVC9B0_SET_SUB_SAMPLE_MAP_OFFSET                                        (0x000004C0)
+#define NVC9B0_SET_SUB_SAMPLE_MAP_OFFSET_OFFSET                                 31:0
+#define NVC9B0_SET_SUB_SAMPLE_MAP_IV_OFFSET                                     (0x000004C4)
+#define NVC9B0_SET_SUB_SAMPLE_MAP_IV_OFFSET_OFFSET                              31:0
+#define NVC9B0_SET_INTRA_TOP_BUF_OFFSET                                         (0x000004C8)
+#define NVC9B0_SET_INTRA_TOP_BUF_OFFSET_OFFSET                                  31:0
+#define NVC9B0_SET_TILE_SIZE_BUF_OFFSET                                         (0x000004CC)
+#define NVC9B0_SET_TILE_SIZE_BUF_OFFSET_OFFSET                                  31:0
+#define NVC9B0_SET_FILTER_BUFFER_OFFSET                                         (0x000004D0)
+#define NVC9B0_SET_FILTER_BUFFER_OFFSET_OFFSET                                  31:0
+#define NVC9B0_SET_CRC_STRUCT_OFFSET                                            (0x000004D4)
+#define NVC9B0_SET_CRC_STRUCT_OFFSET_OFFSET                                     31:0
+#define NVC9B0_SET_PR_SSM_CONTENT_INFO_BUF_OFFSET                               (0x000004D8)
+#define NVC9B0_SET_PR_SSM_CONTENT_INFO_BUF_OFFSET_OFFSET                        31:0
+#define NVC9B0_H264_SET_MBHIST_BUF_OFFSET                                       (0x00000500)
+#define NVC9B0_H264_SET_MBHIST_BUF_OFFSET_OFFSET                                31:0
+#define NVC9B0_VP8_SET_PROB_DATA_OFFSET                                         (0x00000540)
+#define NVC9B0_VP8_SET_PROB_DATA_OFFSET_OFFSET                                  31:0
+#define NVC9B0_VP8_SET_HEADER_PARTITION_BUF_BASE_OFFSET                         (0x00000544)
+#define NVC9B0_VP8_SET_HEADER_PARTITION_BUF_BASE_OFFSET_OFFSET                  31:0
+#define NVC9B0_HEVC_SET_SCALING_LIST_OFFSET                                     (0x00000580)
+#define NVC9B0_HEVC_SET_SCALING_LIST_OFFSET_OFFSET                              31:0
+#define NVC9B0_HEVC_SET_TILE_SIZES_OFFSET                                       (0x00000584)
+#define NVC9B0_HEVC_SET_TILE_SIZES_OFFSET_OFFSET                                31:0
+#define NVC9B0_HEVC_SET_FILTER_BUFFER_OFFSET                                    (0x00000588)
+#define NVC9B0_HEVC_SET_FILTER_BUFFER_OFFSET_OFFSET                             31:0
+#define NVC9B0_HEVC_SET_SAO_BUFFER_OFFSET                                       (0x0000058C)
+#define NVC9B0_HEVC_SET_SAO_BUFFER_OFFSET_OFFSET                                31:0
+#define NVC9B0_HEVC_SET_SLICE_INFO_BUFFER_OFFSET                                (0x00000590)
+#define NVC9B0_HEVC_SET_SLICE_INFO_BUFFER_OFFSET_OFFSET                         31:0
+#define NVC9B0_HEVC_SET_SLICE_GROUP_INDEX                                       (0x00000594)
+#define NVC9B0_HEVC_SET_SLICE_GROUP_INDEX_OFFSET                                31:0
+#define NVC9B0_VP9_SET_PROB_TAB_BUF_OFFSET                                      (0x000005C0)
+#define NVC9B0_VP9_SET_PROB_TAB_BUF_OFFSET_OFFSET                               31:0
+#define NVC9B0_VP9_SET_CTX_COUNTER_BUF_OFFSET                                   (0x000005C4)
+#define NVC9B0_VP9_SET_CTX_COUNTER_BUF_OFFSET_OFFSET                            31:0
+#define NVC9B0_VP9_SET_SEGMENT_READ_BUF_OFFSET                                  (0x000005C8)
+#define NVC9B0_VP9_SET_SEGMENT_READ_BUF_OFFSET_OFFSET                           31:0
+#define NVC9B0_VP9_SET_SEGMENT_WRITE_BUF_OFFSET                                 (0x000005CC)
+#define NVC9B0_VP9_SET_SEGMENT_WRITE_BUF_OFFSET_OFFSET                          31:0
+#define NVC9B0_VP9_SET_TILE_SIZE_BUF_OFFSET                                     (0x000005D0)
+#define NVC9B0_VP9_SET_TILE_SIZE_BUF_OFFSET_OFFSET                              31:0
+#define NVC9B0_VP9_SET_COL_MVWRITE_BUF_OFFSET                                   (0x000005D4)
+#define NVC9B0_VP9_SET_COL_MVWRITE_BUF_OFFSET_OFFSET                            31:0
+#define NVC9B0_VP9_SET_COL_MVREAD_BUF_OFFSET                                    (0x000005D8)
+#define NVC9B0_VP9_SET_COL_MVREAD_BUF_OFFSET_OFFSET                             31:0
+#define NVC9B0_VP9_SET_FILTER_BUFFER_OFFSET                                     (0x000005DC)
+#define NVC9B0_VP9_SET_FILTER_BUFFER_OFFSET_OFFSET                              31:0
+#define NVC9B0_VP9_PARSER_SET_PIC_SETUP_OFFSET                                  (0x000005E0)
+#define NVC9B0_VP9_PARSER_SET_PIC_SETUP_OFFSET_OFFSET                           31:0
+#define NVC9B0_VP9_PARSER_SET_PREV_PIC_SETUP_OFFSET                             (0x000005E4)
+#define NVC9B0_VP9_PARSER_SET_PREV_PIC_SETUP_OFFSET_OFFSET                      31:0
+#define NVC9B0_VP9_PARSER_SET_PROB_TAB_BUF_OFFSET                               (0x000005E8)
+#define NVC9B0_VP9_PARSER_SET_PROB_TAB_BUF_OFFSET_OFFSET                        31:0
+#define NVC9B0_VP9_SET_HINT_DUMP_BUF_OFFSET                                     (0x000005EC)
+#define NVC9B0_VP9_SET_HINT_DUMP_BUF_OFFSET_OFFSET                              31:0
+#define NVC9B0_PASS1_SET_CLEAR_HEADER_OFFSET                                    (0x00000600)
+#define NVC9B0_PASS1_SET_CLEAR_HEADER_OFFSET_OFFSET                             31:0
+#define NVC9B0_PASS1_SET_RE_ENCRYPT_OFFSET                                      (0x00000604)
+#define NVC9B0_PASS1_SET_RE_ENCRYPT_OFFSET_OFFSET                               31:0
+#define NVC9B0_PASS1_SET_VP8_TOKEN_OFFSET                                       (0x00000608)
+#define NVC9B0_PASS1_SET_VP8_TOKEN_OFFSET_OFFSET                                31:0
+#define NVC9B0_PASS1_SET_INPUT_DATA_OFFSET                                      (0x0000060C)
+#define NVC9B0_PASS1_SET_INPUT_DATA_OFFSET_OFFSET                               31:0
+#define NVC9B0_PASS1_SET_OUTPUT_DATA_SIZE_OFFSET                                (0x00000610)
+#define NVC9B0_PASS1_SET_OUTPUT_DATA_SIZE_OFFSET_OFFSET                         31:0
+#define NVC9B0_AV1_SET_PROB_TAB_READ_BUF_OFFSET                                 (0x00000640)
+#define NVC9B0_AV1_SET_PROB_TAB_READ_BUF_OFFSET_OFFSET                          31:0
+#define NVC9B0_AV1_SET_PROB_TAB_WRITE_BUF_OFFSET                                (0x00000644)
+#define NVC9B0_AV1_SET_PROB_TAB_WRITE_BUF_OFFSET_OFFSET                         31:0
+#define NVC9B0_AV1_SET_SEGMENT_READ_BUF_OFFSET                                  (0x00000648)
+#define NVC9B0_AV1_SET_SEGMENT_READ_BUF_OFFSET_OFFSET                           31:0
+#define NVC9B0_AV1_SET_SEGMENT_WRITE_BUF_OFFSET                                 (0x0000064C)
+#define NVC9B0_AV1_SET_SEGMENT_WRITE_BUF_OFFSET_OFFSET                          31:0
+#define NVC9B0_AV1_SET_COL_MV0_READ_BUF_OFFSET                                  (0x00000650)
+#define NVC9B0_AV1_SET_COL_MV0_READ_BUF_OFFSET_OFFSET                           31:0
+#define NVC9B0_AV1_SET_COL_MV1_READ_BUF_OFFSET                                  (0x00000654)
+#define NVC9B0_AV1_SET_COL_MV1_READ_BUF_OFFSET_OFFSET                           31:0
+#define NVC9B0_AV1_SET_COL_MV2_READ_BUF_OFFSET                                  (0x00000658)
+#define NVC9B0_AV1_SET_COL_MV2_READ_BUF_OFFSET_OFFSET                           31:0
+#define NVC9B0_AV1_SET_COL_MVWRITE_BUF_OFFSET                                   (0x0000065C)
+#define NVC9B0_AV1_SET_COL_MVWRITE_BUF_OFFSET_OFFSET                            31:0
+#define NVC9B0_AV1_SET_GLOBAL_MODEL_BUF_OFFSET                                  (0x00000660)
+#define NVC9B0_AV1_SET_GLOBAL_MODEL_BUF_OFFSET_OFFSET                           31:0
+#define NVC9B0_AV1_SET_FILM_GRAIN_BUF_OFFSET                                    (0x00000664)
+#define NVC9B0_AV1_SET_FILM_GRAIN_BUF_OFFSET_OFFSET                             31:0
+#define NVC9B0_AV1_SET_TILE_STREAM_INFO_BUF_OFFSET                              (0x00000668)
+#define NVC9B0_AV1_SET_TILE_STREAM_INFO_BUF_OFFSET_OFFSET                       31:0
+#define NVC9B0_AV1_SET_SUB_STREAM_ENTRY_BUF_OFFSET                              (0x0000066C)
+#define NVC9B0_AV1_SET_SUB_STREAM_ENTRY_BUF_OFFSET_OFFSET                       31:0
+#define NVC9B0_AV1_SET_HINT_DUMP_BUF_OFFSET                                     (0x00000670)
+#define NVC9B0_AV1_SET_HINT_DUMP_BUF_OFFSET_OFFSET                              31:0
+#define NVC9B0_H264_SET_SCALING_LIST_OFFSET                                     (0x00000680)
+#define NVC9B0_H264_SET_SCALING_LIST_OFFSET_OFFSET                              31:0
+#define NVC9B0_H264_SET_VLDHIST_BUF_OFFSET                                      (0x00000684)
+#define NVC9B0_H264_SET_VLDHIST_BUF_OFFSET_OFFSET                               31:0
+#define NVC9B0_H264_SET_EDOBOFFSET0                                             (0x00000688)
+#define NVC9B0_H264_SET_EDOBOFFSET0_OFFSET                                      31:0
+#define NVC9B0_H264_SET_EDOBOFFSET1                                             (0x0000068C)
+#define NVC9B0_H264_SET_EDOBOFFSET1_OFFSET                                      31:0
+#define NVC9B0_H264_SET_EDOBOFFSET2                                             (0x00000690)
+#define NVC9B0_H264_SET_EDOBOFFSET2_OFFSET                                      31:0
+#define NVC9B0_H264_SET_EDOBOFFSET3                                             (0x00000694)
+#define NVC9B0_H264_SET_EDOBOFFSET3_OFFSET                                      31:0
+#define NVC9B0_SET_CONTENT_INITIAL_VECTOR(b)                                    (0x00000C00 + (b)*0x00000004)
+#define NVC9B0_SET_CONTENT_INITIAL_VECTOR_VALUE                                 31:0
+#define NVC9B0_SET_CTL_COUNT                                                    (0x00000C10)
+#define NVC9B0_SET_CTL_COUNT_VALUE                                              31:0
+#define NVC9B0_SET_UPPER_SRC                                                    (0x00000C14)
+#define NVC9B0_SET_UPPER_SRC_OFFSET                                             7:0
+#define NVC9B0_SET_LOWER_SRC                                                    (0x00000C18)
+#define NVC9B0_SET_LOWER_SRC_OFFSET                                             31:0
+#define NVC9B0_SET_UPPER_DST                                                    (0x00000C1C)
+#define NVC9B0_SET_UPPER_DST_OFFSET                                             7:0
+#define NVC9B0_SET_LOWER_DST                                                    (0x00000C20)
+#define NVC9B0_SET_LOWER_DST_OFFSET                                             31:0
+#define NVC9B0_SET_BLOCK_COUNT                                                  (0x00000C24)
+#define NVC9B0_SET_BLOCK_COUNT_VALUE                                            31:0
+#define NVC9B0_PR_SET_REQUEST_BUF_OFFSET                                        (0x00000D00)
+#define NVC9B0_PR_SET_REQUEST_BUF_OFFSET_OFFSET                                 31:0
+#define NVC9B0_PR_SET_REQUEST_BUF_SIZE                                          (0x00000D04)
+#define NVC9B0_PR_SET_REQUEST_BUF_SIZE_SIZE                                     31:0
+#define NVC9B0_PR_SET_RESPONSE_BUF_OFFSET                                       (0x00000D08)
+#define NVC9B0_PR_SET_RESPONSE_BUF_OFFSET_OFFSET                                31:0
+#define NVC9B0_PR_SET_RESPONSE_BUF_SIZE                                         (0x00000D0C)
+#define NVC9B0_PR_SET_RESPONSE_BUF_SIZE_SIZE                                    31:0
+#define NVC9B0_PR_SET_REQUEST_MESSAGE_BUF_OFFSET                                (0x00000D10)
+#define NVC9B0_PR_SET_REQUEST_MESSAGE_BUF_OFFSET_OFFSET                         31:0
+#define NVC9B0_PR_SET_RESPONSE_MESSAGE_BUF_OFFSET                               (0x00000D14)
+#define NVC9B0_PR_SET_RESPONSE_MESSAGE_BUF_OFFSET_OFFSET                        31:0
+#define NVC9B0_PR_SET_LOCAL_DECRYPT_BUF_OFFSET                                  (0x00000D18)
+#define NVC9B0_PR_SET_LOCAL_DECRYPT_BUF_OFFSET_OFFSET                           31:0
+#define NVC9B0_PR_SET_LOCAL_DECRYPT_BUF_SIZE                                    (0x00000D1C)
+#define NVC9B0_PR_SET_LOCAL_DECRYPT_BUF_SIZE_SIZE                               31:0
+#define NVC9B0_PR_SET_CONTENT_DECRYPT_INFO_BUF_OFFSET                           (0x00000D20)
+#define NVC9B0_PR_SET_CONTENT_DECRYPT_INFO_BUF_OFFSET_OFFSET                    31:0
+#define NVC9B0_PR_SET_REENCRYPTED_BITSTREAM_BUF_OFFSET                          (0x00000D24)
+#define NVC9B0_PR_SET_REENCRYPTED_BITSTREAM_BUF_OFFSET_OFFSET                   31:0
+#define NVC9B0_DH_KE_SET_CHALLENGE_BUF_OFFSET                                   (0x00000E00)
+#define NVC9B0_DH_KE_SET_CHALLENGE_BUF_OFFSET_OFFSET                            31:0
+#define NVC9B0_DH_KE_SET_RESPONSE_BUF_OFFSET                                    (0x00000E04)
+#define NVC9B0_DH_KE_SET_RESPONSE_BUF_OFFSET_OFFSET                             31:0
+#define NVC9B0_SET_SESSION_KEY(b)                                               (0x00000F00 + (b)*0x00000004)
+#define NVC9B0_SET_SESSION_KEY_VALUE                                            31:0
+#define NVC9B0_SET_CONTENT_KEY(b)                                               (0x00000F10 + (b)*0x00000004)
+#define NVC9B0_SET_CONTENT_KEY_VALUE                                            31:0
+#define NVC9B0_PM_TRIGGER_END                                                   (0x00001114)
+#define NVC9B0_PM_TRIGGER_END_V                                                 31:0
+
+#define NVC9B0_ERROR_NONE                                                       (0x00000000)
+#define NVC9B0_OS_ERROR_EXECUTE_INSUFFICIENT_DATA                               (0x00000001)
+#define NVC9B0_OS_ERROR_SEMAPHORE_INSUFFICIENT_DATA                             (0x00000002)
+#define NVC9B0_OS_ERROR_INVALID_METHOD                                          (0x00000003)
+#define NVC9B0_OS_ERROR_INVALID_DMA_PAGE                                        (0x00000004)
+#define NVC9B0_OS_ERROR_UNHANDLED_INTERRUPT                                     (0x00000005)
+#define NVC9B0_OS_ERROR_EXCEPTION                                               (0x00000006)
+#define NVC9B0_OS_ERROR_INVALID_CTXSW_REQUEST                                   (0x00000007)
+#define NVC9B0_OS_ERROR_APPLICATION                                             (0x00000008)
+#define NVC9B0_OS_ERROR_SW_BREAKPT                                              (0x00000009)
+#define NVC9B0_OS_INTERRUPT_EXECUTE_AWAKEN                                      (0x00000100)
+#define NVC9B0_OS_INTERRUPT_BACKEND_SEMAPHORE_AWAKEN                            (0x00000200)
+#define NVC9B0_OS_INTERRUPT_CTX_ERROR_FBIF                                      (0x00000300)
+#define NVC9B0_OS_INTERRUPT_LIMIT_VIOLATION                                     (0x00000400)
+#define NVC9B0_OS_INTERRUPT_LIMIT_AND_FBIF_CTX_ERROR                            (0x00000500)
+#define NVC9B0_OS_INTERRUPT_HALT_ENGINE                                         (0x00000600)
+#define NVC9B0_OS_INTERRUPT_TRAP_NONSTALL                                       (0x00000700)
+#define NVC9B0_H264_VLD_ERR_SEQ_DATA_INCONSISTENT                               (0x00004001)
+#define NVC9B0_H264_VLD_ERR_PIC_DATA_INCONSISTENT                               (0x00004002)
+#define NVC9B0_H264_VLD_ERR_SLC_DATA_BUF_ADDR_OUT_OF_BOUNDS                     (0x00004100)
+#define NVC9B0_H264_VLD_ERR_BITSTREAM_ERROR                                     (0x00004101)
+#define NVC9B0_H264_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID                          (0x000041F8)
+#define NVC9B0_H264_VLD_ERR_SLC_HDR_OUT_SIZE_NOT_MULT256                        (0x00004200)
+#define NVC9B0_H264_VLD_ERR_SLC_DATA_OUT_SIZE_NOT_MULT256                       (0x00004201)
+#define NVC9B0_H264_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID                        (0x00004203)
+#define NVC9B0_H264_VLD_ERR_CTX_DMA_ID_SLC_HDR_OUT_INVALID                      (0x00004204)
+#define NVC9B0_H264_VLD_ERR_SLC_HDR_OUT_BUF_TOO_SMALL                           (0x00004205)
+#define NVC9B0_H264_VLD_ERR_SLC_HDR_OUT_BUF_ALREADY_VALID                       (0x00004206)
+#define NVC9B0_H264_VLD_ERR_SLC_DATA_OUT_BUF_TOO_SMALL                          (0x00004207)
+#define NVC9B0_H264_VLD_ERR_DATA_BUF_CNT_TOO_SMALL                              (0x00004208)
+#define NVC9B0_H264_VLD_ERR_BITSTREAM_EMPTY                                     (0x00004209)
+#define NVC9B0_H264_VLD_ERR_FRAME_WIDTH_TOO_LARGE                               (0x0000420A)
+#define NVC9B0_H264_VLD_ERR_FRAME_HEIGHT_TOO_LARGE                              (0x0000420B)
+#define NVC9B0_H264_VLD_ERR_HIST_BUF_TOO_SMALL                                  (0x00004300)
+#define NVC9B0_VC1_VLD_ERR_PIC_DATA_BUF_ADDR_OUT_OF_BOUND                       (0x00005100)
+#define NVC9B0_VC1_VLD_ERR_BITSTREAM_ERROR                                      (0x00005101)
+#define NVC9B0_VC1_VLD_ERR_PIC_HDR_OUT_SIZE_NOT_MULT256                         (0x00005200)
+#define NVC9B0_VC1_VLD_ERR_PIC_DATA_OUT_SIZE_NOT_MULT256                        (0x00005201)
+#define NVC9B0_VC1_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID                           (0x00005202)
+#define NVC9B0_VC1_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID                         (0x00005203)
+#define NVC9B0_VC1_VLD_ERR_CTX_DMA_ID_PIC_HDR_OUT_INVALID                       (0x00005204)
+#define NVC9B0_VC1_VLD_ERR_SLC_HDR_OUT_BUF_TOO_SMALL                            (0x00005205)
+#define NVC9B0_VC1_VLD_ERR_PIC_HDR_OUT_BUF_ALREADY_VALID                        (0x00005206)
+#define NVC9B0_VC1_VLD_ERR_PIC_DATA_OUT_BUF_TOO_SMALL                           (0x00005207)
+#define NVC9B0_VC1_VLD_ERR_DATA_INFO_IN_BUF_TOO_SMALL                           (0x00005208)
+#define NVC9B0_VC1_VLD_ERR_BITSTREAM_EMPTY                                      (0x00005209)
+#define NVC9B0_VC1_VLD_ERR_FRAME_WIDTH_TOO_LARGE                                (0x0000520A)
+#define NVC9B0_VC1_VLD_ERR_FRAME_HEIGHT_TOO_LARGE                               (0x0000520B)
+#define NVC9B0_VC1_VLD_ERR_PIC_DATA_OUT_BUF_FULL_TIME_OUT                       (0x00005300)
+#define NVC9B0_MPEG12_VLD_ERR_SLC_DATA_BUF_ADDR_OUT_OF_BOUNDS                   (0x00006100)
+#define NVC9B0_MPEG12_VLD_ERR_BITSTREAM_ERROR                                   (0x00006101)
+#define NVC9B0_MPEG12_VLD_ERR_SLC_DATA_OUT_SIZE_NOT_MULT256                     (0x00006200)
+#define NVC9B0_MPEG12_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID                        (0x00006201)
+#define NVC9B0_MPEG12_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID                      (0x00006202)
+#define NVC9B0_MPEG12_VLD_ERR_SLC_DATA_OUT_BUF_TOO_SMALL                        (0x00006203)
+#define NVC9B0_MPEG12_VLD_ERR_DATA_INFO_IN_BUF_TOO_SMALL                        (0x00006204)
+#define NVC9B0_MPEG12_VLD_ERR_BITSTREAM_EMPTY                                   (0x00006205)
+#define NVC9B0_MPEG12_VLD_ERR_INVALID_PIC_STRUCTURE                             (0x00006206)
+#define NVC9B0_MPEG12_VLD_ERR_INVALID_PIC_CODING_TYPE                           (0x00006207)
+#define NVC9B0_MPEG12_VLD_ERR_FRAME_WIDTH_TOO_LARGE                             (0x00006208)
+#define NVC9B0_MPEG12_VLD_ERR_FRAME_HEIGHT_TOO_LARGE                            (0x00006209)
+#define NVC9B0_MPEG12_VLD_ERR_SLC_DATA_OUT_BUF_FULL_TIME_OUT                    (0x00006300)
+#define NVC9B0_CMN_VLD_ERR_PDEC_RETURNED_ERROR                                  (0x00007101)
+#define NVC9B0_CMN_VLD_ERR_EDOB_FLUSH_TIME_OUT                                  (0x00007102)
+#define NVC9B0_CMN_VLD_ERR_EDOB_REWIND_TIME_OUT                                 (0x00007103)
+#define NVC9B0_CMN_VLD_ERR_VLD_WD_TIME_OUT                                      (0x00007104)
+#define NVC9B0_CMN_VLD_ERR_NUM_SLICES_ZERO                                      (0x00007105)
+#define NVC9B0_MPEG4_VLD_ERR_PIC_DATA_BUF_ADDR_OUT_OF_BOUND                     (0x00008100)
+#define NVC9B0_MPEG4_VLD_ERR_BITSTREAM_ERROR                                    (0x00008101)
+#define NVC9B0_MPEG4_VLD_ERR_PIC_HDR_OUT_SIZE_NOT_MULT256                       (0x00008200)
+#define NVC9B0_MPEG4_VLD_ERR_PIC_DATA_OUT_SIZE_NOT_MULT256                      (0x00008201)
+#define NVC9B0_MPEG4_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID                         (0x00008202)
+#define NVC9B0_MPEG4_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID                       (0x00008203)
+#define NVC9B0_MPEG4_VLD_ERR_CTX_DMA_ID_PIC_HDR_OUT_INVALID                     (0x00008204)
+#define NVC9B0_MPEG4_VLD_ERR_SLC_HDR_OUT_BUF_TOO_SMALL                          (0x00008205)
+#define NVC9B0_MPEG4_VLD_ERR_PIC_HDR_OUT_BUF_ALREADY_VALID                      (0x00008206)
+#define NVC9B0_MPEG4_VLD_ERR_PIC_DATA_OUT_BUF_TOO_SMALL                         (0x00008207)
+#define NVC9B0_MPEG4_VLD_ERR_DATA_INFO_IN_BUF_TOO_SMALL                         (0x00008208)
+#define NVC9B0_MPEG4_VLD_ERR_BITSTREAM_EMPTY                                    (0x00008209)
+#define NVC9B0_MPEG4_VLD_ERR_FRAME_WIDTH_TOO_LARGE                              (0x0000820A)
+#define NVC9B0_MPEG4_VLD_ERR_FRAME_HEIGHT_TOO_LARGE                             (0x0000820B)
+#define NVC9B0_MPEG4_VLD_ERR_PIC_DATA_OUT_BUF_FULL_TIME_OUT                     (0x00051E01)
+#define NVC9B0_DEC_ERROR_MPEG12_APPTIMER_EXPIRED                                (0xDEC10001)
+#define NVC9B0_DEC_ERROR_MPEG12_MVTIMER_EXPIRED                                 (0xDEC10002)
+#define NVC9B0_DEC_ERROR_MPEG12_INVALID_TOKEN                                   (0xDEC10003)
+#define NVC9B0_DEC_ERROR_MPEG12_SLICEDATA_MISSING                               (0xDEC10004)
+#define NVC9B0_DEC_ERROR_MPEG12_HWERR_INTERRUPT                                 (0xDEC10005)
+#define NVC9B0_DEC_ERROR_MPEG12_DETECTED_VLD_FAILURE                            (0xDEC10006)
+#define NVC9B0_DEC_ERROR_MPEG12_PICTURE_INIT                                    (0xDEC10100)
+#define NVC9B0_DEC_ERROR_MPEG12_STATEMACHINE_FAILURE                            (0xDEC10101)
+#define NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_PIC                               (0xDEC10901)
+#define NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_UCODE                             (0xDEC10902)
+#define NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_FC                                (0xDEC10903)
+#define NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_SLH                               (0xDEC10904)
+#define NVC9B0_DEC_ERROR_MPEG12_INVALID_UCODE_SIZE                              (0xDEC10905)
+#define NVC9B0_DEC_ERROR_MPEG12_INVALID_SLICE_COUNT                             (0xDEC10906)
+#define NVC9B0_DEC_ERROR_VC1_APPTIMER_EXPIRED                                   (0xDEC20001)
+#define NVC9B0_DEC_ERROR_VC1_MVTIMER_EXPIRED                                    (0xDEC20002)
+#define NVC9B0_DEC_ERROR_VC1_INVALID_TOKEN                                      (0xDEC20003)
+#define NVC9B0_DEC_ERROR_VC1_SLICEDATA_MISSING                                  (0xDEC20004)
+#define NVC9B0_DEC_ERROR_VC1_HWERR_INTERRUPT                                    (0xDEC20005)
+#define NVC9B0_DEC_ERROR_VC1_DETECTED_VLD_FAILURE                               (0xDEC20006)
+#define NVC9B0_DEC_ERROR_VC1_TIMEOUT_POLLING_FOR_DATA                           (0xDEC20007)
+#define NVC9B0_DEC_ERROR_VC1_PDEC_PIC_END_UNALIGNED                             (0xDEC20008)
+#define NVC9B0_DEC_ERROR_VC1_WDTIMER_EXPIRED                                    (0xDEC20009)
+#define NVC9B0_DEC_ERROR_VC1_ERRINTSTART                                        (0xDEC20010)
+#define NVC9B0_DEC_ERROR_VC1_IQT_ERRINT                                         (0xDEC20011)
+#define NVC9B0_DEC_ERROR_VC1_MC_ERRINT                                          (0xDEC20012)
+#define NVC9B0_DEC_ERROR_VC1_MC_IQT_ERRINT                                      (0xDEC20013)
+#define NVC9B0_DEC_ERROR_VC1_REC_ERRINT                                         (0xDEC20014)
+#define NVC9B0_DEC_ERROR_VC1_REC_IQT_ERRINT                                     (0xDEC20015)
+#define NVC9B0_DEC_ERROR_VC1_REC_MC_ERRINT                                      (0xDEC20016)
+#define NVC9B0_DEC_ERROR_VC1_REC_MC_IQT_ERRINT                                  (0xDEC20017)
+#define NVC9B0_DEC_ERROR_VC1_DBF_ERRINT                                         (0xDEC20018)
+#define NVC9B0_DEC_ERROR_VC1_DBF_IQT_ERRINT                                     (0xDEC20019)
+#define NVC9B0_DEC_ERROR_VC1_DBF_MC_ERRINT                                      (0xDEC2001A)
+#define NVC9B0_DEC_ERROR_VC1_DBF_MC_IQT_ERRINT                                  (0xDEC2001B)
+#define NVC9B0_DEC_ERROR_VC1_DBF_REC_ERRINT                                     (0xDEC2001C)
+#define NVC9B0_DEC_ERROR_VC1_DBF_REC_IQT_ERRINT                                 (0xDEC2001D)
+#define NVC9B0_DEC_ERROR_VC1_DBF_REC_MC_ERRINT                                  (0xDEC2001E)
+#define NVC9B0_DEC_ERROR_VC1_DBF_REC_MC_IQT_ERRINT                              (0xDEC2001F)
+#define NVC9B0_DEC_ERROR_VC1_PICTURE_INIT                                       (0xDEC20100)
+#define NVC9B0_DEC_ERROR_VC1_STATEMACHINE_FAILURE                               (0xDEC20101)
+#define NVC9B0_DEC_ERROR_VC1_INVALID_CTXID_PIC                                  (0xDEC20901)
+#define NVC9B0_DEC_ERROR_VC1_INVALID_CTXID_UCODE                                (0xDEC20902)
+#define NVC9B0_DEC_ERROR_VC1_INVALID_CTXID_FC                                   (0xDEC20903)
+#define NVC9B0_DEC_ERROR_VC1_INVAILD_CTXID_SLH                                  (0xDEC20904)
+#define NVC9B0_DEC_ERROR_VC1_INVALID_UCODE_SIZE                                 (0xDEC20905)
+#define NVC9B0_DEC_ERROR_VC1_INVALID_SLICE_COUNT                                (0xDEC20906)
+#define NVC9B0_DEC_ERROR_H264_APPTIMER_EXPIRED                                  (0xDEC30001)
+#define NVC9B0_DEC_ERROR_H264_MVTIMER_EXPIRED                                   (0xDEC30002)
+#define NVC9B0_DEC_ERROR_H264_INVALID_TOKEN                                     (0xDEC30003)
+#define NVC9B0_DEC_ERROR_H264_SLICEDATA_MISSING                                 (0xDEC30004)
+#define NVC9B0_DEC_ERROR_H264_HWERR_INTERRUPT                                   (0xDEC30005)
+#define NVC9B0_DEC_ERROR_H264_DETECTED_VLD_FAILURE                              (0xDEC30006)
+#define NVC9B0_DEC_ERROR_H264_ERRINTSTART                                       (0xDEC30010)
+#define NVC9B0_DEC_ERROR_H264_IQT_ERRINT                                        (0xDEC30011)
+#define NVC9B0_DEC_ERROR_H264_MC_ERRINT                                         (0xDEC30012)
+#define NVC9B0_DEC_ERROR_H264_MC_IQT_ERRINT                                     (0xDEC30013)
+#define NVC9B0_DEC_ERROR_H264_REC_ERRINT                                        (0xDEC30014)
+#define NVC9B0_DEC_ERROR_H264_REC_IQT_ERRINT                                    (0xDEC30015)
+#define NVC9B0_DEC_ERROR_H264_REC_MC_ERRINT                                     (0xDEC30016)
+#define NVC9B0_DEC_ERROR_H264_REC_MC_IQT_ERRINT                                 (0xDEC30017)
+#define NVC9B0_DEC_ERROR_H264_DBF_ERRINT                                        (0xDEC30018)
+#define NVC9B0_DEC_ERROR_H264_DBF_IQT_ERRINT                                    (0xDEC30019)
+#define NVC9B0_DEC_ERROR_H264_DBF_MC_ERRINT                                     (0xDEC3001A)
+#define NVC9B0_DEC_ERROR_H264_DBF_MC_IQT_ERRINT                                 (0xDEC3001B)
+#define NVC9B0_DEC_ERROR_H264_DBF_REC_ERRINT                                    (0xDEC3001C)
+#define NVC9B0_DEC_ERROR_H264_DBF_REC_IQT_ERRINT                                (0xDEC3001D)
+#define NVC9B0_DEC_ERROR_H264_DBF_REC_MC_ERRINT                                 (0xDEC3001E)
+#define NVC9B0_DEC_ERROR_H264_DBF_REC_MC_IQT_ERRINT                             (0xDEC3001F)
+#define NVC9B0_DEC_ERROR_H264_PICTURE_INIT                                      (0xDEC30100)
+#define NVC9B0_DEC_ERROR_H264_STATEMACHINE_FAILURE                              (0xDEC30101)
+#define NVC9B0_DEC_ERROR_H264_INVALID_CTXID_PIC                                 (0xDEC30901)
+#define NVC9B0_DEC_ERROR_H264_INVALID_CTXID_UCODE                               (0xDEC30902)
+#define NVC9B0_DEC_ERROR_H264_INVALID_CTXID_FC                                  (0xDEC30903)
+#define NVC9B0_DEC_ERROR_H264_INVALID_CTXID_SLH                                 (0xDEC30904)
+#define NVC9B0_DEC_ERROR_H264_INVALID_UCODE_SIZE                                (0xDEC30905)
+#define NVC9B0_DEC_ERROR_H264_INVALID_SLICE_COUNT                               (0xDEC30906)
+#define NVC9B0_DEC_ERROR_MPEG4_APPTIMER_EXPIRED                                 (0xDEC40001)
+#define NVC9B0_DEC_ERROR_MPEG4_MVTIMER_EXPIRED                                  (0xDEC40002)
+#define NVC9B0_DEC_ERROR_MPEG4_INVALID_TOKEN                                    (0xDEC40003)
+#define NVC9B0_DEC_ERROR_MPEG4_SLICEDATA_MISSING                                (0xDEC40004)
+#define NVC9B0_DEC_ERROR_MPEG4_HWERR_INTERRUPT                                  (0xDEC40005)
+#define NVC9B0_DEC_ERROR_MPEG4_DETECTED_VLD_FAILURE                             (0xDEC40006)
+#define NVC9B0_DEC_ERROR_MPEG4_TIMEOUT_POLLING_FOR_DATA                         (0xDEC40007)
+#define NVC9B0_DEC_ERROR_MPEG4_PDEC_PIC_END_UNALIGNED                           (0xDEC40008)
+#define NVC9B0_DEC_ERROR_MPEG4_WDTIMER_EXPIRED                                  (0xDEC40009)
+#define NVC9B0_DEC_ERROR_MPEG4_ERRINTSTART                                      (0xDEC40010)
+#define NVC9B0_DEC_ERROR_MPEG4_IQT_ERRINT                                       (0xDEC40011)
+#define NVC9B0_DEC_ERROR_MPEG4_MC_ERRINT                                        (0xDEC40012)
+#define NVC9B0_DEC_ERROR_MPEG4_MC_IQT_ERRINT                                    (0xDEC40013)
+#define NVC9B0_DEC_ERROR_MPEG4_REC_ERRINT                                       (0xDEC40014)
+#define NVC9B0_DEC_ERROR_MPEG4_REC_IQT_ERRINT                                   (0xDEC40015)
+#define NVC9B0_DEC_ERROR_MPEG4_REC_MC_ERRINT                                    (0xDEC40016)
+#define NVC9B0_DEC_ERROR_MPEG4_REC_MC_IQT_ERRINT                                (0xDEC40017)
+#define NVC9B0_DEC_ERROR_MPEG4_DBF_ERRINT                                       (0xDEC40018)
+#define NVC9B0_DEC_ERROR_MPEG4_DBF_IQT_ERRINT                                   (0xDEC40019)
+#define NVC9B0_DEC_ERROR_MPEG4_DBF_MC_ERRINT                                    (0xDEC4001A)
+#define NVC9B0_DEC_ERROR_MPEG4_DBF_MC_IQT_ERRINT                                (0xDEC4001B)
+#define NVC9B0_DEC_ERROR_MPEG4_DBF_REC_ERRINT                                   (0xDEC4001C)
+#define NVC9B0_DEC_ERROR_MPEG4_DBF_REC_IQT_ERRINT                               (0xDEC4001D)
+#define NVC9B0_DEC_ERROR_MPEG4_DBF_REC_MC_ERRINT                                (0xDEC4001E)
+#define NVC9B0_DEC_ERROR_MPEG4_DBF_REC_MC_IQT_ERRINT                            (0xDEC4001F)
+#define NVC9B0_DEC_ERROR_MPEG4_PICTURE_INIT                                     (0xDEC40100)
+#define NVC9B0_DEC_ERROR_MPEG4_STATEMACHINE_FAILURE                             (0xDEC40101)
+#define NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_PIC                                (0xDEC40901)
+#define NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_UCODE                              (0xDEC40902)
+#define NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_FC                                 (0xDEC40903)
+#define NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_SLH                                (0xDEC40904)
+#define NVC9B0_DEC_ERROR_MPEG4_INVALID_UCODE_SIZE                               (0xDEC40905)
+#define NVC9B0_DEC_ERROR_MPEG4_INVALID_SLICE_COUNT                              (0xDEC40906)
+
+#ifdef __cplusplus
+};     /* extern "C" */
+#endif
+#endif // clc9b0_h
--- a/extra/nv_gpu_driver/nvdec_drv.h
+++ b/extra/nv_gpu_driver/nvdec_drv.h
--- a/test/mockgpu/nv/nvdriver.py
+++ b/test/mockgpu/nv/nvdriver.py
@@ -100,6 +100,9 @@ class NVDriver(VirtDriver):
      assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVGPU)
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = NVSubDevice(self.object_by_handle[struct.hObjectParent])
+    elif struct.hClass == nv_gpu.NV01_MEMORY_VIRTUAL:
+      assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVGPU)
+      struct.hObjectNew = self._alloc_handle()
    elif struct.hClass == nv_gpu.TURING_USERMODE_A:
      assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVSubDevice)
      struct.hObjectNew = self._alloc_handle()
@@ -215,6 +218,8 @@ class NVDriver(VirtDriver):
    elif nr == nv_gpu.NV_ESC_RM_FREE:
      st = nv_gpu.NVOS00_PARAMETERS.from_address(argp)
      self.object_by_handle.pop(st.hObjectOld)
+    elif nr == nv_gpu.NV_ESC_RM_MAP_MEMORY_DMA:
+      pass # mappings are same as uvm
    elif nr == nv_gpu.NV_ESC_CARD_INFO:
      for i,gpu in enumerate(self.gpus.values()):
        st = nv_gpu.nv_ioctl_card_info_t.from_address(argp + i * ctypes.sizeof(nv_gpu.nv_ioctl_card_info_t))
--- a/test/testextra/test_hevc.py
+++ b/test/testextra/test_hevc.py
@@ -0,0 +1,65 @@
+import unittest
+
+from tinygrad import Tensor, Device
+from extra.hevc.hevc import parse_hevc_file_headers, nv_gpu
+
+class TestHevc(unittest.TestCase):
+  def test_hevc_parser(self):
+    url = "https://github.com/haraschax/filedump/raw/09a497959f7fa6fd8dba501a25f2cdb3a41ecb12/comma_video.hevc"
+    hevc_tensor = Tensor.from_url(url, device="CPU")
+
+    dat = bytes(hevc_tensor.data())
+    opaque, frame_info, w, h, luma_w, luma_h, chroma_off = parse_hevc_file_headers(dat, device=Device.DEFAULT)
+
+    def _test_common(frame, bts):  # checks applied to each inspected frame; must read `frame`, not the outer frame0
+      self.assertEqual(frame.pic_width_in_luma_samples, 1952)
+      self.assertEqual(frame.pic_height_in_luma_samples, 1216)
+      self.assertEqual(frame.chroma_format_idc, 1)
+      self.assertEqual(frame.bit_depth_luma, 8)
+      self.assertEqual(frame.bit_depth_chroma, 8)
+      self.assertEqual(frame.log2_min_luma_coding_block_size, 3)
+      self.assertEqual(frame.log2_max_luma_coding_block_size, 5)
+      self.assertEqual(frame.log2_min_transform_block_size, 2)
+      self.assertEqual(frame.log2_max_transform_block_size, 5)
+      self.assertEqual(frame.num_tile_columns, 3)
+      self.assertEqual(frame.num_tile_rows, 1)
+      self.assertEqual(frame.colMvBuffersize, 589)
+      self.assertEqual(frame.HevcSaoBufferOffset, 2888)
+      self.assertEqual(frame.HevcBsdCtrlOffset, 25992)
+      self.assertEqual(frame.v1.hevc_main10_444_ext.HevcFltAboveOffset, 26714)
+      self.assertEqual(frame.v1.hevc_main10_444_ext.HevcSaoAboveOffset, 36214)
+
+      # tiles
+      self.assertEqual(bytes(bts[0x200:0x210]), b'\x18\x00&\x00\x18\x00&\x00\r\x00&\x00\x00\x00\x00\x00')
+
+    frame0 = nv_gpu.nvdec_hevc_pic_s.from_buffer(opaque[0].data())
+    _test_common(frame0, opaque[0].data())
+    self.assertEqual(frame0.stream_len, 148063)
+    self.assertEqual(frame0.IDR_picture_flag, 1)
+    self.assertEqual(frame0.RAP_picture_flag, 1)
+    self.assertEqual(frame0.sw_hdr_skip_length, 0)
+    self.assertEqual(frame0.num_ref_frames, 0)
+
+    frame1 = nv_gpu.nvdec_hevc_pic_s.from_buffer(opaque[1].data())
+    _test_common(frame1, opaque[1].data())
+    self.assertEqual(frame1.stream_len, 57110)
+    self.assertEqual(frame1.IDR_picture_flag, 0)
+    self.assertEqual(frame1.RAP_picture_flag, 0)
+    self.assertEqual(frame1.sw_hdr_skip_length, 9)
+    self.assertEqual(frame1.num_ref_frames, 1)
+    self.assertEqual(list(frame1.initreflistidxl0), [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+    self.assertEqual(list(frame1.initreflistidxl1), [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+    self.assertEqual(list(frame1.RefDiffPicOrderCnts), [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+
+    frame3 = nv_gpu.nvdec_hevc_pic_s.from_buffer(opaque[3].data())
+    _test_common(frame3, opaque[3].data())
+    self.assertEqual(frame3.stream_len, 47036)
+    self.assertEqual(frame3.IDR_picture_flag, 0)
+    self.assertEqual(frame3.RAP_picture_flag, 0)
+    self.assertEqual(frame3.sw_hdr_skip_length, 9)
+    self.assertEqual(frame3.num_ref_frames, 1)
+    self.assertEqual(list(frame3.initreflistidxl0), [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+    self.assertEqual(list(frame3.initreflistidxl1), [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+    self.assertEqual(list(frame3.RefDiffPicOrderCnts), [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+if __name__ == "__main__":
+  unittest.main()
--- a/tinygrad/device.py
+++ b/tinygrad/device.py
@@ -238,6 +238,7 @@ class Allocator(Generic[DeviceType]):
  # def _as_buffer(self, src) -> memoryview:
  # def _offset(self, buf, size:int, offset:int):
  # def _transfer(self, dest, src, sz:int, src_dev, dest_dev):
+  def _encode_decode(self, bufout, bufin, desc, hist:list, shape:tuple[int,...], frame_pos:int): raise NotImplementedError("need encdec") # optional

 class LRUAllocator(Allocator, Generic[DeviceType]):
  """
--- a/tinygrad/engine/realize.py
+++ b/tinygrad/engine/realize.py
@@ -141,6 +141,19 @@ class BufferCopy(Runner):
 class BufferXfer(BufferCopy):
  def copy(self, dest, src): dest.allocator._transfer(dest._buf, src._buf, dest.nbytes, src_dev=src.allocator.dev, dest_dev=dest.allocator.dev)

+class EncDec(Runner):  # Runner for Ops.ENCDEC items: forwards the buffers to the output buffer's allocator._encode_decode
+  def __init__(self, encdec:UOp, total_sz:int, device:str):
+    self.shape, self.pos_var = encdec.arg[0], encdec.variables()[0].expr  # shape arg and the var_vals key used for frame_pos
+    name = f"enc/dec {total_sz/1e6:7.2f}M, HEVC" if total_sz >= 1e6 else f"enc/dec {total_sz:8d}, HEVC"
+    super().__init__(colored(name, "yellow"), device, Estimates(lds=total_sz, mem=total_sz))
+  def __call__(self, rawbufs:list[Buffer], var_vals:dict[str, int], wait=False):  # returns elapsed seconds if wait else None
+    st = time.perf_counter()
+    rawbufs[0].allocator._encode_decode(rawbufs[0]._buf, rawbufs[1]._buf, rawbufs[2]._buf,  # bufout, bufin, desc
+                                        [x._buf for x in rawbufs[3:]], self.shape, var_vals[self.pos_var])  # hist, shape, frame_pos
+    if wait:
+      Device[rawbufs[0].device].synchronize()
+      return time.perf_counter() - st
+
 # **************** method cache ****************

 method_cache: dict[tuple[str, type, bytes, tuple[int, ...], bool], CompiledRunner] = {}
@@ -201,6 +214,7 @@ si_lowerer = PatternMatcher([
  (UPat(Ops.COPY, name="copy"), lambda ctx,copy: ((BufferXfer(ctx[0].nbytes, ctx[0].device, ctx[1].device) \
      if hasattr(Device[ctx[0].device].allocator, '_transfer') and all_same([x.device.split(":")[0] for x in ctx]) \
      else BufferCopy(ctx[0].nbytes, ctx[0].device, ctx[1].device)), list(ctx))),
+  (UPat(Ops.ENCDEC, name="encdec"), lambda ctx,encdec: ((EncDec(encdec, ctx[0].nbytes, ctx[1].device)), list(ctx))),
 ])
 def lower_schedule_item(si:ScheduleItem) -> ExecItem:
  return ExecItem(*cast(tuple[Runner,list], si_lowerer.rewrite(si.ast, si.bufs)), si.metadata, si.fixedvars)
--- a/tinygrad/runtime/autogen/init.py
+++ b/tinygrad/runtime/autogen/init.py
@@ -4,6 +4,7 @@ from tinygrad.helpers import fetch, flatten, system, getenv
 root = (here:=pathlib.Path(__file__).parent).parents[2]
 nv_src = {"nv_570": "https://github.com/NVIDIA/open-gpu-kernel-modules/archive/81fe4fb417c8ac3b9bdcc1d56827d116743892a5.tar.gz",
          "nv_580": "https://github.com/NVIDIA/open-gpu-kernel-modules/archive/2af9f1f0f7de4988432d4ae875b5858ffdb09cc2.tar.gz"}
+ffmpeg_src = "https://ffmpeg.org/releases/ffmpeg-8.0.1.tar.gz"
 macossdk = "/var/db/xcode_select_link/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk"

 def load(name, dll, files, **kwargs):
@@ -27,6 +28,7 @@ def __getattr__(nm):
    case "libc": return load("libc", ["find_library('c')"], lambda: (
      [i for i in system("dpkg -L libc6-dev").split() if 'sys/mman.h' in i or 'sys/syscall.h' in i] +
      ["/usr/include/string.h", "/usr/include/elf.h", "/usr/include/unistd.h", "/usr/include/asm-generic/mman-common.h"]), use_errno=True)
+    case "avcodec": return load("avcodec", [], ["{}/libavcodec/hevc/hevc.h", "{}/libavcodec/cbs_h265.h"], tarball=ffmpeg_src)
    case "opencl": return load("opencl", ["find_library('OpenCL')"], ["/usr/include/CL/cl.h"])
    case "cuda": return load("cuda", ["find_library('cuda')"], ["/usr/include/cuda.h"], args=["-D__CUDA_API_VERSION_INTERNAL"], parse_macros=False)
    case "nvrtc": return load("nvrtc", ["find_library('nvrtc')"], ["/usr/include/nvrtc.h"])
@@ -34,14 +36,14 @@ def __getattr__(nm):
    case "kfd": return load("kfd", [], ["/usr/include/linux/kfd_ioctl.h"])
    case "nv_570" | "nv_580":
      return load(nm, [], [
-        *[root/"extra/nv_gpu_driver"/s for s in ["clc6c0qmd.h","clcec0qmd.h"]], "{}/kernel-open/common/inc/nvmisc.h",
-        *[f"{{}}/src/common/sdk/nvidia/inc/class/cl{s}.h" for s in ["0000", "0080", "2080", "2080_notification", "c56f", "c86f", "c96f", "c761",
+        *[root/"extra/nv_gpu_driver"/s for s in ["clc9b0.h", "clc6c0qmd.h","clcec0qmd.h", "nvdec_drv.h"]], "{}/kernel-open/common/inc/nvmisc.h",
+        *[f"{{}}/src/common/sdk/nvidia/inc/class/cl{s}.h" for s in ["0000", "0070", "0080", "2080", "2080_notification", "c56f", "c86f", "c96f", "c761",
                                                                    "83de", "c6c0", "cdc0"]],
        *[f"{{}}/kernel-open/nvidia-uvm/{s}.h" for s in ["clc6b5", "clc9b5", "uvm_ioctl", "uvm_linux_ioctl", "hwref/ampere/ga100/dev_fault"]],
        *[f"{{}}/src/nvidia/arch/nvalloc/unix/include/nv{s}.h" for s in ["_escape", "-ioctl", "-ioctl-numbers",
                                                                         "-ioctl-numa", "-unix-nvos-params-wrappers"]],
        *[f"{{}}/src/common/sdk/nvidia/inc/{s}.h" for s in ["alloc/alloc_channel", "nvos", "ctrl/ctrlc36f", "ctrl/ctrlcb33",
-                                                            "ctrl/ctrla06c", "ctrl/ctrl90f1"]],
+                                                            "ctrl/ctrla06c", "ctrl/ctrl90f1", "ctrl/ctrla06f/ctrla06fgpfifo"]],
        *[f"{{}}/src/common/sdk/nvidia/inc/ctrl/ctrl{s}/*.h" for s in ["0000", "0080", "2080", "83de"]],
        "{}/kernel-open/common/inc/nvstatus.h", "{}/src/nvidia/generated/g_allclasses.h"
      ], args=[
@@ -129,4 +131,4 @@ python3 src/compiler/builtin_types_h.py gen/builtin_types.h""", cwd=path, shell=
      return load("metal", ["find_library('Metal')"],[f"{macossdk}/System/Library/Frameworks/Metal.framework/Headers/MTL{s}.h" for s in
                  ["ComputeCommandEncoder", "ComputePipeline", "CommandQueue", "Device", "IndirectCommandBuffer", "Resource", "CommandEncoder"]],
                  args=["-xobjective-c","-isysroot",macossdk], types={"dispatch_data_t":"objc.id_"})
-    case _: raise AttributeError(f"no such autogen: {nm}")
+    case _: raise AttributeError(f"no such autogen: {nm}")
--- a/tinygrad/runtime/autogen/avcodec.py
+++ b/tinygrad/runtime/autogen/avcodec.py
@@ -0,0 +1,543 @@
+# mypy: ignore-errors
+import ctypes
+from tinygrad.helpers import unwrap
+from tinygrad.runtime.support.c import Struct, CEnum, _IO, _IOW, _IOR, _IOWR
+enum_HEVCNALUnitType = CEnum(ctypes.c_uint32)  # HEVC NAL unit type codes; the defines below cover every value 0..63
+HEVC_NAL_TRAIL_N = enum_HEVCNALUnitType.define('HEVC_NAL_TRAIL_N', 0)
+HEVC_NAL_TRAIL_R = enum_HEVCNALUnitType.define('HEVC_NAL_TRAIL_R', 1)
+HEVC_NAL_TSA_N = enum_HEVCNALUnitType.define('HEVC_NAL_TSA_N', 2)
+HEVC_NAL_TSA_R = enum_HEVCNALUnitType.define('HEVC_NAL_TSA_R', 3)
+HEVC_NAL_STSA_N = enum_HEVCNALUnitType.define('HEVC_NAL_STSA_N', 4)
+HEVC_NAL_STSA_R = enum_HEVCNALUnitType.define('HEVC_NAL_STSA_R', 5)
+HEVC_NAL_RADL_N = enum_HEVCNALUnitType.define('HEVC_NAL_RADL_N', 6)
+HEVC_NAL_RADL_R = enum_HEVCNALUnitType.define('HEVC_NAL_RADL_R', 7)
+HEVC_NAL_RASL_N = enum_HEVCNALUnitType.define('HEVC_NAL_RASL_N', 8)
+HEVC_NAL_RASL_R = enum_HEVCNALUnitType.define('HEVC_NAL_RASL_R', 9)
+HEVC_NAL_VCL_N10 = enum_HEVCNALUnitType.define('HEVC_NAL_VCL_N10', 10)
+HEVC_NAL_VCL_R11 = enum_HEVCNALUnitType.define('HEVC_NAL_VCL_R11', 11)
+HEVC_NAL_VCL_N12 = enum_HEVCNALUnitType.define('HEVC_NAL_VCL_N12', 12)
+HEVC_NAL_VCL_R13 = enum_HEVCNALUnitType.define('HEVC_NAL_VCL_R13', 13)
+HEVC_NAL_VCL_N14 = enum_HEVCNALUnitType.define('HEVC_NAL_VCL_N14', 14)
+HEVC_NAL_VCL_R15 = enum_HEVCNALUnitType.define('HEVC_NAL_VCL_R15', 15)
+HEVC_NAL_BLA_W_LP = enum_HEVCNALUnitType.define('HEVC_NAL_BLA_W_LP', 16)
+HEVC_NAL_BLA_W_RADL = enum_HEVCNALUnitType.define('HEVC_NAL_BLA_W_RADL', 17)
+HEVC_NAL_BLA_N_LP = enum_HEVCNALUnitType.define('HEVC_NAL_BLA_N_LP', 18)
+HEVC_NAL_IDR_W_RADL = enum_HEVCNALUnitType.define('HEVC_NAL_IDR_W_RADL', 19)
+HEVC_NAL_IDR_N_LP = enum_HEVCNALUnitType.define('HEVC_NAL_IDR_N_LP', 20)
+HEVC_NAL_CRA_NUT = enum_HEVCNALUnitType.define('HEVC_NAL_CRA_NUT', 21)
+HEVC_NAL_RSV_IRAP_VCL22 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_IRAP_VCL22', 22)
+HEVC_NAL_RSV_IRAP_VCL23 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_IRAP_VCL23', 23)
+HEVC_NAL_RSV_VCL24 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL24', 24)
+HEVC_NAL_RSV_VCL25 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL25', 25)
+HEVC_NAL_RSV_VCL26 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL26', 26)
+HEVC_NAL_RSV_VCL27 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL27', 27)
+HEVC_NAL_RSV_VCL28 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL28', 28)
+HEVC_NAL_RSV_VCL29 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL29', 29)
+HEVC_NAL_RSV_VCL30 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL30', 30)
+HEVC_NAL_RSV_VCL31 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL31', 31)
+HEVC_NAL_VPS = enum_HEVCNALUnitType.define('HEVC_NAL_VPS', 32)  # 32..34 are the VPS/SPS/PPS entries
+HEVC_NAL_SPS = enum_HEVCNALUnitType.define('HEVC_NAL_SPS', 33)
+HEVC_NAL_PPS = enum_HEVCNALUnitType.define('HEVC_NAL_PPS', 34)
+HEVC_NAL_AUD = enum_HEVCNALUnitType.define('HEVC_NAL_AUD', 35)
+HEVC_NAL_EOS_NUT = enum_HEVCNALUnitType.define('HEVC_NAL_EOS_NUT', 36)
+HEVC_NAL_EOB_NUT = enum_HEVCNALUnitType.define('HEVC_NAL_EOB_NUT', 37)
+HEVC_NAL_FD_NUT = enum_HEVCNALUnitType.define('HEVC_NAL_FD_NUT', 38)
+HEVC_NAL_SEI_PREFIX = enum_HEVCNALUnitType.define('HEVC_NAL_SEI_PREFIX', 39)
+HEVC_NAL_SEI_SUFFIX = enum_HEVCNALUnitType.define('HEVC_NAL_SEI_SUFFIX', 40)
+HEVC_NAL_RSV_NVCL41 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_NVCL41', 41)
+HEVC_NAL_RSV_NVCL42 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_NVCL42', 42)
+HEVC_NAL_RSV_NVCL43 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_NVCL43', 43)
+HEVC_NAL_RSV_NVCL44 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_NVCL44', 44)
+HEVC_NAL_RSV_NVCL45 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_NVCL45', 45)
+HEVC_NAL_RSV_NVCL46 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_NVCL46', 46)
+HEVC_NAL_RSV_NVCL47 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_NVCL47', 47)
+HEVC_NAL_UNSPEC48 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC48', 48)  # 48..63 are all named UNSPEC<n>
+HEVC_NAL_UNSPEC49 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC49', 49)
+HEVC_NAL_UNSPEC50 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC50', 50)
+HEVC_NAL_UNSPEC51 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC51', 51)
+HEVC_NAL_UNSPEC52 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC52', 52)
+HEVC_NAL_UNSPEC53 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC53', 53)
+HEVC_NAL_UNSPEC54 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC54', 54)
+HEVC_NAL_UNSPEC55 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC55', 55)
+HEVC_NAL_UNSPEC56 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC56', 56)
+HEVC_NAL_UNSPEC57 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC57', 57)
+HEVC_NAL_UNSPEC58 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC58', 58)
+HEVC_NAL_UNSPEC59 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC59', 59)
+HEVC_NAL_UNSPEC60 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC60', 60)
+HEVC_NAL_UNSPEC61 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC61', 61)
+HEVC_NAL_UNSPEC62 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC62', 62)
+HEVC_NAL_UNSPEC63 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC63', 63)
+
+enum_HEVCSliceType = CEnum(ctypes.c_uint32)  # slice type codes: B=0, P=1, I=2
+HEVC_SLICE_B = enum_HEVCSliceType.define('HEVC_SLICE_B', 0)
+HEVC_SLICE_P = enum_HEVCSliceType.define('HEVC_SLICE_P', 1)
+HEVC_SLICE_I = enum_HEVCSliceType.define('HEVC_SLICE_I', 2)
+
+_anonenum0 = CEnum(ctypes.c_uint32)  # anonymous enum holding the HEVC_MAX_* / HEVC_MIN_* limit constants
+HEVC_MAX_LAYERS = _anonenum0.define('HEVC_MAX_LAYERS', 63)
+HEVC_MAX_SUB_LAYERS = _anonenum0.define('HEVC_MAX_SUB_LAYERS', 7)  # same value as the 7-entry sub_layer_* arrays in this file
+HEVC_MAX_LAYER_SETS = _anonenum0.define('HEVC_MAX_LAYER_SETS', 1024)
+HEVC_MAX_LAYER_ID = _anonenum0.define('HEVC_MAX_LAYER_ID', 63)
+HEVC_MAX_NUH_LAYER_ID = _anonenum0.define('HEVC_MAX_NUH_LAYER_ID', 62)
+HEVC_MAX_VPS_COUNT = _anonenum0.define('HEVC_MAX_VPS_COUNT', 16)
+HEVC_MAX_SPS_COUNT = _anonenum0.define('HEVC_MAX_SPS_COUNT', 16)
+HEVC_MAX_PPS_COUNT = _anonenum0.define('HEVC_MAX_PPS_COUNT', 64)
+HEVC_MAX_DPB_SIZE = _anonenum0.define('HEVC_MAX_DPB_SIZE', 16)
+HEVC_MAX_REFS = _anonenum0.define('HEVC_MAX_REFS', 16)
+HEVC_MAX_SHORT_TERM_REF_PIC_SETS = _anonenum0.define('HEVC_MAX_SHORT_TERM_REF_PIC_SETS', 64)
+HEVC_MAX_LONG_TERM_REF_PICS = _anonenum0.define('HEVC_MAX_LONG_TERM_REF_PICS', 32)
+HEVC_MIN_LOG2_CTB_SIZE = _anonenum0.define('HEVC_MIN_LOG2_CTB_SIZE', 4)
+HEVC_MAX_LOG2_CTB_SIZE = _anonenum0.define('HEVC_MAX_LOG2_CTB_SIZE', 6)
+HEVC_MAX_CPB_CNT = _anonenum0.define('HEVC_MAX_CPB_CNT', 32)
+HEVC_MAX_LUMA_PS = _anonenum0.define('HEVC_MAX_LUMA_PS', 35651584)
+HEVC_MAX_WIDTH = _anonenum0.define('HEVC_MAX_WIDTH', 16888)
+HEVC_MAX_HEIGHT = _anonenum0.define('HEVC_MAX_HEIGHT', 16888)
+HEVC_MAX_TILE_ROWS = _anonenum0.define('HEVC_MAX_TILE_ROWS', 22)
+HEVC_MAX_TILE_COLUMNS = _anonenum0.define('HEVC_MAX_TILE_COLUMNS', 20)
+HEVC_MAX_SLICE_SEGMENTS = _anonenum0.define('HEVC_MAX_SLICE_SEGMENTS', 600)
+HEVC_MAX_ENTRY_POINT_OFFSETS = _anonenum0.define('HEVC_MAX_ENTRY_POINT_OFFSETS', 2700)  # equals HEVC_MAX_TILE_COLUMNS * 135
+HEVC_MAX_PALETTE_PREDICTOR_SIZE = _anonenum0.define('HEVC_MAX_PALETTE_PREDICTOR_SIZE', 128)
+
+enum_HEVCScalabilityMask = CEnum(ctypes.c_uint32)  # single-bit flags within a 16-bit mask
+HEVC_SCALABILITY_DEPTH = enum_HEVCScalabilityMask.define('HEVC_SCALABILITY_DEPTH', 32768)  # 0x8000
+HEVC_SCALABILITY_MULTIVIEW = enum_HEVCScalabilityMask.define('HEVC_SCALABILITY_MULTIVIEW', 16384)  # 0x4000
+HEVC_SCALABILITY_SPATIAL = enum_HEVCScalabilityMask.define('HEVC_SCALABILITY_SPATIAL', 8192)  # 0x2000
+HEVC_SCALABILITY_AUXILIARY = enum_HEVCScalabilityMask.define('HEVC_SCALABILITY_AUXILIARY', 4096)  # 0x1000
+HEVC_SCALABILITY_MASK_MAX = enum_HEVCScalabilityMask.define('HEVC_SCALABILITY_MASK_MAX', 65535)  # 0xffff, all 16 bits set
+
+enum_HEVCAuxId = CEnum(ctypes.c_uint32)  # auxiliary id codes: ALPHA=1, DEPTH=2
+HEVC_AUX_ALPHA = enum_HEVCAuxId.define('HEVC_AUX_ALPHA', 1)
+HEVC_AUX_DEPTH = enum_HEVCAuxId.define('HEVC_AUX_DEPTH', 2)
+
+class struct_H265RawNALUnitHeader(Struct): pass  # declared empty first; the layout is attached via _fields_ below
+uint8_t = ctypes.c_ubyte  # module-level alias reused by the struct definitions that follow
+struct_H265RawNALUnitHeader._fields_ = [  # three one-byte fields
+  ('nal_unit_type', uint8_t),
+  ('nuh_layer_id', uint8_t),
+  ('nuh_temporal_id_plus1', uint8_t),
+]
+H265RawNALUnitHeader = struct_H265RawNALUnitHeader  # un-prefixed alias used when embedding this struct in others
+class struct_H265RawProfileTierLevel(Struct): pass  # general_* scalars followed by per-sub-layer arrays
+struct_H265RawProfileTierLevel._fields_ = [  # every element is one byte wide
+  ('general_profile_space', uint8_t),
+  ('general_tier_flag', uint8_t),
+  ('general_profile_idc', uint8_t),
+  ('general_profile_compatibility_flag', (uint8_t * 32)),  # 32 one-byte entries
+  ('general_progressive_source_flag', uint8_t),
+  ('general_interlaced_source_flag', uint8_t),
+  ('general_non_packed_constraint_flag', uint8_t),
+  ('general_frame_only_constraint_flag', uint8_t),
+  ('general_max_12bit_constraint_flag', uint8_t),
+  ('general_max_10bit_constraint_flag', uint8_t),
+  ('general_max_8bit_constraint_flag', uint8_t),
+  ('general_max_422chroma_constraint_flag', uint8_t),
+  ('general_max_420chroma_constraint_flag', uint8_t),
+  ('general_max_monochrome_constraint_flag', uint8_t),
+  ('general_intra_constraint_flag', uint8_t),
+  ('general_one_picture_only_constraint_flag', uint8_t),
+  ('general_lower_bit_rate_constraint_flag', uint8_t),
+  ('general_max_14bit_constraint_flag', uint8_t),
+  ('general_inbld_flag', uint8_t),
+  ('general_level_idc', uint8_t),
+  ('sub_layer_profile_present_flag', (uint8_t * 7)),  # sub_layer_* arrays hold 7 entries (HEVC_MAX_SUB_LAYERS is 7)
+  ('sub_layer_level_present_flag', (uint8_t * 7)),
+  ('sub_layer_profile_space', (uint8_t * 7)),
+  ('sub_layer_tier_flag', (uint8_t * 7)),
+  ('sub_layer_profile_idc', (uint8_t * 7)),
+  ('sub_layer_profile_compatibility_flag', ((uint8_t * 32) * 7)),  # 7 rows of 32, i.e. indexed [7][32]
+  ('sub_layer_progressive_source_flag', (uint8_t * 7)),
+  ('sub_layer_interlaced_source_flag', (uint8_t * 7)),
+  ('sub_layer_non_packed_constraint_flag', (uint8_t * 7)),
+  ('sub_layer_frame_only_constraint_flag', (uint8_t * 7)),
+  ('sub_layer_max_12bit_constraint_flag', (uint8_t * 7)),
+  ('sub_layer_max_10bit_constraint_flag', (uint8_t * 7)),
+  ('sub_layer_max_8bit_constraint_flag', (uint8_t * 7)),
+  ('sub_layer_max_422chroma_constraint_flag', (uint8_t * 7)),
+  ('sub_layer_max_420chroma_constraint_flag', (uint8_t * 7)),
+  ('sub_layer_max_monochrome_constraint_flag', (uint8_t * 7)),
+  ('sub_layer_intra_constraint_flag', (uint8_t * 7)),
+  ('sub_layer_one_picture_only_constraint_flag', (uint8_t * 7)),
+  ('sub_layer_lower_bit_rate_constraint_flag', (uint8_t * 7)),
+  ('sub_layer_max_14bit_constraint_flag', (uint8_t * 7)),
+  ('sub_layer_inbld_flag', (uint8_t * 7)),
+  ('sub_layer_level_idc', (uint8_t * 7)),
+]
+H265RawProfileTierLevel = struct_H265RawProfileTierLevel  # un-prefixed alias
+class struct_H265RawSubLayerHRDParameters(Struct): pass  # five parallel 32-entry arrays (HEVC_MAX_CPB_CNT is 32)
+uint32_t = ctypes.c_uint32  # module-level alias reused by later struct definitions
+struct_H265RawSubLayerHRDParameters._fields_ = [
+  ('bit_rate_value_minus1', (uint32_t * 32)),
+  ('cpb_size_value_minus1', (uint32_t * 32)),
+  ('cpb_size_du_value_minus1', (uint32_t * 32)),
+  ('bit_rate_du_value_minus1', (uint32_t * 32)),
+  ('cbr_flag', (uint8_t * 32)),
+]
+H265RawSubLayerHRDParameters = struct_H265RawSubLayerHRDParameters  # alias used by H265RawHRDParameters
+class struct_H265RawHRDParameters(Struct): pass  # scalar header, 7-entry arrays, then nested sub-layer tables
+uint16_t = ctypes.c_uint16  # module-level alias reused by later struct definitions
+struct_H265RawHRDParameters._fields_ = [
+  ('nal_hrd_parameters_present_flag', uint8_t),
+  ('vcl_hrd_parameters_present_flag', uint8_t),
+  ('sub_pic_hrd_params_present_flag', uint8_t),
+  ('tick_divisor_minus2', uint8_t),
+  ('du_cpb_removal_delay_increment_length_minus1', uint8_t),
+  ('sub_pic_cpb_params_in_pic_timing_sei_flag', uint8_t),
+  ('dpb_output_delay_du_length_minus1', uint8_t),
+  ('bit_rate_scale', uint8_t),
+  ('cpb_size_scale', uint8_t),
+  ('cpb_size_du_scale', uint8_t),
+  ('initial_cpb_removal_delay_length_minus1', uint8_t),
+  ('au_cpb_removal_delay_length_minus1', uint8_t),
+  ('dpb_output_delay_length_minus1', uint8_t),
+  ('fixed_pic_rate_general_flag', (uint8_t * 7)),  # 7-entry arrays from here on
+  ('fixed_pic_rate_within_cvs_flag', (uint8_t * 7)),
+  ('elemental_duration_in_tc_minus1', (uint16_t * 7)),
+  ('low_delay_hrd_flag', (uint8_t * 7)),
+  ('cpb_cnt_minus1', (uint8_t * 7)),
+  ('nal_sub_layer_hrd_parameters', (H265RawSubLayerHRDParameters * 7)),  # 7 sub-layer structs stored by value
+  ('vcl_sub_layer_hrd_parameters', (H265RawSubLayerHRDParameters * 7)),  # same element type and count as the NAL table
+]
+H265RawHRDParameters = struct_H265RawHRDParameters  # alias used by H265RawVUI
+class struct_H265RawVUI(Struct): pass  # layout attached below; embeds one H265RawHRDParameters by value
+struct_H265RawVUI._fields_ = [
+  ('aspect_ratio_info_present_flag', uint8_t),
+  ('aspect_ratio_idc', uint8_t),
+  ('sar_width', uint16_t),
+  ('sar_height', uint16_t),
+  ('overscan_info_present_flag', uint8_t),
+  ('overscan_appropriate_flag', uint8_t),
+  ('video_signal_type_present_flag', uint8_t),
+  ('video_format', uint8_t),
+  ('video_full_range_flag', uint8_t),
+  ('colour_description_present_flag', uint8_t),
+  ('colour_primaries', uint8_t),
+  ('transfer_characteristics', uint8_t),
+  ('matrix_coefficients', uint8_t),
+  ('chroma_loc_info_present_flag', uint8_t),
+  ('chroma_sample_loc_type_top_field', uint8_t),
+  ('chroma_sample_loc_type_bottom_field', uint8_t),
+  ('neutral_chroma_indication_flag', uint8_t),
+  ('field_seq_flag', uint8_t),
+  ('frame_field_info_present_flag', uint8_t),
+  ('default_display_window_flag', uint8_t),
+  ('def_disp_win_left_offset', uint16_t),
+  ('def_disp_win_right_offset', uint16_t),
+  ('def_disp_win_top_offset', uint16_t),
+  ('def_disp_win_bottom_offset', uint16_t),
+  ('vui_timing_info_present_flag', uint8_t),
+  ('vui_num_units_in_tick', uint32_t),
+  ('vui_time_scale', uint32_t),
+  ('vui_poc_proportional_to_timing_flag', uint8_t),
+  ('vui_num_ticks_poc_diff_one_minus1', uint32_t),
+  ('vui_hrd_parameters_present_flag', uint8_t),
+  ('hrd_parameters', H265RawHRDParameters),  # nested struct stored inline, not a pointer
+  ('bitstream_restriction_flag', uint8_t),
+  ('tiles_fixed_structure_flag', uint8_t),
+  ('motion_vectors_over_pic_boundaries_flag', uint8_t),
+  ('restricted_ref_pic_lists_flag', uint8_t),
+  ('min_spatial_segmentation_idc', uint16_t),
+  ('max_bytes_per_pic_denom', uint8_t),
+  ('max_bits_per_min_cu_denom', uint8_t),
+  ('log2_max_mv_length_horizontal', uint8_t),
+  ('log2_max_mv_length_vertical', uint8_t),
+]
+H265RawVUI = struct_H265RawVUI  # un-prefixed alias
+class struct_H265RawExtensionData(Struct): pass  # no _fields_ are assigned to this type anywhere in this file
+H265RawExtensionData = struct_H265RawExtensionData  # un-prefixed alias
+class struct_H265RawVPS(Struct): pass  # no _fields_ are assigned to this type anywhere in this file
+H265RawVPS = struct_H265RawVPS  # un-prefixed alias
+class struct_H265RawSTRefPicSet(Struct): pass  # layout attached below; every array has 16 entries
+struct_H265RawSTRefPicSet._fields_ = [
+  ('inter_ref_pic_set_prediction_flag', uint8_t),
+  ('delta_idx_minus1', uint8_t),
+  ('delta_rps_sign', uint8_t),
+  ('abs_delta_rps_minus1', uint16_t),
+  ('used_by_curr_pic_flag', (uint8_t * 16)),
+  ('use_delta_flag', (uint8_t * 16)),
+  ('num_negative_pics', uint8_t),
+  ('num_positive_pics', uint8_t),
+  ('delta_poc_s0_minus1', (uint16_t * 16)),
+  ('used_by_curr_pic_s0_flag', (uint8_t * 16)),
+  ('delta_poc_s1_minus1', (uint16_t * 16)),
+  ('used_by_curr_pic_s1_flag', (uint8_t * 16)),
+]
+H265RawSTRefPicSet = struct_H265RawSTRefPicSet  # alias used by H265RawSliceHeader
+class struct_H265RawScalingList(Struct): pass  # four tables, each with outer dimensions [4][6]
+int16_t = ctypes.c_int16  # module-level signed aliases, first needed by this struct
+int8_t = ctypes.c_byte
+struct_H265RawScalingList._fields_ = [
+  ('scaling_list_pred_mode_flag', ((uint8_t * 6) * 4)),  # indexed [4][6]
+  ('scaling_list_pred_matrix_id_delta', ((uint8_t * 6) * 4)),
+  ('scaling_list_dc_coef_minus8', ((int16_t * 6) * 4)),  # signed 16-bit entries
+  ('scaling_list_delta_coeff', (((int8_t * 64) * 6) * 4)),  # indexed [4][6][64], signed 8-bit entries
+]
+H265RawScalingList = struct_H265RawScalingList  # un-prefixed alias
+class struct_H265RawSPS(Struct): pass  # no _fields_ are assigned to this type anywhere in this file
+H265RawSPS = struct_H265RawSPS  # un-prefixed alias
+class struct_H265RawPPS(Struct): pass  # no _fields_ are assigned to this type anywhere in this file
+H265RawPPS = struct_H265RawPPS  # un-prefixed alias
+class struct_H265RawAUD(Struct): pass  # NAL header followed by a single byte
+struct_H265RawAUD._fields_ = [
+  ('nal_unit_header', H265RawNALUnitHeader),  # embedded by value
+  ('pic_type', uint8_t),
+]
+H265RawAUD = struct_H265RawAUD  # un-prefixed alias
+class struct_H265RawSliceHeader(Struct): pass  # largest struct here; layout attached below
+struct_H265RawSliceHeader._fields_ = [
+  ('nal_unit_header', H265RawNALUnitHeader),  # embedded by value
+  ('first_slice_segment_in_pic_flag', uint8_t),
+  ('no_output_of_prior_pics_flag', uint8_t),
+  ('slice_pic_parameter_set_id', uint8_t),
+  ('dependent_slice_segment_flag', uint8_t),
+  ('slice_segment_address', uint16_t),
+  ('slice_reserved_flag', (uint8_t * 8)),
+  ('slice_type', uint8_t),
+  ('pic_output_flag', uint8_t),
+  ('colour_plane_id', uint8_t),
+  ('slice_pic_order_cnt_lsb', uint16_t),
+  ('short_term_ref_pic_set_sps_flag', uint8_t),
+  ('short_term_ref_pic_set', H265RawSTRefPicSet),  # nested struct stored inline
+  ('short_term_ref_pic_set_idx', uint8_t),
+  ('num_long_term_sps', uint8_t),
+  ('num_long_term_pics', uint8_t),
+  ('lt_idx_sps', (uint8_t * 16)),  # 16-entry arrays (HEVC_MAX_REFS is 16)
+  ('poc_lsb_lt', (uint8_t * 16)),
+  ('used_by_curr_pic_lt_flag', (uint8_t * 16)),
+  ('delta_poc_msb_present_flag', (uint8_t * 16)),
+  ('delta_poc_msb_cycle_lt', (uint32_t * 16)),
+  ('slice_temporal_mvp_enabled_flag', uint8_t),
+  ('slice_sao_luma_flag', uint8_t),
+  ('slice_sao_chroma_flag', uint8_t),
+  ('num_ref_idx_active_override_flag', uint8_t),
+  ('num_ref_idx_l0_active_minus1', uint8_t),
+  ('num_ref_idx_l1_active_minus1', uint8_t),
+  ('ref_pic_list_modification_flag_l0', uint8_t),
+  ('list_entry_l0', (uint8_t * 16)),
+  ('ref_pic_list_modification_flag_l1', uint8_t),
+  ('list_entry_l1', (uint8_t * 16)),
+  ('mvd_l1_zero_flag', uint8_t),
+  ('cabac_init_flag', uint8_t),
+  ('collocated_from_l0_flag', uint8_t),
+  ('collocated_ref_idx', uint8_t),
+  ('luma_log2_weight_denom', uint8_t),
+  ('delta_chroma_log2_weight_denom', int8_t),  # signed
+  ('luma_weight_l0_flag', (uint8_t * 16)),
+  ('chroma_weight_l0_flag', (uint8_t * 16)),
+  ('delta_luma_weight_l0', (int8_t * 16)),
+  ('luma_offset_l0', (int16_t * 16)),
+  ('delta_chroma_weight_l0', ((int8_t * 2) * 16)),  # indexed [16][2]
+  ('chroma_offset_l0', ((int16_t * 2) * 16)),
+  ('luma_weight_l1_flag', (uint8_t * 16)),  # the l1 fields repeat the l0 types and sizes
+  ('chroma_weight_l1_flag', (uint8_t * 16)),
+  ('delta_luma_weight_l1', (int8_t * 16)),
+  ('luma_offset_l1', (int16_t * 16)),
+  ('delta_chroma_weight_l1', ((int8_t * 2) * 16)),
+  ('chroma_offset_l1', ((int16_t * 2) * 16)),
+  ('five_minus_max_num_merge_cand', uint8_t),
+  ('use_integer_mv_flag', uint8_t),
+  ('slice_qp_delta', int8_t),  # the qp delta/offset fields are signed bytes
+  ('slice_cb_qp_offset', int8_t),
+  ('slice_cr_qp_offset', int8_t),
+  ('slice_act_y_qp_offset', int8_t),
+  ('slice_act_cb_qp_offset', int8_t),
+  ('slice_act_cr_qp_offset', int8_t),
+  ('cu_chroma_qp_offset_enabled_flag', uint8_t),
+  ('deblocking_filter_override_flag', uint8_t),
+  ('slice_deblocking_filter_disabled_flag', uint8_t),
+  ('slice_beta_offset_div2', int8_t),
+  ('slice_tc_offset_div2', int8_t),
+  ('slice_loop_filter_across_slices_enabled_flag', uint8_t),
+  ('num_entry_point_offsets', uint16_t),
+  ('offset_len_minus1', uint8_t),
+  ('entry_point_offset_minus1', (uint32_t * 2700)),  # 2700 entries (HEVC_MAX_ENTRY_POINT_OFFSETS is 2700)
+  ('slice_segment_header_extension_length', uint16_t),
+  ('slice_segment_header_extension_data_byte', (uint8_t * 256)),
+]
+H265RawSliceHeader = struct_H265RawSliceHeader  # un-prefixed alias
+class struct_H265RawSlice(Struct): pass  # no _fields_ are assigned to this type anywhere in this file
+H265RawSlice = struct_H265RawSlice  # un-prefixed alias
+class struct_H265RawSEIBufferingPeriod(Struct): pass  # scalars plus eight 32-entry uint32 tables
+struct_H265RawSEIBufferingPeriod._fields_ = [
+  ('bp_seq_parameter_set_id', uint8_t),
+  ('irap_cpb_params_present_flag', uint8_t),
+  ('cpb_delay_offset', uint32_t),
+  ('dpb_delay_offset', uint32_t),
+  ('concatenation_flag', uint8_t),
+  ('au_cpb_removal_delay_delta_minus1', uint32_t),
+  ('nal_initial_cpb_removal_delay', (uint32_t * 32)),  # four nal_* tables, then four vcl_* tables of the same shape
+  ('nal_initial_cpb_removal_offset', (uint32_t * 32)),
+  ('nal_initial_alt_cpb_removal_delay', (uint32_t * 32)),
+  ('nal_initial_alt_cpb_removal_offset', (uint32_t * 32)),
+  ('vcl_initial_cpb_removal_delay', (uint32_t * 32)),
+  ('vcl_initial_cpb_removal_offset', (uint32_t * 32)),
+  ('vcl_initial_alt_cpb_removal_delay', (uint32_t * 32)),
+  ('vcl_initial_alt_cpb_removal_offset', (uint32_t * 32)),
+  ('use_alt_cpb_params_flag', uint8_t),
+]
+H265RawSEIBufferingPeriod = struct_H265RawSEIBufferingPeriod  # un-prefixed alias
+class struct_H265RawSEIPicTiming(Struct): pass  # scalars plus two 600-entry per-unit arrays
+struct_H265RawSEIPicTiming._fields_ = [
+  ('pic_struct', uint8_t),
+  ('source_scan_type', uint8_t),
+  ('duplicate_flag', uint8_t),
+  ('au_cpb_removal_delay_minus1', uint32_t),
+  ('pic_dpb_output_delay', uint32_t),
+  ('pic_dpb_output_du_delay', uint32_t),
+  ('num_decoding_units_minus1', uint16_t),
+  ('du_common_cpb_removal_delay_flag', uint8_t),
+  ('du_common_cpb_removal_delay_increment_minus1', uint32_t),
+  ('num_nalus_in_du_minus1', (uint16_t * 600)),  # 600 entries (HEVC_MAX_SLICE_SEGMENTS is 600)
+  ('du_cpb_removal_delay_increment_minus1', (uint32_t * 600)),
+]
+H265RawSEIPicTiming = struct_H265RawSEIPicTiming  # un-prefixed alias
+class struct_H265RawSEIPanScanRect(Struct): pass  # four signed 3-entry offset arrays plus control fields
+int32_t = ctypes.c_int32  # module-level signed alias, first needed by this struct
+struct_H265RawSEIPanScanRect._fields_ = [
+  ('pan_scan_rect_id', uint32_t),
+  ('pan_scan_rect_cancel_flag', uint8_t),
+  ('pan_scan_cnt_minus1', uint8_t),
+  ('pan_scan_rect_left_offset', (int32_t * 3)),
+  ('pan_scan_rect_right_offset', (int32_t * 3)),
+  ('pan_scan_rect_top_offset', (int32_t * 3)),
+  ('pan_scan_rect_bottom_offset', (int32_t * 3)),
+  ('pan_scan_rect_persistence_flag', uint16_t),  # the only *_flag in this struct stored as 16 bits
+]
+H265RawSEIPanScanRect = struct_H265RawSEIPanScanRect  # un-prefixed alias
+class struct_H265RawSEIRecoveryPoint(Struct): pass  # one signed 16-bit count and two one-byte flags
+struct_H265RawSEIRecoveryPoint._fields_ = [
+  ('recovery_poc_cnt', int16_t),  # signed
+  ('exact_match_flag', uint8_t),
+  ('broken_link_flag', uint8_t),
+]
+H265RawSEIRecoveryPoint = struct_H265RawSEIRecoveryPoint  # un-prefixed alias
+class struct_H265RawFilmGrainCharacteristics(Struct): pass  # scalar header plus tables whose outer dimension is 3
+struct_H265RawFilmGrainCharacteristics._fields_ = [
+  ('film_grain_characteristics_cancel_flag', uint8_t),
+  ('film_grain_model_id', uint8_t),
+  ('separate_colour_description_present_flag', uint8_t),
+  ('film_grain_bit_depth_luma_minus8', uint8_t),
+  ('film_grain_bit_depth_chroma_minus8', uint8_t),
+  ('film_grain_full_range_flag', uint8_t),
+  ('film_grain_colour_primaries', uint8_t),
+  ('film_grain_transfer_characteristics', uint8_t),
+  ('film_grain_matrix_coeffs', uint8_t),
+  ('blending_mode_id', uint8_t),
+  ('log2_scale_factor', uint8_t),
+  ('comp_model_present_flag', (uint8_t * 3)),
+  ('num_intensity_intervals_minus1', (uint8_t * 3)),
+  ('num_model_values_minus1', (uint8_t * 3)),
+  ('intensity_interval_lower_bound', ((uint8_t * 256) * 3)),  # indexed [3][256]
+  ('intensity_interval_upper_bound', ((uint8_t * 256) * 3)),
+  ('comp_model_value', (((int16_t * 6) * 256) * 3)),  # indexed [3][256][6], signed 16-bit entries
+  ('film_grain_characteristics_persistence_flag', uint8_t),
+]
+H265RawFilmGrainCharacteristics = struct_H265RawFilmGrainCharacteristics  # un-prefixed alias
+class struct_H265RawSEIDisplayOrientation(Struct): pass  # one-byte flags plus two 16-bit values
+struct_H265RawSEIDisplayOrientation._fields_ = [
+  ('display_orientation_cancel_flag', uint8_t),
+  ('hor_flip', uint8_t),
+  ('ver_flip', uint8_t),
+  ('anticlockwise_rotation', uint16_t),
+  ('display_orientation_repetition_period', uint16_t),
+  ('display_orientation_persistence_flag', uint8_t),
+]
+H265RawSEIDisplayOrientation = struct_H265RawSEIDisplayOrientation  # un-prefixed alias
+class struct_H265RawSEIActiveParameterSets(Struct): pass  # four scalar bytes then two id tables
+struct_H265RawSEIActiveParameterSets._fields_ = [
+  ('active_video_parameter_set_id', uint8_t),
+  ('self_contained_cvs_flag', uint8_t),
+  ('no_parameter_set_update_flag', uint8_t),
+  ('num_sps_ids_minus1', uint8_t),
+  ('active_seq_parameter_set_id', (uint8_t * 16)),  # 16 entries (HEVC_MAX_SPS_COUNT is 16)
+  ('layer_sps_idx', (uint8_t * 63)),  # 63 entries (HEVC_MAX_LAYERS is 63)
+]
+H265RawSEIActiveParameterSets = struct_H265RawSEIActiveParameterSets  # un-prefixed alias
+class struct_H265RawSEIDecodedPictureHash(Struct): pass  # three alternative hash tables, each with 3 entries
+struct_H265RawSEIDecodedPictureHash._fields_ = [
+  ('hash_type', uint8_t),
+  ('picture_md5', ((uint8_t * 16) * 3)),  # indexed [3][16]: three 16-byte digests
+  ('picture_crc', (uint16_t * 3)),
+  ('picture_checksum', (uint32_t * 3)),
+]
+H265RawSEIDecodedPictureHash = struct_H265RawSEIDecodedPictureHash  # un-prefixed alias
+class struct_H265RawSEITimeCode(Struct): pass  # one count byte followed by parallel 3-entry arrays
+struct_H265RawSEITimeCode._fields_ = [
+  ('num_clock_ts', uint8_t),
+  ('clock_timestamp_flag', (uint8_t * 3)),
+  ('units_field_based_flag', (uint8_t * 3)),
+  ('counting_type', (uint8_t * 3)),
+  ('full_timestamp_flag', (uint8_t * 3)),
+  ('discontinuity_flag', (uint8_t * 3)),
+  ('cnt_dropped_flag', (uint8_t * 3)),
+  ('n_frames', (uint16_t * 3)),
+  ('seconds_value', (uint8_t * 3)),
+  ('minutes_value', (uint8_t * 3)),
+  ('hours_value', (uint8_t * 3)),
+  ('seconds_flag', (uint8_t * 3)),
+  ('minutes_flag', (uint8_t * 3)),
+  ('hours_flag', (uint8_t * 3)),
+  ('time_offset_length', (uint8_t * 3)),
+  ('time_offset_value', (int32_t * 3)),  # signed 32-bit entries
+]
+H265RawSEITimeCode = struct_H265RawSEITimeCode  # un-prefixed alias
+class struct_H265RawSEIAlphaChannelInfo(Struct): pass  # scalar-only struct, no arrays
+struct_H265RawSEIAlphaChannelInfo._fields_ = [
+  ('alpha_channel_cancel_flag', uint8_t),
+  ('alpha_channel_use_idc', uint8_t),
+  ('alpha_channel_bit_depth_minus8', uint8_t),
+  ('alpha_transparent_value', uint16_t),
+  ('alpha_opaque_value', uint16_t),
+  ('alpha_channel_incr_flag', uint8_t),
+  ('alpha_channel_clip_flag', uint8_t),
+  ('alpha_channel_clip_type_flag', uint8_t),
+]
+H265RawSEIAlphaChannelInfo = struct_H265RawSEIAlphaChannelInfo  # un-prefixed alias
+class struct_H265RawSEI3DReferenceDisplaysInfo(Struct): pass  # scalar header plus parallel 32-entry arrays
+struct_H265RawSEI3DReferenceDisplaysInfo._fields_ = [
+  ('prec_ref_display_width', uint8_t),
+  ('ref_viewing_distance_flag', uint8_t),
+  ('prec_ref_viewing_dist', uint8_t),
+  ('num_ref_displays_minus1', uint8_t),
+  ('left_view_id', (uint16_t * 32)),
+  ('right_view_id', (uint16_t * 32)),
+  ('exponent_ref_display_width', (uint8_t * 32)),
+  ('mantissa_ref_display_width', (uint8_t * 32)),
+  ('exponent_ref_viewing_distance', (uint8_t * 32)),
+  ('mantissa_ref_viewing_distance', (uint8_t * 32)),
+  ('additional_shift_present_flag', (uint8_t * 32)),
+  ('num_sample_shift_plus512', (uint16_t * 32)),
+  ('three_dimensional_reference_displays_extension_flag', uint8_t),
+]
+H265RawSEI3DReferenceDisplaysInfo = struct_H265RawSEI3DReferenceDisplaysInfo  # un-prefixed alias
+class struct_H265RawSEI(Struct): pass  # the three classes are declared first, then given layouts innermost-first
+class struct_SEIRawMessageList(Struct): pass
+SEIRawMessageList = struct_SEIRawMessageList
+class struct_SEIRawMessage(Struct): pass
+SEIRawMessage = struct_SEIRawMessage
+size_t = ctypes.c_uint64  # NOTE(review): size_t is fixed at 64 bits here, which only matches targets with an 8-byte size_t
+struct_SEIRawMessage._fields_ = [
+  ('payload_type', uint32_t),
+  ('payload_size', uint32_t),
+  ('payload', ctypes.c_void_p),  # untyped pointer
+  ('payload_ref', ctypes.c_void_p),  # untyped pointer
+  ('extension_data', ctypes.POINTER(uint8_t)),
+  ('extension_bit_length', size_t),
+]
+struct_SEIRawMessageList._fields_ = [
+  ('messages', ctypes.POINTER(SEIRawMessage)),  # pointer to SEIRawMessage, not an inline array
+  ('nb_messages', ctypes.c_int32),
+  ('nb_messages_allocated', ctypes.c_int32),
+]
+struct_H265RawSEI._fields_ = [
+  ('nal_unit_header', H265RawNALUnitHeader),  # embedded by value
+  ('message_list', SEIRawMessageList),  # embedded by value; the messages themselves are behind its pointer
+]
+H265RawSEI = struct_H265RawSEI  # un-prefixed alias
+class struct_H265RawFiller(Struct): pass  # NAL header followed by a 32-bit size
+struct_H265RawFiller._fields_ = [
+  ('nal_unit_header', H265RawNALUnitHeader),  # embedded by value
+  ('filler_size', uint32_t),
+]
+H265RawFiller = struct_H265RawFiller  # un-prefixed alias
+class struct_CodedBitstreamH265Context(Struct): pass  # no _fields_ are assigned to this type anywhere in this file
+CodedBitstreamH265Context = struct_CodedBitstreamH265Context  # un-prefixed alias
--- a/tinygrad/runtime/autogen/nv_570.py
+++ b/tinygrad/runtime/autogen/nv_570.py
--- a/tinygrad/runtime/autogen/nv_580.py
+++ b/tinygrad/runtime/autogen/nv_580.py
--- a/tinygrad/runtime/ops_nv.py
+++ b/tinygrad/runtime/ops_nv.py
@@ -180,6 +180,31 @@ class NVCopyQueue(NVCommandQueue):

  def _submit(self, dev:NVDevice): self._submit_to_gpfifo(dev, dev.dma_gpfifo)

+class NVVideoQueue(NVCommandQueue):  # command queue that emits NVC9B0 video-decoder methods; submitted on dev.vid_gpfifo
+  def decode_hevc_chunk(self, pic_desc:HCQBuffer, in_buf:HCQBuffer, out_buf:HCQBuffer, out_buf_pos:int, hist_bufs:list[HCQBuffer],
+                        hist_pos:list[int], chroma_off:int, coloc_buf:HCQBuffer, filter_buf:HCQBuffer, intra_top_off:int, status_buf:HCQBuffer):
+    self.nvm(4, nv_gpu.NVC9B0_SET_APPLICATION_ID, nv_gpu.NVC9B0_SET_APPLICATION_ID_ID_HEVC)  # select the HEVC application id
+    self.nvm(4, nv_gpu.NVC9B0_SET_CONTROL_PARAMS, 0x52057)  # NOTE(review): opaque control bitfield -- document what 0x52057 encodes
+    self.nvm(4, nv_gpu.NVC9B0_SET_DRV_PIC_SETUP_OFFSET, pic_desc.va_addr >> 8)  # every address below is written as va_addr >> 8
+    self.nvm(4, nv_gpu.NVC9B0_SET_IN_BUF_BASE_OFFSET, in_buf.va_addr >> 8)
+    for pos, buf in zip(hist_pos + [out_buf_pos], hist_bufs + [out_buf]):  # history first, output last; zip truncates on a length mismatch
+      self.nvm(4, nv_gpu.NVC9B0_SET_PICTURE_LUMA_OFFSET0 + pos*4, buf.va_addr >> 8)  # method id is indexed by slot: OFFSET0 + pos*4
+      self.nvm(4, nv_gpu.NVC9B0_SET_PICTURE_CHROMA_OFFSET0 + pos*4, buf.offset(chroma_off).va_addr >> 8)  # same buffer, offset by chroma_off
+    self.nvm(4, nv_gpu.NVC9B0_SET_COLOC_DATA_OFFSET, coloc_buf.va_addr >> 8)
+    self.nvm(4, nv_gpu.NVC9B0_SET_NVDEC_STATUS_OFFSET, status_buf.va_addr >> 8)
+    self.nvm(4, nv_gpu.NVC9B0_HEVC_SET_TILE_SIZES_OFFSET, pic_desc.offset(0x200).va_addr >> 8)  # tile-sizes pointer = pic_desc + 0x200
+    self.nvm(4, nv_gpu.NVC9B0_HEVC_SET_FILTER_BUFFER_OFFSET, filter_buf.va_addr >> 8)
+    self.nvm(4, nv_gpu.NVC9B0_SET_INTRA_TOP_BUF_OFFSET, (filter_buf.va_addr + intra_top_off) >> 8)  # intra-top region sits inside filter_buf
+    self.nvm(4, nv_gpu.NVC9B0_EXECUTE, 0)  # NOTE(review): presumably starts the decode with the state set above -- confirm
+    return self  # returned so calls can be chained
+
+  def signal(self, signal:HCQSignal, value:sint=0):  # emits SEMAPHORE_A (address split by data64, then value) and SEMAPHORE_D=0
+    self.nvm(4, nv_gpu.NVC9B0_SEMAPHORE_A, *data64(signal.value_addr), value)
+    self.nvm(4, nv_gpu.NVC9B0_SEMAPHORE_D, 0)  # NOTE(review): presumably triggers the semaphore operation -- confirm the 0 payload
+    return self  # returned so calls can be chained
+
+  def _submit(self, dev:NVDevice): self._submit_to_gpfifo(dev, dev.vid_gpfifo)  # this queue always targets the device's video gpfifo
+
 class NVArgsState(CLikeArgsState):
  def __init__(self, buf:HCQBuffer, prg:NVProgram, bufs:tuple[HCQBuffer, ...], vals:tuple[int, ...]=()):
    if MOCKGPU: prg.cbuf_0[80:82] = [len(bufs), len(vals)]
@@ -281,6 +306,16 @@ class NVAllocator(HCQAllocator['NVDevice']):

  def _map(self, buf:HCQBuffer): return self.dev.iface.map(buf._base if buf._base is not None else buf)

+  def _encode_decode(self, bufout:HCQBuffer, bufin:HCQBuffer, desc_buf:HCQBuffer, hist:list[HCQBuffer], shape:tuple[int,...], frame_pos:int):
+    assert all(h.va_addr % 0x100 == 0 for h in hist + [bufin, bufout]), "all buffers must be 0x100 aligned"  # addrs are sent >> 8
+    # NOTE(review): desc_buf is also programmed as va_addr >> 8 by decode_hevc_chunk but is not covered by the assert above
+    h, w = ((2 * shape[0]) // 3 if shape[0] % 3 == 0 else (2 * shape[0] - 1) // 3), shape[1]  # h ~ 2/3 of shape[0] (NV12 rows? confirm)
+    self.dev._ensure_has_vid_hw(w, h)  # NOTE(review): presumably sets up the vid_* device buffers used below -- confirm
+    NVVideoQueue().wait(self.dev.timeline_signal, self.dev.timeline_value - 1) \
+                  .decode_hevc_chunk(desc_buf, bufin, bufout, frame_pos, hist, [(frame_pos-x) % (len(hist) + 1) for x in range(len(hist), 0, -1)],
+                    round_up(w, 64)*round_up(h, 64), self.dev.vid_coloc_buf, self.dev.vid_filter_buf, self.dev.intra_top_off, self.dev.vid_stat_buf) \
+                  .signal(self.dev.timeline_signal, self.dev.next_timeline()).submit(self.dev)  # wait, decode, signal in one submit
+
@dataclass
 class GPFifo:
  ring: MMIOInterface
@@ -358,6 +393,7 @@ class NVKIface:
    self.gpfifo_class:int = next(c for c in [nv_gpu.BLACKWELL_CHANNEL_GPFIFO_A, nv_gpu.AMPERE_CHANNEL_GPFIFO_A] if c in self.nvclasses)
    self.compute_class:int = next(c for c in [nv_gpu.BLACKWELL_COMPUTE_B, nv_gpu.ADA_COMPUTE_A, nv_gpu.AMPERE_COMPUTE_B] if c in self.nvclasses)
    self.dma_class:int = next(c for c in [nv_gpu.BLACKWELL_DMA_COPY_B, nv_gpu.AMPERE_DMA_COPY_B] if c in self.nvclasses)
+    self.viddec_class:int|None = next((c for c in [nv_gpu.NVC9B0_VIDEO_DECODER] if c in self.nvclasses), None)

    usermode = self.rm_alloc(self.dev.subdevice, self.usermode_class)
    return usermode, MMIOInterface(self._gpu_map_to_cpu(usermode, mmio_sz:=0x10000), mmio_sz, fmt='I')
@@ -440,7 +476,15 @@ class NVKIface:
    if mem.meta.has_cpu_mapping: FileIOInterface.munmap(cast(int, mem.va_addr), mem.size)

  def _gpu_uvm_map(self, va_base, size, mem_handle, create_range=True, has_cpu_mapping=False) -> HCQBuffer:
-    if create_range: self.uvm(nv_gpu.UVM_CREATE_EXTERNAL_RANGE, nv_gpu.UVM_CREATE_EXTERNAL_RANGE_PARAMS(base=va_base, length=size))
+    if create_range:
+      self.uvm(nv_gpu.UVM_CREATE_EXTERNAL_RANGE, nv_gpu.UVM_CREATE_EXTERNAL_RANGE_PARAMS(base=va_base, length=size))
+      made = nv_gpu.NVOS46_PARAMETERS(hClient=self.root, hDevice=self.dev.nvdevice, hDma=self.dev.virtmem, hMemory=mem_handle, length=size,
+        flags=(nv_gpu.NVOS46_FLAGS_PAGE_SIZE_4KB<<8)|(nv_gpu.NVOS46_FLAGS_CACHE_SNOOP_ENABLE<<4)|(nv_gpu.NVOS46_FLAGS_DMA_OFFSET_FIXED_TRUE<<15),
+        dmaOffset=va_base)
+      nv_iowr(self.fd_ctl, nv_gpu.NV_ESC_RM_MAP_MEMORY_DMA, made)
+      if made.status != 0: raise RuntimeError(f"nv_sys_alloc 1 returned {get_error_str(made.status)}")
+      assert made.dmaOffset == va_base, f"made.dmaOffset != va_base {made.dmaOffset=} {va_base=}"
+
    attrs = (nv_gpu.UvmGpuMappingAttributes*256)(nv_gpu.UvmGpuMappingAttributes(gpuUuid=self.gpu_uuid, gpuMappingType=1))

    self.uvm(nv_gpu.UVM_MAP_EXTERNAL_ALLOCATION, uvm_map:=nv_gpu.UVM_MAP_EXTERNAL_ALLOCATION_PARAMS(base=va_base, length=size,
@@ -472,6 +516,7 @@ class PCIIface(PCIIfaceBase):

    # Setup classes for the GPU
    self.gpfifo_class, self.compute_class, self.dma_class = (gsp:=self.dev_impl.gsp).gpfifo_class, gsp.compute_class, gsp.dma_class
+    self.viddec_class = None

  def alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, **kwargs) -> HCQBuffer:
    # Force use of huge pages for large allocations. NVDev will attempt to use huge pages in any case,
@@ -499,6 +544,7 @@ class NVDevice(HCQCompiled[HCQSignal]):
                                                   vaMode=nv_gpu.NV_DEVICE_ALLOCATION_VAMODE_OPTIONAL_MULTIPLE_VASPACES)
    self.nvdevice = self.iface.rm_alloc(self.iface.root, nv_gpu.NV01_DEVICE_0, device_params)
    self.subdevice = self.iface.rm_alloc(self.nvdevice, nv_gpu.NV20_SUBDEVICE_0, nv_gpu.NV2080_ALLOC_PARAMETERS())
+    self.virtmem = self.iface.rm_alloc(self.nvdevice, nv_gpu.NV01_MEMORY_VIRTUAL, nv_gpu.NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS(limit=0x1ffffffffffff))
    self.usermode, self.gpu_mmio = self.iface.setup_usermode()

    self.iface.rm_control(self.subdevice, nv_gpu.NV2080_CTRL_CMD_PERF_BOOST, nv_gpu.NV2080_CTRL_PERF_BOOST_PARAMS(duration=0xffffffff,
@@ -514,14 +560,14 @@ class NVDevice(HCQCompiled[HCQSignal]):
    channel_params = nv_gpu.NV_CHANNEL_GROUP_ALLOCATION_PARAMETERS(engineType=nv_gpu.NV2080_ENGINE_TYPE_GRAPHICS)
    channel_group = self.iface.rm_alloc(self.nvdevice, nv_gpu.KEPLER_CHANNEL_GROUP_A, channel_params)

-    gpfifo_area = self.iface.alloc(0x200000, contiguous=True, cpu_access=True, force_devmem=True,
+    self.gpfifo_area = self.iface.alloc(0x300000, contiguous=True, cpu_access=True, force_devmem=True,
      map_flags=(nv_gpu.NVOS33_FLAGS_CACHING_TYPE_WRITECOMBINED<<23))

    ctxshare_params = nv_gpu.NV_CTXSHARE_ALLOCATION_PARAMETERS(hVASpace=vaspace, flags=nv_gpu.NV_CTXSHARE_ALLOCATION_FLAGS_SUBCONTEXT_ASYNC)
    ctxshare = self.iface.rm_alloc(channel_group, nv_gpu.FERMI_CONTEXT_SHARE_A, ctxshare_params)

-    self.compute_gpfifo = self._new_gpu_fifo(gpfifo_area, ctxshare, channel_group, offset=0, entries=0x10000, compute=True)
-    self.dma_gpfifo = self._new_gpu_fifo(gpfifo_area, ctxshare, channel_group, offset=0x100000, entries=0x10000, compute=False)
+    self.compute_gpfifo = self._new_gpu_fifo(self.gpfifo_area, ctxshare, channel_group, offset=0, entries=0x10000, compute=True)
+    self.dma_gpfifo = self._new_gpu_fifo(self.gpfifo_area, ctxshare, channel_group, offset=0x100000, entries=0x10000, compute=False)
    self.iface.rm_control(channel_group, nv_gpu.NVA06C_CTRL_CMD_GPFIFO_SCHEDULE, nv_gpu.NVA06C_CTRL_GPFIFO_SCHEDULE_PARAMS(bEnable=1))

    self.cmdq_page:HCQBuffer = self.iface.alloc(0x200000, cpu_access=True)
@@ -542,22 +588,27 @@ class NVDevice(HCQCompiled[HCQSignal]):

    self._setup_gpfifos()

-  def _new_gpu_fifo(self, gpfifo_area, ctxshare, channel_group, offset=0, entries=0x400, compute=False) -> GPFifo:
+  def _new_gpu_fifo(self, gpfifo_area, ctxshare, channel_group, offset=0, entries=0x400, compute=False, video=False) -> GPFifo:
    notifier = self.iface.alloc(48 << 20, uncached=True)
-    params = nv_gpu.NV_CHANNELGPFIFO_ALLOCATION_PARAMETERS(hObjectError=notifier.meta.hMemory, hObjectBuffer=gpfifo_area.meta.hMemory,
-      gpFifoOffset=gpfifo_area.va_addr+offset, gpFifoEntries=entries, hContextShare=ctxshare,
-      hUserdMemory=(ctypes.c_uint32*8)(gpfifo_area.meta.hMemory), userdOffset=(ctypes.c_uint64*8)(entries*8+offset))
+    params = nv_gpu.NV_CHANNELGPFIFO_ALLOCATION_PARAMETERS(gpFifoOffset=gpfifo_area.va_addr+offset, gpFifoEntries=entries, hContextShare=ctxshare,
+      hObjectError=notifier.meta.hMemory, hObjectBuffer=self.virtmem if video else gpfifo_area.meta.hMemory,
+      hUserdMemory=(ctypes.c_uint32*8)(gpfifo_area.meta.hMemory), userdOffset=(ctypes.c_uint64*8)(entries*8+offset), engineType=19 if video else 0)
    gpfifo = self.iface.rm_alloc(channel_group, self.iface.gpfifo_class, params)

    if compute:
      self.debug_compute_obj, self.debug_channel = self.iface.rm_alloc(gpfifo, self.iface.compute_class), gpfifo
      debugger_params = nv_gpu.NV83DE_ALLOC_PARAMETERS(hAppClient=self.iface.root, hClass3dObject=self.debug_compute_obj)
      self.debugger = self.iface.rm_alloc(self.nvdevice, nv_gpu.GT200_DEBUGGER, debugger_params)
-    else: self.iface.rm_alloc(gpfifo, self.iface.dma_class)
+    elif not video: self.iface.rm_alloc(gpfifo, self.iface.dma_class)
+    else: self.iface.rm_alloc(gpfifo, self.iface.viddec_class)
+
+    if channel_group == self.nvdevice:
+      self.iface.rm_control(gpfifo, nv_gpu.NVA06F_CTRL_CMD_BIND, nv_gpu.NVA06F_CTRL_BIND_PARAMS(engineType=params.engineType))
+      self.iface.rm_control(gpfifo, nv_gpu.NVA06F_CTRL_CMD_GPFIFO_SCHEDULE, nv_gpu.NVA06F_CTRL_GPFIFO_SCHEDULE_PARAMS(bEnable=1))

    ws_token_params = self.iface.rm_control(gpfifo, nv_gpu.NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN,
      nv_gpu.NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN_PARAMS(workSubmitToken=-1))
-    self.iface.setup_gpfifo_vm(gpfifo)
+    if ctxshare != 0: self.iface.setup_gpfifo_vm(gpfifo)

    return GPFifo(ring=gpfifo_area.cpu_view().view(offset, entries*8, fmt='Q'), entries_count=entries, token=ws_token_params.workSubmitToken,
                  controls=nv_gpu.AmpereAControlGPFifo.from_address(gpfifo_area.cpu_view().addr + offset + entries * 8))
@@ -604,6 +655,24 @@ class NVDevice(HCQCompiled[HCQSignal]):
                                         .setup(local_mem=self.shader_local_mem.va_addr, local_mem_tpc_bytes=bytes_per_tpc) \
                                         .signal(self.timeline_signal, self.next_timeline()).submit(self)

+  def _ensure_has_vid_hw(self, w, h):
+    """Create the video decoder channel and scratch buffers on first call; later calls grow the coloc/filter buffers if needed."""
+    if self.iface.viddec_class is None: raise RuntimeError(f"{self.device} Video decoder class not available.")
+    coloc_size = round_up((round_up(h, 64) * round_up(h, 64)) + (round_up(w, 64) * round_up(h, 64) // 16), 2 << 20)
+    self.intra_top_off = round_up(h, 64) * (608 + 4864 + 152 + 2000)
+    # parenthesise the 64 KiB pad: `+` binds tighter than `<<`, so `x + 64 << 10` evaluated as `(x + 64) << 10` (1024x oversized)
+    filter_size = round_up(round_up(self.intra_top_off, 0x10000) + (64 << 10), 2 << 20)
+    if not hasattr(self, 'vid_gpfifo'):
+      self.vid_gpfifo = self._new_gpu_fifo(self.gpfifo_area, 0, self.nvdevice, offset=0x200000, entries=2048, compute=False, video=True)
+      self.vid_coloc_buf, self.vid_filter_buf = self.allocator.alloc(coloc_size), self.allocator.alloc(filter_size)
+      self.vid_stat_buf = self.allocator.alloc(0x1000)
+      NVVideoQueue().wait(self.timeline_signal, self.timeline_value - 1) \
+                    .setup(copy_class=self.iface.viddec_class) \
+                    .signal(self.timeline_signal, self.next_timeline()).submit(self)
+    else:
+      if coloc_size > self.vid_coloc_buf.size: self.vid_coloc_buf, _ = self._realloc(self.vid_coloc_buf, coloc_size, force=True)
+      if filter_size > self.vid_filter_buf.size: self.vid_filter_buf, _ = self._realloc(self.vid_filter_buf, filter_size, force=True)
+
  def invalidate_caches(self):
    if self.is_nvd(): self.iface.rm_control(self.subdevice, nv_gpu.NV2080_CTRL_CMD_INTERNAL_BUS_FLUSH_WITH_SYSMEMBAR, None)
    else:
--- a/tinygrad/runtime/support/hcq.py
+++ b/tinygrad/runtime/support/hcq.py
@@ -432,10 +432,12 @@ class HCQCompiled(Compiled, Generic[SignalType]):
    self.timeline_signal.value = 0
    cast(HCQAllocatorBase, self.allocator).b_timeline = [0] * len(cast(HCQAllocatorBase, self.allocator).b)

-  def _realloc(self, oldbuf:HCQBuffer|None, new_size:int, options:BufferSpec|None=None) -> tuple[HCQBuffer, bool]:
+  def _realloc(self, oldbuf:HCQBuffer|None, new_size:int, options:BufferSpec|None=None, force=False) -> tuple[HCQBuffer, bool]:
    if oldbuf is not None: self.allocator.free(oldbuf, oldbuf.size, options=options)
    try: buf, realloced = self.allocator.alloc(new_size, options=options), True
-    except MemoryError: buf, realloced = self.allocator.alloc(oldbuf.size if oldbuf is not None else new_size, options=options), False
+    except MemoryError:
+      if force: raise
+      buf, realloced = self.allocator.alloc(oldbuf.size if oldbuf is not None else new_size, options=options), False
    return buf, realloced

  def _select_iface(self, *ifaces:Type):
--- a/tinygrad/schedule/indexing.py
+++ b/tinygrad/schedule/indexing.py
@@ -8,7 +8,7 @@ from tinygrad.helpers import argsort, all_same, cpu_profile, PCONTIG, colored

 ALWAYS_CONTIGUOUS: set[Ops] = {Ops.CONTIGUOUS, Ops.ASSIGN, Ops.COPY, Ops.BUFFER, Ops.BUFFER_VIEW,
                     Ops.CONST, Ops.BIND, Ops.DEVICE, Ops.MSELECT, Ops.MSTACK, Ops.DEFINE_GLOBAL,
-                     Ops.DEFINE_LOCAL, Ops.DEFINE_REG, Ops.LOAD, Ops.KERNEL}
+                     Ops.DEFINE_LOCAL, Ops.DEFINE_REG, Ops.LOAD, Ops.KERNEL, Ops.ENCDEC}

 def realize(ctx:dict[UOp, None], tr:UOp) -> None: ctx[tr] = None

@@ -24,12 +24,12 @@ def realize_assign(ctx:dict[UOp, None], a:UOp) -> None:
 pm_generate_realize_map = PatternMatcher([
  # always realize SINK src
  (UPat(Ops.SINK, name="s"), lambda ctx,s: ctx.update((x.base, None) for x in s.src if x.base.op not in ALWAYS_CONTIGUOUS)),
-  # always realize COPY/BUFFER_VIEW/CONTIGUOUS/STORE
-  (UPat({Ops.COPY, Ops.BUFFER_VIEW, Ops.CONTIGUOUS, Ops.STORE}, name="tr"), realize),
+  # always realize COPY/BUFFER_VIEW/CONTIGUOUS/STORE/ENCDEC
+  (UPat({Ops.COPY, Ops.BUFFER_VIEW, Ops.CONTIGUOUS, Ops.STORE, Ops.ENCDEC}, name="tr"), realize),
  # always realize REDUCE on outer ranges
  (UPat(Ops.REDUCE, name="r"), lambda ctx,r: realize(ctx, r) if any(tr.arg[-1] == AxisType.OUTER for tr in r.src[1:]) else None),
-  # realize srcs of COPY, MSELECT, MSTACK
-  (UPat((Ops.COPY, Ops.MSELECT, Ops.MSTACK), name="rb"), realize_srcs),
+  # realize srcs of COPY, MSELECT, MSTACK, ENCDEC
+  (UPat((Ops.COPY, Ops.MSELECT, Ops.MSTACK, Ops.ENCDEC), name="rb"), realize_srcs),
  # realize ASSIGN and input to assign (might be optimized out)
  (UPat(Ops.ASSIGN, name="a"), realize_assign),
 ])
--- a/tinygrad/schedule/rangeify.py
+++ b/tinygrad/schedule/rangeify.py
@@ -117,7 +117,7 @@ earliest_rewrites = mop_cleanup+PatternMatcher([
 # 3.5 cleanups

 # Ops.NOOP happens when we have a COPY to the device the Tensor is already on. We treat it like COPY here for MSTACK.
-ALWAYS_RUN_OPS = {Ops.CONTIGUOUS, Ops.COPY, Ops.ASSIGN, Ops.NOOP}
+ALWAYS_RUN_OPS = {Ops.CONTIGUOUS, Ops.COPY, Ops.ASSIGN, Ops.ENCDEC, Ops.NOOP}

 # you don't know in the first pass if axes are going to die, this happens if there's an EXPAND to the left
 def cleanup_dead_axes(b:UOp):
@@ -494,7 +494,7 @@ def split_store(ctx:list[UOp], x:UOp) -> UOp|None:
  # NOTE: the hack for COPY is here
  for u in ret.toposort():
    # TODO: this can be wrong if there's multiple of these
-    if u.op in {Ops.COPY, Ops.BUFFER_VIEW}:
+    if u.op in {Ops.COPY, Ops.BUFFER_VIEW, Ops.ENCDEC}:
      ret = u
      break
  else:
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -11,7 +11,7 @@ from tinygrad.helpers import suppress_finalizing, disable_gc
 from tinygrad.gradient import compute_gradient
 from tinygrad.mixin import OpMixin
 from tinygrad.mixin.movement import _align_left
-from tinygrad.uop.ops import smax, smin, resolve, UOp, Ops, sint, identity_element, all_metadata, _index_to_concrete_int, sint_to_uop
+from tinygrad.uop.ops import smax, smin, resolve, UOp, Ops, sint, identity_element, all_metadata, _index_to_concrete_int, sint_to_uop, Variable
 from tinygrad.engine.schedule import ScheduleItem, complete_create_schedule_with_vars
 from tinygrad.device import Device, Buffer
 from tinygrad.engine.realize import run_schedule
@@ -3564,6 +3564,19 @@ class Tensor(OpMixin):

  def __eq__(self, x) -> Tensor: return self.eq(x)                      # type: ignore[override]

+  # ***** encoding/decoding ops *****
+
+  def decode_hevc_frame(self, frame_pos:Variable, shape:tuple[int,...], state:Tensor, ref_frames:list[Tensor]|None=None) -> Tensor:
+    """
+    Creates a Tensor by decoding an HEVC frame chunk.
+
+    You must provide the frame position as a `Variable` (`frame_pos`), the output shape of the decoded data (`shape`), the HEVC context
+    (`state`), and, if required by the chunk, the reference frames (`ref_frames`).
+    """
+    ref_frames = [x.contiguous() for x in ref_frames or []]
+    assert isinstance(frame_pos, Variable), "frame_pos must be a Variable"
+    return self.contiguous()._apply_uop(UOp.encdec, state.contiguous(), *ref_frames, extra_args=(frame_pos,), arg=(shape,))
+
  # ***** functional nn ops *****

  def linear(self, weight:Tensor, bias:Tensor|None=None, dtype:DTypeLike|None=None) -> Tensor:
--- a/tinygrad/uop/init.py
+++ b/tinygrad/uop/init.py
@@ -80,7 +80,7 @@ class Ops(FastEnum):
  CONTIGUOUS = auto(); CONTIGUOUS_BACKWARD = auto(); DETACH = auto()

  # buffer ops
-  BUFFERIZE = auto(); COPY = auto(); BUFFER = auto(); BUFFER_VIEW = auto(); MSELECT = auto(); MSTACK = auto()
+  BUFFERIZE = auto(); COPY = auto(); BUFFER = auto(); BUFFER_VIEW = auto(); MSELECT = auto(); MSTACK = auto(); ENCDEC = auto()

  # the core 6 movement ops! these only exist in the tensor graph
  RESHAPE = auto(); PERMUTE = auto(); EXPAND = auto(); PAD = auto(); SHRINK = auto(); FLIP = auto()
--- a/tinygrad/uop/ops.py
+++ b/tinygrad/uop/ops.py
@@ -232,6 +232,7 @@ class UOp(OpMixin, metaclass=UOpMetaClass):
      case Ops.CONST | Ops.DEFINE_VAR | Ops.BIND: return () if self._device is not None else None
      case Ops.BUFFER: return (self.arg,)
      case Ops.BUFFER_VIEW: return (self.arg[0],)
+      case Ops.ENCDEC: return self.arg[0]
      case Ops.BUFFERIZE: return tuple([int(r.vmax+1) for r in self.src[1:]])
      case Ops.DEFINE_GLOBAL | Ops.DEFINE_LOCAL | Ops.DEFINE_REG: return (self.ptrdtype.size,)

@@ -538,6 +539,7 @@ class UOp(OpMixin, metaclass=UOpMetaClass):
  def mselect(self, arg:int) -> UOp: return UOp(Ops.MSELECT, self.dtype, (self,), arg)
  @property
  def metadata(self) -> tuple[Metadata, ...]|None: return all_metadata.get(self, None)
+  def encdec(self, *src, arg=None): return UOp(Ops.ENCDEC, self.dtype, src=(self,)+src, arg=arg)

  # *** uop movement ops ***

@@ -1371,6 +1373,7 @@ pm_pyrender_extra = PatternMatcher([
  (UPat(Ops.BUFFER, src=(UPat(Ops.UNIQUE, name="u"), UPat(Ops.DEVICE, name="d")), name="x"), lambda x,u,d:
    f"UOp.new_buffer({repr(d.arg)}, {x.size}, {x.dtype}, {u.arg})"),
  (UPat(Ops.COPY, src=(UPat(name="x"), UPat(Ops.DEVICE, name="d"))), lambda ctx,x,d: f"{ctx[x]}.copy_to_device({repr(d.arg)})"),
+  (UPat(Ops.ENCDEC, name="x"), lambda ctx,x: f"{ctx[x.src[0]]}.encdec({''.join([str(ctx[s])+', ' for s in x.src[1:]])}arg={x.arg!r})"),
  (UPat(Ops.REDUCE_AXIS, name="r"), lambda ctx,r: f"{ctx[r.src[0]]}.r({r.arg[0]}, {r.arg[1]})"),
  # NOTE: range has srcs sometimes after control flow
  (UPat(Ops.RANGE, src=(UPat(Ops.CONST, name="c"),), allow_any_len=True, name="x"), lambda ctx,x,c:
--- a/tinygrad/uop/spec.py
+++ b/tinygrad/uop/spec.py
@@ -96,10 +96,11 @@ _tensor_spec = PatternMatcher([
  (UPat(Ops.CONTIGUOUS, name="root", src=(UPat.var("x"),), allow_any_len=True, arg=None),
   lambda root,x: root.dtype == x.dtype and all(u.op is Ops.RANGE for u in root.src[1:])),

-  # COPY/ALLREDUCE/MULTI
+  # COPY/ALLREDUCE/MULTI/ENCDEC
  (UPat(Ops.COPY, name="copy", src=(UPat.var("x"), UPat(Ops.DEVICE)), arg=None), lambda copy,x: copy.dtype == x.dtype),
  (UPat(Ops.ALLREDUCE, name="red", src=(UPat.var("x"), UPat(Ops.DEVICE))), lambda red,x: red.dtype == x.dtype and isinstance(red.arg, Ops)),
  (UPat(Ops.MULTI, name="multi"), lambda multi: all(x.dtype == multi.dtype for x in multi.src) and isinstance(multi.arg, int)),
+  (UPat(Ops.ENCDEC, name="x"), lambda x: len(x.src) >= 2), # state + inbuffer

  # REDUCE_AXIS is the reduce in the tensor graph
  (UPat(Ops.REDUCE_AXIS, name="x"), lambda x: isinstance(x.arg, tuple) and len(x.arg) >= 2 and x.arg[0] in {Ops.ADD, Ops.MUL, Ops.MAX}),
--- a/tinygrad/viz/serve.py
+++ b/tinygrad/viz/serve.py
@@ -19,7 +19,7 @@ uops_colors = {Ops.LOAD: "#ffc0c0", Ops.STORE: "#87CEEB", Ops.CONST: "#e0e0e0",
               Ops.RANGE: "#c8a0e0", Ops.ASSIGN: "#909090", Ops.BARRIER: "#ff8080", Ops.IF: "#c8b0c0", Ops.SPECIAL: "#c0c0ff",
               Ops.INDEX: "#cef263", Ops.WMMA: "#efefc0", Ops.MULTI: "#f6ccff", Ops.KERNEL: "#3e7f55",
               **{x:"#D8F9E4" for x in GroupOp.Movement}, **{x:"#ffffc0" for x in GroupOp.ALU}, Ops.THREEFRY:"#ffff80",
-               Ops.BUFFER_VIEW: "#E5EAFF", Ops.BUFFER: "#B0BDFF", Ops.COPY: "#a040a0",
+               Ops.BUFFER_VIEW: "#E5EAFF", Ops.BUFFER: "#B0BDFF", Ops.COPY: "#a040a0", Ops.ENCDEC: "#bf71b6",
               Ops.ALLREDUCE: "#ff40a0", Ops.MSELECT: "#d040a0", Ops.MSTACK: "#d040a0", Ops.CONTIGUOUS: "#FFC14D",
               Ops.BUFFERIZE: "#FF991C", Ops.REWRITE_ERROR: "#ff2e2e", Ops.AFTER: "#8A7866", Ops.END: "#524C46"}