diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index ecd5ed938a..bb5bb4cbe4 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -318,6 +318,8 @@ jobs: # TODO: too slow # - name: Fuzz Padded Tensor Core GEMM (PTX) # run: NV=1 NV_PTX=1 M_START=12 M_STOP=20 M_STEP=1 N_START=6 N_STOP=10 N_STEP=1 K_START=28 K_STOP=36 K_STEP=1 HALF=1 TC_OPT=2 python3 ./extra/gemm/fuzz_matmul.py + - name: HEVC Decode Benchmark + run: VALIDATE=1 MAX_FRAMES=100 NV=1 PYTHONPATH=. python3 extra/hevc/decode.py - name: Train MNIST run: time PYTHONPATH=. NV=1 TARGET_EVAL_ACC_PCT=96.0 python3 examples/beautiful_mnist.py | tee beautiful_mnist.txt # TODO: too slow diff --git a/extra/hevc/.gitignore b/extra/hevc/.gitignore new file mode 100644 index 0000000000..89f9ac04aa --- /dev/null +++ b/extra/hevc/.gitignore @@ -0,0 +1 @@ +out/ diff --git a/extra/hevc/decode.py b/extra/hevc/decode.py new file mode 100644 index 0000000000..b08d9bdc12 --- /dev/null +++ b/extra/hevc/decode.py @@ -0,0 +1,71 @@ +import argparse, os, hashlib +from tinygrad.helpers import getenv, DEBUG, round_up, Timing, tqdm, fetch +from extra.hevc.hevc import parse_hevc_file_headers, untile_nv12, to_bgr, nv_gpu +from tinygrad import Tensor, dtypes, Device, Variable + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input_file", type=str, default="") + parser.add_argument("--output_dir", type=str, default="extra/hevc/out") + args = parser.parse_args() + + os.makedirs(args.output_dir, exist_ok=True) + + if args.input_file == "": + url = "https://github.com/haraschax/filedump/raw/09a497959f7fa6fd8dba501a25f2cdb3a41ecb12/comma_video.hevc" + hevc_tensor = Tensor.from_url(url, device="CPU") + else: + hevc_tensor = Tensor.empty(os.stat(args.input_file).st_size, dtype=dtypes.uint8, device=f"disk:{args.input_file}").to("CPU") + + dat = bytes(hevc_tensor.data()) + dat_hash = hashlib.md5(dat).hexdigest() + + with Timing("prep infos: "): + 
dat_nv = hevc_tensor.to("NV") + opaque, frame_info, w, h, luma_w, luma_h, chroma_off = parse_hevc_file_headers(dat) + + frame_info = frame_info[:getenv("MAX_FRAMES", len(frame_info))] + + # move all needed data to gpu + all_slices = [] + with Timing("prep slices to gpu: "): + opaque_nv = opaque.to("NV").contiguous().realize() + + for i, (offset, sz, frame_pos, history_sz, _) in enumerate(frame_info): + all_slices.append(hevc_tensor[offset:offset+sz].to("NV").contiguous().realize()) + + Device.default.synchronize() + + out_image_size = luma_h + (luma_h + 1) // 2, round_up(luma_w, 64) + max_hist = max(history_sz for _, _, _, history_sz, _ in frame_info) + pos = Variable("pos", 0, max_hist + 1) + + history = [] + out_images = [] + with Timing("decoding whole file: ", on_exit=(lambda et: f", {len(frame_info)} frames, {len(frame_info)/(et/1e9):.2f} fps")): + for i, (offset, sz, frame_pos, history_sz, is_hist) in enumerate(frame_info): + history = history[-history_sz:] if history_sz > 0 else [] + + outimg = all_slices[i].decode_hevc_frame(pos.bind(frame_pos), out_image_size, opaque_nv[i], history).realize() + out_images.append(outimg) + if is_hist: history.append(outimg) + + Device.default.synchronize() + + if getenv("VALIDATE", 0): + import pickle + if dat_hash == "b813bfdbec194fd17fdf0e3ceb8cea1c": + url = "https://github.com/nimlgen/hevc_validate_set/raw/refs/heads/main/decoded_frames_b813bfdbec194fd17fdf0e3ceb8cea1c.pkl" + decoded_frames = pickle.load(fetch(url).open("rb")) + else: decoded_frames = pickle.load(open(f"extra/hevc/decoded_frames_{dat_hash}.pkl", "rb")) + else: import cv2 + + for i, img in tqdm(enumerate(out_images)): + if getenv("VALIDATE", 0): + if i < len(decoded_frames) and len(decoded_frames[i]) > 0: + img = untile_nv12(img, h, w, luma_w, chroma_off).realize() + assert img.data() == decoded_frames[i], f"Frame {i} does not match reference decoder!" 
class BitReader:
  """MSB-first bit reader over a byte string, with the Exp-Golomb decoders used by the header parsers.

  `bits` buffers fetched-but-unconsumed bits and `current_bits` is how many of them are valid.
  `read_bits` counts the bits fetched from the input so far and `total` is the input length in bits,
  so `read_bits - current_bits` is the current bit position in the input.
  """
  def __init__(self, data:bytes): self.reader, self.current_bits, self.bits, self.read_bits, self.total = iter(data), 0, 0, 0, len(data) * 8

  def empty(self):
    """Return True once every input byte has been fetched and the bit buffer is drained."""
    return self.read_bits == self.total and self.current_bits == 0

  def peak_bits(self, n):
    """Return the next `n` bits as an int without consuming them.

    Fetches whole bytes until at least `n` bits are buffered; `next()` on the exhausted
    byte iterator raises StopIteration when the input runs out.
    """
    while self.current_bits < n:
      self.bits = (self.bits << 8) | next(self.reader)
      self.current_bits += 8
      self.read_bits += 8
    return (self.bits >> (self.current_bits - n)) & ((1 << n) - 1)

  def _next_bits(self, n):
    """Consume and return the next `n` bits."""
    val = self.peak_bits(n)
    # drop the consumed bits from the top of the buffer
    self.bits &= (1 << (self.current_bits - n)) - 1
    self.current_bits -= n
    return val

  def u(self, n):
    """Read an unsigned `n`-bit integer (`n` may be 0, which reads nothing and returns 0)."""
    return self._next_bits(n)

  # 9.2 Parsing process for 0-th order Exp-Golomb codes
  def ue_v(self):
    """Read an unsigned 0-th order Exp-Golomb code: N zero bits, a one bit, then an N-bit suffix."""
    leading_zero_bits = 0
    while self.u(1) == 0: leading_zero_bits += 1
    return (1 << leading_zero_bits) - 1 + self.u(leading_zero_bits)

  # 9.2.2 Mapping process for signed Exp-Golomb codes
  def se_v(self):
    """Read a signed Exp-Golomb code: codeNum k maps to (-1)**(k+1) * ceil(k/2), i.e. 0, 1, -1, 2, -2, ...

    The previous expression `-1 ** (k + 1)` parsed as `-(1 ** (k + 1))`, so every value came out
    non-positive, and `k // 2` rounded the magnitude down instead of up.
    """
    k = self.ue_v()
    magnitude = (k + 1) // 2  # ceil(k / 2)
    return magnitude if k & 1 else -magnitude
# 7.3.2.2 Sequence parameter set RBSP syntax
class SPS(HevcSlice):
  """Sequence parameter set, decoded field by field from a bit reader in bitstream order.

  Every parsed syntax element is stored as an attribute of the same name. Scaling-list data,
  PCM and long-term reference pictures are not supported and trip an assertion.
  """
  def __init__(self, r:BitReader):
    self.sps_video_parameter_set_id = r.u(4)
    self.sps_max_sub_layers_minus1 = r.u(3)
    self.sps_temporal_id_nesting_flag = r.u(1)

    self.profile_tier_level(r, True, self.sps_max_sub_layers_minus1)

    self.sps_seq_parameter_set_id = r.ue_v()
    self.chroma_format_idc = r.ue_v()
    # the separate-plane flag is only present in the stream for chroma_format_idc == 3
    if self.chroma_format_idc == 3: self.separate_colour_plane_flag = r.u(1)
    else: self.separate_colour_plane_flag = 0
    self.pic_width_in_luma_samples = r.ue_v()
    self.pic_height_in_luma_samples = r.ue_v()

    # conformance window offsets are read as left, right, top, bottom; all zero when absent
    self.conformance_window_flag = r.u(1)
    window = [r.ue_v() for _ in range(4)] if self.conformance_window_flag else [0, 0, 0, 0]
    self.conf_win_left_offset, self.conf_win_right_offset, self.conf_win_top_offset, self.conf_win_bottom_offset = window

    self.bit_depth_luma = 8 + r.ue_v()
    self.bit_depth_chroma = 8 + r.ue_v()
    self.log2_max_pic_order_cnt_lsb_minus4 = r.ue_v()

    # per-sub-layer ordering info: every sub-layer when the flag is set, otherwise only the highest one
    self.sps_sub_layer_ordering_info_present_flag = r.u(1)
    self.sps_max_dec_pic_buffering = []
    self.sps_max_num_reorder_pics = []
    self.sps_max_latency_increase_plus1 = []
    first_layer = 0 if self.sps_sub_layer_ordering_info_present_flag else self.sps_max_sub_layers_minus1
    for _ in range(first_layer, self.sps_max_sub_layers_minus1 + 1):
      dec_pic_buffering, num_reorder, latency_plus1 = r.ue_v() + 1, r.ue_v(), r.ue_v()
      self.sps_max_dec_pic_buffering.append(dec_pic_buffering)
      self.sps_max_num_reorder_pics.append(num_reorder)
      self.sps_max_latency_increase_plus1.append(latency_plus1)

    # block sizes: each max is coded as a delta on top of the corresponding min
    min_coding_block = r.ue_v() + 3
    self.log2_min_luma_coding_block_size = min_coding_block
    self.log2_max_luma_coding_block_size = min_coding_block + r.ue_v()
    min_transform_block = r.ue_v() + 2
    self.log2_min_transform_block_size = min_transform_block
    self.log2_max_transform_block_size = min_transform_block + r.ue_v()
    self.max_transform_hierarchy_depth_inter = r.ue_v()
    self.max_transform_hierarchy_depth_intra = r.ue_v()

    if r.u(1):  # scaling_list_enabled_flag
      scaling_list_data_present = r.u(1)  # read outside the assert so the bit is consumed even under -O
      assert not scaling_list_data_present, "scaling_list_data parsing not implemented"

    self.amp_enabled_flag = r.u(1)
    self.sample_adaptive_offset_enabled_flag = r.u(1)
    self.pcm_enabled_flag = r.u(1)
    assert self.pcm_enabled_flag == 0, "pcm not implemented"

    self.num_short_term_ref_pic_sets = r.ue_v()
    for rps_idx in range(self.num_short_term_ref_pic_sets):
      self.st_ref_pic_set(r, rps_idx, self.num_short_term_ref_pic_sets)

    self.long_term_ref_pics_present_flag = r.u(1)
    assert not self.long_term_ref_pics_present_flag, "long_term_ref_pics parsing not implemented"
    self.sps_temporal_mvp_enabled_flag = r.u(1)
    self.strong_intra_smoothing_enabled_flag = r.u(1)
# 7.3.6 Slice segment header syntax
class SliceSegment(HevcSlice):
  """Slice segment header, parsed far enough to drive the hardware decoder.

  Besides the syntax elements it stores bit positions (`read_bits - current_bits` of the reader):
  `sw_skip_start` / `sw_skip_end` bracket the header span from just after `slice_type` to just
  before `slice_temporal_mvp_enabled_flag`, and `short_term_ref_pics_in_slice_start` / `_end`
  bracket an in-header short-term reference picture set when one is present.

  Args:
    r: bit reader positioned at the first bit after the NAL unit header.
    nal_unit_type: NAL unit type of the slice; selects the IRAP- and IDR-specific fields.
    sps: active sequence parameter set.
    pps: active picture parameter set.
  """
  def __init__(self, r:BitReader, nal_unit_type:int, sps:SPS, pps:PPS):
    self.first_slice_segment_in_pic_flag = r.u(1)
    if nal_unit_type >= avcodec.HEVC_NAL_BLA_W_LP and nal_unit_type <= avcodec.HEVC_NAL_RSV_IRAP_VCL23:
      self.no_output_of_prior_pics_flag = r.u(1)
    self.slice_pic_parameter_set_id = r.ue_v()
    if not self.first_slice_segment_in_pic_flag:
      if pps.dependent_slice_segments_enabled_flag:
        self.dependent_slice_segment_flag = r.u(1)
      self.slice_segment_address = r.ue_v()
    # NOTE(review): the flag is forced to 0 even when it was just read, so dependent slice segments
    # are parsed as independent ones; kept as-is, confirm whether this is intentional.
    self.dependent_slice_segment_flag = 0
    if not self.dependent_slice_segment_flag:
      r.u(pps.num_extra_slice_header_bits) # extra bits ignored
      self.slice_type = r.ue_v()

      self.sw_skip_start = r.read_bits - r.current_bits
      self.pic_output_flag = r.u(1) if pps.output_flag_present_flag else 0
      self.colour_plane_id = r.u(2) if sps.separate_colour_plane_flag else 0

      if nal_unit_type != avcodec.HEVC_NAL_IDR_W_RADL and nal_unit_type != avcodec.HEVC_NAL_IDR_N_LP:
        self.slice_pic_order_cnt_lsb = r.u(sps.log2_max_pic_order_cnt_lsb_minus4 + 4)

        self.short_term_ref_pic_set_sps_flag = r.u(1)
        if not self.short_term_ref_pic_set_sps_flag:
          self.short_term_ref_pics_in_slice_start = r.read_bits - r.current_bits
          self.st_ref_pic_set(r, sps.num_short_term_ref_pic_sets, sps=sps)
          self.short_term_ref_pics_in_slice_end = r.read_bits - r.current_bits
        elif sps.num_short_term_ref_pic_sets > 1: assert False, "short_term_ref_pic_set parsing not implemented"

        if sps.long_term_ref_pics_present_flag: assert False, "long_term_ref_pics parsing not implemented"

        self.sw_skip_end = r.read_bits - r.current_bits
        self.slice_temporal_mvp_enabled_flag = r.u(1) if sps.sps_temporal_mvp_enabled_flag else 0
      else: self.slice_pic_order_cnt_lsb, self.sw_skip_end = 0, self.sw_skip_start

      if sps.sample_adaptive_offset_enabled_flag:
        slice_sao_luma_flag = r.u(1)
        ChromaArrayType = sps.chroma_format_idc if sps.separate_colour_plane_flag == 0 else 0
        slice_sao_chroma_flag = r.u(1) if ChromaArrayType != 0 else 0

      # the override flag is coded for both P and B slices; the set used to list HEVC_SLICE_B twice,
      # so P slices skipped these fields
      if self.slice_type in {avcodec.HEVC_SLICE_P, avcodec.HEVC_SLICE_B}:
        if num_ref_idx_active_override_flag := r.u(1):
          num_ref_idx_l0_active_minus1 = r.ue_v()
          num_ref_idx_l1_active_minus1 = r.ue_v() if self.slice_type == avcodec.HEVC_SLICE_B else 0
def parse_hevc_file_headers(dat:bytes, device="NV"):
  """Walk an HEVC byte stream NAL unit by NAL unit and build one decoder context record per picture.

  SPS and PPS units update a shared `nvdec_hevc_pic_s` context; slice units are grouped into
  pictures, and each finished picture is turned into a fixed-size context record by `_flush_picture`.

  Args:
    dat: the raw stream. The 3-byte start code 00 00 01 is expected at byte offset 1
      (presumably a leading 4-byte 00 00 00 01 start code -- TODO confirm).
    device: device the returned context tensor is created on.

  Returns:
    A tuple `(opaque, res, w, h, luma_w, luma_h, chroma_off)`:
      opaque: Tensor of the context records, reshaped to (number of pictures, 0x300).
      res: per picture `(offset of its first NAL unit in dat, total byte length of its NAL units,
        curr_pic_idx, number of history entries when it was flushed, is_hist)`.
      w, h: SPS picture size minus twice the conformance window offsets.
      luma_w, luma_h: SPS picture width and height in luma samples.
      chroma_off: product of the width and height each rounded up to a multiple of 64.
  """
  res = []
  nal_unit_start = 1
  # entries appended below are (curr_pic_idx, slice_pic_order_cnt_lsb, None)
  history:list[tuple[int, int, int]] = []
  device_ctx = nv_gpu.nvdec_hevc_pic_s(gptimer_timeout_value=92720000, tileformat=1, sw_start_code_e=1, pattern_id=2)
  # (header, nal_unit_type), start offset and length of every slice NAL unit of the picture being collected
  nal_infos = []
  ctx_bytes = bytes()
  align_ctx_bytes_size = 0x300

  def _flush_picture():
    # Finalize the picture collected in nal_infos: fill the per-picture fields of device_ctx,
    # append its serialized record to ctx_bytes and its metadata to res, then update history.
    # sps and pps are read from the enclosing function and must have been parsed by now.
    nonlocal res, history, device_ctx, nal_infos, ctx_bytes, align_ctx_bytes_size

    if not len(nal_infos): return

    hdr, nal_unit_type = nal_infos[0][0]
    assert all(nal_unit_type == x[0][1] for x in nal_infos), "all NAL units in a picture must be of the same type"

    # lowest index in 0..15 not used by any history entry (chosen before an IDR clears the history)
    device_ctx.curr_pic_idx = next(i for i in range(16) if all(d[0] != i for d in history))

    if nal_unit_type in {avcodec.HEVC_NAL_IDR_W_RADL, avcodec.HEVC_NAL_IDR_N_LP}:
      history = []

    device_ctx.num_ref_frames = len(history)
    device_ctx.IDR_picture_flag = int(nal_unit_type in {avcodec.HEVC_NAL_IDR_W_RADL, avcodec.HEVC_NAL_IDR_N_LP})
    device_ctx.RAP_picture_flag = int(nal_unit_type >= avcodec.HEVC_NAL_BLA_W_LP and nal_unit_type <= avcodec.HEVC_NAL_RSV_IRAP_VCL23)
    device_ctx.RefDiffPicOrderCnts=(ctypes.c_int16 * 16)()
    device_ctx.colMvBuffersize = (round_up(sps.pic_width_in_luma_samples, 64) * round_up(sps.pic_height_in_luma_samples, 64) // 16) // 256
    device_ctx.framestride=(ctypes.c_uint32 * 2)(round_up(sps.pic_width_in_luma_samples, 64), round_up(sps.pic_width_in_luma_samples, 64))
    device_ctx.sw_hdr_skip_length = hdr.sw_skip_end - hdr.sw_skip_start
    device_ctx.num_bits_short_term_ref_pics_in_slice = max(0, device_ctx.sw_hdr_skip_length - 9)
    device_ctx.stream_len = sum(x[2] for x in nal_infos)

    if pps.tiles_enabled_flag:
      device_ctx.num_tile_columns = pps.num_tile_columns_minus1 + 1
      device_ctx.num_tile_rows = pps.num_tile_rows_minus1 + 1

    device_ctx.num_short_term_ref_pic_sets = sps.num_short_term_ref_pic_sets

    # the offsets below are chained: each is the previous offset plus a size proportional to the
    # rounded luma height, stored shifted right by 8
    luma_h_rounded = round_up(sps.pic_height_in_luma_samples, 64)
    device_ctx.HevcSaoBufferOffset = (608 * luma_h_rounded) >> 8
    device_ctx.HevcBsdCtrlOffset = ((device_ctx.HevcSaoBufferOffset<<8) + 4864 * luma_h_rounded) >> 8

    device_ctx.v1.hevc_main10_444_ext.HevcFltAboveOffset = ((device_ctx.HevcBsdCtrlOffset<<8) + 152 * luma_h_rounded) >> 8
    device_ctx.v1.hevc_main10_444_ext.HevcSaoAboveOffset = ((device_ctx.v1.hevc_main10_444_ext.HevcFltAboveOffset<<8) + 2000 * luma_h_rounded) >> 8
    device_ctx.v3.HevcSliceEdgeOffset = device_ctx.v1.hevc_main10_444_ext.HevcSaoAboveOffset

    # split the history into pictures with POC <= current (before) and POC > current (after),
    # each sorted by POC distance from the current picture
    before_list, after_list = [], []
    for pic_idx, poc, _ in history:
      device_ctx.RefDiffPicOrderCnts[pic_idx] = hdr.slice_pic_order_cnt_lsb - poc
      if hdr.slice_pic_order_cnt_lsb < poc: after_list.append((poc - hdr.slice_pic_order_cnt_lsb, pic_idx))
      else: before_list.append((hdr.slice_pic_order_cnt_lsb - poc, pic_idx))
    before_list.sort()
    after_list.sort()

    # list 0 takes the "before" pictures first, list 1 (B slices only) the "after" pictures first
    device_ctx.initreflistidxl0 = (ctypes.c_uint8 * 16)(*[idx for _,idx in before_list + after_list])
    if hdr.slice_type == avcodec.HEVC_SLICE_B: device_ctx.initreflistidxl1 = (ctypes.c_uint8 * 16)(*[idx for _,idx in after_list + before_list])

    locl_ctx_bytes = bytes(device_ctx)
    locl_ctx_bytes += bytes(0x200 - len(locl_ctx_bytes)) # pad to 512 bytes

    pic_width_in_ctbs = ceildiv(sps.pic_width_in_luma_samples, (1 << sps.log2_max_luma_coding_block_size))
    pic_height_in_ctbs = ceildiv(sps.pic_height_in_luma_samples, (1 << sps.log2_max_luma_coding_block_size))
    # append tile sizes 0x200
    if pps.tiles_enabled_flag and pps.uniform_spacing_flag:
      assert device_ctx.num_tile_columns == 1 and device_ctx.num_tile_rows == 1, "not implemented: uniform spacing with multiple tiles"
      locl_ctx_bytes += pic_width_in_ctbs.to_bytes(2, "little") + pic_height_in_ctbs.to_bytes(2, "little")
    else:
      if pps.tiles_enabled_flag and not getattr(pps, 'uniform_spacing_flag', 0):
        column_width = [cw_minus1 + 1 for cw_minus1 in pps.column_width_minus1[0:pps.num_tile_columns_minus1]]
        row_height = [rh_minus1 + 1 for rh_minus1 in pps.row_height_minus1[0:pps.num_tile_rows_minus1]]
      else:
        column_width = []
        row_height = []

      # the last column / row takes whatever is left of the picture (the whole picture without tiles)
      column_width.append(pic_width_in_ctbs - sum(column_width))
      row_height.append(pic_height_in_ctbs - sum(row_height))

      # one (width, height) pair of little-endian 16-bit values per tile, iterating columns in the outer loop
      for c in column_width:
        for r in row_height: locl_ctx_bytes += c.to_bytes(2, "little") + r.to_bytes(2, "little")

    # NOTE(review): luma_size and chroma_size are computed but never read in this function
    luma_size = round_up(sps.pic_width_in_luma_samples, 64) * round_up(sps.pic_height_in_luma_samples, 64)
    chroma_size = round_up(sps.pic_width_in_luma_samples, 64) * round_up((sps.pic_height_in_luma_samples + 1) // 2, 64)
    is_hist = nal_unit_type in {avcodec.HEVC_NAL_TRAIL_R, avcodec.HEVC_NAL_IDR_N_LP, avcodec.HEVC_NAL_IDR_W_RADL}

    res.append((nal_infos[0][1], device_ctx.stream_len, device_ctx.curr_pic_idx, len(history), is_hist))

    # every record is zero-padded to align_ctx_bytes_size so the caller can reshape ctx_bytes into rows
    locl_ctx_bytes += (align_ctx_bytes_size - len(locl_ctx_bytes)) * b'\x00'
    ctx_bytes += locl_ctx_bytes

    if nal_unit_type in {avcodec.HEVC_NAL_TRAIL_R, avcodec.HEVC_NAL_IDR_N_LP, avcodec.HEVC_NAL_IDR_W_RADL}:
      history.append((device_ctx.curr_pic_idx, hdr.slice_pic_order_cnt_lsb, None))

    if len(history) >= sps.sps_max_dec_pic_buffering[0]:
      # remove the oldest poc
      history.pop(0)

    nal_infos = []

  # NOTE(review): cnt is never read
  cnt = 0
  while nal_unit_start < len(dat):
    assert dat[nal_unit_start:nal_unit_start+3] == b"\x00\x00\x01", "NAL unit start code not found"

    # the unit runs up to the next start code, or to the end of the data
    pos = dat.find(b"\x00\x00\x01", nal_unit_start + 3)
    nal_unit_len = (pos if pos != -1 else len(dat)) - nal_unit_start

    # 7.3.1.1 General NAL unit syntax
    nal_unit_type = (dat[nal_unit_start+3] >> 1) & 0x3F
    # payload starts 5 bytes in: 3 start-code bytes plus the 2 bytes after them
    slice_dat = dat[nal_unit_start+5:nal_unit_start+nal_unit_len]

    if nal_unit_type == avcodec.HEVC_NAL_SPS:
      sps = SPS(BitReader(_hevc_get_rbsp(slice_dat)))
      fill_sps_into_dev_context(device_ctx, sps)
    elif nal_unit_type == avcodec.HEVC_NAL_PPS:
      pps = PPS(BitReader(_hevc_get_rbsp(slice_dat)))
      fill_pps_into_dev_context(device_ctx, pps)
    elif nal_unit_type in {avcodec.HEVC_NAL_IDR_N_LP, avcodec.HEVC_NAL_IDR_W_RADL, avcodec.HEVC_NAL_TRAIL_R, avcodec.HEVC_NAL_TRAIL_N}:
      # slice headers are parsed from the raw payload, without the _hevc_get_rbsp pass used for SPS/PPS
      hdr = SliceSegment(BitReader(slice_dat), nal_unit_type, sps, pps)

      # a slice that starts a new picture closes the one collected so far
      if hdr.first_slice_segment_in_pic_flag == 1: _flush_picture()
      nal_infos.append(((hdr, nal_unit_type), nal_unit_start, nal_unit_len))

    nal_unit_start += nal_unit_len
  # flush the last picture of the stream
  _flush_picture()

  w = sps.pic_width_in_luma_samples - 2 * (sps.conf_win_left_offset + sps.conf_win_right_offset)
  h = sps.pic_height_in_luma_samples - 2 * (sps.conf_win_top_offset + sps.conf_win_bottom_offset)
  chroma_off = round_up(sps.pic_width_in_luma_samples, 64) * round_up(sps.pic_height_in_luma_samples, 64)
  opaque = Tensor(ctx_bytes, device=device).reshape(len(res), align_ctx_bytes_size)
  return opaque, res, w, h, sps.pic_width_in_luma_samples, sps.pic_height_in_luma_samples, chroma_off
def nv12_to_bgr_from_planes(luma: Tensor, chroma: Tensor, h: int, w: int) -> Tensor:
  """Convert separate NV12 luma and interleaved-chroma planes into an (h, w, 3) uint8 BGR image.

  `luma` is reshaped to (h, w) and `chroma` to (h // 2, w // 2, 2), with U in channel 0 and V in
  channel 1. Each chroma sample is repeated over a 2x2 block, and the result is clamped to
  [0, 255] before the cast to uint8.
  """
  half_h, half_w = h // 2, w // 2

  def _upsample_2x(plane: Tensor) -> Tensor:
    # repeat every sample twice along both axes to reach the luma resolution
    return plane.reshape(half_h, 1, half_w, 1).expand(half_h, 2, half_w, 2).reshape(h, w)

  def _clamp(channel: Tensor) -> Tensor:
    return channel.maximum(0.0).minimum(255.0)

  uv_pairs = chroma.reshape(half_h, half_w, 2).cast(dtypes.float32)

  # remove the offsets: 16 for luma, 128 for both chroma channels
  y_term = luma.reshape(h, w).cast(dtypes.float32) - 16.0
  u_term = _upsample_2x(uv_pairs[..., 0]) - 128.0
  v_term = _upsample_2x(uv_pairs[..., 1]) - 128.0

  red = _clamp(1.1643835616438356 * y_term + 1.5960267857142858 * v_term)
  green = _clamp(1.1643835616438356 * y_term - 0.39176229009491365 * u_term - 0.8129676472377708 * v_term)
  blue = _clamp(1.1643835616438356 * y_term + 2.017232142857143 * u_term)

  return Tensor.stack([blue, green, red], dim=2).cast(dtypes.uint8)
-0,0 +1,603 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef clc9b0_h_ +#define clc9b0_h_ + +#include "nvtypes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define NVC9B0_VIDEO_DECODER (0x0000C9B0) + +#define NVC9B0_NOP (0x00000100) +#define NVC9B0_NOP_PARAMETER 31:0 +#define NVC9B0_PM_TRIGGER (0x00000140) +#define NVC9B0_PM_TRIGGER_V 31:0 +#define NVC9B0_SET_APPLICATION_ID (0x00000200) +#define NVC9B0_SET_APPLICATION_ID_ID 31:0 +#define NVC9B0_SET_APPLICATION_ID_ID_MPEG12 (0x00000001) +#define NVC9B0_SET_APPLICATION_ID_ID_VC1 (0x00000002) +#define NVC9B0_SET_APPLICATION_ID_ID_H264 (0x00000003) +#define NVC9B0_SET_APPLICATION_ID_ID_MPEG4 (0x00000004) +#define NVC9B0_SET_APPLICATION_ID_ID_VP8 (0x00000005) +#define NVC9B0_SET_APPLICATION_ID_ID_CTR64 (0x00000006) +#define NVC9B0_SET_APPLICATION_ID_ID_HEVC (0x00000007) +#define NVC9B0_SET_APPLICATION_ID_ID_NEW_H264 (0x00000008) +#define NVC9B0_SET_APPLICATION_ID_ID_VP9 (0x00000009) +#define NVC9B0_SET_APPLICATION_ID_ID_PASS1 (0x0000000A) +#define NVC9B0_SET_APPLICATION_ID_ID_HEVC_PARSER (0x0000000C) +#define NVC9B0_SET_APPLICATION_ID_ID_UCODE_TEST (0x0000000D) +#define NVC9B0_SET_APPLICATION_ID_ID_HWDRM_PR_DECRYPTAUDIO (0x0000000E) +#define NVC9B0_SET_APPLICATION_ID_ID_HWDRM_PR_DECRYPTAUDIOMULTIPLE (0x0000000F) +#define NVC9B0_SET_APPLICATION_ID_ID_HWDRM_PR_PREPROCESSENCRYPTEDDATA (0x00000010) +#define NVC9B0_SET_APPLICATION_ID_ID_VP9_WITH_PARSER (0x00000011) +#define NVC9B0_SET_APPLICATION_ID_ID_AVD (0x00000012) +#define NVC9B0_SET_APPLICATION_ID_ID_HW_DRM_PR4_DECRYPTCONTENTMULTIPLE (0x00000013) +#define NVC9B0_SET_APPLICATION_ID_ID_DHKE (0x00000020) +#define NVC9B0_SET_WATCHDOG_TIMER (0x00000204) +#define NVC9B0_SET_WATCHDOG_TIMER_TIMER 31:0 +#define NVC9B0_SEMAPHORE_A (0x00000240) +#define NVC9B0_SEMAPHORE_A_UPPER 7:0 +#define NVC9B0_SEMAPHORE_B (0x00000244) +#define NVC9B0_SEMAPHORE_B_LOWER 31:0 +#define NVC9B0_SEMAPHORE_C (0x00000248) +#define NVC9B0_SEMAPHORE_C_PAYLOAD 31:0 +#define NVC9B0_CTX_SAVE_AREA (0x0000024C) +#define NVC9B0_CTX_SAVE_AREA_OFFSET 
31:0 +#define NVC9B0_CTX_SWITCH (0x00000250) +#define NVC9B0_CTX_SWITCH_OP 1:0 +#define NVC9B0_CTX_SWITCH_OP_CTX_UPDATE (0x00000000) +#define NVC9B0_CTX_SWITCH_OP_CTX_SAVE (0x00000001) +#define NVC9B0_CTX_SWITCH_OP_CTX_RESTORE (0x00000002) +#define NVC9B0_CTX_SWITCH_OP_CTX_FORCERESTORE (0x00000003) +#define NVC9B0_CTX_SWITCH_CTXID_VALID 2:2 +#define NVC9B0_CTX_SWITCH_CTXID_VALID_FALSE (0x00000000) +#define NVC9B0_CTX_SWITCH_CTXID_VALID_TRUE (0x00000001) +#define NVC9B0_CTX_SWITCH_RESERVED0 7:3 +#define NVC9B0_CTX_SWITCH_CTX_ID 23:8 +#define NVC9B0_CTX_SWITCH_RESERVED1 31:24 +#define NVC9B0_SET_SEMAPHORE_PAYLOAD_LOWER (0x00000254) +#define NVC9B0_SET_SEMAPHORE_PAYLOAD_LOWER_PAYLOAD_LOWER 31:0 +#define NVC9B0_SET_SEMAPHORE_PAYLOAD_UPPER (0x00000258) +#define NVC9B0_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD_UPPER 31:0 +#define NVC9B0_SET_MONITORED_FENCE_SIGNAL_ADDRESS_BASE_A (0x0000025C) +#define NVC9B0_SET_MONITORED_FENCE_SIGNAL_ADDRESS_BASE_A_LOWER 31:0 +#define NVC9B0_SET_MONITORED_FENCE_SIGNAL_ADDRESS_BASE_B (0x00000260) +#define NVC9B0_SET_MONITORED_FENCE_SIGNAL_ADDRESS_BASE_B_UPPER 31:0 +#define NVC9B0_EXECUTE (0x00000300) +#define NVC9B0_EXECUTE_NOTIFY 0:0 +#define NVC9B0_EXECUTE_NOTIFY_DISABLE (0x00000000) +#define NVC9B0_EXECUTE_NOTIFY_ENABLE (0x00000001) +#define NVC9B0_EXECUTE_NOTIFY_ON 1:1 +#define NVC9B0_EXECUTE_NOTIFY_ON_END (0x00000000) +#define NVC9B0_EXECUTE_NOTIFY_ON_BEGIN (0x00000001) +#define NVC9B0_EXECUTE_PREDICATION 2:2 +#define NVC9B0_EXECUTE_PREDICATION_DISABLE (0x00000000) +#define NVC9B0_EXECUTE_PREDICATION_ENABLE (0x00000001) +#define NVC9B0_EXECUTE_PREDICATION_OP 3:3 +#define NVC9B0_EXECUTE_PREDICATION_OP_EQUAL_ZERO (0x00000000) +#define NVC9B0_EXECUTE_PREDICATION_OP_NOT_EQUAL_ZERO (0x00000001) +#define NVC9B0_EXECUTE_AWAKEN 8:8 +#define NVC9B0_EXECUTE_AWAKEN_DISABLE (0x00000000) +#define NVC9B0_EXECUTE_AWAKEN_ENABLE (0x00000001) +#define NVC9B0_SEMAPHORE_D (0x00000304) +#define NVC9B0_SEMAPHORE_D_STRUCTURE_SIZE 1:0 +#define 
NVC9B0_SEMAPHORE_D_STRUCTURE_SIZE_ONE (0x00000000) +#define NVC9B0_SEMAPHORE_D_STRUCTURE_SIZE_FOUR (0x00000001) +#define NVC9B0_SEMAPHORE_D_STRUCTURE_SIZE_TWO (0x00000002) +#define NVC9B0_SEMAPHORE_D_AWAKEN_ENABLE 8:8 +#define NVC9B0_SEMAPHORE_D_AWAKEN_ENABLE_FALSE (0x00000000) +#define NVC9B0_SEMAPHORE_D_AWAKEN_ENABLE_TRUE (0x00000001) +#define NVC9B0_SEMAPHORE_D_OPERATION 17:16 +#define NVC9B0_SEMAPHORE_D_OPERATION_RELEASE (0x00000000) +#define NVC9B0_SEMAPHORE_D_OPERATION_RESERVED_0 (0x00000001) +#define NVC9B0_SEMAPHORE_D_OPERATION_RESERVED_1 (0x00000002) +#define NVC9B0_SEMAPHORE_D_OPERATION_TRAP (0x00000003) +#define NVC9B0_SEMAPHORE_D_FLUSH_DISABLE 21:21 +#define NVC9B0_SEMAPHORE_D_FLUSH_DISABLE_FALSE (0x00000000) +#define NVC9B0_SEMAPHORE_D_FLUSH_DISABLE_TRUE (0x00000001) +#define NVC9B0_SEMAPHORE_D_TRAP_TYPE 23:22 +#define NVC9B0_SEMAPHORE_D_TRAP_TYPE_UNCONDITIONAL (0x00000000) +#define NVC9B0_SEMAPHORE_D_TRAP_TYPE_CONDITIONAL (0x00000001) +#define NVC9B0_SEMAPHORE_D_TRAP_TYPE_CONDITIONAL_EXT (0x00000002) +#define NVC9B0_SEMAPHORE_D_PAYLOAD_SIZE 24:24 +#define NVC9B0_SEMAPHORE_D_PAYLOAD_SIZE_32BIT (0x00000000) +#define NVC9B0_SEMAPHORE_D_PAYLOAD_SIZE_64BIT (0x00000001) +#define NVC9B0_SET_PREDICATION_OFFSET_UPPER (0x00000308) +#define NVC9B0_SET_PREDICATION_OFFSET_UPPER_OFFSET 7:0 +#define NVC9B0_SET_PREDICATION_OFFSET_LOWER (0x0000030C) +#define NVC9B0_SET_PREDICATION_OFFSET_LOWER_OFFSET 31:0 +#define NVC9B0_SET_AUXILIARY_DATA_BUFFER (0x00000310) +#define NVC9B0_SET_AUXILIARY_DATA_BUFFER_OFFSET 31:0 +#define NVC9B0_SET_CONTROL_PARAMS (0x00000400) +#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE 3:0 +#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_MPEG1 (0x00000000) +#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_MPEG2 (0x00000001) +#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_VC1 (0x00000002) +#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_H264 (0x00000003) +#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_MPEG4 (0x00000004) +#define 
NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_DIVX3 (0x00000004) +#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_VP8 (0x00000005) +#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_HEVC (0x00000007) +#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_VP9 (0x00000009) +#define NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_AV1 (0x0000000A) +#define NVC9B0_SET_CONTROL_PARAMS_GPTIMER_ON 4:4 +#define NVC9B0_SET_CONTROL_PARAMS_RET_ERROR 5:5 +#define NVC9B0_SET_CONTROL_PARAMS_ERR_CONCEAL_ON 6:6 +#define NVC9B0_SET_CONTROL_PARAMS_ERROR_FRM_IDX 12:7 +#define NVC9B0_SET_CONTROL_PARAMS_MBTIMER_ON 13:13 +#define NVC9B0_SET_CONTROL_PARAMS_EC_INTRA_FRAME_USING_PSLC 14:14 +#define NVC9B0_SET_CONTROL_PARAMS_IGNORE_SOME_FIELDS_CRC_CHECK 15:15 +#define NVC9B0_SET_CONTROL_PARAMS_EVENT_TRACE_LOGGING_ON 16:16 +#define NVC9B0_SET_CONTROL_PARAMS_ALL_INTRA_FRAME 17:17 +#define NVC9B0_SET_CONTROL_PARAMS_TESTRUN_ENV 19:18 +#define NVC9B0_SET_CONTROL_PARAMS_TESTRUN_ENV_TRACE3D_RUN (0x00000000) +#define NVC9B0_SET_CONTROL_PARAMS_TESTRUN_ENV_PROD_RUN (0x00000001) +#define NVC9B0_SET_CONTROL_PARAMS_HINT_DUMP_EN 20:20 +#define NVC9B0_SET_CONTROL_PARAMS_RESERVED 25:21 +#define NVC9B0_SET_CONTROL_PARAMS_NVDECSIM_SKIP_SCP 26:26 +#define NVC9B0_SET_CONTROL_PARAMS_ENABLE_ENCRYPT 27:27 +#define NVC9B0_SET_CONTROL_PARAMS_ENCRYPTMODE 31:28 +#define NVC9B0_SET_DRV_PIC_SETUP_OFFSET (0x00000404) +#define NVC9B0_SET_DRV_PIC_SETUP_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_IN_BUF_BASE_OFFSET (0x00000408) +#define NVC9B0_SET_IN_BUF_BASE_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_INDEX (0x0000040C) +#define NVC9B0_SET_PICTURE_INDEX_INDEX 31:0 +#define NVC9B0_SET_SLICE_OFFSETS_BUF_OFFSET (0x00000410) +#define NVC9B0_SET_SLICE_OFFSETS_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_COLOC_DATA_OFFSET (0x00000414) +#define NVC9B0_SET_COLOC_DATA_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_HISTORY_OFFSET (0x00000418) +#define NVC9B0_SET_HISTORY_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_DISPLAY_BUF_SIZE (0x0000041C) +#define NVC9B0_SET_DISPLAY_BUF_SIZE_SIZE 
31:0 +#define NVC9B0_SET_HISTOGRAM_OFFSET (0x00000420) +#define NVC9B0_SET_HISTOGRAM_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_NVDEC_STATUS_OFFSET (0x00000424) +#define NVC9B0_SET_NVDEC_STATUS_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_DISPLAY_BUF_LUMA_OFFSET (0x00000428) +#define NVC9B0_SET_DISPLAY_BUF_LUMA_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_DISPLAY_BUF_CHROMA_OFFSET (0x0000042C) +#define NVC9B0_SET_DISPLAY_BUF_CHROMA_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET0 (0x00000430) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET0_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET1 (0x00000434) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET1_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET2 (0x00000438) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET2_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET3 (0x0000043C) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET3_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET4 (0x00000440) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET4_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET5 (0x00000444) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET5_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET6 (0x00000448) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET6_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET7 (0x0000044C) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET7_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET8 (0x00000450) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET8_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET9 (0x00000454) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET9_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET10 (0x00000458) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET10_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET11 (0x0000045C) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET11_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET12 (0x00000460) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET12_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET13 (0x00000464) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET13_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET14 (0x00000468) 
+#define NVC9B0_SET_PICTURE_LUMA_OFFSET14_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET15 (0x0000046C) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET15_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_LUMA_OFFSET16 (0x00000470) +#define NVC9B0_SET_PICTURE_LUMA_OFFSET16_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET0 (0x00000474) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET0_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET1 (0x00000478) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET1_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET2 (0x0000047C) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET2_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET3 (0x00000480) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET3_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET4 (0x00000484) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET4_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET5 (0x00000488) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET5_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET6 (0x0000048C) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET6_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET7 (0x00000490) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET7_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET8 (0x00000494) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET8_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET9 (0x00000498) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET9_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET10 (0x0000049C) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET10_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET11 (0x000004A0) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET11_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET12 (0x000004A4) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET12_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET13 (0x000004A8) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET13_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET14 (0x000004AC) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET14_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET15 
(0x000004B0) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET15_OFFSET 31:0 +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET16 (0x000004B4) +#define NVC9B0_SET_PICTURE_CHROMA_OFFSET16_OFFSET 31:0 +#define NVC9B0_SET_PIC_SCRATCH_BUF_OFFSET (0x000004B8) +#define NVC9B0_SET_PIC_SCRATCH_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_EXTERNAL_MVBUFFER_OFFSET (0x000004BC) +#define NVC9B0_SET_EXTERNAL_MVBUFFER_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_SUB_SAMPLE_MAP_OFFSET (0x000004C0) +#define NVC9B0_SET_SUB_SAMPLE_MAP_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_SUB_SAMPLE_MAP_IV_OFFSET (0x000004C4) +#define NVC9B0_SET_SUB_SAMPLE_MAP_IV_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_INTRA_TOP_BUF_OFFSET (0x000004C8) +#define NVC9B0_SET_INTRA_TOP_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_TILE_SIZE_BUF_OFFSET (0x000004CC) +#define NVC9B0_SET_TILE_SIZE_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_FILTER_BUFFER_OFFSET (0x000004D0) +#define NVC9B0_SET_FILTER_BUFFER_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_CRC_STRUCT_OFFSET (0x000004D4) +#define NVC9B0_SET_CRC_STRUCT_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_PR_SSM_CONTENT_INFO_BUF_OFFSET (0x000004D8) +#define NVC9B0_SET_PR_SSM_CONTENT_INFO_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_H264_SET_MBHIST_BUF_OFFSET (0x00000500) +#define NVC9B0_H264_SET_MBHIST_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_VP8_SET_PROB_DATA_OFFSET (0x00000540) +#define NVC9B0_VP8_SET_PROB_DATA_OFFSET_OFFSET 31:0 +#define NVC9B0_VP8_SET_HEADER_PARTITION_BUF_BASE_OFFSET (0x00000544) +#define NVC9B0_VP8_SET_HEADER_PARTITION_BUF_BASE_OFFSET_OFFSET 31:0 +#define NVC9B0_HEVC_SET_SCALING_LIST_OFFSET (0x00000580) +#define NVC9B0_HEVC_SET_SCALING_LIST_OFFSET_OFFSET 31:0 +#define NVC9B0_HEVC_SET_TILE_SIZES_OFFSET (0x00000584) +#define NVC9B0_HEVC_SET_TILE_SIZES_OFFSET_OFFSET 31:0 +#define NVC9B0_HEVC_SET_FILTER_BUFFER_OFFSET (0x00000588) +#define NVC9B0_HEVC_SET_FILTER_BUFFER_OFFSET_OFFSET 31:0 +#define NVC9B0_HEVC_SET_SAO_BUFFER_OFFSET (0x0000058C) +#define NVC9B0_HEVC_SET_SAO_BUFFER_OFFSET_OFFSET 31:0 
+#define NVC9B0_HEVC_SET_SLICE_INFO_BUFFER_OFFSET (0x00000590) +#define NVC9B0_HEVC_SET_SLICE_INFO_BUFFER_OFFSET_OFFSET 31:0 +#define NVC9B0_HEVC_SET_SLICE_GROUP_INDEX (0x00000594) +#define NVC9B0_HEVC_SET_SLICE_GROUP_INDEX_OFFSET 31:0 +#define NVC9B0_VP9_SET_PROB_TAB_BUF_OFFSET (0x000005C0) +#define NVC9B0_VP9_SET_PROB_TAB_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_VP9_SET_CTX_COUNTER_BUF_OFFSET (0x000005C4) +#define NVC9B0_VP9_SET_CTX_COUNTER_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_VP9_SET_SEGMENT_READ_BUF_OFFSET (0x000005C8) +#define NVC9B0_VP9_SET_SEGMENT_READ_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_VP9_SET_SEGMENT_WRITE_BUF_OFFSET (0x000005CC) +#define NVC9B0_VP9_SET_SEGMENT_WRITE_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_VP9_SET_TILE_SIZE_BUF_OFFSET (0x000005D0) +#define NVC9B0_VP9_SET_TILE_SIZE_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_VP9_SET_COL_MVWRITE_BUF_OFFSET (0x000005D4) +#define NVC9B0_VP9_SET_COL_MVWRITE_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_VP9_SET_COL_MVREAD_BUF_OFFSET (0x000005D8) +#define NVC9B0_VP9_SET_COL_MVREAD_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_VP9_SET_FILTER_BUFFER_OFFSET (0x000005DC) +#define NVC9B0_VP9_SET_FILTER_BUFFER_OFFSET_OFFSET 31:0 +#define NVC9B0_VP9_PARSER_SET_PIC_SETUP_OFFSET (0x000005E0) +#define NVC9B0_VP9_PARSER_SET_PIC_SETUP_OFFSET_OFFSET 31:0 +#define NVC9B0_VP9_PARSER_SET_PREV_PIC_SETUP_OFFSET (0x000005E4) +#define NVC9B0_VP9_PARSER_SET_PREV_PIC_SETUP_OFFSET_OFFSET 31:0 +#define NVC9B0_VP9_PARSER_SET_PROB_TAB_BUF_OFFSET (0x000005E8) +#define NVC9B0_VP9_PARSER_SET_PROB_TAB_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_VP9_SET_HINT_DUMP_BUF_OFFSET (0x000005EC) +#define NVC9B0_VP9_SET_HINT_DUMP_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_PASS1_SET_CLEAR_HEADER_OFFSET (0x00000600) +#define NVC9B0_PASS1_SET_CLEAR_HEADER_OFFSET_OFFSET 31:0 +#define NVC9B0_PASS1_SET_RE_ENCRYPT_OFFSET (0x00000604) +#define NVC9B0_PASS1_SET_RE_ENCRYPT_OFFSET_OFFSET 31:0 +#define NVC9B0_PASS1_SET_VP8_TOKEN_OFFSET (0x00000608) +#define 
NVC9B0_PASS1_SET_VP8_TOKEN_OFFSET_OFFSET 31:0 +#define NVC9B0_PASS1_SET_INPUT_DATA_OFFSET (0x0000060C) +#define NVC9B0_PASS1_SET_INPUT_DATA_OFFSET_OFFSET 31:0 +#define NVC9B0_PASS1_SET_OUTPUT_DATA_SIZE_OFFSET (0x00000610) +#define NVC9B0_PASS1_SET_OUTPUT_DATA_SIZE_OFFSET_OFFSET 31:0 +#define NVC9B0_AV1_SET_PROB_TAB_READ_BUF_OFFSET (0x00000640) +#define NVC9B0_AV1_SET_PROB_TAB_READ_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_AV1_SET_PROB_TAB_WRITE_BUF_OFFSET (0x00000644) +#define NVC9B0_AV1_SET_PROB_TAB_WRITE_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_AV1_SET_SEGMENT_READ_BUF_OFFSET (0x00000648) +#define NVC9B0_AV1_SET_SEGMENT_READ_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_AV1_SET_SEGMENT_WRITE_BUF_OFFSET (0x0000064C) +#define NVC9B0_AV1_SET_SEGMENT_WRITE_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_AV1_SET_COL_MV0_READ_BUF_OFFSET (0x00000650) +#define NVC9B0_AV1_SET_COL_MV0_READ_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_AV1_SET_COL_MV1_READ_BUF_OFFSET (0x00000654) +#define NVC9B0_AV1_SET_COL_MV1_READ_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_AV1_SET_COL_MV2_READ_BUF_OFFSET (0x00000658) +#define NVC9B0_AV1_SET_COL_MV2_READ_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_AV1_SET_COL_MVWRITE_BUF_OFFSET (0x0000065C) +#define NVC9B0_AV1_SET_COL_MVWRITE_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_AV1_SET_GLOBAL_MODEL_BUF_OFFSET (0x00000660) +#define NVC9B0_AV1_SET_GLOBAL_MODEL_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_AV1_SET_FILM_GRAIN_BUF_OFFSET (0x00000664) +#define NVC9B0_AV1_SET_FILM_GRAIN_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_AV1_SET_TILE_STREAM_INFO_BUF_OFFSET (0x00000668) +#define NVC9B0_AV1_SET_TILE_STREAM_INFO_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_AV1_SET_SUB_STREAM_ENTRY_BUF_OFFSET (0x0000066C) +#define NVC9B0_AV1_SET_SUB_STREAM_ENTRY_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_AV1_SET_HINT_DUMP_BUF_OFFSET (0x00000670) +#define NVC9B0_AV1_SET_HINT_DUMP_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_H264_SET_SCALING_LIST_OFFSET (0x00000680) +#define NVC9B0_H264_SET_SCALING_LIST_OFFSET_OFFSET 31:0 +#define 
NVC9B0_H264_SET_VLDHIST_BUF_OFFSET (0x00000684) +#define NVC9B0_H264_SET_VLDHIST_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_H264_SET_EDOBOFFSET0 (0x00000688) +#define NVC9B0_H264_SET_EDOBOFFSET0_OFFSET 31:0 +#define NVC9B0_H264_SET_EDOBOFFSET1 (0x0000068C) +#define NVC9B0_H264_SET_EDOBOFFSET1_OFFSET 31:0 +#define NVC9B0_H264_SET_EDOBOFFSET2 (0x00000690) +#define NVC9B0_H264_SET_EDOBOFFSET2_OFFSET 31:0 +#define NVC9B0_H264_SET_EDOBOFFSET3 (0x00000694) +#define NVC9B0_H264_SET_EDOBOFFSET3_OFFSET 31:0 +#define NVC9B0_SET_CONTENT_INITIAL_VECTOR(b) (0x00000C00 + (b)*0x00000004) +#define NVC9B0_SET_CONTENT_INITIAL_VECTOR_VALUE 31:0 +#define NVC9B0_SET_CTL_COUNT (0x00000C10) +#define NVC9B0_SET_CTL_COUNT_VALUE 31:0 +#define NVC9B0_SET_UPPER_SRC (0x00000C14) +#define NVC9B0_SET_UPPER_SRC_OFFSET 7:0 +#define NVC9B0_SET_LOWER_SRC (0x00000C18) +#define NVC9B0_SET_LOWER_SRC_OFFSET 31:0 +#define NVC9B0_SET_UPPER_DST (0x00000C1C) +#define NVC9B0_SET_UPPER_DST_OFFSET 7:0 +#define NVC9B0_SET_LOWER_DST (0x00000C20) +#define NVC9B0_SET_LOWER_DST_OFFSET 31:0 +#define NVC9B0_SET_BLOCK_COUNT (0x00000C24) +#define NVC9B0_SET_BLOCK_COUNT_VALUE 31:0 +#define NVC9B0_PR_SET_REQUEST_BUF_OFFSET (0x00000D00) +#define NVC9B0_PR_SET_REQUEST_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_PR_SET_REQUEST_BUF_SIZE (0x00000D04) +#define NVC9B0_PR_SET_REQUEST_BUF_SIZE_SIZE 31:0 +#define NVC9B0_PR_SET_RESPONSE_BUF_OFFSET (0x00000D08) +#define NVC9B0_PR_SET_RESPONSE_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_PR_SET_RESPONSE_BUF_SIZE (0x00000D0C) +#define NVC9B0_PR_SET_RESPONSE_BUF_SIZE_SIZE 31:0 +#define NVC9B0_PR_SET_REQUEST_MESSAGE_BUF_OFFSET (0x00000D10) +#define NVC9B0_PR_SET_REQUEST_MESSAGE_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_PR_SET_RESPONSE_MESSAGE_BUF_OFFSET (0x00000D14) +#define NVC9B0_PR_SET_RESPONSE_MESSAGE_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_PR_SET_LOCAL_DECRYPT_BUF_OFFSET (0x00000D18) +#define NVC9B0_PR_SET_LOCAL_DECRYPT_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_PR_SET_LOCAL_DECRYPT_BUF_SIZE (0x00000D1C) 
+#define NVC9B0_PR_SET_LOCAL_DECRYPT_BUF_SIZE_SIZE 31:0 +#define NVC9B0_PR_SET_CONTENT_DECRYPT_INFO_BUF_OFFSET (0x00000D20) +#define NVC9B0_PR_SET_CONTENT_DECRYPT_INFO_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_PR_SET_REENCRYPTED_BITSTREAM_BUF_OFFSET (0x00000D24) +#define NVC9B0_PR_SET_REENCRYPTED_BITSTREAM_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_DH_KE_SET_CHALLENGE_BUF_OFFSET (0x00000E00) +#define NVC9B0_DH_KE_SET_CHALLENGE_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_DH_KE_SET_RESPONSE_BUF_OFFSET (0x00000E04) +#define NVC9B0_DH_KE_SET_RESPONSE_BUF_OFFSET_OFFSET 31:0 +#define NVC9B0_SET_SESSION_KEY(b) (0x00000F00 + (b)*0x00000004) +#define NVC9B0_SET_SESSION_KEY_VALUE 31:0 +#define NVC9B0_SET_CONTENT_KEY(b) (0x00000F10 + (b)*0x00000004) +#define NVC9B0_SET_CONTENT_KEY_VALUE 31:0 +#define NVC9B0_PM_TRIGGER_END (0x00001114) +#define NVC9B0_PM_TRIGGER_END_V 31:0 + +#define NVC9B0_ERROR_NONE (0x00000000) +#define NVC9B0_OS_ERROR_EXECUTE_INSUFFICIENT_DATA (0x00000001) +#define NVC9B0_OS_ERROR_SEMAPHORE_INSUFFICIENT_DATA (0x00000002) +#define NVC9B0_OS_ERROR_INVALID_METHOD (0x00000003) +#define NVC9B0_OS_ERROR_INVALID_DMA_PAGE (0x00000004) +#define NVC9B0_OS_ERROR_UNHANDLED_INTERRUPT (0x00000005) +#define NVC9B0_OS_ERROR_EXCEPTION (0x00000006) +#define NVC9B0_OS_ERROR_INVALID_CTXSW_REQUEST (0x00000007) +#define NVC9B0_OS_ERROR_APPLICATION (0x00000008) +#define NVC9B0_OS_ERROR_SW_BREAKPT (0x00000009) +#define NVC9B0_OS_INTERRUPT_EXECUTE_AWAKEN (0x00000100) +#define NVC9B0_OS_INTERRUPT_BACKEND_SEMAPHORE_AWAKEN (0x00000200) +#define NVC9B0_OS_INTERRUPT_CTX_ERROR_FBIF (0x00000300) +#define NVC9B0_OS_INTERRUPT_LIMIT_VIOLATION (0x00000400) +#define NVC9B0_OS_INTERRUPT_LIMIT_AND_FBIF_CTX_ERROR (0x00000500) +#define NVC9B0_OS_INTERRUPT_HALT_ENGINE (0x00000600) +#define NVC9B0_OS_INTERRUPT_TRAP_NONSTALL (0x00000700) +#define NVC9B0_H264_VLD_ERR_SEQ_DATA_INCONSISTENT (0x00004001) +#define NVC9B0_H264_VLD_ERR_PIC_DATA_INCONSISTENT (0x00004002) +#define 
NVC9B0_H264_VLD_ERR_SLC_DATA_BUF_ADDR_OUT_OF_BOUNDS (0x00004100) +#define NVC9B0_H264_VLD_ERR_BITSTREAM_ERROR (0x00004101) +#define NVC9B0_H264_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID (0x000041F8) +#define NVC9B0_H264_VLD_ERR_SLC_HDR_OUT_SIZE_NOT_MULT256 (0x00004200) +#define NVC9B0_H264_VLD_ERR_SLC_DATA_OUT_SIZE_NOT_MULT256 (0x00004201) +#define NVC9B0_H264_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID (0x00004203) +#define NVC9B0_H264_VLD_ERR_CTX_DMA_ID_SLC_HDR_OUT_INVALID (0x00004204) +#define NVC9B0_H264_VLD_ERR_SLC_HDR_OUT_BUF_TOO_SMALL (0x00004205) +#define NVC9B0_H264_VLD_ERR_SLC_HDR_OUT_BUF_ALREADY_VALID (0x00004206) +#define NVC9B0_H264_VLD_ERR_SLC_DATA_OUT_BUF_TOO_SMALL (0x00004207) +#define NVC9B0_H264_VLD_ERR_DATA_BUF_CNT_TOO_SMALL (0x00004208) +#define NVC9B0_H264_VLD_ERR_BITSTREAM_EMPTY (0x00004209) +#define NVC9B0_H264_VLD_ERR_FRAME_WIDTH_TOO_LARGE (0x0000420A) +#define NVC9B0_H264_VLD_ERR_FRAME_HEIGHT_TOO_LARGE (0x0000420B) +#define NVC9B0_H264_VLD_ERR_HIST_BUF_TOO_SMALL (0x00004300) +#define NVC9B0_VC1_VLD_ERR_PIC_DATA_BUF_ADDR_OUT_OF_BOUND (0x00005100) +#define NVC9B0_VC1_VLD_ERR_BITSTREAM_ERROR (0x00005101) +#define NVC9B0_VC1_VLD_ERR_PIC_HDR_OUT_SIZE_NOT_MULT256 (0x00005200) +#define NVC9B0_VC1_VLD_ERR_PIC_DATA_OUT_SIZE_NOT_MULT256 (0x00005201) +#define NVC9B0_VC1_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID (0x00005202) +#define NVC9B0_VC1_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID (0x00005203) +#define NVC9B0_VC1_VLD_ERR_CTX_DMA_ID_PIC_HDR_OUT_INVALID (0x00005204) +#define NVC9B0_VC1_VLD_ERR_SLC_HDR_OUT_BUF_TOO_SMALL (0x00005205) +#define NVC9B0_VC1_VLD_ERR_PIC_HDR_OUT_BUF_ALREADY_VALID (0x00005206) +#define NVC9B0_VC1_VLD_ERR_PIC_DATA_OUT_BUF_TOO_SMALL (0x00005207) +#define NVC9B0_VC1_VLD_ERR_DATA_INFO_IN_BUF_TOO_SMALL (0x00005208) +#define NVC9B0_VC1_VLD_ERR_BITSTREAM_EMPTY (0x00005209) +#define NVC9B0_VC1_VLD_ERR_FRAME_WIDTH_TOO_LARGE (0x0000520A) +#define NVC9B0_VC1_VLD_ERR_FRAME_HEIGHT_TOO_LARGE (0x0000520B) +#define NVC9B0_VC1_VLD_ERR_PIC_DATA_OUT_BUF_FULL_TIME_OUT 
(0x00005300) +#define NVC9B0_MPEG12_VLD_ERR_SLC_DATA_BUF_ADDR_OUT_OF_BOUNDS (0x00006100) +#define NVC9B0_MPEG12_VLD_ERR_BITSTREAM_ERROR (0x00006101) +#define NVC9B0_MPEG12_VLD_ERR_SLC_DATA_OUT_SIZE_NOT_MULT256 (0x00006200) +#define NVC9B0_MPEG12_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID (0x00006201) +#define NVC9B0_MPEG12_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID (0x00006202) +#define NVC9B0_MPEG12_VLD_ERR_SLC_DATA_OUT_BUF_TOO_SMALL (0x00006203) +#define NVC9B0_MPEG12_VLD_ERR_DATA_INFO_IN_BUF_TOO_SMALL (0x00006204) +#define NVC9B0_MPEG12_VLD_ERR_BITSTREAM_EMPTY (0x00006205) +#define NVC9B0_MPEG12_VLD_ERR_INVALID_PIC_STRUCTURE (0x00006206) +#define NVC9B0_MPEG12_VLD_ERR_INVALID_PIC_CODING_TYPE (0x00006207) +#define NVC9B0_MPEG12_VLD_ERR_FRAME_WIDTH_TOO_LARGE (0x00006208) +#define NVC9B0_MPEG12_VLD_ERR_FRAME_HEIGHT_TOO_LARGE (0x00006209) +#define NVC9B0_MPEG12_VLD_ERR_SLC_DATA_OUT_BUF_FULL_TIME_OUT (0x00006300) +#define NVC9B0_CMN_VLD_ERR_PDEC_RETURNED_ERROR (0x00007101) +#define NVC9B0_CMN_VLD_ERR_EDOB_FLUSH_TIME_OUT (0x00007102) +#define NVC9B0_CMN_VLD_ERR_EDOB_REWIND_TIME_OUT (0x00007103) +#define NVC9B0_CMN_VLD_ERR_VLD_WD_TIME_OUT (0x00007104) +#define NVC9B0_CMN_VLD_ERR_NUM_SLICES_ZERO (0x00007105) +#define NVC9B0_MPEG4_VLD_ERR_PIC_DATA_BUF_ADDR_OUT_OF_BOUND (0x00008100) +#define NVC9B0_MPEG4_VLD_ERR_BITSTREAM_ERROR (0x00008101) +#define NVC9B0_MPEG4_VLD_ERR_PIC_HDR_OUT_SIZE_NOT_MULT256 (0x00008200) +#define NVC9B0_MPEG4_VLD_ERR_PIC_DATA_OUT_SIZE_NOT_MULT256 (0x00008201) +#define NVC9B0_MPEG4_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID (0x00008202) +#define NVC9B0_MPEG4_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID (0x00008203) +#define NVC9B0_MPEG4_VLD_ERR_CTX_DMA_ID_PIC_HDR_OUT_INVALID (0x00008204) +#define NVC9B0_MPEG4_VLD_ERR_SLC_HDR_OUT_BUF_TOO_SMALL (0x00008205) +#define NVC9B0_MPEG4_VLD_ERR_PIC_HDR_OUT_BUF_ALREADY_VALID (0x00008206) +#define NVC9B0_MPEG4_VLD_ERR_PIC_DATA_OUT_BUF_TOO_SMALL (0x00008207) +#define NVC9B0_MPEG4_VLD_ERR_DATA_INFO_IN_BUF_TOO_SMALL (0x00008208) +#define 
NVC9B0_MPEG4_VLD_ERR_BITSTREAM_EMPTY (0x00008209) +#define NVC9B0_MPEG4_VLD_ERR_FRAME_WIDTH_TOO_LARGE (0x0000820A) +#define NVC9B0_MPEG4_VLD_ERR_FRAME_HEIGHT_TOO_LARGE (0x0000820B) +#define NVC9B0_MPEG4_VLD_ERR_PIC_DATA_OUT_BUF_FULL_TIME_OUT (0x00051E01) +#define NVC9B0_DEC_ERROR_MPEG12_APPTIMER_EXPIRED (0xDEC10001) +#define NVC9B0_DEC_ERROR_MPEG12_MVTIMER_EXPIRED (0xDEC10002) +#define NVC9B0_DEC_ERROR_MPEG12_INVALID_TOKEN (0xDEC10003) +#define NVC9B0_DEC_ERROR_MPEG12_SLICEDATA_MISSING (0xDEC10004) +#define NVC9B0_DEC_ERROR_MPEG12_HWERR_INTERRUPT (0xDEC10005) +#define NVC9B0_DEC_ERROR_MPEG12_DETECTED_VLD_FAILURE (0xDEC10006) +#define NVC9B0_DEC_ERROR_MPEG12_PICTURE_INIT (0xDEC10100) +#define NVC9B0_DEC_ERROR_MPEG12_STATEMACHINE_FAILURE (0xDEC10101) +#define NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_PIC (0xDEC10901) +#define NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_UCODE (0xDEC10902) +#define NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_FC (0xDEC10903) +#define NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_SLH (0xDEC10904) +#define NVC9B0_DEC_ERROR_MPEG12_INVALID_UCODE_SIZE (0xDEC10905) +#define NVC9B0_DEC_ERROR_MPEG12_INVALID_SLICE_COUNT (0xDEC10906) +#define NVC9B0_DEC_ERROR_VC1_APPTIMER_EXPIRED (0xDEC20001) +#define NVC9B0_DEC_ERROR_VC1_MVTIMER_EXPIRED (0xDEC20002) +#define NVC9B0_DEC_ERROR_VC1_INVALID_TOKEN (0xDEC20003) +#define NVC9B0_DEC_ERROR_VC1_SLICEDATA_MISSING (0xDEC20004) +#define NVC9B0_DEC_ERROR_VC1_HWERR_INTERRUPT (0xDEC20005) +#define NVC9B0_DEC_ERROR_VC1_DETECTED_VLD_FAILURE (0xDEC20006) +#define NVC9B0_DEC_ERROR_VC1_TIMEOUT_POLLING_FOR_DATA (0xDEC20007) +#define NVC9B0_DEC_ERROR_VC1_PDEC_PIC_END_UNALIGNED (0xDEC20008) +#define NVC9B0_DEC_ERROR_VC1_WDTIMER_EXPIRED (0xDEC20009) +#define NVC9B0_DEC_ERROR_VC1_ERRINTSTART (0xDEC20010) +#define NVC9B0_DEC_ERROR_VC1_IQT_ERRINT (0xDEC20011) +#define NVC9B0_DEC_ERROR_VC1_MC_ERRINT (0xDEC20012) +#define NVC9B0_DEC_ERROR_VC1_MC_IQT_ERRINT (0xDEC20013) +#define NVC9B0_DEC_ERROR_VC1_REC_ERRINT (0xDEC20014) +#define 
NVC9B0_DEC_ERROR_VC1_REC_IQT_ERRINT (0xDEC20015) +#define NVC9B0_DEC_ERROR_VC1_REC_MC_ERRINT (0xDEC20016) +#define NVC9B0_DEC_ERROR_VC1_REC_MC_IQT_ERRINT (0xDEC20017) +#define NVC9B0_DEC_ERROR_VC1_DBF_ERRINT (0xDEC20018) +#define NVC9B0_DEC_ERROR_VC1_DBF_IQT_ERRINT (0xDEC20019) +#define NVC9B0_DEC_ERROR_VC1_DBF_MC_ERRINT (0xDEC2001A) +#define NVC9B0_DEC_ERROR_VC1_DBF_MC_IQT_ERRINT (0xDEC2001B) +#define NVC9B0_DEC_ERROR_VC1_DBF_REC_ERRINT (0xDEC2001C) +#define NVC9B0_DEC_ERROR_VC1_DBF_REC_IQT_ERRINT (0xDEC2001D) +#define NVC9B0_DEC_ERROR_VC1_DBF_REC_MC_ERRINT (0xDEC2001E) +#define NVC9B0_DEC_ERROR_VC1_DBF_REC_MC_IQT_ERRINT (0xDEC2001F) +#define NVC9B0_DEC_ERROR_VC1_PICTURE_INIT (0xDEC20100) +#define NVC9B0_DEC_ERROR_VC1_STATEMACHINE_FAILURE (0xDEC20101) +#define NVC9B0_DEC_ERROR_VC1_INVALID_CTXID_PIC (0xDEC20901) +#define NVC9B0_DEC_ERROR_VC1_INVALID_CTXID_UCODE (0xDEC20902) +#define NVC9B0_DEC_ERROR_VC1_INVALID_CTXID_FC (0xDEC20903) +#define NVC9B0_DEC_ERROR_VC1_INVAILD_CTXID_SLH (0xDEC20904) +#define NVC9B0_DEC_ERROR_VC1_INVALID_UCODE_SIZE (0xDEC20905) +#define NVC9B0_DEC_ERROR_VC1_INVALID_SLICE_COUNT (0xDEC20906) +#define NVC9B0_DEC_ERROR_H264_APPTIMER_EXPIRED (0xDEC30001) +#define NVC9B0_DEC_ERROR_H264_MVTIMER_EXPIRED (0xDEC30002) +#define NVC9B0_DEC_ERROR_H264_INVALID_TOKEN (0xDEC30003) +#define NVC9B0_DEC_ERROR_H264_SLICEDATA_MISSING (0xDEC30004) +#define NVC9B0_DEC_ERROR_H264_HWERR_INTERRUPT (0xDEC30005) +#define NVC9B0_DEC_ERROR_H264_DETECTED_VLD_FAILURE (0xDEC30006) +#define NVC9B0_DEC_ERROR_H264_ERRINTSTART (0xDEC30010) +#define NVC9B0_DEC_ERROR_H264_IQT_ERRINT (0xDEC30011) +#define NVC9B0_DEC_ERROR_H264_MC_ERRINT (0xDEC30012) +#define NVC9B0_DEC_ERROR_H264_MC_IQT_ERRINT (0xDEC30013) +#define NVC9B0_DEC_ERROR_H264_REC_ERRINT (0xDEC30014) +#define NVC9B0_DEC_ERROR_H264_REC_IQT_ERRINT (0xDEC30015) +#define NVC9B0_DEC_ERROR_H264_REC_MC_ERRINT (0xDEC30016) +#define NVC9B0_DEC_ERROR_H264_REC_MC_IQT_ERRINT (0xDEC30017) +#define NVC9B0_DEC_ERROR_H264_DBF_ERRINT 
(0xDEC30018) +#define NVC9B0_DEC_ERROR_H264_DBF_IQT_ERRINT (0xDEC30019) +#define NVC9B0_DEC_ERROR_H264_DBF_MC_ERRINT (0xDEC3001A) +#define NVC9B0_DEC_ERROR_H264_DBF_MC_IQT_ERRINT (0xDEC3001B) +#define NVC9B0_DEC_ERROR_H264_DBF_REC_ERRINT (0xDEC3001C) +#define NVC9B0_DEC_ERROR_H264_DBF_REC_IQT_ERRINT (0xDEC3001D) +#define NVC9B0_DEC_ERROR_H264_DBF_REC_MC_ERRINT (0xDEC3001E) +#define NVC9B0_DEC_ERROR_H264_DBF_REC_MC_IQT_ERRINT (0xDEC3001F) +#define NVC9B0_DEC_ERROR_H264_PICTURE_INIT (0xDEC30100) +#define NVC9B0_DEC_ERROR_H264_STATEMACHINE_FAILURE (0xDEC30101) +#define NVC9B0_DEC_ERROR_H264_INVALID_CTXID_PIC (0xDEC30901) +#define NVC9B0_DEC_ERROR_H264_INVALID_CTXID_UCODE (0xDEC30902) +#define NVC9B0_DEC_ERROR_H264_INVALID_CTXID_FC (0xDEC30903) +#define NVC9B0_DEC_ERROR_H264_INVALID_CTXID_SLH (0xDEC30904) +#define NVC9B0_DEC_ERROR_H264_INVALID_UCODE_SIZE (0xDEC30905) +#define NVC9B0_DEC_ERROR_H264_INVALID_SLICE_COUNT (0xDEC30906) +#define NVC9B0_DEC_ERROR_MPEG4_APPTIMER_EXPIRED (0xDEC40001) +#define NVC9B0_DEC_ERROR_MPEG4_MVTIMER_EXPIRED (0xDEC40002) +#define NVC9B0_DEC_ERROR_MPEG4_INVALID_TOKEN (0xDEC40003) +#define NVC9B0_DEC_ERROR_MPEG4_SLICEDATA_MISSING (0xDEC40004) +#define NVC9B0_DEC_ERROR_MPEG4_HWERR_INTERRUPT (0xDEC40005) +#define NVC9B0_DEC_ERROR_MPEG4_DETECTED_VLD_FAILURE (0xDEC40006) +#define NVC9B0_DEC_ERROR_MPEG4_TIMEOUT_POLLING_FOR_DATA (0xDEC40007) +#define NVC9B0_DEC_ERROR_MPEG4_PDEC_PIC_END_UNALIGNED (0xDEC40008) +#define NVC9B0_DEC_ERROR_MPEG4_WDTIMER_EXPIRED (0xDEC40009) +#define NVC9B0_DEC_ERROR_MPEG4_ERRINTSTART (0xDEC40010) +#define NVC9B0_DEC_ERROR_MPEG4_IQT_ERRINT (0xDEC40011) +#define NVC9B0_DEC_ERROR_MPEG4_MC_ERRINT (0xDEC40012) +#define NVC9B0_DEC_ERROR_MPEG4_MC_IQT_ERRINT (0xDEC40013) +#define NVC9B0_DEC_ERROR_MPEG4_REC_ERRINT (0xDEC40014) +#define NVC9B0_DEC_ERROR_MPEG4_REC_IQT_ERRINT (0xDEC40015) +#define NVC9B0_DEC_ERROR_MPEG4_REC_MC_ERRINT (0xDEC40016) +#define NVC9B0_DEC_ERROR_MPEG4_REC_MC_IQT_ERRINT (0xDEC40017) +#define 
NVC9B0_DEC_ERROR_MPEG4_DBF_ERRINT (0xDEC40018) +#define NVC9B0_DEC_ERROR_MPEG4_DBF_IQT_ERRINT (0xDEC40019) +#define NVC9B0_DEC_ERROR_MPEG4_DBF_MC_ERRINT (0xDEC4001A) +#define NVC9B0_DEC_ERROR_MPEG4_DBF_MC_IQT_ERRINT (0xDEC4001B) +#define NVC9B0_DEC_ERROR_MPEG4_DBF_REC_ERRINT (0xDEC4001C) +#define NVC9B0_DEC_ERROR_MPEG4_DBF_REC_IQT_ERRINT (0xDEC4001D) +#define NVC9B0_DEC_ERROR_MPEG4_DBF_REC_MC_ERRINT (0xDEC4001E) +#define NVC9B0_DEC_ERROR_MPEG4_DBF_REC_MC_IQT_ERRINT (0xDEC4001F) +#define NVC9B0_DEC_ERROR_MPEG4_PICTURE_INIT (0xDEC40100) +#define NVC9B0_DEC_ERROR_MPEG4_STATEMACHINE_FAILURE (0xDEC40101) +#define NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_PIC (0xDEC40901) +#define NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_UCODE (0xDEC40902) +#define NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_FC (0xDEC40903) +#define NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_SLH (0xDEC40904) +#define NVC9B0_DEC_ERROR_MPEG4_INVALID_UCODE_SIZE (0xDEC40905) +#define NVC9B0_DEC_ERROR_MPEG4_INVALID_SLICE_COUNT (0xDEC40906) + +#ifdef __cplusplus +}; /* extern "C" */ +#endif +#endif // clc9b0_h diff --git a/extra/nv_gpu_driver/nvdec_drv.h b/extra/nv_gpu_driver/nvdec_drv.h new file mode 100644 index 0000000000..0aab71f2ee --- /dev/null +++ b/extra/nv_gpu_driver/nvdec_drv.h @@ -0,0 +1,1846 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NVDEC_DRV_H_ +#define __NVDEC_DRV_H_ + +// TODO: Many fields can be converted to bitfields to save memory BW +// TODO: Revisit reserved fields for proper alignment and memory savings + +/////////////////////////////////////////////////////////////////////////////// +// NVDEC(MSDEC 5) is a single engine solution, and separates into VLD, MV, IQT, +// MCFETCH, MC, MCC, REC, DBF, DFBFDMA, HIST etc unit. +// The class(driver to HW) can mainly separate into VLD parser +// and Decoder part to be consistent with original design. And +// the sequence level info is usually set in VLD part. Later codec like +// VP8 won't name in this way. +// MSVLD: Multi-Standard VLD parser.
+// +#define ALIGN_UP(v, n) (((v) + ((n)-1)) &~ ((n)-1)) +#define NVDEC_ALIGN(value) ALIGN_UP(value,256) // Align to 256 bytes +#define NVDEC_MAX_MPEG2_SLICE 65536 // at 4096*4096, macroblock count = 65536, 1 macroblock per slice + +#define NVDEC_CODEC_MPEG1 0 +#define NVDEC_CODEC_MPEG2 1 +#define NVDEC_CODEC_VC1 2 +#define NVDEC_CODEC_H264 3 +#define NVDEC_CODEC_MPEG4 4 +#define NVDEC_CODEC_DIVX NVDEC_CODEC_MPEG4 +#define NVDEC_CODEC_VP8 5 +#define NVDEC_CODEC_HEVC 7 +#define NVDEC_CODEC_VP9 9 +#define NVDEC_CODEC_HEVC_PARSER 12 +#define NVDEC_CODEC_AV1 10 + +// AES encryption +enum +{ + AES128_NONE = 0x0, + AES128_CTR = 0x1, + AES128_CBC, + AES128_ECB, + AES128_OFB, + AES128_CTR_LSB16B, + AES128_CLR_AS_ENCRYPT, + AES128_RESERVED = 0x7 +}; + +enum +{ + AES128_CTS_DISABLE = 0x0, + AES128_CTS_ENABLE = 0x1 +}; + +enum +{ + AES128_PADDING_NONE = 0x0, + AES128_PADDING_CARRY_OVER, + AES128_PADDING_RFC2630, + AES128_PADDING_RESERVED = 0x7 +}; + +typedef enum +{ + ENCR_MODE_CTR64 = 0, + ENCR_MODE_CBC = 1, + ENCR_MODE_ECB = 2, + ENCR_MODE_ECB_PARTIAL = 3, + ENCR_MODE_CBC_PARTIAL = 4, + ENCR_MODE_CLEAR_INTO_VPR = 5, // used for clear stream decoding into VPR. + ENCR_MODE_FORCE_INTO_VPR = 6, // used to force decode output into VPR.
+} ENCR_MODE; + +// drm_mode configuration +// +// Bit 0:2 AES encryption mode +// Bit 3 CTS (CipherTextStealing) enable/disable +// Bit 4:6 Padding type +// Bit 7:7 Unwrap key enable/disable + +#define AES_MODE_MASK 0x7 +#define AES_CTS_MASK 0x1 +#define AES_PADDING_TYPE_MASK 0x7 +#define AES_UNWRAP_KEY_MASK 0x1 + +#define AES_MODE_SHIFT 0 +#define AES_CTS_SHIFT 3 +#define AES_PADDING_TYPE_SHIFT 4 +#define AES_UNWRAP_KEY_SHIFT 7 +// Pack AES mode (M), CTS flag (C) and padding type (P) into one drm_mode value; arguments and result are parenthesized so the macro is safe inside larger expressions. +#define AES_SET_FLAG(M, C, P) ((((M) & AES_MODE_MASK) << AES_MODE_SHIFT) | \ + (((C) & AES_CTS_MASK) << AES_CTS_SHIFT) | \ + (((P) & AES_PADDING_TYPE_MASK) << AES_PADDING_TYPE_SHIFT)) +// Extract field F (MODE, CTS, PADDING_TYPE or UNWRAP_KEY) from drm_mode value V using the matching AES_<F>_MASK / AES_<F>_SHIFT pair. +#define AES_GET_FLAG(V, F) (((V) & ((AES_##F##_MASK) <<(AES_##F##_SHIFT))) >> (AES_##F##_SHIFT)) + +#define DRM_MODE_MASK 0x7f // Bits 0:6 (0:2 -> AES_MODE, 3 -> AES_CTS, 4:6 -> AES_PADDING_TYPE) +#define AES_GET_DRM_MODE(V) ((V) & DRM_MODE_MASK) // keeps bits 0:6 of V, dropping the unwrap-key bit + +enum { DRM_MS_PIFF_CTR = AES_SET_FLAG(AES128_CTR, AES128_CTS_DISABLE, AES128_PADDING_CARRY_OVER) }; +enum { DRM_MS_PIFF_CBC = AES_SET_FLAG(AES128_CBC, AES128_CTS_DISABLE, AES128_PADDING_NONE) }; +enum { DRM_MARLIN_CTR = AES_SET_FLAG(AES128_CTR, AES128_CTS_DISABLE, AES128_PADDING_NONE) }; +enum { DRM_MARLIN_CBC = AES_SET_FLAG(AES128_CBC, AES128_CTS_DISABLE, AES128_PADDING_RFC2630) }; +enum { DRM_WIDEVINE = AES_SET_FLAG(AES128_CBC, AES128_CTS_ENABLE, AES128_PADDING_NONE) }; +enum { DRM_WIDEVINE_CTR = AES_SET_FLAG(AES128_CTR, AES128_CTS_DISABLE, AES128_PADDING_CARRY_OVER) }; +enum { DRM_ULTRA_VIOLET = AES_SET_FLAG(AES128_CTR_LSB16B, AES128_CTS_DISABLE, AES128_PADDING_NONE) }; +enum { DRM_NONE = AES_SET_FLAG(AES128_NONE, AES128_CTS_DISABLE, AES128_PADDING_NONE) }; +enum { DRM_CLR_AS_ENCRYPT = AES_SET_FLAG(AES128_CLR_AS_ENCRYPT, AES128_CTS_DISABLE, AES128_PADDING_NONE)}; + +// SSM entry structure +typedef struct _nvdec_ssm_s { + unsigned int bytes_of_protected_data;//bytes of protected data, follows bytes_of_clear_data.
Note: When padding is enabled, it does not include the padding_bytes (1~15), which can be derived by "(16-(bytes_of_protected_data&0xF))&0xF" + unsigned int bytes_of_clear_data:16; //bytes of clear data, located before bytes_of_protected_data + unsigned int skip_byte_blk : 4; //valid when (entry_type==0 && mode == 1) + unsigned int crypt_byte_blk : 4; //valid when (entry_type==0 && mode == 1) + unsigned int skip : 1; //whether this SSM entry should be skipped or not + unsigned int last : 1; //whether this SSM entry is the last one for the whole decoding frame + unsigned int pad : 1; //valid when (entry_type==0 && mode==0 && AES_PADDING_TYPE==AES128_PADDING_RFC2630), 0 for pad_end, 1 for pad_begin + unsigned int mode : 1; //0 for normal mode, 1 for pattern mode + unsigned int entry_type : 1; //0 for DATA, 1 for IV + unsigned int reserved : 3; +} nvdec_ssm_s; /* SubSampleMap, 8bytes */ + +// PASS2 OTF extension structure for SSM support; it does not exist in nvdec_mpeg4_pic_s (as MPEG4 OTF SW-DRM is not supported yet) +typedef struct _nvdec_pass2_otf_ext_s { + unsigned int ssm_entry_num :16; //specifies how many SSM entries (each in unit of 8 bytes) existed in SET_SUB_SAMPLE_MAP_OFFSET surface + unsigned int ssm_iv_num :16; //specifies how many SSM IV (each in unit of 16 bytes) existed in SET_SUB_SAMPLE_MAP_IV_OFFSET surface + unsigned int real_stream_length; //the real stream length, which is the bitstream length EMD/VLD will get after whole frame SSM processing, sum up of "clear+protected" bytes in SSM entries and removing "non_slice_data/skip". + unsigned int non_slice_data :16; //specifies the first many bytes needed to skip, includes only those of "clear+protected" bytes ("padding" bytes excluded) + unsigned int drm_mode : 7; + unsigned int reserved : 9; +} nvdec_pass2_otf_ext_s; /* 12bytes */ + + +//NVDEC5.0 low latency decoding (partial stream kickoff without context switch), method will reuse HevcSetSliceInfoBufferOffset.
+typedef struct _nvdec_substream_entry_s { + unsigned int substream_start_offset; //substream byte start offset to bitstream base address + unsigned int substream_length; //subsream length in byte + unsigned int substream_first_tile_idx : 8; //the first tile index(raster scan in frame) of this substream,max is 255 + unsigned int substream_last_tile_idx : 8; //the last tile index(raster scan in frame) of this substream, max is 255 + unsigned int last_substream_entry_in_frame : 1; //this entry is the last substream entry of this frame + unsigned int reserved : 15; +} nvdec_substream_entry_s;/*low latency without context switch substream entry map,12bytes*/ + + +// GIP + +/* tile border coefficients of filter */ +#define GIP_ASIC_VERT_FILTER_RAM_SIZE 16 /* bytes per pixel */ + +/* BSD control data of current picture at tile border + * 11 * 128 bits per 4x4 tile = 128/(8*4) bytes per row */ +#define GIP_ASIC_BSD_CTRL_RAM_SIZE 4 /* bytes per row */ + +/* 8 dc + 8 to boundary + 6*16 + 2*6*64 + 2*64 -> 63 * 16 bytes */ +#define GIP_ASIC_SCALING_LIST_SIZE (16*64) + +/* tile border coefficients of filter */ +#define GIP_ASIC_VERT_SAO_RAM_SIZE 16 /* bytes per pixel */ + +/* max number of tiles times width and height (2 bytes each), + * rounding up to next 16 bytes boundary + one extra 16 byte + * chunk (HW guys wanted to have this) */ +#define GIP_ASIC_TILE_SIZE ((20*22*2*2+16+15) & ~0xF) + +/* Segment map uses 32 bytes / CTB */ +#define GIP_ASIC_VP9_CTB_SEG_SIZE 32 + +// HEVC Filter FG buffer +#define HEVC_DBLK_TOP_SIZE_IN_SB16 ALIGN_UP(632, 128) // ctb16 + 444 +#define HEVC_DBLK_TOP_BUF_SIZE(w) NVDEC_ALIGN( (ALIGN_UP(w,16)/16 + 2) * HEVC_DBLK_TOP_SIZE_IN_SB16) // 8K: 1285*256 + +#define HEVC_DBLK_LEFT_SIZE_IN_SB16 ALIGN_UP(506, 128) // ctb16 + 444 +#define HEVC_DBLK_LEFT_BUF_SIZE(h) NVDEC_ALIGN( (ALIGN_UP(h,16)/16 + 2) * HEVC_DBLK_LEFT_SIZE_IN_SB16) // 8K: 1028*256 + +#define HEVC_SAO_LEFT_SIZE_IN_SB16 ALIGN_UP(713, 128) // ctb16 + 444 +#define HEVC_SAO_LEFT_BUF_SIZE(h) 
NVDEC_ALIGN( (ALIGN_UP(h,16)/16 + 2) * HEVC_SAO_LEFT_SIZE_IN_SB16) // 8K: 1542*256 + +// VP9 Filter FG buffer +#define VP9_DBLK_TOP_SIZE_IN_SB64 ALIGN_UP(2000, 128) // 420 +#define VP9_DBLK_TOP_BUF_SIZE(w) NVDEC_ALIGN( (ALIGN_UP(w,64)/64 + 2) * VP9_DBLK_TOP_SIZE_IN_SB64) // 8K: 1040*256 + +#define VP9_DBLK_LEFT_SIZE_IN_SB64 ALIGN_UP(1600, 128) // 420 +#define VP9_DBLK_LEFT_BUF_SIZE(h) NVDEC_ALIGN( (ALIGN_UP(h,64)/64 + 2) * VP9_DBLK_LEFT_SIZE_IN_SB64) // 8K: 845*256 + +// VP9 Hint Dump Buffer +#define VP9_HINT_DUMP_SIZE_IN_SB64 ((64*64)/(4*4)*8) // 8 bytes per CU, 256 CUs(2048 bytes) per SB64 +#define VP9_HINT_DUMP_SIZE(w, h) NVDEC_ALIGN(VP9_HINT_DUMP_SIZE_IN_SB64*((w+63)/64)*((h+63)/64)) + +// used for ecdma debug +typedef struct _nvdec_ecdma_config_s +{ + unsigned int ecdma_enable; // enable/disable ecdma + unsigned short ecdma_blk_x_src; // src start position x , it's 64x aligned + unsigned short ecdma_blk_y_src; // src start position y , it's 8x aligned + unsigned short ecdma_blk_x_dst; // dst start position x , it's 64x aligned + unsigned short ecdma_blk_y_dst; // dst start position y , it's 8x aligned + unsigned short ref_pic_idx; // ref(src) picture index , used to derived source picture base address + unsigned short boundary0_top; // src insided tile/partition region top boundary + unsigned short boundary0_bottom; // src insided tile/partition region bottom boundary + unsigned short boundary1_left; // src insided tile/partition region left boundary + unsigned short boundary1_right; // src insided tile/partition region right boundary + unsigned char blk_copy_flag; // blk_copy enable flag. 
+ // if it's 1 ,ctb_size ==3,ecdma_blk_x_src == boundary1_left and ecdma_blk_y_src == boundary0_top ; + // if it's 0 ,ecdma_blk_x_src == ecdma_blk_x_dst and ecdma_blk_y_src == ecdma_blk_y_dst; + unsigned char ctb_size; // ctb_size .0:64x64,1:32x32,2:16x16,3:8x8 +} nvdec_ecdma_config_s; + +typedef struct _nvdec_status_hevc_s +{ + unsigned int frame_status_intra_cnt; //Intra block counter, in unit of 8x8 block, IPCM block included + unsigned int frame_status_inter_cnt; //Inter block counter, in unit of 8x8 block, SKIP block included + unsigned int frame_status_skip_cnt; //Skip block counter, in unit of 4x4 block, blocks having NO/ZERO texture/coeff data + unsigned int frame_status_fwd_mvx_cnt; //ABS sum of forward MVx, one 14bit MVx(integer) per 4x4 block + unsigned int frame_status_fwd_mvy_cnt; //ABS sum of forward MVy, one 14bit MVy(integer) per 4x4 block + unsigned int frame_status_bwd_mvx_cnt; //ABS sum of backward MVx, one 14bit MVx(integer) per 4x4 block + unsigned int frame_status_bwd_mvy_cnt; //ABS sum of backward MVy, one 14bit MVy(integer) per 4x4 block + unsigned int error_ctb_pos; //[15:0] error ctb position in Y direction, [31:16] error ctb position in X direction + unsigned int error_slice_pos; //[15:0] error slice position in Y direction, [31:16] error slice position in X direction +} nvdec_status_hevc_s; + +typedef struct _nvdec_status_vp9_s +{ + unsigned int frame_status_intra_cnt; //Intra block counter, in unit of 8x8 block, IPCM block included + unsigned int frame_status_inter_cnt; //Inter block counter, in unit of 8x8 block, SKIP block included + unsigned int frame_status_skip_cnt; //Skip block counter, in unit of 4x4 block, blocks having NO/ZERO texture/coeff data + unsigned int frame_status_fwd_mvx_cnt; //ABS sum of forward MVx, one 14bit MVx(integer) per 4x4 block + unsigned int frame_status_fwd_mvy_cnt; //ABS sum of forward MVy, one 14bit MVy(integer) per 4x4 block + unsigned int frame_status_bwd_mvx_cnt; //ABS sum of backward MVx, one 14bit 
MVx(integer) per 4x4 block + unsigned int frame_status_bwd_mvy_cnt; //ABS sum of backward MVy, one 14bit MVy(integer) per 4x4 block + unsigned int error_ctb_pos; //[15:0] error ctb position in Y direction, [31:16] error ctb position in X direction + unsigned int error_slice_pos; //[15:0] error slice position in Y direction, [31:16] error slice position in X direction +} nvdec_status_vp9_s; + +typedef struct _nvdec_status_s +{ + unsigned int mbs_correctly_decoded; // total numers of correctly decoded macroblocks + unsigned int mbs_in_error; // number of error macroblocks. + unsigned int cycle_count; // total cycles taken for execute. read from PERF_DECODE_FRAME_V register + unsigned int error_status; // report error if any + union + { + nvdec_status_hevc_s hevc; + nvdec_status_vp9_s vp9; + }; + unsigned int slice_header_error_code; // report error in slice header + +} nvdec_status_s; + +// per 16x16 block, used in hevc/vp9 surface of SetExternalMVBufferOffset when error_external_mv_en = 1 +typedef struct _external_mv_s +{ + int mvx : 14; //integrate pixel precision + int mvy : 14; //integrate pixel precision + unsigned int refidx : 4; +} external_mv_s; + +// HEVC +typedef struct _nvdec_hevc_main10_444_ext_s +{ + unsigned int transformSkipRotationEnableFlag : 1; //sps extension for transform_skip_rotation_enabled_flag + unsigned int transformSkipContextEnableFlag : 1; //sps extension for transform_skip_context_enabled_flag + unsigned int intraBlockCopyEnableFlag :1; //sps intraBlockCopyEnableFlag, always 0 before spec define it + unsigned int implicitRdpcmEnableFlag : 1; //sps implicit_rdpcm_enabled_flag + unsigned int explicitRdpcmEnableFlag : 1; //sps explicit_rdpcm_enabled_flag + unsigned int extendedPrecisionProcessingFlag : 1; //sps extended_precision_processing_flag,always 0 in current profile + unsigned int intraSmoothingDisabledFlag : 1; //sps intra_smoothing_disabled_flag + unsigned int highPrecisionOffsetsEnableFlag :1; //sps 
high_precision_offsets_enabled_flag + unsigned int fastRiceAdaptationEnableFlag: 1; //sps fast_rice_adaptation_enabled_flag + unsigned int cabacBypassAlignmentEnableFlag : 1; //sps cabac_bypass_alignment_enabled_flag, always 0 in current profile + unsigned int sps_444_extension_reserved : 22; //sps reserve for future extension + + unsigned int log2MaxTransformSkipSize : 4 ; //pps extension log2_max_transform_skip_block_size_minus2, 0...5 + unsigned int crossComponentPredictionEnableFlag: 1; //pps cross_component_prediction_enabled_flag + unsigned int chromaQpAdjustmentEnableFlag:1; //pps chroma_qp_adjustment_enabled_flag + unsigned int diffCuChromaQpAdjustmentDepth:2; //pps diff_cu_chroma_qp_adjustment_depth, 0...3 + unsigned int chromaQpAdjustmentTableSize:3; //pps chroma_qp_adjustment_table_size_minus1+1, 1...6 + unsigned int log2SaoOffsetScaleLuma:3; //pps log2_sao_offset_scale_luma, max(0,bitdepth-10),maxBitdepth 16 for future. + unsigned int log2SaoOffsetScaleChroma: 3; //pps log2_sao_offset_scale_chroma + unsigned int pps_444_extension_reserved : 15; //pps reserved + char cb_qp_adjustment[6]; //-[12,+12] + char cr_qp_adjustment[6]; //-[12,+12] + unsigned int HevcFltAboveOffset; // filter above offset respect to filter buffer, 256 bytes unit + unsigned int HevcSaoAboveOffset; // sao above offset respect to filter buffer, 256 bytes unit +} nvdec_hevc_main10_444_ext_s; + +typedef struct _nvdec_hevc_pic_v1_s +{ + // New fields + //hevc main10 444 extensions + nvdec_hevc_main10_444_ext_s hevc_main10_444_ext; + + //HEVC skip bytes from beginning setting for secure + //it is different to the sw_hdr_skip_length who skips the middle of stream of + //the slice header which is parsed by driver + unsigned int sw_skip_start_length : 14; + unsigned int external_ref_mem_dis : 1; + unsigned int error_recovery_start_pos : 2; //0: from start of frame, 1: from start of slice segment, 2: from error detected ctb, 3: reserved + unsigned int error_external_mv_en : 1; + unsigned int 
reserved0 : 14; + // Reserved bits padding +} nvdec_hevc_pic_v1_s; + +//No versioning in structure: NVDEC2 (T210 and GM206) +//version v1 : NVDEC3 (T186 and GP100) +//version v2 : NVDEC3.1 (GP10x) + +typedef struct _nvdec_hevc_pic_v2_s +{ + // mv-hevc field + unsigned int mv_hevc_enable :1; + unsigned int nuh_layer_id :6; + unsigned int default_ref_layers_active_flag :1; + unsigned int NumDirectRefLayers :6; + unsigned int max_one_active_ref_layer_flag :1; + unsigned int NumActiveRefLayerPics :6; + unsigned int poc_lsb_not_present_flag :1; + unsigned int reserved0 :10; +} nvdec_hevc_pic_v2_s; + +typedef struct _nvdec_hevc_pic_v3_s +{ + // slice level decoding + unsigned int slice_decoding_enable:1;//1: enable slice level decoding + unsigned int slice_ec_enable:1; //1: enable slice error concealment. When slice_ec_enable=1,slice_decoding_enable must be 1; + unsigned int slice_ec_mv_type:2; //0: zero mv; 1: co-located mv; 2: external mv; + unsigned int err_detected_sw:1; //1: indicate sw/driver has detected error already in frame kick mode + unsigned int slice_ec_slice_type:2; //0: B slice; 1: P slice ; others: reserved + unsigned int slice_strm_recfg_en:1; //enable slice bitstream re-configure or not ; + unsigned int reserved:24; + unsigned int HevcSliceEdgeOffset;// slice edge buffer offset which repsect to filter buffer ,256 bytes as one unit +}nvdec_hevc_pic_v3_s; + +typedef struct _nvdec_hevc_pic_s +{ + //The key/IV addr must be 128bit alignment + unsigned int wrapped_session_key[4]; //session keys + unsigned int wrapped_content_key[4]; //content keys + unsigned int initialization_vector[4]; //Ctrl64 initial vector + // hevc_bitstream_data_info + unsigned int stream_len; // stream length in one frame + unsigned int enable_encryption; // flag to enable/disable encryption + unsigned int key_increment : 6; // added to content key after unwrapping + unsigned int encryption_mode : 4; + unsigned int key_slot_index : 4; + unsigned int ssm_en : 1; + unsigned int 
enable_histogram : 1; // histogram stats output enable + unsigned int enable_substream_decoding: 1; //frame substream kickoff without context switch + unsigned int reserved0 :15; + + // Driver may or may not use based upon need. + // If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode. + // Driver can send this value based upon resolution using the formula: + // gptimer_timeout_value = 3 * (cycles required for one frame) + unsigned int gptimer_timeout_value; + + // general + unsigned char tileformat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16 + unsigned char gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards) + unsigned char reserverd_surface_format : 3 ; + unsigned char sw_start_code_e; // 0: stream doesn't contain start codes,1: stream contains start codes + unsigned char disp_output_mode; // 0: Rec.709 8 bit, 1: Rec.709 10 bit, 2: Rec.709 10 bits -> 8 bit, 3: Rec.2020 10 bit -> 8 bit + unsigned char reserved1; + unsigned int framestride[2]; // frame buffer stride for luma and chroma + unsigned int colMvBuffersize; // collocated MV buffer size of one picture ,256 bytes unit + unsigned int HevcSaoBufferOffset; // sao buffer offset respect to filter buffer ,256 bytes unit . + unsigned int HevcBsdCtrlOffset; // bsd buffer offset respect to filter buffer ,256 bytes unit . 
+ // sps + unsigned short pic_width_in_luma_samples; // :15, 48(?)..16384, multiple of 8 (48 is smallest width supported by NVDEC for CTU size 16x16) + unsigned short pic_height_in_luma_samples; // :15, 8..16384, multiple of 8 + unsigned int chroma_format_idc : 4; // always 1 (=4:2:0) + unsigned int bit_depth_luma : 4; // 8..12 + unsigned int bit_depth_chroma : 4; + unsigned int log2_min_luma_coding_block_size : 4; // 3..6 + unsigned int log2_max_luma_coding_block_size : 4; // 3..6 + unsigned int log2_min_transform_block_size : 4; // 2..5 + unsigned int log2_max_transform_block_size : 4; // 2..5 + unsigned int reserved2 : 4; + + unsigned int max_transform_hierarchy_depth_inter : 3; // 0..4 + unsigned int max_transform_hierarchy_depth_intra : 3; // 0..4 + unsigned int scalingListEnable : 1; // + unsigned int amp_enable_flag : 1; // + unsigned int sample_adaptive_offset_enabled_flag : 1; // + unsigned int pcm_enabled_flag : 1; // + unsigned int pcm_sample_bit_depth_luma : 4; // + unsigned int pcm_sample_bit_depth_chroma : 4; + unsigned int log2_min_pcm_luma_coding_block_size : 4; // + unsigned int log2_max_pcm_luma_coding_block_size : 4; // + unsigned int pcm_loop_filter_disabled_flag : 1; // + unsigned int sps_temporal_mvp_enabled_flag : 1; // + unsigned int strong_intra_smoothing_enabled_flag : 1; // + unsigned int reserved3 : 3; + // pps + unsigned int dependent_slice_segments_enabled_flag : 1; // + unsigned int output_flag_present_flag : 1; // + unsigned int num_extra_slice_header_bits : 3; // 0..7 (normally 0) + unsigned int sign_data_hiding_enabled_flag : 1; // + unsigned int cabac_init_present_flag : 1; // + unsigned int num_ref_idx_l0_default_active : 4; // 1..15 + unsigned int num_ref_idx_l1_default_active : 4; // 1..15 + unsigned int init_qp : 7; // 0..127, support higher bitdepth + unsigned int constrained_intra_pred_flag : 1; // + unsigned int transform_skip_enabled_flag : 1; // + unsigned int cu_qp_delta_enabled_flag : 1; // + unsigned int 
diff_cu_qp_delta_depth : 2; // 0..3 + unsigned int reserved4 : 5; // + + char pps_cb_qp_offset ; // -12..12 + char pps_cr_qp_offset ; // -12..12 + char pps_beta_offset ; // -12..12 + char pps_tc_offset ; // -12..12 + unsigned int pps_slice_chroma_qp_offsets_present_flag : 1; // + unsigned int weighted_pred_flag : 1; // + unsigned int weighted_bipred_flag : 1; // + unsigned int transquant_bypass_enabled_flag : 1; // + unsigned int tiles_enabled_flag : 1; // (redundant: = num_tile_columns_minus1!=0 || num_tile_rows_minus1!=0) + unsigned int entropy_coding_sync_enabled_flag : 1; // + unsigned int num_tile_columns : 5; // 0..20 + unsigned int num_tile_rows : 5; // 0..22 + unsigned int loop_filter_across_tiles_enabled_flag : 1; // + unsigned int loop_filter_across_slices_enabled_flag : 1; // + unsigned int deblocking_filter_control_present_flag : 1; // + unsigned int deblocking_filter_override_enabled_flag : 1; // + unsigned int pps_deblocking_filter_disabled_flag : 1; // + unsigned int lists_modification_present_flag : 1; // + unsigned int log2_parallel_merge_level : 3; // 2..4 + unsigned int slice_segment_header_extension_present_flag : 1; // (normally 0) + unsigned int reserved5 : 6; + + // reference picture related + unsigned char num_ref_frames; + unsigned char reserved6; + unsigned short longtermflag; // long term flag for refpiclist.bit 15 for picidx 0, bit 14 for picidx 1,... 
+ unsigned char initreflistidxl0[16]; // :5, [refPicidx] 0..15 + unsigned char initreflistidxl1[16]; // :5, [refPicidx] 0..15 + short RefDiffPicOrderCnts[16]; // poc diff between current and reference pictures .[-128,127] + // misc + unsigned char IDR_picture_flag; // idr flag for current picture + unsigned char RAP_picture_flag; // rap flag for current picture + unsigned char curr_pic_idx; // current picture store buffer index,used to derive the store addess of frame buffer and MV + unsigned char pattern_id; // used for dithering to select between 2 tables + unsigned short sw_hdr_skip_length; // reference picture inititial related syntax elements(SE) bits in slice header. + // those SE only decoding once in driver,related bits will flush in HW + unsigned short reserved7; + + // used for ecdma debug + nvdec_ecdma_config_s ecdma_cfg; + + //DXVA on windows + unsigned int separate_colour_plane_flag : 1; + unsigned int log2_max_pic_order_cnt_lsb_minus4 : 4; //0~12 + unsigned int num_short_term_ref_pic_sets : 7 ; //0~64 + unsigned int num_long_term_ref_pics_sps : 6; //0~32 + unsigned int bBitParsingDisable : 1 ; //disable parsing + unsigned int num_delta_pocs_of_rps_idx : 8; + unsigned int long_term_ref_pics_present_flag : 1; + unsigned int reserved_dxva : 4; + //the number of bits for short_term_ref_pic_set()in slice header,dxva API + unsigned int num_bits_short_term_ref_pics_in_slice; + + // New additions + nvdec_hevc_pic_v1_s v1; + nvdec_hevc_pic_v2_s v2; + nvdec_hevc_pic_v3_s v3; + nvdec_pass2_otf_ext_s ssm; + +} nvdec_hevc_pic_s; + +//hevc slice info class +typedef struct _hevc_slice_info_s { + unsigned int first_flag :1;//first slice(s) of frame,must valid for slice EC + unsigned int err_flag :1;//error slice(s) .optional info for EC + unsigned int last_flag :1;//last slice segment(s) of frame,this bit is must be valid when slice_strm_recfg_en==1 or slice_ec==1 + unsigned int conceal_partial_slice :1; // indicate do partial slice error conealment for packet loss 
case + unsigned int available :1; // indicate the slice bitstream is available. + unsigned int reserved0 :7; + unsigned int ctb_count :20;// ctbs counter inside slice(s) .must valid for slice EC + unsigned int bs_offset; //slice(s) bitstream offset in bitstream buffer (in byte unit) + unsigned int bs_length; //slice(s) bitstream length. It is sum of aligned size and skip size and valid slice bitstream size. + unsigned short start_ctbx; //slice start ctbx ,it's optional,HW can output it in previous slice decoding. + //but this is one check points for error + unsigned short start_ctby; //slice start ctby + } hevc_slice_info_s; + + +//hevc slice ctx class +//slice pos and next slice address +typedef struct _slice_edge_ctb_pos_ctx_s { + unsigned int next_slice_pos_ctbxy; //2d address in raster scan + unsigned int next_slice_segment_addr; //1d address in tile scan +}slice_edge_ctb_pos_ctx_s; + +// next slice's first ctb located tile related information +typedef struct _slice_edge_tile_ctx_s { + unsigned int tileInfo1;// Misc tile info includes tile width and tile height and tile col and tile row + unsigned int tileInfo2;// Misc tile info includes tile start ctbx and start ctby and tile index + unsigned int tileInfo3;// Misc tile info includes ctb pos inside tile +} slice_edge_tile_ctx_s; + +//frame level stats +typedef struct _slice_edge_stats_ctx_s { + unsigned int frame_status_intra_cnt;// frame stats for intra block count + unsigned int frame_status_inter_cnt;// frame stats for inter block count + unsigned int frame_status_skip_cnt;// frame stats for skip block count + unsigned int frame_status_fwd_mvx_cnt;// frame stats for sum of abs fwd mvx + unsigned int frame_status_fwd_mvy_cnt;// frame stats for sum of abs fwd mvy + unsigned int frame_status_bwd_mvx_cnt;// frame stats for sum of abs bwd mvx + unsigned int frame_status_bwd_mvy_cnt;// frame stats for sum of abs bwd mvy + unsigned int frame_status_mv_cnt_ext;// extension bits of sum of abs mv to keep full 
precision. +}slice_edge_stats_ctx_s; + +//ctx of vpc_edge unit for tile left +typedef struct _slice_vpc_edge_ctx_s { + unsigned int reserved; +}slice_vpc_edge_ctx_s; + +//ctx of vpc_main unit +typedef struct _slice_vpc_main_ctx_s { + unsigned int reserved; +} slice_vpc_main_ctx_s; + +//hevc slice edge ctx class +typedef struct _slice_edge_ctx_s { + //ctb pos + slice_edge_ctb_pos_ctx_s slice_ctb_pos_ctx; + // stats + slice_edge_stats_ctx_s slice_stats_ctx; + // tile info + slice_edge_tile_ctx_s slice_tile_ctx; + //vpc_edge + slice_vpc_edge_ctx_s slice_vpc_edge_ctx; + //vpc_main + slice_vpc_main_ctx_s slice_vpc_main_ctx; +} slice_edge_ctx_s; + +//vp9 + +typedef struct _nvdec_vp9_pic_v1_s +{ + // New fields + // new_var : xx; // for variables with expanded bitlength, comment on why the new bit legth is required + // Reserved bits for padding and/or non-HW specific functionality + unsigned int Vp9FltAboveOffset; // filter above offset respect to filter buffer, 256 bytes unit + unsigned int external_ref_mem_dis : 1; + unsigned int bit_depth : 4; + unsigned int error_recovery_start_pos : 2; //0: from start of frame, 1: from start of slice segment, 2: from error detected ctb, 3: reserved + unsigned int error_external_mv_en : 1; + unsigned int Reserved0 : 24; +} nvdec_vp9_pic_v1_s; + +enum VP9_FRAME_SFC_ID +{ + VP9_LAST_FRAME_SFC = 0, + VP9_GOLDEN_FRAME_SFC, + VP9_ALTREF_FRAME_SFC, + VP9_CURR_FRAME_SFC +}; + +typedef struct _nvdec_vp9_pic_s +{ + // vp9_bitstream_data_info + //Key and IV address must 128bit alignment + unsigned int wrapped_session_key[4]; //session keys + unsigned int wrapped_content_key[4]; //content keys + unsigned int initialization_vector[4]; //Ctrl64 initial vector + unsigned int stream_len; // stream length in one frame + unsigned int enable_encryption; // flag to enable/disable encryption + unsigned int key_increment : 6; // added to content key after unwrapping + unsigned int encryption_mode : 4; + unsigned int sw_hdr_skip_length :14; //vp9 skip 
bytes setting for secure + unsigned int key_slot_index : 4; + unsigned int ssm_en : 1; + unsigned int enable_histogram : 1; // histogram stats output enable + unsigned int reserved0 : 2; + + // Driver may or may not use based upon need. + // If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode. + // Driver can send this value based upon resolution using the formula: + // gptimer_timeout_value = 3 * (cycles required for one frame) + unsigned int gptimer_timeout_value; + + //general + unsigned char tileformat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16 + unsigned char gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards) + unsigned char reserverd_surface_format : 3 ; + unsigned char reserved1[3]; + unsigned int Vp9BsdCtrlOffset; // bsd buffer offset respect to filter buffer ,256 bytes unit . + + + //ref_last dimensions + unsigned short ref0_width; //ref_last coded width + unsigned short ref0_height; //ref_last coded height + unsigned short ref0_stride[2]; //ref_last stride + + //ref_golden dimensions + unsigned short ref1_width; //ref_golden coded width + unsigned short ref1_height; //ref_golden coded height + unsigned short ref1_stride[2]; //ref_golden stride + + //ref_alt dimensions + unsigned short ref2_width; //ref_alt coded width + unsigned short ref2_height; //ref_alt coded height + unsigned short ref2_stride[2]; //ref_alt stride + + + /* Current frame dimensions */ + unsigned short width; //pic width + unsigned short height; //pic height + unsigned short framestride[2]; // frame buffer stride for luma and chroma + + unsigned char keyFrame :1; + unsigned char prevIsKeyFrame:1; + unsigned char resolutionChange:1; + unsigned char errorResilient:1; + unsigned char prevShowFrame:1; + unsigned char intraOnly:1; + unsigned char reserved2 : 2; + + /* DCT coefficient partitions */ + //unsigned int offsetToDctParts; + + unsigned char reserved3[3]; + //unsigned char activeRefIdx[3];//3 bits + //unsigned char 
refreshFrameFlags; + //unsigned char refreshEntropyProbs; + //unsigned char frameParallelDecoding; + //unsigned char resetFrameContext; + + unsigned char refFrameSignBias[4]; + char loopFilterLevel;//6 bits + char loopFilterSharpness;//3 bits + + /* Quantization parameters */ + unsigned char qpYAc; + char qpYDc; + char qpChAc; + char qpChDc; + + /* From here down, frame-to-frame persisting stuff */ + + char lossless; + char transform_mode; + char allow_high_precision_mv; + char mcomp_filter_type; + char comp_pred_mode; + char comp_fixed_ref; + char comp_var_ref[2]; + char log2_tile_columns; + char log2_tile_rows; + + /* Segment and macroblock specific values */ + unsigned char segmentEnabled; + unsigned char segmentMapUpdate; + unsigned char segmentMapTemporalUpdate; + unsigned char segmentFeatureMode; /* ABS data or delta data */ + unsigned char segmentFeatureEnable[8][4]; + short segmentFeatureData[8][4]; + char modeRefLfEnabled; + char mbRefLfDelta[4]; + char mbModeLfDelta[2]; + char reserved5; // for alignment + + // New additions + nvdec_vp9_pic_v1_s v1; + nvdec_pass2_otf_ext_s ssm; + +} nvdec_vp9_pic_s; + +#define NVDEC_VP9HWPAD(x, y) unsigned char x[y] + +typedef struct { + /* last bytes of address 41 */ + unsigned char joints[3]; + unsigned char sign[2]; + /* address 42 */ + unsigned char class0[2][1]; + unsigned char fp[2][3]; + unsigned char class0_hp[2]; + unsigned char hp[2]; + unsigned char classes[2][10]; + /* address 43 */ + unsigned char class0_fp[2][2][3]; + unsigned char bits[2][10]; + +} nvdec_nmv_context; + +typedef struct { + unsigned int joints[4]; + unsigned int sign[2][2]; + unsigned int classes[2][11]; + unsigned int class0[2][2]; + unsigned int bits[2][10][2]; + unsigned int class0_fp[2][2][4]; + unsigned int fp[2][4]; + unsigned int class0_hp[2][2]; + unsigned int hp[2][2]; + +} nvdec_nmv_context_counts; + +/* Adaptive entropy contexts, padding elements are added to have + * 256 bit aligned tables for HW access. 
+ * Compile with TRACE_PROB_TABLES to print bases for each table. */ +typedef struct nvdec_vp9AdaptiveEntropyProbs_s +{ + /* address 32 */ + unsigned char inter_mode_prob[7][4]; + unsigned char intra_inter_prob[4]; + + /* address 33 */ + unsigned char uv_mode_prob[10][8]; + unsigned char tx8x8_prob[2][1]; + unsigned char tx16x16_prob[2][2]; + unsigned char tx32x32_prob[2][3]; + unsigned char sb_ymode_probB[4][1]; + unsigned char sb_ymode_prob[4][8]; + + /* address 37 */ + unsigned char partition_prob[2][16][4]; + + /* address 41 */ + unsigned char uv_mode_probB[10][1]; + unsigned char switchable_interp_prob[4][2]; + unsigned char comp_inter_prob[5]; + unsigned char mbskip_probs[3]; + NVDEC_VP9HWPAD(pad1, 1); + + nvdec_nmv_context nmvc; + + /* address 44 */ + unsigned char single_ref_prob[5][2]; + unsigned char comp_ref_prob[5]; + NVDEC_VP9HWPAD(pad2, 17); + + /* address 45 */ + unsigned char probCoeffs[2][2][6][6][4]; + unsigned char probCoeffs8x8[2][2][6][6][4]; + unsigned char probCoeffs16x16[2][2][6][6][4]; + unsigned char probCoeffs32x32[2][2][6][6][4]; + +} nvdec_vp9AdaptiveEntropyProbs_t; + +/* Entropy contexts */ +typedef struct nvdec_vp9EntropyProbs_s +{ + /* Default keyframe probs */ + /* Table formatted for 256b memory, probs 0to7 for all tables followed by + * probs 8toN for all tables. + * Compile with TRACE_PROB_TABLES to print bases for each table. 
*/ + + unsigned char kf_bmode_prob[10][10][8]; + + /* Address 25 */ + unsigned char kf_bmode_probB[10][10][1]; + unsigned char ref_pred_probs[3]; + unsigned char mb_segment_tree_probs[7]; + unsigned char segment_pred_probs[3]; + unsigned char ref_scores[4]; + unsigned char prob_comppred[2]; + NVDEC_VP9HWPAD(pad1, 9); + + /* Address 29 */ + unsigned char kf_uv_mode_prob[10][8]; + unsigned char kf_uv_mode_probB[10][1]; + NVDEC_VP9HWPAD(pad2, 6); + + nvdec_vp9AdaptiveEntropyProbs_t a; /* Probs with backward adaptation */ + +} nvdec_vp9EntropyProbs_t; + +/* Counters for adaptive entropy contexts */ +typedef struct nvdec_vp9EntropyCounts_s +{ + unsigned int inter_mode_counts[7][3][2]; + unsigned int sb_ymode_counts[4][10]; + unsigned int uv_mode_counts[10][10]; + unsigned int partition_counts[16][4]; + unsigned int switchable_interp_counts[4][3]; + unsigned int intra_inter_count[4][2]; + unsigned int comp_inter_count[5][2]; + unsigned int single_ref_count[5][2][2]; + unsigned int comp_ref_count[5][2]; + unsigned int tx32x32_count[2][4]; + unsigned int tx16x16_count[2][3]; + unsigned int tx8x8_count[2][2]; + unsigned int mbskip_count[3][2]; + + nvdec_nmv_context_counts nmvcount; + + unsigned int countCoeffs[2][2][6][6][4]; + unsigned int countCoeffs8x8[2][2][6][6][4]; + unsigned int countCoeffs16x16[2][2][6][6][4]; + unsigned int countCoeffs32x32[2][2][6][6][4]; + + unsigned int countEobs[4][2][2][6][6]; + +} nvdec_vp9EntropyCounts_t; + +// Legacy codecs encryption parameters +typedef struct _nvdec_pass2_otf_s { + unsigned int wrapped_session_key[4]; // session keys + unsigned int wrapped_content_key[4]; // content keys + unsigned int initialization_vector[4];// Ctrl64 initial vector + unsigned int enable_encryption : 1; // flag to enable/disable encryption + unsigned int key_increment : 6; // added to content key after unwrapping + unsigned int encryption_mode : 4; + unsigned int key_slot_index : 4; + unsigned int ssm_en : 1; + unsigned int reserved1 :16; // reserved +} 
nvdec_pass2_otf_s; // 0x10 bytes + +typedef struct _nvdec_display_param_s +{ + unsigned int enableTFOutput : 1; //=1, enable dbfdma to output the display surface; if disable, then the following configure on tf is useless. + //remap for VC1 + unsigned int VC1MapYFlag : 1; + unsigned int MapYValue : 3; + unsigned int VC1MapUVFlag : 1; + unsigned int MapUVValue : 3; + //tf + unsigned int OutStride : 8; + unsigned int TilingFormat : 3; + unsigned int OutputStructure : 1; //(0=frame, 1=field) + unsigned int reserved0 :11; + int OutputTop[2]; // in units of 256 + int OutputBottom[2]; // in units of 256 + //histogram + unsigned int enableHistogram : 1; // enable histogram info collection. + unsigned int HistogramStartX :12; // start X of Histogram window + unsigned int HistogramStartY :12; // start Y of Histogram window + unsigned int reserved1 : 7; + unsigned int HistogramEndX :12; // end X of Histogram window + unsigned int HistogramEndY :12; // end y of Histogram window + unsigned int reserved2 : 8; +} nvdec_display_param_s; // size 0x1c bytes + +// H.264 +typedef struct _nvdec_dpb_entry_s // 16 bytes +{ + unsigned int index : 7; // uncompressed frame buffer index + unsigned int col_idx : 5; // index of associated co-located motion data buffer + unsigned int state : 2; // bit1(state)=1: top field used for reference, bit1(state)=1: bottom field used for reference + unsigned int is_long_term : 1; // 0=short-term, 1=long-term + unsigned int not_existing : 1; // 1=marked as non-existing + unsigned int is_field : 1; // set if unpaired field or complementary field pair + unsigned int top_field_marking : 4; + unsigned int bottom_field_marking : 4; + unsigned int output_memory_layout : 1; // Set according to picture level output NV12/NV24 setting. 
+ unsigned int reserved : 6; + unsigned int FieldOrderCnt[2]; // : 2*32 [top/bottom] + int FrameIdx; // : 16 short-term: FrameNum (16 bits), long-term: LongTermFrameIdx (4 bits) +} nvdec_dpb_entry_s; + +typedef struct _nvdec_h264_pic_s +{ + nvdec_pass2_otf_s encryption_params; + unsigned char eos[16]; + unsigned char explicitEOSPresentFlag; + unsigned char hint_dump_en; //enable COLOMV surface dump for all frames, which includes hints of "MV/REFIDX/QP/CBP/MBPART/MBTYPE", nvbug: 200212874 + unsigned char reserved0[2]; + unsigned int stream_len; + unsigned int slice_count; + unsigned int mbhist_buffer_size; // to pass buffer size of MBHIST_BUFFER + + // Driver may or may not use based upon need. + // If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode. + // Driver can send this value based upon resolution using the formula: + // gptimer_timeout_value = 3 * (cycles required for one frame) + unsigned int gptimer_timeout_value; + + // Fields from msvld_h264_seq_s + int log2_max_pic_order_cnt_lsb_minus4; + int delta_pic_order_always_zero_flag; + int frame_mbs_only_flag; + int PicWidthInMbs; + int FrameHeightInMbs; + + unsigned int tileFormat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16 + unsigned int gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards) + unsigned int reserverd_surface_format : 27; + + // Fields from msvld_h264_pic_s + int entropy_coding_mode_flag; + int pic_order_present_flag; + int num_ref_idx_l0_active_minus1; + int num_ref_idx_l1_active_minus1; + int deblocking_filter_control_present_flag; + int redundant_pic_cnt_present_flag; + int transform_8x8_mode_flag; + + // Fields from mspdec_h264_picture_setup_s + unsigned int pitch_luma; // Luma pitch + unsigned int pitch_chroma; // chroma pitch + + unsigned int luma_top_offset; // offset of luma top field in units of 256 + unsigned int luma_bot_offset; // offset of luma bottom field in units of 256 + unsigned int luma_frame_offset; // offset of 
luma frame in units of 256 + unsigned int chroma_top_offset; // offset of chroma top field in units of 256 + unsigned int chroma_bot_offset; // offset of chroma bottom field in units of 256 + unsigned int chroma_frame_offset; // offset of chroma frame in units of 256 + unsigned int HistBufferSize; // in units of 256 + + unsigned int MbaffFrameFlag : 1; // + unsigned int direct_8x8_inference_flag: 1; // + unsigned int weighted_pred_flag : 1; // + unsigned int constrained_intra_pred_flag:1; // + unsigned int ref_pic_flag : 1; // reference picture (nal_ref_idc != 0) + unsigned int field_pic_flag : 1; // + unsigned int bottom_field_flag : 1; // + unsigned int second_field : 1; // second field of complementary reference field + unsigned int log2_max_frame_num_minus4: 4; // (0..12) + unsigned int chroma_format_idc : 2; // + unsigned int pic_order_cnt_type : 2; // (0..2) + int pic_init_qp_minus26 : 6; // : 6 (-26..+25) + int chroma_qp_index_offset : 5; // : 5 (-12..+12) + int second_chroma_qp_index_offset : 5; // : 5 (-12..+12) + + unsigned int weighted_bipred_idc : 2; // : 2 (0..2) + unsigned int CurrPicIdx : 7; // : 7 uncompressed frame buffer index + unsigned int CurrColIdx : 5; // : 5 index of associated co-located motion data buffer + unsigned int frame_num : 16; // + unsigned int frame_surfaces : 1; // frame surfaces flag + unsigned int output_memory_layout : 1; // 0: NV12; 1:NV24. Field pair must use the same setting. 
+ + int CurrFieldOrderCnt[2]; // : 32 [Top_Bottom], [0]=TopFieldOrderCnt, [1]=BottomFieldOrderCnt + nvdec_dpb_entry_s dpb[16]; + unsigned char WeightScale[6][4][4]; // : 6*4*4*8 in raster scan order (not zig-zag order) + unsigned char WeightScale8x8[2][8][8]; // : 2*8*8*8 in raster scan order (not zig-zag order) + + // mvc setup info, must be zero if not mvc + unsigned char num_inter_view_refs_lX[2]; // number of inter-view references + char reserved1[14]; // reserved for alignment + signed char inter_view_refidx_lX[2][16]; // DPB indices (must also be marked as long-term) + + // lossless decode (At the time of writing this manual, x264 and JM encoders, differ in Intra_8x8 reference sample filtering) + unsigned int lossless_ipred8x8_filter_enable : 1; // = 0, skips Intra_8x8 reference sample filtering, for vertical and horizontal predictions (x264 encoded streams); = 1, filter Intra_8x8 reference samples (JM encoded streams) + unsigned int qpprime_y_zero_transform_bypass_flag : 1; // determines the transform bypass mode + unsigned int reserved2 : 30; // kept for alignment; may be used for other parameters + + nvdec_display_param_s displayPara; + nvdec_pass2_otf_ext_s ssm; + +} nvdec_h264_pic_s; + +// VC-1 Scratch buffer +typedef enum _vc1_fcm_e +{ + FCM_PROGRESSIVE = 0, + FCM_FRAME_INTERLACE = 2, + FCM_FIELD_INTERLACE = 3 +} vc1_fcm_e; + +typedef enum _syntax_vc1_ptype_e +{ + PTYPE_I = 0, + PTYPE_P = 1, + PTYPE_B = 2, + PTYPE_BI = 3, //PTYPE_BI is not used to config register NV_CNVDEC_VLD_PIC_INFO_COMMON. field NV_CNVDEC_VLD_PIC_INFO_COMMON_PIC_CODING_VC1 is only 2 bits. I and BI pictures are configured with same value. Please refer to manual. + PTYPE_SKIPPED = 4 +} syntax_vc1_ptype_e; + +// 7.1.1.32, Table 46 etc. 
+enum vc1_mvmode_e +{ + MVMODE_MIXEDMV = 0, + MVMODE_1MV = 1, + MVMODE_1MV_HALFPEL = 2, + MVMODE_1MV_HALFPEL_BILINEAR = 3, + MVMODE_INTENSITY_COMPENSATION = 4 +}; + +// 9.1.1.42, Table 105 +typedef enum _vc1_fptype_e +{ + FPTYPE_I_I = 0, + FPTYPE_I_P, + FPTYPE_P_I, + FPTYPE_P_P, + FPTYPE_B_B, + FPTYPE_B_BI, + FPTYPE_BI_B, + FPTYPE_BI_BI +} vc1_fptype_e; + +// Table 43 (7.1.1.31.2) +typedef enum _vc1_dqprofile_e +{ + DQPROFILE_ALL_FOUR_EDGES = 0, + DQPROFILE_DOUBLE_EDGE = 1, + DQPROFILE_SINGLE_EDGE = 2, + DQPROFILE_ALL_MACROBLOCKS = 3 +} vc1_dqprofile_e; + +typedef struct _nvdec_vc1_pic_s +{ + nvdec_pass2_otf_s encryption_params; + unsigned char eos[16]; // to pass end of stream data separately if not present in bitstream surface + unsigned char prefixStartCode[4]; // used for dxva to pass prefix start code. + unsigned int bitstream_offset; // offset in words from start of bitstream surface if there is gap. + unsigned char explicitEOSPresentFlag; // to indicate that eos[] is used for passing end of stream data. + unsigned char reserved0[3]; + unsigned int stream_len; + unsigned int slice_count; + unsigned int scratch_pic_buffer_size; + + // Driver may or may not use based upon need. + // If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode. 
+ // Driver can send this value based upon resolution using the formula: + // gptimer_timeout_value = 3 * (cycles required for one frame) + unsigned int gptimer_timeout_value; + + // Fields from vc1_seq_s + unsigned short FrameWidth; // actual frame width + unsigned short FrameHeight; // actual frame height + + unsigned char profile; // 1 = SIMPLE or MAIN, 2 = ADVANCED + unsigned char postprocflag; + unsigned char pulldown; + unsigned char interlace; + + unsigned char tfcntrflag; + unsigned char finterpflag; + unsigned char psf; + unsigned char tileFormat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16 + unsigned char gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards) + unsigned char reserverd_surface_format : 3 ; + + // simple,main + unsigned char multires; + unsigned char syncmarker; + unsigned char rangered; + unsigned char maxbframes; + + // Fields from vc1_entrypoint_s + unsigned char dquant; + unsigned char panscan_flag; + unsigned char refdist_flag; + unsigned char quantizer; + + unsigned char extended_mv; + unsigned char extended_dmv; + unsigned char overlap; + unsigned char vstransform; + + // Fields from vc1_scratch_s + char refdist; + char reserved1[3]; // for alignment + + // Fields from vld_vc1_pic_s + vc1_fcm_e fcm; + syntax_vc1_ptype_e ptype; + int tfcntr; + int rptfrm; + int tff; + int rndctrl; + int pqindex; + int halfqp; + int pquantizer; + int postproc; + int condover; + int transacfrm; + int transacfrm2; + int transdctab; + int pqdiff; + int abspq; + int dquantfrm; + vc1_dqprofile_e dqprofile; + int dqsbedge; + int dqdbedge; + int dqbilevel; + int mvrange; + enum vc1_mvmode_e mvmode; + enum vc1_mvmode_e mvmode2; + int lumscale; + int lumshift; + int mvtab; + int cbptab; + int ttmbf; + int ttfrm; + int bfraction; + vc1_fptype_e fptype; + int numref; + int reffield; + int dmvrange; + int intcompfield; + int lumscale1; // type was char in ucode + int lumshift1; // type was char in ucode + int lumscale2; // 
type was char in ucode + int lumshift2; // type was char in ucode + int mbmodetab; + int imvtab; + int icbptab; + int fourmvbptab; + int fourmvswitch; + int intcomp; + int twomvbptab; + // simple,main + int rangeredfrm; + + // Fields from pdec_vc1_pic_s + unsigned int HistBufferSize; // in units of 256 + // frame buffers + unsigned int FrameStride[2]; // [y_c] + unsigned int luma_top_offset; // offset of luma top field in units of 256 + unsigned int luma_bot_offset; // offset of luma bottom field in units of 256 + unsigned int luma_frame_offset; // offset of luma frame in units of 256 + unsigned int chroma_top_offset; // offset of chroma top field in units of 256 + unsigned int chroma_bot_offset; // offset of chroma bottom field in units of 256 + unsigned int chroma_frame_offset; // offset of chroma frame in units of 256 + + unsigned short CodedWidth; // entrypoint specific + unsigned short CodedHeight; // entrypoint specific + + unsigned char loopfilter; // entrypoint specific + unsigned char fastuvmc; // entrypoint specific + unsigned char output_memory_layout; // picture specific + unsigned char ref_memory_layout[2]; // picture specific 0: fwd, 1: bwd + unsigned char reserved3[3]; // for alignment + + nvdec_display_param_s displayPara; + nvdec_pass2_otf_ext_s ssm; + +} nvdec_vc1_pic_s; + +// MPEG-2 +typedef struct _nvdec_mpeg2_pic_s +{ + nvdec_pass2_otf_s encryption_params; + unsigned char eos[16]; + unsigned char explicitEOSPresentFlag; + unsigned char reserved0[3]; + unsigned int stream_len; + unsigned int slice_count; + + // Driver may or may not use based upon need. + // If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode. 
+ // Driver can send this value based upon resolution using the formula: + // gptimer_timeout_value = 3 * (cycles required for one frame) + unsigned int gptimer_timeout_value; + + // Fields from vld_mpeg2_seq_pic_info_s + short FrameWidth; // actual frame width + short FrameHeight; // actual frame height + unsigned char picture_structure; // 0 => Reserved, 1 => Top field, 2 => Bottom field, 3 => Frame picture. Table 6-14. + unsigned char picture_coding_type; // 0 => Forbidden, 1 => I, 2 => P, 3 => B, 4 => D (for MPEG-2). Table 6-12. + unsigned char intra_dc_precision; // 0 => 8 bits, 1=> 9 bits, 2 => 10 bits, 3 => 11 bits. Table 6-13. + char frame_pred_frame_dct; // as in section 6.3.10 + char concealment_motion_vectors; // as in section 6.3.10 + char intra_vlc_format; // as in section 6.3.10 + unsigned char tileFormat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16 + unsigned char gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards) + unsigned char reserverd_surface_format : 3 ; + + char reserved1; // always 0 + char f_code[4]; // as in section 6.3.10 + + // Fields from pdec_mpeg2_picture_setup_s + unsigned short PicWidthInMbs; + unsigned short FrameHeightInMbs; + unsigned int pitch_luma; + unsigned int pitch_chroma; + unsigned int luma_top_offset; + unsigned int luma_bot_offset; + unsigned int luma_frame_offset; + unsigned int chroma_top_offset; + unsigned int chroma_bot_offset; + unsigned int chroma_frame_offset; + unsigned int HistBufferSize; + unsigned short output_memory_layout; + unsigned short alternate_scan; + unsigned short secondfield; + /******************************/ + // Got rid of the union kept for compatibility with NVDEC1. + // Removed field mpeg2, and kept rounding type. + // NVDEC1 ucode is not using the mpeg2 field, instead using codec type from the methods. + // Rounding type should only be set for Divx3.11. 
+ unsigned short rounding_type; + /******************************/ + unsigned int MbInfoSizeInBytes; + unsigned int q_scale_type; + unsigned int top_field_first; + unsigned int full_pel_fwd_vector; + unsigned int full_pel_bwd_vector; + unsigned char quant_mat_8x8intra[64]; + unsigned char quant_mat_8x8nonintra[64]; + unsigned int ref_memory_layout[2]; //0:for fwd; 1:for bwd + + nvdec_display_param_s displayPara; + nvdec_pass2_otf_ext_s ssm; + +} nvdec_mpeg2_pic_s; + +// MPEG-4 +typedef struct _nvdec_mpeg4_pic_s +{ + nvdec_pass2_otf_s encryption_params; + unsigned char eos[16]; + unsigned char explicitEOSPresentFlag; + unsigned char reserved2[3]; // for alignment + unsigned int stream_len; + unsigned int slice_count; + unsigned int scratch_pic_buffer_size; + + // Driver may or may not use based upon need. + // If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode. + // Driver can send this value based upon resolution using the formula: + // gptimer_timeout_value = 3 * (cycles required for one frame) + unsigned int gptimer_timeout_value; + + // Fields from vld_mpeg4_seq_s + short FrameWidth; // :13 video_object_layer_width + short FrameHeight; // :13 video_object_layer_height + char vop_time_increment_bitcount; // : 5 1..16 + char resync_marker_disable; // : 1 + unsigned char tileFormat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16 + unsigned char gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards) + unsigned char reserverd_surface_format : 3 ; + char reserved3; // for alignment + + // Fields from pdec_mpeg4_picture_setup_s + int width; // : 13 + int height; // : 13 + + unsigned int FrameStride[2]; // [y_c] + unsigned int luma_top_offset; // offset of luma top field in units of 256 + unsigned int luma_bot_offset; // offset of luma bottom field in units of 256 + unsigned int luma_frame_offset; // offset of luma frame in units of 256 + unsigned int chroma_top_offset; // offset of chroma top field in units of 
256 + unsigned int chroma_bot_offset; // offset of chroma bottom field in units of 256 + unsigned int chroma_frame_offset; // offset of chroma frame in units of 256 + + unsigned int HistBufferSize; // in units of 256, History buffer size + + int trd[2]; // : 16, temporal reference frame distance (only needed for B-VOPs) + int trb[2]; // : 16, temporal reference B-VOP distance from fwd reference frame (only needed for B-VOPs) + + int divx_flags; // : 16 (bit 0: DivX interlaced chroma rounding, bit 1: Divx 4 boundary padding, bit 2: Divx IDCT) + + short vop_fcode_forward; // : 1...7 + short vop_fcode_backward; // : 1...7 + + unsigned char interlaced; // : 1 + unsigned char quant_type; // : 1 + unsigned char quarter_sample; // : 1 + unsigned char short_video_header; // : 1 + + unsigned char curr_output_memory_layout; // : 1 0:NV12; 1:NV24 + unsigned char ptype; // picture type: 0 for PTYPE_I, 1 for PTYPE_P, 2 for PTYPE_B, 3 for PTYPE_BI, 4 for PTYPE_SKIPPED + unsigned char rnd; // : 1, rounding mode + unsigned char alternate_vertical_scan_flag; // : 1 + + unsigned char top_field_flag; // : 1 + unsigned char reserved0[3]; // alignment purpose + + unsigned char intra_quant_mat[64]; // : 64*8 + unsigned char nonintra_quant_mat[64]; // : 64*8 + unsigned char ref_memory_layout[2]; //0:for fwd; 1:for bwd + unsigned char reserved1[34]; // 256 byte alignemnt till now + + nvdec_display_param_s displayPara; + +} nvdec_mpeg4_pic_s; + +// VP8 +enum VP8_FRAME_TYPE +{ + VP8_KEYFRAME = 0, + VP8_INTERFRAME = 1 +}; + +enum VP8_FRAME_SFC_ID +{ + VP8_GOLDEN_FRAME_SFC = 0, + VP8_ALTREF_FRAME_SFC, + VP8_LAST_FRAME_SFC, + VP8_CURR_FRAME_SFC +}; + +typedef struct _nvdec_vp8_pic_s +{ + nvdec_pass2_otf_s encryption_params; + + // Driver may or may not use based upon need. + // If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode. 
+ // Driver can send this value based upon resolution using the formula: + // gptimer_timeout_value = 3 * (cycles required for one frame) + unsigned int gptimer_timeout_value; + + unsigned short FrameWidth; // actual frame width + unsigned short FrameHeight; // actual frame height + + unsigned char keyFrame; // 1: key frame; 0: not + unsigned char version; + unsigned char tileFormat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16 + unsigned char gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards) + unsigned char reserverd_surface_format : 3 ; + unsigned char errorConcealOn; // 1: error conceal on; 0: off + + unsigned int firstPartSize; // the size of first partition(frame header and mb header partition) + + // ctx + unsigned int HistBufferSize; // in units of 256 + unsigned int VLDBufferSize; // in units of 1 + // current frame buffers + unsigned int FrameStride[2]; // [y_c] + unsigned int luma_top_offset; // offset of luma top field in units of 256 + unsigned int luma_bot_offset; // offset of luma bottom field in units of 256 + unsigned int luma_frame_offset; // offset of luma frame in units of 256 + unsigned int chroma_top_offset; // offset of chroma top field in units of 256 + unsigned int chroma_bot_offset; // offset of chroma bottom field in units of 256 + unsigned int chroma_frame_offset; // offset of chroma frame in units of 256 + + nvdec_display_param_s displayPara; + + // decode picture buffere related + char current_output_memory_layout; + char output_memory_layout[3]; // output NV12/NV24 setting. item 0:golden; 1: altref; 2: last + + unsigned char segmentation_feature_data_update; + unsigned char reserved1[3]; + + // ucode return result + unsigned int resultValue; // ucode return the picture header info; includes copy_buffer_to_golden etc. 
+ unsigned int partition_offset[8]; // byte offset to each token partition (used for encrypted streams only) + + nvdec_pass2_otf_ext_s ssm; + +} nvdec_vp8_pic_s; // size is 0xc0 + +// PASS1 + +//Sample means the entire frame is encrypted with a single IV, and subsample means a given frame may be encrypted in multiple chunks with different IVs. +#define NUM_SUBSAMPLES 32 + +typedef struct _bytes_of_data_s +{ + unsigned int clear_bytes; // clear bytes per subsample + unsigned int encypted_bytes; // encrypted bytes per subsample + +} bytes_of_data_s; + +typedef struct _nvdec_pass1_input_data_s +{ + bytes_of_data_s sample_size[NUM_SUBSAMPLES]; // clear/encrypted bytes per subsample + unsigned int initialization_vector[NUM_SUBSAMPLES][4]; // Ctrl64 initial vector per subsample + unsigned char IvValid[NUM_SUBSAMPLES]; // each element will tell whether IV is valid for that subsample or not. + unsigned int stream_len; // encrypted bitstream size. + unsigned int clearBufferSize; // allocated size of clear buffer size + unsigned int reencryptBufferSize; // allocated size of reencrypted buffer size + unsigned int vp8coeffPartitonBufferSize; // allocated buffer for vp8 coeff partition buffer + unsigned int PrevWidth; // required for VP9 + unsigned int num_nals :16; // number of subsamples in a frame + unsigned int drm_mode : 8; // DRM mode + unsigned int key_sel : 4; // key select from keyslot + unsigned int codec : 4; // codecs selection + unsigned int TotalSizeOfClearData; // Used with Pattern based encryption + unsigned int SliceHdrOffset; // This is used with pattern mode encryption where data before slice hdr comes in clear. 
+ unsigned int EncryptBlkCnt :16; + unsigned int SkipBlkCnt :16; +} nvdec_pass1_input_data_s; + +#define VP8_MAX_TOKEN_PARTITIONS 8 +#define VP9_MAX_FRAMES_IN_SUPERFRAME 8 + +typedef struct _nvdec_pass1_output_data_s +{ + unsigned int clear_header_size; // h264/vc1/mpeg2/vp8, decrypted pps/sps/part of slice header info, 128 bits aligned + unsigned int reencrypt_data_size; // h264/vc1/mpeg2, slice level data, vp8 mb header info, 128 bits aligned + unsigned int clear_token_data_size; // vp8, clear token data saved in VPR, 128 bits aligned + unsigned int key_increment : 6; // added to content key after unwrapping + unsigned int encryption_mode : 4; // encryption mode + unsigned int bReEncrypted : 1; // set to 0 if no re-encryption is done. + unsigned int bvp9SuperFrame : 1; // set to 1 for vp9 superframe + unsigned int vp9NumFramesMinus1 : 3; // set equal to numFrames-1 for vp9superframe. Max 8 frames are possible in vp9 superframe. + unsigned int reserved1 :17; // reserved, 32 bit alignment + unsigned int wrapped_session_key[4]; // session keys + unsigned int wrapped_content_key[4]; // content keys + unsigned int initialization_vector[4]; // Ctrl64 initial vector + union { + unsigned int partition_size[VP8_MAX_TOKEN_PARTITIONS]; // size of each token partition (used for encrypted streams of VP8) + unsigned int vp9_frame_sizes[VP9_MAX_FRAMES_IN_SUPERFRAME]; // frame size information for all frames in vp9 superframe. + }; + unsigned int vp9_clear_hdr_size[VP9_MAX_FRAMES_IN_SUPERFRAME]; // clear header size for each frame in vp9 superframe. 
+} nvdec_pass1_output_data_s; + + +/***************************************************** + AV1 +*****************************************************/ +typedef struct _scale_factors_reference_s{ + short x_scale_fp; // horizontal fixed point scale factor + short y_scale_fp; // vertical fixed point scale factor +}scale_factors_reference_s; + +typedef struct _frame_info_t{ + unsigned short width; // in pixel, av1 support arbitray resolution + unsigned short height; + unsigned short stride[2]; // luma and chroma stride in 16Bytes + unsigned int frame_buffer_idx; // TBD :clean associate the reference frame and frame buffer id to lookup base_addr +} frame_info_t; + +typedef struct _ref_frame_struct_s{ + frame_info_t info; + scale_factors_reference_s sf; // scalefactor for reference frame and current frame size, driver can calculate it + unsigned char sign_bias : 1; // calcuate based on frame_offset and current frame offset + unsigned char wmtype : 2; // global motion parameters : identity,translation,rotzoom,affine + unsigned char reserved_rf : 5; + short frame_off; // relative offset to current frame + short roffset; // relative offset from current frame +} ref_frame_struct_s; + +typedef struct _av1_fgs_cfg_t{ + //from AV1 spec 5.9.30 Film Grain Params syntax + unsigned short apply_grain : 1; + unsigned short overlap_flag : 1; + unsigned short clip_to_restricted_range : 1; + unsigned short chroma_scaling_from_luma : 1; + unsigned short num_y_points_b : 1; // flag indicates num_y_points>0 + unsigned short num_cb_points_b : 1; // flag indicates num_cb_points>0 + unsigned short num_cr_points_b : 1; // flag indicates num_cr_points>0 + unsigned short scaling_shift : 4; + unsigned short reserved_fgs : 5; + unsigned short sw_random_seed; + short cb_offset; + short cr_offset; + char cb_mult; + char cb_luma_mult; + char cr_mult; + char cr_luma_mult; +} av1_fgs_cfg_t; + + +typedef struct _nvdec_av1_pic_s +{ + nvdec_pass2_otf_s encryption_params; + + nvdec_pass2_otf_ext_s ssm; + 
+ av1_fgs_cfg_t fgs_cfg; + + // Driver may or may not use based upon need. + // If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode. + // Driver can send this value based upon resolution using the formula: + // gptimer_timeout_value = 3 * (cycles required for one frame) + unsigned int gptimer_timeout_value; + + unsigned int stream_len; // stream length. + unsigned int reserved12; // skip bytes length to real frame data . + + //sequence header + unsigned int use_128x128_superblock : 1; // superblock 128x128 or 64x64, 0:64x64, 1: 128x128 + unsigned int chroma_format : 2; // 1:420, others:reserved for future + unsigned int bit_depth : 4; // bitdepth + unsigned int enable_filter_intra : 1; // tool enable in seq level, 0 : disable 1: frame header control + unsigned int enable_intra_edge_filter : 1; + unsigned int enable_interintra_compound : 1; + unsigned int enable_masked_compound : 1; + unsigned int enable_dual_filter : 1; // enable or disable vertical and horiz filter selection + unsigned int reserved10 : 1; // 0 - disable order hint, and related tools + unsigned int reserved0 : 3; + unsigned int enable_jnt_comp : 1; // 0 - disable joint compound modes + unsigned int reserved1 : 1; + unsigned int enable_cdef : 1; + unsigned int reserved11 : 1; + unsigned int enable_fgs : 1; + unsigned int enable_substream_decoding : 1; //enable frame substream kickoff mode without context switch + unsigned int reserved2 : 10; // reserved bits + + //frame header + unsigned int frame_type : 2; // 0:Key frame, 1:Inter frame, 2:intra only, 3:s-frame + unsigned int show_frame : 1; // show frame flag + unsigned int reserved13 : 1; + unsigned int disable_cdf_update : 1; // disable CDF update during symbol decoding + unsigned int allow_screen_content_tools : 1; // screen content tool enable + unsigned int cur_frame_force_integer_mv : 1; // AMVR enable + unsigned int scale_denom_minus9 : 3; // The denominator minus9 of the superres scale + unsigned int allow_intrabc : 1; 
// IBC enable + unsigned int allow_high_precision_mv : 1; // 1/8 precision mv enable + unsigned int interp_filter : 3; // interpolation filter : EIGHTTAP_REGULAR,.... + unsigned int switchable_motion_mode : 1; // 0: simple motion mode, 1: SIMPLE, OBMC, LOCAL WARP + unsigned int use_ref_frame_mvs : 1; // 1: current frame can use the previous frame mv information, MFMV + unsigned int refresh_frame_context : 1; // backward update flag + unsigned int delta_q_present_flag : 1; // quantizer index delta values are present in the block level + unsigned int delta_q_res : 2; // left shift will apply to decoded quantizer index delta values + unsigned int delta_lf_present_flag : 1; // specified whether loop filter delta values are present in the block level + unsigned int delta_lf_res : 2; // specifies the left shift will apply to decoded loop filter values + unsigned int delta_lf_multi : 1; // seperate loop filter deltas for Hy,Vy,U,V edges + unsigned int reserved3 : 1; + unsigned int coded_lossless : 1; // 1 means all segments use lossless coding. 
Frame is fully lossless, CDEF/DBF will disable + unsigned int tile_enabled : 1; // tile enable + unsigned int reserved4 : 2; + unsigned int superres_is_scaled : 1; // frame level frame for using_superres + unsigned int reserved_fh : 1; + + unsigned int tile_cols : 8; // horizontal tile numbers in frame, max is 64 + unsigned int tile_rows : 8; // vertical tile numbers in frame, max is 64 + unsigned int context_update_tile_id : 16; // which tile cdf will be seleted as the backward update CDF, MAXTILEROW=64, MAXTILECOL=64, 12bits + + unsigned int cdef_damping_minus_3 : 2; // controls the amount of damping in the deringing filter + unsigned int cdef_bits : 2; // the number of bits needed to specify which CDEF filter to apply + unsigned int frame_tx_mode : 3; // 0:ONLY4x4,3:LARGEST,4:SELECT + unsigned int frame_reference_mode : 2; // single,compound,select + unsigned int skip_mode_flag : 1; // skip mode + unsigned int skip_ref0 : 4; + unsigned int skip_ref1 : 4; + unsigned int allow_warp : 1; // sequence level & frame level warp enable + unsigned int reduced_tx_set_used : 1; // whether the frame is restricted to oa reduced subset of the full set of transform types + unsigned int ref_scaling_enable : 1; + unsigned int reserved5 : 1; + unsigned int reserved6 : 10; // reserved bits + unsigned short superres_upscaled_width; // upscale width, frame_size_with_refs() syntax,restoration will use it + unsigned short superres_luma_step; + unsigned short superres_chroma_step; + unsigned short superres_init_luma_subpel_x; + unsigned short superres_init_chroma_subpel_x; + + /*frame header qp information*/ + unsigned char base_qindex; // the maximum qp is 255 + char y_dc_delta_q; + char u_dc_delta_q; + char v_dc_delta_q; + char u_ac_delta_q; + char v_ac_delta_q; + unsigned char qm_y; // 4bit: 0-15 + unsigned char qm_u; + unsigned char qm_v; + + /*cdef, need to update in the new spec*/ + unsigned int cdef_y_pri_strength; // 4bit for one, max is 8 + unsigned int cdef_uv_pri_strength; 
// 4bit for one, max is 8 + unsigned int cdef_y_sec_strength : 16; // 2bit for one, max is 8 + unsigned int cdef_uv_sec_strength : 16; // 2bit for one, max is 8 + + /*segmentation*/ + unsigned char segment_enabled; + unsigned char segment_update_map; + unsigned char reserved7; + unsigned char segment_temporal_update; + short segment_feature_data[8][8]; + unsigned char last_active_segid; // The highest numbered segment id that has some enabled feature. + unsigned char segid_preskip; // Whether the segment id will be read before the skip syntax element. + // 1: the segment id will be read first. + // 0: the skip syntax element will be read first. + unsigned char prevsegid_flag; // 1 : previous segment id is available + unsigned char segment_quant_sign : 8; // sign bit for segment alternative QP + + /*loopfilter*/ + unsigned char filter_level[2]; + unsigned char filter_level_u; + unsigned char filter_level_v; + unsigned char lf_sharpness_level; + char lf_ref_deltas[8]; // 0 = Intra, Last, Last2+Last3, GF, BRF, ARF2, ARF + char lf_mode_deltas[2]; // 0 = ZERO_MV, MV + + /*restoration*/ + unsigned char lr_type ; // restoration type. 
Y:bit[1:0];U:bit[3:2],V:bit[5:4] + unsigned char lr_unit_size; // restoration unit size 0:32x32, 1:64x64, 2:128x128,3:256x256; Y:bit[1:0];U:bit[3:2],V:bit[5:4] + + //general + frame_info_t current_frame; + ref_frame_struct_s ref_frame[7]; // Last, Last2, Last3, Golden, BWDREF, ALTREF2, ALTREF + + unsigned int use_temporal0_mvs : 1; + unsigned int use_temporal1_mvs : 1; + unsigned int use_temporal2_mvs : 1; + unsigned int mf1_type : 3; + unsigned int mf2_type : 3; + unsigned int mf3_type : 3; + unsigned int reserved_mfmv : 20; + + short mfmv_offset[3][7]; // 3: mf0~2, 7: Last, Last2, Last3, Golden, BWDREF, ALTREF2, ALTREF + char mfmv_side[3][7]; // flag for reverse offset great than 0 + // MFMV relative offset from the ref frame(reference to reference relative offset) + + unsigned char tileformat : 2; // 0: TBL; 1: KBL; + unsigned char gob_height : 3; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards) + unsigned char errorConcealOn : 1; // this field is not used, use ctrl_param.error_conceal_on to enable error concealment in ucode, + // always set NV_CNVDEC_GIP_ERR_CONCEAL_CTRL_ON = 1 to enable error detect in hw + unsigned char reserver8 : 2; // reserve + + unsigned char stream_error_detection : 1; + unsigned char mv_error_detection : 1; + unsigned char coeff_error_detection : 1; + unsigned char reserved_eh : 5; + + // Filt neighbor buffer offset + unsigned int Av1FltTopOffset; // filter top buffer offset respect to filter buffer, 256 bytes unit + unsigned int Av1FltVertOffset; // filter vertical buffer offset respect to filter buffer, 256 bytes unit + unsigned int Av1CdefVertOffset; // cdef vertical buffer offset respect to filter buffer, 256 bytes unit + unsigned int Av1LrVertOffset; // lr vertical buffer offset respect to filter buffer, 256 bytes unit + unsigned int Av1HusVertOffset; // hus vertical buffer offset respect to filter buffer, 256 bytes unit + unsigned int Av1FgsVertOffset; // fgs vertical buffer offset respect to 
filter buffer, 256 bytes unit + + unsigned int enable_histogram : 1; + unsigned int sw_skip_start_length : 14; //skip start length + unsigned int reserved_stat : 17; + +} nvdec_av1_pic_s; + +////////////////////////////////////////////////////////////////////// +// AV1 Buffer structure +////////////////////////////////////////////////////////////////////// +typedef struct _AV1FilmGrainMemory + { + unsigned char scaling_lut_y[256]; + unsigned char scaling_lut_cb[256]; + unsigned char scaling_lut_cr[256]; + short cropped_luma_grain_block[4096]; + short cropped_cb_grain_block[1024]; + short cropped_cr_grain_block[1024]; +} AV1FilmGrainMemory; + +typedef struct _AV1TileInfo_OLD +{ + unsigned char width_in_sb; + unsigned char height_in_sb; + unsigned char tile_start_b0; + unsigned char tile_start_b1; + unsigned char tile_start_b2; + unsigned char tile_start_b3; + unsigned char tile_end_b0; + unsigned char tile_end_b1; + unsigned char tile_end_b2; + unsigned char tile_end_b3; + unsigned char padding[6]; +} AV1TileInfo_OLD; + +typedef struct _AV1TileInfo +{ + unsigned char width_in_sb; + unsigned char padding_w; + unsigned char height_in_sb; + unsigned char padding_h; +} AV1TileInfo; + +typedef struct _AV1TileStreamInfo +{ + unsigned int tile_start; + unsigned int tile_end; + unsigned char padding[8]; +} AV1TileStreamInfo; + + +// AV1 TileSize buffer +#define AV1_MAX_TILES 256 +#define AV1_TILEINFO_BUF_SIZE_OLD NVDEC_ALIGN(AV1_MAX_TILES * sizeof(AV1TileInfo_OLD)) +#define AV1_TILEINFO_BUF_SIZE NVDEC_ALIGN(AV1_MAX_TILES * sizeof(AV1TileInfo)) + +// AV1 TileStreamInfo buffer +#define AV1_TILESTREAMINFO_BUF_SIZE NVDEC_ALIGN(AV1_MAX_TILES * sizeof(AV1TileStreamInfo)) + +// AV1 SubStreamEntry buffer +#define MAX_SUBSTREAM_ENTRY_SIZE 32 +#define AV1_SUBSTREAM_ENTRY_BUF_SIZE NVDEC_ALIGN(MAX_SUBSTREAM_ENTRY_SIZE * sizeof(nvdec_substream_entry_s)) + +// AV1 FilmGrain Parameter buffer +#define AV1_FGS_BUF_SIZE NVDEC_ALIGN(sizeof(AV1FilmGrainMemory)) + +// AV1 Temporal MV buffer 
+#define AV1_TEMPORAL_MV_SIZE_IN_64x64 256 // 4Bytes for 8x8 +#define AV1_TEMPORAL_MV_BUF_SIZE(w, h) ALIGN_UP( ALIGN_UP(w,128) * ALIGN_UP(h,128) / (64*64) * AV1_TEMPORAL_MV_SIZE_IN_64x64, 4096) + +// AV1 SegmentID buffer +#define AV1_SEGMENT_ID_SIZE_IN_64x64 128 // (3bits + 1 pad_bits) for 4x4 +#define AV1_SEGMENT_ID_BUF_SIZE(w, h) ALIGN_UP( ALIGN_UP(w,128) * ALIGN_UP(h,128) / (64*64) * AV1_SEGMENT_ID_SIZE_IN_64x64, 4096) + +// AV1 Global Motion buffer +#define AV1_GLOBAL_MOTION_BUF_SIZE NVDEC_ALIGN(7*32) + +// AV1 Intra Top buffer +#define AV1_INTRA_TOP_BUF_SIZE NVDEC_ALIGN(8*8192) + +// AV1 Histogram buffer +#define AV1_HISTOGRAM_BUF_SIZE NVDEC_ALIGN(1024) + +// AV1 Filter FG buffer +#define AV1_DBLK_TOP_SIZE_IN_SB64 ALIGN_UP(1920, 128) +#define AV1_DBLK_TOP_BUF_SIZE(w) NVDEC_ALIGN( (ALIGN_UP(w,64)/64 + 2) * AV1_DBLK_TOP_SIZE_IN_SB64) + +#define AV1_DBLK_LEFT_SIZE_IN_SB64 ALIGN_UP(1536, 128) +#define AV1_DBLK_LEFT_BUF_SIZE(h) NVDEC_ALIGN( (ALIGN_UP(h,64)/64 + 2) * AV1_DBLK_LEFT_SIZE_IN_SB64) + +#define AV1_CDEF_LEFT_SIZE_IN_SB64 ALIGN_UP(1792, 128) +#define AV1_CDEF_LEFT_BUF_SIZE(h) NVDEC_ALIGN( (ALIGN_UP(h,64)/64 + 2) * AV1_CDEF_LEFT_SIZE_IN_SB64) + +#define AV1_HUS_LEFT_SIZE_IN_SB64 ALIGN_UP(12544, 128) +#define AV1_ASIC_HUS_LEFT_BUFFER_SIZE(h) NVDEC_ALIGN( (ALIGN_UP(h,64)/64 + 2) * AV1_HUS_LEFT_SIZE_IN_SB64) +#define AV1_HUS_LEFT_BUF_SIZE(h) 2*AV1_ASIC_HUS_LEFT_BUFFER_SIZE(h) // Ping-Pong buffers + +#define AV1_LR_LEFT_SIZE_IN_SB64 ALIGN_UP(1920, 128) +#define AV1_LR_LEFT_BUF_SIZE(h) NVDEC_ALIGN( (ALIGN_UP(h,64)/64 + 2) * AV1_LR_LEFT_SIZE_IN_SB64) + +#define AV1_FGS_LEFT_SIZE_IN_SB64 ALIGN_UP(320, 128) +#define AV1_FGS_LEFT_BUF_SIZE(h) NVDEC_ALIGN( (ALIGN_UP(h,64)/64 + 2) * AV1_FGS_LEFT_SIZE_IN_SB64) + +// AV1 Hint Dump Buffer +#define AV1_HINT_DUMP_SIZE_IN_SB64 ((64*64)/(4*4)*8) // 8 bytes per CU, 256 CUs(2048 bytes) per SB64 +#define AV1_HINT_DUMP_SIZE_IN_SB128 ((128*128)/(4*4)*8) // 8 bytes per CU,1024 CUs(8192 bytes) per SB128 +#define 
AV1_HINT_DUMP_SIZE(w, h) NVDEC_ALIGN(AV1_HINT_DUMP_SIZE_IN_SB128*((w+127)/128)*((h+127)/128)) // always use SB128 for allocation + + +/******************************************************************* + New H264 +********************************************************************/ +typedef struct _nvdec_new_h264_pic_s +{ + nvdec_pass2_otf_s encryption_params; + unsigned char eos[16]; + unsigned char explicitEOSPresentFlag; + unsigned char hint_dump_en; //enable COLOMV surface dump for all frames, which includes hints of "MV/REFIDX/QP/CBP/MBPART/MBTYPE", nvbug: 200212874 + unsigned char reserved0[2]; + unsigned int stream_len; + unsigned int slice_count; + unsigned int mbhist_buffer_size; // to pass buffer size of MBHIST_BUFFER + + // Driver may or may not use based upon need. + // If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode. + // Driver can send this value based upon resolution using the formula: + // gptimer_timeout_value = 3 * (cycles required for one frame) + unsigned int gptimer_timeout_value; + + // Fields from msvld_h264_seq_s + int log2_max_pic_order_cnt_lsb_minus4; + int delta_pic_order_always_zero_flag; + int frame_mbs_only_flag; + int PicWidthInMbs; + int FrameHeightInMbs; + + unsigned int tileFormat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16 + unsigned int gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards) + unsigned int reserverd_surface_format : 27; + + // Fields from msvld_h264_pic_s + int entropy_coding_mode_flag; + int pic_order_present_flag; + int num_ref_idx_l0_active_minus1; + int num_ref_idx_l1_active_minus1; + int deblocking_filter_control_present_flag; + int redundant_pic_cnt_present_flag; + int transform_8x8_mode_flag; + + // Fields from mspdec_h264_picture_setup_s + unsigned int pitch_luma; // Luma pitch + unsigned int pitch_chroma; // chroma pitch + + unsigned int luma_top_offset; // offset of luma top field in units of 256 + unsigned int luma_bot_offset; // 
offset of luma bottom field in units of 256 + unsigned int luma_frame_offset; // offset of luma frame in units of 256 + unsigned int chroma_top_offset; // offset of chroma top field in units of 256 + unsigned int chroma_bot_offset; // offset of chroma bottom field in units of 256 + unsigned int chroma_frame_offset; // offset of chroma frame in units of 256 + unsigned int HistBufferSize; // in units of 256 + + unsigned int MbaffFrameFlag : 1; // + unsigned int direct_8x8_inference_flag: 1; // + unsigned int weighted_pred_flag : 1; // + unsigned int constrained_intra_pred_flag:1; // + unsigned int ref_pic_flag : 1; // reference picture (nal_ref_idc != 0) + unsigned int field_pic_flag : 1; // + unsigned int bottom_field_flag : 1; // + unsigned int second_field : 1; // second field of complementary reference field + unsigned int log2_max_frame_num_minus4: 4; // (0..12) + unsigned int chroma_format_idc : 2; // + unsigned int pic_order_cnt_type : 2; // (0..2) + int pic_init_qp_minus26 : 6; // : 6 (-26..+25) + int chroma_qp_index_offset : 5; // : 5 (-12..+12) + int second_chroma_qp_index_offset : 5; // : 5 (-12..+12) + + unsigned int weighted_bipred_idc : 2; // : 2 (0..2) + unsigned int CurrPicIdx : 7; // : 7 uncompressed frame buffer index + unsigned int CurrColIdx : 5; // : 5 index of associated co-located motion data buffer + unsigned int frame_num : 16; // + unsigned int frame_surfaces : 1; // frame surfaces flag + unsigned int output_memory_layout : 1; // 0: NV12; 1:NV24. Field pair must use the same setting. 
+ + int CurrFieldOrderCnt[2]; // : 32 [Top_Bottom], [0]=TopFieldOrderCnt, [1]=BottomFieldOrderCnt + nvdec_dpb_entry_s dpb[16]; + unsigned char WeightScale[6][4][4]; // : 6*4*4*8 in raster scan order (not zig-zag order) + unsigned char WeightScale8x8[2][8][8]; // : 2*8*8*8 in raster scan order (not zig-zag order) + + // mvc setup info, must be zero if not mvc + unsigned char num_inter_view_refs_lX[2]; // number of inter-view references + char reserved1[14]; // reserved for alignment + signed char inter_view_refidx_lX[2][16]; // DPB indices (must also be marked as long-term) + + // lossless decode (At the time of writing this manual, x264 and JM encoders, differ in Intra_8x8 reference sample filtering) + unsigned int lossless_ipred8x8_filter_enable : 1; // = 0, skips Intra_8x8 reference sample filtering, for vertical and horizontal predictions (x264 encoded streams); = 1, filter Intra_8x8 reference samples (JM encoded streams) + unsigned int qpprime_y_zero_transform_bypass_flag : 1; // determines the transform bypass mode + unsigned int reserved2 : 30; // kept for alignment; may be used for other parameters + + nvdec_display_param_s displayPara; + nvdec_pass2_otf_ext_s ssm; + +} nvdec_new_h264_pic_s; + +// golden crc struct dumped into surface +// for each part, if golden crc compare is enabled, one interface is selected to do crc calculation in vmod. +// vmod's crc is compared with cmod's golden crc (4*32 bits), and compare reuslt is written into surface. 
+typedef struct +{ + // input + unsigned int dbg_crc_enable_partb : 1; // Enable flag for enable/disable interface crc calculation in NVDEC HW's part b + unsigned int dbg_crc_enable_partc : 1; // Enable flag for enable/disable interface crc calculation in NVDEC HW's part c + unsigned int dbg_crc_enable_partd : 1; // Enable flag for enable/disable interface crc calculation in NVDEC HW's part d + unsigned int dbg_crc_enable_parte : 1; // Enable flag for enable/disable interface crc calculation in NVDEC HW's part e + unsigned int dbg_crc_intf_partb : 6; // For partb to select which interface to compare crc. see DBG_CRC_PARTE_INTF_SEL for detailed control value for each interface + unsigned int dbg_crc_intf_partc : 6; // For partc to select which interface to compare crc. see DBG_CRC_PARTE_INTF_SEL for detailed control value for each interface + unsigned int dbg_crc_intf_partd : 6; // For partd to select which interface to compare crc. see DBG_CRC_PARTE_INTF_SEL for detailed control value for each interface + unsigned int dbg_crc_intf_parte : 6; // For parte to select which interface to compare crc. 
see DBG_CRC_PARTE_INTF_SEL for detailed control value for each interface + unsigned int reserved0 : 4; + + unsigned int dbg_crc_partb_golden[4]; // Golden crc values for part b + unsigned int dbg_crc_partc_golden[4]; // Golden crc values for part c + unsigned int dbg_crc_partd_golden[4]; // Golden crc values for part d + unsigned int dbg_crc_parte_golden[4]; // Golden crc values for part e + + // output + unsigned int dbg_crc_comp_partb : 4; // Compare result for part b + unsigned int dbg_crc_comp_partc : 4; // Compare result for part c + unsigned int dbg_crc_comp_partd : 4; // Compare result for part d + unsigned int dbg_crc_comp_parte : 4; // Compare result for part e + unsigned int reserved1 : 16; + + unsigned char reserved2[56]; +}nvdec_crc_s; // 128 Bytes + +#endif // __DRV_NVDEC_H_ \ No newline at end of file diff --git a/test/mockgpu/nv/nvdriver.py b/test/mockgpu/nv/nvdriver.py index 1acdee94c6..f895ca5c67 100644 --- a/test/mockgpu/nv/nvdriver.py +++ b/test/mockgpu/nv/nvdriver.py @@ -100,6 +100,9 @@ class NVDriver(VirtDriver): assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVGPU) struct.hObjectNew = self._alloc_handle() self.object_by_handle[struct.hObjectNew] = NVSubDevice(self.object_by_handle[struct.hObjectParent]) + elif struct.hClass == nv_gpu.NV01_MEMORY_VIRTUAL: + assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVGPU) + struct.hObjectNew = self._alloc_handle() elif struct.hClass == nv_gpu.TURING_USERMODE_A: assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVSubDevice) struct.hObjectNew = self._alloc_handle() @@ -215,6 +218,8 @@ class NVDriver(VirtDriver): elif nr == nv_gpu.NV_ESC_RM_FREE: st = nv_gpu.NVOS00_PARAMETERS.from_address(argp) self.object_by_handle.pop(st.hObjectOld) + elif nr == nv_gpu.NV_ESC_RM_MAP_MEMORY_DMA: + pass # mappings are same as uvm elif 
nr == nv_gpu.NV_ESC_CARD_INFO: for i,gpu in enumerate(self.gpus.values()): st = nv_gpu.nv_ioctl_card_info_t.from_address(argp + i * ctypes.sizeof(nv_gpu.nv_ioctl_card_info_t)) diff --git a/test/testextra/test_hevc.py b/test/testextra/test_hevc.py new file mode 100644 index 0000000000..331abd5f9e --- /dev/null +++ b/test/testextra/test_hevc.py @@ -0,0 +1,65 @@ +import unittest + +from tinygrad import Tensor, Device +from extra.hevc.hevc import parse_hevc_file_headers, nv_gpu + +class TestHevc(unittest.TestCase): + def test_hevc_parser(self): + url = "https://github.com/haraschax/filedump/raw/09a497959f7fa6fd8dba501a25f2cdb3a41ecb12/comma_video.hevc" + hevc_tensor = Tensor.from_url(url, device="CPU") + + dat = bytes(hevc_tensor.data()) + opaque, frame_info, w, h, luma_w, luma_h, chroma_off = parse_hevc_file_headers(dat, device=Device.DEFAULT) + + def _test_common(frame, bts): + self.assertEqual(frame0.pic_width_in_luma_samples, 1952) + self.assertEqual(frame0.pic_height_in_luma_samples, 1216) + self.assertEqual(frame0.chroma_format_idc, 1) + self.assertEqual(frame0.bit_depth_luma, 8) + self.assertEqual(frame0.bit_depth_chroma, 8) + self.assertEqual(frame0.log2_min_luma_coding_block_size, 3) + self.assertEqual(frame0.log2_max_luma_coding_block_size, 5) + self.assertEqual(frame0.log2_min_transform_block_size, 2) + self.assertEqual(frame0.log2_max_transform_block_size, 5) + self.assertEqual(frame0.num_tile_columns, 3) + self.assertEqual(frame0.num_tile_rows, 1) + self.assertEqual(frame0.colMvBuffersize, 589) + self.assertEqual(frame0.HevcSaoBufferOffset, 2888) + self.assertEqual(frame0.HevcBsdCtrlOffset, 25992) + self.assertEqual(frame0.v1.hevc_main10_444_ext.HevcFltAboveOffset, 26714) + self.assertEqual(frame0.v1.hevc_main10_444_ext.HevcSaoAboveOffset, 36214) + + # tiles + self.assertEqual(bytes(bts[0x200:0x210]), b'\x18\x00&\x00\x18\x00&\x00\r\x00&\x00\x00\x00\x00\x00') + + frame0 = nv_gpu.nvdec_hevc_pic_s.from_buffer(opaque[0].data()) + _test_common(frame0, 
opaque[0].data()) + self.assertEqual(frame0.stream_len, 148063) + self.assertEqual(frame0.IDR_picture_flag, 1) + self.assertEqual(frame0.RAP_picture_flag, 1) + self.assertEqual(frame0.sw_hdr_skip_length, 0) + self.assertEqual(frame0.num_ref_frames, 0) + + frame1 = nv_gpu.nvdec_hevc_pic_s.from_buffer(opaque[1].data()) + _test_common(frame1, opaque[1].data()) + self.assertEqual(frame1.stream_len, 57110) + self.assertEqual(frame1.IDR_picture_flag, 0) + self.assertEqual(frame1.RAP_picture_flag, 0) + self.assertEqual(frame1.sw_hdr_skip_length, 9) + self.assertEqual(frame1.num_ref_frames, 1) + self.assertEqual(list(frame1.initreflistidxl0), [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + self.assertEqual(list(frame1.initreflistidxl1), [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + self.assertEqual(list(frame1.RefDiffPicOrderCnts), [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + + frame3 = nv_gpu.nvdec_hevc_pic_s.from_buffer(opaque[3].data()) + _test_common(frame3, opaque[3].data()) + self.assertEqual(frame3.stream_len, 47036) + self.assertEqual(frame3.IDR_picture_flag, 0) + self.assertEqual(frame3.RAP_picture_flag, 0) + self.assertEqual(frame3.sw_hdr_skip_length, 9) + self.assertEqual(frame3.num_ref_frames, 1) + self.assertEqual(list(frame3.initreflistidxl0), [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + self.assertEqual(list(frame3.initreflistidxl1), [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + self.assertEqual(list(frame3.RefDiffPicOrderCnts), [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) +if __name__ == "__main__": + unittest.main() diff --git a/tinygrad/device.py b/tinygrad/device.py index 9b5634d145..680e0e67fc 100644 --- a/tinygrad/device.py +++ b/tinygrad/device.py @@ -238,6 +238,7 @@ class Allocator(Generic[DeviceType]): # def _as_buffer(self, src) -> memoryview: # def _offset(self, buf, size:int, offset:int): # def _transfer(self, dest, src, sz:int, src_dev, dest_dev): + def _encode_decode(self, bufout, bufin, desc, hist:list, 
shape:tuple[int,...], frame_pos:int): raise NotImplementedError("need encdec") # optional class LRUAllocator(Allocator, Generic[DeviceType]): """ diff --git a/tinygrad/engine/realize.py b/tinygrad/engine/realize.py index 58117ee1b4..3529dbefee 100644 --- a/tinygrad/engine/realize.py +++ b/tinygrad/engine/realize.py @@ -141,6 +141,19 @@ class BufferCopy(Runner): class BufferXfer(BufferCopy): def copy(self, dest, src): dest.allocator._transfer(dest._buf, src._buf, dest.nbytes, src_dev=src.allocator.dev, dest_dev=dest.allocator.dev) +class EncDec(Runner): + def __init__(self, encdec:UOp, total_sz:int, device:str): + self.shape, self.pos_var = encdec.arg[0], encdec.variables()[0].expr # arg[0] is forwarded as `shape`; the op's first variable is looked up in var_vals as the frame position + name = f"enc/dec {total_sz/1e6:7.2f}M, HEVC" if total_sz >= 1e6 else f"enc/dec {total_sz:8d}, HEVC" + super().__init__(colored(name, "yellow"), device, Estimates(lds=total_sz, mem=total_sz)) + def __call__(self, rawbufs:list[Buffer], var_vals:dict[str, int], wait=False): + st = time.perf_counter() + rawbufs[0].allocator._encode_decode(rawbufs[0]._buf, rawbufs[1]._buf, rawbufs[2]._buf, + [x._buf for x in rawbufs[3:]], self.shape, var_vals[self.pos_var]) # rawbufs order: [0]=bufout, [1]=bufin, [2]=desc, [3:]=hist, matching Allocator._encode_decode + if wait: + Device[rawbufs[0].device].synchronize() + return time.perf_counter() - st # elapsed wall-clock seconds; the device is synchronized first only when wait is set + # **************** method cache **************** method_cache: dict[tuple[str, type, bytes, tuple[int, ...], bool], CompiledRunner] = {} @@ -201,6 +214,7 @@ si_lowerer = PatternMatcher([ (UPat(Ops.COPY, name="copy"), lambda ctx,copy: ((BufferXfer(ctx[0].nbytes, ctx[0].device, ctx[1].device) \ if hasattr(Device[ctx[0].device].allocator, '_transfer') and all_same([x.device.split(":")[0] for x in ctx]) \ else BufferCopy(ctx[0].nbytes, ctx[0].device, ctx[1].device)), list(ctx))), + (UPat(Ops.ENCDEC, name="encdec"), lambda ctx,encdec: ((EncDec(encdec, ctx[0].nbytes, ctx[1].device)), list(ctx))), ]) def lower_schedule_item(si:ScheduleItem) -> ExecItem: return ExecItem(*cast(tuple[Runner,list], si_lowerer.rewrite(si.ast, si.bufs)), si.metadata, si.fixedvars) 
diff --git a/tinygrad/runtime/autogen/__init__.py b/tinygrad/runtime/autogen/__init__.py index 1d7e9be2f7..cc7f3871c8 100644 --- a/tinygrad/runtime/autogen/__init__.py +++ b/tinygrad/runtime/autogen/__init__.py @@ -4,6 +4,7 @@ from tinygrad.helpers import fetch, flatten, system, getenv root = (here:=pathlib.Path(__file__).parent).parents[2] nv_src = {"nv_570": "https://github.com/NVIDIA/open-gpu-kernel-modules/archive/81fe4fb417c8ac3b9bdcc1d56827d116743892a5.tar.gz", "nv_580": "https://github.com/NVIDIA/open-gpu-kernel-modules/archive/2af9f1f0f7de4988432d4ae875b5858ffdb09cc2.tar.gz"} +ffmpeg_src = "https://ffmpeg.org/releases/ffmpeg-8.0.1.tar.gz" macossdk = "/var/db/xcode_select_link/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk" def load(name, dll, files, **kwargs): @@ -27,6 +28,7 @@ def __getattr__(nm): case "libc": return load("libc", ["find_library('c')"], lambda: ( [i for i in system("dpkg -L libc6-dev").split() if 'sys/mman.h' in i or 'sys/syscall.h' in i] + ["/usr/include/string.h", "/usr/include/elf.h", "/usr/include/unistd.h", "/usr/include/asm-generic/mman-common.h"]), use_errno=True) + case "avcodec": return load("avcodec", [], ["{}/libavcodec/hevc/hevc.h", "{}/libavcodec/cbs_h265.h"], tarball=ffmpeg_src) case "opencl": return load("opencl", ["find_library('OpenCL')"], ["/usr/include/CL/cl.h"]) case "cuda": return load("cuda", ["find_library('cuda')"], ["/usr/include/cuda.h"], args=["-D__CUDA_API_VERSION_INTERNAL"], parse_macros=False) case "nvrtc": return load("nvrtc", ["find_library('nvrtc')"], ["/usr/include/nvrtc.h"]) @@ -34,14 +36,14 @@ def __getattr__(nm): case "kfd": return load("kfd", [], ["/usr/include/linux/kfd_ioctl.h"]) case "nv_570" | "nv_580": return load(nm, [], [ - *[root/"extra/nv_gpu_driver"/s for s in ["clc6c0qmd.h","clcec0qmd.h"]], "{}/kernel-open/common/inc/nvmisc.h", - *[f"{{}}/src/common/sdk/nvidia/inc/class/cl{s}.h" for s in ["0000", "0080", "2080", "2080_notification", "c56f", "c86f", "c96f", "c761", + 
*[root/"extra/nv_gpu_driver"/s for s in ["clc9b0.h", "clc6c0qmd.h","clcec0qmd.h", "nvdec_drv.h"]], "{}/kernel-open/common/inc/nvmisc.h", + *[f"{{}}/src/common/sdk/nvidia/inc/class/cl{s}.h" for s in ["0000", "0070", "0080", "2080", "2080_notification", "c56f", "c86f", "c96f", "c761", "83de", "c6c0", "cdc0"]], *[f"{{}}/kernel-open/nvidia-uvm/{s}.h" for s in ["clc6b5", "clc9b5", "uvm_ioctl", "uvm_linux_ioctl", "hwref/ampere/ga100/dev_fault"]], *[f"{{}}/src/nvidia/arch/nvalloc/unix/include/nv{s}.h" for s in ["_escape", "-ioctl", "-ioctl-numbers", "-ioctl-numa", "-unix-nvos-params-wrappers"]], *[f"{{}}/src/common/sdk/nvidia/inc/{s}.h" for s in ["alloc/alloc_channel", "nvos", "ctrl/ctrlc36f", "ctrl/ctrlcb33", - "ctrl/ctrla06c", "ctrl/ctrl90f1"]], + "ctrl/ctrla06c", "ctrl/ctrl90f1", "ctrl/ctrla06f/ctrla06fgpfifo"]], *[f"{{}}/src/common/sdk/nvidia/inc/ctrl/ctrl{s}/*.h" for s in ["0000", "0080", "2080", "83de"]], "{}/kernel-open/common/inc/nvstatus.h", "{}/src/nvidia/generated/g_allclasses.h" ], args=[ @@ -129,4 +131,4 @@ python3 src/compiler/builtin_types_h.py gen/builtin_types.h""", cwd=path, shell= return load("metal", ["find_library('Metal')"],[f"{macossdk}/System/Library/Frameworks/Metal.framework/Headers/MTL{s}.h" for s in ["ComputeCommandEncoder", "ComputePipeline", "CommandQueue", "Device", "IndirectCommandBuffer", "Resource", "CommandEncoder"]], args=["-xobjective-c","-isysroot",macossdk], types={"dispatch_data_t":"objc.id_"}) - case _: raise AttributeError(f"no such autogen: {nm}") + case _: raise AttributeError(f"no such autogen: {nm}") \ No newline at end of file diff --git a/tinygrad/runtime/autogen/avcodec.py b/tinygrad/runtime/autogen/avcodec.py new file mode 100644 index 0000000000..37cc054c9d --- /dev/null +++ b/tinygrad/runtime/autogen/avcodec.py @@ -0,0 +1,543 @@ +# mypy: ignore-errors +import ctypes +from tinygrad.helpers import unwrap +from tinygrad.runtime.support.c import Struct, CEnum, _IO, _IOW, _IOR, _IOWR +enum_HEVCNALUnitType = 
CEnum(ctypes.c_uint32) +HEVC_NAL_TRAIL_N = enum_HEVCNALUnitType.define('HEVC_NAL_TRAIL_N', 0) +HEVC_NAL_TRAIL_R = enum_HEVCNALUnitType.define('HEVC_NAL_TRAIL_R', 1) +HEVC_NAL_TSA_N = enum_HEVCNALUnitType.define('HEVC_NAL_TSA_N', 2) +HEVC_NAL_TSA_R = enum_HEVCNALUnitType.define('HEVC_NAL_TSA_R', 3) +HEVC_NAL_STSA_N = enum_HEVCNALUnitType.define('HEVC_NAL_STSA_N', 4) +HEVC_NAL_STSA_R = enum_HEVCNALUnitType.define('HEVC_NAL_STSA_R', 5) +HEVC_NAL_RADL_N = enum_HEVCNALUnitType.define('HEVC_NAL_RADL_N', 6) +HEVC_NAL_RADL_R = enum_HEVCNALUnitType.define('HEVC_NAL_RADL_R', 7) +HEVC_NAL_RASL_N = enum_HEVCNALUnitType.define('HEVC_NAL_RASL_N', 8) +HEVC_NAL_RASL_R = enum_HEVCNALUnitType.define('HEVC_NAL_RASL_R', 9) +HEVC_NAL_VCL_N10 = enum_HEVCNALUnitType.define('HEVC_NAL_VCL_N10', 10) +HEVC_NAL_VCL_R11 = enum_HEVCNALUnitType.define('HEVC_NAL_VCL_R11', 11) +HEVC_NAL_VCL_N12 = enum_HEVCNALUnitType.define('HEVC_NAL_VCL_N12', 12) +HEVC_NAL_VCL_R13 = enum_HEVCNALUnitType.define('HEVC_NAL_VCL_R13', 13) +HEVC_NAL_VCL_N14 = enum_HEVCNALUnitType.define('HEVC_NAL_VCL_N14', 14) +HEVC_NAL_VCL_R15 = enum_HEVCNALUnitType.define('HEVC_NAL_VCL_R15', 15) +HEVC_NAL_BLA_W_LP = enum_HEVCNALUnitType.define('HEVC_NAL_BLA_W_LP', 16) +HEVC_NAL_BLA_W_RADL = enum_HEVCNALUnitType.define('HEVC_NAL_BLA_W_RADL', 17) +HEVC_NAL_BLA_N_LP = enum_HEVCNALUnitType.define('HEVC_NAL_BLA_N_LP', 18) +HEVC_NAL_IDR_W_RADL = enum_HEVCNALUnitType.define('HEVC_NAL_IDR_W_RADL', 19) +HEVC_NAL_IDR_N_LP = enum_HEVCNALUnitType.define('HEVC_NAL_IDR_N_LP', 20) +HEVC_NAL_CRA_NUT = enum_HEVCNALUnitType.define('HEVC_NAL_CRA_NUT', 21) +HEVC_NAL_RSV_IRAP_VCL22 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_IRAP_VCL22', 22) +HEVC_NAL_RSV_IRAP_VCL23 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_IRAP_VCL23', 23) +HEVC_NAL_RSV_VCL24 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL24', 24) +HEVC_NAL_RSV_VCL25 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL25', 25) +HEVC_NAL_RSV_VCL26 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL26', 26) 
+HEVC_NAL_RSV_VCL27 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL27', 27) +HEVC_NAL_RSV_VCL28 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL28', 28) +HEVC_NAL_RSV_VCL29 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL29', 29) +HEVC_NAL_RSV_VCL30 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL30', 30) +HEVC_NAL_RSV_VCL31 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_VCL31', 31) +HEVC_NAL_VPS = enum_HEVCNALUnitType.define('HEVC_NAL_VPS', 32) +HEVC_NAL_SPS = enum_HEVCNALUnitType.define('HEVC_NAL_SPS', 33) +HEVC_NAL_PPS = enum_HEVCNALUnitType.define('HEVC_NAL_PPS', 34) +HEVC_NAL_AUD = enum_HEVCNALUnitType.define('HEVC_NAL_AUD', 35) +HEVC_NAL_EOS_NUT = enum_HEVCNALUnitType.define('HEVC_NAL_EOS_NUT', 36) +HEVC_NAL_EOB_NUT = enum_HEVCNALUnitType.define('HEVC_NAL_EOB_NUT', 37) +HEVC_NAL_FD_NUT = enum_HEVCNALUnitType.define('HEVC_NAL_FD_NUT', 38) +HEVC_NAL_SEI_PREFIX = enum_HEVCNALUnitType.define('HEVC_NAL_SEI_PREFIX', 39) +HEVC_NAL_SEI_SUFFIX = enum_HEVCNALUnitType.define('HEVC_NAL_SEI_SUFFIX', 40) +HEVC_NAL_RSV_NVCL41 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_NVCL41', 41) +HEVC_NAL_RSV_NVCL42 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_NVCL42', 42) +HEVC_NAL_RSV_NVCL43 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_NVCL43', 43) +HEVC_NAL_RSV_NVCL44 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_NVCL44', 44) +HEVC_NAL_RSV_NVCL45 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_NVCL45', 45) +HEVC_NAL_RSV_NVCL46 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_NVCL46', 46) +HEVC_NAL_RSV_NVCL47 = enum_HEVCNALUnitType.define('HEVC_NAL_RSV_NVCL47', 47) +HEVC_NAL_UNSPEC48 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC48', 48) +HEVC_NAL_UNSPEC49 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC49', 49) +HEVC_NAL_UNSPEC50 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC50', 50) +HEVC_NAL_UNSPEC51 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC51', 51) +HEVC_NAL_UNSPEC52 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC52', 52) +HEVC_NAL_UNSPEC53 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC53', 53) 
+HEVC_NAL_UNSPEC54 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC54', 54) +HEVC_NAL_UNSPEC55 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC55', 55) +HEVC_NAL_UNSPEC56 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC56', 56) +HEVC_NAL_UNSPEC57 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC57', 57) +HEVC_NAL_UNSPEC58 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC58', 58) +HEVC_NAL_UNSPEC59 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC59', 59) +HEVC_NAL_UNSPEC60 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC60', 60) +HEVC_NAL_UNSPEC61 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC61', 61) +HEVC_NAL_UNSPEC62 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC62', 62) +HEVC_NAL_UNSPEC63 = enum_HEVCNALUnitType.define('HEVC_NAL_UNSPEC63', 63) + +enum_HEVCSliceType = CEnum(ctypes.c_uint32) +HEVC_SLICE_B = enum_HEVCSliceType.define('HEVC_SLICE_B', 0) +HEVC_SLICE_P = enum_HEVCSliceType.define('HEVC_SLICE_P', 1) +HEVC_SLICE_I = enum_HEVCSliceType.define('HEVC_SLICE_I', 2) + +_anonenum0 = CEnum(ctypes.c_uint32) +HEVC_MAX_LAYERS = _anonenum0.define('HEVC_MAX_LAYERS', 63) +HEVC_MAX_SUB_LAYERS = _anonenum0.define('HEVC_MAX_SUB_LAYERS', 7) +HEVC_MAX_LAYER_SETS = _anonenum0.define('HEVC_MAX_LAYER_SETS', 1024) +HEVC_MAX_LAYER_ID = _anonenum0.define('HEVC_MAX_LAYER_ID', 63) +HEVC_MAX_NUH_LAYER_ID = _anonenum0.define('HEVC_MAX_NUH_LAYER_ID', 62) +HEVC_MAX_VPS_COUNT = _anonenum0.define('HEVC_MAX_VPS_COUNT', 16) +HEVC_MAX_SPS_COUNT = _anonenum0.define('HEVC_MAX_SPS_COUNT', 16) +HEVC_MAX_PPS_COUNT = _anonenum0.define('HEVC_MAX_PPS_COUNT', 64) +HEVC_MAX_DPB_SIZE = _anonenum0.define('HEVC_MAX_DPB_SIZE', 16) +HEVC_MAX_REFS = _anonenum0.define('HEVC_MAX_REFS', 16) +HEVC_MAX_SHORT_TERM_REF_PIC_SETS = _anonenum0.define('HEVC_MAX_SHORT_TERM_REF_PIC_SETS', 64) +HEVC_MAX_LONG_TERM_REF_PICS = _anonenum0.define('HEVC_MAX_LONG_TERM_REF_PICS', 32) +HEVC_MIN_LOG2_CTB_SIZE = _anonenum0.define('HEVC_MIN_LOG2_CTB_SIZE', 4) +HEVC_MAX_LOG2_CTB_SIZE = _anonenum0.define('HEVC_MAX_LOG2_CTB_SIZE', 6) 
+HEVC_MAX_CPB_CNT = _anonenum0.define('HEVC_MAX_CPB_CNT', 32) +HEVC_MAX_LUMA_PS = _anonenum0.define('HEVC_MAX_LUMA_PS', 35651584) +HEVC_MAX_WIDTH = _anonenum0.define('HEVC_MAX_WIDTH', 16888) +HEVC_MAX_HEIGHT = _anonenum0.define('HEVC_MAX_HEIGHT', 16888) +HEVC_MAX_TILE_ROWS = _anonenum0.define('HEVC_MAX_TILE_ROWS', 22) +HEVC_MAX_TILE_COLUMNS = _anonenum0.define('HEVC_MAX_TILE_COLUMNS', 20) +HEVC_MAX_SLICE_SEGMENTS = _anonenum0.define('HEVC_MAX_SLICE_SEGMENTS', 600) +HEVC_MAX_ENTRY_POINT_OFFSETS = _anonenum0.define('HEVC_MAX_ENTRY_POINT_OFFSETS', 2700) +HEVC_MAX_PALETTE_PREDICTOR_SIZE = _anonenum0.define('HEVC_MAX_PALETTE_PREDICTOR_SIZE', 128) + +enum_HEVCScalabilityMask = CEnum(ctypes.c_uint32) +HEVC_SCALABILITY_DEPTH = enum_HEVCScalabilityMask.define('HEVC_SCALABILITY_DEPTH', 32768) +HEVC_SCALABILITY_MULTIVIEW = enum_HEVCScalabilityMask.define('HEVC_SCALABILITY_MULTIVIEW', 16384) +HEVC_SCALABILITY_SPATIAL = enum_HEVCScalabilityMask.define('HEVC_SCALABILITY_SPATIAL', 8192) +HEVC_SCALABILITY_AUXILIARY = enum_HEVCScalabilityMask.define('HEVC_SCALABILITY_AUXILIARY', 4096) +HEVC_SCALABILITY_MASK_MAX = enum_HEVCScalabilityMask.define('HEVC_SCALABILITY_MASK_MAX', 65535) + +enum_HEVCAuxId = CEnum(ctypes.c_uint32) +HEVC_AUX_ALPHA = enum_HEVCAuxId.define('HEVC_AUX_ALPHA', 1) +HEVC_AUX_DEPTH = enum_HEVCAuxId.define('HEVC_AUX_DEPTH', 2) + +class struct_H265RawNALUnitHeader(Struct): pass +uint8_t = ctypes.c_ubyte +struct_H265RawNALUnitHeader._fields_ = [ + ('nal_unit_type', uint8_t), + ('nuh_layer_id', uint8_t), + ('nuh_temporal_id_plus1', uint8_t), +] +H265RawNALUnitHeader = struct_H265RawNALUnitHeader +class struct_H265RawProfileTierLevel(Struct): pass +struct_H265RawProfileTierLevel._fields_ = [ + ('general_profile_space', uint8_t), + ('general_tier_flag', uint8_t), + ('general_profile_idc', uint8_t), + ('general_profile_compatibility_flag', (uint8_t * 32)), + ('general_progressive_source_flag', uint8_t), + ('general_interlaced_source_flag', uint8_t), + 
('general_non_packed_constraint_flag', uint8_t), + ('general_frame_only_constraint_flag', uint8_t), + ('general_max_12bit_constraint_flag', uint8_t), + ('general_max_10bit_constraint_flag', uint8_t), + ('general_max_8bit_constraint_flag', uint8_t), + ('general_max_422chroma_constraint_flag', uint8_t), + ('general_max_420chroma_constraint_flag', uint8_t), + ('general_max_monochrome_constraint_flag', uint8_t), + ('general_intra_constraint_flag', uint8_t), + ('general_one_picture_only_constraint_flag', uint8_t), + ('general_lower_bit_rate_constraint_flag', uint8_t), + ('general_max_14bit_constraint_flag', uint8_t), + ('general_inbld_flag', uint8_t), + ('general_level_idc', uint8_t), + ('sub_layer_profile_present_flag', (uint8_t * 7)), + ('sub_layer_level_present_flag', (uint8_t * 7)), + ('sub_layer_profile_space', (uint8_t * 7)), + ('sub_layer_tier_flag', (uint8_t * 7)), + ('sub_layer_profile_idc', (uint8_t * 7)), + ('sub_layer_profile_compatibility_flag', ((uint8_t * 32) * 7)), + ('sub_layer_progressive_source_flag', (uint8_t * 7)), + ('sub_layer_interlaced_source_flag', (uint8_t * 7)), + ('sub_layer_non_packed_constraint_flag', (uint8_t * 7)), + ('sub_layer_frame_only_constraint_flag', (uint8_t * 7)), + ('sub_layer_max_12bit_constraint_flag', (uint8_t * 7)), + ('sub_layer_max_10bit_constraint_flag', (uint8_t * 7)), + ('sub_layer_max_8bit_constraint_flag', (uint8_t * 7)), + ('sub_layer_max_422chroma_constraint_flag', (uint8_t * 7)), + ('sub_layer_max_420chroma_constraint_flag', (uint8_t * 7)), + ('sub_layer_max_monochrome_constraint_flag', (uint8_t * 7)), + ('sub_layer_intra_constraint_flag', (uint8_t * 7)), + ('sub_layer_one_picture_only_constraint_flag', (uint8_t * 7)), + ('sub_layer_lower_bit_rate_constraint_flag', (uint8_t * 7)), + ('sub_layer_max_14bit_constraint_flag', (uint8_t * 7)), + ('sub_layer_inbld_flag', (uint8_t * 7)), + ('sub_layer_level_idc', (uint8_t * 7)), +] +H265RawProfileTierLevel = struct_H265RawProfileTierLevel +class 
struct_H265RawSubLayerHRDParameters(Struct): pass +uint32_t = ctypes.c_uint32 +struct_H265RawSubLayerHRDParameters._fields_ = [ + ('bit_rate_value_minus1', (uint32_t * 32)), + ('cpb_size_value_minus1', (uint32_t * 32)), + ('cpb_size_du_value_minus1', (uint32_t * 32)), + ('bit_rate_du_value_minus1', (uint32_t * 32)), + ('cbr_flag', (uint8_t * 32)), +] +H265RawSubLayerHRDParameters = struct_H265RawSubLayerHRDParameters +class struct_H265RawHRDParameters(Struct): pass +uint16_t = ctypes.c_uint16 +struct_H265RawHRDParameters._fields_ = [ + ('nal_hrd_parameters_present_flag', uint8_t), + ('vcl_hrd_parameters_present_flag', uint8_t), + ('sub_pic_hrd_params_present_flag', uint8_t), + ('tick_divisor_minus2', uint8_t), + ('du_cpb_removal_delay_increment_length_minus1', uint8_t), + ('sub_pic_cpb_params_in_pic_timing_sei_flag', uint8_t), + ('dpb_output_delay_du_length_minus1', uint8_t), + ('bit_rate_scale', uint8_t), + ('cpb_size_scale', uint8_t), + ('cpb_size_du_scale', uint8_t), + ('initial_cpb_removal_delay_length_minus1', uint8_t), + ('au_cpb_removal_delay_length_minus1', uint8_t), + ('dpb_output_delay_length_minus1', uint8_t), + ('fixed_pic_rate_general_flag', (uint8_t * 7)), + ('fixed_pic_rate_within_cvs_flag', (uint8_t * 7)), + ('elemental_duration_in_tc_minus1', (uint16_t * 7)), + ('low_delay_hrd_flag', (uint8_t * 7)), + ('cpb_cnt_minus1', (uint8_t * 7)), + ('nal_sub_layer_hrd_parameters', (H265RawSubLayerHRDParameters * 7)), + ('vcl_sub_layer_hrd_parameters', (H265RawSubLayerHRDParameters * 7)), +] +H265RawHRDParameters = struct_H265RawHRDParameters +class struct_H265RawVUI(Struct): pass +struct_H265RawVUI._fields_ = [ + ('aspect_ratio_info_present_flag', uint8_t), + ('aspect_ratio_idc', uint8_t), + ('sar_width', uint16_t), + ('sar_height', uint16_t), + ('overscan_info_present_flag', uint8_t), + ('overscan_appropriate_flag', uint8_t), + ('video_signal_type_present_flag', uint8_t), + ('video_format', uint8_t), + ('video_full_range_flag', uint8_t), + 
('colour_description_present_flag', uint8_t), + ('colour_primaries', uint8_t), + ('transfer_characteristics', uint8_t), + ('matrix_coefficients', uint8_t), + ('chroma_loc_info_present_flag', uint8_t), + ('chroma_sample_loc_type_top_field', uint8_t), + ('chroma_sample_loc_type_bottom_field', uint8_t), + ('neutral_chroma_indication_flag', uint8_t), + ('field_seq_flag', uint8_t), + ('frame_field_info_present_flag', uint8_t), + ('default_display_window_flag', uint8_t), + ('def_disp_win_left_offset', uint16_t), + ('def_disp_win_right_offset', uint16_t), + ('def_disp_win_top_offset', uint16_t), + ('def_disp_win_bottom_offset', uint16_t), + ('vui_timing_info_present_flag', uint8_t), + ('vui_num_units_in_tick', uint32_t), + ('vui_time_scale', uint32_t), + ('vui_poc_proportional_to_timing_flag', uint8_t), + ('vui_num_ticks_poc_diff_one_minus1', uint32_t), + ('vui_hrd_parameters_present_flag', uint8_t), + ('hrd_parameters', H265RawHRDParameters), + ('bitstream_restriction_flag', uint8_t), + ('tiles_fixed_structure_flag', uint8_t), + ('motion_vectors_over_pic_boundaries_flag', uint8_t), + ('restricted_ref_pic_lists_flag', uint8_t), + ('min_spatial_segmentation_idc', uint16_t), + ('max_bytes_per_pic_denom', uint8_t), + ('max_bits_per_min_cu_denom', uint8_t), + ('log2_max_mv_length_horizontal', uint8_t), + ('log2_max_mv_length_vertical', uint8_t), +] +H265RawVUI = struct_H265RawVUI +class struct_H265RawExtensionData(Struct): pass +H265RawExtensionData = struct_H265RawExtensionData +class struct_H265RawVPS(Struct): pass +H265RawVPS = struct_H265RawVPS +class struct_H265RawSTRefPicSet(Struct): pass +struct_H265RawSTRefPicSet._fields_ = [ + ('inter_ref_pic_set_prediction_flag', uint8_t), + ('delta_idx_minus1', uint8_t), + ('delta_rps_sign', uint8_t), + ('abs_delta_rps_minus1', uint16_t), + ('used_by_curr_pic_flag', (uint8_t * 16)), + ('use_delta_flag', (uint8_t * 16)), + ('num_negative_pics', uint8_t), + ('num_positive_pics', uint8_t), + ('delta_poc_s0_minus1', (uint16_t * 16)), + 
('used_by_curr_pic_s0_flag', (uint8_t * 16)), + ('delta_poc_s1_minus1', (uint16_t * 16)), + ('used_by_curr_pic_s1_flag', (uint8_t * 16)), +] +H265RawSTRefPicSet = struct_H265RawSTRefPicSet +class struct_H265RawScalingList(Struct): pass +int16_t = ctypes.c_int16 +int8_t = ctypes.c_byte +struct_H265RawScalingList._fields_ = [ + ('scaling_list_pred_mode_flag', ((uint8_t * 6) * 4)), + ('scaling_list_pred_matrix_id_delta', ((uint8_t * 6) * 4)), + ('scaling_list_dc_coef_minus8', ((int16_t * 6) * 4)), + ('scaling_list_delta_coeff', (((int8_t * 64) * 6) * 4)), +] +H265RawScalingList = struct_H265RawScalingList +class struct_H265RawSPS(Struct): pass +H265RawSPS = struct_H265RawSPS +class struct_H265RawPPS(Struct): pass +H265RawPPS = struct_H265RawPPS +class struct_H265RawAUD(Struct): pass +struct_H265RawAUD._fields_ = [ + ('nal_unit_header', H265RawNALUnitHeader), + ('pic_type', uint8_t), +] +H265RawAUD = struct_H265RawAUD +class struct_H265RawSliceHeader(Struct): pass +struct_H265RawSliceHeader._fields_ = [ + ('nal_unit_header', H265RawNALUnitHeader), + ('first_slice_segment_in_pic_flag', uint8_t), + ('no_output_of_prior_pics_flag', uint8_t), + ('slice_pic_parameter_set_id', uint8_t), + ('dependent_slice_segment_flag', uint8_t), + ('slice_segment_address', uint16_t), + ('slice_reserved_flag', (uint8_t * 8)), + ('slice_type', uint8_t), + ('pic_output_flag', uint8_t), + ('colour_plane_id', uint8_t), + ('slice_pic_order_cnt_lsb', uint16_t), + ('short_term_ref_pic_set_sps_flag', uint8_t), + ('short_term_ref_pic_set', H265RawSTRefPicSet), + ('short_term_ref_pic_set_idx', uint8_t), + ('num_long_term_sps', uint8_t), + ('num_long_term_pics', uint8_t), + ('lt_idx_sps', (uint8_t * 16)), + ('poc_lsb_lt', (uint8_t * 16)), + ('used_by_curr_pic_lt_flag', (uint8_t * 16)), + ('delta_poc_msb_present_flag', (uint8_t * 16)), + ('delta_poc_msb_cycle_lt', (uint32_t * 16)), + ('slice_temporal_mvp_enabled_flag', uint8_t), + ('slice_sao_luma_flag', uint8_t), + ('slice_sao_chroma_flag', uint8_t), 
+ ('num_ref_idx_active_override_flag', uint8_t), + ('num_ref_idx_l0_active_minus1', uint8_t), + ('num_ref_idx_l1_active_minus1', uint8_t), + ('ref_pic_list_modification_flag_l0', uint8_t), + ('list_entry_l0', (uint8_t * 16)), + ('ref_pic_list_modification_flag_l1', uint8_t), + ('list_entry_l1', (uint8_t * 16)), + ('mvd_l1_zero_flag', uint8_t), + ('cabac_init_flag', uint8_t), + ('collocated_from_l0_flag', uint8_t), + ('collocated_ref_idx', uint8_t), + ('luma_log2_weight_denom', uint8_t), + ('delta_chroma_log2_weight_denom', int8_t), + ('luma_weight_l0_flag', (uint8_t * 16)), + ('chroma_weight_l0_flag', (uint8_t * 16)), + ('delta_luma_weight_l0', (int8_t * 16)), + ('luma_offset_l0', (int16_t * 16)), + ('delta_chroma_weight_l0', ((int8_t * 2) * 16)), + ('chroma_offset_l0', ((int16_t * 2) * 16)), + ('luma_weight_l1_flag', (uint8_t * 16)), + ('chroma_weight_l1_flag', (uint8_t * 16)), + ('delta_luma_weight_l1', (int8_t * 16)), + ('luma_offset_l1', (int16_t * 16)), + ('delta_chroma_weight_l1', ((int8_t * 2) * 16)), + ('chroma_offset_l1', ((int16_t * 2) * 16)), + ('five_minus_max_num_merge_cand', uint8_t), + ('use_integer_mv_flag', uint8_t), + ('slice_qp_delta', int8_t), + ('slice_cb_qp_offset', int8_t), + ('slice_cr_qp_offset', int8_t), + ('slice_act_y_qp_offset', int8_t), + ('slice_act_cb_qp_offset', int8_t), + ('slice_act_cr_qp_offset', int8_t), + ('cu_chroma_qp_offset_enabled_flag', uint8_t), + ('deblocking_filter_override_flag', uint8_t), + ('slice_deblocking_filter_disabled_flag', uint8_t), + ('slice_beta_offset_div2', int8_t), + ('slice_tc_offset_div2', int8_t), + ('slice_loop_filter_across_slices_enabled_flag', uint8_t), + ('num_entry_point_offsets', uint16_t), + ('offset_len_minus1', uint8_t), + ('entry_point_offset_minus1', (uint32_t * 2700)), + ('slice_segment_header_extension_length', uint16_t), + ('slice_segment_header_extension_data_byte', (uint8_t * 256)), +] +H265RawSliceHeader = struct_H265RawSliceHeader +class struct_H265RawSlice(Struct): pass 
+H265RawSlice = struct_H265RawSlice +class struct_H265RawSEIBufferingPeriod(Struct): pass +struct_H265RawSEIBufferingPeriod._fields_ = [ + ('bp_seq_parameter_set_id', uint8_t), + ('irap_cpb_params_present_flag', uint8_t), + ('cpb_delay_offset', uint32_t), + ('dpb_delay_offset', uint32_t), + ('concatenation_flag', uint8_t), + ('au_cpb_removal_delay_delta_minus1', uint32_t), + ('nal_initial_cpb_removal_delay', (uint32_t * 32)), + ('nal_initial_cpb_removal_offset', (uint32_t * 32)), + ('nal_initial_alt_cpb_removal_delay', (uint32_t * 32)), + ('nal_initial_alt_cpb_removal_offset', (uint32_t * 32)), + ('vcl_initial_cpb_removal_delay', (uint32_t * 32)), + ('vcl_initial_cpb_removal_offset', (uint32_t * 32)), + ('vcl_initial_alt_cpb_removal_delay', (uint32_t * 32)), + ('vcl_initial_alt_cpb_removal_offset', (uint32_t * 32)), + ('use_alt_cpb_params_flag', uint8_t), +] +H265RawSEIBufferingPeriod = struct_H265RawSEIBufferingPeriod +class struct_H265RawSEIPicTiming(Struct): pass +struct_H265RawSEIPicTiming._fields_ = [ + ('pic_struct', uint8_t), + ('source_scan_type', uint8_t), + ('duplicate_flag', uint8_t), + ('au_cpb_removal_delay_minus1', uint32_t), + ('pic_dpb_output_delay', uint32_t), + ('pic_dpb_output_du_delay', uint32_t), + ('num_decoding_units_minus1', uint16_t), + ('du_common_cpb_removal_delay_flag', uint8_t), + ('du_common_cpb_removal_delay_increment_minus1', uint32_t), + ('num_nalus_in_du_minus1', (uint16_t * 600)), + ('du_cpb_removal_delay_increment_minus1', (uint32_t * 600)), +] +H265RawSEIPicTiming = struct_H265RawSEIPicTiming +class struct_H265RawSEIPanScanRect(Struct): pass +int32_t = ctypes.c_int32 +struct_H265RawSEIPanScanRect._fields_ = [ + ('pan_scan_rect_id', uint32_t), + ('pan_scan_rect_cancel_flag', uint8_t), + ('pan_scan_cnt_minus1', uint8_t), + ('pan_scan_rect_left_offset', (int32_t * 3)), + ('pan_scan_rect_right_offset', (int32_t * 3)), + ('pan_scan_rect_top_offset', (int32_t * 3)), + ('pan_scan_rect_bottom_offset', (int32_t * 3)), + 
('pan_scan_rect_persistence_flag', uint16_t), +] +H265RawSEIPanScanRect = struct_H265RawSEIPanScanRect +class struct_H265RawSEIRecoveryPoint(Struct): pass +struct_H265RawSEIRecoveryPoint._fields_ = [ + ('recovery_poc_cnt', int16_t), + ('exact_match_flag', uint8_t), + ('broken_link_flag', uint8_t), +] +H265RawSEIRecoveryPoint = struct_H265RawSEIRecoveryPoint +class struct_H265RawFilmGrainCharacteristics(Struct): pass +struct_H265RawFilmGrainCharacteristics._fields_ = [ + ('film_grain_characteristics_cancel_flag', uint8_t), + ('film_grain_model_id', uint8_t), + ('separate_colour_description_present_flag', uint8_t), + ('film_grain_bit_depth_luma_minus8', uint8_t), + ('film_grain_bit_depth_chroma_minus8', uint8_t), + ('film_grain_full_range_flag', uint8_t), + ('film_grain_colour_primaries', uint8_t), + ('film_grain_transfer_characteristics', uint8_t), + ('film_grain_matrix_coeffs', uint8_t), + ('blending_mode_id', uint8_t), + ('log2_scale_factor', uint8_t), + ('comp_model_present_flag', (uint8_t * 3)), + ('num_intensity_intervals_minus1', (uint8_t * 3)), + ('num_model_values_minus1', (uint8_t * 3)), + ('intensity_interval_lower_bound', ((uint8_t * 256) * 3)), + ('intensity_interval_upper_bound', ((uint8_t * 256) * 3)), + ('comp_model_value', (((int16_t * 6) * 256) * 3)), + ('film_grain_characteristics_persistence_flag', uint8_t), +] +H265RawFilmGrainCharacteristics = struct_H265RawFilmGrainCharacteristics +class struct_H265RawSEIDisplayOrientation(Struct): pass +struct_H265RawSEIDisplayOrientation._fields_ = [ + ('display_orientation_cancel_flag', uint8_t), + ('hor_flip', uint8_t), + ('ver_flip', uint8_t), + ('anticlockwise_rotation', uint16_t), + ('display_orientation_repetition_period', uint16_t), + ('display_orientation_persistence_flag', uint8_t), +] +H265RawSEIDisplayOrientation = struct_H265RawSEIDisplayOrientation +class struct_H265RawSEIActiveParameterSets(Struct): pass +struct_H265RawSEIActiveParameterSets._fields_ = [ + ('active_video_parameter_set_id', 
uint8_t), + ('self_contained_cvs_flag', uint8_t), + ('no_parameter_set_update_flag', uint8_t), + ('num_sps_ids_minus1', uint8_t), + ('active_seq_parameter_set_id', (uint8_t * 16)), + ('layer_sps_idx', (uint8_t * 63)), +] +H265RawSEIActiveParameterSets = struct_H265RawSEIActiveParameterSets +class struct_H265RawSEIDecodedPictureHash(Struct): pass +struct_H265RawSEIDecodedPictureHash._fields_ = [ + ('hash_type', uint8_t), + ('picture_md5', ((uint8_t * 16) * 3)), + ('picture_crc', (uint16_t * 3)), + ('picture_checksum', (uint32_t * 3)), +] +H265RawSEIDecodedPictureHash = struct_H265RawSEIDecodedPictureHash +class struct_H265RawSEITimeCode(Struct): pass +struct_H265RawSEITimeCode._fields_ = [ + ('num_clock_ts', uint8_t), + ('clock_timestamp_flag', (uint8_t * 3)), + ('units_field_based_flag', (uint8_t * 3)), + ('counting_type', (uint8_t * 3)), + ('full_timestamp_flag', (uint8_t * 3)), + ('discontinuity_flag', (uint8_t * 3)), + ('cnt_dropped_flag', (uint8_t * 3)), + ('n_frames', (uint16_t * 3)), + ('seconds_value', (uint8_t * 3)), + ('minutes_value', (uint8_t * 3)), + ('hours_value', (uint8_t * 3)), + ('seconds_flag', (uint8_t * 3)), + ('minutes_flag', (uint8_t * 3)), + ('hours_flag', (uint8_t * 3)), + ('time_offset_length', (uint8_t * 3)), + ('time_offset_value', (int32_t * 3)), +] +H265RawSEITimeCode = struct_H265RawSEITimeCode +class struct_H265RawSEIAlphaChannelInfo(Struct): pass +struct_H265RawSEIAlphaChannelInfo._fields_ = [ + ('alpha_channel_cancel_flag', uint8_t), + ('alpha_channel_use_idc', uint8_t), + ('alpha_channel_bit_depth_minus8', uint8_t), + ('alpha_transparent_value', uint16_t), + ('alpha_opaque_value', uint16_t), + ('alpha_channel_incr_flag', uint8_t), + ('alpha_channel_clip_flag', uint8_t), + ('alpha_channel_clip_type_flag', uint8_t), +] +H265RawSEIAlphaChannelInfo = struct_H265RawSEIAlphaChannelInfo +class struct_H265RawSEI3DReferenceDisplaysInfo(Struct): pass +struct_H265RawSEI3DReferenceDisplaysInfo._fields_ = [ + ('prec_ref_display_width', 
uint8_t), + ('ref_viewing_distance_flag', uint8_t), + ('prec_ref_viewing_dist', uint8_t), + ('num_ref_displays_minus1', uint8_t), + ('left_view_id', (uint16_t * 32)), + ('right_view_id', (uint16_t * 32)), + ('exponent_ref_display_width', (uint8_t * 32)), + ('mantissa_ref_display_width', (uint8_t * 32)), + ('exponent_ref_viewing_distance', (uint8_t * 32)), + ('mantissa_ref_viewing_distance', (uint8_t * 32)), + ('additional_shift_present_flag', (uint8_t * 32)), + ('num_sample_shift_plus512', (uint16_t * 32)), + ('three_dimensional_reference_displays_extension_flag', uint8_t), +] +H265RawSEI3DReferenceDisplaysInfo = struct_H265RawSEI3DReferenceDisplaysInfo +class struct_H265RawSEI(Struct): pass +class struct_SEIRawMessageList(Struct): pass +SEIRawMessageList = struct_SEIRawMessageList +class struct_SEIRawMessage(Struct): pass +SEIRawMessage = struct_SEIRawMessage +size_t = ctypes.c_uint64 +struct_SEIRawMessage._fields_ = [ + ('payload_type', uint32_t), + ('payload_size', uint32_t), + ('payload', ctypes.c_void_p), + ('payload_ref', ctypes.c_void_p), + ('extension_data', ctypes.POINTER(uint8_t)), + ('extension_bit_length', size_t), +] +struct_SEIRawMessageList._fields_ = [ + ('messages', ctypes.POINTER(SEIRawMessage)), + ('nb_messages', ctypes.c_int32), + ('nb_messages_allocated', ctypes.c_int32), +] +struct_H265RawSEI._fields_ = [ + ('nal_unit_header', H265RawNALUnitHeader), + ('message_list', SEIRawMessageList), +] +H265RawSEI = struct_H265RawSEI +class struct_H265RawFiller(Struct): pass +struct_H265RawFiller._fields_ = [ + ('nal_unit_header', H265RawNALUnitHeader), + ('filler_size', uint32_t), +] +H265RawFiller = struct_H265RawFiller +class struct_CodedBitstreamH265Context(Struct): pass +CodedBitstreamH265Context = struct_CodedBitstreamH265Context diff --git a/tinygrad/runtime/autogen/nv_570.py b/tinygrad/runtime/autogen/nv_570.py index 83b7a4328d..65a5cbb58a 100644 --- a/tinygrad/runtime/autogen/nv_570.py +++ b/tinygrad/runtime/autogen/nv_570.py @@ -2,14 +2,1335 @@ 
import ctypes from tinygrad.helpers import unwrap from tinygrad.runtime.support.c import Struct, CEnum, _IO, _IOW, _IOR, _IOWR -class _anonunion0(ctypes.Union): pass +_anonenum0 = CEnum(ctypes.c_uint32) +AES128_NONE = _anonenum0.define('AES128_NONE', 0) +AES128_CTR = _anonenum0.define('AES128_CTR', 1) +AES128_CBC = _anonenum0.define('AES128_CBC', 2) +AES128_ECB = _anonenum0.define('AES128_ECB', 3) +AES128_OFB = _anonenum0.define('AES128_OFB', 4) +AES128_CTR_LSB16B = _anonenum0.define('AES128_CTR_LSB16B', 5) +AES128_CLR_AS_ENCRYPT = _anonenum0.define('AES128_CLR_AS_ENCRYPT', 6) +AES128_RESERVED = _anonenum0.define('AES128_RESERVED', 7) + +_anonenum1 = CEnum(ctypes.c_uint32) +AES128_CTS_DISABLE = _anonenum1.define('AES128_CTS_DISABLE', 0) +AES128_CTS_ENABLE = _anonenum1.define('AES128_CTS_ENABLE', 1) + +_anonenum2 = CEnum(ctypes.c_uint32) +AES128_PADDING_NONE = _anonenum2.define('AES128_PADDING_NONE', 0) +AES128_PADDING_CARRY_OVER = _anonenum2.define('AES128_PADDING_CARRY_OVER', 1) +AES128_PADDING_RFC2630 = _anonenum2.define('AES128_PADDING_RFC2630', 2) +AES128_PADDING_RESERVED = _anonenum2.define('AES128_PADDING_RESERVED', 7) + +ENCR_MODE = CEnum(ctypes.c_uint32) +ENCR_MODE_CTR64 = ENCR_MODE.define('ENCR_MODE_CTR64', 0) +ENCR_MODE_CBC = ENCR_MODE.define('ENCR_MODE_CBC', 1) +ENCR_MODE_ECB = ENCR_MODE.define('ENCR_MODE_ECB', 2) +ENCR_MODE_ECB_PARTIAL = ENCR_MODE.define('ENCR_MODE_ECB_PARTIAL', 3) +ENCR_MODE_CBC_PARTIAL = ENCR_MODE.define('ENCR_MODE_CBC_PARTIAL', 4) +ENCR_MODE_CLEAR_INTO_VPR = ENCR_MODE.define('ENCR_MODE_CLEAR_INTO_VPR', 5) +ENCR_MODE_FORCE_INTO_VPR = ENCR_MODE.define('ENCR_MODE_FORCE_INTO_VPR', 6) + +_anonenum3 = CEnum(ctypes.c_uint32) +DRM_MS_PIFF_CTR = _anonenum3.define('DRM_MS_PIFF_CTR', 17) + +_anonenum4 = CEnum(ctypes.c_uint32) +DRM_MS_PIFF_CBC = _anonenum4.define('DRM_MS_PIFF_CBC', 2) + +_anonenum5 = CEnum(ctypes.c_uint32) +DRM_MARLIN_CTR = _anonenum5.define('DRM_MARLIN_CTR', 1) + +_anonenum6 = CEnum(ctypes.c_uint32) +DRM_MARLIN_CBC = 
_anonenum6.define('DRM_MARLIN_CBC', 34) + +_anonenum7 = CEnum(ctypes.c_uint32) +DRM_WIDEVINE = _anonenum7.define('DRM_WIDEVINE', 10) + +_anonenum8 = CEnum(ctypes.c_uint32) +DRM_WIDEVINE_CTR = _anonenum8.define('DRM_WIDEVINE_CTR', 17) + +_anonenum9 = CEnum(ctypes.c_uint32) +DRM_ULTRA_VIOLET = _anonenum9.define('DRM_ULTRA_VIOLET', 5) + +_anonenum10 = CEnum(ctypes.c_uint32) +DRM_NONE = _anonenum10.define('DRM_NONE', 0) + +_anonenum11 = CEnum(ctypes.c_uint32) +DRM_CLR_AS_ENCRYPT = _anonenum11.define('DRM_CLR_AS_ENCRYPT', 6) + +class struct__nvdec_ssm_s(Struct): pass +struct__nvdec_ssm_s._fields_ = [ + ('bytes_of_protected_data', ctypes.c_uint32), + ('bytes_of_clear_data', ctypes.c_uint32,16), + ('skip_byte_blk', ctypes.c_uint32,4), + ('crypt_byte_blk', ctypes.c_uint32,4), + ('skip', ctypes.c_uint32,1), + ('last', ctypes.c_uint32,1), + ('pad', ctypes.c_uint32,1), + ('mode', ctypes.c_uint32,1), + ('entry_type', ctypes.c_uint32,1), + ('reserved', ctypes.c_uint32,3), +] +nvdec_ssm_s = struct__nvdec_ssm_s +class struct__nvdec_pass2_otf_ext_s(Struct): pass +struct__nvdec_pass2_otf_ext_s._fields_ = [ + ('ssm_entry_num', ctypes.c_uint32,16), + ('ssm_iv_num', ctypes.c_uint32,16), + ('real_stream_length', ctypes.c_uint32), + ('non_slice_data', ctypes.c_uint32,16), + ('drm_mode', ctypes.c_uint32,7), + ('reserved', ctypes.c_uint32,9), +] +nvdec_pass2_otf_ext_s = struct__nvdec_pass2_otf_ext_s +class struct__nvdec_substream_entry_s(Struct): pass +struct__nvdec_substream_entry_s._fields_ = [ + ('substream_start_offset', ctypes.c_uint32), + ('substream_length', ctypes.c_uint32), + ('substream_first_tile_idx', ctypes.c_uint32,8), + ('substream_last_tile_idx', ctypes.c_uint32,8), + ('last_substream_entry_in_frame', ctypes.c_uint32,1), + ('reserved', ctypes.c_uint32,15), +] +nvdec_substream_entry_s = struct__nvdec_substream_entry_s +class struct__nvdec_ecdma_config_s(Struct): pass +struct__nvdec_ecdma_config_s._fields_ = [ + ('ecdma_enable', ctypes.c_uint32), + ('ecdma_blk_x_src', 
ctypes.c_uint16), + ('ecdma_blk_y_src', ctypes.c_uint16), + ('ecdma_blk_x_dst', ctypes.c_uint16), + ('ecdma_blk_y_dst', ctypes.c_uint16), + ('ref_pic_idx', ctypes.c_uint16), + ('boundary0_top', ctypes.c_uint16), + ('boundary0_bottom', ctypes.c_uint16), + ('boundary1_left', ctypes.c_uint16), + ('boundary1_right', ctypes.c_uint16), + ('blk_copy_flag', ctypes.c_ubyte), + ('ctb_size', ctypes.c_ubyte), +] +nvdec_ecdma_config_s = struct__nvdec_ecdma_config_s +class struct__nvdec_status_hevc_s(Struct): pass +struct__nvdec_status_hevc_s._fields_ = [ + ('frame_status_intra_cnt', ctypes.c_uint32), + ('frame_status_inter_cnt', ctypes.c_uint32), + ('frame_status_skip_cnt', ctypes.c_uint32), + ('frame_status_fwd_mvx_cnt', ctypes.c_uint32), + ('frame_status_fwd_mvy_cnt', ctypes.c_uint32), + ('frame_status_bwd_mvx_cnt', ctypes.c_uint32), + ('frame_status_bwd_mvy_cnt', ctypes.c_uint32), + ('error_ctb_pos', ctypes.c_uint32), + ('error_slice_pos', ctypes.c_uint32), +] +nvdec_status_hevc_s = struct__nvdec_status_hevc_s +class struct__nvdec_status_vp9_s(Struct): pass +struct__nvdec_status_vp9_s._fields_ = [ + ('frame_status_intra_cnt', ctypes.c_uint32), + ('frame_status_inter_cnt', ctypes.c_uint32), + ('frame_status_skip_cnt', ctypes.c_uint32), + ('frame_status_fwd_mvx_cnt', ctypes.c_uint32), + ('frame_status_fwd_mvy_cnt', ctypes.c_uint32), + ('frame_status_bwd_mvx_cnt', ctypes.c_uint32), + ('frame_status_bwd_mvy_cnt', ctypes.c_uint32), + ('error_ctb_pos', ctypes.c_uint32), + ('error_slice_pos', ctypes.c_uint32), +] +nvdec_status_vp9_s = struct__nvdec_status_vp9_s +class struct__nvdec_status_s(Struct): pass +class struct__nvdec_status_s_0(ctypes.Union): pass +struct__nvdec_status_s_0._fields_ = [ + ('hevc', nvdec_status_hevc_s), + ('vp9', nvdec_status_vp9_s), +] +struct__nvdec_status_s._anonymous_ = ['_0'] +struct__nvdec_status_s._fields_ = [ + ('mbs_correctly_decoded', ctypes.c_uint32), + ('mbs_in_error', ctypes.c_uint32), + ('cycle_count', ctypes.c_uint32), + ('error_status', 
ctypes.c_uint32), + ('_0', struct__nvdec_status_s_0), + ('slice_header_error_code', ctypes.c_uint32), +] +nvdec_status_s = struct__nvdec_status_s +class struct__external_mv_s(Struct): pass +struct__external_mv_s._fields_ = [ + ('mvx', ctypes.c_int32,14), + ('mvy', ctypes.c_int32,14), + ('refidx', ctypes.c_uint32,4), +] +external_mv_s = struct__external_mv_s +class struct__nvdec_hevc_main10_444_ext_s(Struct): pass +struct__nvdec_hevc_main10_444_ext_s._fields_ = [ + ('transformSkipRotationEnableFlag', ctypes.c_uint32,1), + ('transformSkipContextEnableFlag', ctypes.c_uint32,1), + ('intraBlockCopyEnableFlag', ctypes.c_uint32,1), + ('implicitRdpcmEnableFlag', ctypes.c_uint32,1), + ('explicitRdpcmEnableFlag', ctypes.c_uint32,1), + ('extendedPrecisionProcessingFlag', ctypes.c_uint32,1), + ('intraSmoothingDisabledFlag', ctypes.c_uint32,1), + ('highPrecisionOffsetsEnableFlag', ctypes.c_uint32,1), + ('fastRiceAdaptationEnableFlag', ctypes.c_uint32,1), + ('cabacBypassAlignmentEnableFlag', ctypes.c_uint32,1), + ('sps_444_extension_reserved', ctypes.c_uint32,22), + ('log2MaxTransformSkipSize', ctypes.c_uint32,4), + ('crossComponentPredictionEnableFlag', ctypes.c_uint32,1), + ('chromaQpAdjustmentEnableFlag', ctypes.c_uint32,1), + ('diffCuChromaQpAdjustmentDepth', ctypes.c_uint32,2), + ('chromaQpAdjustmentTableSize', ctypes.c_uint32,3), + ('log2SaoOffsetScaleLuma', ctypes.c_uint32,3), + ('log2SaoOffsetScaleChroma', ctypes.c_uint32,3), + ('pps_444_extension_reserved', ctypes.c_uint32,15), + ('cb_qp_adjustment', (ctypes.c_char * 6)), + ('cr_qp_adjustment', (ctypes.c_char * 6)), + ('HevcFltAboveOffset', ctypes.c_uint32), + ('HevcSaoAboveOffset', ctypes.c_uint32), +] +nvdec_hevc_main10_444_ext_s = struct__nvdec_hevc_main10_444_ext_s +class struct__nvdec_hevc_pic_v1_s(Struct): pass +struct__nvdec_hevc_pic_v1_s._fields_ = [ + ('hevc_main10_444_ext', nvdec_hevc_main10_444_ext_s), + ('sw_skip_start_length', ctypes.c_uint32,14), + ('external_ref_mem_dis', ctypes.c_uint32,1), + 
('error_recovery_start_pos', ctypes.c_uint32,2), + ('error_external_mv_en', ctypes.c_uint32,1), + ('reserved0', ctypes.c_uint32,14), +] +nvdec_hevc_pic_v1_s = struct__nvdec_hevc_pic_v1_s +class struct__nvdec_hevc_pic_v2_s(Struct): pass +struct__nvdec_hevc_pic_v2_s._fields_ = [ + ('mv_hevc_enable', ctypes.c_uint32,1), + ('nuh_layer_id', ctypes.c_uint32,6), + ('default_ref_layers_active_flag', ctypes.c_uint32,1), + ('NumDirectRefLayers', ctypes.c_uint32,6), + ('max_one_active_ref_layer_flag', ctypes.c_uint32,1), + ('NumActiveRefLayerPics', ctypes.c_uint32,6), + ('poc_lsb_not_present_flag', ctypes.c_uint32,1), + ('reserved0', ctypes.c_uint32,10), +] +nvdec_hevc_pic_v2_s = struct__nvdec_hevc_pic_v2_s +class struct__nvdec_hevc_pic_v3_s(Struct): pass +struct__nvdec_hevc_pic_v3_s._fields_ = [ + ('slice_decoding_enable', ctypes.c_uint32,1), + ('slice_ec_enable', ctypes.c_uint32,1), + ('slice_ec_mv_type', ctypes.c_uint32,2), + ('err_detected_sw', ctypes.c_uint32,1), + ('slice_ec_slice_type', ctypes.c_uint32,2), + ('slice_strm_recfg_en', ctypes.c_uint32,1), + ('reserved', ctypes.c_uint32,24), + ('HevcSliceEdgeOffset', ctypes.c_uint32), +] +nvdec_hevc_pic_v3_s = struct__nvdec_hevc_pic_v3_s +class struct__nvdec_hevc_pic_s(Struct): pass +struct__nvdec_hevc_pic_s._fields_ = [ + ('wrapped_session_key', (ctypes.c_uint32 * 4)), + ('wrapped_content_key', (ctypes.c_uint32 * 4)), + ('initialization_vector', (ctypes.c_uint32 * 4)), + ('stream_len', ctypes.c_uint32), + ('enable_encryption', ctypes.c_uint32), + ('key_increment', ctypes.c_uint32,6), + ('encryption_mode', ctypes.c_uint32,4), + ('key_slot_index', ctypes.c_uint32,4), + ('ssm_en', ctypes.c_uint32,1), + ('enable_histogram', ctypes.c_uint32,1), + ('enable_substream_decoding', ctypes.c_uint32,1), + ('reserved0', ctypes.c_uint32,15), + ('gptimer_timeout_value', ctypes.c_uint32), + ('tileformat', ctypes.c_ubyte,2), + ('gob_height', ctypes.c_ubyte,3), + ('reserverd_surface_format', ctypes.c_ubyte,3), + ('sw_start_code_e', 
ctypes.c_ubyte), + ('disp_output_mode', ctypes.c_ubyte), + ('reserved1', ctypes.c_ubyte), + ('framestride', (ctypes.c_uint32 * 2)), + ('colMvBuffersize', ctypes.c_uint32), + ('HevcSaoBufferOffset', ctypes.c_uint32), + ('HevcBsdCtrlOffset', ctypes.c_uint32), + ('pic_width_in_luma_samples', ctypes.c_uint16), + ('pic_height_in_luma_samples', ctypes.c_uint16), + ('chroma_format_idc', ctypes.c_uint32,4), + ('bit_depth_luma', ctypes.c_uint32,4), + ('bit_depth_chroma', ctypes.c_uint32,4), + ('log2_min_luma_coding_block_size', ctypes.c_uint32,4), + ('log2_max_luma_coding_block_size', ctypes.c_uint32,4), + ('log2_min_transform_block_size', ctypes.c_uint32,4), + ('log2_max_transform_block_size', ctypes.c_uint32,4), + ('reserved2', ctypes.c_uint32,4), + ('max_transform_hierarchy_depth_inter', ctypes.c_uint32,3), + ('max_transform_hierarchy_depth_intra', ctypes.c_uint32,3), + ('scalingListEnable', ctypes.c_uint32,1), + ('amp_enable_flag', ctypes.c_uint32,1), + ('sample_adaptive_offset_enabled_flag', ctypes.c_uint32,1), + ('pcm_enabled_flag', ctypes.c_uint32,1), + ('pcm_sample_bit_depth_luma', ctypes.c_uint32,4), + ('pcm_sample_bit_depth_chroma', ctypes.c_uint32,4), + ('log2_min_pcm_luma_coding_block_size', ctypes.c_uint32,4), + ('log2_max_pcm_luma_coding_block_size', ctypes.c_uint32,4), + ('pcm_loop_filter_disabled_flag', ctypes.c_uint32,1), + ('sps_temporal_mvp_enabled_flag', ctypes.c_uint32,1), + ('strong_intra_smoothing_enabled_flag', ctypes.c_uint32,1), + ('reserved3', ctypes.c_uint32,3), + ('dependent_slice_segments_enabled_flag', ctypes.c_uint32,1), + ('output_flag_present_flag', ctypes.c_uint32,1), + ('num_extra_slice_header_bits', ctypes.c_uint32,3), + ('sign_data_hiding_enabled_flag', ctypes.c_uint32,1), + ('cabac_init_present_flag', ctypes.c_uint32,1), + ('num_ref_idx_l0_default_active', ctypes.c_uint32,4), + ('num_ref_idx_l1_default_active', ctypes.c_uint32,4), + ('init_qp', ctypes.c_uint32,7), + ('constrained_intra_pred_flag', ctypes.c_uint32,1), + 
('transform_skip_enabled_flag', ctypes.c_uint32,1), + ('cu_qp_delta_enabled_flag', ctypes.c_uint32,1), + ('diff_cu_qp_delta_depth', ctypes.c_uint32,2), + ('reserved4', ctypes.c_uint32,5), + ('pps_cb_qp_offset', ctypes.c_char), + ('pps_cr_qp_offset', ctypes.c_char), + ('pps_beta_offset', ctypes.c_char), + ('pps_tc_offset', ctypes.c_char), + ('pps_slice_chroma_qp_offsets_present_flag', ctypes.c_uint32,1), + ('weighted_pred_flag', ctypes.c_uint32,1), + ('weighted_bipred_flag', ctypes.c_uint32,1), + ('transquant_bypass_enabled_flag', ctypes.c_uint32,1), + ('tiles_enabled_flag', ctypes.c_uint32,1), + ('entropy_coding_sync_enabled_flag', ctypes.c_uint32,1), + ('num_tile_columns', ctypes.c_uint32,5), + ('num_tile_rows', ctypes.c_uint32,5), + ('loop_filter_across_tiles_enabled_flag', ctypes.c_uint32,1), + ('loop_filter_across_slices_enabled_flag', ctypes.c_uint32,1), + ('deblocking_filter_control_present_flag', ctypes.c_uint32,1), + ('deblocking_filter_override_enabled_flag', ctypes.c_uint32,1), + ('pps_deblocking_filter_disabled_flag', ctypes.c_uint32,1), + ('lists_modification_present_flag', ctypes.c_uint32,1), + ('log2_parallel_merge_level', ctypes.c_uint32,3), + ('slice_segment_header_extension_present_flag', ctypes.c_uint32,1), + ('reserved5', ctypes.c_uint32,6), + ('num_ref_frames', ctypes.c_ubyte), + ('reserved6', ctypes.c_ubyte), + ('longtermflag', ctypes.c_uint16), + ('initreflistidxl0', (ctypes.c_ubyte * 16)), + ('initreflistidxl1', (ctypes.c_ubyte * 16)), + ('RefDiffPicOrderCnts', (ctypes.c_int16 * 16)), + ('IDR_picture_flag', ctypes.c_ubyte), + ('RAP_picture_flag', ctypes.c_ubyte), + ('curr_pic_idx', ctypes.c_ubyte), + ('pattern_id', ctypes.c_ubyte), + ('sw_hdr_skip_length', ctypes.c_uint16), + ('reserved7', ctypes.c_uint16), + ('ecdma_cfg', nvdec_ecdma_config_s), + ('separate_colour_plane_flag', ctypes.c_uint32,1), + ('log2_max_pic_order_cnt_lsb_minus4', ctypes.c_uint32,4), + ('num_short_term_ref_pic_sets', ctypes.c_uint32,7), + ('num_long_term_ref_pics_sps', 
ctypes.c_uint32,6), + ('bBitParsingDisable', ctypes.c_uint32,1), + ('num_delta_pocs_of_rps_idx', ctypes.c_uint32,8), + ('long_term_ref_pics_present_flag', ctypes.c_uint32,1), + ('reserved_dxva', ctypes.c_uint32,4), + ('num_bits_short_term_ref_pics_in_slice', ctypes.c_uint32), + ('v1', nvdec_hevc_pic_v1_s), + ('v2', nvdec_hevc_pic_v2_s), + ('v3', nvdec_hevc_pic_v3_s), + ('ssm', nvdec_pass2_otf_ext_s), +] +nvdec_hevc_pic_s = struct__nvdec_hevc_pic_s +class struct__hevc_slice_info_s(Struct): pass +struct__hevc_slice_info_s._fields_ = [ + ('first_flag', ctypes.c_uint32,1), + ('err_flag', ctypes.c_uint32,1), + ('last_flag', ctypes.c_uint32,1), + ('conceal_partial_slice', ctypes.c_uint32,1), + ('available', ctypes.c_uint32,1), + ('reserved0', ctypes.c_uint32,7), + ('ctb_count', ctypes.c_uint32,20), + ('bs_offset', ctypes.c_uint32), + ('bs_length', ctypes.c_uint32), + ('start_ctbx', ctypes.c_uint16), + ('start_ctby', ctypes.c_uint16), +] +hevc_slice_info_s = struct__hevc_slice_info_s +class struct__slice_edge_ctb_pos_ctx_s(Struct): pass +struct__slice_edge_ctb_pos_ctx_s._fields_ = [ + ('next_slice_pos_ctbxy', ctypes.c_uint32), + ('next_slice_segment_addr', ctypes.c_uint32), +] +slice_edge_ctb_pos_ctx_s = struct__slice_edge_ctb_pos_ctx_s +class struct__slice_edge_tile_ctx_s(Struct): pass +struct__slice_edge_tile_ctx_s._fields_ = [ + ('tileInfo1', ctypes.c_uint32), + ('tileInfo2', ctypes.c_uint32), + ('tileInfo3', ctypes.c_uint32), +] +slice_edge_tile_ctx_s = struct__slice_edge_tile_ctx_s +class struct__slice_edge_stats_ctx_s(Struct): pass +struct__slice_edge_stats_ctx_s._fields_ = [ + ('frame_status_intra_cnt', ctypes.c_uint32), + ('frame_status_inter_cnt', ctypes.c_uint32), + ('frame_status_skip_cnt', ctypes.c_uint32), + ('frame_status_fwd_mvx_cnt', ctypes.c_uint32), + ('frame_status_fwd_mvy_cnt', ctypes.c_uint32), + ('frame_status_bwd_mvx_cnt', ctypes.c_uint32), + ('frame_status_bwd_mvy_cnt', ctypes.c_uint32), + ('frame_status_mv_cnt_ext', ctypes.c_uint32), +] 
+slice_edge_stats_ctx_s = struct__slice_edge_stats_ctx_s +class struct__slice_vpc_edge_ctx_s(Struct): pass +struct__slice_vpc_edge_ctx_s._fields_ = [ + ('reserved', ctypes.c_uint32), +] +slice_vpc_edge_ctx_s = struct__slice_vpc_edge_ctx_s +class struct__slice_vpc_main_ctx_s(Struct): pass +struct__slice_vpc_main_ctx_s._fields_ = [ + ('reserved', ctypes.c_uint32), +] +slice_vpc_main_ctx_s = struct__slice_vpc_main_ctx_s +class struct__slice_edge_ctx_s(Struct): pass +struct__slice_edge_ctx_s._fields_ = [ + ('slice_ctb_pos_ctx', slice_edge_ctb_pos_ctx_s), + ('slice_stats_ctx', slice_edge_stats_ctx_s), + ('slice_tile_ctx', slice_edge_tile_ctx_s), + ('slice_vpc_edge_ctx', slice_vpc_edge_ctx_s), + ('slice_vpc_main_ctx', slice_vpc_main_ctx_s), +] +slice_edge_ctx_s = struct__slice_edge_ctx_s +class struct__nvdec_vp9_pic_v1_s(Struct): pass +struct__nvdec_vp9_pic_v1_s._fields_ = [ + ('Vp9FltAboveOffset', ctypes.c_uint32), + ('external_ref_mem_dis', ctypes.c_uint32,1), + ('bit_depth', ctypes.c_uint32,4), + ('error_recovery_start_pos', ctypes.c_uint32,2), + ('error_external_mv_en', ctypes.c_uint32,1), + ('Reserved0', ctypes.c_uint32,24), +] +nvdec_vp9_pic_v1_s = struct__nvdec_vp9_pic_v1_s +enum_VP9_FRAME_SFC_ID = CEnum(ctypes.c_uint32) +VP9_LAST_FRAME_SFC = enum_VP9_FRAME_SFC_ID.define('VP9_LAST_FRAME_SFC', 0) +VP9_GOLDEN_FRAME_SFC = enum_VP9_FRAME_SFC_ID.define('VP9_GOLDEN_FRAME_SFC', 1) +VP9_ALTREF_FRAME_SFC = enum_VP9_FRAME_SFC_ID.define('VP9_ALTREF_FRAME_SFC', 2) +VP9_CURR_FRAME_SFC = enum_VP9_FRAME_SFC_ID.define('VP9_CURR_FRAME_SFC', 3) + +class struct__nvdec_vp9_pic_s(Struct): pass +struct__nvdec_vp9_pic_s._fields_ = [ + ('wrapped_session_key', (ctypes.c_uint32 * 4)), + ('wrapped_content_key', (ctypes.c_uint32 * 4)), + ('initialization_vector', (ctypes.c_uint32 * 4)), + ('stream_len', ctypes.c_uint32), + ('enable_encryption', ctypes.c_uint32), + ('key_increment', ctypes.c_uint32,6), + ('encryption_mode', ctypes.c_uint32,4), + ('sw_hdr_skip_length', ctypes.c_uint32,14), + 
('key_slot_index', ctypes.c_uint32,4), + ('ssm_en', ctypes.c_uint32,1), + ('enable_histogram', ctypes.c_uint32,1), + ('reserved0', ctypes.c_uint32,2), + ('gptimer_timeout_value', ctypes.c_uint32), + ('tileformat', ctypes.c_ubyte,2), + ('gob_height', ctypes.c_ubyte,3), + ('reserverd_surface_format', ctypes.c_ubyte,3), + ('reserved1', (ctypes.c_ubyte * 3)), + ('Vp9BsdCtrlOffset', ctypes.c_uint32), + ('ref0_width', ctypes.c_uint16), + ('ref0_height', ctypes.c_uint16), + ('ref0_stride', (ctypes.c_uint16 * 2)), + ('ref1_width', ctypes.c_uint16), + ('ref1_height', ctypes.c_uint16), + ('ref1_stride', (ctypes.c_uint16 * 2)), + ('ref2_width', ctypes.c_uint16), + ('ref2_height', ctypes.c_uint16), + ('ref2_stride', (ctypes.c_uint16 * 2)), + ('width', ctypes.c_uint16), + ('height', ctypes.c_uint16), + ('framestride', (ctypes.c_uint16 * 2)), + ('keyFrame', ctypes.c_ubyte,1), + ('prevIsKeyFrame', ctypes.c_ubyte,1), + ('resolutionChange', ctypes.c_ubyte,1), + ('errorResilient', ctypes.c_ubyte,1), + ('prevShowFrame', ctypes.c_ubyte,1), + ('intraOnly', ctypes.c_ubyte,1), + ('reserved2', ctypes.c_ubyte,2), + ('reserved3', (ctypes.c_ubyte * 3)), + ('refFrameSignBias', (ctypes.c_ubyte * 4)), + ('loopFilterLevel', ctypes.c_char), + ('loopFilterSharpness', ctypes.c_char), + ('qpYAc', ctypes.c_ubyte), + ('qpYDc', ctypes.c_char), + ('qpChAc', ctypes.c_char), + ('qpChDc', ctypes.c_char), + ('lossless', ctypes.c_char), + ('transform_mode', ctypes.c_char), + ('allow_high_precision_mv', ctypes.c_char), + ('mcomp_filter_type', ctypes.c_char), + ('comp_pred_mode', ctypes.c_char), + ('comp_fixed_ref', ctypes.c_char), + ('comp_var_ref', (ctypes.c_char * 2)), + ('log2_tile_columns', ctypes.c_char), + ('log2_tile_rows', ctypes.c_char), + ('segmentEnabled', ctypes.c_ubyte), + ('segmentMapUpdate', ctypes.c_ubyte), + ('segmentMapTemporalUpdate', ctypes.c_ubyte), + ('segmentFeatureMode', ctypes.c_ubyte), + ('segmentFeatureEnable', ((ctypes.c_ubyte * 4) * 8)), + ('segmentFeatureData', ((ctypes.c_int16 * 
4) * 8)), + ('modeRefLfEnabled', ctypes.c_char), + ('mbRefLfDelta', (ctypes.c_char * 4)), + ('mbModeLfDelta', (ctypes.c_char * 2)), + ('reserved5', ctypes.c_char), + ('v1', nvdec_vp9_pic_v1_s), + ('ssm', nvdec_pass2_otf_ext_s), +] +nvdec_vp9_pic_s = struct__nvdec_vp9_pic_s +class nvdec_nmv_context(Struct): pass +nvdec_nmv_context._fields_ = [ + ('joints', (ctypes.c_ubyte * 3)), + ('sign', (ctypes.c_ubyte * 2)), + ('class0', ((ctypes.c_ubyte * 1) * 2)), + ('fp', ((ctypes.c_ubyte * 3) * 2)), + ('class0_hp', (ctypes.c_ubyte * 2)), + ('hp', (ctypes.c_ubyte * 2)), + ('classes', ((ctypes.c_ubyte * 10) * 2)), + ('class0_fp', (((ctypes.c_ubyte * 3) * 2) * 2)), + ('bits', ((ctypes.c_ubyte * 10) * 2)), +] +class nvdec_nmv_context_counts(Struct): pass +nvdec_nmv_context_counts._fields_ = [ + ('joints', (ctypes.c_uint32 * 4)), + ('sign', ((ctypes.c_uint32 * 2) * 2)), + ('classes', ((ctypes.c_uint32 * 11) * 2)), + ('class0', ((ctypes.c_uint32 * 2) * 2)), + ('bits', (((ctypes.c_uint32 * 2) * 10) * 2)), + ('class0_fp', (((ctypes.c_uint32 * 4) * 2) * 2)), + ('fp', ((ctypes.c_uint32 * 4) * 2)), + ('class0_hp', ((ctypes.c_uint32 * 2) * 2)), + ('hp', ((ctypes.c_uint32 * 2) * 2)), +] +class struct_nvdec_vp9AdaptiveEntropyProbs_s(Struct): pass +struct_nvdec_vp9AdaptiveEntropyProbs_s._fields_ = [ + ('inter_mode_prob', ((ctypes.c_ubyte * 4) * 7)), + ('intra_inter_prob', (ctypes.c_ubyte * 4)), + ('uv_mode_prob', ((ctypes.c_ubyte * 8) * 10)), + ('tx8x8_prob', ((ctypes.c_ubyte * 1) * 2)), + ('tx16x16_prob', ((ctypes.c_ubyte * 2) * 2)), + ('tx32x32_prob', ((ctypes.c_ubyte * 3) * 2)), + ('sb_ymode_probB', ((ctypes.c_ubyte * 1) * 4)), + ('sb_ymode_prob', ((ctypes.c_ubyte * 8) * 4)), + ('partition_prob', (((ctypes.c_ubyte * 4) * 16) * 2)), + ('uv_mode_probB', ((ctypes.c_ubyte * 1) * 10)), + ('switchable_interp_prob', ((ctypes.c_ubyte * 2) * 4)), + ('comp_inter_prob', (ctypes.c_ubyte * 5)), + ('mbskip_probs', (ctypes.c_ubyte * 3)), + ('pad1', (ctypes.c_ubyte * 1)), + ('nmvc', nvdec_nmv_context), 
+ ('single_ref_prob', ((ctypes.c_ubyte * 2) * 5)), + ('comp_ref_prob', (ctypes.c_ubyte * 5)), + ('pad2', (ctypes.c_ubyte * 17)), + ('probCoeffs', (((((ctypes.c_ubyte * 4) * 6) * 6) * 2) * 2)), + ('probCoeffs8x8', (((((ctypes.c_ubyte * 4) * 6) * 6) * 2) * 2)), + ('probCoeffs16x16', (((((ctypes.c_ubyte * 4) * 6) * 6) * 2) * 2)), + ('probCoeffs32x32', (((((ctypes.c_ubyte * 4) * 6) * 6) * 2) * 2)), +] +nvdec_vp9AdaptiveEntropyProbs_t = struct_nvdec_vp9AdaptiveEntropyProbs_s +class struct_nvdec_vp9EntropyProbs_s(Struct): pass +struct_nvdec_vp9EntropyProbs_s._fields_ = [ + ('kf_bmode_prob', (((ctypes.c_ubyte * 8) * 10) * 10)), + ('kf_bmode_probB', (((ctypes.c_ubyte * 1) * 10) * 10)), + ('ref_pred_probs', (ctypes.c_ubyte * 3)), + ('mb_segment_tree_probs', (ctypes.c_ubyte * 7)), + ('segment_pred_probs', (ctypes.c_ubyte * 3)), + ('ref_scores', (ctypes.c_ubyte * 4)), + ('prob_comppred', (ctypes.c_ubyte * 2)), + ('pad1', (ctypes.c_ubyte * 9)), + ('kf_uv_mode_prob', ((ctypes.c_ubyte * 8) * 10)), + ('kf_uv_mode_probB', ((ctypes.c_ubyte * 1) * 10)), + ('pad2', (ctypes.c_ubyte * 6)), + ('a', nvdec_vp9AdaptiveEntropyProbs_t), +] +nvdec_vp9EntropyProbs_t = struct_nvdec_vp9EntropyProbs_s +class struct_nvdec_vp9EntropyCounts_s(Struct): pass +struct_nvdec_vp9EntropyCounts_s._fields_ = [ + ('inter_mode_counts', (((ctypes.c_uint32 * 2) * 3) * 7)), + ('sb_ymode_counts', ((ctypes.c_uint32 * 10) * 4)), + ('uv_mode_counts', ((ctypes.c_uint32 * 10) * 10)), + ('partition_counts', ((ctypes.c_uint32 * 4) * 16)), + ('switchable_interp_counts', ((ctypes.c_uint32 * 3) * 4)), + ('intra_inter_count', ((ctypes.c_uint32 * 2) * 4)), + ('comp_inter_count', ((ctypes.c_uint32 * 2) * 5)), + ('single_ref_count', (((ctypes.c_uint32 * 2) * 2) * 5)), + ('comp_ref_count', ((ctypes.c_uint32 * 2) * 5)), + ('tx32x32_count', ((ctypes.c_uint32 * 4) * 2)), + ('tx16x16_count', ((ctypes.c_uint32 * 3) * 2)), + ('tx8x8_count', ((ctypes.c_uint32 * 2) * 2)), + ('mbskip_count', ((ctypes.c_uint32 * 2) * 3)), + ('nmvcount', 
nvdec_nmv_context_counts), + ('countCoeffs', (((((ctypes.c_uint32 * 4) * 6) * 6) * 2) * 2)), + ('countCoeffs8x8', (((((ctypes.c_uint32 * 4) * 6) * 6) * 2) * 2)), + ('countCoeffs16x16', (((((ctypes.c_uint32 * 4) * 6) * 6) * 2) * 2)), + ('countCoeffs32x32', (((((ctypes.c_uint32 * 4) * 6) * 6) * 2) * 2)), + ('countEobs', (((((ctypes.c_uint32 * 6) * 6) * 2) * 2) * 4)), +] +nvdec_vp9EntropyCounts_t = struct_nvdec_vp9EntropyCounts_s +class struct__nvdec_pass2_otf_s(Struct): pass +struct__nvdec_pass2_otf_s._fields_ = [ + ('wrapped_session_key', (ctypes.c_uint32 * 4)), + ('wrapped_content_key', (ctypes.c_uint32 * 4)), + ('initialization_vector', (ctypes.c_uint32 * 4)), + ('enable_encryption', ctypes.c_uint32,1), + ('key_increment', ctypes.c_uint32,6), + ('encryption_mode', ctypes.c_uint32,4), + ('key_slot_index', ctypes.c_uint32,4), + ('ssm_en', ctypes.c_uint32,1), + ('reserved1', ctypes.c_uint32,16), +] +nvdec_pass2_otf_s = struct__nvdec_pass2_otf_s +class struct__nvdec_display_param_s(Struct): pass +struct__nvdec_display_param_s._fields_ = [ + ('enableTFOutput', ctypes.c_uint32,1), + ('VC1MapYFlag', ctypes.c_uint32,1), + ('MapYValue', ctypes.c_uint32,3), + ('VC1MapUVFlag', ctypes.c_uint32,1), + ('MapUVValue', ctypes.c_uint32,3), + ('OutStride', ctypes.c_uint32,8), + ('TilingFormat', ctypes.c_uint32,3), + ('OutputStructure', ctypes.c_uint32,1), + ('reserved0', ctypes.c_uint32,11), + ('OutputTop', (ctypes.c_int32 * 2)), + ('OutputBottom', (ctypes.c_int32 * 2)), + ('enableHistogram', ctypes.c_uint32,1), + ('HistogramStartX', ctypes.c_uint32,12), + ('HistogramStartY', ctypes.c_uint32,12), + ('reserved1', ctypes.c_uint32,7), + ('HistogramEndX', ctypes.c_uint32,12), + ('HistogramEndY', ctypes.c_uint32,12), + ('reserved2', ctypes.c_uint32,8), +] +nvdec_display_param_s = struct__nvdec_display_param_s +class struct__nvdec_dpb_entry_s(Struct): pass +struct__nvdec_dpb_entry_s._fields_ = [ + ('index', ctypes.c_uint32,7), + ('col_idx', ctypes.c_uint32,5), + ('state', 
ctypes.c_uint32,2), + ('is_long_term', ctypes.c_uint32,1), + ('not_existing', ctypes.c_uint32,1), + ('is_field', ctypes.c_uint32,1), + ('top_field_marking', ctypes.c_uint32,4), + ('bottom_field_marking', ctypes.c_uint32,4), + ('output_memory_layout', ctypes.c_uint32,1), + ('reserved', ctypes.c_uint32,6), + ('FieldOrderCnt', (ctypes.c_uint32 * 2)), + ('FrameIdx', ctypes.c_int32), +] +nvdec_dpb_entry_s = struct__nvdec_dpb_entry_s +class struct__nvdec_h264_pic_s(Struct): pass +struct__nvdec_h264_pic_s._fields_ = [ + ('encryption_params', nvdec_pass2_otf_s), + ('eos', (ctypes.c_ubyte * 16)), + ('explicitEOSPresentFlag', ctypes.c_ubyte), + ('hint_dump_en', ctypes.c_ubyte), + ('reserved0', (ctypes.c_ubyte * 2)), + ('stream_len', ctypes.c_uint32), + ('slice_count', ctypes.c_uint32), + ('mbhist_buffer_size', ctypes.c_uint32), + ('gptimer_timeout_value', ctypes.c_uint32), + ('log2_max_pic_order_cnt_lsb_minus4', ctypes.c_int32), + ('delta_pic_order_always_zero_flag', ctypes.c_int32), + ('frame_mbs_only_flag', ctypes.c_int32), + ('PicWidthInMbs', ctypes.c_int32), + ('FrameHeightInMbs', ctypes.c_int32), + ('tileFormat', ctypes.c_uint32,2), + ('gob_height', ctypes.c_uint32,3), + ('reserverd_surface_format', ctypes.c_uint32,27), + ('entropy_coding_mode_flag', ctypes.c_int32), + ('pic_order_present_flag', ctypes.c_int32), + ('num_ref_idx_l0_active_minus1', ctypes.c_int32), + ('num_ref_idx_l1_active_minus1', ctypes.c_int32), + ('deblocking_filter_control_present_flag', ctypes.c_int32), + ('redundant_pic_cnt_present_flag', ctypes.c_int32), + ('transform_8x8_mode_flag', ctypes.c_int32), + ('pitch_luma', ctypes.c_uint32), + ('pitch_chroma', ctypes.c_uint32), + ('luma_top_offset', ctypes.c_uint32), + ('luma_bot_offset', ctypes.c_uint32), + ('luma_frame_offset', ctypes.c_uint32), + ('chroma_top_offset', ctypes.c_uint32), + ('chroma_bot_offset', ctypes.c_uint32), + ('chroma_frame_offset', ctypes.c_uint32), + ('HistBufferSize', ctypes.c_uint32), + ('MbaffFrameFlag', ctypes.c_uint32,1), + 
('direct_8x8_inference_flag', ctypes.c_uint32,1), + ('weighted_pred_flag', ctypes.c_uint32,1), + ('constrained_intra_pred_flag', ctypes.c_uint32,1), + ('ref_pic_flag', ctypes.c_uint32,1), + ('field_pic_flag', ctypes.c_uint32,1), + ('bottom_field_flag', ctypes.c_uint32,1), + ('second_field', ctypes.c_uint32,1), + ('log2_max_frame_num_minus4', ctypes.c_uint32,4), + ('chroma_format_idc', ctypes.c_uint32,2), + ('pic_order_cnt_type', ctypes.c_uint32,2), + ('pic_init_qp_minus26', ctypes.c_int32,6), + ('chroma_qp_index_offset', ctypes.c_int32,5), + ('second_chroma_qp_index_offset', ctypes.c_int32,5), + ('weighted_bipred_idc', ctypes.c_uint32,2), + ('CurrPicIdx', ctypes.c_uint32,7), + ('CurrColIdx', ctypes.c_uint32,5), + ('frame_num', ctypes.c_uint32,16), + ('frame_surfaces', ctypes.c_uint32,1), + ('output_memory_layout', ctypes.c_uint32,1), + ('CurrFieldOrderCnt', (ctypes.c_int32 * 2)), + ('dpb', (nvdec_dpb_entry_s * 16)), + ('WeightScale', (((ctypes.c_ubyte * 4) * 4) * 6)), + ('WeightScale8x8', (((ctypes.c_ubyte * 8) * 8) * 2)), + ('num_inter_view_refs_lX', (ctypes.c_ubyte * 2)), + ('reserved1', (ctypes.c_char * 14)), + ('inter_view_refidx_lX', ((ctypes.c_byte * 16) * 2)), + ('lossless_ipred8x8_filter_enable', ctypes.c_uint32,1), + ('qpprime_y_zero_transform_bypass_flag', ctypes.c_uint32,1), + ('reserved2', ctypes.c_uint32,30), + ('displayPara', nvdec_display_param_s), + ('ssm', nvdec_pass2_otf_ext_s), +] +nvdec_h264_pic_s = struct__nvdec_h264_pic_s +enum__vc1_fcm_e = CEnum(ctypes.c_uint32) +FCM_PROGRESSIVE = enum__vc1_fcm_e.define('FCM_PROGRESSIVE', 0) +FCM_FRAME_INTERLACE = enum__vc1_fcm_e.define('FCM_FRAME_INTERLACE', 2) +FCM_FIELD_INTERLACE = enum__vc1_fcm_e.define('FCM_FIELD_INTERLACE', 3) + +vc1_fcm_e = enum__vc1_fcm_e +enum__syntax_vc1_ptype_e = CEnum(ctypes.c_uint32) +PTYPE_I = enum__syntax_vc1_ptype_e.define('PTYPE_I', 0) +PTYPE_P = enum__syntax_vc1_ptype_e.define('PTYPE_P', 1) +PTYPE_B = enum__syntax_vc1_ptype_e.define('PTYPE_B', 2) +PTYPE_BI = 
enum__syntax_vc1_ptype_e.define('PTYPE_BI', 3) +PTYPE_SKIPPED = enum__syntax_vc1_ptype_e.define('PTYPE_SKIPPED', 4) + +syntax_vc1_ptype_e = enum__syntax_vc1_ptype_e +enum_vc1_mvmode_e = CEnum(ctypes.c_uint32) +MVMODE_MIXEDMV = enum_vc1_mvmode_e.define('MVMODE_MIXEDMV', 0) +MVMODE_1MV = enum_vc1_mvmode_e.define('MVMODE_1MV', 1) +MVMODE_1MV_HALFPEL = enum_vc1_mvmode_e.define('MVMODE_1MV_HALFPEL', 2) +MVMODE_1MV_HALFPEL_BILINEAR = enum_vc1_mvmode_e.define('MVMODE_1MV_HALFPEL_BILINEAR', 3) +MVMODE_INTENSITY_COMPENSATION = enum_vc1_mvmode_e.define('MVMODE_INTENSITY_COMPENSATION', 4) + +enum__vc1_fptype_e = CEnum(ctypes.c_uint32) +FPTYPE_I_I = enum__vc1_fptype_e.define('FPTYPE_I_I', 0) +FPTYPE_I_P = enum__vc1_fptype_e.define('FPTYPE_I_P', 1) +FPTYPE_P_I = enum__vc1_fptype_e.define('FPTYPE_P_I', 2) +FPTYPE_P_P = enum__vc1_fptype_e.define('FPTYPE_P_P', 3) +FPTYPE_B_B = enum__vc1_fptype_e.define('FPTYPE_B_B', 4) +FPTYPE_B_BI = enum__vc1_fptype_e.define('FPTYPE_B_BI', 5) +FPTYPE_BI_B = enum__vc1_fptype_e.define('FPTYPE_BI_B', 6) +FPTYPE_BI_BI = enum__vc1_fptype_e.define('FPTYPE_BI_BI', 7) + +vc1_fptype_e = enum__vc1_fptype_e +enum__vc1_dqprofile_e = CEnum(ctypes.c_uint32) +DQPROFILE_ALL_FOUR_EDGES = enum__vc1_dqprofile_e.define('DQPROFILE_ALL_FOUR_EDGES', 0) +DQPROFILE_DOUBLE_EDGE = enum__vc1_dqprofile_e.define('DQPROFILE_DOUBLE_EDGE', 1) +DQPROFILE_SINGLE_EDGE = enum__vc1_dqprofile_e.define('DQPROFILE_SINGLE_EDGE', 2) +DQPROFILE_ALL_MACROBLOCKS = enum__vc1_dqprofile_e.define('DQPROFILE_ALL_MACROBLOCKS', 3) + +vc1_dqprofile_e = enum__vc1_dqprofile_e +class struct__nvdec_vc1_pic_s(Struct): pass +struct__nvdec_vc1_pic_s._fields_ = [ + ('encryption_params', nvdec_pass2_otf_s), + ('eos', (ctypes.c_ubyte * 16)), + ('prefixStartCode', (ctypes.c_ubyte * 4)), + ('bitstream_offset', ctypes.c_uint32), + ('explicitEOSPresentFlag', ctypes.c_ubyte), + ('reserved0', (ctypes.c_ubyte * 3)), + ('stream_len', ctypes.c_uint32), + ('slice_count', ctypes.c_uint32), + ('scratch_pic_buffer_size', 
ctypes.c_uint32), + ('gptimer_timeout_value', ctypes.c_uint32), + ('FrameWidth', ctypes.c_uint16), + ('FrameHeight', ctypes.c_uint16), + ('profile', ctypes.c_ubyte), + ('postprocflag', ctypes.c_ubyte), + ('pulldown', ctypes.c_ubyte), + ('interlace', ctypes.c_ubyte), + ('tfcntrflag', ctypes.c_ubyte), + ('finterpflag', ctypes.c_ubyte), + ('psf', ctypes.c_ubyte), + ('tileFormat', ctypes.c_ubyte,2), + ('gob_height', ctypes.c_ubyte,3), + ('reserverd_surface_format', ctypes.c_ubyte,3), + ('multires', ctypes.c_ubyte), + ('syncmarker', ctypes.c_ubyte), + ('rangered', ctypes.c_ubyte), + ('maxbframes', ctypes.c_ubyte), + ('dquant', ctypes.c_ubyte), + ('panscan_flag', ctypes.c_ubyte), + ('refdist_flag', ctypes.c_ubyte), + ('quantizer', ctypes.c_ubyte), + ('extended_mv', ctypes.c_ubyte), + ('extended_dmv', ctypes.c_ubyte), + ('overlap', ctypes.c_ubyte), + ('vstransform', ctypes.c_ubyte), + ('refdist', ctypes.c_char), + ('reserved1', (ctypes.c_char * 3)), + ('fcm', vc1_fcm_e), + ('ptype', syntax_vc1_ptype_e), + ('tfcntr', ctypes.c_int32), + ('rptfrm', ctypes.c_int32), + ('tff', ctypes.c_int32), + ('rndctrl', ctypes.c_int32), + ('pqindex', ctypes.c_int32), + ('halfqp', ctypes.c_int32), + ('pquantizer', ctypes.c_int32), + ('postproc', ctypes.c_int32), + ('condover', ctypes.c_int32), + ('transacfrm', ctypes.c_int32), + ('transacfrm2', ctypes.c_int32), + ('transdctab', ctypes.c_int32), + ('pqdiff', ctypes.c_int32), + ('abspq', ctypes.c_int32), + ('dquantfrm', ctypes.c_int32), + ('dqprofile', vc1_dqprofile_e), + ('dqsbedge', ctypes.c_int32), + ('dqdbedge', ctypes.c_int32), + ('dqbilevel', ctypes.c_int32), + ('mvrange', ctypes.c_int32), + ('mvmode', enum_vc1_mvmode_e), + ('mvmode2', enum_vc1_mvmode_e), + ('lumscale', ctypes.c_int32), + ('lumshift', ctypes.c_int32), + ('mvtab', ctypes.c_int32), + ('cbptab', ctypes.c_int32), + ('ttmbf', ctypes.c_int32), + ('ttfrm', ctypes.c_int32), + ('bfraction', ctypes.c_int32), + ('fptype', vc1_fptype_e), + ('numref', ctypes.c_int32), + ('reffield', 
ctypes.c_int32), + ('dmvrange', ctypes.c_int32), + ('intcompfield', ctypes.c_int32), + ('lumscale1', ctypes.c_int32), + ('lumshift1', ctypes.c_int32), + ('lumscale2', ctypes.c_int32), + ('lumshift2', ctypes.c_int32), + ('mbmodetab', ctypes.c_int32), + ('imvtab', ctypes.c_int32), + ('icbptab', ctypes.c_int32), + ('fourmvbptab', ctypes.c_int32), + ('fourmvswitch', ctypes.c_int32), + ('intcomp', ctypes.c_int32), + ('twomvbptab', ctypes.c_int32), + ('rangeredfrm', ctypes.c_int32), + ('HistBufferSize', ctypes.c_uint32), + ('FrameStride', (ctypes.c_uint32 * 2)), + ('luma_top_offset', ctypes.c_uint32), + ('luma_bot_offset', ctypes.c_uint32), + ('luma_frame_offset', ctypes.c_uint32), + ('chroma_top_offset', ctypes.c_uint32), + ('chroma_bot_offset', ctypes.c_uint32), + ('chroma_frame_offset', ctypes.c_uint32), + ('CodedWidth', ctypes.c_uint16), + ('CodedHeight', ctypes.c_uint16), + ('loopfilter', ctypes.c_ubyte), + ('fastuvmc', ctypes.c_ubyte), + ('output_memory_layout', ctypes.c_ubyte), + ('ref_memory_layout', (ctypes.c_ubyte * 2)), + ('reserved3', (ctypes.c_ubyte * 3)), + ('displayPara', nvdec_display_param_s), + ('ssm', nvdec_pass2_otf_ext_s), +] +nvdec_vc1_pic_s = struct__nvdec_vc1_pic_s +class struct__nvdec_mpeg2_pic_s(Struct): pass +struct__nvdec_mpeg2_pic_s._fields_ = [ + ('encryption_params', nvdec_pass2_otf_s), + ('eos', (ctypes.c_ubyte * 16)), + ('explicitEOSPresentFlag', ctypes.c_ubyte), + ('reserved0', (ctypes.c_ubyte * 3)), + ('stream_len', ctypes.c_uint32), + ('slice_count', ctypes.c_uint32), + ('gptimer_timeout_value', ctypes.c_uint32), + ('FrameWidth', ctypes.c_int16), + ('FrameHeight', ctypes.c_int16), + ('picture_structure', ctypes.c_ubyte), + ('picture_coding_type', ctypes.c_ubyte), + ('intra_dc_precision', ctypes.c_ubyte), + ('frame_pred_frame_dct', ctypes.c_char), + ('concealment_motion_vectors', ctypes.c_char), + ('intra_vlc_format', ctypes.c_char), + ('tileFormat', ctypes.c_ubyte,2), + ('gob_height', ctypes.c_ubyte,3), + ('reserverd_surface_format', 
ctypes.c_ubyte,3), + ('reserved1', ctypes.c_char), + ('f_code', (ctypes.c_char * 4)), + ('PicWidthInMbs', ctypes.c_uint16), + ('FrameHeightInMbs', ctypes.c_uint16), + ('pitch_luma', ctypes.c_uint32), + ('pitch_chroma', ctypes.c_uint32), + ('luma_top_offset', ctypes.c_uint32), + ('luma_bot_offset', ctypes.c_uint32), + ('luma_frame_offset', ctypes.c_uint32), + ('chroma_top_offset', ctypes.c_uint32), + ('chroma_bot_offset', ctypes.c_uint32), + ('chroma_frame_offset', ctypes.c_uint32), + ('HistBufferSize', ctypes.c_uint32), + ('output_memory_layout', ctypes.c_uint16), + ('alternate_scan', ctypes.c_uint16), + ('secondfield', ctypes.c_uint16), + ('rounding_type', ctypes.c_uint16), + ('MbInfoSizeInBytes', ctypes.c_uint32), + ('q_scale_type', ctypes.c_uint32), + ('top_field_first', ctypes.c_uint32), + ('full_pel_fwd_vector', ctypes.c_uint32), + ('full_pel_bwd_vector', ctypes.c_uint32), + ('quant_mat_8x8intra', (ctypes.c_ubyte * 64)), + ('quant_mat_8x8nonintra', (ctypes.c_ubyte * 64)), + ('ref_memory_layout', (ctypes.c_uint32 * 2)), + ('displayPara', nvdec_display_param_s), + ('ssm', nvdec_pass2_otf_ext_s), +] +nvdec_mpeg2_pic_s = struct__nvdec_mpeg2_pic_s +class struct__nvdec_mpeg4_pic_s(Struct): pass +struct__nvdec_mpeg4_pic_s._fields_ = [ + ('encryption_params', nvdec_pass2_otf_s), + ('eos', (ctypes.c_ubyte * 16)), + ('explicitEOSPresentFlag', ctypes.c_ubyte), + ('reserved2', (ctypes.c_ubyte * 3)), + ('stream_len', ctypes.c_uint32), + ('slice_count', ctypes.c_uint32), + ('scratch_pic_buffer_size', ctypes.c_uint32), + ('gptimer_timeout_value', ctypes.c_uint32), + ('FrameWidth', ctypes.c_int16), + ('FrameHeight', ctypes.c_int16), + ('vop_time_increment_bitcount', ctypes.c_char), + ('resync_marker_disable', ctypes.c_char), + ('tileFormat', ctypes.c_ubyte,2), + ('gob_height', ctypes.c_ubyte,3), + ('reserverd_surface_format', ctypes.c_ubyte,3), + ('reserved3', ctypes.c_char), + ('width', ctypes.c_int32), + ('height', ctypes.c_int32), + ('FrameStride', (ctypes.c_uint32 * 2)), 
+ ('luma_top_offset', ctypes.c_uint32), + ('luma_bot_offset', ctypes.c_uint32), + ('luma_frame_offset', ctypes.c_uint32), + ('chroma_top_offset', ctypes.c_uint32), + ('chroma_bot_offset', ctypes.c_uint32), + ('chroma_frame_offset', ctypes.c_uint32), + ('HistBufferSize', ctypes.c_uint32), + ('trd', (ctypes.c_int32 * 2)), + ('trb', (ctypes.c_int32 * 2)), + ('divx_flags', ctypes.c_int32), + ('vop_fcode_forward', ctypes.c_int16), + ('vop_fcode_backward', ctypes.c_int16), + ('interlaced', ctypes.c_ubyte), + ('quant_type', ctypes.c_ubyte), + ('quarter_sample', ctypes.c_ubyte), + ('short_video_header', ctypes.c_ubyte), + ('curr_output_memory_layout', ctypes.c_ubyte), + ('ptype', ctypes.c_ubyte), + ('rnd', ctypes.c_ubyte), + ('alternate_vertical_scan_flag', ctypes.c_ubyte), + ('top_field_flag', ctypes.c_ubyte), + ('reserved0', (ctypes.c_ubyte * 3)), + ('intra_quant_mat', (ctypes.c_ubyte * 64)), + ('nonintra_quant_mat', (ctypes.c_ubyte * 64)), + ('ref_memory_layout', (ctypes.c_ubyte * 2)), + ('reserved1', (ctypes.c_ubyte * 34)), + ('displayPara', nvdec_display_param_s), +] +nvdec_mpeg4_pic_s = struct__nvdec_mpeg4_pic_s +enum_VP8_FRAME_TYPE = CEnum(ctypes.c_uint32) +VP8_KEYFRAME = enum_VP8_FRAME_TYPE.define('VP8_KEYFRAME', 0) +VP8_INTERFRAME = enum_VP8_FRAME_TYPE.define('VP8_INTERFRAME', 1) + +enum_VP8_FRAME_SFC_ID = CEnum(ctypes.c_uint32) +VP8_GOLDEN_FRAME_SFC = enum_VP8_FRAME_SFC_ID.define('VP8_GOLDEN_FRAME_SFC', 0) +VP8_ALTREF_FRAME_SFC = enum_VP8_FRAME_SFC_ID.define('VP8_ALTREF_FRAME_SFC', 1) +VP8_LAST_FRAME_SFC = enum_VP8_FRAME_SFC_ID.define('VP8_LAST_FRAME_SFC', 2) +VP8_CURR_FRAME_SFC = enum_VP8_FRAME_SFC_ID.define('VP8_CURR_FRAME_SFC', 3) + +class struct__nvdec_vp8_pic_s(Struct): pass +struct__nvdec_vp8_pic_s._fields_ = [ + ('encryption_params', nvdec_pass2_otf_s), + ('gptimer_timeout_value', ctypes.c_uint32), + ('FrameWidth', ctypes.c_uint16), + ('FrameHeight', ctypes.c_uint16), + ('keyFrame', ctypes.c_ubyte), + ('version', ctypes.c_ubyte), + ('tileFormat', 
ctypes.c_ubyte,2), + ('gob_height', ctypes.c_ubyte,3), + ('reserverd_surface_format', ctypes.c_ubyte,3), + ('errorConcealOn', ctypes.c_ubyte), + ('firstPartSize', ctypes.c_uint32), + ('HistBufferSize', ctypes.c_uint32), + ('VLDBufferSize', ctypes.c_uint32), + ('FrameStride', (ctypes.c_uint32 * 2)), + ('luma_top_offset', ctypes.c_uint32), + ('luma_bot_offset', ctypes.c_uint32), + ('luma_frame_offset', ctypes.c_uint32), + ('chroma_top_offset', ctypes.c_uint32), + ('chroma_bot_offset', ctypes.c_uint32), + ('chroma_frame_offset', ctypes.c_uint32), + ('displayPara', nvdec_display_param_s), + ('current_output_memory_layout', ctypes.c_char), + ('output_memory_layout', (ctypes.c_char * 3)), + ('segmentation_feature_data_update', ctypes.c_ubyte), + ('reserved1', (ctypes.c_ubyte * 3)), + ('resultValue', ctypes.c_uint32), + ('partition_offset', (ctypes.c_uint32 * 8)), + ('ssm', nvdec_pass2_otf_ext_s), +] +nvdec_vp8_pic_s = struct__nvdec_vp8_pic_s +class struct__bytes_of_data_s(Struct): pass +struct__bytes_of_data_s._fields_ = [ + ('clear_bytes', ctypes.c_uint32), + ('encypted_bytes', ctypes.c_uint32), +] +bytes_of_data_s = struct__bytes_of_data_s +class struct__nvdec_pass1_input_data_s(Struct): pass +struct__nvdec_pass1_input_data_s._fields_ = [ + ('sample_size', (bytes_of_data_s * 32)), + ('initialization_vector', ((ctypes.c_uint32 * 4) * 32)), + ('IvValid', (ctypes.c_ubyte * 32)), + ('stream_len', ctypes.c_uint32), + ('clearBufferSize', ctypes.c_uint32), + ('reencryptBufferSize', ctypes.c_uint32), + ('vp8coeffPartitonBufferSize', ctypes.c_uint32), + ('PrevWidth', ctypes.c_uint32), + ('num_nals', ctypes.c_uint32,16), + ('drm_mode', ctypes.c_uint32,8), + ('key_sel', ctypes.c_uint32,4), + ('codec', ctypes.c_uint32,4), + ('TotalSizeOfClearData', ctypes.c_uint32), + ('SliceHdrOffset', ctypes.c_uint32), + ('EncryptBlkCnt', ctypes.c_uint32,16), + ('SkipBlkCnt', ctypes.c_uint32,16), +] +nvdec_pass1_input_data_s = struct__nvdec_pass1_input_data_s +class 
struct__nvdec_pass1_output_data_s(Struct): pass +class struct__nvdec_pass1_output_data_s_0(ctypes.Union): pass +struct__nvdec_pass1_output_data_s_0._fields_ = [ + ('partition_size', (ctypes.c_uint32 * 8)), + ('vp9_frame_sizes', (ctypes.c_uint32 * 8)), +] +struct__nvdec_pass1_output_data_s._anonymous_ = ['_0'] +struct__nvdec_pass1_output_data_s._fields_ = [ + ('clear_header_size', ctypes.c_uint32), + ('reencrypt_data_size', ctypes.c_uint32), + ('clear_token_data_size', ctypes.c_uint32), + ('key_increment', ctypes.c_uint32,6), + ('encryption_mode', ctypes.c_uint32,4), + ('bReEncrypted', ctypes.c_uint32,1), + ('bvp9SuperFrame', ctypes.c_uint32,1), + ('vp9NumFramesMinus1', ctypes.c_uint32,3), + ('reserved1', ctypes.c_uint32,17), + ('wrapped_session_key', (ctypes.c_uint32 * 4)), + ('wrapped_content_key', (ctypes.c_uint32 * 4)), + ('initialization_vector', (ctypes.c_uint32 * 4)), + ('_0', struct__nvdec_pass1_output_data_s_0), + ('vp9_clear_hdr_size', (ctypes.c_uint32 * 8)), +] +nvdec_pass1_output_data_s = struct__nvdec_pass1_output_data_s +class struct__scale_factors_reference_s(Struct): pass +struct__scale_factors_reference_s._fields_ = [ + ('x_scale_fp', ctypes.c_int16), + ('y_scale_fp', ctypes.c_int16), +] +scale_factors_reference_s = struct__scale_factors_reference_s +class struct__frame_info_t(Struct): pass +struct__frame_info_t._fields_ = [ + ('width', ctypes.c_uint16), + ('height', ctypes.c_uint16), + ('stride', (ctypes.c_uint16 * 2)), + ('frame_buffer_idx', ctypes.c_uint32), +] +frame_info_t = struct__frame_info_t +class struct__ref_frame_struct_s(Struct): pass +struct__ref_frame_struct_s._fields_ = [ + ('info', frame_info_t), + ('sf', scale_factors_reference_s), + ('sign_bias', ctypes.c_ubyte,1), + ('wmtype', ctypes.c_ubyte,2), + ('reserved_rf', ctypes.c_ubyte,5), + ('frame_off', ctypes.c_int16), + ('roffset', ctypes.c_int16), +] +ref_frame_struct_s = struct__ref_frame_struct_s +class struct__av1_fgs_cfg_t(Struct): pass +struct__av1_fgs_cfg_t._fields_ = [ + 
('apply_grain', ctypes.c_uint16,1), + ('overlap_flag', ctypes.c_uint16,1), + ('clip_to_restricted_range', ctypes.c_uint16,1), + ('chroma_scaling_from_luma', ctypes.c_uint16,1), + ('num_y_points_b', ctypes.c_uint16,1), + ('num_cb_points_b', ctypes.c_uint16,1), + ('num_cr_points_b', ctypes.c_uint16,1), + ('scaling_shift', ctypes.c_uint16,4), + ('reserved_fgs', ctypes.c_uint16,5), + ('sw_random_seed', ctypes.c_uint16), + ('cb_offset', ctypes.c_int16), + ('cr_offset', ctypes.c_int16), + ('cb_mult', ctypes.c_char), + ('cb_luma_mult', ctypes.c_char), + ('cr_mult', ctypes.c_char), + ('cr_luma_mult', ctypes.c_char), +] +av1_fgs_cfg_t = struct__av1_fgs_cfg_t +class struct__nvdec_av1_pic_s(Struct): pass +struct__nvdec_av1_pic_s._fields_ = [ + ('encryption_params', nvdec_pass2_otf_s), + ('ssm', nvdec_pass2_otf_ext_s), + ('fgs_cfg', av1_fgs_cfg_t), + ('gptimer_timeout_value', ctypes.c_uint32), + ('stream_len', ctypes.c_uint32), + ('reserved12', ctypes.c_uint32), + ('use_128x128_superblock', ctypes.c_uint32,1), + ('chroma_format', ctypes.c_uint32,2), + ('bit_depth', ctypes.c_uint32,4), + ('enable_filter_intra', ctypes.c_uint32,1), + ('enable_intra_edge_filter', ctypes.c_uint32,1), + ('enable_interintra_compound', ctypes.c_uint32,1), + ('enable_masked_compound', ctypes.c_uint32,1), + ('enable_dual_filter', ctypes.c_uint32,1), + ('reserved10', ctypes.c_uint32,1), + ('reserved0', ctypes.c_uint32,3), + ('enable_jnt_comp', ctypes.c_uint32,1), + ('reserved1', ctypes.c_uint32,1), + ('enable_cdef', ctypes.c_uint32,1), + ('reserved11', ctypes.c_uint32,1), + ('enable_fgs', ctypes.c_uint32,1), + ('enable_substream_decoding', ctypes.c_uint32,1), + ('reserved2', ctypes.c_uint32,10), + ('frame_type', ctypes.c_uint32,2), + ('show_frame', ctypes.c_uint32,1), + ('reserved13', ctypes.c_uint32,1), + ('disable_cdf_update', ctypes.c_uint32,1), + ('allow_screen_content_tools', ctypes.c_uint32,1), + ('cur_frame_force_integer_mv', ctypes.c_uint32,1), + ('scale_denom_minus9', ctypes.c_uint32,3), + 
('allow_intrabc', ctypes.c_uint32,1), + ('allow_high_precision_mv', ctypes.c_uint32,1), + ('interp_filter', ctypes.c_uint32,3), + ('switchable_motion_mode', ctypes.c_uint32,1), + ('use_ref_frame_mvs', ctypes.c_uint32,1), + ('refresh_frame_context', ctypes.c_uint32,1), + ('delta_q_present_flag', ctypes.c_uint32,1), + ('delta_q_res', ctypes.c_uint32,2), + ('delta_lf_present_flag', ctypes.c_uint32,1), + ('delta_lf_res', ctypes.c_uint32,2), + ('delta_lf_multi', ctypes.c_uint32,1), + ('reserved3', ctypes.c_uint32,1), + ('coded_lossless', ctypes.c_uint32,1), + ('tile_enabled', ctypes.c_uint32,1), + ('reserved4', ctypes.c_uint32,2), + ('superres_is_scaled', ctypes.c_uint32,1), + ('reserved_fh', ctypes.c_uint32,1), + ('tile_cols', ctypes.c_uint32,8), + ('tile_rows', ctypes.c_uint32,8), + ('context_update_tile_id', ctypes.c_uint32,16), + ('cdef_damping_minus_3', ctypes.c_uint32,2), + ('cdef_bits', ctypes.c_uint32,2), + ('frame_tx_mode', ctypes.c_uint32,3), + ('frame_reference_mode', ctypes.c_uint32,2), + ('skip_mode_flag', ctypes.c_uint32,1), + ('skip_ref0', ctypes.c_uint32,4), + ('skip_ref1', ctypes.c_uint32,4), + ('allow_warp', ctypes.c_uint32,1), + ('reduced_tx_set_used', ctypes.c_uint32,1), + ('ref_scaling_enable', ctypes.c_uint32,1), + ('reserved5', ctypes.c_uint32,1), + ('reserved6', ctypes.c_uint32,10), + ('superres_upscaled_width', ctypes.c_uint16), + ('superres_luma_step', ctypes.c_uint16), + ('superres_chroma_step', ctypes.c_uint16), + ('superres_init_luma_subpel_x', ctypes.c_uint16), + ('superres_init_chroma_subpel_x', ctypes.c_uint16), + ('base_qindex', ctypes.c_ubyte), + ('y_dc_delta_q', ctypes.c_char), + ('u_dc_delta_q', ctypes.c_char), + ('v_dc_delta_q', ctypes.c_char), + ('u_ac_delta_q', ctypes.c_char), + ('v_ac_delta_q', ctypes.c_char), + ('qm_y', ctypes.c_ubyte), + ('qm_u', ctypes.c_ubyte), + ('qm_v', ctypes.c_ubyte), + ('cdef_y_pri_strength', ctypes.c_uint32), + ('cdef_uv_pri_strength', ctypes.c_uint32), + ('cdef_y_sec_strength', ctypes.c_uint32,16), + 
('cdef_uv_sec_strength', ctypes.c_uint32,16), + ('segment_enabled', ctypes.c_ubyte), + ('segment_update_map', ctypes.c_ubyte), + ('reserved7', ctypes.c_ubyte), + ('segment_temporal_update', ctypes.c_ubyte), + ('segment_feature_data', ((ctypes.c_int16 * 8) * 8)), + ('last_active_segid', ctypes.c_ubyte), + ('segid_preskip', ctypes.c_ubyte), + ('prevsegid_flag', ctypes.c_ubyte), + ('segment_quant_sign', ctypes.c_ubyte,8), + ('filter_level', (ctypes.c_ubyte * 2)), + ('filter_level_u', ctypes.c_ubyte), + ('filter_level_v', ctypes.c_ubyte), + ('lf_sharpness_level', ctypes.c_ubyte), + ('lf_ref_deltas', (ctypes.c_char * 8)), + ('lf_mode_deltas', (ctypes.c_char * 2)), + ('lr_type', ctypes.c_ubyte), + ('lr_unit_size', ctypes.c_ubyte), + ('current_frame', frame_info_t), + ('ref_frame', (ref_frame_struct_s * 7)), + ('use_temporal0_mvs', ctypes.c_uint32,1), + ('use_temporal1_mvs', ctypes.c_uint32,1), + ('use_temporal2_mvs', ctypes.c_uint32,1), + ('mf1_type', ctypes.c_uint32,3), + ('mf2_type', ctypes.c_uint32,3), + ('mf3_type', ctypes.c_uint32,3), + ('reserved_mfmv', ctypes.c_uint32,20), + ('mfmv_offset', ((ctypes.c_int16 * 7) * 3)), + ('mfmv_side', ((ctypes.c_char * 7) * 3)), + ('tileformat', ctypes.c_ubyte,2), + ('gob_height', ctypes.c_ubyte,3), + ('errorConcealOn', ctypes.c_ubyte,1), + ('reserver8', ctypes.c_ubyte,2), + ('stream_error_detection', ctypes.c_ubyte,1), + ('mv_error_detection', ctypes.c_ubyte,1), + ('coeff_error_detection', ctypes.c_ubyte,1), + ('reserved_eh', ctypes.c_ubyte,5), + ('Av1FltTopOffset', ctypes.c_uint32), + ('Av1FltVertOffset', ctypes.c_uint32), + ('Av1CdefVertOffset', ctypes.c_uint32), + ('Av1LrVertOffset', ctypes.c_uint32), + ('Av1HusVertOffset', ctypes.c_uint32), + ('Av1FgsVertOffset', ctypes.c_uint32), + ('enable_histogram', ctypes.c_uint32,1), + ('sw_skip_start_length', ctypes.c_uint32,14), + ('reserved_stat', ctypes.c_uint32,17), +] +nvdec_av1_pic_s = struct__nvdec_av1_pic_s +class struct__AV1FilmGrainMemory(Struct): pass 
+struct__AV1FilmGrainMemory._fields_ = [ + ('scaling_lut_y', (ctypes.c_ubyte * 256)), + ('scaling_lut_cb', (ctypes.c_ubyte * 256)), + ('scaling_lut_cr', (ctypes.c_ubyte * 256)), + ('cropped_luma_grain_block', (ctypes.c_int16 * 4096)), + ('cropped_cb_grain_block', (ctypes.c_int16 * 1024)), + ('cropped_cr_grain_block', (ctypes.c_int16 * 1024)), +] +AV1FilmGrainMemory = struct__AV1FilmGrainMemory +class struct__AV1TileInfo_OLD(Struct): pass +struct__AV1TileInfo_OLD._fields_ = [ + ('width_in_sb', ctypes.c_ubyte), + ('height_in_sb', ctypes.c_ubyte), + ('tile_start_b0', ctypes.c_ubyte), + ('tile_start_b1', ctypes.c_ubyte), + ('tile_start_b2', ctypes.c_ubyte), + ('tile_start_b3', ctypes.c_ubyte), + ('tile_end_b0', ctypes.c_ubyte), + ('tile_end_b1', ctypes.c_ubyte), + ('tile_end_b2', ctypes.c_ubyte), + ('tile_end_b3', ctypes.c_ubyte), + ('padding', (ctypes.c_ubyte * 6)), +] +AV1TileInfo_OLD = struct__AV1TileInfo_OLD +class struct__AV1TileInfo(Struct): pass +struct__AV1TileInfo._fields_ = [ + ('width_in_sb', ctypes.c_ubyte), + ('padding_w', ctypes.c_ubyte), + ('height_in_sb', ctypes.c_ubyte), + ('padding_h', ctypes.c_ubyte), +] +AV1TileInfo = struct__AV1TileInfo +class struct__AV1TileStreamInfo(Struct): pass +struct__AV1TileStreamInfo._fields_ = [ + ('tile_start', ctypes.c_uint32), + ('tile_end', ctypes.c_uint32), + ('padding', (ctypes.c_ubyte * 8)), +] +AV1TileStreamInfo = struct__AV1TileStreamInfo +class struct__nvdec_new_h264_pic_s(Struct): pass +struct__nvdec_new_h264_pic_s._fields_ = [ + ('encryption_params', nvdec_pass2_otf_s), + ('eos', (ctypes.c_ubyte * 16)), + ('explicitEOSPresentFlag', ctypes.c_ubyte), + ('hint_dump_en', ctypes.c_ubyte), + ('reserved0', (ctypes.c_ubyte * 2)), + ('stream_len', ctypes.c_uint32), + ('slice_count', ctypes.c_uint32), + ('mbhist_buffer_size', ctypes.c_uint32), + ('gptimer_timeout_value', ctypes.c_uint32), + ('log2_max_pic_order_cnt_lsb_minus4', ctypes.c_int32), + ('delta_pic_order_always_zero_flag', ctypes.c_int32), + 
('frame_mbs_only_flag', ctypes.c_int32), + ('PicWidthInMbs', ctypes.c_int32), + ('FrameHeightInMbs', ctypes.c_int32), + ('tileFormat', ctypes.c_uint32,2), + ('gob_height', ctypes.c_uint32,3), + ('reserverd_surface_format', ctypes.c_uint32,27), + ('entropy_coding_mode_flag', ctypes.c_int32), + ('pic_order_present_flag', ctypes.c_int32), + ('num_ref_idx_l0_active_minus1', ctypes.c_int32), + ('num_ref_idx_l1_active_minus1', ctypes.c_int32), + ('deblocking_filter_control_present_flag', ctypes.c_int32), + ('redundant_pic_cnt_present_flag', ctypes.c_int32), + ('transform_8x8_mode_flag', ctypes.c_int32), + ('pitch_luma', ctypes.c_uint32), + ('pitch_chroma', ctypes.c_uint32), + ('luma_top_offset', ctypes.c_uint32), + ('luma_bot_offset', ctypes.c_uint32), + ('luma_frame_offset', ctypes.c_uint32), + ('chroma_top_offset', ctypes.c_uint32), + ('chroma_bot_offset', ctypes.c_uint32), + ('chroma_frame_offset', ctypes.c_uint32), + ('HistBufferSize', ctypes.c_uint32), + ('MbaffFrameFlag', ctypes.c_uint32,1), + ('direct_8x8_inference_flag', ctypes.c_uint32,1), + ('weighted_pred_flag', ctypes.c_uint32,1), + ('constrained_intra_pred_flag', ctypes.c_uint32,1), + ('ref_pic_flag', ctypes.c_uint32,1), + ('field_pic_flag', ctypes.c_uint32,1), + ('bottom_field_flag', ctypes.c_uint32,1), + ('second_field', ctypes.c_uint32,1), + ('log2_max_frame_num_minus4', ctypes.c_uint32,4), + ('chroma_format_idc', ctypes.c_uint32,2), + ('pic_order_cnt_type', ctypes.c_uint32,2), + ('pic_init_qp_minus26', ctypes.c_int32,6), + ('chroma_qp_index_offset', ctypes.c_int32,5), + ('second_chroma_qp_index_offset', ctypes.c_int32,5), + ('weighted_bipred_idc', ctypes.c_uint32,2), + ('CurrPicIdx', ctypes.c_uint32,7), + ('CurrColIdx', ctypes.c_uint32,5), + ('frame_num', ctypes.c_uint32,16), + ('frame_surfaces', ctypes.c_uint32,1), + ('output_memory_layout', ctypes.c_uint32,1), + ('CurrFieldOrderCnt', (ctypes.c_int32 * 2)), + ('dpb', (nvdec_dpb_entry_s * 16)), + ('WeightScale', (((ctypes.c_ubyte * 4) * 4) * 6)), + 
('WeightScale8x8', (((ctypes.c_ubyte * 8) * 8) * 2)), + ('num_inter_view_refs_lX', (ctypes.c_ubyte * 2)), + ('reserved1', (ctypes.c_char * 14)), + ('inter_view_refidx_lX', ((ctypes.c_byte * 16) * 2)), + ('lossless_ipred8x8_filter_enable', ctypes.c_uint32,1), + ('qpprime_y_zero_transform_bypass_flag', ctypes.c_uint32,1), + ('reserved2', ctypes.c_uint32,30), + ('displayPara', nvdec_display_param_s), + ('ssm', nvdec_pass2_otf_ext_s), +] +nvdec_new_h264_pic_s = struct__nvdec_new_h264_pic_s +class nvdec_crc_s(Struct): pass +nvdec_crc_s._fields_ = [ + ('dbg_crc_enable_partb', ctypes.c_uint32,1), + ('dbg_crc_enable_partc', ctypes.c_uint32,1), + ('dbg_crc_enable_partd', ctypes.c_uint32,1), + ('dbg_crc_enable_parte', ctypes.c_uint32,1), + ('dbg_crc_intf_partb', ctypes.c_uint32,6), + ('dbg_crc_intf_partc', ctypes.c_uint32,6), + ('dbg_crc_intf_partd', ctypes.c_uint32,6), + ('dbg_crc_intf_parte', ctypes.c_uint32,6), + ('reserved0', ctypes.c_uint32,4), + ('dbg_crc_partb_golden', (ctypes.c_uint32 * 4)), + ('dbg_crc_partc_golden', (ctypes.c_uint32 * 4)), + ('dbg_crc_partd_golden', (ctypes.c_uint32 * 4)), + ('dbg_crc_parte_golden', (ctypes.c_uint32 * 4)), + ('dbg_crc_comp_partb', ctypes.c_uint32,4), + ('dbg_crc_comp_partc', ctypes.c_uint32,4), + ('dbg_crc_comp_partd', ctypes.c_uint32,4), + ('dbg_crc_comp_parte', ctypes.c_uint32,4), + ('reserved1', ctypes.c_uint32,16), + ('reserved2', (ctypes.c_ubyte * 56)), +] +class _anonunion12(ctypes.Union): pass NvUPtr = ctypes.c_uint64 -_anonunion0._fields_ = [ +_anonunion12._fields_ = [ ('v', NvUPtr), ('p', ctypes.c_void_p), ] -class _anonunion1(ctypes.Union): pass -_anonunion1._fields_ = [ +class _anonunion13(ctypes.Union): pass +_anonunion13._fields_ = [ ('v', NvUPtr), ('p', ctypes.c_void_p), ] @@ -24,9 +1345,16 @@ struct_NV0000_ALLOC_PARAMETERS._fields_ = [ ('pOsPidInfo', NvP64), ] NV0000_ALLOC_PARAMETERS = struct_NV0000_ALLOC_PARAMETERS +class struct_NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS(Struct): pass +NvU64 = ctypes.c_uint64 
+struct_NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS._fields_ = [ + ('offset', NvU64), + ('limit', NvU64), + ('hVASpace', NvHandle), +] +NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS = struct_NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS class struct_NV0080_ALLOC_PARAMETERS(Struct): pass NvV32 = ctypes.c_uint32 -NvU64 = ctypes.c_uint64 struct_NV0080_ALLOC_PARAMETERS._fields_ = [ ('deviceId', NvU32), ('hClientShare', NvHandle), @@ -2121,6 +3449,36 @@ struct_NV90F1_CTRL_VASPACE_GET_VAS_HEAP_INFO_PARAMS._fields_ = [ ('numFreeBlocks', NvU32), ] NV90F1_CTRL_VASPACE_GET_VAS_HEAP_INFO_PARAMS = struct_NV90F1_CTRL_VASPACE_GET_VAS_HEAP_INFO_PARAMS +NVA06F_CTRL_GPFIFO_SCHEDULE_PARAMS = struct_NVA06F_CTRL_GPFIFO_SCHEDULE_PARAMS +NVA06F_CTRL_BIND_PARAMS = struct_NVA06F_CTRL_BIND_PARAMS +class struct_NVA06F_CTRL_SET_ERROR_NOTIFIER_PARAMS(Struct): pass +struct_NVA06F_CTRL_SET_ERROR_NOTIFIER_PARAMS._fields_ = [ + ('bNotifyEachChannelInTSG', NvBool), +] +NVA06F_CTRL_SET_ERROR_NOTIFIER_PARAMS = struct_NVA06F_CTRL_SET_ERROR_NOTIFIER_PARAMS +class struct_NVA06F_CTRL_INTERLEAVE_LEVEL_PARAMS(Struct): pass +struct_NVA06F_CTRL_INTERLEAVE_LEVEL_PARAMS._fields_ = [ + ('channelInterleaveLevel', NvU32), +] +NVA06F_CTRL_INTERLEAVE_LEVEL_PARAMS = struct_NVA06F_CTRL_INTERLEAVE_LEVEL_PARAMS +NVA06F_CTRL_SET_INTERLEAVE_LEVEL_PARAMS = struct_NVA06F_CTRL_INTERLEAVE_LEVEL_PARAMS +NVA06F_CTRL_GET_INTERLEAVE_LEVEL_PARAMS = struct_NVA06F_CTRL_INTERLEAVE_LEVEL_PARAMS +class struct_NVA06F_CTRL_RESTART_RUNLIST_PARAMS(Struct): pass +struct_NVA06F_CTRL_RESTART_RUNLIST_PARAMS._fields_ = [ + ('bForceRestart', NvBool), + ('bBypassWait', NvBool), +] +NVA06F_CTRL_RESTART_RUNLIST_PARAMS = struct_NVA06F_CTRL_RESTART_RUNLIST_PARAMS +class struct_NVA06F_CTRL_STOP_CHANNEL_PARAMS(Struct): pass +struct_NVA06F_CTRL_STOP_CHANNEL_PARAMS._fields_ = [ + ('bImmediate', NvBool), +] +NVA06F_CTRL_STOP_CHANNEL_PARAMS = struct_NVA06F_CTRL_STOP_CHANNEL_PARAMS +class struct_NVA06F_CTRL_GET_CONTEXT_ID_PARAMS(Struct): pass 
+struct_NVA06F_CTRL_GET_CONTEXT_ID_PARAMS._fields_ = [ + ('contextId', NvU32), +] +NVA06F_CTRL_GET_CONTEXT_ID_PARAMS = struct_NVA06F_CTRL_GET_CONTEXT_ID_PARAMS class struct_NV0000_CTRL_CLIENT_GET_ADDR_SPACE_TYPE_PARAMS(Struct): pass struct_NV0000_CTRL_CLIENT_GET_ADDR_SPACE_TYPE_PARAMS._fields_ = [ ('hObject', NvHandle), @@ -10869,6 +12227,402 @@ NV_WARN_NOTHING_TO_DO = nv_status_codes.define('NV_WARN_NOTHING_TO_DO', 65542) NV_WARN_NULL_OBJECT = nv_status_codes.define('NV_WARN_NULL_OBJECT', 65543) NV_WARN_OUT_OF_RANGE = nv_status_codes.define('NV_WARN_OUT_OF_RANGE', 65544) +NVC9B0_VIDEO_DECODER = (0x0000C9B0) +NVC9B0_NOP = (0x00000100) +NVC9B0_PM_TRIGGER = (0x00000140) +NVC9B0_SET_APPLICATION_ID = (0x00000200) +NVC9B0_SET_APPLICATION_ID_ID_MPEG12 = (0x00000001) +NVC9B0_SET_APPLICATION_ID_ID_VC1 = (0x00000002) +NVC9B0_SET_APPLICATION_ID_ID_H264 = (0x00000003) +NVC9B0_SET_APPLICATION_ID_ID_MPEG4 = (0x00000004) +NVC9B0_SET_APPLICATION_ID_ID_VP8 = (0x00000005) +NVC9B0_SET_APPLICATION_ID_ID_CTR64 = (0x00000006) +NVC9B0_SET_APPLICATION_ID_ID_HEVC = (0x00000007) +NVC9B0_SET_APPLICATION_ID_ID_NEW_H264 = (0x00000008) +NVC9B0_SET_APPLICATION_ID_ID_VP9 = (0x00000009) +NVC9B0_SET_APPLICATION_ID_ID_PASS1 = (0x0000000A) +NVC9B0_SET_APPLICATION_ID_ID_HEVC_PARSER = (0x0000000C) +NVC9B0_SET_APPLICATION_ID_ID_UCODE_TEST = (0x0000000D) +NVC9B0_SET_APPLICATION_ID_ID_HWDRM_PR_DECRYPTAUDIO = (0x0000000E) +NVC9B0_SET_APPLICATION_ID_ID_HWDRM_PR_DECRYPTAUDIOMULTIPLE = (0x0000000F) +NVC9B0_SET_APPLICATION_ID_ID_HWDRM_PR_PREPROCESSENCRYPTEDDATA = (0x00000010) +NVC9B0_SET_APPLICATION_ID_ID_VP9_WITH_PARSER = (0x00000011) +NVC9B0_SET_APPLICATION_ID_ID_AVD = (0x00000012) +NVC9B0_SET_APPLICATION_ID_ID_HW_DRM_PR4_DECRYPTCONTENTMULTIPLE = (0x00000013) +NVC9B0_SET_APPLICATION_ID_ID_DHKE = (0x00000020) +NVC9B0_SET_WATCHDOG_TIMER = (0x00000204) +NVC9B0_SEMAPHORE_A = (0x00000240) +NVC9B0_SEMAPHORE_B = (0x00000244) +NVC9B0_SEMAPHORE_C = (0x00000248) +NVC9B0_CTX_SAVE_AREA = (0x0000024C) +NVC9B0_CTX_SWITCH 
= (0x00000250) +NVC9B0_CTX_SWITCH_OP_CTX_UPDATE = (0x00000000) +NVC9B0_CTX_SWITCH_OP_CTX_SAVE = (0x00000001) +NVC9B0_CTX_SWITCH_OP_CTX_RESTORE = (0x00000002) +NVC9B0_CTX_SWITCH_OP_CTX_FORCERESTORE = (0x00000003) +NVC9B0_CTX_SWITCH_CTXID_VALID_FALSE = (0x00000000) +NVC9B0_CTX_SWITCH_CTXID_VALID_TRUE = (0x00000001) +NVC9B0_SET_SEMAPHORE_PAYLOAD_LOWER = (0x00000254) +NVC9B0_SET_SEMAPHORE_PAYLOAD_UPPER = (0x00000258) +NVC9B0_SET_MONITORED_FENCE_SIGNAL_ADDRESS_BASE_A = (0x0000025C) +NVC9B0_SET_MONITORED_FENCE_SIGNAL_ADDRESS_BASE_B = (0x00000260) +NVC9B0_EXECUTE = (0x00000300) +NVC9B0_EXECUTE_NOTIFY_DISABLE = (0x00000000) +NVC9B0_EXECUTE_NOTIFY_ENABLE = (0x00000001) +NVC9B0_EXECUTE_NOTIFY_ON_END = (0x00000000) +NVC9B0_EXECUTE_NOTIFY_ON_BEGIN = (0x00000001) +NVC9B0_EXECUTE_PREDICATION_DISABLE = (0x00000000) +NVC9B0_EXECUTE_PREDICATION_ENABLE = (0x00000001) +NVC9B0_EXECUTE_PREDICATION_OP_EQUAL_ZERO = (0x00000000) +NVC9B0_EXECUTE_PREDICATION_OP_NOT_EQUAL_ZERO = (0x00000001) +NVC9B0_EXECUTE_AWAKEN_DISABLE = (0x00000000) +NVC9B0_EXECUTE_AWAKEN_ENABLE = (0x00000001) +NVC9B0_SEMAPHORE_D = (0x00000304) +NVC9B0_SEMAPHORE_D_STRUCTURE_SIZE_ONE = (0x00000000) +NVC9B0_SEMAPHORE_D_STRUCTURE_SIZE_FOUR = (0x00000001) +NVC9B0_SEMAPHORE_D_STRUCTURE_SIZE_TWO = (0x00000002) +NVC9B0_SEMAPHORE_D_AWAKEN_ENABLE_FALSE = (0x00000000) +NVC9B0_SEMAPHORE_D_AWAKEN_ENABLE_TRUE = (0x00000001) +NVC9B0_SEMAPHORE_D_OPERATION_RELEASE = (0x00000000) +NVC9B0_SEMAPHORE_D_OPERATION_RESERVED_0 = (0x00000001) +NVC9B0_SEMAPHORE_D_OPERATION_RESERVED_1 = (0x00000002) +NVC9B0_SEMAPHORE_D_OPERATION_TRAP = (0x00000003) +NVC9B0_SEMAPHORE_D_FLUSH_DISABLE_FALSE = (0x00000000) +NVC9B0_SEMAPHORE_D_FLUSH_DISABLE_TRUE = (0x00000001) +NVC9B0_SEMAPHORE_D_TRAP_TYPE_UNCONDITIONAL = (0x00000000) +NVC9B0_SEMAPHORE_D_TRAP_TYPE_CONDITIONAL = (0x00000001) +NVC9B0_SEMAPHORE_D_TRAP_TYPE_CONDITIONAL_EXT = (0x00000002) +NVC9B0_SEMAPHORE_D_PAYLOAD_SIZE_32BIT = (0x00000000) +NVC9B0_SEMAPHORE_D_PAYLOAD_SIZE_64BIT = (0x00000001) 
+NVC9B0_SET_PREDICATION_OFFSET_UPPER = (0x00000308) +NVC9B0_SET_PREDICATION_OFFSET_LOWER = (0x0000030C) +NVC9B0_SET_AUXILIARY_DATA_BUFFER = (0x00000310) +NVC9B0_SET_CONTROL_PARAMS = (0x00000400) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_MPEG1 = (0x00000000) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_MPEG2 = (0x00000001) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_VC1 = (0x00000002) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_H264 = (0x00000003) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_MPEG4 = (0x00000004) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_DIVX3 = (0x00000004) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_VP8 = (0x00000005) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_HEVC = (0x00000007) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_VP9 = (0x00000009) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_AV1 = (0x0000000A) +NVC9B0_SET_CONTROL_PARAMS_TESTRUN_ENV_TRACE3D_RUN = (0x00000000) +NVC9B0_SET_CONTROL_PARAMS_TESTRUN_ENV_PROD_RUN = (0x00000001) +NVC9B0_SET_DRV_PIC_SETUP_OFFSET = (0x00000404) +NVC9B0_SET_IN_BUF_BASE_OFFSET = (0x00000408) +NVC9B0_SET_PICTURE_INDEX = (0x0000040C) +NVC9B0_SET_SLICE_OFFSETS_BUF_OFFSET = (0x00000410) +NVC9B0_SET_COLOC_DATA_OFFSET = (0x00000414) +NVC9B0_SET_HISTORY_OFFSET = (0x00000418) +NVC9B0_SET_DISPLAY_BUF_SIZE = (0x0000041C) +NVC9B0_SET_HISTOGRAM_OFFSET = (0x00000420) +NVC9B0_SET_NVDEC_STATUS_OFFSET = (0x00000424) +NVC9B0_SET_DISPLAY_BUF_LUMA_OFFSET = (0x00000428) +NVC9B0_SET_DISPLAY_BUF_CHROMA_OFFSET = (0x0000042C) +NVC9B0_SET_PICTURE_LUMA_OFFSET0 = (0x00000430) +NVC9B0_SET_PICTURE_LUMA_OFFSET1 = (0x00000434) +NVC9B0_SET_PICTURE_LUMA_OFFSET2 = (0x00000438) +NVC9B0_SET_PICTURE_LUMA_OFFSET3 = (0x0000043C) +NVC9B0_SET_PICTURE_LUMA_OFFSET4 = (0x00000440) +NVC9B0_SET_PICTURE_LUMA_OFFSET5 = (0x00000444) +NVC9B0_SET_PICTURE_LUMA_OFFSET6 = (0x00000448) +NVC9B0_SET_PICTURE_LUMA_OFFSET7 = (0x0000044C) +NVC9B0_SET_PICTURE_LUMA_OFFSET8 = (0x00000450) +NVC9B0_SET_PICTURE_LUMA_OFFSET9 = (0x00000454) +NVC9B0_SET_PICTURE_LUMA_OFFSET10 = (0x00000458) +NVC9B0_SET_PICTURE_LUMA_OFFSET11 = (0x0000045C) 
+NVC9B0_SET_PICTURE_LUMA_OFFSET12 = (0x00000460) +NVC9B0_SET_PICTURE_LUMA_OFFSET13 = (0x00000464) +NVC9B0_SET_PICTURE_LUMA_OFFSET14 = (0x00000468) +NVC9B0_SET_PICTURE_LUMA_OFFSET15 = (0x0000046C) +NVC9B0_SET_PICTURE_LUMA_OFFSET16 = (0x00000470) +NVC9B0_SET_PICTURE_CHROMA_OFFSET0 = (0x00000474) +NVC9B0_SET_PICTURE_CHROMA_OFFSET1 = (0x00000478) +NVC9B0_SET_PICTURE_CHROMA_OFFSET2 = (0x0000047C) +NVC9B0_SET_PICTURE_CHROMA_OFFSET3 = (0x00000480) +NVC9B0_SET_PICTURE_CHROMA_OFFSET4 = (0x00000484) +NVC9B0_SET_PICTURE_CHROMA_OFFSET5 = (0x00000488) +NVC9B0_SET_PICTURE_CHROMA_OFFSET6 = (0x0000048C) +NVC9B0_SET_PICTURE_CHROMA_OFFSET7 = (0x00000490) +NVC9B0_SET_PICTURE_CHROMA_OFFSET8 = (0x00000494) +NVC9B0_SET_PICTURE_CHROMA_OFFSET9 = (0x00000498) +NVC9B0_SET_PICTURE_CHROMA_OFFSET10 = (0x0000049C) +NVC9B0_SET_PICTURE_CHROMA_OFFSET11 = (0x000004A0) +NVC9B0_SET_PICTURE_CHROMA_OFFSET12 = (0x000004A4) +NVC9B0_SET_PICTURE_CHROMA_OFFSET13 = (0x000004A8) +NVC9B0_SET_PICTURE_CHROMA_OFFSET14 = (0x000004AC) +NVC9B0_SET_PICTURE_CHROMA_OFFSET15 = (0x000004B0) +NVC9B0_SET_PICTURE_CHROMA_OFFSET16 = (0x000004B4) +NVC9B0_SET_PIC_SCRATCH_BUF_OFFSET = (0x000004B8) +NVC9B0_SET_EXTERNAL_MVBUFFER_OFFSET = (0x000004BC) +NVC9B0_SET_SUB_SAMPLE_MAP_OFFSET = (0x000004C0) +NVC9B0_SET_SUB_SAMPLE_MAP_IV_OFFSET = (0x000004C4) +NVC9B0_SET_INTRA_TOP_BUF_OFFSET = (0x000004C8) +NVC9B0_SET_TILE_SIZE_BUF_OFFSET = (0x000004CC) +NVC9B0_SET_FILTER_BUFFER_OFFSET = (0x000004D0) +NVC9B0_SET_CRC_STRUCT_OFFSET = (0x000004D4) +NVC9B0_SET_PR_SSM_CONTENT_INFO_BUF_OFFSET = (0x000004D8) +NVC9B0_H264_SET_MBHIST_BUF_OFFSET = (0x00000500) +NVC9B0_VP8_SET_PROB_DATA_OFFSET = (0x00000540) +NVC9B0_VP8_SET_HEADER_PARTITION_BUF_BASE_OFFSET = (0x00000544) +NVC9B0_HEVC_SET_SCALING_LIST_OFFSET = (0x00000580) +NVC9B0_HEVC_SET_TILE_SIZES_OFFSET = (0x00000584) +NVC9B0_HEVC_SET_FILTER_BUFFER_OFFSET = (0x00000588) +NVC9B0_HEVC_SET_SAO_BUFFER_OFFSET = (0x0000058C) +NVC9B0_HEVC_SET_SLICE_INFO_BUFFER_OFFSET = (0x00000590) 
+NVC9B0_HEVC_SET_SLICE_GROUP_INDEX = (0x00000594) +NVC9B0_VP9_SET_PROB_TAB_BUF_OFFSET = (0x000005C0) +NVC9B0_VP9_SET_CTX_COUNTER_BUF_OFFSET = (0x000005C4) +NVC9B0_VP9_SET_SEGMENT_READ_BUF_OFFSET = (0x000005C8) +NVC9B0_VP9_SET_SEGMENT_WRITE_BUF_OFFSET = (0x000005CC) +NVC9B0_VP9_SET_TILE_SIZE_BUF_OFFSET = (0x000005D0) +NVC9B0_VP9_SET_COL_MVWRITE_BUF_OFFSET = (0x000005D4) +NVC9B0_VP9_SET_COL_MVREAD_BUF_OFFSET = (0x000005D8) +NVC9B0_VP9_SET_FILTER_BUFFER_OFFSET = (0x000005DC) +NVC9B0_VP9_PARSER_SET_PIC_SETUP_OFFSET = (0x000005E0) +NVC9B0_VP9_PARSER_SET_PREV_PIC_SETUP_OFFSET = (0x000005E4) +NVC9B0_VP9_PARSER_SET_PROB_TAB_BUF_OFFSET = (0x000005E8) +NVC9B0_VP9_SET_HINT_DUMP_BUF_OFFSET = (0x000005EC) +NVC9B0_PASS1_SET_CLEAR_HEADER_OFFSET = (0x00000600) +NVC9B0_PASS1_SET_RE_ENCRYPT_OFFSET = (0x00000604) +NVC9B0_PASS1_SET_VP8_TOKEN_OFFSET = (0x00000608) +NVC9B0_PASS1_SET_INPUT_DATA_OFFSET = (0x0000060C) +NVC9B0_PASS1_SET_OUTPUT_DATA_SIZE_OFFSET = (0x00000610) +NVC9B0_AV1_SET_PROB_TAB_READ_BUF_OFFSET = (0x00000640) +NVC9B0_AV1_SET_PROB_TAB_WRITE_BUF_OFFSET = (0x00000644) +NVC9B0_AV1_SET_SEGMENT_READ_BUF_OFFSET = (0x00000648) +NVC9B0_AV1_SET_SEGMENT_WRITE_BUF_OFFSET = (0x0000064C) +NVC9B0_AV1_SET_COL_MV0_READ_BUF_OFFSET = (0x00000650) +NVC9B0_AV1_SET_COL_MV1_READ_BUF_OFFSET = (0x00000654) +NVC9B0_AV1_SET_COL_MV2_READ_BUF_OFFSET = (0x00000658) +NVC9B0_AV1_SET_COL_MVWRITE_BUF_OFFSET = (0x0000065C) +NVC9B0_AV1_SET_GLOBAL_MODEL_BUF_OFFSET = (0x00000660) +NVC9B0_AV1_SET_FILM_GRAIN_BUF_OFFSET = (0x00000664) +NVC9B0_AV1_SET_TILE_STREAM_INFO_BUF_OFFSET = (0x00000668) +NVC9B0_AV1_SET_SUB_STREAM_ENTRY_BUF_OFFSET = (0x0000066C) +NVC9B0_AV1_SET_HINT_DUMP_BUF_OFFSET = (0x00000670) +NVC9B0_H264_SET_SCALING_LIST_OFFSET = (0x00000680) +NVC9B0_H264_SET_VLDHIST_BUF_OFFSET = (0x00000684) +NVC9B0_H264_SET_EDOBOFFSET0 = (0x00000688) +NVC9B0_H264_SET_EDOBOFFSET1 = (0x0000068C) +NVC9B0_H264_SET_EDOBOFFSET2 = (0x00000690) +NVC9B0_H264_SET_EDOBOFFSET3 = (0x00000694) +NVC9B0_SET_CONTENT_INITIAL_VECTOR 
= lambda b: (0x00000C00 + (b)*0x00000004) +NVC9B0_SET_CTL_COUNT = (0x00000C10) +NVC9B0_SET_UPPER_SRC = (0x00000C14) +NVC9B0_SET_LOWER_SRC = (0x00000C18) +NVC9B0_SET_UPPER_DST = (0x00000C1C) +NVC9B0_SET_LOWER_DST = (0x00000C20) +NVC9B0_SET_BLOCK_COUNT = (0x00000C24) +NVC9B0_PR_SET_REQUEST_BUF_OFFSET = (0x00000D00) +NVC9B0_PR_SET_REQUEST_BUF_SIZE = (0x00000D04) +NVC9B0_PR_SET_RESPONSE_BUF_OFFSET = (0x00000D08) +NVC9B0_PR_SET_RESPONSE_BUF_SIZE = (0x00000D0C) +NVC9B0_PR_SET_REQUEST_MESSAGE_BUF_OFFSET = (0x00000D10) +NVC9B0_PR_SET_RESPONSE_MESSAGE_BUF_OFFSET = (0x00000D14) +NVC9B0_PR_SET_LOCAL_DECRYPT_BUF_OFFSET = (0x00000D18) +NVC9B0_PR_SET_LOCAL_DECRYPT_BUF_SIZE = (0x00000D1C) +NVC9B0_PR_SET_CONTENT_DECRYPT_INFO_BUF_OFFSET = (0x00000D20) +NVC9B0_PR_SET_REENCRYPTED_BITSTREAM_BUF_OFFSET = (0x00000D24) +NVC9B0_DH_KE_SET_CHALLENGE_BUF_OFFSET = (0x00000E00) +NVC9B0_DH_KE_SET_RESPONSE_BUF_OFFSET = (0x00000E04) +NVC9B0_SET_SESSION_KEY = lambda b: (0x00000F00 + (b)*0x00000004) +NVC9B0_SET_CONTENT_KEY = lambda b: (0x00000F10 + (b)*0x00000004) +NVC9B0_PM_TRIGGER_END = (0x00001114) +NVC9B0_ERROR_NONE = (0x00000000) +NVC9B0_OS_ERROR_EXECUTE_INSUFFICIENT_DATA = (0x00000001) +NVC9B0_OS_ERROR_SEMAPHORE_INSUFFICIENT_DATA = (0x00000002) +NVC9B0_OS_ERROR_INVALID_METHOD = (0x00000003) +NVC9B0_OS_ERROR_INVALID_DMA_PAGE = (0x00000004) +NVC9B0_OS_ERROR_UNHANDLED_INTERRUPT = (0x00000005) +NVC9B0_OS_ERROR_EXCEPTION = (0x00000006) +NVC9B0_OS_ERROR_INVALID_CTXSW_REQUEST = (0x00000007) +NVC9B0_OS_ERROR_APPLICATION = (0x00000008) +NVC9B0_OS_ERROR_SW_BREAKPT = (0x00000009) +NVC9B0_OS_INTERRUPT_EXECUTE_AWAKEN = (0x00000100) +NVC9B0_OS_INTERRUPT_BACKEND_SEMAPHORE_AWAKEN = (0x00000200) +NVC9B0_OS_INTERRUPT_CTX_ERROR_FBIF = (0x00000300) +NVC9B0_OS_INTERRUPT_LIMIT_VIOLATION = (0x00000400) +NVC9B0_OS_INTERRUPT_LIMIT_AND_FBIF_CTX_ERROR = (0x00000500) +NVC9B0_OS_INTERRUPT_HALT_ENGINE = (0x00000600) +NVC9B0_OS_INTERRUPT_TRAP_NONSTALL = (0x00000700) +NVC9B0_H264_VLD_ERR_SEQ_DATA_INCONSISTENT = (0x00004001) 
+NVC9B0_H264_VLD_ERR_PIC_DATA_INCONSISTENT = (0x00004002) +NVC9B0_H264_VLD_ERR_SLC_DATA_BUF_ADDR_OUT_OF_BOUNDS = (0x00004100) +NVC9B0_H264_VLD_ERR_BITSTREAM_ERROR = (0x00004101) +NVC9B0_H264_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID = (0x000041F8) +NVC9B0_H264_VLD_ERR_SLC_HDR_OUT_SIZE_NOT_MULT256 = (0x00004200) +NVC9B0_H264_VLD_ERR_SLC_DATA_OUT_SIZE_NOT_MULT256 = (0x00004201) +NVC9B0_H264_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID = (0x00004203) +NVC9B0_H264_VLD_ERR_CTX_DMA_ID_SLC_HDR_OUT_INVALID = (0x00004204) +NVC9B0_H264_VLD_ERR_SLC_HDR_OUT_BUF_TOO_SMALL = (0x00004205) +NVC9B0_H264_VLD_ERR_SLC_HDR_OUT_BUF_ALREADY_VALID = (0x00004206) +NVC9B0_H264_VLD_ERR_SLC_DATA_OUT_BUF_TOO_SMALL = (0x00004207) +NVC9B0_H264_VLD_ERR_DATA_BUF_CNT_TOO_SMALL = (0x00004208) +NVC9B0_H264_VLD_ERR_BITSTREAM_EMPTY = (0x00004209) +NVC9B0_H264_VLD_ERR_FRAME_WIDTH_TOO_LARGE = (0x0000420A) +NVC9B0_H264_VLD_ERR_FRAME_HEIGHT_TOO_LARGE = (0x0000420B) +NVC9B0_H264_VLD_ERR_HIST_BUF_TOO_SMALL = (0x00004300) +NVC9B0_VC1_VLD_ERR_PIC_DATA_BUF_ADDR_OUT_OF_BOUND = (0x00005100) +NVC9B0_VC1_VLD_ERR_BITSTREAM_ERROR = (0x00005101) +NVC9B0_VC1_VLD_ERR_PIC_HDR_OUT_SIZE_NOT_MULT256 = (0x00005200) +NVC9B0_VC1_VLD_ERR_PIC_DATA_OUT_SIZE_NOT_MULT256 = (0x00005201) +NVC9B0_VC1_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID = (0x00005202) +NVC9B0_VC1_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID = (0x00005203) +NVC9B0_VC1_VLD_ERR_CTX_DMA_ID_PIC_HDR_OUT_INVALID = (0x00005204) +NVC9B0_VC1_VLD_ERR_SLC_HDR_OUT_BUF_TOO_SMALL = (0x00005205) +NVC9B0_VC1_VLD_ERR_PIC_HDR_OUT_BUF_ALREADY_VALID = (0x00005206) +NVC9B0_VC1_VLD_ERR_PIC_DATA_OUT_BUF_TOO_SMALL = (0x00005207) +NVC9B0_VC1_VLD_ERR_DATA_INFO_IN_BUF_TOO_SMALL = (0x00005208) +NVC9B0_VC1_VLD_ERR_BITSTREAM_EMPTY = (0x00005209) +NVC9B0_VC1_VLD_ERR_FRAME_WIDTH_TOO_LARGE = (0x0000520A) +NVC9B0_VC1_VLD_ERR_FRAME_HEIGHT_TOO_LARGE = (0x0000520B) +NVC9B0_VC1_VLD_ERR_PIC_DATA_OUT_BUF_FULL_TIME_OUT = (0x00005300) +NVC9B0_MPEG12_VLD_ERR_SLC_DATA_BUF_ADDR_OUT_OF_BOUNDS = (0x00006100) 
+NVC9B0_MPEG12_VLD_ERR_BITSTREAM_ERROR = (0x00006101) +NVC9B0_MPEG12_VLD_ERR_SLC_DATA_OUT_SIZE_NOT_MULT256 = (0x00006200) +NVC9B0_MPEG12_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID = (0x00006201) +NVC9B0_MPEG12_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID = (0x00006202) +NVC9B0_MPEG12_VLD_ERR_SLC_DATA_OUT_BUF_TOO_SMALL = (0x00006203) +NVC9B0_MPEG12_VLD_ERR_DATA_INFO_IN_BUF_TOO_SMALL = (0x00006204) +NVC9B0_MPEG12_VLD_ERR_BITSTREAM_EMPTY = (0x00006205) +NVC9B0_MPEG12_VLD_ERR_INVALID_PIC_STRUCTURE = (0x00006206) +NVC9B0_MPEG12_VLD_ERR_INVALID_PIC_CODING_TYPE = (0x00006207) +NVC9B0_MPEG12_VLD_ERR_FRAME_WIDTH_TOO_LARGE = (0x00006208) +NVC9B0_MPEG12_VLD_ERR_FRAME_HEIGHT_TOO_LARGE = (0x00006209) +NVC9B0_MPEG12_VLD_ERR_SLC_DATA_OUT_BUF_FULL_TIME_OUT = (0x00006300) +NVC9B0_CMN_VLD_ERR_PDEC_RETURNED_ERROR = (0x00007101) +NVC9B0_CMN_VLD_ERR_EDOB_FLUSH_TIME_OUT = (0x00007102) +NVC9B0_CMN_VLD_ERR_EDOB_REWIND_TIME_OUT = (0x00007103) +NVC9B0_CMN_VLD_ERR_VLD_WD_TIME_OUT = (0x00007104) +NVC9B0_CMN_VLD_ERR_NUM_SLICES_ZERO = (0x00007105) +NVC9B0_MPEG4_VLD_ERR_PIC_DATA_BUF_ADDR_OUT_OF_BOUND = (0x00008100) +NVC9B0_MPEG4_VLD_ERR_BITSTREAM_ERROR = (0x00008101) +NVC9B0_MPEG4_VLD_ERR_PIC_HDR_OUT_SIZE_NOT_MULT256 = (0x00008200) +NVC9B0_MPEG4_VLD_ERR_PIC_DATA_OUT_SIZE_NOT_MULT256 = (0x00008201) +NVC9B0_MPEG4_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID = (0x00008202) +NVC9B0_MPEG4_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID = (0x00008203) +NVC9B0_MPEG4_VLD_ERR_CTX_DMA_ID_PIC_HDR_OUT_INVALID = (0x00008204) +NVC9B0_MPEG4_VLD_ERR_SLC_HDR_OUT_BUF_TOO_SMALL = (0x00008205) +NVC9B0_MPEG4_VLD_ERR_PIC_HDR_OUT_BUF_ALREADY_VALID = (0x00008206) +NVC9B0_MPEG4_VLD_ERR_PIC_DATA_OUT_BUF_TOO_SMALL = (0x00008207) +NVC9B0_MPEG4_VLD_ERR_DATA_INFO_IN_BUF_TOO_SMALL = (0x00008208) +NVC9B0_MPEG4_VLD_ERR_BITSTREAM_EMPTY = (0x00008209) +NVC9B0_MPEG4_VLD_ERR_FRAME_WIDTH_TOO_LARGE = (0x0000820A) +NVC9B0_MPEG4_VLD_ERR_FRAME_HEIGHT_TOO_LARGE = (0x0000820B) +NVC9B0_MPEG4_VLD_ERR_PIC_DATA_OUT_BUF_FULL_TIME_OUT = (0x00051E01) 
+NVC9B0_DEC_ERROR_MPEG12_APPTIMER_EXPIRED = (0xDEC10001) +NVC9B0_DEC_ERROR_MPEG12_MVTIMER_EXPIRED = (0xDEC10002) +NVC9B0_DEC_ERROR_MPEG12_INVALID_TOKEN = (0xDEC10003) +NVC9B0_DEC_ERROR_MPEG12_SLICEDATA_MISSING = (0xDEC10004) +NVC9B0_DEC_ERROR_MPEG12_HWERR_INTERRUPT = (0xDEC10005) +NVC9B0_DEC_ERROR_MPEG12_DETECTED_VLD_FAILURE = (0xDEC10006) +NVC9B0_DEC_ERROR_MPEG12_PICTURE_INIT = (0xDEC10100) +NVC9B0_DEC_ERROR_MPEG12_STATEMACHINE_FAILURE = (0xDEC10101) +NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_PIC = (0xDEC10901) +NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_UCODE = (0xDEC10902) +NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_FC = (0xDEC10903) +NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_SLH = (0xDEC10904) +NVC9B0_DEC_ERROR_MPEG12_INVALID_UCODE_SIZE = (0xDEC10905) +NVC9B0_DEC_ERROR_MPEG12_INVALID_SLICE_COUNT = (0xDEC10906) +NVC9B0_DEC_ERROR_VC1_APPTIMER_EXPIRED = (0xDEC20001) +NVC9B0_DEC_ERROR_VC1_MVTIMER_EXPIRED = (0xDEC20002) +NVC9B0_DEC_ERROR_VC1_INVALID_TOKEN = (0xDEC20003) +NVC9B0_DEC_ERROR_VC1_SLICEDATA_MISSING = (0xDEC20004) +NVC9B0_DEC_ERROR_VC1_HWERR_INTERRUPT = (0xDEC20005) +NVC9B0_DEC_ERROR_VC1_DETECTED_VLD_FAILURE = (0xDEC20006) +NVC9B0_DEC_ERROR_VC1_TIMEOUT_POLLING_FOR_DATA = (0xDEC20007) +NVC9B0_DEC_ERROR_VC1_PDEC_PIC_END_UNALIGNED = (0xDEC20008) +NVC9B0_DEC_ERROR_VC1_WDTIMER_EXPIRED = (0xDEC20009) +NVC9B0_DEC_ERROR_VC1_ERRINTSTART = (0xDEC20010) +NVC9B0_DEC_ERROR_VC1_IQT_ERRINT = (0xDEC20011) +NVC9B0_DEC_ERROR_VC1_MC_ERRINT = (0xDEC20012) +NVC9B0_DEC_ERROR_VC1_MC_IQT_ERRINT = (0xDEC20013) +NVC9B0_DEC_ERROR_VC1_REC_ERRINT = (0xDEC20014) +NVC9B0_DEC_ERROR_VC1_REC_IQT_ERRINT = (0xDEC20015) +NVC9B0_DEC_ERROR_VC1_REC_MC_ERRINT = (0xDEC20016) +NVC9B0_DEC_ERROR_VC1_REC_MC_IQT_ERRINT = (0xDEC20017) +NVC9B0_DEC_ERROR_VC1_DBF_ERRINT = (0xDEC20018) +NVC9B0_DEC_ERROR_VC1_DBF_IQT_ERRINT = (0xDEC20019) +NVC9B0_DEC_ERROR_VC1_DBF_MC_ERRINT = (0xDEC2001A) +NVC9B0_DEC_ERROR_VC1_DBF_MC_IQT_ERRINT = (0xDEC2001B) +NVC9B0_DEC_ERROR_VC1_DBF_REC_ERRINT = (0xDEC2001C) 
+NVC9B0_DEC_ERROR_VC1_DBF_REC_IQT_ERRINT = (0xDEC2001D) +NVC9B0_DEC_ERROR_VC1_DBF_REC_MC_ERRINT = (0xDEC2001E) +NVC9B0_DEC_ERROR_VC1_DBF_REC_MC_IQT_ERRINT = (0xDEC2001F) +NVC9B0_DEC_ERROR_VC1_PICTURE_INIT = (0xDEC20100) +NVC9B0_DEC_ERROR_VC1_STATEMACHINE_FAILURE = (0xDEC20101) +NVC9B0_DEC_ERROR_VC1_INVALID_CTXID_PIC = (0xDEC20901) +NVC9B0_DEC_ERROR_VC1_INVALID_CTXID_UCODE = (0xDEC20902) +NVC9B0_DEC_ERROR_VC1_INVALID_CTXID_FC = (0xDEC20903) +NVC9B0_DEC_ERROR_VC1_INVAILD_CTXID_SLH = (0xDEC20904) +NVC9B0_DEC_ERROR_VC1_INVALID_UCODE_SIZE = (0xDEC20905) +NVC9B0_DEC_ERROR_VC1_INVALID_SLICE_COUNT = (0xDEC20906) +NVC9B0_DEC_ERROR_H264_APPTIMER_EXPIRED = (0xDEC30001) +NVC9B0_DEC_ERROR_H264_MVTIMER_EXPIRED = (0xDEC30002) +NVC9B0_DEC_ERROR_H264_INVALID_TOKEN = (0xDEC30003) +NVC9B0_DEC_ERROR_H264_SLICEDATA_MISSING = (0xDEC30004) +NVC9B0_DEC_ERROR_H264_HWERR_INTERRUPT = (0xDEC30005) +NVC9B0_DEC_ERROR_H264_DETECTED_VLD_FAILURE = (0xDEC30006) +NVC9B0_DEC_ERROR_H264_ERRINTSTART = (0xDEC30010) +NVC9B0_DEC_ERROR_H264_IQT_ERRINT = (0xDEC30011) +NVC9B0_DEC_ERROR_H264_MC_ERRINT = (0xDEC30012) +NVC9B0_DEC_ERROR_H264_MC_IQT_ERRINT = (0xDEC30013) +NVC9B0_DEC_ERROR_H264_REC_ERRINT = (0xDEC30014) +NVC9B0_DEC_ERROR_H264_REC_IQT_ERRINT = (0xDEC30015) +NVC9B0_DEC_ERROR_H264_REC_MC_ERRINT = (0xDEC30016) +NVC9B0_DEC_ERROR_H264_REC_MC_IQT_ERRINT = (0xDEC30017) +NVC9B0_DEC_ERROR_H264_DBF_ERRINT = (0xDEC30018) +NVC9B0_DEC_ERROR_H264_DBF_IQT_ERRINT = (0xDEC30019) +NVC9B0_DEC_ERROR_H264_DBF_MC_ERRINT = (0xDEC3001A) +NVC9B0_DEC_ERROR_H264_DBF_MC_IQT_ERRINT = (0xDEC3001B) +NVC9B0_DEC_ERROR_H264_DBF_REC_ERRINT = (0xDEC3001C) +NVC9B0_DEC_ERROR_H264_DBF_REC_IQT_ERRINT = (0xDEC3001D) +NVC9B0_DEC_ERROR_H264_DBF_REC_MC_ERRINT = (0xDEC3001E) +NVC9B0_DEC_ERROR_H264_DBF_REC_MC_IQT_ERRINT = (0xDEC3001F) +NVC9B0_DEC_ERROR_H264_PICTURE_INIT = (0xDEC30100) +NVC9B0_DEC_ERROR_H264_STATEMACHINE_FAILURE = (0xDEC30101) +NVC9B0_DEC_ERROR_H264_INVALID_CTXID_PIC = (0xDEC30901) +NVC9B0_DEC_ERROR_H264_INVALID_CTXID_UCODE = 
(0xDEC30902) +NVC9B0_DEC_ERROR_H264_INVALID_CTXID_FC = (0xDEC30903) +NVC9B0_DEC_ERROR_H264_INVALID_CTXID_SLH = (0xDEC30904) +NVC9B0_DEC_ERROR_H264_INVALID_UCODE_SIZE = (0xDEC30905) +NVC9B0_DEC_ERROR_H264_INVALID_SLICE_COUNT = (0xDEC30906) +NVC9B0_DEC_ERROR_MPEG4_APPTIMER_EXPIRED = (0xDEC40001) +NVC9B0_DEC_ERROR_MPEG4_MVTIMER_EXPIRED = (0xDEC40002) +NVC9B0_DEC_ERROR_MPEG4_INVALID_TOKEN = (0xDEC40003) +NVC9B0_DEC_ERROR_MPEG4_SLICEDATA_MISSING = (0xDEC40004) +NVC9B0_DEC_ERROR_MPEG4_HWERR_INTERRUPT = (0xDEC40005) +NVC9B0_DEC_ERROR_MPEG4_DETECTED_VLD_FAILURE = (0xDEC40006) +NVC9B0_DEC_ERROR_MPEG4_TIMEOUT_POLLING_FOR_DATA = (0xDEC40007) +NVC9B0_DEC_ERROR_MPEG4_PDEC_PIC_END_UNALIGNED = (0xDEC40008) +NVC9B0_DEC_ERROR_MPEG4_WDTIMER_EXPIRED = (0xDEC40009) +NVC9B0_DEC_ERROR_MPEG4_ERRINTSTART = (0xDEC40010) +NVC9B0_DEC_ERROR_MPEG4_IQT_ERRINT = (0xDEC40011) +NVC9B0_DEC_ERROR_MPEG4_MC_ERRINT = (0xDEC40012) +NVC9B0_DEC_ERROR_MPEG4_MC_IQT_ERRINT = (0xDEC40013) +NVC9B0_DEC_ERROR_MPEG4_REC_ERRINT = (0xDEC40014) +NVC9B0_DEC_ERROR_MPEG4_REC_IQT_ERRINT = (0xDEC40015) +NVC9B0_DEC_ERROR_MPEG4_REC_MC_ERRINT = (0xDEC40016) +NVC9B0_DEC_ERROR_MPEG4_REC_MC_IQT_ERRINT = (0xDEC40017) +NVC9B0_DEC_ERROR_MPEG4_DBF_ERRINT = (0xDEC40018) +NVC9B0_DEC_ERROR_MPEG4_DBF_IQT_ERRINT = (0xDEC40019) +NVC9B0_DEC_ERROR_MPEG4_DBF_MC_ERRINT = (0xDEC4001A) +NVC9B0_DEC_ERROR_MPEG4_DBF_MC_IQT_ERRINT = (0xDEC4001B) +NVC9B0_DEC_ERROR_MPEG4_DBF_REC_ERRINT = (0xDEC4001C) +NVC9B0_DEC_ERROR_MPEG4_DBF_REC_IQT_ERRINT = (0xDEC4001D) +NVC9B0_DEC_ERROR_MPEG4_DBF_REC_MC_ERRINT = (0xDEC4001E) +NVC9B0_DEC_ERROR_MPEG4_DBF_REC_MC_IQT_ERRINT = (0xDEC4001F) +NVC9B0_DEC_ERROR_MPEG4_PICTURE_INIT = (0xDEC40100) +NVC9B0_DEC_ERROR_MPEG4_STATEMACHINE_FAILURE = (0xDEC40101) +NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_PIC = (0xDEC40901) +NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_UCODE = (0xDEC40902) +NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_FC = (0xDEC40903) +NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_SLH = (0xDEC40904) 
# Generated constants and macro-style helpers, one statement per line.
# The hunk headers of the originating patch are kept as comments: each one marks
# a gap where generated lines that are not part of this excerpt were elided.
NVC9B0_DEC_ERROR_MPEG4_INVALID_UCODE_SIZE = (0xDEC40905)
NVC9B0_DEC_ERROR_MPEG4_INVALID_SLICE_COUNT = (0xDEC40906)
NVC6C0_QMDV02_03_OUTER_PUT = (30, 0)
NVC6C0_QMDV02_03_OUTER_OVERFLOW = (31, 31)
NVC6C0_QMDV02_03_OUTER_GET = (62, 32)
# @@ -12380,6 +14134,77 @@
NVCEC0_QMDV04_01_OUTER_PUT = (3038, 3008)
NVCEC0_QMDV04_01_OUTER_OVERFLOW = (3039, 3039)
NVCEC0_QMDV04_01_OUTER_GET = (3070, 3040)
NVCEC0_QMDV04_01_OUTER_STICKY_OVERFLOW = (3071, 3071)

# --- alignment helpers -------------------------------------------------------
# ALIGN_UP(v, n): round integer v up to the next multiple of n. The mask trick
# `& ~(n-1)` is only correct when n is a power of two, and `&` requires ints.
ALIGN_UP = lambda v,n: (((v) + ((n)-1)) &~ ((n)-1))
# NVDEC_ALIGN(value): ALIGN_UP with a fixed 256 alignment.
NVDEC_ALIGN = lambda value: ALIGN_UP(value,256)

NVDEC_MAX_MPEG2_SLICE = 65536
NVDEC_CODEC_MPEG1 = 0
NVDEC_CODEC_MPEG2 = 1
NVDEC_CODEC_VC1 = 2
NVDEC_CODEC_H264 = 3
NVDEC_CODEC_MPEG4 = 4
NVDEC_CODEC_DIVX = NVDEC_CODEC_MPEG4
NVDEC_CODEC_VP8 = 5
NVDEC_CODEC_HEVC = 7
NVDEC_CODEC_VP9 = 9
NVDEC_CODEC_HEVC_PARSER = 12
NVDEC_CODEC_AV1 = 10

AES_MODE_MASK = 0x7
AES_CTS_MASK = 0x1
AES_PADDING_TYPE_MASK = 0x7
AES_UNWRAP_KEY_MASK = 0x1
AES_MODE_SHIFT = 0
AES_CTS_SHIFT = 3
AES_PADDING_TYPE_SHIFT = 4
AES_UNWRAP_KEY_SHIFT = 7
# AES_SET_FLAG(M, C, P): mask each argument and OR the shifted fields together.
# The unwrap-key mask/shift defined above are not used by this helper.
AES_SET_FLAG = lambda M,C,P: ((M & AES_MODE_MASK) << AES_MODE_SHIFT) | ((C & AES_CTS_MASK) << AES_CTS_SHIFT) | ((P & AES_PADDING_TYPE_MASK) << AES_PADDING_TYPE_SHIFT)
DRM_MODE_MASK = 0x7f
AES_GET_DRM_MODE = lambda V: (V & DRM_MODE_MASK)

GIP_ASIC_VERT_FILTER_RAM_SIZE = 16
GIP_ASIC_BSD_CTRL_RAM_SIZE = 4
GIP_ASIC_SCALING_LIST_SIZE = (16*64)
GIP_ASIC_VERT_SAO_RAM_SIZE = 16
GIP_ASIC_TILE_SIZE = ((20*22*2*2+16+15) & ~0xF)
GIP_ASIC_VP9_CTB_SEG_SIZE = 32

# --- buffer-size helpers -----------------------------------------------------
# Every helper below ends in ALIGN_UP/NVDEC_ALIGN, i.e. in a bitwise `&`, so each
# intermediate value must stay an int. Floor division `//` is used on purpose:
# true division `/` yields a float and makes the final `&` raise TypeError.
# NOTE(review): `//` equals truncating integer division only for non-negative
# operands; w/h are presumably frame dimensions (>= 0) -- confirm with callers.
HEVC_DBLK_TOP_SIZE_IN_SB16 = ALIGN_UP(632, 128)
HEVC_DBLK_TOP_BUF_SIZE = lambda w: NVDEC_ALIGN((ALIGN_UP(w,16)//16 + 2) * HEVC_DBLK_TOP_SIZE_IN_SB16)
HEVC_DBLK_LEFT_SIZE_IN_SB16 = ALIGN_UP(506, 128)
HEVC_DBLK_LEFT_BUF_SIZE = lambda h: NVDEC_ALIGN((ALIGN_UP(h,16)//16 + 2) * HEVC_DBLK_LEFT_SIZE_IN_SB16)
HEVC_SAO_LEFT_SIZE_IN_SB16 = ALIGN_UP(713, 128)
HEVC_SAO_LEFT_BUF_SIZE = lambda h: NVDEC_ALIGN((ALIGN_UP(h,16)//16 + 2) * HEVC_SAO_LEFT_SIZE_IN_SB16)

VP9_DBLK_TOP_SIZE_IN_SB64 = ALIGN_UP(2000, 128)
VP9_DBLK_TOP_BUF_SIZE = lambda w: NVDEC_ALIGN((ALIGN_UP(w,64)//64 + 2) * VP9_DBLK_TOP_SIZE_IN_SB64)
VP9_DBLK_LEFT_SIZE_IN_SB64 = ALIGN_UP(1600, 128)
VP9_DBLK_LEFT_BUF_SIZE = lambda h: NVDEC_ALIGN((ALIGN_UP(h,64)//64 + 2) * VP9_DBLK_LEFT_SIZE_IN_SB64)
VP9_HINT_DUMP_SIZE_IN_SB64 = ((64*64)//(4*4)*8)
VP9_HINT_DUMP_SIZE = lambda w,h: NVDEC_ALIGN(VP9_HINT_DUMP_SIZE_IN_SB64*((w+63)//64)*((h+63)//64))

NUM_SUBSAMPLES = 32
VP8_MAX_TOKEN_PARTITIONS = 8
VP9_MAX_FRAMES_IN_SUPERFRAME = 8
AV1_MAX_TILES = 256
MAX_SUBSTREAM_ENTRY_SIZE = 32

# `a * b // c * d` groups left to right as ((a*b)//c)*d.
AV1_TEMPORAL_MV_SIZE_IN_64x64 = 256
AV1_TEMPORAL_MV_BUF_SIZE = lambda w,h: ALIGN_UP(ALIGN_UP(w,128) * ALIGN_UP(h,128) // (64*64) * AV1_TEMPORAL_MV_SIZE_IN_64x64, 4096)
AV1_SEGMENT_ID_SIZE_IN_64x64 = 128
AV1_SEGMENT_ID_BUF_SIZE = lambda w,h: ALIGN_UP(ALIGN_UP(w,128) * ALIGN_UP(h,128) // (64*64) * AV1_SEGMENT_ID_SIZE_IN_64x64, 4096)
AV1_GLOBAL_MOTION_BUF_SIZE = NVDEC_ALIGN(7*32)
AV1_INTRA_TOP_BUF_SIZE = NVDEC_ALIGN(8*8192)
AV1_HISTOGRAM_BUF_SIZE = NVDEC_ALIGN(1024)
AV1_DBLK_TOP_SIZE_IN_SB64 = ALIGN_UP(1920, 128)
AV1_DBLK_TOP_BUF_SIZE = lambda w: NVDEC_ALIGN((ALIGN_UP(w,64)//64 + 2) * AV1_DBLK_TOP_SIZE_IN_SB64)
AV1_DBLK_LEFT_SIZE_IN_SB64 = ALIGN_UP(1536, 128)
AV1_DBLK_LEFT_BUF_SIZE = lambda h: NVDEC_ALIGN((ALIGN_UP(h,64)//64 + 2) * AV1_DBLK_LEFT_SIZE_IN_SB64)
AV1_CDEF_LEFT_SIZE_IN_SB64 = ALIGN_UP(1792, 128)
AV1_CDEF_LEFT_BUF_SIZE = lambda h: NVDEC_ALIGN((ALIGN_UP(h,64)//64 + 2) * AV1_CDEF_LEFT_SIZE_IN_SB64)
AV1_HUS_LEFT_SIZE_IN_SB64 = ALIGN_UP(12544, 128)
AV1_ASIC_HUS_LEFT_BUFFER_SIZE = lambda h: NVDEC_ALIGN((ALIGN_UP(h,64)//64 + 2) * AV1_HUS_LEFT_SIZE_IN_SB64)
AV1_HUS_LEFT_BUF_SIZE = lambda h: 2*AV1_ASIC_HUS_LEFT_BUFFER_SIZE(h)
AV1_LR_LEFT_SIZE_IN_SB64 = ALIGN_UP(1920, 128)
AV1_LR_LEFT_BUF_SIZE = lambda h: NVDEC_ALIGN((ALIGN_UP(h,64)//64 + 2) * AV1_LR_LEFT_SIZE_IN_SB64)
AV1_FGS_LEFT_SIZE_IN_SB64 = ALIGN_UP(320, 128)
AV1_FGS_LEFT_BUF_SIZE = lambda h: NVDEC_ALIGN((ALIGN_UP(h,64)//64 + 2) * AV1_FGS_LEFT_SIZE_IN_SB64)
AV1_HINT_DUMP_SIZE_IN_SB64 = ((64*64)//(4*4)*8)
AV1_HINT_DUMP_SIZE_IN_SB128 = ((128*128)//(4*4)*8)
AV1_HINT_DUMP_SIZE = lambda w,h: NVDEC_ALIGN(AV1_HINT_DUMP_SIZE_IN_SB128*((w+127)//128)*((h+127)//128))

NVBIT = lambda b: (1<<(b))
# NVBIT_TYPE, NvU32 and NvU64 are not defined in this excerpt; the two lambdas
# below only resolve them when called.
NVBIT32 = lambda b: NVBIT_TYPE(b, NvU32)
NVBIT64 = lambda b: NVBIT_TYPE(b, NvU64)
# @@ -12439,6 +14264,10 @@
NV01_NULL_OBJECT = (0x0)
NV1_NULL_OBJECT = NV01_NULL_OBJECT
NV01_ROOT = (0x0)
NV0000_ALLOC_PARAMETERS_MESSAGE_ID = (0x0000)
NV01_MEMORY_VIRTUAL = (0x70)
NV01_MEMORY_SYSTEM_DYNAMIC = (0x70)
NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS_MESSAGE_ID = (0x0070)
NV_MEMORY_VIRTUAL_SYSMEM_DYNAMIC_HVASPACE = 0xffffffff
NV01_DEVICE_0 = (0x80)
NV0080_ALLOC_PARAMETERS_MESSAGE_ID = (0x0080)
NV20_SUBDEVICE_0 = (0x2080)
# @@ -15129,6 +16958,22 @@
NV90F1_CTRL_CMD_VASPACE_GET_HOST_RM_MANAGED_SIZE = (0x90f10107)
NV90F1_CTRL_VASPACE_GET_HOST_RM_MANAGED_SIZE_PARAMS_MESSAGE_ID = (0x7)
NV90F1_CTRL_CMD_VASPACE_GET_VAS_HEAP_INFO = (0x90f10108)
NV90F1_CTRL_VASPACE_GET_VAS_HEAP_INFO_PARAMS_MESSAGE_ID = (0x8)
NVA06F_CTRL_CMD_GPFIFO_SCHEDULE = (0xa06f0103)
NVA06F_CTRL_GPFIFO_SCHEDULE_PARAMS_MESSAGE_ID = (0x3)
NVA06F_CTRL_CMD_BIND = (0xa06f0104)
NVA06F_CTRL_BIND_PARAMS_MESSAGE_ID = (0x4)
NVA06F_CTRL_CMD_SET_ERROR_NOTIFIER = (0xa06f0108)
NVA06F_CTRL_SET_ERROR_NOTIFIER_PARAMS_MESSAGE_ID = (0x8)
NVA06F_CTRL_CMD_SET_INTERLEAVE_LEVEL = (0xa06f0109)
NVA06F_CTRL_SET_INTERLEAVE_LEVEL_PARAMS_MESSAGE_ID = (0x9)
NVA06F_CTRL_CMD_GET_INTERLEAVE_LEVEL = (0xa06f0110)
NVA06F_CTRL_GET_INTERLEAVE_LEVEL_PARAMS_MESSAGE_ID = (0x10)
NVA06F_CTRL_CMD_RESTART_RUNLIST = (0xa06f0111)
NVA06F_CTRL_RESTART_RUNLIST_PARAMS_MESSAGE_ID = (0x11)
NVA06F_CTRL_CMD_STOP_CHANNEL = (0xa06f0112)
NVA06F_CTRL_STOP_CHANNEL_PARAMS_MESSAGE_ID = (0x12)
NVA06F_CTRL_CMD_GET_CONTEXT_ID = (0xa06f0113)
NVA06F_CTRL_GET_CONTEXT_ID_PARAMS_MESSAGE_ID = (0x13)
NV0000_CTRL_RESERVED = (0x00)
NV0000_CTRL_SYSTEM = (0x01)
NV0000_CTRL_GPU = (0x02)
# Start of the next file's patch header; its remainder follows on the next line.
# diff --git a/tinygrad/runtime/autogen/nv_580.py
b/tinygrad/runtime/autogen/nv_580.py index 71c27ee0ab..05f7c38743 100644 --- a/tinygrad/runtime/autogen/nv_580.py +++ b/tinygrad/runtime/autogen/nv_580.py @@ -2,14 +2,1335 @@ import ctypes from tinygrad.helpers import unwrap from tinygrad.runtime.support.c import Struct, CEnum, _IO, _IOW, _IOR, _IOWR -class _anonunion0(ctypes.Union): pass +_anonenum0 = CEnum(ctypes.c_uint32) +AES128_NONE = _anonenum0.define('AES128_NONE', 0) +AES128_CTR = _anonenum0.define('AES128_CTR', 1) +AES128_CBC = _anonenum0.define('AES128_CBC', 2) +AES128_ECB = _anonenum0.define('AES128_ECB', 3) +AES128_OFB = _anonenum0.define('AES128_OFB', 4) +AES128_CTR_LSB16B = _anonenum0.define('AES128_CTR_LSB16B', 5) +AES128_CLR_AS_ENCRYPT = _anonenum0.define('AES128_CLR_AS_ENCRYPT', 6) +AES128_RESERVED = _anonenum0.define('AES128_RESERVED', 7) + +_anonenum1 = CEnum(ctypes.c_uint32) +AES128_CTS_DISABLE = _anonenum1.define('AES128_CTS_DISABLE', 0) +AES128_CTS_ENABLE = _anonenum1.define('AES128_CTS_ENABLE', 1) + +_anonenum2 = CEnum(ctypes.c_uint32) +AES128_PADDING_NONE = _anonenum2.define('AES128_PADDING_NONE', 0) +AES128_PADDING_CARRY_OVER = _anonenum2.define('AES128_PADDING_CARRY_OVER', 1) +AES128_PADDING_RFC2630 = _anonenum2.define('AES128_PADDING_RFC2630', 2) +AES128_PADDING_RESERVED = _anonenum2.define('AES128_PADDING_RESERVED', 7) + +ENCR_MODE = CEnum(ctypes.c_uint32) +ENCR_MODE_CTR64 = ENCR_MODE.define('ENCR_MODE_CTR64', 0) +ENCR_MODE_CBC = ENCR_MODE.define('ENCR_MODE_CBC', 1) +ENCR_MODE_ECB = ENCR_MODE.define('ENCR_MODE_ECB', 2) +ENCR_MODE_ECB_PARTIAL = ENCR_MODE.define('ENCR_MODE_ECB_PARTIAL', 3) +ENCR_MODE_CBC_PARTIAL = ENCR_MODE.define('ENCR_MODE_CBC_PARTIAL', 4) +ENCR_MODE_CLEAR_INTO_VPR = ENCR_MODE.define('ENCR_MODE_CLEAR_INTO_VPR', 5) +ENCR_MODE_FORCE_INTO_VPR = ENCR_MODE.define('ENCR_MODE_FORCE_INTO_VPR', 6) + +_anonenum3 = CEnum(ctypes.c_uint32) +DRM_MS_PIFF_CTR = _anonenum3.define('DRM_MS_PIFF_CTR', 17) + +_anonenum4 = CEnum(ctypes.c_uint32) +DRM_MS_PIFF_CBC = 
_anonenum4.define('DRM_MS_PIFF_CBC', 2) + +_anonenum5 = CEnum(ctypes.c_uint32) +DRM_MARLIN_CTR = _anonenum5.define('DRM_MARLIN_CTR', 1) + +_anonenum6 = CEnum(ctypes.c_uint32) +DRM_MARLIN_CBC = _anonenum6.define('DRM_MARLIN_CBC', 34) + +_anonenum7 = CEnum(ctypes.c_uint32) +DRM_WIDEVINE = _anonenum7.define('DRM_WIDEVINE', 10) + +_anonenum8 = CEnum(ctypes.c_uint32) +DRM_WIDEVINE_CTR = _anonenum8.define('DRM_WIDEVINE_CTR', 17) + +_anonenum9 = CEnum(ctypes.c_uint32) +DRM_ULTRA_VIOLET = _anonenum9.define('DRM_ULTRA_VIOLET', 5) + +_anonenum10 = CEnum(ctypes.c_uint32) +DRM_NONE = _anonenum10.define('DRM_NONE', 0) + +_anonenum11 = CEnum(ctypes.c_uint32) +DRM_CLR_AS_ENCRYPT = _anonenum11.define('DRM_CLR_AS_ENCRYPT', 6) + +class struct__nvdec_ssm_s(Struct): pass +struct__nvdec_ssm_s._fields_ = [ + ('bytes_of_protected_data', ctypes.c_uint32), + ('bytes_of_clear_data', ctypes.c_uint32,16), + ('skip_byte_blk', ctypes.c_uint32,4), + ('crypt_byte_blk', ctypes.c_uint32,4), + ('skip', ctypes.c_uint32,1), + ('last', ctypes.c_uint32,1), + ('pad', ctypes.c_uint32,1), + ('mode', ctypes.c_uint32,1), + ('entry_type', ctypes.c_uint32,1), + ('reserved', ctypes.c_uint32,3), +] +nvdec_ssm_s = struct__nvdec_ssm_s +class struct__nvdec_pass2_otf_ext_s(Struct): pass +struct__nvdec_pass2_otf_ext_s._fields_ = [ + ('ssm_entry_num', ctypes.c_uint32,16), + ('ssm_iv_num', ctypes.c_uint32,16), + ('real_stream_length', ctypes.c_uint32), + ('non_slice_data', ctypes.c_uint32,16), + ('drm_mode', ctypes.c_uint32,7), + ('reserved', ctypes.c_uint32,9), +] +nvdec_pass2_otf_ext_s = struct__nvdec_pass2_otf_ext_s +class struct__nvdec_substream_entry_s(Struct): pass +struct__nvdec_substream_entry_s._fields_ = [ + ('substream_start_offset', ctypes.c_uint32), + ('substream_length', ctypes.c_uint32), + ('substream_first_tile_idx', ctypes.c_uint32,8), + ('substream_last_tile_idx', ctypes.c_uint32,8), + ('last_substream_entry_in_frame', ctypes.c_uint32,1), + ('reserved', ctypes.c_uint32,15), +] +nvdec_substream_entry_s 
= struct__nvdec_substream_entry_s +class struct__nvdec_ecdma_config_s(Struct): pass +struct__nvdec_ecdma_config_s._fields_ = [ + ('ecdma_enable', ctypes.c_uint32), + ('ecdma_blk_x_src', ctypes.c_uint16), + ('ecdma_blk_y_src', ctypes.c_uint16), + ('ecdma_blk_x_dst', ctypes.c_uint16), + ('ecdma_blk_y_dst', ctypes.c_uint16), + ('ref_pic_idx', ctypes.c_uint16), + ('boundary0_top', ctypes.c_uint16), + ('boundary0_bottom', ctypes.c_uint16), + ('boundary1_left', ctypes.c_uint16), + ('boundary1_right', ctypes.c_uint16), + ('blk_copy_flag', ctypes.c_ubyte), + ('ctb_size', ctypes.c_ubyte), +] +nvdec_ecdma_config_s = struct__nvdec_ecdma_config_s +class struct__nvdec_status_hevc_s(Struct): pass +struct__nvdec_status_hevc_s._fields_ = [ + ('frame_status_intra_cnt', ctypes.c_uint32), + ('frame_status_inter_cnt', ctypes.c_uint32), + ('frame_status_skip_cnt', ctypes.c_uint32), + ('frame_status_fwd_mvx_cnt', ctypes.c_uint32), + ('frame_status_fwd_mvy_cnt', ctypes.c_uint32), + ('frame_status_bwd_mvx_cnt', ctypes.c_uint32), + ('frame_status_bwd_mvy_cnt', ctypes.c_uint32), + ('error_ctb_pos', ctypes.c_uint32), + ('error_slice_pos', ctypes.c_uint32), +] +nvdec_status_hevc_s = struct__nvdec_status_hevc_s +class struct__nvdec_status_vp9_s(Struct): pass +struct__nvdec_status_vp9_s._fields_ = [ + ('frame_status_intra_cnt', ctypes.c_uint32), + ('frame_status_inter_cnt', ctypes.c_uint32), + ('frame_status_skip_cnt', ctypes.c_uint32), + ('frame_status_fwd_mvx_cnt', ctypes.c_uint32), + ('frame_status_fwd_mvy_cnt', ctypes.c_uint32), + ('frame_status_bwd_mvx_cnt', ctypes.c_uint32), + ('frame_status_bwd_mvy_cnt', ctypes.c_uint32), + ('error_ctb_pos', ctypes.c_uint32), + ('error_slice_pos', ctypes.c_uint32), +] +nvdec_status_vp9_s = struct__nvdec_status_vp9_s +class struct__nvdec_status_s(Struct): pass +class struct__nvdec_status_s_0(ctypes.Union): pass +struct__nvdec_status_s_0._fields_ = [ + ('hevc', nvdec_status_hevc_s), + ('vp9', nvdec_status_vp9_s), +] +struct__nvdec_status_s._anonymous_ = 
['_0'] +struct__nvdec_status_s._fields_ = [ + ('mbs_correctly_decoded', ctypes.c_uint32), + ('mbs_in_error', ctypes.c_uint32), + ('cycle_count', ctypes.c_uint32), + ('error_status', ctypes.c_uint32), + ('_0', struct__nvdec_status_s_0), + ('slice_header_error_code', ctypes.c_uint32), +] +nvdec_status_s = struct__nvdec_status_s +class struct__external_mv_s(Struct): pass +struct__external_mv_s._fields_ = [ + ('mvx', ctypes.c_int32,14), + ('mvy', ctypes.c_int32,14), + ('refidx', ctypes.c_uint32,4), +] +external_mv_s = struct__external_mv_s +class struct__nvdec_hevc_main10_444_ext_s(Struct): pass +struct__nvdec_hevc_main10_444_ext_s._fields_ = [ + ('transformSkipRotationEnableFlag', ctypes.c_uint32,1), + ('transformSkipContextEnableFlag', ctypes.c_uint32,1), + ('intraBlockCopyEnableFlag', ctypes.c_uint32,1), + ('implicitRdpcmEnableFlag', ctypes.c_uint32,1), + ('explicitRdpcmEnableFlag', ctypes.c_uint32,1), + ('extendedPrecisionProcessingFlag', ctypes.c_uint32,1), + ('intraSmoothingDisabledFlag', ctypes.c_uint32,1), + ('highPrecisionOffsetsEnableFlag', ctypes.c_uint32,1), + ('fastRiceAdaptationEnableFlag', ctypes.c_uint32,1), + ('cabacBypassAlignmentEnableFlag', ctypes.c_uint32,1), + ('sps_444_extension_reserved', ctypes.c_uint32,22), + ('log2MaxTransformSkipSize', ctypes.c_uint32,4), + ('crossComponentPredictionEnableFlag', ctypes.c_uint32,1), + ('chromaQpAdjustmentEnableFlag', ctypes.c_uint32,1), + ('diffCuChromaQpAdjustmentDepth', ctypes.c_uint32,2), + ('chromaQpAdjustmentTableSize', ctypes.c_uint32,3), + ('log2SaoOffsetScaleLuma', ctypes.c_uint32,3), + ('log2SaoOffsetScaleChroma', ctypes.c_uint32,3), + ('pps_444_extension_reserved', ctypes.c_uint32,15), + ('cb_qp_adjustment', (ctypes.c_char * 6)), + ('cr_qp_adjustment', (ctypes.c_char * 6)), + ('HevcFltAboveOffset', ctypes.c_uint32), + ('HevcSaoAboveOffset', ctypes.c_uint32), +] +nvdec_hevc_main10_444_ext_s = struct__nvdec_hevc_main10_444_ext_s +class struct__nvdec_hevc_pic_v1_s(Struct): pass 
+struct__nvdec_hevc_pic_v1_s._fields_ = [ + ('hevc_main10_444_ext', nvdec_hevc_main10_444_ext_s), + ('sw_skip_start_length', ctypes.c_uint32,14), + ('external_ref_mem_dis', ctypes.c_uint32,1), + ('error_recovery_start_pos', ctypes.c_uint32,2), + ('error_external_mv_en', ctypes.c_uint32,1), + ('reserved0', ctypes.c_uint32,14), +] +nvdec_hevc_pic_v1_s = struct__nvdec_hevc_pic_v1_s +class struct__nvdec_hevc_pic_v2_s(Struct): pass +struct__nvdec_hevc_pic_v2_s._fields_ = [ + ('mv_hevc_enable', ctypes.c_uint32,1), + ('nuh_layer_id', ctypes.c_uint32,6), + ('default_ref_layers_active_flag', ctypes.c_uint32,1), + ('NumDirectRefLayers', ctypes.c_uint32,6), + ('max_one_active_ref_layer_flag', ctypes.c_uint32,1), + ('NumActiveRefLayerPics', ctypes.c_uint32,6), + ('poc_lsb_not_present_flag', ctypes.c_uint32,1), + ('reserved0', ctypes.c_uint32,10), +] +nvdec_hevc_pic_v2_s = struct__nvdec_hevc_pic_v2_s +class struct__nvdec_hevc_pic_v3_s(Struct): pass +struct__nvdec_hevc_pic_v3_s._fields_ = [ + ('slice_decoding_enable', ctypes.c_uint32,1), + ('slice_ec_enable', ctypes.c_uint32,1), + ('slice_ec_mv_type', ctypes.c_uint32,2), + ('err_detected_sw', ctypes.c_uint32,1), + ('slice_ec_slice_type', ctypes.c_uint32,2), + ('slice_strm_recfg_en', ctypes.c_uint32,1), + ('reserved', ctypes.c_uint32,24), + ('HevcSliceEdgeOffset', ctypes.c_uint32), +] +nvdec_hevc_pic_v3_s = struct__nvdec_hevc_pic_v3_s +class struct__nvdec_hevc_pic_s(Struct): pass +struct__nvdec_hevc_pic_s._fields_ = [ + ('wrapped_session_key', (ctypes.c_uint32 * 4)), + ('wrapped_content_key', (ctypes.c_uint32 * 4)), + ('initialization_vector', (ctypes.c_uint32 * 4)), + ('stream_len', ctypes.c_uint32), + ('enable_encryption', ctypes.c_uint32), + ('key_increment', ctypes.c_uint32,6), + ('encryption_mode', ctypes.c_uint32,4), + ('key_slot_index', ctypes.c_uint32,4), + ('ssm_en', ctypes.c_uint32,1), + ('enable_histogram', ctypes.c_uint32,1), + ('enable_substream_decoding', ctypes.c_uint32,1), + ('reserved0', ctypes.c_uint32,15), + 
('gptimer_timeout_value', ctypes.c_uint32), + ('tileformat', ctypes.c_ubyte,2), + ('gob_height', ctypes.c_ubyte,3), + ('reserverd_surface_format', ctypes.c_ubyte,3), + ('sw_start_code_e', ctypes.c_ubyte), + ('disp_output_mode', ctypes.c_ubyte), + ('reserved1', ctypes.c_ubyte), + ('framestride', (ctypes.c_uint32 * 2)), + ('colMvBuffersize', ctypes.c_uint32), + ('HevcSaoBufferOffset', ctypes.c_uint32), + ('HevcBsdCtrlOffset', ctypes.c_uint32), + ('pic_width_in_luma_samples', ctypes.c_uint16), + ('pic_height_in_luma_samples', ctypes.c_uint16), + ('chroma_format_idc', ctypes.c_uint32,4), + ('bit_depth_luma', ctypes.c_uint32,4), + ('bit_depth_chroma', ctypes.c_uint32,4), + ('log2_min_luma_coding_block_size', ctypes.c_uint32,4), + ('log2_max_luma_coding_block_size', ctypes.c_uint32,4), + ('log2_min_transform_block_size', ctypes.c_uint32,4), + ('log2_max_transform_block_size', ctypes.c_uint32,4), + ('reserved2', ctypes.c_uint32,4), + ('max_transform_hierarchy_depth_inter', ctypes.c_uint32,3), + ('max_transform_hierarchy_depth_intra', ctypes.c_uint32,3), + ('scalingListEnable', ctypes.c_uint32,1), + ('amp_enable_flag', ctypes.c_uint32,1), + ('sample_adaptive_offset_enabled_flag', ctypes.c_uint32,1), + ('pcm_enabled_flag', ctypes.c_uint32,1), + ('pcm_sample_bit_depth_luma', ctypes.c_uint32,4), + ('pcm_sample_bit_depth_chroma', ctypes.c_uint32,4), + ('log2_min_pcm_luma_coding_block_size', ctypes.c_uint32,4), + ('log2_max_pcm_luma_coding_block_size', ctypes.c_uint32,4), + ('pcm_loop_filter_disabled_flag', ctypes.c_uint32,1), + ('sps_temporal_mvp_enabled_flag', ctypes.c_uint32,1), + ('strong_intra_smoothing_enabled_flag', ctypes.c_uint32,1), + ('reserved3', ctypes.c_uint32,3), + ('dependent_slice_segments_enabled_flag', ctypes.c_uint32,1), + ('output_flag_present_flag', ctypes.c_uint32,1), + ('num_extra_slice_header_bits', ctypes.c_uint32,3), + ('sign_data_hiding_enabled_flag', ctypes.c_uint32,1), + ('cabac_init_present_flag', ctypes.c_uint32,1), + 
('num_ref_idx_l0_default_active', ctypes.c_uint32,4), + ('num_ref_idx_l1_default_active', ctypes.c_uint32,4), + ('init_qp', ctypes.c_uint32,7), + ('constrained_intra_pred_flag', ctypes.c_uint32,1), + ('transform_skip_enabled_flag', ctypes.c_uint32,1), + ('cu_qp_delta_enabled_flag', ctypes.c_uint32,1), + ('diff_cu_qp_delta_depth', ctypes.c_uint32,2), + ('reserved4', ctypes.c_uint32,5), + ('pps_cb_qp_offset', ctypes.c_char), + ('pps_cr_qp_offset', ctypes.c_char), + ('pps_beta_offset', ctypes.c_char), + ('pps_tc_offset', ctypes.c_char), + ('pps_slice_chroma_qp_offsets_present_flag', ctypes.c_uint32,1), + ('weighted_pred_flag', ctypes.c_uint32,1), + ('weighted_bipred_flag', ctypes.c_uint32,1), + ('transquant_bypass_enabled_flag', ctypes.c_uint32,1), + ('tiles_enabled_flag', ctypes.c_uint32,1), + ('entropy_coding_sync_enabled_flag', ctypes.c_uint32,1), + ('num_tile_columns', ctypes.c_uint32,5), + ('num_tile_rows', ctypes.c_uint32,5), + ('loop_filter_across_tiles_enabled_flag', ctypes.c_uint32,1), + ('loop_filter_across_slices_enabled_flag', ctypes.c_uint32,1), + ('deblocking_filter_control_present_flag', ctypes.c_uint32,1), + ('deblocking_filter_override_enabled_flag', ctypes.c_uint32,1), + ('pps_deblocking_filter_disabled_flag', ctypes.c_uint32,1), + ('lists_modification_present_flag', ctypes.c_uint32,1), + ('log2_parallel_merge_level', ctypes.c_uint32,3), + ('slice_segment_header_extension_present_flag', ctypes.c_uint32,1), + ('reserved5', ctypes.c_uint32,6), + ('num_ref_frames', ctypes.c_ubyte), + ('reserved6', ctypes.c_ubyte), + ('longtermflag', ctypes.c_uint16), + ('initreflistidxl0', (ctypes.c_ubyte * 16)), + ('initreflistidxl1', (ctypes.c_ubyte * 16)), + ('RefDiffPicOrderCnts', (ctypes.c_int16 * 16)), + ('IDR_picture_flag', ctypes.c_ubyte), + ('RAP_picture_flag', ctypes.c_ubyte), + ('curr_pic_idx', ctypes.c_ubyte), + ('pattern_id', ctypes.c_ubyte), + ('sw_hdr_skip_length', ctypes.c_uint16), + ('reserved7', ctypes.c_uint16), + ('ecdma_cfg', nvdec_ecdma_config_s), 
+ ('separate_colour_plane_flag', ctypes.c_uint32,1), + ('log2_max_pic_order_cnt_lsb_minus4', ctypes.c_uint32,4), + ('num_short_term_ref_pic_sets', ctypes.c_uint32,7), + ('num_long_term_ref_pics_sps', ctypes.c_uint32,6), + ('bBitParsingDisable', ctypes.c_uint32,1), + ('num_delta_pocs_of_rps_idx', ctypes.c_uint32,8), + ('long_term_ref_pics_present_flag', ctypes.c_uint32,1), + ('reserved_dxva', ctypes.c_uint32,4), + ('num_bits_short_term_ref_pics_in_slice', ctypes.c_uint32), + ('v1', nvdec_hevc_pic_v1_s), + ('v2', nvdec_hevc_pic_v2_s), + ('v3', nvdec_hevc_pic_v3_s), + ('ssm', nvdec_pass2_otf_ext_s), +] +nvdec_hevc_pic_s = struct__nvdec_hevc_pic_s +class struct__hevc_slice_info_s(Struct): pass +struct__hevc_slice_info_s._fields_ = [ + ('first_flag', ctypes.c_uint32,1), + ('err_flag', ctypes.c_uint32,1), + ('last_flag', ctypes.c_uint32,1), + ('conceal_partial_slice', ctypes.c_uint32,1), + ('available', ctypes.c_uint32,1), + ('reserved0', ctypes.c_uint32,7), + ('ctb_count', ctypes.c_uint32,20), + ('bs_offset', ctypes.c_uint32), + ('bs_length', ctypes.c_uint32), + ('start_ctbx', ctypes.c_uint16), + ('start_ctby', ctypes.c_uint16), +] +hevc_slice_info_s = struct__hevc_slice_info_s +class struct__slice_edge_ctb_pos_ctx_s(Struct): pass +struct__slice_edge_ctb_pos_ctx_s._fields_ = [ + ('next_slice_pos_ctbxy', ctypes.c_uint32), + ('next_slice_segment_addr', ctypes.c_uint32), +] +slice_edge_ctb_pos_ctx_s = struct__slice_edge_ctb_pos_ctx_s +class struct__slice_edge_tile_ctx_s(Struct): pass +struct__slice_edge_tile_ctx_s._fields_ = [ + ('tileInfo1', ctypes.c_uint32), + ('tileInfo2', ctypes.c_uint32), + ('tileInfo3', ctypes.c_uint32), +] +slice_edge_tile_ctx_s = struct__slice_edge_tile_ctx_s +class struct__slice_edge_stats_ctx_s(Struct): pass +struct__slice_edge_stats_ctx_s._fields_ = [ + ('frame_status_intra_cnt', ctypes.c_uint32), + ('frame_status_inter_cnt', ctypes.c_uint32), + ('frame_status_skip_cnt', ctypes.c_uint32), + ('frame_status_fwd_mvx_cnt', ctypes.c_uint32), + 
('frame_status_fwd_mvy_cnt', ctypes.c_uint32), + ('frame_status_bwd_mvx_cnt', ctypes.c_uint32), + ('frame_status_bwd_mvy_cnt', ctypes.c_uint32), + ('frame_status_mv_cnt_ext', ctypes.c_uint32), +] +slice_edge_stats_ctx_s = struct__slice_edge_stats_ctx_s +class struct__slice_vpc_edge_ctx_s(Struct): pass +struct__slice_vpc_edge_ctx_s._fields_ = [ + ('reserved', ctypes.c_uint32), +] +slice_vpc_edge_ctx_s = struct__slice_vpc_edge_ctx_s +class struct__slice_vpc_main_ctx_s(Struct): pass +struct__slice_vpc_main_ctx_s._fields_ = [ + ('reserved', ctypes.c_uint32), +] +slice_vpc_main_ctx_s = struct__slice_vpc_main_ctx_s +class struct__slice_edge_ctx_s(Struct): pass +struct__slice_edge_ctx_s._fields_ = [ + ('slice_ctb_pos_ctx', slice_edge_ctb_pos_ctx_s), + ('slice_stats_ctx', slice_edge_stats_ctx_s), + ('slice_tile_ctx', slice_edge_tile_ctx_s), + ('slice_vpc_edge_ctx', slice_vpc_edge_ctx_s), + ('slice_vpc_main_ctx', slice_vpc_main_ctx_s), +] +slice_edge_ctx_s = struct__slice_edge_ctx_s +class struct__nvdec_vp9_pic_v1_s(Struct): pass +struct__nvdec_vp9_pic_v1_s._fields_ = [ + ('Vp9FltAboveOffset', ctypes.c_uint32), + ('external_ref_mem_dis', ctypes.c_uint32,1), + ('bit_depth', ctypes.c_uint32,4), + ('error_recovery_start_pos', ctypes.c_uint32,2), + ('error_external_mv_en', ctypes.c_uint32,1), + ('Reserved0', ctypes.c_uint32,24), +] +nvdec_vp9_pic_v1_s = struct__nvdec_vp9_pic_v1_s +enum_VP9_FRAME_SFC_ID = CEnum(ctypes.c_uint32) +VP9_LAST_FRAME_SFC = enum_VP9_FRAME_SFC_ID.define('VP9_LAST_FRAME_SFC', 0) +VP9_GOLDEN_FRAME_SFC = enum_VP9_FRAME_SFC_ID.define('VP9_GOLDEN_FRAME_SFC', 1) +VP9_ALTREF_FRAME_SFC = enum_VP9_FRAME_SFC_ID.define('VP9_ALTREF_FRAME_SFC', 2) +VP9_CURR_FRAME_SFC = enum_VP9_FRAME_SFC_ID.define('VP9_CURR_FRAME_SFC', 3) + +class struct__nvdec_vp9_pic_s(Struct): pass +struct__nvdec_vp9_pic_s._fields_ = [ + ('wrapped_session_key', (ctypes.c_uint32 * 4)), + ('wrapped_content_key', (ctypes.c_uint32 * 4)), + ('initialization_vector', (ctypes.c_uint32 * 4)), + 
('stream_len', ctypes.c_uint32), + ('enable_encryption', ctypes.c_uint32), + ('key_increment', ctypes.c_uint32,6), + ('encryption_mode', ctypes.c_uint32,4), + ('sw_hdr_skip_length', ctypes.c_uint32,14), + ('key_slot_index', ctypes.c_uint32,4), + ('ssm_en', ctypes.c_uint32,1), + ('enable_histogram', ctypes.c_uint32,1), + ('reserved0', ctypes.c_uint32,2), + ('gptimer_timeout_value', ctypes.c_uint32), + ('tileformat', ctypes.c_ubyte,2), + ('gob_height', ctypes.c_ubyte,3), + ('reserverd_surface_format', ctypes.c_ubyte,3), + ('reserved1', (ctypes.c_ubyte * 3)), + ('Vp9BsdCtrlOffset', ctypes.c_uint32), + ('ref0_width', ctypes.c_uint16), + ('ref0_height', ctypes.c_uint16), + ('ref0_stride', (ctypes.c_uint16 * 2)), + ('ref1_width', ctypes.c_uint16), + ('ref1_height', ctypes.c_uint16), + ('ref1_stride', (ctypes.c_uint16 * 2)), + ('ref2_width', ctypes.c_uint16), + ('ref2_height', ctypes.c_uint16), + ('ref2_stride', (ctypes.c_uint16 * 2)), + ('width', ctypes.c_uint16), + ('height', ctypes.c_uint16), + ('framestride', (ctypes.c_uint16 * 2)), + ('keyFrame', ctypes.c_ubyte,1), + ('prevIsKeyFrame', ctypes.c_ubyte,1), + ('resolutionChange', ctypes.c_ubyte,1), + ('errorResilient', ctypes.c_ubyte,1), + ('prevShowFrame', ctypes.c_ubyte,1), + ('intraOnly', ctypes.c_ubyte,1), + ('reserved2', ctypes.c_ubyte,2), + ('reserved3', (ctypes.c_ubyte * 3)), + ('refFrameSignBias', (ctypes.c_ubyte * 4)), + ('loopFilterLevel', ctypes.c_char), + ('loopFilterSharpness', ctypes.c_char), + ('qpYAc', ctypes.c_ubyte), + ('qpYDc', ctypes.c_char), + ('qpChAc', ctypes.c_char), + ('qpChDc', ctypes.c_char), + ('lossless', ctypes.c_char), + ('transform_mode', ctypes.c_char), + ('allow_high_precision_mv', ctypes.c_char), + ('mcomp_filter_type', ctypes.c_char), + ('comp_pred_mode', ctypes.c_char), + ('comp_fixed_ref', ctypes.c_char), + ('comp_var_ref', (ctypes.c_char * 2)), + ('log2_tile_columns', ctypes.c_char), + ('log2_tile_rows', ctypes.c_char), + ('segmentEnabled', ctypes.c_ubyte), + ('segmentMapUpdate', 
ctypes.c_ubyte), + ('segmentMapTemporalUpdate', ctypes.c_ubyte), + ('segmentFeatureMode', ctypes.c_ubyte), + ('segmentFeatureEnable', ((ctypes.c_ubyte * 4) * 8)), + ('segmentFeatureData', ((ctypes.c_int16 * 4) * 8)), + ('modeRefLfEnabled', ctypes.c_char), + ('mbRefLfDelta', (ctypes.c_char * 4)), + ('mbModeLfDelta', (ctypes.c_char * 2)), + ('reserved5', ctypes.c_char), + ('v1', nvdec_vp9_pic_v1_s), + ('ssm', nvdec_pass2_otf_ext_s), +] +nvdec_vp9_pic_s = struct__nvdec_vp9_pic_s +class nvdec_nmv_context(Struct): pass +nvdec_nmv_context._fields_ = [ + ('joints', (ctypes.c_ubyte * 3)), + ('sign', (ctypes.c_ubyte * 2)), + ('class0', ((ctypes.c_ubyte * 1) * 2)), + ('fp', ((ctypes.c_ubyte * 3) * 2)), + ('class0_hp', (ctypes.c_ubyte * 2)), + ('hp', (ctypes.c_ubyte * 2)), + ('classes', ((ctypes.c_ubyte * 10) * 2)), + ('class0_fp', (((ctypes.c_ubyte * 3) * 2) * 2)), + ('bits', ((ctypes.c_ubyte * 10) * 2)), +] +class nvdec_nmv_context_counts(Struct): pass +nvdec_nmv_context_counts._fields_ = [ + ('joints', (ctypes.c_uint32 * 4)), + ('sign', ((ctypes.c_uint32 * 2) * 2)), + ('classes', ((ctypes.c_uint32 * 11) * 2)), + ('class0', ((ctypes.c_uint32 * 2) * 2)), + ('bits', (((ctypes.c_uint32 * 2) * 10) * 2)), + ('class0_fp', (((ctypes.c_uint32 * 4) * 2) * 2)), + ('fp', ((ctypes.c_uint32 * 4) * 2)), + ('class0_hp', ((ctypes.c_uint32 * 2) * 2)), + ('hp', ((ctypes.c_uint32 * 2) * 2)), +] +class struct_nvdec_vp9AdaptiveEntropyProbs_s(Struct): pass +struct_nvdec_vp9AdaptiveEntropyProbs_s._fields_ = [ + ('inter_mode_prob', ((ctypes.c_ubyte * 4) * 7)), + ('intra_inter_prob', (ctypes.c_ubyte * 4)), + ('uv_mode_prob', ((ctypes.c_ubyte * 8) * 10)), + ('tx8x8_prob', ((ctypes.c_ubyte * 1) * 2)), + ('tx16x16_prob', ((ctypes.c_ubyte * 2) * 2)), + ('tx32x32_prob', ((ctypes.c_ubyte * 3) * 2)), + ('sb_ymode_probB', ((ctypes.c_ubyte * 1) * 4)), + ('sb_ymode_prob', ((ctypes.c_ubyte * 8) * 4)), + ('partition_prob', (((ctypes.c_ubyte * 4) * 16) * 2)), + ('uv_mode_probB', ((ctypes.c_ubyte * 1) * 10)), + 
('switchable_interp_prob', ((ctypes.c_ubyte * 2) * 4)), + ('comp_inter_prob', (ctypes.c_ubyte * 5)), + ('mbskip_probs', (ctypes.c_ubyte * 3)), + ('pad1', (ctypes.c_ubyte * 1)), + ('nmvc', nvdec_nmv_context), + ('single_ref_prob', ((ctypes.c_ubyte * 2) * 5)), + ('comp_ref_prob', (ctypes.c_ubyte * 5)), + ('pad2', (ctypes.c_ubyte * 17)), + ('probCoeffs', (((((ctypes.c_ubyte * 4) * 6) * 6) * 2) * 2)), + ('probCoeffs8x8', (((((ctypes.c_ubyte * 4) * 6) * 6) * 2) * 2)), + ('probCoeffs16x16', (((((ctypes.c_ubyte * 4) * 6) * 6) * 2) * 2)), + ('probCoeffs32x32', (((((ctypes.c_ubyte * 4) * 6) * 6) * 2) * 2)), +] +nvdec_vp9AdaptiveEntropyProbs_t = struct_nvdec_vp9AdaptiveEntropyProbs_s +class struct_nvdec_vp9EntropyProbs_s(Struct): pass +struct_nvdec_vp9EntropyProbs_s._fields_ = [ + ('kf_bmode_prob', (((ctypes.c_ubyte * 8) * 10) * 10)), + ('kf_bmode_probB', (((ctypes.c_ubyte * 1) * 10) * 10)), + ('ref_pred_probs', (ctypes.c_ubyte * 3)), + ('mb_segment_tree_probs', (ctypes.c_ubyte * 7)), + ('segment_pred_probs', (ctypes.c_ubyte * 3)), + ('ref_scores', (ctypes.c_ubyte * 4)), + ('prob_comppred', (ctypes.c_ubyte * 2)), + ('pad1', (ctypes.c_ubyte * 9)), + ('kf_uv_mode_prob', ((ctypes.c_ubyte * 8) * 10)), + ('kf_uv_mode_probB', ((ctypes.c_ubyte * 1) * 10)), + ('pad2', (ctypes.c_ubyte * 6)), + ('a', nvdec_vp9AdaptiveEntropyProbs_t), +] +nvdec_vp9EntropyProbs_t = struct_nvdec_vp9EntropyProbs_s +class struct_nvdec_vp9EntropyCounts_s(Struct): pass +struct_nvdec_vp9EntropyCounts_s._fields_ = [ + ('inter_mode_counts', (((ctypes.c_uint32 * 2) * 3) * 7)), + ('sb_ymode_counts', ((ctypes.c_uint32 * 10) * 4)), + ('uv_mode_counts', ((ctypes.c_uint32 * 10) * 10)), + ('partition_counts', ((ctypes.c_uint32 * 4) * 16)), + ('switchable_interp_counts', ((ctypes.c_uint32 * 3) * 4)), + ('intra_inter_count', ((ctypes.c_uint32 * 2) * 4)), + ('comp_inter_count', ((ctypes.c_uint32 * 2) * 5)), + ('single_ref_count', (((ctypes.c_uint32 * 2) * 2) * 5)), + ('comp_ref_count', ((ctypes.c_uint32 * 2) * 5)), + 
('tx32x32_count', ((ctypes.c_uint32 * 4) * 2)), + ('tx16x16_count', ((ctypes.c_uint32 * 3) * 2)), + ('tx8x8_count', ((ctypes.c_uint32 * 2) * 2)), + ('mbskip_count', ((ctypes.c_uint32 * 2) * 3)), + ('nmvcount', nvdec_nmv_context_counts), + ('countCoeffs', (((((ctypes.c_uint32 * 4) * 6) * 6) * 2) * 2)), + ('countCoeffs8x8', (((((ctypes.c_uint32 * 4) * 6) * 6) * 2) * 2)), + ('countCoeffs16x16', (((((ctypes.c_uint32 * 4) * 6) * 6) * 2) * 2)), + ('countCoeffs32x32', (((((ctypes.c_uint32 * 4) * 6) * 6) * 2) * 2)), + ('countEobs', (((((ctypes.c_uint32 * 6) * 6) * 2) * 2) * 4)), +] +nvdec_vp9EntropyCounts_t = struct_nvdec_vp9EntropyCounts_s +class struct__nvdec_pass2_otf_s(Struct): pass +struct__nvdec_pass2_otf_s._fields_ = [ + ('wrapped_session_key', (ctypes.c_uint32 * 4)), + ('wrapped_content_key', (ctypes.c_uint32 * 4)), + ('initialization_vector', (ctypes.c_uint32 * 4)), + ('enable_encryption', ctypes.c_uint32,1), + ('key_increment', ctypes.c_uint32,6), + ('encryption_mode', ctypes.c_uint32,4), + ('key_slot_index', ctypes.c_uint32,4), + ('ssm_en', ctypes.c_uint32,1), + ('reserved1', ctypes.c_uint32,16), +] +nvdec_pass2_otf_s = struct__nvdec_pass2_otf_s +class struct__nvdec_display_param_s(Struct): pass +struct__nvdec_display_param_s._fields_ = [ + ('enableTFOutput', ctypes.c_uint32,1), + ('VC1MapYFlag', ctypes.c_uint32,1), + ('MapYValue', ctypes.c_uint32,3), + ('VC1MapUVFlag', ctypes.c_uint32,1), + ('MapUVValue', ctypes.c_uint32,3), + ('OutStride', ctypes.c_uint32,8), + ('TilingFormat', ctypes.c_uint32,3), + ('OutputStructure', ctypes.c_uint32,1), + ('reserved0', ctypes.c_uint32,11), + ('OutputTop', (ctypes.c_int32 * 2)), + ('OutputBottom', (ctypes.c_int32 * 2)), + ('enableHistogram', ctypes.c_uint32,1), + ('HistogramStartX', ctypes.c_uint32,12), + ('HistogramStartY', ctypes.c_uint32,12), + ('reserved1', ctypes.c_uint32,7), + ('HistogramEndX', ctypes.c_uint32,12), + ('HistogramEndY', ctypes.c_uint32,12), + ('reserved2', ctypes.c_uint32,8), +] +nvdec_display_param_s = 
struct__nvdec_display_param_s +class struct__nvdec_dpb_entry_s(Struct): pass +struct__nvdec_dpb_entry_s._fields_ = [ + ('index', ctypes.c_uint32,7), + ('col_idx', ctypes.c_uint32,5), + ('state', ctypes.c_uint32,2), + ('is_long_term', ctypes.c_uint32,1), + ('not_existing', ctypes.c_uint32,1), + ('is_field', ctypes.c_uint32,1), + ('top_field_marking', ctypes.c_uint32,4), + ('bottom_field_marking', ctypes.c_uint32,4), + ('output_memory_layout', ctypes.c_uint32,1), + ('reserved', ctypes.c_uint32,6), + ('FieldOrderCnt', (ctypes.c_uint32 * 2)), + ('FrameIdx', ctypes.c_int32), +] +nvdec_dpb_entry_s = struct__nvdec_dpb_entry_s +class struct__nvdec_h264_pic_s(Struct): pass +struct__nvdec_h264_pic_s._fields_ = [ + ('encryption_params', nvdec_pass2_otf_s), + ('eos', (ctypes.c_ubyte * 16)), + ('explicitEOSPresentFlag', ctypes.c_ubyte), + ('hint_dump_en', ctypes.c_ubyte), + ('reserved0', (ctypes.c_ubyte * 2)), + ('stream_len', ctypes.c_uint32), + ('slice_count', ctypes.c_uint32), + ('mbhist_buffer_size', ctypes.c_uint32), + ('gptimer_timeout_value', ctypes.c_uint32), + ('log2_max_pic_order_cnt_lsb_minus4', ctypes.c_int32), + ('delta_pic_order_always_zero_flag', ctypes.c_int32), + ('frame_mbs_only_flag', ctypes.c_int32), + ('PicWidthInMbs', ctypes.c_int32), + ('FrameHeightInMbs', ctypes.c_int32), + ('tileFormat', ctypes.c_uint32,2), + ('gob_height', ctypes.c_uint32,3), + ('reserverd_surface_format', ctypes.c_uint32,27), + ('entropy_coding_mode_flag', ctypes.c_int32), + ('pic_order_present_flag', ctypes.c_int32), + ('num_ref_idx_l0_active_minus1', ctypes.c_int32), + ('num_ref_idx_l1_active_minus1', ctypes.c_int32), + ('deblocking_filter_control_present_flag', ctypes.c_int32), + ('redundant_pic_cnt_present_flag', ctypes.c_int32), + ('transform_8x8_mode_flag', ctypes.c_int32), + ('pitch_luma', ctypes.c_uint32), + ('pitch_chroma', ctypes.c_uint32), + ('luma_top_offset', ctypes.c_uint32), + ('luma_bot_offset', ctypes.c_uint32), + ('luma_frame_offset', ctypes.c_uint32), + 
('chroma_top_offset', ctypes.c_uint32), + ('chroma_bot_offset', ctypes.c_uint32), + ('chroma_frame_offset', ctypes.c_uint32), + ('HistBufferSize', ctypes.c_uint32), + ('MbaffFrameFlag', ctypes.c_uint32,1), + ('direct_8x8_inference_flag', ctypes.c_uint32,1), + ('weighted_pred_flag', ctypes.c_uint32,1), + ('constrained_intra_pred_flag', ctypes.c_uint32,1), + ('ref_pic_flag', ctypes.c_uint32,1), + ('field_pic_flag', ctypes.c_uint32,1), + ('bottom_field_flag', ctypes.c_uint32,1), + ('second_field', ctypes.c_uint32,1), + ('log2_max_frame_num_minus4', ctypes.c_uint32,4), + ('chroma_format_idc', ctypes.c_uint32,2), + ('pic_order_cnt_type', ctypes.c_uint32,2), + ('pic_init_qp_minus26', ctypes.c_int32,6), + ('chroma_qp_index_offset', ctypes.c_int32,5), + ('second_chroma_qp_index_offset', ctypes.c_int32,5), + ('weighted_bipred_idc', ctypes.c_uint32,2), + ('CurrPicIdx', ctypes.c_uint32,7), + ('CurrColIdx', ctypes.c_uint32,5), + ('frame_num', ctypes.c_uint32,16), + ('frame_surfaces', ctypes.c_uint32,1), + ('output_memory_layout', ctypes.c_uint32,1), + ('CurrFieldOrderCnt', (ctypes.c_int32 * 2)), + ('dpb', (nvdec_dpb_entry_s * 16)), + ('WeightScale', (((ctypes.c_ubyte * 4) * 4) * 6)), + ('WeightScale8x8', (((ctypes.c_ubyte * 8) * 8) * 2)), + ('num_inter_view_refs_lX', (ctypes.c_ubyte * 2)), + ('reserved1', (ctypes.c_char * 14)), + ('inter_view_refidx_lX', ((ctypes.c_byte * 16) * 2)), + ('lossless_ipred8x8_filter_enable', ctypes.c_uint32,1), + ('qpprime_y_zero_transform_bypass_flag', ctypes.c_uint32,1), + ('reserved2', ctypes.c_uint32,30), + ('displayPara', nvdec_display_param_s), + ('ssm', nvdec_pass2_otf_ext_s), +] +nvdec_h264_pic_s = struct__nvdec_h264_pic_s +enum__vc1_fcm_e = CEnum(ctypes.c_uint32) +FCM_PROGRESSIVE = enum__vc1_fcm_e.define('FCM_PROGRESSIVE', 0) +FCM_FRAME_INTERLACE = enum__vc1_fcm_e.define('FCM_FRAME_INTERLACE', 2) +FCM_FIELD_INTERLACE = enum__vc1_fcm_e.define('FCM_FIELD_INTERLACE', 3) + +vc1_fcm_e = enum__vc1_fcm_e +enum__syntax_vc1_ptype_e = 
CEnum(ctypes.c_uint32) +PTYPE_I = enum__syntax_vc1_ptype_e.define('PTYPE_I', 0) +PTYPE_P = enum__syntax_vc1_ptype_e.define('PTYPE_P', 1) +PTYPE_B = enum__syntax_vc1_ptype_e.define('PTYPE_B', 2) +PTYPE_BI = enum__syntax_vc1_ptype_e.define('PTYPE_BI', 3) +PTYPE_SKIPPED = enum__syntax_vc1_ptype_e.define('PTYPE_SKIPPED', 4) + +syntax_vc1_ptype_e = enum__syntax_vc1_ptype_e +enum_vc1_mvmode_e = CEnum(ctypes.c_uint32) +MVMODE_MIXEDMV = enum_vc1_mvmode_e.define('MVMODE_MIXEDMV', 0) +MVMODE_1MV = enum_vc1_mvmode_e.define('MVMODE_1MV', 1) +MVMODE_1MV_HALFPEL = enum_vc1_mvmode_e.define('MVMODE_1MV_HALFPEL', 2) +MVMODE_1MV_HALFPEL_BILINEAR = enum_vc1_mvmode_e.define('MVMODE_1MV_HALFPEL_BILINEAR', 3) +MVMODE_INTENSITY_COMPENSATION = enum_vc1_mvmode_e.define('MVMODE_INTENSITY_COMPENSATION', 4) + +enum__vc1_fptype_e = CEnum(ctypes.c_uint32) +FPTYPE_I_I = enum__vc1_fptype_e.define('FPTYPE_I_I', 0) +FPTYPE_I_P = enum__vc1_fptype_e.define('FPTYPE_I_P', 1) +FPTYPE_P_I = enum__vc1_fptype_e.define('FPTYPE_P_I', 2) +FPTYPE_P_P = enum__vc1_fptype_e.define('FPTYPE_P_P', 3) +FPTYPE_B_B = enum__vc1_fptype_e.define('FPTYPE_B_B', 4) +FPTYPE_B_BI = enum__vc1_fptype_e.define('FPTYPE_B_BI', 5) +FPTYPE_BI_B = enum__vc1_fptype_e.define('FPTYPE_BI_B', 6) +FPTYPE_BI_BI = enum__vc1_fptype_e.define('FPTYPE_BI_BI', 7) + +vc1_fptype_e = enum__vc1_fptype_e +enum__vc1_dqprofile_e = CEnum(ctypes.c_uint32) +DQPROFILE_ALL_FOUR_EDGES = enum__vc1_dqprofile_e.define('DQPROFILE_ALL_FOUR_EDGES', 0) +DQPROFILE_DOUBLE_EDGE = enum__vc1_dqprofile_e.define('DQPROFILE_DOUBLE_EDGE', 1) +DQPROFILE_SINGLE_EDGE = enum__vc1_dqprofile_e.define('DQPROFILE_SINGLE_EDGE', 2) +DQPROFILE_ALL_MACROBLOCKS = enum__vc1_dqprofile_e.define('DQPROFILE_ALL_MACROBLOCKS', 3) + +vc1_dqprofile_e = enum__vc1_dqprofile_e +class struct__nvdec_vc1_pic_s(Struct): pass +struct__nvdec_vc1_pic_s._fields_ = [ + ('encryption_params', nvdec_pass2_otf_s), + ('eos', (ctypes.c_ubyte * 16)), + ('prefixStartCode', (ctypes.c_ubyte * 4)), + 
('bitstream_offset', ctypes.c_uint32), + ('explicitEOSPresentFlag', ctypes.c_ubyte), + ('reserved0', (ctypes.c_ubyte * 3)), + ('stream_len', ctypes.c_uint32), + ('slice_count', ctypes.c_uint32), + ('scratch_pic_buffer_size', ctypes.c_uint32), + ('gptimer_timeout_value', ctypes.c_uint32), + ('FrameWidth', ctypes.c_uint16), + ('FrameHeight', ctypes.c_uint16), + ('profile', ctypes.c_ubyte), + ('postprocflag', ctypes.c_ubyte), + ('pulldown', ctypes.c_ubyte), + ('interlace', ctypes.c_ubyte), + ('tfcntrflag', ctypes.c_ubyte), + ('finterpflag', ctypes.c_ubyte), + ('psf', ctypes.c_ubyte), + ('tileFormat', ctypes.c_ubyte,2), + ('gob_height', ctypes.c_ubyte,3), + ('reserverd_surface_format', ctypes.c_ubyte,3), + ('multires', ctypes.c_ubyte), + ('syncmarker', ctypes.c_ubyte), + ('rangered', ctypes.c_ubyte), + ('maxbframes', ctypes.c_ubyte), + ('dquant', ctypes.c_ubyte), + ('panscan_flag', ctypes.c_ubyte), + ('refdist_flag', ctypes.c_ubyte), + ('quantizer', ctypes.c_ubyte), + ('extended_mv', ctypes.c_ubyte), + ('extended_dmv', ctypes.c_ubyte), + ('overlap', ctypes.c_ubyte), + ('vstransform', ctypes.c_ubyte), + ('refdist', ctypes.c_char), + ('reserved1', (ctypes.c_char * 3)), + ('fcm', vc1_fcm_e), + ('ptype', syntax_vc1_ptype_e), + ('tfcntr', ctypes.c_int32), + ('rptfrm', ctypes.c_int32), + ('tff', ctypes.c_int32), + ('rndctrl', ctypes.c_int32), + ('pqindex', ctypes.c_int32), + ('halfqp', ctypes.c_int32), + ('pquantizer', ctypes.c_int32), + ('postproc', ctypes.c_int32), + ('condover', ctypes.c_int32), + ('transacfrm', ctypes.c_int32), + ('transacfrm2', ctypes.c_int32), + ('transdctab', ctypes.c_int32), + ('pqdiff', ctypes.c_int32), + ('abspq', ctypes.c_int32), + ('dquantfrm', ctypes.c_int32), + ('dqprofile', vc1_dqprofile_e), + ('dqsbedge', ctypes.c_int32), + ('dqdbedge', ctypes.c_int32), + ('dqbilevel', ctypes.c_int32), + ('mvrange', ctypes.c_int32), + ('mvmode', enum_vc1_mvmode_e), + ('mvmode2', enum_vc1_mvmode_e), + ('lumscale', ctypes.c_int32), + ('lumshift', 
ctypes.c_int32), + ('mvtab', ctypes.c_int32), + ('cbptab', ctypes.c_int32), + ('ttmbf', ctypes.c_int32), + ('ttfrm', ctypes.c_int32), + ('bfraction', ctypes.c_int32), + ('fptype', vc1_fptype_e), + ('numref', ctypes.c_int32), + ('reffield', ctypes.c_int32), + ('dmvrange', ctypes.c_int32), + ('intcompfield', ctypes.c_int32), + ('lumscale1', ctypes.c_int32), + ('lumshift1', ctypes.c_int32), + ('lumscale2', ctypes.c_int32), + ('lumshift2', ctypes.c_int32), + ('mbmodetab', ctypes.c_int32), + ('imvtab', ctypes.c_int32), + ('icbptab', ctypes.c_int32), + ('fourmvbptab', ctypes.c_int32), + ('fourmvswitch', ctypes.c_int32), + ('intcomp', ctypes.c_int32), + ('twomvbptab', ctypes.c_int32), + ('rangeredfrm', ctypes.c_int32), + ('HistBufferSize', ctypes.c_uint32), + ('FrameStride', (ctypes.c_uint32 * 2)), + ('luma_top_offset', ctypes.c_uint32), + ('luma_bot_offset', ctypes.c_uint32), + ('luma_frame_offset', ctypes.c_uint32), + ('chroma_top_offset', ctypes.c_uint32), + ('chroma_bot_offset', ctypes.c_uint32), + ('chroma_frame_offset', ctypes.c_uint32), + ('CodedWidth', ctypes.c_uint16), + ('CodedHeight', ctypes.c_uint16), + ('loopfilter', ctypes.c_ubyte), + ('fastuvmc', ctypes.c_ubyte), + ('output_memory_layout', ctypes.c_ubyte), + ('ref_memory_layout', (ctypes.c_ubyte * 2)), + ('reserved3', (ctypes.c_ubyte * 3)), + ('displayPara', nvdec_display_param_s), + ('ssm', nvdec_pass2_otf_ext_s), +] +nvdec_vc1_pic_s = struct__nvdec_vc1_pic_s +class struct__nvdec_mpeg2_pic_s(Struct): pass +struct__nvdec_mpeg2_pic_s._fields_ = [ + ('encryption_params', nvdec_pass2_otf_s), + ('eos', (ctypes.c_ubyte * 16)), + ('explicitEOSPresentFlag', ctypes.c_ubyte), + ('reserved0', (ctypes.c_ubyte * 3)), + ('stream_len', ctypes.c_uint32), + ('slice_count', ctypes.c_uint32), + ('gptimer_timeout_value', ctypes.c_uint32), + ('FrameWidth', ctypes.c_int16), + ('FrameHeight', ctypes.c_int16), + ('picture_structure', ctypes.c_ubyte), + ('picture_coding_type', ctypes.c_ubyte), + ('intra_dc_precision', 
ctypes.c_ubyte), + ('frame_pred_frame_dct', ctypes.c_char), + ('concealment_motion_vectors', ctypes.c_char), + ('intra_vlc_format', ctypes.c_char), + ('tileFormat', ctypes.c_ubyte,2), + ('gob_height', ctypes.c_ubyte,3), + ('reserverd_surface_format', ctypes.c_ubyte,3), + ('reserved1', ctypes.c_char), + ('f_code', (ctypes.c_char * 4)), + ('PicWidthInMbs', ctypes.c_uint16), + ('FrameHeightInMbs', ctypes.c_uint16), + ('pitch_luma', ctypes.c_uint32), + ('pitch_chroma', ctypes.c_uint32), + ('luma_top_offset', ctypes.c_uint32), + ('luma_bot_offset', ctypes.c_uint32), + ('luma_frame_offset', ctypes.c_uint32), + ('chroma_top_offset', ctypes.c_uint32), + ('chroma_bot_offset', ctypes.c_uint32), + ('chroma_frame_offset', ctypes.c_uint32), + ('HistBufferSize', ctypes.c_uint32), + ('output_memory_layout', ctypes.c_uint16), + ('alternate_scan', ctypes.c_uint16), + ('secondfield', ctypes.c_uint16), + ('rounding_type', ctypes.c_uint16), + ('MbInfoSizeInBytes', ctypes.c_uint32), + ('q_scale_type', ctypes.c_uint32), + ('top_field_first', ctypes.c_uint32), + ('full_pel_fwd_vector', ctypes.c_uint32), + ('full_pel_bwd_vector', ctypes.c_uint32), + ('quant_mat_8x8intra', (ctypes.c_ubyte * 64)), + ('quant_mat_8x8nonintra', (ctypes.c_ubyte * 64)), + ('ref_memory_layout', (ctypes.c_uint32 * 2)), + ('displayPara', nvdec_display_param_s), + ('ssm', nvdec_pass2_otf_ext_s), +] +nvdec_mpeg2_pic_s = struct__nvdec_mpeg2_pic_s +class struct__nvdec_mpeg4_pic_s(Struct): pass +struct__nvdec_mpeg4_pic_s._fields_ = [ + ('encryption_params', nvdec_pass2_otf_s), + ('eos', (ctypes.c_ubyte * 16)), + ('explicitEOSPresentFlag', ctypes.c_ubyte), + ('reserved2', (ctypes.c_ubyte * 3)), + ('stream_len', ctypes.c_uint32), + ('slice_count', ctypes.c_uint32), + ('scratch_pic_buffer_size', ctypes.c_uint32), + ('gptimer_timeout_value', ctypes.c_uint32), + ('FrameWidth', ctypes.c_int16), + ('FrameHeight', ctypes.c_int16), + ('vop_time_increment_bitcount', ctypes.c_char), + ('resync_marker_disable', ctypes.c_char), + 
('tileFormat', ctypes.c_ubyte,2), + ('gob_height', ctypes.c_ubyte,3), + ('reserverd_surface_format', ctypes.c_ubyte,3), + ('reserved3', ctypes.c_char), + ('width', ctypes.c_int32), + ('height', ctypes.c_int32), + ('FrameStride', (ctypes.c_uint32 * 2)), + ('luma_top_offset', ctypes.c_uint32), + ('luma_bot_offset', ctypes.c_uint32), + ('luma_frame_offset', ctypes.c_uint32), + ('chroma_top_offset', ctypes.c_uint32), + ('chroma_bot_offset', ctypes.c_uint32), + ('chroma_frame_offset', ctypes.c_uint32), + ('HistBufferSize', ctypes.c_uint32), + ('trd', (ctypes.c_int32 * 2)), + ('trb', (ctypes.c_int32 * 2)), + ('divx_flags', ctypes.c_int32), + ('vop_fcode_forward', ctypes.c_int16), + ('vop_fcode_backward', ctypes.c_int16), + ('interlaced', ctypes.c_ubyte), + ('quant_type', ctypes.c_ubyte), + ('quarter_sample', ctypes.c_ubyte), + ('short_video_header', ctypes.c_ubyte), + ('curr_output_memory_layout', ctypes.c_ubyte), + ('ptype', ctypes.c_ubyte), + ('rnd', ctypes.c_ubyte), + ('alternate_vertical_scan_flag', ctypes.c_ubyte), + ('top_field_flag', ctypes.c_ubyte), + ('reserved0', (ctypes.c_ubyte * 3)), + ('intra_quant_mat', (ctypes.c_ubyte * 64)), + ('nonintra_quant_mat', (ctypes.c_ubyte * 64)), + ('ref_memory_layout', (ctypes.c_ubyte * 2)), + ('reserved1', (ctypes.c_ubyte * 34)), + ('displayPara', nvdec_display_param_s), +] +nvdec_mpeg4_pic_s = struct__nvdec_mpeg4_pic_s +enum_VP8_FRAME_TYPE = CEnum(ctypes.c_uint32) +VP8_KEYFRAME = enum_VP8_FRAME_TYPE.define('VP8_KEYFRAME', 0) +VP8_INTERFRAME = enum_VP8_FRAME_TYPE.define('VP8_INTERFRAME', 1) + +enum_VP8_FRAME_SFC_ID = CEnum(ctypes.c_uint32) +VP8_GOLDEN_FRAME_SFC = enum_VP8_FRAME_SFC_ID.define('VP8_GOLDEN_FRAME_SFC', 0) +VP8_ALTREF_FRAME_SFC = enum_VP8_FRAME_SFC_ID.define('VP8_ALTREF_FRAME_SFC', 1) +VP8_LAST_FRAME_SFC = enum_VP8_FRAME_SFC_ID.define('VP8_LAST_FRAME_SFC', 2) +VP8_CURR_FRAME_SFC = enum_VP8_FRAME_SFC_ID.define('VP8_CURR_FRAME_SFC', 3) + +class struct__nvdec_vp8_pic_s(Struct): pass +struct__nvdec_vp8_pic_s._fields_ = 
[ + ('encryption_params', nvdec_pass2_otf_s), + ('gptimer_timeout_value', ctypes.c_uint32), + ('FrameWidth', ctypes.c_uint16), + ('FrameHeight', ctypes.c_uint16), + ('keyFrame', ctypes.c_ubyte), + ('version', ctypes.c_ubyte), + ('tileFormat', ctypes.c_ubyte,2), + ('gob_height', ctypes.c_ubyte,3), + ('reserverd_surface_format', ctypes.c_ubyte,3), + ('errorConcealOn', ctypes.c_ubyte), + ('firstPartSize', ctypes.c_uint32), + ('HistBufferSize', ctypes.c_uint32), + ('VLDBufferSize', ctypes.c_uint32), + ('FrameStride', (ctypes.c_uint32 * 2)), + ('luma_top_offset', ctypes.c_uint32), + ('luma_bot_offset', ctypes.c_uint32), + ('luma_frame_offset', ctypes.c_uint32), + ('chroma_top_offset', ctypes.c_uint32), + ('chroma_bot_offset', ctypes.c_uint32), + ('chroma_frame_offset', ctypes.c_uint32), + ('displayPara', nvdec_display_param_s), + ('current_output_memory_layout', ctypes.c_char), + ('output_memory_layout', (ctypes.c_char * 3)), + ('segmentation_feature_data_update', ctypes.c_ubyte), + ('reserved1', (ctypes.c_ubyte * 3)), + ('resultValue', ctypes.c_uint32), + ('partition_offset', (ctypes.c_uint32 * 8)), + ('ssm', nvdec_pass2_otf_ext_s), +] +nvdec_vp8_pic_s = struct__nvdec_vp8_pic_s +class struct__bytes_of_data_s(Struct): pass +struct__bytes_of_data_s._fields_ = [ + ('clear_bytes', ctypes.c_uint32), + ('encypted_bytes', ctypes.c_uint32), +] +bytes_of_data_s = struct__bytes_of_data_s +class struct__nvdec_pass1_input_data_s(Struct): pass +struct__nvdec_pass1_input_data_s._fields_ = [ + ('sample_size', (bytes_of_data_s * 32)), + ('initialization_vector', ((ctypes.c_uint32 * 4) * 32)), + ('IvValid', (ctypes.c_ubyte * 32)), + ('stream_len', ctypes.c_uint32), + ('clearBufferSize', ctypes.c_uint32), + ('reencryptBufferSize', ctypes.c_uint32), + ('vp8coeffPartitonBufferSize', ctypes.c_uint32), + ('PrevWidth', ctypes.c_uint32), + ('num_nals', ctypes.c_uint32,16), + ('drm_mode', ctypes.c_uint32,8), + ('key_sel', ctypes.c_uint32,4), + ('codec', ctypes.c_uint32,4), + 
('TotalSizeOfClearData', ctypes.c_uint32), + ('SliceHdrOffset', ctypes.c_uint32), + ('EncryptBlkCnt', ctypes.c_uint32,16), + ('SkipBlkCnt', ctypes.c_uint32,16), +] +nvdec_pass1_input_data_s = struct__nvdec_pass1_input_data_s +class struct__nvdec_pass1_output_data_s(Struct): pass +class struct__nvdec_pass1_output_data_s_0(ctypes.Union): pass +struct__nvdec_pass1_output_data_s_0._fields_ = [ + ('partition_size', (ctypes.c_uint32 * 8)), + ('vp9_frame_sizes', (ctypes.c_uint32 * 8)), +] +struct__nvdec_pass1_output_data_s._anonymous_ = ['_0'] +struct__nvdec_pass1_output_data_s._fields_ = [ + ('clear_header_size', ctypes.c_uint32), + ('reencrypt_data_size', ctypes.c_uint32), + ('clear_token_data_size', ctypes.c_uint32), + ('key_increment', ctypes.c_uint32,6), + ('encryption_mode', ctypes.c_uint32,4), + ('bReEncrypted', ctypes.c_uint32,1), + ('bvp9SuperFrame', ctypes.c_uint32,1), + ('vp9NumFramesMinus1', ctypes.c_uint32,3), + ('reserved1', ctypes.c_uint32,17), + ('wrapped_session_key', (ctypes.c_uint32 * 4)), + ('wrapped_content_key', (ctypes.c_uint32 * 4)), + ('initialization_vector', (ctypes.c_uint32 * 4)), + ('_0', struct__nvdec_pass1_output_data_s_0), + ('vp9_clear_hdr_size', (ctypes.c_uint32 * 8)), +] +nvdec_pass1_output_data_s = struct__nvdec_pass1_output_data_s +class struct__scale_factors_reference_s(Struct): pass +struct__scale_factors_reference_s._fields_ = [ + ('x_scale_fp', ctypes.c_int16), + ('y_scale_fp', ctypes.c_int16), +] +scale_factors_reference_s = struct__scale_factors_reference_s +class struct__frame_info_t(Struct): pass +struct__frame_info_t._fields_ = [ + ('width', ctypes.c_uint16), + ('height', ctypes.c_uint16), + ('stride', (ctypes.c_uint16 * 2)), + ('frame_buffer_idx', ctypes.c_uint32), +] +frame_info_t = struct__frame_info_t +class struct__ref_frame_struct_s(Struct): pass +struct__ref_frame_struct_s._fields_ = [ + ('info', frame_info_t), + ('sf', scale_factors_reference_s), + ('sign_bias', ctypes.c_ubyte,1), + ('wmtype', ctypes.c_ubyte,2), + 
('reserved_rf', ctypes.c_ubyte,5), + ('frame_off', ctypes.c_int16), + ('roffset', ctypes.c_int16), +] +ref_frame_struct_s = struct__ref_frame_struct_s +class struct__av1_fgs_cfg_t(Struct): pass +struct__av1_fgs_cfg_t._fields_ = [ + ('apply_grain', ctypes.c_uint16,1), + ('overlap_flag', ctypes.c_uint16,1), + ('clip_to_restricted_range', ctypes.c_uint16,1), + ('chroma_scaling_from_luma', ctypes.c_uint16,1), + ('num_y_points_b', ctypes.c_uint16,1), + ('num_cb_points_b', ctypes.c_uint16,1), + ('num_cr_points_b', ctypes.c_uint16,1), + ('scaling_shift', ctypes.c_uint16,4), + ('reserved_fgs', ctypes.c_uint16,5), + ('sw_random_seed', ctypes.c_uint16), + ('cb_offset', ctypes.c_int16), + ('cr_offset', ctypes.c_int16), + ('cb_mult', ctypes.c_char), + ('cb_luma_mult', ctypes.c_char), + ('cr_mult', ctypes.c_char), + ('cr_luma_mult', ctypes.c_char), +] +av1_fgs_cfg_t = struct__av1_fgs_cfg_t +class struct__nvdec_av1_pic_s(Struct): pass +struct__nvdec_av1_pic_s._fields_ = [ + ('encryption_params', nvdec_pass2_otf_s), + ('ssm', nvdec_pass2_otf_ext_s), + ('fgs_cfg', av1_fgs_cfg_t), + ('gptimer_timeout_value', ctypes.c_uint32), + ('stream_len', ctypes.c_uint32), + ('reserved12', ctypes.c_uint32), + ('use_128x128_superblock', ctypes.c_uint32,1), + ('chroma_format', ctypes.c_uint32,2), + ('bit_depth', ctypes.c_uint32,4), + ('enable_filter_intra', ctypes.c_uint32,1), + ('enable_intra_edge_filter', ctypes.c_uint32,1), + ('enable_interintra_compound', ctypes.c_uint32,1), + ('enable_masked_compound', ctypes.c_uint32,1), + ('enable_dual_filter', ctypes.c_uint32,1), + ('reserved10', ctypes.c_uint32,1), + ('reserved0', ctypes.c_uint32,3), + ('enable_jnt_comp', ctypes.c_uint32,1), + ('reserved1', ctypes.c_uint32,1), + ('enable_cdef', ctypes.c_uint32,1), + ('reserved11', ctypes.c_uint32,1), + ('enable_fgs', ctypes.c_uint32,1), + ('enable_substream_decoding', ctypes.c_uint32,1), + ('reserved2', ctypes.c_uint32,10), + ('frame_type', ctypes.c_uint32,2), + ('show_frame', ctypes.c_uint32,1), + 
('reserved13', ctypes.c_uint32,1), + ('disable_cdf_update', ctypes.c_uint32,1), + ('allow_screen_content_tools', ctypes.c_uint32,1), + ('cur_frame_force_integer_mv', ctypes.c_uint32,1), + ('scale_denom_minus9', ctypes.c_uint32,3), + ('allow_intrabc', ctypes.c_uint32,1), + ('allow_high_precision_mv', ctypes.c_uint32,1), + ('interp_filter', ctypes.c_uint32,3), + ('switchable_motion_mode', ctypes.c_uint32,1), + ('use_ref_frame_mvs', ctypes.c_uint32,1), + ('refresh_frame_context', ctypes.c_uint32,1), + ('delta_q_present_flag', ctypes.c_uint32,1), + ('delta_q_res', ctypes.c_uint32,2), + ('delta_lf_present_flag', ctypes.c_uint32,1), + ('delta_lf_res', ctypes.c_uint32,2), + ('delta_lf_multi', ctypes.c_uint32,1), + ('reserved3', ctypes.c_uint32,1), + ('coded_lossless', ctypes.c_uint32,1), + ('tile_enabled', ctypes.c_uint32,1), + ('reserved4', ctypes.c_uint32,2), + ('superres_is_scaled', ctypes.c_uint32,1), + ('reserved_fh', ctypes.c_uint32,1), + ('tile_cols', ctypes.c_uint32,8), + ('tile_rows', ctypes.c_uint32,8), + ('context_update_tile_id', ctypes.c_uint32,16), + ('cdef_damping_minus_3', ctypes.c_uint32,2), + ('cdef_bits', ctypes.c_uint32,2), + ('frame_tx_mode', ctypes.c_uint32,3), + ('frame_reference_mode', ctypes.c_uint32,2), + ('skip_mode_flag', ctypes.c_uint32,1), + ('skip_ref0', ctypes.c_uint32,4), + ('skip_ref1', ctypes.c_uint32,4), + ('allow_warp', ctypes.c_uint32,1), + ('reduced_tx_set_used', ctypes.c_uint32,1), + ('ref_scaling_enable', ctypes.c_uint32,1), + ('reserved5', ctypes.c_uint32,1), + ('reserved6', ctypes.c_uint32,10), + ('superres_upscaled_width', ctypes.c_uint16), + ('superres_luma_step', ctypes.c_uint16), + ('superres_chroma_step', ctypes.c_uint16), + ('superres_init_luma_subpel_x', ctypes.c_uint16), + ('superres_init_chroma_subpel_x', ctypes.c_uint16), + ('base_qindex', ctypes.c_ubyte), + ('y_dc_delta_q', ctypes.c_char), + ('u_dc_delta_q', ctypes.c_char), + ('v_dc_delta_q', ctypes.c_char), + ('u_ac_delta_q', ctypes.c_char), + ('v_ac_delta_q', 
ctypes.c_char), + ('qm_y', ctypes.c_ubyte), + ('qm_u', ctypes.c_ubyte), + ('qm_v', ctypes.c_ubyte), + ('cdef_y_pri_strength', ctypes.c_uint32), + ('cdef_uv_pri_strength', ctypes.c_uint32), + ('cdef_y_sec_strength', ctypes.c_uint32,16), + ('cdef_uv_sec_strength', ctypes.c_uint32,16), + ('segment_enabled', ctypes.c_ubyte), + ('segment_update_map', ctypes.c_ubyte), + ('reserved7', ctypes.c_ubyte), + ('segment_temporal_update', ctypes.c_ubyte), + ('segment_feature_data', ((ctypes.c_int16 * 8) * 8)), + ('last_active_segid', ctypes.c_ubyte), + ('segid_preskip', ctypes.c_ubyte), + ('prevsegid_flag', ctypes.c_ubyte), + ('segment_quant_sign', ctypes.c_ubyte,8), + ('filter_level', (ctypes.c_ubyte * 2)), + ('filter_level_u', ctypes.c_ubyte), + ('filter_level_v', ctypes.c_ubyte), + ('lf_sharpness_level', ctypes.c_ubyte), + ('lf_ref_deltas', (ctypes.c_char * 8)), + ('lf_mode_deltas', (ctypes.c_char * 2)), + ('lr_type', ctypes.c_ubyte), + ('lr_unit_size', ctypes.c_ubyte), + ('current_frame', frame_info_t), + ('ref_frame', (ref_frame_struct_s * 7)), + ('use_temporal0_mvs', ctypes.c_uint32,1), + ('use_temporal1_mvs', ctypes.c_uint32,1), + ('use_temporal2_mvs', ctypes.c_uint32,1), + ('mf1_type', ctypes.c_uint32,3), + ('mf2_type', ctypes.c_uint32,3), + ('mf3_type', ctypes.c_uint32,3), + ('reserved_mfmv', ctypes.c_uint32,20), + ('mfmv_offset', ((ctypes.c_int16 * 7) * 3)), + ('mfmv_side', ((ctypes.c_char * 7) * 3)), + ('tileformat', ctypes.c_ubyte,2), + ('gob_height', ctypes.c_ubyte,3), + ('errorConcealOn', ctypes.c_ubyte,1), + ('reserver8', ctypes.c_ubyte,2), + ('stream_error_detection', ctypes.c_ubyte,1), + ('mv_error_detection', ctypes.c_ubyte,1), + ('coeff_error_detection', ctypes.c_ubyte,1), + ('reserved_eh', ctypes.c_ubyte,5), + ('Av1FltTopOffset', ctypes.c_uint32), + ('Av1FltVertOffset', ctypes.c_uint32), + ('Av1CdefVertOffset', ctypes.c_uint32), + ('Av1LrVertOffset', ctypes.c_uint32), + ('Av1HusVertOffset', ctypes.c_uint32), + ('Av1FgsVertOffset', ctypes.c_uint32), + 
('enable_histogram', ctypes.c_uint32,1), + ('sw_skip_start_length', ctypes.c_uint32,14), + ('reserved_stat', ctypes.c_uint32,17), +] +nvdec_av1_pic_s = struct__nvdec_av1_pic_s +class struct__AV1FilmGrainMemory(Struct): pass +struct__AV1FilmGrainMemory._fields_ = [ + ('scaling_lut_y', (ctypes.c_ubyte * 256)), + ('scaling_lut_cb', (ctypes.c_ubyte * 256)), + ('scaling_lut_cr', (ctypes.c_ubyte * 256)), + ('cropped_luma_grain_block', (ctypes.c_int16 * 4096)), + ('cropped_cb_grain_block', (ctypes.c_int16 * 1024)), + ('cropped_cr_grain_block', (ctypes.c_int16 * 1024)), +] +AV1FilmGrainMemory = struct__AV1FilmGrainMemory +class struct__AV1TileInfo_OLD(Struct): pass +struct__AV1TileInfo_OLD._fields_ = [ + ('width_in_sb', ctypes.c_ubyte), + ('height_in_sb', ctypes.c_ubyte), + ('tile_start_b0', ctypes.c_ubyte), + ('tile_start_b1', ctypes.c_ubyte), + ('tile_start_b2', ctypes.c_ubyte), + ('tile_start_b3', ctypes.c_ubyte), + ('tile_end_b0', ctypes.c_ubyte), + ('tile_end_b1', ctypes.c_ubyte), + ('tile_end_b2', ctypes.c_ubyte), + ('tile_end_b3', ctypes.c_ubyte), + ('padding', (ctypes.c_ubyte * 6)), +] +AV1TileInfo_OLD = struct__AV1TileInfo_OLD +class struct__AV1TileInfo(Struct): pass +struct__AV1TileInfo._fields_ = [ + ('width_in_sb', ctypes.c_ubyte), + ('padding_w', ctypes.c_ubyte), + ('height_in_sb', ctypes.c_ubyte), + ('padding_h', ctypes.c_ubyte), +] +AV1TileInfo = struct__AV1TileInfo +class struct__AV1TileStreamInfo(Struct): pass +struct__AV1TileStreamInfo._fields_ = [ + ('tile_start', ctypes.c_uint32), + ('tile_end', ctypes.c_uint32), + ('padding', (ctypes.c_ubyte * 8)), +] +AV1TileStreamInfo = struct__AV1TileStreamInfo +class struct__nvdec_new_h264_pic_s(Struct): pass +struct__nvdec_new_h264_pic_s._fields_ = [ + ('encryption_params', nvdec_pass2_otf_s), + ('eos', (ctypes.c_ubyte * 16)), + ('explicitEOSPresentFlag', ctypes.c_ubyte), + ('hint_dump_en', ctypes.c_ubyte), + ('reserved0', (ctypes.c_ubyte * 2)), + ('stream_len', ctypes.c_uint32), + ('slice_count', 
ctypes.c_uint32), + ('mbhist_buffer_size', ctypes.c_uint32), + ('gptimer_timeout_value', ctypes.c_uint32), + ('log2_max_pic_order_cnt_lsb_minus4', ctypes.c_int32), + ('delta_pic_order_always_zero_flag', ctypes.c_int32), + ('frame_mbs_only_flag', ctypes.c_int32), + ('PicWidthInMbs', ctypes.c_int32), + ('FrameHeightInMbs', ctypes.c_int32), + ('tileFormat', ctypes.c_uint32,2), + ('gob_height', ctypes.c_uint32,3), + ('reserverd_surface_format', ctypes.c_uint32,27), + ('entropy_coding_mode_flag', ctypes.c_int32), + ('pic_order_present_flag', ctypes.c_int32), + ('num_ref_idx_l0_active_minus1', ctypes.c_int32), + ('num_ref_idx_l1_active_minus1', ctypes.c_int32), + ('deblocking_filter_control_present_flag', ctypes.c_int32), + ('redundant_pic_cnt_present_flag', ctypes.c_int32), + ('transform_8x8_mode_flag', ctypes.c_int32), + ('pitch_luma', ctypes.c_uint32), + ('pitch_chroma', ctypes.c_uint32), + ('luma_top_offset', ctypes.c_uint32), + ('luma_bot_offset', ctypes.c_uint32), + ('luma_frame_offset', ctypes.c_uint32), + ('chroma_top_offset', ctypes.c_uint32), + ('chroma_bot_offset', ctypes.c_uint32), + ('chroma_frame_offset', ctypes.c_uint32), + ('HistBufferSize', ctypes.c_uint32), + ('MbaffFrameFlag', ctypes.c_uint32,1), + ('direct_8x8_inference_flag', ctypes.c_uint32,1), + ('weighted_pred_flag', ctypes.c_uint32,1), + ('constrained_intra_pred_flag', ctypes.c_uint32,1), + ('ref_pic_flag', ctypes.c_uint32,1), + ('field_pic_flag', ctypes.c_uint32,1), + ('bottom_field_flag', ctypes.c_uint32,1), + ('second_field', ctypes.c_uint32,1), + ('log2_max_frame_num_minus4', ctypes.c_uint32,4), + ('chroma_format_idc', ctypes.c_uint32,2), + ('pic_order_cnt_type', ctypes.c_uint32,2), + ('pic_init_qp_minus26', ctypes.c_int32,6), + ('chroma_qp_index_offset', ctypes.c_int32,5), + ('second_chroma_qp_index_offset', ctypes.c_int32,5), + ('weighted_bipred_idc', ctypes.c_uint32,2), + ('CurrPicIdx', ctypes.c_uint32,7), + ('CurrColIdx', ctypes.c_uint32,5), + ('frame_num', ctypes.c_uint32,16), + 
('frame_surfaces', ctypes.c_uint32,1), + ('output_memory_layout', ctypes.c_uint32,1), + ('CurrFieldOrderCnt', (ctypes.c_int32 * 2)), + ('dpb', (nvdec_dpb_entry_s * 16)), + ('WeightScale', (((ctypes.c_ubyte * 4) * 4) * 6)), + ('WeightScale8x8', (((ctypes.c_ubyte * 8) * 8) * 2)), + ('num_inter_view_refs_lX', (ctypes.c_ubyte * 2)), + ('reserved1', (ctypes.c_char * 14)), + ('inter_view_refidx_lX', ((ctypes.c_byte * 16) * 2)), + ('lossless_ipred8x8_filter_enable', ctypes.c_uint32,1), + ('qpprime_y_zero_transform_bypass_flag', ctypes.c_uint32,1), + ('reserved2', ctypes.c_uint32,30), + ('displayPara', nvdec_display_param_s), + ('ssm', nvdec_pass2_otf_ext_s), +] +nvdec_new_h264_pic_s = struct__nvdec_new_h264_pic_s +class nvdec_crc_s(Struct): pass +nvdec_crc_s._fields_ = [ + ('dbg_crc_enable_partb', ctypes.c_uint32,1), + ('dbg_crc_enable_partc', ctypes.c_uint32,1), + ('dbg_crc_enable_partd', ctypes.c_uint32,1), + ('dbg_crc_enable_parte', ctypes.c_uint32,1), + ('dbg_crc_intf_partb', ctypes.c_uint32,6), + ('dbg_crc_intf_partc', ctypes.c_uint32,6), + ('dbg_crc_intf_partd', ctypes.c_uint32,6), + ('dbg_crc_intf_parte', ctypes.c_uint32,6), + ('reserved0', ctypes.c_uint32,4), + ('dbg_crc_partb_golden', (ctypes.c_uint32 * 4)), + ('dbg_crc_partc_golden', (ctypes.c_uint32 * 4)), + ('dbg_crc_partd_golden', (ctypes.c_uint32 * 4)), + ('dbg_crc_parte_golden', (ctypes.c_uint32 * 4)), + ('dbg_crc_comp_partb', ctypes.c_uint32,4), + ('dbg_crc_comp_partc', ctypes.c_uint32,4), + ('dbg_crc_comp_partd', ctypes.c_uint32,4), + ('dbg_crc_comp_parte', ctypes.c_uint32,4), + ('reserved1', ctypes.c_uint32,16), + ('reserved2', (ctypes.c_ubyte * 56)), +] +class _anonunion12(ctypes.Union): pass NvUPtr = ctypes.c_uint64 -_anonunion0._fields_ = [ +_anonunion12._fields_ = [ ('v', NvUPtr), ('p', ctypes.c_void_p), ] -class _anonunion1(ctypes.Union): pass -_anonunion1._fields_ = [ +class _anonunion13(ctypes.Union): pass +_anonunion13._fields_ = [ ('v', NvUPtr), ('p', ctypes.c_void_p), ] @@ -24,9 +1345,16 @@ 
struct_NV0000_ALLOC_PARAMETERS._fields_ = [ ('pOsPidInfo', NvP64), ] NV0000_ALLOC_PARAMETERS = struct_NV0000_ALLOC_PARAMETERS +class struct_NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS(Struct): pass +NvU64 = ctypes.c_uint64 +struct_NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS._fields_ = [ + ('offset', NvU64), + ('limit', NvU64), + ('hVASpace', NvHandle), +] +NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS = struct_NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS class struct_NV0080_ALLOC_PARAMETERS(Struct): pass NvV32 = ctypes.c_uint32 -NvU64 = ctypes.c_uint64 struct_NV0080_ALLOC_PARAMETERS._fields_ = [ ('deviceId', NvU32), ('hClientShare', NvHandle), @@ -2142,6 +3470,36 @@ struct_NV90F1_CTRL_VASPACE_GET_VAS_HEAP_INFO_PARAMS._fields_ = [ ('numFreeBlocks', NvU32), ] NV90F1_CTRL_VASPACE_GET_VAS_HEAP_INFO_PARAMS = struct_NV90F1_CTRL_VASPACE_GET_VAS_HEAP_INFO_PARAMS +NVA06F_CTRL_GPFIFO_SCHEDULE_PARAMS = struct_NVA06F_CTRL_GPFIFO_SCHEDULE_PARAMS +NVA06F_CTRL_BIND_PARAMS = struct_NVA06F_CTRL_BIND_PARAMS +class struct_NVA06F_CTRL_SET_ERROR_NOTIFIER_PARAMS(Struct): pass +struct_NVA06F_CTRL_SET_ERROR_NOTIFIER_PARAMS._fields_ = [ + ('bNotifyEachChannelInTSG', NvBool), +] +NVA06F_CTRL_SET_ERROR_NOTIFIER_PARAMS = struct_NVA06F_CTRL_SET_ERROR_NOTIFIER_PARAMS +class struct_NVA06F_CTRL_INTERLEAVE_LEVEL_PARAMS(Struct): pass +struct_NVA06F_CTRL_INTERLEAVE_LEVEL_PARAMS._fields_ = [ + ('channelInterleaveLevel', NvU32), +] +NVA06F_CTRL_INTERLEAVE_LEVEL_PARAMS = struct_NVA06F_CTRL_INTERLEAVE_LEVEL_PARAMS +NVA06F_CTRL_SET_INTERLEAVE_LEVEL_PARAMS = struct_NVA06F_CTRL_INTERLEAVE_LEVEL_PARAMS +NVA06F_CTRL_GET_INTERLEAVE_LEVEL_PARAMS = struct_NVA06F_CTRL_INTERLEAVE_LEVEL_PARAMS +class struct_NVA06F_CTRL_RESTART_RUNLIST_PARAMS(Struct): pass +struct_NVA06F_CTRL_RESTART_RUNLIST_PARAMS._fields_ = [ + ('bForceRestart', NvBool), + ('bBypassWait', NvBool), +] +NVA06F_CTRL_RESTART_RUNLIST_PARAMS = struct_NVA06F_CTRL_RESTART_RUNLIST_PARAMS +class struct_NVA06F_CTRL_STOP_CHANNEL_PARAMS(Struct): pass 
+struct_NVA06F_CTRL_STOP_CHANNEL_PARAMS._fields_ = [ + ('bImmediate', NvBool), +] +NVA06F_CTRL_STOP_CHANNEL_PARAMS = struct_NVA06F_CTRL_STOP_CHANNEL_PARAMS +class struct_NVA06F_CTRL_GET_CONTEXT_ID_PARAMS(Struct): pass +struct_NVA06F_CTRL_GET_CONTEXT_ID_PARAMS._fields_ = [ + ('contextId', NvU32), +] +NVA06F_CTRL_GET_CONTEXT_ID_PARAMS = struct_NVA06F_CTRL_GET_CONTEXT_ID_PARAMS class struct_NV0000_CTRL_CLIENT_GET_ADDR_SPACE_TYPE_PARAMS(Struct): pass struct_NV0000_CTRL_CLIENT_GET_ADDR_SPACE_TYPE_PARAMS._fields_ = [ ('hObject', NvHandle), @@ -11859,6 +13217,402 @@ NV_WARN_NULL_OBJECT = nv_status_codes.define('NV_WARN_NULL_OBJECT', 65543) NV_WARN_OUT_OF_RANGE = nv_status_codes.define('NV_WARN_OUT_OF_RANGE', 65544) NV_WARN_THRESHOLD_CROSSED = nv_status_codes.define('NV_WARN_THRESHOLD_CROSSED', 65545) +NVC9B0_VIDEO_DECODER = (0x0000C9B0) +NVC9B0_NOP = (0x00000100) +NVC9B0_PM_TRIGGER = (0x00000140) +NVC9B0_SET_APPLICATION_ID = (0x00000200) +NVC9B0_SET_APPLICATION_ID_ID_MPEG12 = (0x00000001) +NVC9B0_SET_APPLICATION_ID_ID_VC1 = (0x00000002) +NVC9B0_SET_APPLICATION_ID_ID_H264 = (0x00000003) +NVC9B0_SET_APPLICATION_ID_ID_MPEG4 = (0x00000004) +NVC9B0_SET_APPLICATION_ID_ID_VP8 = (0x00000005) +NVC9B0_SET_APPLICATION_ID_ID_CTR64 = (0x00000006) +NVC9B0_SET_APPLICATION_ID_ID_HEVC = (0x00000007) +NVC9B0_SET_APPLICATION_ID_ID_NEW_H264 = (0x00000008) +NVC9B0_SET_APPLICATION_ID_ID_VP9 = (0x00000009) +NVC9B0_SET_APPLICATION_ID_ID_PASS1 = (0x0000000A) +NVC9B0_SET_APPLICATION_ID_ID_HEVC_PARSER = (0x0000000C) +NVC9B0_SET_APPLICATION_ID_ID_UCODE_TEST = (0x0000000D) +NVC9B0_SET_APPLICATION_ID_ID_HWDRM_PR_DECRYPTAUDIO = (0x0000000E) +NVC9B0_SET_APPLICATION_ID_ID_HWDRM_PR_DECRYPTAUDIOMULTIPLE = (0x0000000F) +NVC9B0_SET_APPLICATION_ID_ID_HWDRM_PR_PREPROCESSENCRYPTEDDATA = (0x00000010) +NVC9B0_SET_APPLICATION_ID_ID_VP9_WITH_PARSER = (0x00000011) +NVC9B0_SET_APPLICATION_ID_ID_AVD = (0x00000012) +NVC9B0_SET_APPLICATION_ID_ID_HW_DRM_PR4_DECRYPTCONTENTMULTIPLE = (0x00000013) 
+NVC9B0_SET_APPLICATION_ID_ID_DHKE = (0x00000020) +NVC9B0_SET_WATCHDOG_TIMER = (0x00000204) +NVC9B0_SEMAPHORE_A = (0x00000240) +NVC9B0_SEMAPHORE_B = (0x00000244) +NVC9B0_SEMAPHORE_C = (0x00000248) +NVC9B0_CTX_SAVE_AREA = (0x0000024C) +NVC9B0_CTX_SWITCH = (0x00000250) +NVC9B0_CTX_SWITCH_OP_CTX_UPDATE = (0x00000000) +NVC9B0_CTX_SWITCH_OP_CTX_SAVE = (0x00000001) +NVC9B0_CTX_SWITCH_OP_CTX_RESTORE = (0x00000002) +NVC9B0_CTX_SWITCH_OP_CTX_FORCERESTORE = (0x00000003) +NVC9B0_CTX_SWITCH_CTXID_VALID_FALSE = (0x00000000) +NVC9B0_CTX_SWITCH_CTXID_VALID_TRUE = (0x00000001) +NVC9B0_SET_SEMAPHORE_PAYLOAD_LOWER = (0x00000254) +NVC9B0_SET_SEMAPHORE_PAYLOAD_UPPER = (0x00000258) +NVC9B0_SET_MONITORED_FENCE_SIGNAL_ADDRESS_BASE_A = (0x0000025C) +NVC9B0_SET_MONITORED_FENCE_SIGNAL_ADDRESS_BASE_B = (0x00000260) +NVC9B0_EXECUTE = (0x00000300) +NVC9B0_EXECUTE_NOTIFY_DISABLE = (0x00000000) +NVC9B0_EXECUTE_NOTIFY_ENABLE = (0x00000001) +NVC9B0_EXECUTE_NOTIFY_ON_END = (0x00000000) +NVC9B0_EXECUTE_NOTIFY_ON_BEGIN = (0x00000001) +NVC9B0_EXECUTE_PREDICATION_DISABLE = (0x00000000) +NVC9B0_EXECUTE_PREDICATION_ENABLE = (0x00000001) +NVC9B0_EXECUTE_PREDICATION_OP_EQUAL_ZERO = (0x00000000) +NVC9B0_EXECUTE_PREDICATION_OP_NOT_EQUAL_ZERO = (0x00000001) +NVC9B0_EXECUTE_AWAKEN_DISABLE = (0x00000000) +NVC9B0_EXECUTE_AWAKEN_ENABLE = (0x00000001) +NVC9B0_SEMAPHORE_D = (0x00000304) +NVC9B0_SEMAPHORE_D_STRUCTURE_SIZE_ONE = (0x00000000) +NVC9B0_SEMAPHORE_D_STRUCTURE_SIZE_FOUR = (0x00000001) +NVC9B0_SEMAPHORE_D_STRUCTURE_SIZE_TWO = (0x00000002) +NVC9B0_SEMAPHORE_D_AWAKEN_ENABLE_FALSE = (0x00000000) +NVC9B0_SEMAPHORE_D_AWAKEN_ENABLE_TRUE = (0x00000001) +NVC9B0_SEMAPHORE_D_OPERATION_RELEASE = (0x00000000) +NVC9B0_SEMAPHORE_D_OPERATION_RESERVED_0 = (0x00000001) +NVC9B0_SEMAPHORE_D_OPERATION_RESERVED_1 = (0x00000002) +NVC9B0_SEMAPHORE_D_OPERATION_TRAP = (0x00000003) +NVC9B0_SEMAPHORE_D_FLUSH_DISABLE_FALSE = (0x00000000) +NVC9B0_SEMAPHORE_D_FLUSH_DISABLE_TRUE = (0x00000001) +NVC9B0_SEMAPHORE_D_TRAP_TYPE_UNCONDITIONAL 
= (0x00000000) +NVC9B0_SEMAPHORE_D_TRAP_TYPE_CONDITIONAL = (0x00000001) +NVC9B0_SEMAPHORE_D_TRAP_TYPE_CONDITIONAL_EXT = (0x00000002) +NVC9B0_SEMAPHORE_D_PAYLOAD_SIZE_32BIT = (0x00000000) +NVC9B0_SEMAPHORE_D_PAYLOAD_SIZE_64BIT = (0x00000001) +NVC9B0_SET_PREDICATION_OFFSET_UPPER = (0x00000308) +NVC9B0_SET_PREDICATION_OFFSET_LOWER = (0x0000030C) +NVC9B0_SET_AUXILIARY_DATA_BUFFER = (0x00000310) +NVC9B0_SET_CONTROL_PARAMS = (0x00000400) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_MPEG1 = (0x00000000) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_MPEG2 = (0x00000001) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_VC1 = (0x00000002) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_H264 = (0x00000003) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_MPEG4 = (0x00000004) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_DIVX3 = (0x00000004) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_VP8 = (0x00000005) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_HEVC = (0x00000007) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_VP9 = (0x00000009) +NVC9B0_SET_CONTROL_PARAMS_CODEC_TYPE_AV1 = (0x0000000A) +NVC9B0_SET_CONTROL_PARAMS_TESTRUN_ENV_TRACE3D_RUN = (0x00000000) +NVC9B0_SET_CONTROL_PARAMS_TESTRUN_ENV_PROD_RUN = (0x00000001) +NVC9B0_SET_DRV_PIC_SETUP_OFFSET = (0x00000404) +NVC9B0_SET_IN_BUF_BASE_OFFSET = (0x00000408) +NVC9B0_SET_PICTURE_INDEX = (0x0000040C) +NVC9B0_SET_SLICE_OFFSETS_BUF_OFFSET = (0x00000410) +NVC9B0_SET_COLOC_DATA_OFFSET = (0x00000414) +NVC9B0_SET_HISTORY_OFFSET = (0x00000418) +NVC9B0_SET_DISPLAY_BUF_SIZE = (0x0000041C) +NVC9B0_SET_HISTOGRAM_OFFSET = (0x00000420) +NVC9B0_SET_NVDEC_STATUS_OFFSET = (0x00000424) +NVC9B0_SET_DISPLAY_BUF_LUMA_OFFSET = (0x00000428) +NVC9B0_SET_DISPLAY_BUF_CHROMA_OFFSET = (0x0000042C) +NVC9B0_SET_PICTURE_LUMA_OFFSET0 = (0x00000430) +NVC9B0_SET_PICTURE_LUMA_OFFSET1 = (0x00000434) +NVC9B0_SET_PICTURE_LUMA_OFFSET2 = (0x00000438) +NVC9B0_SET_PICTURE_LUMA_OFFSET3 = (0x0000043C) +NVC9B0_SET_PICTURE_LUMA_OFFSET4 = (0x00000440) +NVC9B0_SET_PICTURE_LUMA_OFFSET5 = (0x00000444) +NVC9B0_SET_PICTURE_LUMA_OFFSET6 = (0x00000448) 
+NVC9B0_SET_PICTURE_LUMA_OFFSET7 = (0x0000044C) +NVC9B0_SET_PICTURE_LUMA_OFFSET8 = (0x00000450) +NVC9B0_SET_PICTURE_LUMA_OFFSET9 = (0x00000454) +NVC9B0_SET_PICTURE_LUMA_OFFSET10 = (0x00000458) +NVC9B0_SET_PICTURE_LUMA_OFFSET11 = (0x0000045C) +NVC9B0_SET_PICTURE_LUMA_OFFSET12 = (0x00000460) +NVC9B0_SET_PICTURE_LUMA_OFFSET13 = (0x00000464) +NVC9B0_SET_PICTURE_LUMA_OFFSET14 = (0x00000468) +NVC9B0_SET_PICTURE_LUMA_OFFSET15 = (0x0000046C) +NVC9B0_SET_PICTURE_LUMA_OFFSET16 = (0x00000470) +NVC9B0_SET_PICTURE_CHROMA_OFFSET0 = (0x00000474) +NVC9B0_SET_PICTURE_CHROMA_OFFSET1 = (0x00000478) +NVC9B0_SET_PICTURE_CHROMA_OFFSET2 = (0x0000047C) +NVC9B0_SET_PICTURE_CHROMA_OFFSET3 = (0x00000480) +NVC9B0_SET_PICTURE_CHROMA_OFFSET4 = (0x00000484) +NVC9B0_SET_PICTURE_CHROMA_OFFSET5 = (0x00000488) +NVC9B0_SET_PICTURE_CHROMA_OFFSET6 = (0x0000048C) +NVC9B0_SET_PICTURE_CHROMA_OFFSET7 = (0x00000490) +NVC9B0_SET_PICTURE_CHROMA_OFFSET8 = (0x00000494) +NVC9B0_SET_PICTURE_CHROMA_OFFSET9 = (0x00000498) +NVC9B0_SET_PICTURE_CHROMA_OFFSET10 = (0x0000049C) +NVC9B0_SET_PICTURE_CHROMA_OFFSET11 = (0x000004A0) +NVC9B0_SET_PICTURE_CHROMA_OFFSET12 = (0x000004A4) +NVC9B0_SET_PICTURE_CHROMA_OFFSET13 = (0x000004A8) +NVC9B0_SET_PICTURE_CHROMA_OFFSET14 = (0x000004AC) +NVC9B0_SET_PICTURE_CHROMA_OFFSET15 = (0x000004B0) +NVC9B0_SET_PICTURE_CHROMA_OFFSET16 = (0x000004B4) +NVC9B0_SET_PIC_SCRATCH_BUF_OFFSET = (0x000004B8) +NVC9B0_SET_EXTERNAL_MVBUFFER_OFFSET = (0x000004BC) +NVC9B0_SET_SUB_SAMPLE_MAP_OFFSET = (0x000004C0) +NVC9B0_SET_SUB_SAMPLE_MAP_IV_OFFSET = (0x000004C4) +NVC9B0_SET_INTRA_TOP_BUF_OFFSET = (0x000004C8) +NVC9B0_SET_TILE_SIZE_BUF_OFFSET = (0x000004CC) +NVC9B0_SET_FILTER_BUFFER_OFFSET = (0x000004D0) +NVC9B0_SET_CRC_STRUCT_OFFSET = (0x000004D4) +NVC9B0_SET_PR_SSM_CONTENT_INFO_BUF_OFFSET = (0x000004D8) +NVC9B0_H264_SET_MBHIST_BUF_OFFSET = (0x00000500) +NVC9B0_VP8_SET_PROB_DATA_OFFSET = (0x00000540) +NVC9B0_VP8_SET_HEADER_PARTITION_BUF_BASE_OFFSET = (0x00000544) +NVC9B0_HEVC_SET_SCALING_LIST_OFFSET = 
(0x00000580) +NVC9B0_HEVC_SET_TILE_SIZES_OFFSET = (0x00000584) +NVC9B0_HEVC_SET_FILTER_BUFFER_OFFSET = (0x00000588) +NVC9B0_HEVC_SET_SAO_BUFFER_OFFSET = (0x0000058C) +NVC9B0_HEVC_SET_SLICE_INFO_BUFFER_OFFSET = (0x00000590) +NVC9B0_HEVC_SET_SLICE_GROUP_INDEX = (0x00000594) +NVC9B0_VP9_SET_PROB_TAB_BUF_OFFSET = (0x000005C0) +NVC9B0_VP9_SET_CTX_COUNTER_BUF_OFFSET = (0x000005C4) +NVC9B0_VP9_SET_SEGMENT_READ_BUF_OFFSET = (0x000005C8) +NVC9B0_VP9_SET_SEGMENT_WRITE_BUF_OFFSET = (0x000005CC) +NVC9B0_VP9_SET_TILE_SIZE_BUF_OFFSET = (0x000005D0) +NVC9B0_VP9_SET_COL_MVWRITE_BUF_OFFSET = (0x000005D4) +NVC9B0_VP9_SET_COL_MVREAD_BUF_OFFSET = (0x000005D8) +NVC9B0_VP9_SET_FILTER_BUFFER_OFFSET = (0x000005DC) +NVC9B0_VP9_PARSER_SET_PIC_SETUP_OFFSET = (0x000005E0) +NVC9B0_VP9_PARSER_SET_PREV_PIC_SETUP_OFFSET = (0x000005E4) +NVC9B0_VP9_PARSER_SET_PROB_TAB_BUF_OFFSET = (0x000005E8) +NVC9B0_VP9_SET_HINT_DUMP_BUF_OFFSET = (0x000005EC) +NVC9B0_PASS1_SET_CLEAR_HEADER_OFFSET = (0x00000600) +NVC9B0_PASS1_SET_RE_ENCRYPT_OFFSET = (0x00000604) +NVC9B0_PASS1_SET_VP8_TOKEN_OFFSET = (0x00000608) +NVC9B0_PASS1_SET_INPUT_DATA_OFFSET = (0x0000060C) +NVC9B0_PASS1_SET_OUTPUT_DATA_SIZE_OFFSET = (0x00000610) +NVC9B0_AV1_SET_PROB_TAB_READ_BUF_OFFSET = (0x00000640) +NVC9B0_AV1_SET_PROB_TAB_WRITE_BUF_OFFSET = (0x00000644) +NVC9B0_AV1_SET_SEGMENT_READ_BUF_OFFSET = (0x00000648) +NVC9B0_AV1_SET_SEGMENT_WRITE_BUF_OFFSET = (0x0000064C) +NVC9B0_AV1_SET_COL_MV0_READ_BUF_OFFSET = (0x00000650) +NVC9B0_AV1_SET_COL_MV1_READ_BUF_OFFSET = (0x00000654) +NVC9B0_AV1_SET_COL_MV2_READ_BUF_OFFSET = (0x00000658) +NVC9B0_AV1_SET_COL_MVWRITE_BUF_OFFSET = (0x0000065C) +NVC9B0_AV1_SET_GLOBAL_MODEL_BUF_OFFSET = (0x00000660) +NVC9B0_AV1_SET_FILM_GRAIN_BUF_OFFSET = (0x00000664) +NVC9B0_AV1_SET_TILE_STREAM_INFO_BUF_OFFSET = (0x00000668) +NVC9B0_AV1_SET_SUB_STREAM_ENTRY_BUF_OFFSET = (0x0000066C) +NVC9B0_AV1_SET_HINT_DUMP_BUF_OFFSET = (0x00000670) +NVC9B0_H264_SET_SCALING_LIST_OFFSET = (0x00000680) +NVC9B0_H264_SET_VLDHIST_BUF_OFFSET = 
(0x00000684) +NVC9B0_H264_SET_EDOBOFFSET0 = (0x00000688) +NVC9B0_H264_SET_EDOBOFFSET1 = (0x0000068C) +NVC9B0_H264_SET_EDOBOFFSET2 = (0x00000690) +NVC9B0_H264_SET_EDOBOFFSET3 = (0x00000694) +NVC9B0_SET_CONTENT_INITIAL_VECTOR = lambda b: (0x00000C00 + (b)*0x00000004) +NVC9B0_SET_CTL_COUNT = (0x00000C10) +NVC9B0_SET_UPPER_SRC = (0x00000C14) +NVC9B0_SET_LOWER_SRC = (0x00000C18) +NVC9B0_SET_UPPER_DST = (0x00000C1C) +NVC9B0_SET_LOWER_DST = (0x00000C20) +NVC9B0_SET_BLOCK_COUNT = (0x00000C24) +NVC9B0_PR_SET_REQUEST_BUF_OFFSET = (0x00000D00) +NVC9B0_PR_SET_REQUEST_BUF_SIZE = (0x00000D04) +NVC9B0_PR_SET_RESPONSE_BUF_OFFSET = (0x00000D08) +NVC9B0_PR_SET_RESPONSE_BUF_SIZE = (0x00000D0C) +NVC9B0_PR_SET_REQUEST_MESSAGE_BUF_OFFSET = (0x00000D10) +NVC9B0_PR_SET_RESPONSE_MESSAGE_BUF_OFFSET = (0x00000D14) +NVC9B0_PR_SET_LOCAL_DECRYPT_BUF_OFFSET = (0x00000D18) +NVC9B0_PR_SET_LOCAL_DECRYPT_BUF_SIZE = (0x00000D1C) +NVC9B0_PR_SET_CONTENT_DECRYPT_INFO_BUF_OFFSET = (0x00000D20) +NVC9B0_PR_SET_REENCRYPTED_BITSTREAM_BUF_OFFSET = (0x00000D24) +NVC9B0_DH_KE_SET_CHALLENGE_BUF_OFFSET = (0x00000E00) +NVC9B0_DH_KE_SET_RESPONSE_BUF_OFFSET = (0x00000E04) +NVC9B0_SET_SESSION_KEY = lambda b: (0x00000F00 + (b)*0x00000004) +NVC9B0_SET_CONTENT_KEY = lambda b: (0x00000F10 + (b)*0x00000004) +NVC9B0_PM_TRIGGER_END = (0x00001114) +NVC9B0_ERROR_NONE = (0x00000000) +NVC9B0_OS_ERROR_EXECUTE_INSUFFICIENT_DATA = (0x00000001) +NVC9B0_OS_ERROR_SEMAPHORE_INSUFFICIENT_DATA = (0x00000002) +NVC9B0_OS_ERROR_INVALID_METHOD = (0x00000003) +NVC9B0_OS_ERROR_INVALID_DMA_PAGE = (0x00000004) +NVC9B0_OS_ERROR_UNHANDLED_INTERRUPT = (0x00000005) +NVC9B0_OS_ERROR_EXCEPTION = (0x00000006) +NVC9B0_OS_ERROR_INVALID_CTXSW_REQUEST = (0x00000007) +NVC9B0_OS_ERROR_APPLICATION = (0x00000008) +NVC9B0_OS_ERROR_SW_BREAKPT = (0x00000009) +NVC9B0_OS_INTERRUPT_EXECUTE_AWAKEN = (0x00000100) +NVC9B0_OS_INTERRUPT_BACKEND_SEMAPHORE_AWAKEN = (0x00000200) +NVC9B0_OS_INTERRUPT_CTX_ERROR_FBIF = (0x00000300) +NVC9B0_OS_INTERRUPT_LIMIT_VIOLATION = 
(0x00000400) +NVC9B0_OS_INTERRUPT_LIMIT_AND_FBIF_CTX_ERROR = (0x00000500) +NVC9B0_OS_INTERRUPT_HALT_ENGINE = (0x00000600) +NVC9B0_OS_INTERRUPT_TRAP_NONSTALL = (0x00000700) +NVC9B0_H264_VLD_ERR_SEQ_DATA_INCONSISTENT = (0x00004001) +NVC9B0_H264_VLD_ERR_PIC_DATA_INCONSISTENT = (0x00004002) +NVC9B0_H264_VLD_ERR_SLC_DATA_BUF_ADDR_OUT_OF_BOUNDS = (0x00004100) +NVC9B0_H264_VLD_ERR_BITSTREAM_ERROR = (0x00004101) +NVC9B0_H264_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID = (0x000041F8) +NVC9B0_H264_VLD_ERR_SLC_HDR_OUT_SIZE_NOT_MULT256 = (0x00004200) +NVC9B0_H264_VLD_ERR_SLC_DATA_OUT_SIZE_NOT_MULT256 = (0x00004201) +NVC9B0_H264_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID = (0x00004203) +NVC9B0_H264_VLD_ERR_CTX_DMA_ID_SLC_HDR_OUT_INVALID = (0x00004204) +NVC9B0_H264_VLD_ERR_SLC_HDR_OUT_BUF_TOO_SMALL = (0x00004205) +NVC9B0_H264_VLD_ERR_SLC_HDR_OUT_BUF_ALREADY_VALID = (0x00004206) +NVC9B0_H264_VLD_ERR_SLC_DATA_OUT_BUF_TOO_SMALL = (0x00004207) +NVC9B0_H264_VLD_ERR_DATA_BUF_CNT_TOO_SMALL = (0x00004208) +NVC9B0_H264_VLD_ERR_BITSTREAM_EMPTY = (0x00004209) +NVC9B0_H264_VLD_ERR_FRAME_WIDTH_TOO_LARGE = (0x0000420A) +NVC9B0_H264_VLD_ERR_FRAME_HEIGHT_TOO_LARGE = (0x0000420B) +NVC9B0_H264_VLD_ERR_HIST_BUF_TOO_SMALL = (0x00004300) +NVC9B0_VC1_VLD_ERR_PIC_DATA_BUF_ADDR_OUT_OF_BOUND = (0x00005100) +NVC9B0_VC1_VLD_ERR_BITSTREAM_ERROR = (0x00005101) +NVC9B0_VC1_VLD_ERR_PIC_HDR_OUT_SIZE_NOT_MULT256 = (0x00005200) +NVC9B0_VC1_VLD_ERR_PIC_DATA_OUT_SIZE_NOT_MULT256 = (0x00005201) +NVC9B0_VC1_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID = (0x00005202) +NVC9B0_VC1_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID = (0x00005203) +NVC9B0_VC1_VLD_ERR_CTX_DMA_ID_PIC_HDR_OUT_INVALID = (0x00005204) +NVC9B0_VC1_VLD_ERR_SLC_HDR_OUT_BUF_TOO_SMALL = (0x00005205) +NVC9B0_VC1_VLD_ERR_PIC_HDR_OUT_BUF_ALREADY_VALID = (0x00005206) +NVC9B0_VC1_VLD_ERR_PIC_DATA_OUT_BUF_TOO_SMALL = (0x00005207) +NVC9B0_VC1_VLD_ERR_DATA_INFO_IN_BUF_TOO_SMALL = (0x00005208) +NVC9B0_VC1_VLD_ERR_BITSTREAM_EMPTY = (0x00005209) +NVC9B0_VC1_VLD_ERR_FRAME_WIDTH_TOO_LARGE = 
(0x0000520A) +NVC9B0_VC1_VLD_ERR_FRAME_HEIGHT_TOO_LARGE = (0x0000520B) +NVC9B0_VC1_VLD_ERR_PIC_DATA_OUT_BUF_FULL_TIME_OUT = (0x00005300) +NVC9B0_MPEG12_VLD_ERR_SLC_DATA_BUF_ADDR_OUT_OF_BOUNDS = (0x00006100) +NVC9B0_MPEG12_VLD_ERR_BITSTREAM_ERROR = (0x00006101) +NVC9B0_MPEG12_VLD_ERR_SLC_DATA_OUT_SIZE_NOT_MULT256 = (0x00006200) +NVC9B0_MPEG12_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID = (0x00006201) +NVC9B0_MPEG12_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID = (0x00006202) +NVC9B0_MPEG12_VLD_ERR_SLC_DATA_OUT_BUF_TOO_SMALL = (0x00006203) +NVC9B0_MPEG12_VLD_ERR_DATA_INFO_IN_BUF_TOO_SMALL = (0x00006204) +NVC9B0_MPEG12_VLD_ERR_BITSTREAM_EMPTY = (0x00006205) +NVC9B0_MPEG12_VLD_ERR_INVALID_PIC_STRUCTURE = (0x00006206) +NVC9B0_MPEG12_VLD_ERR_INVALID_PIC_CODING_TYPE = (0x00006207) +NVC9B0_MPEG12_VLD_ERR_FRAME_WIDTH_TOO_LARGE = (0x00006208) +NVC9B0_MPEG12_VLD_ERR_FRAME_HEIGHT_TOO_LARGE = (0x00006209) +NVC9B0_MPEG12_VLD_ERR_SLC_DATA_OUT_BUF_FULL_TIME_OUT = (0x00006300) +NVC9B0_CMN_VLD_ERR_PDEC_RETURNED_ERROR = (0x00007101) +NVC9B0_CMN_VLD_ERR_EDOB_FLUSH_TIME_OUT = (0x00007102) +NVC9B0_CMN_VLD_ERR_EDOB_REWIND_TIME_OUT = (0x00007103) +NVC9B0_CMN_VLD_ERR_VLD_WD_TIME_OUT = (0x00007104) +NVC9B0_CMN_VLD_ERR_NUM_SLICES_ZERO = (0x00007105) +NVC9B0_MPEG4_VLD_ERR_PIC_DATA_BUF_ADDR_OUT_OF_BOUND = (0x00008100) +NVC9B0_MPEG4_VLD_ERR_BITSTREAM_ERROR = (0x00008101) +NVC9B0_MPEG4_VLD_ERR_PIC_HDR_OUT_SIZE_NOT_MULT256 = (0x00008200) +NVC9B0_MPEG4_VLD_ERR_PIC_DATA_OUT_SIZE_NOT_MULT256 = (0x00008201) +NVC9B0_MPEG4_VLD_ERR_CTX_DMA_ID_CTRL_IN_INVALID = (0x00008202) +NVC9B0_MPEG4_VLD_ERR_CTX_DMA_ID_FLOW_CTRL_INVALID = (0x00008203) +NVC9B0_MPEG4_VLD_ERR_CTX_DMA_ID_PIC_HDR_OUT_INVALID = (0x00008204) +NVC9B0_MPEG4_VLD_ERR_SLC_HDR_OUT_BUF_TOO_SMALL = (0x00008205) +NVC9B0_MPEG4_VLD_ERR_PIC_HDR_OUT_BUF_ALREADY_VALID = (0x00008206) +NVC9B0_MPEG4_VLD_ERR_PIC_DATA_OUT_BUF_TOO_SMALL = (0x00008207) +NVC9B0_MPEG4_VLD_ERR_DATA_INFO_IN_BUF_TOO_SMALL = (0x00008208) +NVC9B0_MPEG4_VLD_ERR_BITSTREAM_EMPTY = (0x00008209) 
+NVC9B0_MPEG4_VLD_ERR_FRAME_WIDTH_TOO_LARGE = (0x0000820A) +NVC9B0_MPEG4_VLD_ERR_FRAME_HEIGHT_TOO_LARGE = (0x0000820B) +NVC9B0_MPEG4_VLD_ERR_PIC_DATA_OUT_BUF_FULL_TIME_OUT = (0x00051E01) +NVC9B0_DEC_ERROR_MPEG12_APPTIMER_EXPIRED = (0xDEC10001) +NVC9B0_DEC_ERROR_MPEG12_MVTIMER_EXPIRED = (0xDEC10002) +NVC9B0_DEC_ERROR_MPEG12_INVALID_TOKEN = (0xDEC10003) +NVC9B0_DEC_ERROR_MPEG12_SLICEDATA_MISSING = (0xDEC10004) +NVC9B0_DEC_ERROR_MPEG12_HWERR_INTERRUPT = (0xDEC10005) +NVC9B0_DEC_ERROR_MPEG12_DETECTED_VLD_FAILURE = (0xDEC10006) +NVC9B0_DEC_ERROR_MPEG12_PICTURE_INIT = (0xDEC10100) +NVC9B0_DEC_ERROR_MPEG12_STATEMACHINE_FAILURE = (0xDEC10101) +NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_PIC = (0xDEC10901) +NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_UCODE = (0xDEC10902) +NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_FC = (0xDEC10903) +NVC9B0_DEC_ERROR_MPEG12_INVALID_CTXID_SLH = (0xDEC10904) +NVC9B0_DEC_ERROR_MPEG12_INVALID_UCODE_SIZE = (0xDEC10905) +NVC9B0_DEC_ERROR_MPEG12_INVALID_SLICE_COUNT = (0xDEC10906) +NVC9B0_DEC_ERROR_VC1_APPTIMER_EXPIRED = (0xDEC20001) +NVC9B0_DEC_ERROR_VC1_MVTIMER_EXPIRED = (0xDEC20002) +NVC9B0_DEC_ERROR_VC1_INVALID_TOKEN = (0xDEC20003) +NVC9B0_DEC_ERROR_VC1_SLICEDATA_MISSING = (0xDEC20004) +NVC9B0_DEC_ERROR_VC1_HWERR_INTERRUPT = (0xDEC20005) +NVC9B0_DEC_ERROR_VC1_DETECTED_VLD_FAILURE = (0xDEC20006) +NVC9B0_DEC_ERROR_VC1_TIMEOUT_POLLING_FOR_DATA = (0xDEC20007) +NVC9B0_DEC_ERROR_VC1_PDEC_PIC_END_UNALIGNED = (0xDEC20008) +NVC9B0_DEC_ERROR_VC1_WDTIMER_EXPIRED = (0xDEC20009) +NVC9B0_DEC_ERROR_VC1_ERRINTSTART = (0xDEC20010) +NVC9B0_DEC_ERROR_VC1_IQT_ERRINT = (0xDEC20011) +NVC9B0_DEC_ERROR_VC1_MC_ERRINT = (0xDEC20012) +NVC9B0_DEC_ERROR_VC1_MC_IQT_ERRINT = (0xDEC20013) +NVC9B0_DEC_ERROR_VC1_REC_ERRINT = (0xDEC20014) +NVC9B0_DEC_ERROR_VC1_REC_IQT_ERRINT = (0xDEC20015) +NVC9B0_DEC_ERROR_VC1_REC_MC_ERRINT = (0xDEC20016) +NVC9B0_DEC_ERROR_VC1_REC_MC_IQT_ERRINT = (0xDEC20017) +NVC9B0_DEC_ERROR_VC1_DBF_ERRINT = (0xDEC20018) +NVC9B0_DEC_ERROR_VC1_DBF_IQT_ERRINT = (0xDEC20019) 
+NVC9B0_DEC_ERROR_VC1_DBF_MC_ERRINT = (0xDEC2001A) +NVC9B0_DEC_ERROR_VC1_DBF_MC_IQT_ERRINT = (0xDEC2001B) +NVC9B0_DEC_ERROR_VC1_DBF_REC_ERRINT = (0xDEC2001C) +NVC9B0_DEC_ERROR_VC1_DBF_REC_IQT_ERRINT = (0xDEC2001D) +NVC9B0_DEC_ERROR_VC1_DBF_REC_MC_ERRINT = (0xDEC2001E) +NVC9B0_DEC_ERROR_VC1_DBF_REC_MC_IQT_ERRINT = (0xDEC2001F) +NVC9B0_DEC_ERROR_VC1_PICTURE_INIT = (0xDEC20100) +NVC9B0_DEC_ERROR_VC1_STATEMACHINE_FAILURE = (0xDEC20101) +NVC9B0_DEC_ERROR_VC1_INVALID_CTXID_PIC = (0xDEC20901) +NVC9B0_DEC_ERROR_VC1_INVALID_CTXID_UCODE = (0xDEC20902) +NVC9B0_DEC_ERROR_VC1_INVALID_CTXID_FC = (0xDEC20903) +NVC9B0_DEC_ERROR_VC1_INVAILD_CTXID_SLH = (0xDEC20904) +NVC9B0_DEC_ERROR_VC1_INVALID_UCODE_SIZE = (0xDEC20905) +NVC9B0_DEC_ERROR_VC1_INVALID_SLICE_COUNT = (0xDEC20906) +NVC9B0_DEC_ERROR_H264_APPTIMER_EXPIRED = (0xDEC30001) +NVC9B0_DEC_ERROR_H264_MVTIMER_EXPIRED = (0xDEC30002) +NVC9B0_DEC_ERROR_H264_INVALID_TOKEN = (0xDEC30003) +NVC9B0_DEC_ERROR_H264_SLICEDATA_MISSING = (0xDEC30004) +NVC9B0_DEC_ERROR_H264_HWERR_INTERRUPT = (0xDEC30005) +NVC9B0_DEC_ERROR_H264_DETECTED_VLD_FAILURE = (0xDEC30006) +NVC9B0_DEC_ERROR_H264_ERRINTSTART = (0xDEC30010) +NVC9B0_DEC_ERROR_H264_IQT_ERRINT = (0xDEC30011) +NVC9B0_DEC_ERROR_H264_MC_ERRINT = (0xDEC30012) +NVC9B0_DEC_ERROR_H264_MC_IQT_ERRINT = (0xDEC30013) +NVC9B0_DEC_ERROR_H264_REC_ERRINT = (0xDEC30014) +NVC9B0_DEC_ERROR_H264_REC_IQT_ERRINT = (0xDEC30015) +NVC9B0_DEC_ERROR_H264_REC_MC_ERRINT = (0xDEC30016) +NVC9B0_DEC_ERROR_H264_REC_MC_IQT_ERRINT = (0xDEC30017) +NVC9B0_DEC_ERROR_H264_DBF_ERRINT = (0xDEC30018) +NVC9B0_DEC_ERROR_H264_DBF_IQT_ERRINT = (0xDEC30019) +NVC9B0_DEC_ERROR_H264_DBF_MC_ERRINT = (0xDEC3001A) +NVC9B0_DEC_ERROR_H264_DBF_MC_IQT_ERRINT = (0xDEC3001B) +NVC9B0_DEC_ERROR_H264_DBF_REC_ERRINT = (0xDEC3001C) +NVC9B0_DEC_ERROR_H264_DBF_REC_IQT_ERRINT = (0xDEC3001D) +NVC9B0_DEC_ERROR_H264_DBF_REC_MC_ERRINT = (0xDEC3001E) +NVC9B0_DEC_ERROR_H264_DBF_REC_MC_IQT_ERRINT = (0xDEC3001F) +NVC9B0_DEC_ERROR_H264_PICTURE_INIT = (0xDEC30100) 
+NVC9B0_DEC_ERROR_H264_STATEMACHINE_FAILURE = (0xDEC30101) +NVC9B0_DEC_ERROR_H264_INVALID_CTXID_PIC = (0xDEC30901) +NVC9B0_DEC_ERROR_H264_INVALID_CTXID_UCODE = (0xDEC30902) +NVC9B0_DEC_ERROR_H264_INVALID_CTXID_FC = (0xDEC30903) +NVC9B0_DEC_ERROR_H264_INVALID_CTXID_SLH = (0xDEC30904) +NVC9B0_DEC_ERROR_H264_INVALID_UCODE_SIZE = (0xDEC30905) +NVC9B0_DEC_ERROR_H264_INVALID_SLICE_COUNT = (0xDEC30906) +NVC9B0_DEC_ERROR_MPEG4_APPTIMER_EXPIRED = (0xDEC40001) +NVC9B0_DEC_ERROR_MPEG4_MVTIMER_EXPIRED = (0xDEC40002) +NVC9B0_DEC_ERROR_MPEG4_INVALID_TOKEN = (0xDEC40003) +NVC9B0_DEC_ERROR_MPEG4_SLICEDATA_MISSING = (0xDEC40004) +NVC9B0_DEC_ERROR_MPEG4_HWERR_INTERRUPT = (0xDEC40005) +NVC9B0_DEC_ERROR_MPEG4_DETECTED_VLD_FAILURE = (0xDEC40006) +NVC9B0_DEC_ERROR_MPEG4_TIMEOUT_POLLING_FOR_DATA = (0xDEC40007) +NVC9B0_DEC_ERROR_MPEG4_PDEC_PIC_END_UNALIGNED = (0xDEC40008) +NVC9B0_DEC_ERROR_MPEG4_WDTIMER_EXPIRED = (0xDEC40009) +NVC9B0_DEC_ERROR_MPEG4_ERRINTSTART = (0xDEC40010) +NVC9B0_DEC_ERROR_MPEG4_IQT_ERRINT = (0xDEC40011) +NVC9B0_DEC_ERROR_MPEG4_MC_ERRINT = (0xDEC40012) +NVC9B0_DEC_ERROR_MPEG4_MC_IQT_ERRINT = (0xDEC40013) +NVC9B0_DEC_ERROR_MPEG4_REC_ERRINT = (0xDEC40014) +NVC9B0_DEC_ERROR_MPEG4_REC_IQT_ERRINT = (0xDEC40015) +NVC9B0_DEC_ERROR_MPEG4_REC_MC_ERRINT = (0xDEC40016) +NVC9B0_DEC_ERROR_MPEG4_REC_MC_IQT_ERRINT = (0xDEC40017) +NVC9B0_DEC_ERROR_MPEG4_DBF_ERRINT = (0xDEC40018) +NVC9B0_DEC_ERROR_MPEG4_DBF_IQT_ERRINT = (0xDEC40019) +NVC9B0_DEC_ERROR_MPEG4_DBF_MC_ERRINT = (0xDEC4001A) +NVC9B0_DEC_ERROR_MPEG4_DBF_MC_IQT_ERRINT = (0xDEC4001B) +NVC9B0_DEC_ERROR_MPEG4_DBF_REC_ERRINT = (0xDEC4001C) +NVC9B0_DEC_ERROR_MPEG4_DBF_REC_IQT_ERRINT = (0xDEC4001D) +NVC9B0_DEC_ERROR_MPEG4_DBF_REC_MC_ERRINT = (0xDEC4001E) +NVC9B0_DEC_ERROR_MPEG4_DBF_REC_MC_IQT_ERRINT = (0xDEC4001F) +NVC9B0_DEC_ERROR_MPEG4_PICTURE_INIT = (0xDEC40100) +NVC9B0_DEC_ERROR_MPEG4_STATEMACHINE_FAILURE = (0xDEC40101) +NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_PIC = (0xDEC40901) +NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_UCODE = 
(0xDEC40902) +NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_FC = (0xDEC40903) +NVC9B0_DEC_ERROR_MPEG4_INVALID_CTXID_SLH = (0xDEC40904) +NVC9B0_DEC_ERROR_MPEG4_INVALID_UCODE_SIZE = (0xDEC40905) +NVC9B0_DEC_ERROR_MPEG4_INVALID_SLICE_COUNT = (0xDEC40906) NVC6C0_QMDV02_03_OUTER_PUT = (30, 0) NVC6C0_QMDV02_03_OUTER_OVERFLOW = (31, 31) NVC6C0_QMDV02_03_OUTER_GET = (62, 32) @@ -13370,6 +15124,77 @@ NVCEC0_QMDV04_01_OUTER_PUT = (3038, 3008) NVCEC0_QMDV04_01_OUTER_OVERFLOW = (3039, 3039) NVCEC0_QMDV04_01_OUTER_GET = (3070, 3040) NVCEC0_QMDV04_01_OUTER_STICKY_OVERFLOW = (3071, 3071) +ALIGN_UP = lambda v,n: (((v) + ((n)-1)) &~ ((n)-1)) +NVDEC_ALIGN = lambda value: ALIGN_UP(value,256) +NVDEC_MAX_MPEG2_SLICE = 65536 +NVDEC_CODEC_MPEG1 = 0 +NVDEC_CODEC_MPEG2 = 1 +NVDEC_CODEC_VC1 = 2 +NVDEC_CODEC_H264 = 3 +NVDEC_CODEC_MPEG4 = 4 +NVDEC_CODEC_DIVX = NVDEC_CODEC_MPEG4 +NVDEC_CODEC_VP8 = 5 +NVDEC_CODEC_HEVC = 7 +NVDEC_CODEC_VP9 = 9 +NVDEC_CODEC_HEVC_PARSER = 12 +NVDEC_CODEC_AV1 = 10 +AES_MODE_MASK = 0x7 +AES_CTS_MASK = 0x1 +AES_PADDING_TYPE_MASK = 0x7 +AES_UNWRAP_KEY_MASK = 0x1 +AES_MODE_SHIFT = 0 +AES_CTS_SHIFT = 3 +AES_PADDING_TYPE_SHIFT = 4 +AES_UNWRAP_KEY_SHIFT = 7 +AES_SET_FLAG = lambda M,C,P: ((M & AES_MODE_MASK) << AES_MODE_SHIFT) | ((C & AES_CTS_MASK) << AES_CTS_SHIFT) | ((P & AES_PADDING_TYPE_MASK) << AES_PADDING_TYPE_SHIFT) +DRM_MODE_MASK = 0x7f +AES_GET_DRM_MODE = lambda V: (V & DRM_MODE_MASK) +GIP_ASIC_VERT_FILTER_RAM_SIZE = 16 +GIP_ASIC_BSD_CTRL_RAM_SIZE = 4 +GIP_ASIC_SCALING_LIST_SIZE = (16*64) +GIP_ASIC_VERT_SAO_RAM_SIZE = 16 +GIP_ASIC_TILE_SIZE = ((20*22*2*2+16+15) & ~0xF) +GIP_ASIC_VP9_CTB_SEG_SIZE = 32 +HEVC_DBLK_TOP_SIZE_IN_SB16 = ALIGN_UP(632, 128) +HEVC_DBLK_TOP_BUF_SIZE = lambda w: NVDEC_ALIGN( (ALIGN_UP(w,16)/16 + 2) * HEVC_DBLK_TOP_SIZE_IN_SB16) +HEVC_DBLK_LEFT_SIZE_IN_SB16 = ALIGN_UP(506, 128) +HEVC_DBLK_LEFT_BUF_SIZE = lambda h: NVDEC_ALIGN( (ALIGN_UP(h,16)/16 + 2) * HEVC_DBLK_LEFT_SIZE_IN_SB16) +HEVC_SAO_LEFT_SIZE_IN_SB16 = ALIGN_UP(713, 128) +HEVC_SAO_LEFT_BUF_SIZE 
= lambda h: NVDEC_ALIGN( (ALIGN_UP(h,16)/16 + 2) * HEVC_SAO_LEFT_SIZE_IN_SB16) +VP9_DBLK_TOP_SIZE_IN_SB64 = ALIGN_UP(2000, 128) +VP9_DBLK_TOP_BUF_SIZE = lambda w: NVDEC_ALIGN( (ALIGN_UP(w,64)/64 + 2) * VP9_DBLK_TOP_SIZE_IN_SB64) +VP9_DBLK_LEFT_SIZE_IN_SB64 = ALIGN_UP(1600, 128) +VP9_DBLK_LEFT_BUF_SIZE = lambda h: NVDEC_ALIGN( (ALIGN_UP(h,64)/64 + 2) * VP9_DBLK_LEFT_SIZE_IN_SB64) +VP9_HINT_DUMP_SIZE_IN_SB64 = ((64*64)/(4*4)*8) +VP9_HINT_DUMP_SIZE = lambda w,h: NVDEC_ALIGN(VP9_HINT_DUMP_SIZE_IN_SB64*((w+63)/64)*((h+63)/64)) +NUM_SUBSAMPLES = 32 +VP8_MAX_TOKEN_PARTITIONS = 8 +VP9_MAX_FRAMES_IN_SUPERFRAME = 8 +AV1_MAX_TILES = 256 +MAX_SUBSTREAM_ENTRY_SIZE = 32 +AV1_TEMPORAL_MV_SIZE_IN_64x64 = 256 +AV1_TEMPORAL_MV_BUF_SIZE = lambda w,h: ALIGN_UP( ALIGN_UP(w,128) * ALIGN_UP(h,128) / (64*64) * AV1_TEMPORAL_MV_SIZE_IN_64x64, 4096) +AV1_SEGMENT_ID_SIZE_IN_64x64 = 128 +AV1_SEGMENT_ID_BUF_SIZE = lambda w,h: ALIGN_UP( ALIGN_UP(w,128) * ALIGN_UP(h,128) / (64*64) * AV1_SEGMENT_ID_SIZE_IN_64x64, 4096) +AV1_GLOBAL_MOTION_BUF_SIZE = NVDEC_ALIGN(7*32) +AV1_INTRA_TOP_BUF_SIZE = NVDEC_ALIGN(8*8192) +AV1_HISTOGRAM_BUF_SIZE = NVDEC_ALIGN(1024) +AV1_DBLK_TOP_SIZE_IN_SB64 = ALIGN_UP(1920, 128) +AV1_DBLK_TOP_BUF_SIZE = lambda w: NVDEC_ALIGN( (ALIGN_UP(w,64)/64 + 2) * AV1_DBLK_TOP_SIZE_IN_SB64) +AV1_DBLK_LEFT_SIZE_IN_SB64 = ALIGN_UP(1536, 128) +AV1_DBLK_LEFT_BUF_SIZE = lambda h: NVDEC_ALIGN( (ALIGN_UP(h,64)/64 + 2) * AV1_DBLK_LEFT_SIZE_IN_SB64) +AV1_CDEF_LEFT_SIZE_IN_SB64 = ALIGN_UP(1792, 128) +AV1_CDEF_LEFT_BUF_SIZE = lambda h: NVDEC_ALIGN( (ALIGN_UP(h,64)/64 + 2) * AV1_CDEF_LEFT_SIZE_IN_SB64) +AV1_HUS_LEFT_SIZE_IN_SB64 = ALIGN_UP(12544, 128) +AV1_ASIC_HUS_LEFT_BUFFER_SIZE = lambda h: NVDEC_ALIGN( (ALIGN_UP(h,64)/64 + 2) * AV1_HUS_LEFT_SIZE_IN_SB64) +AV1_HUS_LEFT_BUF_SIZE = lambda h: 2*AV1_ASIC_HUS_LEFT_BUFFER_SIZE(h) +AV1_LR_LEFT_SIZE_IN_SB64 = ALIGN_UP(1920, 128) +AV1_LR_LEFT_BUF_SIZE = lambda h: NVDEC_ALIGN( (ALIGN_UP(h,64)/64 + 2) * AV1_LR_LEFT_SIZE_IN_SB64) +AV1_FGS_LEFT_SIZE_IN_SB64 
= ALIGN_UP(320, 128) +AV1_FGS_LEFT_BUF_SIZE = lambda h: NVDEC_ALIGN( (ALIGN_UP(h,64)/64 + 2) * AV1_FGS_LEFT_SIZE_IN_SB64) +AV1_HINT_DUMP_SIZE_IN_SB64 = ((64*64)/(4*4)*8) +AV1_HINT_DUMP_SIZE_IN_SB128 = ((128*128)/(4*4)*8) +AV1_HINT_DUMP_SIZE = lambda w,h: NVDEC_ALIGN(AV1_HINT_DUMP_SIZE_IN_SB128*((w+127)/128)*((h+127)/128)) NVBIT = lambda b: (1<<(b)) NVBIT32 = lambda b: NVBIT_TYPE(b, NvU32) NVBIT64 = lambda b: NVBIT_TYPE(b, NvU64) @@ -13429,6 +15254,10 @@ NV01_NULL_OBJECT = (0x0) NV1_NULL_OBJECT = NV01_NULL_OBJECT NV01_ROOT = (0x0) NV0000_ALLOC_PARAMETERS_MESSAGE_ID = (0x0000) +NV01_MEMORY_VIRTUAL = (0x70) +NV01_MEMORY_SYSTEM_DYNAMIC = (0x70) +NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS_MESSAGE_ID = (0x0070) +NV_MEMORY_VIRTUAL_SYSMEM_DYNAMIC_HVASPACE = 0xffffffff NV01_DEVICE_0 = (0x80) NV0080_ALLOC_PARAMETERS_MESSAGE_ID = (0x0080) NV20_SUBDEVICE_0 = (0x2080) @@ -15969,6 +17798,22 @@ NV90F1_CTRL_CMD_VASPACE_GET_HOST_RM_MANAGED_SIZE = (0x90f10107) NV90F1_CTRL_VASPACE_GET_HOST_RM_MANAGED_SIZE_PARAMS_MESSAGE_ID = (0x7) NV90F1_CTRL_CMD_VASPACE_GET_VAS_HEAP_INFO = (0x90f10108) NV90F1_CTRL_VASPACE_GET_VAS_HEAP_INFO_PARAMS_MESSAGE_ID = (0x8) +NVA06F_CTRL_CMD_GPFIFO_SCHEDULE = (0xa06f0103) +NVA06F_CTRL_GPFIFO_SCHEDULE_PARAMS_MESSAGE_ID = (0x3) +NVA06F_CTRL_CMD_BIND = (0xa06f0104) +NVA06F_CTRL_BIND_PARAMS_MESSAGE_ID = (0x4) +NVA06F_CTRL_CMD_SET_ERROR_NOTIFIER = (0xa06f0108) +NVA06F_CTRL_SET_ERROR_NOTIFIER_PARAMS_MESSAGE_ID = (0x8) +NVA06F_CTRL_CMD_SET_INTERLEAVE_LEVEL = (0xa06f0109) +NVA06F_CTRL_SET_INTERLEAVE_LEVEL_PARAMS_MESSAGE_ID = (0x9) +NVA06F_CTRL_CMD_GET_INTERLEAVE_LEVEL = (0xa06f0110) +NVA06F_CTRL_GET_INTERLEAVE_LEVEL_PARAMS_MESSAGE_ID = (0x10) +NVA06F_CTRL_CMD_RESTART_RUNLIST = (0xa06f0111) +NVA06F_CTRL_RESTART_RUNLIST_PARAMS_MESSAGE_ID = (0x11) +NVA06F_CTRL_CMD_STOP_CHANNEL = (0xa06f0112) +NVA06F_CTRL_STOP_CHANNEL_PARAMS_MESSAGE_ID = (0x12) +NVA06F_CTRL_CMD_GET_CONTEXT_ID = (0xa06f0113) +NVA06F_CTRL_GET_CONTEXT_ID_PARAMS_MESSAGE_ID = (0x13) NV0000_CTRL_RESERVED = (0x00) 
NV0000_CTRL_SYSTEM = (0x01) NV0000_CTRL_GPU = (0x02) diff --git a/tinygrad/runtime/ops_nv.py b/tinygrad/runtime/ops_nv.py index 11ccc88993..824395bd3d 100644 --- a/tinygrad/runtime/ops_nv.py +++ b/tinygrad/runtime/ops_nv.py @@ -180,6 +180,31 @@ class NVCopyQueue(NVCommandQueue): def _submit(self, dev:NVDevice): self._submit_to_gpfifo(dev, dev.dma_gpfifo) +class NVVideoQueue(NVCommandQueue): + def decode_hevc_chunk(self, pic_desc:HCQBuffer, in_buf:HCQBuffer, out_buf:HCQBuffer, out_buf_pos:int, hist_bufs:list[HCQBuffer], + hist_pos:list[int], chroma_off:int, coloc_buf:HCQBuffer, filter_buf:HCQBuffer, intra_top_off:int, status_buf:HCQBuffer): + self.nvm(4, nv_gpu.NVC9B0_SET_APPLICATION_ID, nv_gpu.NVC9B0_SET_APPLICATION_ID_ID_HEVC) + self.nvm(4, nv_gpu.NVC9B0_SET_CONTROL_PARAMS, 0x52057) + self.nvm(4, nv_gpu.NVC9B0_SET_DRV_PIC_SETUP_OFFSET, pic_desc.va_addr >> 8) + self.nvm(4, nv_gpu.NVC9B0_SET_IN_BUF_BASE_OFFSET, in_buf.va_addr >> 8) + for pos, buf in zip(hist_pos + [out_buf_pos], hist_bufs + [out_buf]): + self.nvm(4, nv_gpu.NVC9B0_SET_PICTURE_LUMA_OFFSET0 + pos*4, buf.va_addr >> 8) + self.nvm(4, nv_gpu.NVC9B0_SET_PICTURE_CHROMA_OFFSET0 + pos*4, buf.offset(chroma_off).va_addr >> 8) + self.nvm(4, nv_gpu.NVC9B0_SET_COLOC_DATA_OFFSET, coloc_buf.va_addr >> 8) + self.nvm(4, nv_gpu.NVC9B0_SET_NVDEC_STATUS_OFFSET, status_buf.va_addr >> 8) + self.nvm(4, nv_gpu.NVC9B0_HEVC_SET_TILE_SIZES_OFFSET, pic_desc.offset(0x200).va_addr >> 8) + self.nvm(4, nv_gpu.NVC9B0_HEVC_SET_FILTER_BUFFER_OFFSET, filter_buf.va_addr >> 8) + self.nvm(4, nv_gpu.NVC9B0_SET_INTRA_TOP_BUF_OFFSET, (filter_buf.va_addr + intra_top_off) >> 8) + self.nvm(4, nv_gpu.NVC9B0_EXECUTE, 0) + return self + + def signal(self, signal:HCQSignal, value:sint=0): + self.nvm(4, nv_gpu.NVC9B0_SEMAPHORE_A, *data64(signal.value_addr), value) + self.nvm(4, nv_gpu.NVC9B0_SEMAPHORE_D, 0) + return self + + def _submit(self, dev:NVDevice): self._submit_to_gpfifo(dev, dev.vid_gpfifo) + class NVArgsState(CLikeArgsState): def 
__init__(self, buf:HCQBuffer, prg:NVProgram, bufs:tuple[HCQBuffer, ...], vals:tuple[int, ...]=()): if MOCKGPU: prg.cbuf_0[80:82] = [len(bufs), len(vals)] @@ -281,6 +306,16 @@ class NVAllocator(HCQAllocator['NVDevice']): def _map(self, buf:HCQBuffer): return self.dev.iface.map(buf._base if buf._base is not None else buf) + def _encode_decode(self, bufout:HCQBuffer, bufin:HCQBuffer, desc_buf:HCQBuffer, hist:list[HCQBuffer], shape:tuple[int,...], frame_pos:int): + assert all(h.va_addr % 0x100 == 0 for h in hist + [bufin, bufout]), "all buffers must be 0x100 aligned" + + h, w = ((2 * shape[0]) // 3 if shape[0] % 3 == 0 else (2 * shape[0] - 1) // 3), shape[1] + self.dev._ensure_has_vid_hw(w, h) + NVVideoQueue().wait(self.dev.timeline_signal, self.dev.timeline_value - 1) \ + .decode_hevc_chunk(desc_buf, bufin, bufout, frame_pos, hist, [(frame_pos-x) % (len(hist) + 1) for x in range(len(hist), 0, -1)], + round_up(w, 64)*round_up(h, 64), self.dev.vid_coloc_buf, self.dev.vid_filter_buf, self.dev.intra_top_off, self.dev.vid_stat_buf) \ + .signal(self.dev.timeline_signal, self.dev.next_timeline()).submit(self.dev) + @dataclass class GPFifo: ring: MMIOInterface @@ -358,6 +393,7 @@ class NVKIface: self.gpfifo_class:int = next(c for c in [nv_gpu.BLACKWELL_CHANNEL_GPFIFO_A, nv_gpu.AMPERE_CHANNEL_GPFIFO_A] if c in self.nvclasses) self.compute_class:int = next(c for c in [nv_gpu.BLACKWELL_COMPUTE_B, nv_gpu.ADA_COMPUTE_A, nv_gpu.AMPERE_COMPUTE_B] if c in self.nvclasses) self.dma_class:int = next(c for c in [nv_gpu.BLACKWELL_DMA_COPY_B, nv_gpu.AMPERE_DMA_COPY_B] if c in self.nvclasses) + self.viddec_class:int|None = next((c for c in [nv_gpu.NVC9B0_VIDEO_DECODER] if c in self.nvclasses), None) usermode = self.rm_alloc(self.dev.subdevice, self.usermode_class) return usermode, MMIOInterface(self._gpu_map_to_cpu(usermode, mmio_sz:=0x10000), mmio_sz, fmt='I') @@ -440,7 +476,15 @@ class NVKIface: if mem.meta.has_cpu_mapping: FileIOInterface.munmap(cast(int, mem.va_addr), mem.size) def 
_gpu_uvm_map(self, va_base, size, mem_handle, create_range=True, has_cpu_mapping=False) -> HCQBuffer: - if create_range: self.uvm(nv_gpu.UVM_CREATE_EXTERNAL_RANGE, nv_gpu.UVM_CREATE_EXTERNAL_RANGE_PARAMS(base=va_base, length=size)) + if create_range: + self.uvm(nv_gpu.UVM_CREATE_EXTERNAL_RANGE, nv_gpu.UVM_CREATE_EXTERNAL_RANGE_PARAMS(base=va_base, length=size)) + made = nv_gpu.NVOS46_PARAMETERS(hClient=self.root, hDevice=self.dev.nvdevice, hDma=self.dev.virtmem, hMemory=mem_handle, length=size, + flags=(nv_gpu.NVOS46_FLAGS_PAGE_SIZE_4KB<<8)|(nv_gpu.NVOS46_FLAGS_CACHE_SNOOP_ENABLE<<4)|(nv_gpu.NVOS46_FLAGS_DMA_OFFSET_FIXED_TRUE<<15), + dmaOffset=va_base) + nv_iowr(self.fd_ctl, nv_gpu.NV_ESC_RM_MAP_MEMORY_DMA, made) + if made.status != 0: raise RuntimeError(f"nv_sys_alloc 1 returned {get_error_str(made.status)}") + assert made.dmaOffset == va_base, f"made.dmaOffset != va_base {made.dmaOffset=} {va_base=}" + attrs = (nv_gpu.UvmGpuMappingAttributes*256)(nv_gpu.UvmGpuMappingAttributes(gpuUuid=self.gpu_uuid, gpuMappingType=1)) self.uvm(nv_gpu.UVM_MAP_EXTERNAL_ALLOCATION, uvm_map:=nv_gpu.UVM_MAP_EXTERNAL_ALLOCATION_PARAMS(base=va_base, length=size, @@ -472,6 +516,7 @@ class PCIIface(PCIIfaceBase): # Setup classes for the GPU self.gpfifo_class, self.compute_class, self.dma_class = (gsp:=self.dev_impl.gsp).gpfifo_class, gsp.compute_class, gsp.dma_class + self.viddec_class = None def alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, **kwargs) -> HCQBuffer: # Force use of huge pages for large allocations. 
NVDev will attempt to use huge pages in any case, @@ -499,6 +544,7 @@ class NVDevice(HCQCompiled[HCQSignal]): vaMode=nv_gpu.NV_DEVICE_ALLOCATION_VAMODE_OPTIONAL_MULTIPLE_VASPACES) self.nvdevice = self.iface.rm_alloc(self.iface.root, nv_gpu.NV01_DEVICE_0, device_params) self.subdevice = self.iface.rm_alloc(self.nvdevice, nv_gpu.NV20_SUBDEVICE_0, nv_gpu.NV2080_ALLOC_PARAMETERS()) + self.virtmem = self.iface.rm_alloc(self.nvdevice, nv_gpu.NV01_MEMORY_VIRTUAL, nv_gpu.NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS(limit=0x1ffffffffffff)) self.usermode, self.gpu_mmio = self.iface.setup_usermode() self.iface.rm_control(self.subdevice, nv_gpu.NV2080_CTRL_CMD_PERF_BOOST, nv_gpu.NV2080_CTRL_PERF_BOOST_PARAMS(duration=0xffffffff, @@ -514,14 +560,14 @@ class NVDevice(HCQCompiled[HCQSignal]): channel_params = nv_gpu.NV_CHANNEL_GROUP_ALLOCATION_PARAMETERS(engineType=nv_gpu.NV2080_ENGINE_TYPE_GRAPHICS) channel_group = self.iface.rm_alloc(self.nvdevice, nv_gpu.KEPLER_CHANNEL_GROUP_A, channel_params) - gpfifo_area = self.iface.alloc(0x200000, contiguous=True, cpu_access=True, force_devmem=True, + self.gpfifo_area = self.iface.alloc(0x300000, contiguous=True, cpu_access=True, force_devmem=True, map_flags=(nv_gpu.NVOS33_FLAGS_CACHING_TYPE_WRITECOMBINED<<23)) ctxshare_params = nv_gpu.NV_CTXSHARE_ALLOCATION_PARAMETERS(hVASpace=vaspace, flags=nv_gpu.NV_CTXSHARE_ALLOCATION_FLAGS_SUBCONTEXT_ASYNC) ctxshare = self.iface.rm_alloc(channel_group, nv_gpu.FERMI_CONTEXT_SHARE_A, ctxshare_params) - self.compute_gpfifo = self._new_gpu_fifo(gpfifo_area, ctxshare, channel_group, offset=0, entries=0x10000, compute=True) - self.dma_gpfifo = self._new_gpu_fifo(gpfifo_area, ctxshare, channel_group, offset=0x100000, entries=0x10000, compute=False) + self.compute_gpfifo = self._new_gpu_fifo(self.gpfifo_area, ctxshare, channel_group, offset=0, entries=0x10000, compute=True) + self.dma_gpfifo = self._new_gpu_fifo(self.gpfifo_area, ctxshare, channel_group, offset=0x100000, entries=0x10000, compute=False) 
self.iface.rm_control(channel_group, nv_gpu.NVA06C_CTRL_CMD_GPFIFO_SCHEDULE, nv_gpu.NVA06C_CTRL_GPFIFO_SCHEDULE_PARAMS(bEnable=1)) self.cmdq_page:HCQBuffer = self.iface.alloc(0x200000, cpu_access=True) @@ -542,22 +588,27 @@ class NVDevice(HCQCompiled[HCQSignal]): self._setup_gpfifos() - def _new_gpu_fifo(self, gpfifo_area, ctxshare, channel_group, offset=0, entries=0x400, compute=False) -> GPFifo: + def _new_gpu_fifo(self, gpfifo_area, ctxshare, channel_group, offset=0, entries=0x400, compute=False, video=False) -> GPFifo: notifier = self.iface.alloc(48 << 20, uncached=True) - params = nv_gpu.NV_CHANNELGPFIFO_ALLOCATION_PARAMETERS(hObjectError=notifier.meta.hMemory, hObjectBuffer=gpfifo_area.meta.hMemory, - gpFifoOffset=gpfifo_area.va_addr+offset, gpFifoEntries=entries, hContextShare=ctxshare, - hUserdMemory=(ctypes.c_uint32*8)(gpfifo_area.meta.hMemory), userdOffset=(ctypes.c_uint64*8)(entries*8+offset)) + params = nv_gpu.NV_CHANNELGPFIFO_ALLOCATION_PARAMETERS(gpFifoOffset=gpfifo_area.va_addr+offset, gpFifoEntries=entries, hContextShare=ctxshare, + hObjectError=notifier.meta.hMemory, hObjectBuffer=self.virtmem if video else gpfifo_area.meta.hMemory, + hUserdMemory=(ctypes.c_uint32*8)(gpfifo_area.meta.hMemory), userdOffset=(ctypes.c_uint64*8)(entries*8+offset), engineType=19 if video else 0) gpfifo = self.iface.rm_alloc(channel_group, self.iface.gpfifo_class, params) if compute: self.debug_compute_obj, self.debug_channel = self.iface.rm_alloc(gpfifo, self.iface.compute_class), gpfifo debugger_params = nv_gpu.NV83DE_ALLOC_PARAMETERS(hAppClient=self.iface.root, hClass3dObject=self.debug_compute_obj) self.debugger = self.iface.rm_alloc(self.nvdevice, nv_gpu.GT200_DEBUGGER, debugger_params) - else: self.iface.rm_alloc(gpfifo, self.iface.dma_class) + elif not video: self.iface.rm_alloc(gpfifo, self.iface.dma_class) + else: self.iface.rm_alloc(gpfifo, self.iface.viddec_class) + + if channel_group == self.nvdevice: + self.iface.rm_control(gpfifo, 
nv_gpu.NVA06F_CTRL_CMD_BIND, nv_gpu.NVA06F_CTRL_BIND_PARAMS(engineType=params.engineType)) + self.iface.rm_control(gpfifo, nv_gpu.NVA06F_CTRL_CMD_GPFIFO_SCHEDULE, nv_gpu.NVA06F_CTRL_GPFIFO_SCHEDULE_PARAMS(bEnable=1)) ws_token_params = self.iface.rm_control(gpfifo, nv_gpu.NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN, nv_gpu.NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN_PARAMS(workSubmitToken=-1)) - self.iface.setup_gpfifo_vm(gpfifo) + if ctxshare != 0: self.iface.setup_gpfifo_vm(gpfifo) return GPFifo(ring=gpfifo_area.cpu_view().view(offset, entries*8, fmt='Q'), entries_count=entries, token=ws_token_params.workSubmitToken, controls=nv_gpu.AmpereAControlGPFifo.from_address(gpfifo_area.cpu_view().addr + offset + entries * 8)) @@ -604,6 +655,24 @@ class NVDevice(HCQCompiled[HCQSignal]): .setup(local_mem=self.shader_local_mem.va_addr, local_mem_tpc_bytes=bytes_per_tpc) \ .signal(self.timeline_signal, self.next_timeline()).submit(self) + def _ensure_has_vid_hw(self, w, h): + if self.iface.viddec_class is None: raise RuntimeError(f"{self.device} Video decoder class not available.") + + coloc_size = round_up((round_up(h, 64) * round_up(h, 64)) + (round_up(w, 64) * round_up(h, 64) // 16), 2 << 20) + self.intra_top_off = round_up(h, 64) * (608 + 4864 + 152 + 2000) + filter_size = round_up(round_up(self.intra_top_off, 0x10000) + 64 << 10, 2 << 20) + + if not hasattr(self, 'vid_gpfifo'): + self.vid_gpfifo = self._new_gpu_fifo(self.gpfifo_area, 0, self.nvdevice, offset=0x200000, entries=2048, compute=False, video=True) + self.vid_coloc_buf, self.vid_filter_buf = self.allocator.alloc(coloc_size), self.allocator.alloc(filter_size) + self.vid_stat_buf = self.allocator.alloc(0x1000) + NVVideoQueue().wait(self.timeline_signal, self.timeline_value - 1) \ + .setup(copy_class=self.iface.viddec_class) \ + .signal(self.timeline_signal, self.next_timeline()).submit(self) + else: + if coloc_size > self.vid_coloc_buf.size: self.vid_coloc_buf, _ = self._realloc(self.vid_coloc_buf, coloc_size, 
force=True) + if filter_size > self.vid_filter_buf.size: self.vid_filter_buf, _ = self._realloc(self.vid_filter_buf, filter_size, force=True) + def invalidate_caches(self): if self.is_nvd(): self.iface.rm_control(self.subdevice, nv_gpu.NV2080_CTRL_CMD_INTERNAL_BUS_FLUSH_WITH_SYSMEMBAR, None) else: diff --git a/tinygrad/runtime/support/hcq.py b/tinygrad/runtime/support/hcq.py index 29778f159a..9649db0186 100644 --- a/tinygrad/runtime/support/hcq.py +++ b/tinygrad/runtime/support/hcq.py @@ -432,10 +432,12 @@ class HCQCompiled(Compiled, Generic[SignalType]): self.timeline_signal.value = 0 cast(HCQAllocatorBase, self.allocator).b_timeline = [0] * len(cast(HCQAllocatorBase, self.allocator).b) - def _realloc(self, oldbuf:HCQBuffer|None, new_size:int, options:BufferSpec|None=None) -> tuple[HCQBuffer, bool]: + def _realloc(self, oldbuf:HCQBuffer|None, new_size:int, options:BufferSpec|None=None, force=False) -> tuple[HCQBuffer, bool]: if oldbuf is not None: self.allocator.free(oldbuf, oldbuf.size, options=options) try: buf, realloced = self.allocator.alloc(new_size, options=options), True - except MemoryError: buf, realloced = self.allocator.alloc(oldbuf.size if oldbuf is not None else new_size, options=options), False + except MemoryError: + if force: raise + buf, realloced = self.allocator.alloc(oldbuf.size if oldbuf is not None else new_size, options=options), False return buf, realloced def _select_iface(self, *ifaces:Type): diff --git a/tinygrad/schedule/indexing.py b/tinygrad/schedule/indexing.py index 5f1b140fb8..c4e995ae06 100644 --- a/tinygrad/schedule/indexing.py +++ b/tinygrad/schedule/indexing.py @@ -8,7 +8,7 @@ from tinygrad.helpers import argsort, all_same, cpu_profile, PCONTIG, colored ALWAYS_CONTIGUOUS: set[Ops] = {Ops.CONTIGUOUS, Ops.ASSIGN, Ops.COPY, Ops.BUFFER, Ops.BUFFER_VIEW, Ops.CONST, Ops.BIND, Ops.DEVICE, Ops.MSELECT, Ops.MSTACK, Ops.DEFINE_GLOBAL, - Ops.DEFINE_LOCAL, Ops.DEFINE_REG, Ops.LOAD, Ops.KERNEL} + Ops.DEFINE_LOCAL, Ops.DEFINE_REG, Ops.LOAD, 
Ops.KERNEL, Ops.ENCDEC} def realize(ctx:dict[UOp, None], tr:UOp) -> None: ctx[tr] = None @@ -24,12 +24,12 @@ def realize_assign(ctx:dict[UOp, None], a:UOp) -> None: pm_generate_realize_map = PatternMatcher([ # always realize SINK src (UPat(Ops.SINK, name="s"), lambda ctx,s: ctx.update((x.base, None) for x in s.src if x.base.op not in ALWAYS_CONTIGUOUS)), - # always realize COPY/BUFFER_VIEW/CONTIGUOUS/STORE - (UPat({Ops.COPY, Ops.BUFFER_VIEW, Ops.CONTIGUOUS, Ops.STORE}, name="tr"), realize), + # always realize COPY/BUFFER_VIEW/CONTIGUOUS/STORE/ENCDEC + (UPat({Ops.COPY, Ops.BUFFER_VIEW, Ops.CONTIGUOUS, Ops.STORE, Ops.ENCDEC}, name="tr"), realize), # always realize REDUCE on outer ranges (UPat(Ops.REDUCE, name="r"), lambda ctx,r: realize(ctx, r) if any(tr.arg[-1] == AxisType.OUTER for tr in r.src[1:]) else None), - # realize srcs of COPY, MSELECT, MSTACK - (UPat((Ops.COPY, Ops.MSELECT, Ops.MSTACK), name="rb"), realize_srcs), + # realize srcs of COPY, MSELECT, MSTACK, ENCDEC + (UPat((Ops.COPY, Ops.MSELECT, Ops.MSTACK, Ops.ENCDEC), name="rb"), realize_srcs), # realize ASSIGN and input to assign (might be optimized out) (UPat(Ops.ASSIGN, name="a"), realize_assign), ]) diff --git a/tinygrad/schedule/rangeify.py b/tinygrad/schedule/rangeify.py index e523a97988..be790f84aa 100644 --- a/tinygrad/schedule/rangeify.py +++ b/tinygrad/schedule/rangeify.py @@ -117,7 +117,7 @@ earliest_rewrites = mop_cleanup+PatternMatcher([ # 3.5 cleanups # Ops.NOOP happens when we have a COPY to the device the Tensor is already on. We treat it like COPY here for MSTACK. 
-ALWAYS_RUN_OPS = {Ops.CONTIGUOUS, Ops.COPY, Ops.ASSIGN, Ops.NOOP} +ALWAYS_RUN_OPS = {Ops.CONTIGUOUS, Ops.COPY, Ops.ASSIGN, Ops.ENCDEC, Ops.NOOP} # you don't know in the first pass if axes are going to die, this happens if there's an EXPAND to the left def cleanup_dead_axes(b:UOp): @@ -494,7 +494,7 @@ def split_store(ctx:list[UOp], x:UOp) -> UOp|None: # NOTE: the hack for COPY is here for u in ret.toposort(): # TODO: this can be wrong if there's multiple of these - if u.op in {Ops.COPY, Ops.BUFFER_VIEW}: + if u.op in {Ops.COPY, Ops.BUFFER_VIEW, Ops.ENCDEC}: ret = u break else: diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py index 0e4bc9b365..9f058bd644 100644 --- a/tinygrad/tensor.py +++ b/tinygrad/tensor.py @@ -11,7 +11,7 @@ from tinygrad.helpers import suppress_finalizing, disable_gc from tinygrad.gradient import compute_gradient from tinygrad.mixin import OpMixin from tinygrad.mixin.movement import _align_left -from tinygrad.uop.ops import smax, smin, resolve, UOp, Ops, sint, identity_element, all_metadata, _index_to_concrete_int, sint_to_uop +from tinygrad.uop.ops import smax, smin, resolve, UOp, Ops, sint, identity_element, all_metadata, _index_to_concrete_int, sint_to_uop, Variable from tinygrad.engine.schedule import ScheduleItem, complete_create_schedule_with_vars from tinygrad.device import Device, Buffer from tinygrad.engine.realize import run_schedule @@ -3564,6 +3564,19 @@ class Tensor(OpMixin): def __eq__(self, x) -> Tensor: return self.eq(x) # type: ignore[override] + # ***** encoding/decoding ops ***** + + def decode_hevc_frame(self, frame_pos:Variable, shape:tuple[int,...], state:Tensor, ref_frames:list[Tensor]|None=None) -> Tensor: + """ + Creates a Tensor by decoding an HEVC frame chunk. + + You must provide the output shape of the decoded data (`shape`), the HEVC context (`state`), and, if required by the chunk, the reference frames (`ref_frames`). 
+ """ + ref_frames = [x.contiguous() for x in ref_frames or []] + assert isinstance(frame_pos, Variable), "frame_pos must be a Variable" + return self.contiguous()._apply_uop(UOp.encdec, state.contiguous(), *ref_frames, extra_args=(frame_pos,), arg=(shape,)) + # ***** functional nn ops ***** def linear(self, weight:Tensor, bias:Tensor|None=None, dtype:DTypeLike|None=None) -> Tensor: diff --git a/tinygrad/uop/__init__.py b/tinygrad/uop/__init__.py index 962139b927..11eb13617a 100644 --- a/tinygrad/uop/__init__.py +++ b/tinygrad/uop/__init__.py @@ -80,7 +80,7 @@ class Ops(FastEnum): CONTIGUOUS = auto(); CONTIGUOUS_BACKWARD = auto(); DETACH = auto() # buffer ops - BUFFERIZE = auto(); COPY = auto(); BUFFER = auto(); BUFFER_VIEW = auto(); MSELECT = auto(); MSTACK = auto() + BUFFERIZE = auto(); COPY = auto(); BUFFER = auto(); BUFFER_VIEW = auto(); MSELECT = auto(); MSTACK = auto(); ENCDEC = auto() # the core 6 movement ops! these only exist in the tensor graph RESHAPE = auto(); PERMUTE = auto(); EXPAND = auto(); PAD = auto(); SHRINK = auto(); FLIP = auto() diff --git a/tinygrad/uop/ops.py b/tinygrad/uop/ops.py index f4d67e5b5a..55bd7bf90b 100644 --- a/tinygrad/uop/ops.py +++ b/tinygrad/uop/ops.py @@ -232,6 +232,7 @@ class UOp(OpMixin, metaclass=UOpMetaClass): case Ops.CONST | Ops.DEFINE_VAR | Ops.BIND: return () if self._device is not None else None case Ops.BUFFER: return (self.arg,) case Ops.BUFFER_VIEW: return (self.arg[0],) + case Ops.ENCDEC: return self.arg[0] case Ops.BUFFERIZE: return tuple([int(r.vmax+1) for r in self.src[1:]]) case Ops.DEFINE_GLOBAL | Ops.DEFINE_LOCAL | Ops.DEFINE_REG: return (self.ptrdtype.size,) @@ -538,6 +539,7 @@ class UOp(OpMixin, metaclass=UOpMetaClass): def mselect(self, arg:int) -> UOp: return UOp(Ops.MSELECT, self.dtype, (self,), arg) @property def metadata(self) -> tuple[Metadata, ...]|None: return all_metadata.get(self, None) + def encdec(self, *src, arg=None): return UOp(Ops.ENCDEC, self.dtype, src=(self,)+src, arg=arg) # *** uop 
movement ops *** @@ -1371,6 +1373,7 @@ pm_pyrender_extra = PatternMatcher([ (UPat(Ops.BUFFER, src=(UPat(Ops.UNIQUE, name="u"), UPat(Ops.DEVICE, name="d")), name="x"), lambda x,u,d: f"UOp.new_buffer({repr(d.arg)}, {x.size}, {x.dtype}, {u.arg})"), (UPat(Ops.COPY, src=(UPat(name="x"), UPat(Ops.DEVICE, name="d"))), lambda ctx,x,d: f"{ctx[x]}.copy_to_device({repr(d.arg)})"), + (UPat(Ops.ENCDEC, name="x"), lambda ctx,x: f"{ctx[x.src[0]]}.encdec({''.join([str(ctx[s])+', ' for s in x.src[1:]])}arg={x.arg!r})"), (UPat(Ops.REDUCE_AXIS, name="r"), lambda ctx,r: f"{ctx[r.src[0]]}.r({r.arg[0]}, {r.arg[1]})"), # NOTE: range has srcs sometimes after control flow (UPat(Ops.RANGE, src=(UPat(Ops.CONST, name="c"),), allow_any_len=True, name="x"), lambda ctx,x,c: diff --git a/tinygrad/uop/spec.py b/tinygrad/uop/spec.py index c3ac6ac691..6035936acc 100644 --- a/tinygrad/uop/spec.py +++ b/tinygrad/uop/spec.py @@ -96,10 +96,11 @@ _tensor_spec = PatternMatcher([ (UPat(Ops.CONTIGUOUS, name="root", src=(UPat.var("x"),), allow_any_len=True, arg=None), lambda root,x: root.dtype == x.dtype and all(u.op is Ops.RANGE for u in root.src[1:])), - # COPY/ALLREDUCE/MULTI + # COPY/ALLREDUCE/MULTI/ENCDEC (UPat(Ops.COPY, name="copy", src=(UPat.var("x"), UPat(Ops.DEVICE)), arg=None), lambda copy,x: copy.dtype == x.dtype), (UPat(Ops.ALLREDUCE, name="red", src=(UPat.var("x"), UPat(Ops.DEVICE))), lambda red,x: red.dtype == x.dtype and isinstance(red.arg, Ops)), (UPat(Ops.MULTI, name="multi"), lambda multi: all(x.dtype == multi.dtype for x in multi.src) and isinstance(multi.arg, int)), + (UPat(Ops.ENCDEC, name="x"), lambda x: len(x.src) >= 2), # state + inbuffer # REDUCE_AXIS is the reduce in the tensor graph (UPat(Ops.REDUCE_AXIS, name="x"), lambda x: isinstance(x.arg, tuple) and len(x.arg) >= 2 and x.arg[0] in {Ops.ADD, Ops.MUL, Ops.MAX}), diff --git a/tinygrad/viz/serve.py b/tinygrad/viz/serve.py index 5a4c09d129..a9c6195532 100755 --- a/tinygrad/viz/serve.py +++ b/tinygrad/viz/serve.py @@ -19,7 +19,7 @@ 
uops_colors = {Ops.LOAD: "#ffc0c0", Ops.STORE: "#87CEEB", Ops.CONST: "#e0e0e0", Ops.RANGE: "#c8a0e0", Ops.ASSIGN: "#909090", Ops.BARRIER: "#ff8080", Ops.IF: "#c8b0c0", Ops.SPECIAL: "#c0c0ff", Ops.INDEX: "#cef263", Ops.WMMA: "#efefc0", Ops.MULTI: "#f6ccff", Ops.KERNEL: "#3e7f55", **{x:"#D8F9E4" for x in GroupOp.Movement}, **{x:"#ffffc0" for x in GroupOp.ALU}, Ops.THREEFRY:"#ffff80", - Ops.BUFFER_VIEW: "#E5EAFF", Ops.BUFFER: "#B0BDFF", Ops.COPY: "#a040a0", + Ops.BUFFER_VIEW: "#E5EAFF", Ops.BUFFER: "#B0BDFF", Ops.COPY: "#a040a0", Ops.ENCDEC: "#bf71b6", Ops.ALLREDUCE: "#ff40a0", Ops.MSELECT: "#d040a0", Ops.MSTACK: "#d040a0", Ops.CONTIGUOUS: "#FFC14D", Ops.BUFFERIZE: "#FF991C", Ops.REWRITE_ERROR: "#ff2e2e", Ops.AFTER: "#8A7866", Ops.END: "#524C46"}