Files
tinygrad/extra/nv_gpu_driver/nvdec_drv.h
nimlgen 455dd88236 nv: minimal hevc (#13502)
* nv: minimal hevc

* validate

* not needed

* tralin

* var

* cpu

* fxi

* desc

* move

* cleanup
2025-11-30 16:46:55 +03:00

1846 lines
92 KiB
C

/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __NVDEC_DRV_H_
#define __NVDEC_DRV_H_
// TODO: Many fields can be converted to bitfields to save memory BW
// TODO: Revisit reserved fields for proper alignment and memory savings
///////////////////////////////////////////////////////////////////////////////
// NVDEC(MSDEC 5) is a single engine solution, and separates into VLD, MV, IQT,
// MCFETCH, MC, MCC, REC, DBF, DFBFDMA, HIST etc. units.
// The class (driver to HW) can mainly be separated into a VLD parser
// and a Decoder part, to be consistent with the original design. The
// sequence level info is usually set in the VLD part. Later codecs like
// VP8 are not named in this way.
// MSVLD: Multi-Standard VLD parser.
//
// Round v up to the next multiple of n. The rounding is done by masking with
// ~(n - 1), so n has to be a power of two for the result to be a multiple of n.
#define ALIGN_UP(v, n)     (((v) + ((n) - 1)) & ~((n) - 1))
// Align to 256 bytes
#define NVDEC_ALIGN(value) ALIGN_UP(value, 256)
// Upper bound on MPEG-2 slices: at 4096*4096 the macroblock count is 65536, with 1 macroblock per slice
#define NVDEC_MAX_MPEG2_SLICE   65536
// Codec identifiers, listed in ascending numeric order
#define NVDEC_CODEC_MPEG1       0
#define NVDEC_CODEC_MPEG2       1
#define NVDEC_CODEC_VC1         2
#define NVDEC_CODEC_H264        3
#define NVDEC_CODEC_MPEG4       4
#define NVDEC_CODEC_DIVX        NVDEC_CODEC_MPEG4 // alias: DIVX shares the MPEG4 identifier
#define NVDEC_CODEC_VP8         5
#define NVDEC_CODEC_HEVC        7
#define NVDEC_CODEC_VP9         9
#define NVDEC_CODEC_AV1         10
#define NVDEC_CODEC_HEVC_PARSER 12
// AES encryption
// AES encryption mode values (these fit the 3-bit AES mode field of drm_mode)
enum
{
    AES128_NONE           = 0x0,
    AES128_CTR            = 0x1,
    AES128_CBC            = 0x2,
    AES128_ECB            = 0x3,
    AES128_OFB            = 0x4,
    AES128_CTR_LSB16B     = 0x5,
    AES128_CLR_AS_ENCRYPT = 0x6,
    AES128_RESERVED       = 0x7
};
// CTS (CipherTextStealing) switch values
enum
{
    AES128_CTS_DISABLE = 0x0,
    AES128_CTS_ENABLE  = 0x1
};
// Padding type values
enum
{
    AES128_PADDING_NONE       = 0x0,
    AES128_PADDING_CARRY_OVER = 0x1,
    AES128_PADDING_RFC2630    = 0x2,
    AES128_PADDING_RESERVED   = 0x7
};
// Encryption mode selector
typedef enum
{
    ENCR_MODE_CTR64          = 0,
    ENCR_MODE_CBC            = 1,
    ENCR_MODE_ECB            = 2,
    ENCR_MODE_ECB_PARTIAL    = 3,
    ENCR_MODE_CBC_PARTIAL    = 4,
    ENCR_MODE_CLEAR_INTO_VPR = 5, // clear stream decoded into VPR
    ENCR_MODE_FORCE_INTO_VPR = 6  // decode output forced into VPR
} ENCR_MODE;
// drm_mode configuration
//
// Bit 0:2 AES encryption mode
// Bit 3   CTS (CipherTextStealing) enable/disable
// Bit 4:6 Padding type
// Bit 7   Unwrap key enable/disable
//
// Each field is described by a mask (applied to the unshifted value) and a
// shift (bit position of the field inside drm_mode).
#define AES_MODE_MASK 0x7
#define AES_CTS_MASK 0x1
#define AES_PADDING_TYPE_MASK 0x7
#define AES_UNWRAP_KEY_MASK 0x1
#define AES_MODE_SHIFT 0
#define AES_CTS_SHIFT 3
#define AES_PADDING_TYPE_SHIFT 4
#define AES_UNWRAP_KEY_SHIFT 7
// Compose a drm_mode value from mode M, CTS flag C and padding type P.
// Arguments and the whole expansion are parenthesized so the macro can be used
// inside larger expressions and with operator expressions as arguments.
#define AES_SET_FLAG(M, C, P) ((((M) & AES_MODE_MASK) << AES_MODE_SHIFT) | \
                               (((C) & AES_CTS_MASK) << AES_CTS_SHIFT) | \
                               (((P) & AES_PADDING_TYPE_MASK) << AES_PADDING_TYPE_SHIFT))
// Extract field F (one of MODE, CTS, PADDING_TYPE, UNWRAP_KEY) from value V.
#define AES_GET_FLAG(V, F) (((V) & ((AES_##F##_MASK) << (AES_##F##_SHIFT))) >> (AES_##F##_SHIFT))
#define DRM_MODE_MASK 0x7f // Bits 0:6 (0:2 -> AES_MODE, 3 -> AES_CTS, 4:6 -> AES_PADDING_TYPE)
// Keep only the drm_mode bits (0:6) of V, dropping the unwrap-key bit.
#define AES_GET_DRM_MODE(V) ((V) & DRM_MODE_MASK)
// Predefined drm_mode values, each composed as AES_SET_FLAG(mode, cts, padding).
// The trailing comment on each constant is the resulting value.
enum {
    DRM_MS_PIFF_CTR = AES_SET_FLAG(AES128_CTR, AES128_CTS_DISABLE, AES128_PADDING_CARRY_OVER) // 0x11
};
enum {
    DRM_MS_PIFF_CBC = AES_SET_FLAG(AES128_CBC, AES128_CTS_DISABLE, AES128_PADDING_NONE) // 0x02
};
enum {
    DRM_MARLIN_CTR = AES_SET_FLAG(AES128_CTR, AES128_CTS_DISABLE, AES128_PADDING_NONE) // 0x01
};
enum {
    DRM_MARLIN_CBC = AES_SET_FLAG(AES128_CBC, AES128_CTS_DISABLE, AES128_PADDING_RFC2630) // 0x22
};
enum {
    DRM_WIDEVINE = AES_SET_FLAG(AES128_CBC, AES128_CTS_ENABLE, AES128_PADDING_NONE) // 0x0A
};
enum {
    DRM_WIDEVINE_CTR = AES_SET_FLAG(AES128_CTR, AES128_CTS_DISABLE, AES128_PADDING_CARRY_OVER) // 0x11, same value as DRM_MS_PIFF_CTR
};
enum {
    DRM_ULTRA_VIOLET = AES_SET_FLAG(AES128_CTR_LSB16B, AES128_CTS_DISABLE, AES128_PADDING_NONE) // 0x05
};
enum {
    DRM_NONE = AES_SET_FLAG(AES128_NONE, AES128_CTS_DISABLE, AES128_PADDING_NONE) // 0x00
};
enum {
    DRM_CLR_AS_ENCRYPT = AES_SET_FLAG(AES128_CLR_AS_ENCRYPT, AES128_CTS_DISABLE, AES128_PADDING_NONE) // 0x06
};
// SSM entry structure
// One SubSampleMap (SSM) entry: a 32-bit protected-byte count followed by a
// single 32-bit word of bit-fields (16+4+4+1+1+1+1+1+3 bits).
typedef struct _nvdec_ssm_s {
    // Number of protected bytes; they follow the clear bytes. Note: when padding is
    // enabled this count does not include the padding bytes (1~15), which can be
    // derived by "(16-(bytes_of_protected_data&0xF))&0xF".
    unsigned int bytes_of_protected_data;
    unsigned int bytes_of_clear_data : 16; // number of clear bytes, located before the protected bytes
    unsigned int skip_byte_blk       :  4; // valid when (entry_type==0 && mode==1)
    unsigned int crypt_byte_blk      :  4; // valid when (entry_type==0 && mode==1)
    unsigned int skip                :  1; // whether this SSM entry should be skipped
    unsigned int last                :  1; // whether this is the last SSM entry of the whole decoding frame
    unsigned int pad                 :  1; // valid when (entry_type==0 && mode==0 && AES_PADDING_TYPE==AES128_PADDING_RFC2630): 0 for pad_end, 1 for pad_begin
    unsigned int mode                :  1; // 0 for normal mode, 1 for pattern mode
    unsigned int entry_type          :  1; // 0 for DATA, 1 for IV
    unsigned int reserved            :  3;
} nvdec_ssm_s; /* SubSampleMap, 8bytes */
// PASS2 OTF extension structure for SSM support, not exist in nvdec_mpeg4_pic_s (as MPEG4 OTF SW-DRM is not supported yet)
// PASS2 OTF extension carrying the SubSampleMap (SSM) parameters: three 32-bit words.
typedef struct _nvdec_pass2_otf_ext_s {
    unsigned int ssm_entry_num  : 16; // number of SSM entries (each in unit of 8 bytes) in the SET_SUB_SAMPLE_MAP_OFFSET surface
    unsigned int ssm_iv_num     : 16; // number of SSM IVs (each in unit of 16 bytes) in the SET_SUB_SAMPLE_MAP_IV_OFFSET surface
    // Real stream length: the bitstream length EMD/VLD will get after whole-frame SSM
    // processing, i.e. the sum of "clear+protected" bytes in the SSM entries with
    // "non_slice_data/skip" removed.
    unsigned int real_stream_length;
    unsigned int non_slice_data : 16; // number of leading bytes to skip; counts only "clear+protected" bytes ("padding" bytes excluded)
    unsigned int drm_mode       :  7;
    unsigned int reserved       :  9;
} nvdec_pass2_otf_ext_s; /* 12bytes */
//NVDEC5.0 low latency decoding (partial stream kickoff without context switch), method will reuse HevcSetSliceInfoBufferOffset.
// One entry of the substream map used for low latency decoding: two 32-bit
// words (offset, length) followed by one 32-bit word of bit-fields.
typedef struct _nvdec_substream_entry_s {
    unsigned int substream_start_offset;             // byte offset of the substream start, relative to the bitstream base address
    unsigned int substream_length;                   // substream length in bytes
    unsigned int substream_first_tile_idx      :  8; // first tile index (raster scan in frame) of this substream, max is 255
    unsigned int substream_last_tile_idx       :  8; // last tile index (raster scan in frame) of this substream, max is 255
    unsigned int last_substream_entry_in_frame :  1; // set when this is the last substream entry of the frame
    unsigned int reserved                      : 15;
} nvdec_substream_entry_s;/*low latency without context switch substream entry map,12bytes*/
// GIP

/* tile border coefficients of filter */
#define GIP_ASIC_VERT_FILTER_RAM_SIZE 16 /* bytes per pixel */

/* BSD control data of current picture at tile border
 * 11 * 128 bits per 4x4 tile = 128/(8*4) bytes per row */
#define GIP_ASIC_BSD_CTRL_RAM_SIZE    4  /* bytes per row */

/* 8 dc + 8 to boundary + 6*16 + 2*6*64 + 2*64 -> 63 * 16 bytes */
#define GIP_ASIC_SCALING_LIST_SIZE    (16 * 64)

/* tile border coefficients of filter */
#define GIP_ASIC_VERT_SAO_RAM_SIZE    16 /* bytes per pixel */

/* max number of tiles times width and height (2 bytes each),
 * rounding up to next 16 bytes boundary + one extra 16 byte
 * chunk (HW guys wanted to have this) */
#define GIP_ASIC_TILE_SIZE            ((20 * 22 * 2 * 2 + 16 + 15) & ~0xF)

/* Segment map uses 32 bytes / CTB */
#define GIP_ASIC_VP9_CTB_SEG_SIZE     32
// HEVC Filter FG buffer
#define HEVC_DBLK_TOP_SIZE_IN_SB16 ALIGN_UP(632, 128) // ctb16 + 444
#define HEVC_DBLK_TOP_BUF_SIZE(w) NVDEC_ALIGN( (ALIGN_UP(w,16)/16 + 2) * HEVC_DBLK_TOP_SIZE_IN_SB16) // 8K: 1285*256
#define HEVC_DBLK_LEFT_SIZE_IN_SB16 ALIGN_UP(506, 128) // ctb16 + 444
#define HEVC_DBLK_LEFT_BUF_SIZE(h) NVDEC_ALIGN( (ALIGN_UP(h,16)/16 + 2) * HEVC_DBLK_LEFT_SIZE_IN_SB16) // 8K: 1028*256
#define HEVC_SAO_LEFT_SIZE_IN_SB16 ALIGN_UP(713, 128) // ctb16 + 444
#define HEVC_SAO_LEFT_BUF_SIZE(h) NVDEC_ALIGN( (ALIGN_UP(h,16)/16 + 2) * HEVC_SAO_LEFT_SIZE_IN_SB16) // 8K: 1542*256
// VP9 Filter FG buffer
#define VP9_DBLK_TOP_SIZE_IN_SB64 ALIGN_UP(2000, 128) // 420
#define VP9_DBLK_TOP_BUF_SIZE(w) NVDEC_ALIGN( (ALIGN_UP(w,64)/64 + 2) * VP9_DBLK_TOP_SIZE_IN_SB64) // 8K: 1040*256
#define VP9_DBLK_LEFT_SIZE_IN_SB64 ALIGN_UP(1600, 128) // 420
#define VP9_DBLK_LEFT_BUF_SIZE(h) NVDEC_ALIGN( (ALIGN_UP(h,64)/64 + 2) * VP9_DBLK_LEFT_SIZE_IN_SB64) // 8K: 845*256
// VP9 Hint Dump Buffer
// Per-SB64 hint size: one 64x64 superblock holds (64*64)/(4*4) = 256 CUs at 8 bytes each.
#define VP9_HINT_DUMP_SIZE_IN_SB64 ((64*64)/(4*4)*8) // 8 bytes per CU, 256 CUs(2048 bytes) per SB64
// Hint dump buffer size for a w x h picture: number of SB64 columns times number of
// SB64 rows (both rounded up) times the per-SB64 size, aligned with NVDEC_ALIGN.
// w and h are parenthesized so that operator expressions can be passed as arguments.
#define VP9_HINT_DUMP_SIZE(w, h) NVDEC_ALIGN(VP9_HINT_DUMP_SIZE_IN_SB64*(((w)+63)/64)*(((h)+63)/64))
// used for ecdma debug
// ecdma configuration: one 32-bit enable word, nine 16-bit position/boundary
// values and two 8-bit settings.
typedef struct _nvdec_ecdma_config_s
{
    unsigned int   ecdma_enable;     // enable/disable ecdma
    unsigned short ecdma_blk_x_src;  // source start position x, 64x aligned
    unsigned short ecdma_blk_y_src;  // source start position y, 8x aligned
    unsigned short ecdma_blk_x_dst;  // destination start position x, 64x aligned
    unsigned short ecdma_blk_y_dst;  // destination start position y, 8x aligned
    unsigned short ref_pic_idx;      // reference (source) picture index, used to derive the source picture base address
    unsigned short boundary0_top;    // top boundary of the tile/partition region containing the source
    unsigned short boundary0_bottom; // bottom boundary of the tile/partition region containing the source
    unsigned short boundary1_left;   // left boundary of the tile/partition region containing the source
    unsigned short boundary1_right;  // right boundary of the tile/partition region containing the source
    // blk_copy enable flag.
    //   1: ctb_size == 3, ecdma_blk_x_src == boundary1_left and ecdma_blk_y_src == boundary0_top
    //   0: ecdma_blk_x_src == ecdma_blk_x_dst and ecdma_blk_y_src == ecdma_blk_y_dst
    unsigned char  blk_copy_flag;
    unsigned char  ctb_size;         // 0: 64x64, 1: 32x32, 2: 16x16, 3: 8x8
} nvdec_ecdma_config_s;
// HEVC frame status: nine 32-bit words (block counters, MV sums, error positions).
typedef struct _nvdec_status_hevc_s
{
    unsigned int frame_status_intra_cnt;   // intra block counter, in units of 8x8 blocks, IPCM blocks included
    unsigned int frame_status_inter_cnt;   // inter block counter, in units of 8x8 blocks, SKIP blocks included
    unsigned int frame_status_skip_cnt;    // skip block counter, in units of 4x4 blocks; blocks having NO/ZERO texture/coeff data
    unsigned int frame_status_fwd_mvx_cnt; // ABS sum of forward MVx, one 14bit MVx (integer) per 4x4 block
    unsigned int frame_status_fwd_mvy_cnt; // ABS sum of forward MVy, one 14bit MVy (integer) per 4x4 block
    unsigned int frame_status_bwd_mvx_cnt; // ABS sum of backward MVx, one 14bit MVx (integer) per 4x4 block
    unsigned int frame_status_bwd_mvy_cnt; // ABS sum of backward MVy, one 14bit MVy (integer) per 4x4 block
    unsigned int error_ctb_pos;            // [15:0] error ctb position in Y direction, [31:16] error ctb position in X direction
    unsigned int error_slice_pos;          // [15:0] error slice position in Y direction, [31:16] error slice position in X direction
} nvdec_status_hevc_s;
// VP9 frame status: nine 32-bit words with the same member list as nvdec_status_hevc_s.
typedef struct _nvdec_status_vp9_s
{
    unsigned int frame_status_intra_cnt;   // intra block counter, in units of 8x8 blocks, IPCM blocks included
    unsigned int frame_status_inter_cnt;   // inter block counter, in units of 8x8 blocks, SKIP blocks included
    unsigned int frame_status_skip_cnt;    // skip block counter, in units of 4x4 blocks; blocks having NO/ZERO texture/coeff data
    unsigned int frame_status_fwd_mvx_cnt; // ABS sum of forward MVx, one 14bit MVx (integer) per 4x4 block
    unsigned int frame_status_fwd_mvy_cnt; // ABS sum of forward MVy, one 14bit MVy (integer) per 4x4 block
    unsigned int frame_status_bwd_mvx_cnt; // ABS sum of backward MVx, one 14bit MVx (integer) per 4x4 block
    unsigned int frame_status_bwd_mvy_cnt; // ABS sum of backward MVy, one 14bit MVy (integer) per 4x4 block
    unsigned int error_ctb_pos;            // [15:0] error ctb position in Y direction, [31:16] error ctb position in X direction
    unsigned int error_slice_pos;          // [15:0] error slice position in Y direction, [31:16] error slice position in X direction
} nvdec_status_vp9_s;
// Decode status report: four common words, a codec-specific block (the HEVC and
// VP9 views share storage through an anonymous union), then the slice header
// error code.
typedef struct _nvdec_status_s
{
    unsigned int mbs_correctly_decoded;   // total number of correctly decoded macroblocks
    unsigned int mbs_in_error;            // number of macroblocks in error
    unsigned int cycle_count;             // total cycles taken to execute; read from the PERF_DECODE_FRAME_V register
    unsigned int error_status;            // reported error, if any
    union
    {
        nvdec_status_hevc_s hevc;
        nvdec_status_vp9_s  vp9;
    };
    unsigned int slice_header_error_code; // error reported for the slice header
} nvdec_status_s;
// per 16x16 block, used in hevc/vp9 surface of SetExternalMVBufferOffset when error_external_mv_en = 1
// External motion vector for one 16x16 block, packed into a single 32-bit word
// (14 + 14 + 4 bits).
// The MV components are declared `signed int`: whether a plain `int` bit-field is
// signed is implementation-defined in C, and motion vectors need negative values.
typedef struct _external_mv_s
{
    signed int   mvx    : 14; // horizontal MV, integer pixel precision, range -8192..8191
    signed int   mvy    : 14; // vertical MV, integer pixel precision, range -8192..8191
    unsigned int refidx :  4;
} external_mv_s;
// HEVC
// HEVC main10 444 extension fields: one 32-bit word of SPS extension bit-fields,
// one 32-bit word of PPS extension bit-fields, two 6-entry QP adjustment tables
// and two filter buffer offsets.
typedef struct _nvdec_hevc_main10_444_ext_s
{
    // SPS extension word (10 flag bits + 22 reserved bits)
    unsigned int transformSkipRotationEnableFlag    :  1; // sps extension for transform_skip_rotation_enabled_flag
    unsigned int transformSkipContextEnableFlag     :  1; // sps extension for transform_skip_context_enabled_flag
    unsigned int intraBlockCopyEnableFlag           :  1; // sps intraBlockCopyEnableFlag, always 0 before the spec defines it
    unsigned int implicitRdpcmEnableFlag            :  1; // sps implicit_rdpcm_enabled_flag
    unsigned int explicitRdpcmEnableFlag            :  1; // sps explicit_rdpcm_enabled_flag
    unsigned int extendedPrecisionProcessingFlag    :  1; // sps extended_precision_processing_flag, always 0 in current profile
    unsigned int intraSmoothingDisabledFlag         :  1; // sps intra_smoothing_disabled_flag
    unsigned int highPrecisionOffsetsEnableFlag     :  1; // sps high_precision_offsets_enabled_flag
    unsigned int fastRiceAdaptationEnableFlag       :  1; // sps fast_rice_adaptation_enabled_flag
    unsigned int cabacBypassAlignmentEnableFlag     :  1; // sps cabac_bypass_alignment_enabled_flag, always 0 in current profile
    unsigned int sps_444_extension_reserved         : 22; // sps bits reserved for future extension
    // PPS extension word (4+1+1+2+3+3+3 bits + 15 reserved bits)
    unsigned int log2MaxTransformSkipSize           :  4; // pps extension log2_max_transform_skip_block_size_minus2, 0...5
    unsigned int crossComponentPredictionEnableFlag :  1; // pps cross_component_prediction_enabled_flag
    unsigned int chromaQpAdjustmentEnableFlag       :  1; // pps chroma_qp_adjustment_enabled_flag
    unsigned int diffCuChromaQpAdjustmentDepth      :  2; // pps diff_cu_chroma_qp_adjustment_depth, 0...3
    unsigned int chromaQpAdjustmentTableSize        :  3; // pps chroma_qp_adjustment_table_size_minus1+1, 1...6
    unsigned int log2SaoOffsetScaleLuma             :  3; // pps log2_sao_offset_scale_luma, max(0,bitdepth-10), maxBitdepth 16 for future.
    unsigned int log2SaoOffsetScaleChroma           :  3; // pps log2_sao_offset_scale_chroma
    unsigned int pps_444_extension_reserved         : 15; // pps reserved bits
    char cb_qp_adjustment[6];                             // -[12,+12]
    char cr_qp_adjustment[6];                             // -[12,+12]
    unsigned int HevcFltAboveOffset;                      // filter above offset with respect to the filter buffer, in 256-byte units
    unsigned int HevcSaoAboveOffset;                      // sao above offset with respect to the filter buffer, in 256-byte units
} nvdec_hevc_main10_444_ext_s;
// Version-1 additions to the HEVC picture parameters: the main10 444 extension
// block followed by one 32-bit word of bit-fields (14+1+2+1+14 bits).
typedef struct _nvdec_hevc_pic_v1_s
{
    // New fields
    // hevc main10 444 extensions
    nvdec_hevc_main10_444_ext_s hevc_main10_444_ext;
    // HEVC skip bytes from the beginning of the stream, for secure playback.
    // This differs from sw_hdr_skip_length, which skips the part in the middle of
    // the stream (the slice header) that is parsed by the driver.
    unsigned int sw_skip_start_length     : 14;
    unsigned int external_ref_mem_dis     :  1;
    unsigned int error_recovery_start_pos :  2; // 0: from start of frame, 1: from start of slice segment, 2: from error detected ctb, 3: reserved
    unsigned int error_external_mv_en     :  1;
    unsigned int reserved0                : 14; // reserved bits padding the word to 32 bits
} nvdec_hevc_pic_v1_s;
//No versioning in structure: NVDEC2 (T210 and GM206)
//version v1 : NVDEC3 (T186 and GP100)
//version v2 : NVDEC3.1 (GP10x)
// Version-2 additions to the HEVC picture parameters: a single 32-bit word of
// mv-hevc bit-fields (1+6+1+6+1+6+1+10 bits).
typedef struct _nvdec_hevc_pic_v2_s
{
    // mv-hevc fields
    unsigned int mv_hevc_enable                 :  1;
    unsigned int nuh_layer_id                   :  6;
    unsigned int default_ref_layers_active_flag :  1;
    unsigned int NumDirectRefLayers             :  6;
    unsigned int max_one_active_ref_layer_flag  :  1;
    unsigned int NumActiveRefLayerPics          :  6;
    unsigned int poc_lsb_not_present_flag       :  1;
    unsigned int reserved0                      : 10;
} nvdec_hevc_pic_v2_s;
// Version-3 additions to the HEVC picture parameters: one 32-bit word of slice
// level decoding bit-fields (1+1+2+1+2+1+24 bits) and the slice edge buffer offset.
typedef struct _nvdec_hevc_pic_v3_s
{
    // slice level decoding
    unsigned int slice_decoding_enable :  1; // 1: enable slice level decoding
    unsigned int slice_ec_enable       :  1; // 1: enable slice error concealment; when slice_ec_enable=1, slice_decoding_enable must be 1
    unsigned int slice_ec_mv_type      :  2; // 0: zero mv; 1: co-located mv; 2: external mv
    unsigned int err_detected_sw       :  1; // 1: sw/driver has already detected an error in frame kick mode
    unsigned int slice_ec_slice_type   :  2; // 0: B slice; 1: P slice; others: reserved
    unsigned int slice_strm_recfg_en   :  1; // whether slice bitstream re-configuration is enabled
    unsigned int reserved              : 24;
    unsigned int HevcSliceEdgeOffset;        // slice edge buffer offset with respect to the filter buffer, in 256-byte units
}nvdec_hevc_pic_v3_s;
// Picture-level parameter structure for HEVC decoding.
// Members are grouped, in order, as: wrapped keys / IV, bitstream data info,
// general surface settings, SPS-derived fields, PPS-derived fields, reference
// picture data, misc per-picture values, ecdma debug configuration, DXVA
// fields, and finally the v1/v2/v3 and SSM extension structures.
// Member order and bit-field widths define the layout; do not reorder.
typedef struct _nvdec_hevc_pic_s
{
//The key/IV address must be 128-bit aligned
unsigned int wrapped_session_key[4]; //session keys
unsigned int wrapped_content_key[4]; //content keys
unsigned int initialization_vector[4]; //Ctrl64 initial vector
// hevc_bitstream_data_info
unsigned int stream_len; // stream length in one frame
unsigned int enable_encryption; // flag to enable/disable encryption
// The next seven bit-fields add up to 32 bits (6+4+4+1+1+1+15)
unsigned int key_increment : 6; // added to content key after unwrapping
unsigned int encryption_mode : 4;
unsigned int key_slot_index : 4;
unsigned int ssm_en : 1;
unsigned int enable_histogram : 1; // histogram stats output enable
unsigned int enable_substream_decoding: 1; //frame substream kickoff without context switch
unsigned int reserved0 :15;
// Driver may or may not use based upon need.
// If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode.
// Driver can send this value based upon resolution using the formula:
// gptimer_timeout_value = 3 * (cycles required for one frame)
unsigned int gptimer_timeout_value;
// general
// tileformat, gob_height and reserverd_surface_format are 8 bits in total (2+3+3)
unsigned char tileformat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16
unsigned char gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards)
unsigned char reserverd_surface_format : 3 ;
unsigned char sw_start_code_e; // 0: stream doesn't contain start codes,1: stream contains start codes
unsigned char disp_output_mode; // 0: Rec.709 8 bit, 1: Rec.709 10 bit, 2: Rec.709 10 bits -> 8 bit, 3: Rec.2020 10 bit -> 8 bit
unsigned char reserved1;
unsigned int framestride[2]; // frame buffer stride for luma and chroma
unsigned int colMvBuffersize; // collocated MV buffer size of one picture, in 256-byte units
unsigned int HevcSaoBufferOffset; // sao buffer offset with respect to the filter buffer, in 256-byte units
unsigned int HevcBsdCtrlOffset; // bsd buffer offset with respect to the filter buffer, in 256-byte units
// sps
unsigned short pic_width_in_luma_samples; // :15, 48(?)..16384, multiple of 8 (48 is smallest width supported by NVDEC for CTU size 16x16)
unsigned short pic_height_in_luma_samples; // :15, 8..16384, multiple of 8
// The next eight bit-fields add up to 32 bits (7*4 + 4 reserved)
unsigned int chroma_format_idc : 4; // always 1 (=4:2:0)
unsigned int bit_depth_luma : 4; // 8..12
unsigned int bit_depth_chroma : 4;
unsigned int log2_min_luma_coding_block_size : 4; // 3..6
unsigned int log2_max_luma_coding_block_size : 4; // 3..6
unsigned int log2_min_transform_block_size : 4; // 2..5
unsigned int log2_max_transform_block_size : 4; // 2..5
unsigned int reserved2 : 4;
// The next fourteen bit-fields (through reserved3) add up to 32 bits
unsigned int max_transform_hierarchy_depth_inter : 3; // 0..4
unsigned int max_transform_hierarchy_depth_intra : 3; // 0..4
unsigned int scalingListEnable : 1; //
unsigned int amp_enable_flag : 1; //
unsigned int sample_adaptive_offset_enabled_flag : 1; //
unsigned int pcm_enabled_flag : 1; //
unsigned int pcm_sample_bit_depth_luma : 4; //
unsigned int pcm_sample_bit_depth_chroma : 4;
unsigned int log2_min_pcm_luma_coding_block_size : 4; //
unsigned int log2_max_pcm_luma_coding_block_size : 4; //
unsigned int pcm_loop_filter_disabled_flag : 1; //
unsigned int sps_temporal_mvp_enabled_flag : 1; //
unsigned int strong_intra_smoothing_enabled_flag : 1; //
unsigned int reserved3 : 3;
// pps
// The next thirteen bit-fields (through reserved4) add up to 32 bits
unsigned int dependent_slice_segments_enabled_flag : 1; //
unsigned int output_flag_present_flag : 1; //
unsigned int num_extra_slice_header_bits : 3; // 0..7 (normally 0)
unsigned int sign_data_hiding_enabled_flag : 1; //
unsigned int cabac_init_present_flag : 1; //
unsigned int num_ref_idx_l0_default_active : 4; // 1..15
unsigned int num_ref_idx_l1_default_active : 4; // 1..15
unsigned int init_qp : 7; // 0..127, support higher bitdepth
unsigned int constrained_intra_pred_flag : 1; //
unsigned int transform_skip_enabled_flag : 1; //
unsigned int cu_qp_delta_enabled_flag : 1; //
unsigned int diff_cu_qp_delta_depth : 2; // 0..3
unsigned int reserved4 : 5; //
// NOTE(review): these offsets hold negative values but are declared as plain `char`,
// whose signedness is implementation-defined - confirm on targets where char is unsigned
char pps_cb_qp_offset ; // -12..12
char pps_cr_qp_offset ; // -12..12
char pps_beta_offset ; // -12..12
char pps_tc_offset ; // -12..12
// The next seventeen bit-fields (through reserved5) add up to 32 bits
unsigned int pps_slice_chroma_qp_offsets_present_flag : 1; //
unsigned int weighted_pred_flag : 1; //
unsigned int weighted_bipred_flag : 1; //
unsigned int transquant_bypass_enabled_flag : 1; //
unsigned int tiles_enabled_flag : 1; // (redundant: = num_tile_columns_minus1!=0 || num_tile_rows_minus1!=0)
unsigned int entropy_coding_sync_enabled_flag : 1; //
unsigned int num_tile_columns : 5; // 0..20
unsigned int num_tile_rows : 5; // 0..22
unsigned int loop_filter_across_tiles_enabled_flag : 1; //
unsigned int loop_filter_across_slices_enabled_flag : 1; //
unsigned int deblocking_filter_control_present_flag : 1; //
unsigned int deblocking_filter_override_enabled_flag : 1; //
unsigned int pps_deblocking_filter_disabled_flag : 1; //
unsigned int lists_modification_present_flag : 1; //
unsigned int log2_parallel_merge_level : 3; // 2..4
unsigned int slice_segment_header_extension_present_flag : 1; // (normally 0)
unsigned int reserved5 : 6;
// reference picture related
unsigned char num_ref_frames;
unsigned char reserved6;
unsigned short longtermflag; // long term flag for refpiclist.bit 15 for picidx 0, bit 14 for picidx 1,...
unsigned char initreflistidxl0[16]; // :5, [refPicidx] 0..15
unsigned char initreflistidxl1[16]; // :5, [refPicidx] 0..15
short RefDiffPicOrderCnts[16]; // poc diff between current and reference pictures .[-128,127]
// misc
unsigned char IDR_picture_flag; // idr flag for current picture
unsigned char RAP_picture_flag; // rap flag for current picture
unsigned char curr_pic_idx; // current picture store buffer index, used to derive the store address of frame buffer and MV
unsigned char pattern_id; // used for dithering to select between 2 tables
unsigned short sw_hdr_skip_length; // bits of the reference picture initial related syntax elements (SE) in the slice header.
// those SEs are decoded only once, in the driver; the related bits are flushed in HW
unsigned short reserved7;
// used for ecdma debug
nvdec_ecdma_config_s ecdma_cfg;
//DXVA on windows
// The next eight bit-fields add up to 32 bits (1+4+7+6+1+8+1+4)
unsigned int separate_colour_plane_flag : 1;
unsigned int log2_max_pic_order_cnt_lsb_minus4 : 4; //0~12
unsigned int num_short_term_ref_pic_sets : 7 ; //0~64
unsigned int num_long_term_ref_pics_sps : 6; //0~32
unsigned int bBitParsingDisable : 1 ; //disable parsing
unsigned int num_delta_pocs_of_rps_idx : 8;
unsigned int long_term_ref_pics_present_flag : 1;
unsigned int reserved_dxva : 4;
//the number of bits for short_term_ref_pic_set() in the slice header, dxva API
unsigned int num_bits_short_term_ref_pics_in_slice;
// New additions
nvdec_hevc_pic_v1_s v1;
nvdec_hevc_pic_v2_s v2;
nvdec_hevc_pic_v3_s v3;
nvdec_pass2_otf_ext_s ssm;
} nvdec_hevc_pic_s;
//hevc slice info class
// Per-slice information for HEVC: one 32-bit word of bit-fields (1+1+1+1+1+7+20
// bits), bitstream offset and length, and the slice start CTB coordinates.
typedef struct _hevc_slice_info_s {
    unsigned int first_flag            :  1; // first slice(s) of the frame; must be valid for slice EC
    unsigned int err_flag              :  1; // error slice(s); optional info for EC
    unsigned int last_flag             :  1; // last slice segment(s) of the frame; must be valid when slice_strm_recfg_en==1 or slice_ec==1
    unsigned int conceal_partial_slice :  1; // request partial slice error concealment for the packet loss case
    unsigned int available             :  1; // the slice bitstream is available
    unsigned int reserved0             :  7;
    unsigned int ctb_count             : 20; // number of ctbs inside the slice(s); must be valid for slice EC
    unsigned int bs_offset;                  // slice(s) bitstream offset in the bitstream buffer (in bytes)
    unsigned int bs_length;                  // slice(s) bitstream length: sum of aligned size, skip size and valid slice bitstream size
    // Slice start ctbx. Optional: HW can output it while decoding the previous slice,
    // but it serves as one check point for errors.
    unsigned short start_ctbx;
    unsigned short start_ctby;               // slice start ctby
} hevc_slice_info_s;
//hevc slice ctx class
//slice pos and next slice address
// Position of the next slice, kept as two 32-bit addresses.
typedef struct _slice_edge_ctb_pos_ctx_s {
    unsigned int next_slice_pos_ctbxy;    // 2d address in raster scan
    unsigned int next_slice_segment_addr; // 1d address in tile scan
}slice_edge_ctb_pos_ctx_s;
// next slice's first ctb located tile related information
// Tile information words for the tile that contains the next slice's first ctb.
typedef struct _slice_edge_tile_ctx_s {
    unsigned int tileInfo1; // misc tile info: tile width, tile height, tile col and tile row
    unsigned int tileInfo2; // misc tile info: tile start ctbx, start ctby and tile index
    unsigned int tileInfo3; // misc tile info: ctb position inside the tile
} slice_edge_tile_ctx_s;
//frame level stats
// Frame level statistics: eight 32-bit words.
typedef struct _slice_edge_stats_ctx_s {
    unsigned int frame_status_intra_cnt;   // frame stats: intra block count
    unsigned int frame_status_inter_cnt;   // frame stats: inter block count
    unsigned int frame_status_skip_cnt;    // frame stats: skip block count
    unsigned int frame_status_fwd_mvx_cnt; // frame stats: sum of abs fwd mvx
    unsigned int frame_status_fwd_mvy_cnt; // frame stats: sum of abs fwd mvy
    unsigned int frame_status_bwd_mvx_cnt; // frame stats: sum of abs bwd mvx
    unsigned int frame_status_bwd_mvy_cnt; // frame stats: sum of abs bwd mvy
    unsigned int frame_status_mv_cnt_ext;  // extension bits of the abs mv sums, to keep full precision
}slice_edge_stats_ctx_s;
//ctx of vpc_edge unit for tile left
// Context of the vpc_edge unit; currently a single reserved 32-bit word.
typedef struct _slice_vpc_edge_ctx_s {
    unsigned int reserved;
}slice_vpc_edge_ctx_s;
//ctx of vpc_main unit
// Context of the vpc_main unit; currently a single reserved 32-bit word.
typedef struct _slice_vpc_main_ctx_s {
    unsigned int reserved;
} slice_vpc_main_ctx_s;
//hevc slice edge ctx class
// HEVC slice edge context: the five sub-contexts stored back to back, in this
// order: ctb position, frame stats, tile info, vpc_edge, vpc_main.
typedef struct _slice_edge_ctx_s {
    slice_edge_ctb_pos_ctx_s slice_ctb_pos_ctx;  // ctb pos
    slice_edge_stats_ctx_s   slice_stats_ctx;    // stats
    slice_edge_tile_ctx_s    slice_tile_ctx;     // tile info
    slice_vpc_edge_ctx_s     slice_vpc_edge_ctx; // vpc_edge
    slice_vpc_main_ctx_s     slice_vpc_main_ctx; // vpc_main
} slice_edge_ctx_s;
//vp9
// Version-1 additions to the VP9 picture parameters: the filter above offset and
// one 32-bit word of bit-fields (1+4+2+1+24 bits).
typedef struct _nvdec_vp9_pic_v1_s
{
    // New fields
    // new_var : xx; // for variables with expanded bit length, comment on why the new bit length is required
    // Reserved bits for padding and/or non-HW specific functionality
    unsigned int Vp9FltAboveOffset;             // filter above offset with respect to the filter buffer, in 256-byte units
    unsigned int external_ref_mem_dis     :  1;
    unsigned int bit_depth                :  4;
    unsigned int error_recovery_start_pos :  2; // 0: from start of frame, 1: from start of slice segment, 2: from error detected ctb, 3: reserved
    unsigned int error_external_mv_en     :  1;
    unsigned int Reserved0                : 24;
} nvdec_vp9_pic_v1_s;
// VP9 frame surface identifiers: last, golden and altref frames, then the current frame.
enum VP9_FRAME_SFC_ID
{
    VP9_LAST_FRAME_SFC   = 0,
    VP9_GOLDEN_FRAME_SFC = 1,
    VP9_ALTREF_FRAME_SFC = 2,
    VP9_CURR_FRAME_SFC   = 3
};
// Picture-level parameter structure for VP9 decoding.
// Members are grouped, in order, as: wrapped keys / IV and bitstream data info,
// general surface settings, dimensions and strides of the three reference frames
// (last, golden, alt) and of the current frame, frame flags, loop filter and
// quantization parameters, frame-to-frame persisting values, segment and
// macroblock specific values, and finally the v1 and SSM extension structures.
// Member order and bit-field widths define the layout; do not reorder.
typedef struct _nvdec_vp9_pic_s
{
// vp9_bitstream_data_info
//Key and IV addresses must be 128-bit aligned
unsigned int wrapped_session_key[4]; //session keys
unsigned int wrapped_content_key[4]; //content keys
unsigned int initialization_vector[4]; //Ctrl64 initial vector
unsigned int stream_len; // stream length in one frame
unsigned int enable_encryption; // flag to enable/disable encryption
// The next seven bit-fields add up to 32 bits (6+4+14+4+1+1+2)
unsigned int key_increment : 6; // added to content key after unwrapping
unsigned int encryption_mode : 4;
unsigned int sw_hdr_skip_length :14; //vp9 skip bytes setting for secure
unsigned int key_slot_index : 4;
unsigned int ssm_en : 1;
unsigned int enable_histogram : 1; // histogram stats output enable
unsigned int reserved0 : 2;
// Driver may or may not use based upon need.
// If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode.
// Driver can send this value based upon resolution using the formula:
// gptimer_timeout_value = 3 * (cycles required for one frame)
unsigned int gptimer_timeout_value;
//general
// tileformat, gob_height and reserverd_surface_format are 8 bits in total (2+3+3)
unsigned char tileformat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16
unsigned char gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards)
unsigned char reserverd_surface_format : 3 ;
unsigned char reserved1[3];
unsigned int Vp9BsdCtrlOffset; // bsd buffer offset with respect to the filter buffer, in 256-byte units
//ref_last dimensions
unsigned short ref0_width; //ref_last coded width
unsigned short ref0_height; //ref_last coded height
unsigned short ref0_stride[2]; //ref_last stride
//ref_golden dimensions
unsigned short ref1_width; //ref_golden coded width
unsigned short ref1_height; //ref_golden coded height
unsigned short ref1_stride[2]; //ref_golden stride
//ref_alt dimensions
unsigned short ref2_width; //ref_alt coded width
unsigned short ref2_height; //ref_alt coded height
unsigned short ref2_stride[2]; //ref_alt stride
/* Current frame dimensions */
unsigned short width; //pic width
unsigned short height; //pic height
unsigned short framestride[2]; // frame buffer stride for luma and chroma
// Frame flags: seven bit-fields, 8 bits in total (6 flags + 2 reserved)
unsigned char keyFrame :1;
unsigned char prevIsKeyFrame:1;
unsigned char resolutionChange:1;
unsigned char errorResilient:1;
unsigned char prevShowFrame:1;
unsigned char intraOnly:1;
unsigned char reserved2 : 2;
/* DCT coefficient partitions */
//unsigned int offsetToDctParts;
unsigned char reserved3[3];
//unsigned char activeRefIdx[3];//3 bits
//unsigned char refreshFrameFlags;
//unsigned char refreshEntropyProbs;
//unsigned char frameParallelDecoding;
//unsigned char resetFrameContext;
unsigned char refFrameSignBias[4];
char loopFilterLevel;//6 bits
char loopFilterSharpness;//3 bits
/* Quantization parameters */
unsigned char qpYAc;
char qpYDc;
char qpChAc;
char qpChDc;
/* From here down, frame-to-frame persisting stuff */
char lossless;
char transform_mode;
char allow_high_precision_mv;
char mcomp_filter_type;
char comp_pred_mode;
char comp_fixed_ref;
char comp_var_ref[2];
char log2_tile_columns;
char log2_tile_rows;
/* Segment and macroblock specific values */
unsigned char segmentEnabled;
unsigned char segmentMapUpdate;
unsigned char segmentMapTemporalUpdate;
unsigned char segmentFeatureMode; /* ABS data or delta data */
unsigned char segmentFeatureEnable[8][4];
short segmentFeatureData[8][4];
char modeRefLfEnabled;
char mbRefLfDelta[4];
char mbModeLfDelta[2];
char reserved5; // for alignment
// New additions
nvdec_vp9_pic_v1_s v1;
nvdec_pass2_otf_ext_s ssm;
} nvdec_vp9_pic_s;
// Declare a padding member: an unsigned char array named x with y elements.
// Meant to be used inside a struct definition, followed by a semicolon.
#define NVDEC_VP9HWPAD(x, y)    unsigned char x[y]
// Motion vector probability context as a packed run of bytes (69 bytes in total).
// The "address" markers come from the original layout notes; the members between
// two markers add up to 32 bytes.
typedef struct {
    /* last bytes of address 41 */
    unsigned char joints[3];
    unsigned char sign[2];

    /* address 42 */
    unsigned char class0[2][1];
    unsigned char fp[2][3];
    unsigned char class0_hp[2];
    unsigned char hp[2];
    unsigned char classes[2][10];

    /* address 43 */
    unsigned char class0_fp[2][2][3];
    unsigned char bits[2][10];
} nvdec_nmv_context;
// Counters matching the motion vector context: all members are arrays of 32-bit
// unsigned integers (106 counters in total).
typedef struct {
    unsigned int joints[4];
    unsigned int sign[2][2];
    unsigned int classes[2][11];
    unsigned int class0[2][2];
    unsigned int bits[2][10][2];
    unsigned int class0_fp[2][2][4];
    unsigned int fp[2][4];
    unsigned int class0_hp[2][2];
    unsigned int hp[2][2];
} nvdec_nmv_context_counts;
/*
 * Adaptive entropy contexts.  Padding members are inserted so that the
 * tables are 256-bit aligned for HW access.
 * Compile with TRACE_PROB_TABLES to print bases for each table.
 *
 * Every member is byte-sized.  The member groups between consecutive
 * "address" markers add up to a multiple of 32 bytes (256 bits); the byte
 * totals are noted on each marker below.
 */
typedef struct nvdec_vp9AdaptiveEntropyProbs_s
{
    /* address 32 (28 + 4 = 32 bytes) */
    unsigned char inter_mode_prob[7][4];
    unsigned char intra_inter_prob[4];

    /* address 33 (80 + 2 + 4 + 6 + 4 + 32 = 128 bytes) */
    unsigned char uv_mode_prob[10][8];
    unsigned char tx8x8_prob[2][1];
    unsigned char tx16x16_prob[2][2];
    unsigned char tx32x32_prob[2][3];
    unsigned char sb_ymode_probB[4][1];
    unsigned char sb_ymode_prob[4][8];

    /* address 37 (128 bytes) */
    unsigned char partition_prob[2][16][4];

    /* address 41 (27 bytes here; the first 5 bytes of nmvc complete the row) */
    unsigned char uv_mode_probB[10][1];
    unsigned char switchable_interp_prob[4][2];
    unsigned char comp_inter_prob[5];
    unsigned char mbskip_probs[3];
    NVDEC_VP9HWPAD(pad1, 1);
    nvdec_nmv_context nmvc;

    /* address 44 (10 + 5 + 17 = 32 bytes) */
    unsigned char single_ref_prob[5][2];
    unsigned char comp_ref_prob[5];
    NVDEC_VP9HWPAD(pad2, 17);

    /* address 45 (4 tables of 576 bytes each) */
    unsigned char probCoeffs[2][2][6][6][4];
    unsigned char probCoeffs8x8[2][2][6][6][4];
    unsigned char probCoeffs16x16[2][2][6][6][4];
    unsigned char probCoeffs32x32[2][2][6][6][4];
} nvdec_vp9AdaptiveEntropyProbs_t;
/*
 * Entropy contexts: the default keyframe probability tables followed by the
 * backward-adapted tables in `a`.
 */
typedef struct nvdec_vp9EntropyProbs_s
{
    /* Default keyframe probs */
    /* Table formatted for 256b memory, probs 0to7 for all tables followed by
     * probs 8toN for all tables.
     * Compile with TRACE_PROB_TABLES to print bases for each table. */
    unsigned char kf_bmode_prob[10][10][8];     /* 800 bytes = 25 rows of 32 bytes */

    /* Address 25 (100 + 3 + 7 + 3 + 4 + 2 + 9 = 128 bytes) */
    unsigned char kf_bmode_probB[10][10][1];
    unsigned char ref_pred_probs[3];
    unsigned char mb_segment_tree_probs[7];
    unsigned char segment_pred_probs[3];
    unsigned char ref_scores[4];
    unsigned char prob_comppred[2];
    NVDEC_VP9HWPAD(pad1, 9);

    /* Address 29 (80 + 10 + 6 = 96 bytes) */
    unsigned char kf_uv_mode_prob[10][8];
    unsigned char kf_uv_mode_probB[10][1];
    NVDEC_VP9HWPAD(pad2, 6);

    nvdec_vp9AdaptiveEntropyProbs_t a;          /* Probs with backward adaptation */
} nvdec_vp9EntropyProbs_t;
/*
 * Counters for adaptive entropy contexts.  Every table is an array of
 * unsigned int; `nmvcount` nests the nvdec_nmv_context_counts tables.
 */
typedef struct nvdec_vp9EntropyCounts_s
{
    unsigned int inter_mode_counts[7][3][2];
    unsigned int sb_ymode_counts[4][10];
    unsigned int uv_mode_counts[10][10];
    unsigned int partition_counts[16][4];
    unsigned int switchable_interp_counts[4][3];
    unsigned int intra_inter_count[4][2];
    unsigned int comp_inter_count[5][2];
    unsigned int single_ref_count[5][2][2];
    unsigned int comp_ref_count[5][2];
    unsigned int tx32x32_count[2][4];
    unsigned int tx16x16_count[2][3];
    unsigned int tx8x8_count[2][2];
    unsigned int mbskip_count[3][2];

    nvdec_nmv_context_counts nmvcount;

    /* Coefficient counters, one table per transform size, then the EOB counters. */
    unsigned int countCoeffs[2][2][6][6][4];
    unsigned int countCoeffs8x8[2][2][6][6][4];
    unsigned int countCoeffs16x16[2][2][6][6][4];
    unsigned int countCoeffs32x32[2][2][6][6][4];
    unsigned int countEobs[4][2][2][6][6];
} nvdec_vp9EntropyCounts_t;
// Legacy codecs encryption parameters.
// Layout: three 4-word arrays followed by one 32-bit word of bit-fields,
// i.e. 13 words = 0x34 bytes with a 4-byte unsigned int.
typedef struct _nvdec_pass2_otf_s {
    unsigned int wrapped_session_key[4];    // session keys
    unsigned int wrapped_content_key[4];    // content keys
    unsigned int initialization_vector[4];  // Ctrl64 initial vector

    // control word: 1 + 6 + 4 + 4 + 1 + 16 = 32 bits
    unsigned int enable_encryption :  1;    // flag to enable/disable encryption
    unsigned int key_increment     :  6;    // added to content key after unwrapping
    unsigned int encryption_mode   :  4;
    unsigned int key_slot_index    :  4;
    unsigned int ssm_en            :  1;
    unsigned int reserved1         : 16;    // reserved
} nvdec_pass2_otf_s; // 0x34 bytes
// Display-surface output and histogram window parameters; embedded as
// `displayPara` in the per-codec picture structs.
typedef struct _nvdec_display_param_s
{
    // word 0: 1 + 1 + 3 + 1 + 3 + 8 + 3 + 1 + 11 = 32 bits
    unsigned int enableTFOutput  :  1;  // =1, enable dbfdma to output the display surface; if disabled, the tf settings below are ignored.
    // remap for VC1
    unsigned int VC1MapYFlag     :  1;
    unsigned int MapYValue       :  3;
    unsigned int VC1MapUVFlag    :  1;
    unsigned int MapUVValue      :  3;
    // tf
    unsigned int OutStride       :  8;
    unsigned int TilingFormat    :  3;
    unsigned int OutputStructure :  1;  // (0=frame, 1=field)
    unsigned int reserved0       : 11;

    int OutputTop[2];                   // in units of 256
    int OutputBottom[2];                // in units of 256

    // histogram, first word: 1 + 12 + 12 + 7 = 32 bits
    unsigned int enableHistogram :  1;  // enable histogram info collection.
    unsigned int HistogramStartX : 12;  // start X of Histogram window
    unsigned int HistogramStartY : 12;  // start Y of Histogram window
    unsigned int reserved1       :  7;

    // histogram, second word: 12 + 12 + 8 = 32 bits
    unsigned int HistogramEndX   : 12;  // end X of Histogram window
    unsigned int HistogramEndY   : 12;  // end Y of Histogram window
    unsigned int reserved2       :  8;
} nvdec_display_param_s; // size 0x1c bytes
// H.264
// One decoded-picture-buffer entry; nvdec_h264_pic_s carries an array of 16.
// One 32-bit word of bit-fields plus three 32-bit words.
typedef struct _nvdec_dpb_entry_s // 16 bytes
{
    // word 0: 7 + 5 + 2 + 1 + 1 + 1 + 4 + 4 + 1 + 6 = 32 bits
    unsigned int index                :  7;  // uncompressed frame buffer index
    unsigned int col_idx              :  5;  // index of associated co-located motion data buffer
    unsigned int state                :  2;  // per-field "used for reference" flags (top / bottom).
                                             // NOTE(review): the original comment named bit1 for both fields;
                                             // presumably bit0 = top and bit1 = bottom -- confirm.
    unsigned int is_long_term         :  1;  // 0=short-term, 1=long-term
    unsigned int not_existing         :  1;  // 1=marked as non-existing
    unsigned int is_field             :  1;  // set if unpaired field or complementary field pair
    unsigned int top_field_marking    :  4;
    unsigned int bottom_field_marking :  4;
    unsigned int output_memory_layout :  1;  // Set according to picture level output NV12/NV24 setting.
    unsigned int reserved             :  6;

    unsigned int FieldOrderCnt[2];           // : 2*32 [top/bottom]
    int          FrameIdx;                   // : 16   short-term: FrameNum (16 bits), long-term: LongTermFrameIdx (4 bits)
} nvdec_dpb_entry_s;
// H.264 picture-level parameter block.
// Sections, in order: encryption parameters and end-of-stream data, stream
// sizes and timeout, sequence-level fields, picture-level fields, picture
// setup (pitches, offsets, packed flags), the 16-entry DPB, scaling lists,
// MVC setup, lossless-decode flags, display parameters, and `ssm`.
// Member order and widths define the layout; do not reorder or resize members.
typedef struct _nvdec_h264_pic_s
{
nvdec_pass2_otf_s encryption_params;
unsigned char eos[16];
unsigned char explicitEOSPresentFlag;
unsigned char hint_dump_en; //enable COLOMV surface dump for all frames, which includes hints of "MV/REFIDX/QP/CBP/MBPART/MBTYPE", nvbug: 200212874
unsigned char reserved0[2];
unsigned int stream_len;
unsigned int slice_count;
unsigned int mbhist_buffer_size; // to pass buffer size of MBHIST_BUFFER
// Driver may or may not use based upon need.
// If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode.
// Driver can send this value based upon resolution using the formula:
// gptimer_timeout_value = 3 * (cycles required for one frame)
unsigned int gptimer_timeout_value;
// Fields from msvld_h264_seq_s
int log2_max_pic_order_cnt_lsb_minus4;
int delta_pic_order_always_zero_flag;
int frame_mbs_only_flag;
int PicWidthInMbs;
int FrameHeightInMbs;
// Surface-format word: 2 + 3 + 27 = 32 bits.
unsigned int tileFormat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16
unsigned int gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards)
unsigned int reserverd_surface_format : 27;
// Fields from msvld_h264_pic_s
int entropy_coding_mode_flag;
int pic_order_present_flag;
int num_ref_idx_l0_active_minus1;
int num_ref_idx_l1_active_minus1;
int deblocking_filter_control_present_flag;
int redundant_pic_cnt_present_flag;
int transform_8x8_mode_flag;
// Fields from mspdec_h264_picture_setup_s
unsigned int pitch_luma; // Luma pitch
unsigned int pitch_chroma; // chroma pitch
unsigned int luma_top_offset; // offset of luma top field in units of 256
unsigned int luma_bot_offset; // offset of luma bottom field in units of 256
unsigned int luma_frame_offset; // offset of luma frame in units of 256
unsigned int chroma_top_offset; // offset of chroma top field in units of 256
unsigned int chroma_bot_offset; // offset of chroma bottom field in units of 256
unsigned int chroma_frame_offset; // offset of chroma frame in units of 256
unsigned int HistBufferSize; // in units of 256
// First packed word: MbaffFrameFlag .. second_chroma_qp_index_offset = 32 bits.
unsigned int MbaffFrameFlag : 1; //
unsigned int direct_8x8_inference_flag: 1; //
unsigned int weighted_pred_flag : 1; //
unsigned int constrained_intra_pred_flag:1; //
unsigned int ref_pic_flag : 1; // reference picture (nal_ref_idc != 0)
unsigned int field_pic_flag : 1; //
unsigned int bottom_field_flag : 1; //
unsigned int second_field : 1; // second field of complementary reference field
unsigned int log2_max_frame_num_minus4: 4; // (0..12)
unsigned int chroma_format_idc : 2; //
unsigned int pic_order_cnt_type : 2; // (0..2)
int pic_init_qp_minus26 : 6; // : 6 (-26..+25)
int chroma_qp_index_offset : 5; // : 5 (-12..+12)
int second_chroma_qp_index_offset : 5; // : 5 (-12..+12)
// Second packed word: weighted_bipred_idc .. output_memory_layout = 32 bits.
unsigned int weighted_bipred_idc : 2; // : 2 (0..2)
unsigned int CurrPicIdx : 7; // : 7 uncompressed frame buffer index
unsigned int CurrColIdx : 5; // : 5 index of associated co-located motion data buffer
unsigned int frame_num : 16; //
unsigned int frame_surfaces : 1; // frame surfaces flag
unsigned int output_memory_layout : 1; // 0: NV12; 1:NV24. Field pair must use the same setting.
int CurrFieldOrderCnt[2]; // : 32 [Top_Bottom], [0]=TopFieldOrderCnt, [1]=BottomFieldOrderCnt
nvdec_dpb_entry_s dpb[16];
unsigned char WeightScale[6][4][4]; // : 6*4*4*8 in raster scan order (not zig-zag order)
unsigned char WeightScale8x8[2][8][8]; // : 2*8*8*8 in raster scan order (not zig-zag order)
// mvc setup info, must be zero if not mvc
unsigned char num_inter_view_refs_lX[2]; // number of inter-view references
char reserved1[14]; // reserved for alignment
signed char inter_view_refidx_lX[2][16]; // DPB indices (must also be marked as long-term)
// lossless decode (At the time of writing this manual, x264 and JM encoders, differ in Intra_8x8 reference sample filtering)
unsigned int lossless_ipred8x8_filter_enable : 1; // = 0, skips Intra_8x8 reference sample filtering, for vertical and horizontal predictions (x264 encoded streams); = 1, filter Intra_8x8 reference samples (JM encoded streams)
unsigned int qpprime_y_zero_transform_bypass_flag : 1; // determines the transform bypass mode
unsigned int reserved2 : 30; // kept for alignment; may be used for other parameters
nvdec_display_param_s displayPara;
nvdec_pass2_otf_ext_s ssm;
} nvdec_h264_pic_s;
// VC-1 Scratch buffer
// Frame coding mode; the value 1 is not assigned.
typedef enum _vc1_fcm_e
{
    FCM_PROGRESSIVE     = 0,
    FCM_FRAME_INTERLACE = 2,
    FCM_FIELD_INTERLACE = 3
} vc1_fcm_e;
// VC-1 picture type.
typedef enum _syntax_vc1_ptype_e
{
    PTYPE_I       = 0,
    PTYPE_P       = 1,
    PTYPE_B       = 2,
    // PTYPE_BI is not used to config register NV_CNVDEC_VLD_PIC_INFO_COMMON:
    // field NV_CNVDEC_VLD_PIC_INFO_COMMON_PIC_CODING_VC1 is only 2 bits, and
    // I and BI pictures are configured with the same value. Please refer to manual.
    PTYPE_BI      = 3,
    PTYPE_SKIPPED = 4
} syntax_vc1_ptype_e;
// Motion vector mode.
// 7.1.1.32, Table 46 etc. (presumably section/table numbers of the VC-1 specification)
enum vc1_mvmode_e
{
    MVMODE_MIXEDMV                = 0,
    MVMODE_1MV                    = 1,
    MVMODE_1MV_HALFPEL            = 2,
    MVMODE_1MV_HALFPEL_BILINEAR   = 3,
    MVMODE_INTENSITY_COMPENSATION = 4
};
// Field-picture type pair, named <first field>_<second field>.
// 9.1.1.42, Table 105
typedef enum _vc1_fptype_e
{
    FPTYPE_I_I   = 0,
    FPTYPE_I_P   = 1,
    FPTYPE_P_I   = 2,
    FPTYPE_P_P   = 3,
    FPTYPE_B_B   = 4,
    FPTYPE_B_BI  = 5,
    FPTYPE_BI_B  = 6,
    FPTYPE_BI_BI = 7
} vc1_fptype_e;
// Quantizer profile selector (type of the `dqprofile` member of nvdec_vc1_pic_s).
// Table 43 (7.1.1.31.2)
typedef enum _vc1_dqprofile_e
{
    DQPROFILE_ALL_FOUR_EDGES  = 0,
    DQPROFILE_DOUBLE_EDGE     = 1,
    DQPROFILE_SINGLE_EDGE     = 2,
    DQPROFILE_ALL_MACROBLOCKS = 3
} vc1_dqprofile_e;
// VC-1 picture-level parameter block.
// Sections, in order: encryption parameters and end-of-stream data, stream
// sizes and timeout, sequence fields, entry-point fields, scratch fields,
// picture-header fields, picture-decode setup (strides and offsets),
// display parameters, and `ssm`.
// Member order and widths define the layout; do not reorder or resize members.
typedef struct _nvdec_vc1_pic_s
{
nvdec_pass2_otf_s encryption_params;
unsigned char eos[16]; // to pass end of stream data separately if not present in bitstream surface
unsigned char prefixStartCode[4]; // used for dxva to pass prefix start code.
unsigned int bitstream_offset; // offset in words from start of bitstream surface if there is gap.
unsigned char explicitEOSPresentFlag; // to indicate that eos[] is used for passing end of stream data.
unsigned char reserved0[3];
unsigned int stream_len;
unsigned int slice_count;
unsigned int scratch_pic_buffer_size;
// Driver may or may not use based upon need.
// If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode.
// Driver can send this value based upon resolution using the formula:
// gptimer_timeout_value = 3 * (cycles required for one frame)
unsigned int gptimer_timeout_value;
// Fields from vc1_seq_s
unsigned short FrameWidth; // actual frame width
unsigned short FrameHeight; // actual frame height
unsigned char profile; // 1 = SIMPLE or MAIN, 2 = ADVANCED
unsigned char postprocflag;
unsigned char pulldown;
unsigned char interlace;
unsigned char tfcntrflag;
unsigned char finterpflag;
unsigned char psf;
// Surface-format byte: 2 + 3 + 3 = 8 bits.
unsigned char tileFormat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16
unsigned char gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards)
unsigned char reserverd_surface_format : 3 ;
// simple,main
unsigned char multires;
unsigned char syncmarker;
unsigned char rangered;
unsigned char maxbframes;
// Fields from vc1_entrypoint_s
unsigned char dquant;
unsigned char panscan_flag;
unsigned char refdist_flag;
unsigned char quantizer;
unsigned char extended_mv;
unsigned char extended_dmv;
unsigned char overlap;
unsigned char vstransform;
// Fields from vc1_scratch_s
char refdist;
char reserved1[3]; // for alignment
// Fields from vld_vc1_pic_s
vc1_fcm_e fcm;
syntax_vc1_ptype_e ptype;
int tfcntr;
int rptfrm;
int tff;
int rndctrl;
int pqindex;
int halfqp;
int pquantizer;
int postproc;
int condover;
int transacfrm;
int transacfrm2;
int transdctab;
int pqdiff;
int abspq;
int dquantfrm;
vc1_dqprofile_e dqprofile;
int dqsbedge;
int dqdbedge;
int dqbilevel;
int mvrange;
enum vc1_mvmode_e mvmode;
enum vc1_mvmode_e mvmode2;
int lumscale;
int lumshift;
int mvtab;
int cbptab;
int ttmbf;
int ttfrm;
int bfraction;
vc1_fptype_e fptype;
int numref;
int reffield;
int dmvrange;
int intcompfield;
int lumscale1; // type was char in ucode
int lumshift1; // type was char in ucode
int lumscale2; // type was char in ucode
int lumshift2; // type was char in ucode
int mbmodetab;
int imvtab;
int icbptab;
int fourmvbptab;
int fourmvswitch;
int intcomp;
int twomvbptab;
// simple,main
int rangeredfrm;
// Fields from pdec_vc1_pic_s
unsigned int HistBufferSize; // in units of 256
// frame buffers
unsigned int FrameStride[2]; // [y_c]
unsigned int luma_top_offset; // offset of luma top field in units of 256
unsigned int luma_bot_offset; // offset of luma bottom field in units of 256
unsigned int luma_frame_offset; // offset of luma frame in units of 256
unsigned int chroma_top_offset; // offset of chroma top field in units of 256
unsigned int chroma_bot_offset; // offset of chroma bottom field in units of 256
unsigned int chroma_frame_offset; // offset of chroma frame in units of 256
unsigned short CodedWidth; // entrypoint specific
unsigned short CodedHeight; // entrypoint specific
unsigned char loopfilter; // entrypoint specific
unsigned char fastuvmc; // entrypoint specific
unsigned char output_memory_layout; // picture specific
unsigned char ref_memory_layout[2]; // picture specific 0: fwd, 1: bwd
unsigned char reserved3[3]; // for alignment
nvdec_display_param_s displayPara;
nvdec_pass2_otf_ext_s ssm;
} nvdec_vc1_pic_s;
// MPEG-2
// Picture-level parameter block.
// Sections, in order: encryption parameters and end-of-stream data, stream
// sizes and timeout, sequence/picture header fields, picture setup (pitches,
// offsets, flags), quantiser matrices, reference layouts, display
// parameters, and `ssm`.
// Member order and widths define the layout; do not reorder or resize members.
typedef struct _nvdec_mpeg2_pic_s
{
nvdec_pass2_otf_s encryption_params;
unsigned char eos[16];
unsigned char explicitEOSPresentFlag;
unsigned char reserved0[3];
unsigned int stream_len;
unsigned int slice_count;
// Driver may or may not use based upon need.
// If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode.
// Driver can send this value based upon resolution using the formula:
// gptimer_timeout_value = 3 * (cycles required for one frame)
unsigned int gptimer_timeout_value;
// Fields from vld_mpeg2_seq_pic_info_s
short FrameWidth; // actual frame width
short FrameHeight; // actual frame height
unsigned char picture_structure; // 0 => Reserved, 1 => Top field, 2 => Bottom field, 3 => Frame picture. Table 6-14.
unsigned char picture_coding_type; // 0 => Forbidden, 1 => I, 2 => P, 3 => B, 4 => D (for MPEG-2). Table 6-12.
unsigned char intra_dc_precision; // 0 => 8 bits, 1=> 9 bits, 2 => 10 bits, 3 => 11 bits. Table 6-13.
char frame_pred_frame_dct; // as in section 6.3.10
char concealment_motion_vectors; // as in section 6.3.10
char intra_vlc_format; // as in section 6.3.10
// Surface-format byte: 2 + 3 + 3 = 8 bits.
unsigned char tileFormat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16
unsigned char gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards)
unsigned char reserverd_surface_format : 3 ;
char reserved1; // always 0
char f_code[4]; // as in section 6.3.10
// Fields from pdec_mpeg2_picture_setup_s
unsigned short PicWidthInMbs;
unsigned short FrameHeightInMbs;
unsigned int pitch_luma;
unsigned int pitch_chroma;
unsigned int luma_top_offset;
unsigned int luma_bot_offset;
unsigned int luma_frame_offset;
unsigned int chroma_top_offset;
unsigned int chroma_bot_offset;
unsigned int chroma_frame_offset;
unsigned int HistBufferSize;
unsigned short output_memory_layout;
unsigned short alternate_scan;
unsigned short secondfield;
/******************************/
// Got rid of the union kept for compatibility with NVDEC1.
// Removed field mpeg2, and kept rounding type.
// NVDEC1 ucode is not using the mpeg2 field, instead using codec type from the methods.
// Rounding type should only be set for Divx3.11.
unsigned short rounding_type;
/******************************/
unsigned int MbInfoSizeInBytes;
unsigned int q_scale_type;
unsigned int top_field_first;
unsigned int full_pel_fwd_vector;
unsigned int full_pel_bwd_vector;
unsigned char quant_mat_8x8intra[64];
unsigned char quant_mat_8x8nonintra[64];
unsigned int ref_memory_layout[2]; //0:for fwd; 1:for bwd
nvdec_display_param_s displayPara;
nvdec_pass2_otf_ext_s ssm;
} nvdec_mpeg2_pic_s;
// MPEG-4
// Picture-level parameter block.
// Sections, in order: encryption parameters and end-of-stream data, stream
// sizes and timeout, sequence fields, picture setup (strides, offsets,
// temporal distances, flags), quantiser matrices, reference layouts, and
// display parameters.  This struct has no trailing `ssm` member.
// Member order and widths define the layout; do not reorder or resize members.
typedef struct _nvdec_mpeg4_pic_s
{
nvdec_pass2_otf_s encryption_params;
unsigned char eos[16];
unsigned char explicitEOSPresentFlag;
unsigned char reserved2[3]; // for alignment
unsigned int stream_len;
unsigned int slice_count;
unsigned int scratch_pic_buffer_size;
// Driver may or may not use based upon need.
// If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode.
// Driver can send this value based upon resolution using the formula:
// gptimer_timeout_value = 3 * (cycles required for one frame)
unsigned int gptimer_timeout_value;
// Fields from vld_mpeg4_seq_s
short FrameWidth; // :13 video_object_layer_width
short FrameHeight; // :13 video_object_layer_height
char vop_time_increment_bitcount; // : 5 1..16
char resync_marker_disable; // : 1
// Surface-format byte: 2 + 3 + 3 = 8 bits.
unsigned char tileFormat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16
unsigned char gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards)
unsigned char reserverd_surface_format : 3 ;
char reserved3; // for alignment
// Fields from pdec_mpeg4_picture_setup_s
int width; // : 13
int height; // : 13
unsigned int FrameStride[2]; // [y_c]
unsigned int luma_top_offset; // offset of luma top field in units of 256
unsigned int luma_bot_offset; // offset of luma bottom field in units of 256
unsigned int luma_frame_offset; // offset of luma frame in units of 256
unsigned int chroma_top_offset; // offset of chroma top field in units of 256
unsigned int chroma_bot_offset; // offset of chroma bottom field in units of 256
unsigned int chroma_frame_offset; // offset of chroma frame in units of 256
unsigned int HistBufferSize; // in units of 256, History buffer size
int trd[2]; // : 16, temporal reference frame distance (only needed for B-VOPs)
int trb[2]; // : 16, temporal reference B-VOP distance from fwd reference frame (only needed for B-VOPs)
int divx_flags; // : 16 (bit 0: DivX interlaced chroma rounding, bit 1: Divx 4 boundary padding, bit 2: Divx IDCT)
short vop_fcode_forward; // : 1...7
short vop_fcode_backward; // : 1...7
unsigned char interlaced; // : 1
unsigned char quant_type; // : 1
unsigned char quarter_sample; // : 1
unsigned char short_video_header; // : 1
unsigned char curr_output_memory_layout; // : 1 0:NV12; 1:NV24
unsigned char ptype; // picture type: 0 for PTYPE_I, 1 for PTYPE_P, 2 for PTYPE_B, 3 for PTYPE_BI, 4 for PTYPE_SKIPPED
unsigned char rnd; // : 1, rounding mode
unsigned char alternate_vertical_scan_flag; // : 1
unsigned char top_field_flag; // : 1
unsigned char reserved0[3]; // alignment purpose
unsigned char intra_quant_mat[64]; // : 64*8
unsigned char nonintra_quant_mat[64]; // : 64*8
unsigned char ref_memory_layout[2]; //0:for fwd; 1:for bwd
// NOTE(review): with nvdec_pass2_otf_s as declared in this file (52 bytes,
// assuming a 4-byte int) the members up to and including reserved1 sum to
// 340 bytes, which is not a multiple of 256 -- confirm whether the
// alignment remark below is stale.
unsigned char reserved1[34]; // 256 byte alignment till now
nvdec_display_param_s displayPara;
} nvdec_mpeg4_pic_s;
// VP8
// Frame type: key frame or inter frame.
enum VP8_FRAME_TYPE
{
    VP8_KEYFRAME   = 0,
    VP8_INTERFRAME = 1
};
// Surface identifiers for the VP8 golden / altref / last / current frames.
enum VP8_FRAME_SFC_ID
{
    VP8_GOLDEN_FRAME_SFC = 0,
    VP8_ALTREF_FRAME_SFC = 1,
    VP8_LAST_FRAME_SFC   = 2,
    VP8_CURR_FRAME_SFC   = 3
};
// VP8 picture-level parameter block.
// Sections, in order: encryption parameters, timeout, frame header fields,
// context buffer sizes, current frame buffer strides/offsets, display
// parameters, memory-layout flags, the ucode result word, token partition
// offsets, and `ssm`.
// Member order and widths define the layout; do not reorder or resize members.
typedef struct _nvdec_vp8_pic_s
{
nvdec_pass2_otf_s encryption_params;
// Driver may or may not use based upon need.
// If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode.
// Driver can send this value based upon resolution using the formula:
// gptimer_timeout_value = 3 * (cycles required for one frame)
unsigned int gptimer_timeout_value;
unsigned short FrameWidth; // actual frame width
unsigned short FrameHeight; // actual frame height
unsigned char keyFrame; // 1: key frame; 0: not
unsigned char version;
// Surface-format byte: 2 + 3 + 3 = 8 bits.
unsigned char tileFormat : 2 ; // 0: TBL; 1: KBL; 2: Tile16x16
unsigned char gob_height : 3 ; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards)
unsigned char reserverd_surface_format : 3 ;
unsigned char errorConcealOn; // 1: error conceal on; 0: off
unsigned int firstPartSize; // the size of first partition(frame header and mb header partition)
// ctx
unsigned int HistBufferSize; // in units of 256
unsigned int VLDBufferSize; // in units of 1
// current frame buffers
unsigned int FrameStride[2]; // [y_c]
unsigned int luma_top_offset; // offset of luma top field in units of 256
unsigned int luma_bot_offset; // offset of luma bottom field in units of 256
unsigned int luma_frame_offset; // offset of luma frame in units of 256
unsigned int chroma_top_offset; // offset of chroma top field in units of 256
unsigned int chroma_bot_offset; // offset of chroma bottom field in units of 256
unsigned int chroma_frame_offset; // offset of chroma frame in units of 256
nvdec_display_param_s displayPara;
// decode picture buffer related
char current_output_memory_layout;
char output_memory_layout[3]; // output NV12/NV24 setting. item 0:golden; 1: altref; 2: last
unsigned char segmentation_feature_data_update;
unsigned char reserved1[3];
// ucode return result
unsigned int resultValue; // ucode return the picture header info; includes copy_buffer_to_golden etc.
unsigned int partition_offset[8]; // byte offset to each token partition (used for encrypted streams only)
// NOTE(review): the members before `ssm` sum to 180 (0xb4) bytes assuming a
// 4-byte int, so the 0xc0 total below holds only if nvdec_pass2_otf_ext_s
// is 12 bytes -- confirm against its declaration.
nvdec_pass2_otf_ext_s ssm;
} nvdec_vp8_pic_s; // size is 0xc0
// PASS1
// "Sample" means the entire frame is encrypted with a single IV; "subsample"
// means a given frame may be encrypted in multiple chunks with different IVs.
#define NUM_SUBSAMPLES 32

// Clear/encrypted byte counts for one subsample.
typedef struct _bytes_of_data_s
{
    unsigned int clear_bytes;       // clear bytes per subsample
    unsigned int encypted_bytes;    // encrypted bytes per subsample
} bytes_of_data_s;
// Input block for pass 1: per-subsample sizes and IVs, buffer sizes, and
// DRM/codec selection bit-fields.
typedef struct _nvdec_pass1_input_data_s
{
    bytes_of_data_s sample_size[NUM_SUBSAMPLES];            // clear/encrypted bytes per subsample
    unsigned int    initialization_vector[NUM_SUBSAMPLES][4]; // Ctrl64 initial vector per subsample
    unsigned char   IvValid[NUM_SUBSAMPLES];                // each element tells whether the IV is valid for that subsample

    unsigned int stream_len;                    // encrypted bitstream size.
    unsigned int clearBufferSize;               // allocated size of the clear buffer
    unsigned int reencryptBufferSize;           // allocated size of the reencrypted buffer
    unsigned int vp8coeffPartitonBufferSize;    // allocated buffer for vp8 coeff partition buffer
    unsigned int PrevWidth;                     // required for VP9

    // selection word: 16 + 8 + 4 + 4 = 32 bits
    unsigned int num_nals : 16;                 // number of subsamples in a frame
    unsigned int drm_mode :  8;                 // DRM mode
    unsigned int key_sel  :  4;                 // key select from keyslot
    unsigned int codec    :  4;                 // codecs selection

    unsigned int TotalSizeOfClearData;          // Used with Pattern based encryption
    unsigned int SliceHdrOffset;                // Used with pattern mode encryption where data before slice hdr comes in clear.

    // pattern word: 16 + 16 = 32 bits
    unsigned int EncryptBlkCnt : 16;
    unsigned int SkipBlkCnt    : 16;
} nvdec_pass1_input_data_s;
#define VP8_MAX_TOKEN_PARTITIONS 8
#define VP9_MAX_FRAMES_IN_SUPERFRAME 8

// Output block for pass 1: data sizes, re-encryption state, wrapped keys,
// and per-partition (VP8) or per-frame (VP9 superframe) sizes.
typedef struct _nvdec_pass1_output_data_s
{
    unsigned int clear_header_size;         // h264/vc1/mpeg2/vp8, decrypted pps/sps/part of slice header info, 128 bits aligned
    unsigned int reencrypt_data_size;       // h264/vc1/mpeg2, slice level data, vp8 mb header info, 128 bits aligned
    unsigned int clear_token_data_size;     // vp8, clear token data saved in VPR, 128 bits aligned

    // status word: 6 + 4 + 1 + 1 + 3 + 17 = 32 bits
    unsigned int key_increment      :  6;   // added to content key after unwrapping
    unsigned int encryption_mode    :  4;   // encryption mode
    unsigned int bReEncrypted       :  1;   // set to 0 if no re-encryption is done.
    unsigned int bvp9SuperFrame     :  1;   // set to 1 for vp9 superframe
    unsigned int vp9NumFramesMinus1 :  3;   // set equal to numFrames-1 for vp9superframe. Max 8 frames are possible in vp9 superframe.
    unsigned int reserved1          : 17;   // reserved, 32 bit alignment

    unsigned int wrapped_session_key[4];    // session keys
    unsigned int wrapped_content_key[4];    // content keys
    unsigned int initialization_vector[4];  // Ctrl64 initial vector

    // The two arrays below share the same storage.
    union {
        unsigned int partition_size[VP8_MAX_TOKEN_PARTITIONS];      // size of each token partition (used for encrypted streams of VP8)
        unsigned int vp9_frame_sizes[VP9_MAX_FRAMES_IN_SUPERFRAME]; // frame size information for all frames in vp9 superframe.
    };

    unsigned int vp9_clear_hdr_size[VP9_MAX_FRAMES_IN_SUPERFRAME];  // clear header size for each frame in vp9 superframe.
} nvdec_pass1_output_data_s;
/*****************************************************
AV1
*****************************************************/
/* Fixed-point scale factors for one reference frame. */
typedef struct _scale_factors_reference_s {
    short x_scale_fp;   // horizontal fixed point scale factor
    short y_scale_fp;   // vertical fixed point scale factor
} scale_factors_reference_s;
/* Dimensions, strides and frame-buffer index for one frame. */
typedef struct _frame_info_t {
    unsigned short width;               // in pixels; AV1 supports arbitrary resolution
    unsigned short height;
    unsigned short stride[2];           // luma and chroma stride in 16Bytes
    unsigned int   frame_buffer_idx;    // TBD: clean up; associates the reference frame with the frame buffer id used to look up base_addr
} frame_info_t;
/* Per-reference-frame description: frame info, scale factors, flags and offsets. */
typedef struct _ref_frame_struct_s {
    frame_info_t              info;
    scale_factors_reference_s sf;       // scale factor between reference frame and current frame size; the driver can calculate it

    // flag byte: 1 + 2 + 5 = 8 bits
    unsigned char sign_bias   : 1;      // calculated from frame_offset and the current frame offset
    unsigned char wmtype      : 2;      // global motion parameters : identity,translation,rotzoom,affine
    unsigned char reserved_rf : 5;

    short frame_off;                    // relative offset to current frame
    short roffset;                      // relative offset from current frame
} ref_frame_struct_s;
/* AV1 film grain configuration: one 16-bit flag word, the random seed, then
 * chroma offsets and multipliers. */
typedef struct _av1_fgs_cfg_t {
    // from AV1 spec 5.9.30 Film Grain Params syntax
    // flag word: 7 single-bit flags + 4 + 5 = 16 bits
    unsigned short apply_grain              : 1;
    unsigned short overlap_flag             : 1;
    unsigned short clip_to_restricted_range : 1;
    unsigned short chroma_scaling_from_luma : 1;
    unsigned short num_y_points_b           : 1;    // flag indicates num_y_points>0
    unsigned short num_cb_points_b          : 1;    // flag indicates num_cb_points>0
    unsigned short num_cr_points_b          : 1;    // flag indicates num_cr_points>0
    unsigned short scaling_shift            : 4;
    unsigned short reserved_fgs             : 5;

    unsigned short sw_random_seed;

    short cb_offset;
    short cr_offset;

    char cb_mult;
    char cb_luma_mult;
    char cr_mult;
    char cr_luma_mult;
} av1_fgs_cfg_t;
typedef struct _nvdec_av1_pic_s
{
nvdec_pass2_otf_s encryption_params;
nvdec_pass2_otf_ext_s ssm;
av1_fgs_cfg_t fgs_cfg;
// Driver may or may not use based upon need.
// If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode.
// Driver can send this value based upon resolution using the formula:
// gptimer_timeout_value = 3 * (cycles required for one frame)
unsigned int gptimer_timeout_value;
unsigned int stream_len; // stream length.
unsigned int reserved12; // skip bytes length to real frame data .
//sequence header
unsigned int use_128x128_superblock : 1; // superblock 128x128 or 64x64, 0:64x64, 1: 128x128
unsigned int chroma_format : 2; // 1:420, others:reserved for future
unsigned int bit_depth : 4; // bitdepth
unsigned int enable_filter_intra : 1; // tool enable in seq level, 0 : disable 1: frame header control
unsigned int enable_intra_edge_filter : 1;
unsigned int enable_interintra_compound : 1;
unsigned int enable_masked_compound : 1;
unsigned int enable_dual_filter : 1; // enable or disable vertical and horiz filter selection
unsigned int reserved10 : 1; // 0 - disable order hint, and related tools
unsigned int reserved0 : 3;
unsigned int enable_jnt_comp : 1; // 0 - disable joint compound modes
unsigned int reserved1 : 1;
unsigned int enable_cdef : 1;
unsigned int reserved11 : 1;
unsigned int enable_fgs : 1;
unsigned int enable_substream_decoding : 1; //enable frame substream kickoff mode without context switch
unsigned int reserved2 : 10; // reserved bits
//frame header
unsigned int frame_type : 2; // 0:Key frame, 1:Inter frame, 2:intra only, 3:s-frame
unsigned int show_frame : 1; // show frame flag
unsigned int reserved13 : 1;
unsigned int disable_cdf_update : 1; // disable CDF update during symbol decoding
unsigned int allow_screen_content_tools : 1; // screen content tool enable
unsigned int cur_frame_force_integer_mv : 1; // AMVR enable
unsigned int scale_denom_minus9 : 3; // The denominator minus9 of the superres scale
unsigned int allow_intrabc : 1; // IBC enable
unsigned int allow_high_precision_mv : 1; // 1/8 precision mv enable
unsigned int interp_filter : 3; // interpolation filter : EIGHTTAP_REGULAR,....
unsigned int switchable_motion_mode : 1; // 0: simple motion mode, 1: SIMPLE, OBMC, LOCAL WARP
unsigned int use_ref_frame_mvs : 1; // 1: current frame can use the previous frame mv information, MFMV
unsigned int refresh_frame_context : 1; // backward update flag
unsigned int delta_q_present_flag : 1; // quantizer index delta values are present in the block level
unsigned int delta_q_res : 2; // left shift will apply to decoded quantizer index delta values
unsigned int delta_lf_present_flag : 1; // specified whether loop filter delta values are present in the block level
unsigned int delta_lf_res : 2; // specifies the left shift will apply to decoded loop filter values
unsigned int delta_lf_multi : 1; // seperate loop filter deltas for Hy,Vy,U,V edges
unsigned int reserved3 : 1;
unsigned int coded_lossless : 1; // 1 means all segments use lossless coding. Frame is fully lossless, CDEF/DBF will disable
unsigned int tile_enabled : 1; // tile enable
unsigned int reserved4 : 2;
unsigned int superres_is_scaled : 1; // frame level frame for using_superres
unsigned int reserved_fh : 1;
unsigned int tile_cols : 8; // horizontal tile numbers in frame, max is 64
unsigned int tile_rows : 8; // vertical tile numbers in frame, max is 64
unsigned int context_update_tile_id : 16; // which tile cdf will be seleted as the backward update CDF, MAXTILEROW=64, MAXTILECOL=64, 12bits
unsigned int cdef_damping_minus_3 : 2; // controls the amount of damping in the deringing filter
unsigned int cdef_bits : 2; // the number of bits needed to specify which CDEF filter to apply
unsigned int frame_tx_mode : 3; // 0:ONLY4x4,3:LARGEST,4:SELECT
unsigned int frame_reference_mode : 2; // single,compound,select
unsigned int skip_mode_flag : 1; // skip mode
unsigned int skip_ref0 : 4;
unsigned int skip_ref1 : 4;
unsigned int allow_warp : 1; // sequence level & frame level warp enable
unsigned int reduced_tx_set_used : 1; // whether the frame is restricted to oa reduced subset of the full set of transform types
unsigned int ref_scaling_enable : 1;
unsigned int reserved5 : 1;
unsigned int reserved6 : 10; // reserved bits
unsigned short superres_upscaled_width; // upscale width, frame_size_with_refs() syntax,restoration will use it
unsigned short superres_luma_step;
unsigned short superres_chroma_step;
unsigned short superres_init_luma_subpel_x;
unsigned short superres_init_chroma_subpel_x;
/*frame header qp information*/
unsigned char base_qindex; // the maximum qp is 255
char y_dc_delta_q;
char u_dc_delta_q;
char v_dc_delta_q;
char u_ac_delta_q;
char v_ac_delta_q;
unsigned char qm_y; // 4bit: 0-15
unsigned char qm_u;
unsigned char qm_v;
/*cdef, need to update in the new spec*/
unsigned int cdef_y_pri_strength; // 4bit for one, max is 8
unsigned int cdef_uv_pri_strength; // 4bit for one, max is 8
unsigned int cdef_y_sec_strength : 16; // 2bit for one, max is 8
unsigned int cdef_uv_sec_strength : 16; // 2bit for one, max is 8
/*segmentation*/
unsigned char segment_enabled;
unsigned char segment_update_map;
unsigned char reserved7;
unsigned char segment_temporal_update;
short segment_feature_data[8][8];
unsigned char last_active_segid; // The highest numbered segment id that has some enabled feature.
unsigned char segid_preskip; // Whether the segment id will be read before the skip syntax element.
// 1: the segment id will be read first.
// 0: the skip syntax element will be read first.
unsigned char prevsegid_flag; // 1 : previous segment id is available
unsigned char segment_quant_sign : 8; // sign bit for segment alternative QP
/*loopfilter*/
unsigned char filter_level[2];
unsigned char filter_level_u;
unsigned char filter_level_v;
unsigned char lf_sharpness_level;
char lf_ref_deltas[8]; // 0 = Intra, Last, Last2+Last3, GF, BRF, ARF2, ARF
char lf_mode_deltas[2]; // 0 = ZERO_MV, MV
/*restoration*/
unsigned char lr_type ; // restoration type. Y:bit[1:0];U:bit[3:2],V:bit[5:4]
unsigned char lr_unit_size; // restoration unit size 0:32x32, 1:64x64, 2:128x128,3:256x256; Y:bit[1:0];U:bit[3:2],V:bit[5:4]
//general
frame_info_t current_frame;
ref_frame_struct_s ref_frame[7]; // Last, Last2, Last3, Golden, BWDREF, ALTREF2, ALTREF
unsigned int use_temporal0_mvs : 1;
unsigned int use_temporal1_mvs : 1;
unsigned int use_temporal2_mvs : 1;
unsigned int mf1_type : 3;
unsigned int mf2_type : 3;
unsigned int mf3_type : 3;
unsigned int reserved_mfmv : 20;
short mfmv_offset[3][7]; // 3: mf0~2, 7: Last, Last2, Last3, Golden, BWDREF, ALTREF2, ALTREF
char mfmv_side[3][7]; // flag for reverse offset great than 0
// MFMV relative offset from the ref frame(reference to reference relative offset)
unsigned char tileformat : 2; // 0: TBL; 1: KBL;
unsigned char gob_height : 3; // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards)
unsigned char errorConcealOn : 1; // this field is not used, use ctrl_param.error_conceal_on to enable error concealment in ucode,
// always set NV_CNVDEC_GIP_ERR_CONCEAL_CTRL_ON = 1 to enable error detect in hw
unsigned char reserver8 : 2; // reserve
unsigned char stream_error_detection : 1;
unsigned char mv_error_detection : 1;
unsigned char coeff_error_detection : 1;
unsigned char reserved_eh : 5;
// Filt neighbor buffer offset
unsigned int Av1FltTopOffset; // filter top buffer offset respect to filter buffer, 256 bytes unit
unsigned int Av1FltVertOffset; // filter vertical buffer offset respect to filter buffer, 256 bytes unit
unsigned int Av1CdefVertOffset; // cdef vertical buffer offset respect to filter buffer, 256 bytes unit
unsigned int Av1LrVertOffset; // lr vertical buffer offset respect to filter buffer, 256 bytes unit
unsigned int Av1HusVertOffset; // hus vertical buffer offset respect to filter buffer, 256 bytes unit
unsigned int Av1FgsVertOffset; // fgs vertical buffer offset respect to filter buffer, 256 bytes unit
unsigned int enable_histogram : 1;
unsigned int sw_skip_start_length : 14; //skip start length
unsigned int reserved_stat : 17;
} nvdec_av1_pic_s;
//////////////////////////////////////////////////////////////////////
// AV1 Buffer structure
//////////////////////////////////////////////////////////////////////
/*
 * Film-grain working memory for AV1. AV1_FGS_BUF_SIZE is derived from
 * sizeof() of this struct, so field order and array lengths define the
 * buffer layout and must not be changed.
 */
typedef struct _AV1FilmGrainMemory
{
    /* 256-entry byte lookup tables, one per plane (Y, Cb, Cr). */
    unsigned char   scaling_lut_y[256];
    unsigned char   scaling_lut_cb[256];
    unsigned char   scaling_lut_cr[256];

    /* 16-bit grain samples per plane.
     * NOTE(review): 4096 / 1024 entries look like 64x64 luma and 32x32 chroma
     * blocks -- confirm against the film grain synthesis spec. */
    short           cropped_luma_grain_block[4096];
    short           cropped_cb_grain_block[1024];
    short           cropped_cr_grain_block[1024];
} AV1FilmGrainMemory;
/*
 * Legacy per-tile record: tile dimensions plus start/end values stored as
 * four separate bytes each. The explicit padding rounds the record up to
 * 16 bytes. AV1_TILEINFO_BUF_SIZE_OLD sizes a buffer of AV1_MAX_TILES of these.
 */
typedef struct _AV1TileInfo_OLD
{
    unsigned char   width_in_sb;      /* tile width, in superblocks */
    unsigned char   height_in_sb;     /* tile height, in superblocks */

    /* NOTE(review): b0..b3 are presumably the four bytes of a 32-bit
     * value, b0 first -- byte order is not visible here, confirm. */
    unsigned char   tile_start_b0;
    unsigned char   tile_start_b1;
    unsigned char   tile_start_b2;
    unsigned char   tile_start_b3;

    unsigned char   tile_end_b0;
    unsigned char   tile_end_b1;
    unsigned char   tile_end_b2;
    unsigned char   tile_end_b3;

    unsigned char   padding[6];       /* pads the record to 16 bytes */
} AV1TileInfo_OLD;
/*
 * Per-tile size record (4 bytes). Each dimension is followed by one
 * padding byte. AV1_TILEINFO_BUF_SIZE sizes a buffer of AV1_MAX_TILES of these.
 */
typedef struct _AV1TileInfo
{
    unsigned char   width_in_sb;      /* tile width, in superblocks */
    unsigned char   padding_w;        /* pad byte following the width */
    unsigned char   height_in_sb;     /* tile height, in superblocks */
    unsigned char   padding_h;        /* pad byte following the height */
} AV1TileInfo;
/*
 * Per-tile stream range record. With a 4-byte unsigned int the two values
 * plus the explicit padding make a 16-byte record.
 * AV1_TILESTREAMINFO_BUF_SIZE sizes a buffer of AV1_MAX_TILES of these.
 */
typedef struct _AV1TileStreamInfo
{
    unsigned int    tile_start;   /* NOTE(review): presumably the tile's start offset in the bitstream -- confirm units */
    unsigned int    tile_end;     /* NOTE(review): presumably the tile's end offset in the bitstream -- confirm units */
    unsigned char   padding[8];   /* explicit tail padding */
} AV1TileStreamInfo;
// AV1 buffer size helpers.
// NVDEC_ALIGN() and ALIGN_UP() are defined outside this section.
// Every function-like macro below parenthesizes its arguments and its whole
// expansion so it can be used safely inside larger expressions
// (e.g. `total / AV1_HUS_LEFT_BUF_SIZE(h)`) and with expression arguments.

// AV1 TileSize buffer
#define AV1_MAX_TILES                           256
#define AV1_TILEINFO_BUF_SIZE_OLD               NVDEC_ALIGN(AV1_MAX_TILES * sizeof(AV1TileInfo_OLD))
#define AV1_TILEINFO_BUF_SIZE                   NVDEC_ALIGN(AV1_MAX_TILES * sizeof(AV1TileInfo))
// AV1 TileStreamInfo buffer
#define AV1_TILESTREAMINFO_BUF_SIZE             NVDEC_ALIGN(AV1_MAX_TILES * sizeof(AV1TileStreamInfo))
// AV1 SubStreamEntry buffer
#define MAX_SUBSTREAM_ENTRY_SIZE                32
#define AV1_SUBSTREAM_ENTRY_BUF_SIZE            NVDEC_ALIGN(MAX_SUBSTREAM_ENTRY_SIZE * sizeof(nvdec_substream_entry_s))
// AV1 FilmGrain Parameter buffer
#define AV1_FGS_BUF_SIZE                        NVDEC_ALIGN(sizeof(AV1FilmGrainMemory))
// AV1 Temporal MV buffer
#define AV1_TEMPORAL_MV_SIZE_IN_64x64           256 // 4Bytes for 8x8
#define AV1_TEMPORAL_MV_BUF_SIZE(w, h)          ALIGN_UP( ALIGN_UP((w),128) * ALIGN_UP((h),128) / (64*64) * AV1_TEMPORAL_MV_SIZE_IN_64x64, 4096)
// AV1 SegmentID buffer
#define AV1_SEGMENT_ID_SIZE_IN_64x64            128 // (3bits + 1 pad_bits) for 4x4
#define AV1_SEGMENT_ID_BUF_SIZE(w, h)           ALIGN_UP( ALIGN_UP((w),128) * ALIGN_UP((h),128) / (64*64) * AV1_SEGMENT_ID_SIZE_IN_64x64, 4096)
// AV1 Global Motion buffer
#define AV1_GLOBAL_MOTION_BUF_SIZE              NVDEC_ALIGN(7*32)
// AV1 Intra Top buffer
#define AV1_INTRA_TOP_BUF_SIZE                  NVDEC_ALIGN(8*8192)
// AV1 Histogram buffer
#define AV1_HISTOGRAM_BUF_SIZE                  NVDEC_ALIGN(1024)
// AV1 Filter FG buffer
// Each *_BUF_SIZE(x) below reserves (number of 64-pixel superblocks along x, rounded up) + 2
// entries of the matching *_SIZE_IN_SB64 bytes.
#define AV1_DBLK_TOP_SIZE_IN_SB64               ALIGN_UP(1920, 128)
#define AV1_DBLK_TOP_BUF_SIZE(w)                NVDEC_ALIGN( (ALIGN_UP((w),64)/64 + 2) * AV1_DBLK_TOP_SIZE_IN_SB64)
#define AV1_DBLK_LEFT_SIZE_IN_SB64              ALIGN_UP(1536, 128)
#define AV1_DBLK_LEFT_BUF_SIZE(h)               NVDEC_ALIGN( (ALIGN_UP((h),64)/64 + 2) * AV1_DBLK_LEFT_SIZE_IN_SB64)
#define AV1_CDEF_LEFT_SIZE_IN_SB64              ALIGN_UP(1792, 128)
#define AV1_CDEF_LEFT_BUF_SIZE(h)               NVDEC_ALIGN( (ALIGN_UP((h),64)/64 + 2) * AV1_CDEF_LEFT_SIZE_IN_SB64)
#define AV1_HUS_LEFT_SIZE_IN_SB64               ALIGN_UP(12544, 128)
#define AV1_ASIC_HUS_LEFT_BUFFER_SIZE(h)        NVDEC_ALIGN( (ALIGN_UP((h),64)/64 + 2) * AV1_HUS_LEFT_SIZE_IN_SB64)
// Whole expansion is parenthesized so the factor of 2 cannot bind to a neighbouring operator.
#define AV1_HUS_LEFT_BUF_SIZE(h)                (2 * AV1_ASIC_HUS_LEFT_BUFFER_SIZE(h)) // Ping-Pong buffers
#define AV1_LR_LEFT_SIZE_IN_SB64                ALIGN_UP(1920, 128)
#define AV1_LR_LEFT_BUF_SIZE(h)                 NVDEC_ALIGN( (ALIGN_UP((h),64)/64 + 2) * AV1_LR_LEFT_SIZE_IN_SB64)
#define AV1_FGS_LEFT_SIZE_IN_SB64               ALIGN_UP(320, 128)
#define AV1_FGS_LEFT_BUF_SIZE(h)                NVDEC_ALIGN( (ALIGN_UP((h),64)/64 + 2) * AV1_FGS_LEFT_SIZE_IN_SB64)
// AV1 Hint Dump Buffer
#define AV1_HINT_DUMP_SIZE_IN_SB64              ((64*64)/(4*4)*8)       // 8 bytes per CU, 256 CUs(2048 bytes) per SB64
#define AV1_HINT_DUMP_SIZE_IN_SB128             ((128*128)/(4*4)*8)     // 8 bytes per CU,1024 CUs(8192 bytes) per SB128
// w and h are parenthesized so expression arguments are rounded up as a whole.
#define AV1_HINT_DUMP_SIZE(w, h)                NVDEC_ALIGN(AV1_HINT_DUMP_SIZE_IN_SB128*(((w)+127)/128)*(((h)+127)/128)) // always use SB128 for allocation
/*******************************************************************
New H264
********************************************************************/
/*
 * H.264 picture setup structure.
 *
 * Field order, types and bit-field widths define the layout and must not be
 * changed. The three bit-fields documented with negative ranges are declared
 * `signed int`: whether a plain `int` bit-field is signed is
 * implementation-defined in C, so the explicit form keeps the documented
 * ranges representable on every compiler without altering the layout.
 */
typedef struct _nvdec_new_h264_pic_s
{
    nvdec_pass2_otf_s encryption_params;
    unsigned char eos[16];
    unsigned char explicitEOSPresentFlag;
    unsigned char hint_dump_en;                 // enable COLOMV surface dump for all frames, which includes hints of "MV/REFIDX/QP/CBP/MBPART/MBTYPE", nvbug: 200212874
    unsigned char reserved0[2];
    unsigned int stream_len;
    unsigned int slice_count;
    unsigned int mbhist_buffer_size;            // to pass buffer size of MBHIST_BUFFER

    // Driver may or may not use based upon need.
    // If 0 then default value of 1<<27 = 298ms @ 450MHz will be used in ucode.
    // Driver can send this value based upon resolution using the formula:
    // gptimer_timeout_value = 3 * (cycles required for one frame)
    unsigned int gptimer_timeout_value;

    // Fields from msvld_h264_seq_s
    int log2_max_pic_order_cnt_lsb_minus4;
    int delta_pic_order_always_zero_flag;
    int frame_mbs_only_flag;
    int PicWidthInMbs;
    int FrameHeightInMbs;

    // Surface format word (2 + 3 + 27 = 32 bits)
    unsigned int tileFormat                 : 2 ;   // 0: TBL; 1: KBL; 2: Tile16x16
    unsigned int gob_height                 : 3 ;   // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards)
    unsigned int reserverd_surface_format   : 27;   // reserved; the misspelled name is kept for source compatibility

    // Fields from msvld_h264_pic_s
    int entropy_coding_mode_flag;
    int pic_order_present_flag;
    int num_ref_idx_l0_active_minus1;
    int num_ref_idx_l1_active_minus1;
    int deblocking_filter_control_present_flag;
    int redundant_pic_cnt_present_flag;
    int transform_8x8_mode_flag;

    // Fields from mspdec_h264_picture_setup_s
    unsigned int pitch_luma;                    // Luma pitch
    unsigned int pitch_chroma;                  // chroma pitch

    unsigned int luma_top_offset;               // offset of luma top field in units of 256
    unsigned int luma_bot_offset;               // offset of luma bottom field in units of 256
    unsigned int luma_frame_offset;             // offset of luma frame in units of 256
    unsigned int chroma_top_offset;             // offset of chroma top field in units of 256
    unsigned int chroma_bot_offset;             // offset of chroma bottom field in units of 256
    unsigned int chroma_frame_offset;           // offset of chroma frame in units of 256
    unsigned int HistBufferSize;                // in units of 256

    // First picture-flag word: the widths below add up to 32 bits
    unsigned int MbaffFrameFlag                 : 1;
    unsigned int direct_8x8_inference_flag      : 1;
    unsigned int weighted_pred_flag             : 1;
    unsigned int constrained_intra_pred_flag    : 1;
    unsigned int ref_pic_flag                   : 1;    // reference picture (nal_ref_idc != 0)
    unsigned int field_pic_flag                 : 1;
    unsigned int bottom_field_flag              : 1;
    unsigned int second_field                   : 1;    // second field of complementary reference field
    unsigned int log2_max_frame_num_minus4      : 4;    // (0..12)
    unsigned int chroma_format_idc              : 2;
    unsigned int pic_order_cnt_type             : 2;    // (0..2)
    signed int   pic_init_qp_minus26            : 6;    // : 6 (-26..+25), explicitly signed
    signed int   chroma_qp_index_offset         : 5;    // : 5 (-12..+12), explicitly signed
    signed int   second_chroma_qp_index_offset  : 5;    // : 5 (-12..+12), explicitly signed

    // Second picture-flag word: the widths below add up to 32 bits
    unsigned int weighted_bipred_idc            : 2;    // : 2 (0..2)
    unsigned int CurrPicIdx                     : 7;    // : 7  uncompressed frame buffer index
    unsigned int CurrColIdx                     : 5;    // : 5  index of associated co-located motion data buffer
    unsigned int frame_num                      : 16;
    unsigned int frame_surfaces                 : 1;    // frame surfaces flag
    unsigned int output_memory_layout           : 1;    // 0: NV12; 1:NV24. Field pair must use the same setting.

    int CurrFieldOrderCnt[2];                   // : 32 [Top_Bottom], [0]=TopFieldOrderCnt, [1]=BottomFieldOrderCnt
    nvdec_dpb_entry_s dpb[16];
    unsigned char WeightScale[6][4][4];         // : 6*4*4*8 in raster scan order (not zig-zag order)
    unsigned char WeightScale8x8[2][8][8];      // : 2*8*8*8 in raster scan order (not zig-zag order)

    // mvc setup info, must be zero if not mvc
    unsigned char num_inter_view_refs_lX[2];    // number of inter-view references
    char reserved1[14];                         // reserved for alignment
    signed char inter_view_refidx_lX[2][16];    // DPB indices (must also be marked as long-term)

    // lossless decode (At the time of writing this manual, x264 and JM encoders differ in Intra_8x8 reference sample filtering)
    unsigned int lossless_ipred8x8_filter_enable        : 1;    // = 0, skips Intra_8x8 reference sample filtering, for vertical and horizontal predictions (x264 encoded streams); = 1, filter Intra_8x8 reference samples (JM encoded streams)
    unsigned int qpprime_y_zero_transform_bypass_flag   : 1;    // determines the transform bypass mode
    unsigned int reserved2                              : 30;   // kept for alignment; may be used for other parameters

    nvdec_display_param_s displayPara;
    nvdec_pass2_otf_ext_s ssm;
} nvdec_new_h264_pic_s;
// Golden CRC struct dumped into surface.
// For each part, if golden crc compare is enabled, one interface is selected to do crc calculation in vmod.
// vmod's crc is compared with cmod's golden crc (4*32 bits), and the compare result is written into surface.
typedef struct
{
    // ---- input: control word (4*1 + 4*6 + 4 = 32 bits) ----
    unsigned int dbg_crc_enable_partb   : 1;    // Enable flag for interface crc calculation in NVDEC HW's part b
    unsigned int dbg_crc_enable_partc   : 1;    // Enable flag for interface crc calculation in NVDEC HW's part c
    unsigned int dbg_crc_enable_partd   : 1;    // Enable flag for interface crc calculation in NVDEC HW's part d
    unsigned int dbg_crc_enable_parte   : 1;    // Enable flag for interface crc calculation in NVDEC HW's part e
    unsigned int dbg_crc_intf_partb     : 6;    // Selects which part b interface to compare crc on. See DBG_CRC_PARTE_INTF_SEL for the control value of each interface
    unsigned int dbg_crc_intf_partc     : 6;    // Selects which part c interface to compare crc on. See DBG_CRC_PARTE_INTF_SEL for the control value of each interface
    unsigned int dbg_crc_intf_partd     : 6;    // Selects which part d interface to compare crc on. See DBG_CRC_PARTE_INTF_SEL for the control value of each interface
    unsigned int dbg_crc_intf_parte     : 6;    // Selects which part e interface to compare crc on. See DBG_CRC_PARTE_INTF_SEL for the control value of each interface
    unsigned int reserved0              : 4;

    // ---- input: golden values, four words per part ----
    unsigned int dbg_crc_partb_golden[4];       // Golden crc values for part b
    unsigned int dbg_crc_partc_golden[4];       // Golden crc values for part c
    unsigned int dbg_crc_partd_golden[4];       // Golden crc values for part d
    unsigned int dbg_crc_parte_golden[4];       // Golden crc values for part e

    // ---- output: result word (4*4 + 16 = 32 bits) ----
    unsigned int dbg_crc_comp_partb     : 4;    // Compare result for part b
    unsigned int dbg_crc_comp_partc     : 4;    // Compare result for part c
    unsigned int dbg_crc_comp_partd     : 4;    // Compare result for part d
    unsigned int dbg_crc_comp_parte     : 4;    // Compare result for part e
    unsigned int reserved1              : 16;

    unsigned char reserved2[56];                // pads the struct to its 128-byte size
} nvdec_crc_s;  // 128 Bytes
#endif // __NVDEC_DRV_H_