mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-07 22:23:55 -05:00
kfd free buffers (#4027)
* kfd free buffers * unmap * all test passes * better pm4 * forgot these * invalidate only range * better cache * forgot * comments * fixes
This commit is contained in:
@@ -79,16 +79,21 @@ generate_hsa() {
|
||||
/opt/rocm/include/hsa/amd_hsa_signal.h \
|
||||
/opt/rocm/include/hsa/amd_hsa_queue.h \
|
||||
/opt/rocm/include/hsa/hsa_ext_finalize.h /opt/rocm/include/hsa/hsa_ext_image.h \
|
||||
/opt/rocm/include/hsa/hsa_ven_amd_aqlprofile.h \
|
||||
--clang-args="-I/opt/rocm/include" \
|
||||
-o $BASE/hsa.py -l /opt/rocm/lib/libhsa-runtime64.so
|
||||
|
||||
# clang2py breaks when -x c++ is passed for the previous headers
|
||||
clang2py extra/hip_gpu_driver/sdma_registers.h \
|
||||
--clang-args="-I/opt/rocm/include -x c++" \
|
||||
-o $BASE/amd_sdma.py -l /opt/rocm/lib/libhsa-runtime64.so
|
||||
-o $BASE/amd_gpu.py -l /opt/rocm/lib/libhsa-runtime64.so
|
||||
|
||||
sed 's/^\(.*\)\(\s*\/\*\)\(.*\)$/\1 #\2\3/; s/^\(\s*\*\)\(.*\)$/#\1\2/' extra/hip_gpu_driver/nvd.h >> $BASE/amd_gpu.py # comments
|
||||
sed -i 's/#\s*define\s*\([^ \t]*\)(\([^)]*\))\s*\(.*\)/def \1(\2): return \3/' $BASE/amd_gpu.py # #define name(x) (smth) -> def name(x): return (smth)
|
||||
sed -i '/#\s*define\s\+\([^ \t]\+\)\s\+\([^ ]\+\)/s//\1 = \2/' $BASE/amd_gpu.py # #define name val -> name = val
|
||||
|
||||
fixup $BASE/hsa.py
|
||||
fixup $BASE/amd_sdma.py
|
||||
fixup $BASE/amd_gpu.py
|
||||
sed -i "s\import ctypes\import ctypes, os\g" $BASE/hsa.py
|
||||
sed -i "s\'/opt/rocm/\os.getenv('ROCM_PATH', '/opt/rocm/')+'/\g" $BASE/hsa.py
|
||||
python3 -c "import tinygrad.runtime.autogen.hsa"
|
||||
|
||||
470
extra/hip_gpu_driver/nvd.h
Normal file
470
extra/hip_gpu_driver/nvd.h
Normal file
@@ -0,0 +1,470 @@
|
||||
/*
|
||||
* Copyright 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef NVD_H
|
||||
#define NVD_H
|
||||
|
||||
/**
|
||||
* Navi's PM4 definitions
|
||||
*/
|
||||
#define PACKET_TYPE0 0
|
||||
#define PACKET_TYPE1 1
|
||||
#define PACKET_TYPE2 2
|
||||
#define PACKET_TYPE3 3
|
||||
|
||||
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
|
||||
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
|
||||
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
|
||||
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
|
||||
#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
|
||||
((reg) & 0xFFFF) | \
|
||||
((n) & 0x3FFF) << 16)
|
||||
#define CP_PACKET2 0x80000000
|
||||
#define PACKET2_PAD_SHIFT 0
|
||||
#define PACKET2_PAD_MASK (0x3fffffff << 0)
|
||||
|
||||
#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
|
||||
|
||||
#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
|
||||
(((op) & 0xFF) << 8) | \
|
||||
((n) & 0x3FFF) << 16)
|
||||
|
||||
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
|
||||
|
||||
/* Packet 3 types */
|
||||
#define PACKET3_NOP 0x10
|
||||
#define PACKET3_SET_BASE 0x11
|
||||
#define PACKET3_BASE_INDEX(x) ((x) << 0)
|
||||
#define CE_PARTITION_BASE 3
|
||||
#define PACKET3_CLEAR_STATE 0x12
|
||||
#define PACKET3_INDEX_BUFFER_SIZE 0x13
|
||||
#define PACKET3_DISPATCH_DIRECT 0x15
|
||||
#define PACKET3_DISPATCH_INDIRECT 0x16
|
||||
#define PACKET3_INDIRECT_BUFFER_END 0x17
|
||||
#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19
|
||||
#define PACKET3_ATOMIC_GDS 0x1D
|
||||
#define PACKET3_ATOMIC_MEM 0x1E
|
||||
#define PACKET3_OCCLUSION_QUERY 0x1F
|
||||
#define PACKET3_SET_PREDICATION 0x20
|
||||
#define PACKET3_REG_RMW 0x21
|
||||
#define PACKET3_COND_EXEC 0x22
|
||||
#define PACKET3_PRED_EXEC 0x23
|
||||
#define PACKET3_DRAW_INDIRECT 0x24
|
||||
#define PACKET3_DRAW_INDEX_INDIRECT 0x25
|
||||
#define PACKET3_INDEX_BASE 0x26
|
||||
#define PACKET3_DRAW_INDEX_2 0x27
|
||||
#define PACKET3_CONTEXT_CONTROL 0x28
|
||||
#define PACKET3_INDEX_TYPE 0x2A
|
||||
#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
|
||||
#define PACKET3_DRAW_INDEX_AUTO 0x2D
|
||||
#define PACKET3_NUM_INSTANCES 0x2F
|
||||
#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
|
||||
#define PACKET3_INDIRECT_BUFFER_PRIV 0x32
|
||||
#define PACKET3_INDIRECT_BUFFER_CNST 0x33
|
||||
#define PACKET3_COND_INDIRECT_BUFFER_CNST 0x33
|
||||
#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
|
||||
#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
|
||||
#define PACKET3_DRAW_PREAMBLE 0x36
|
||||
#define PACKET3_WRITE_DATA 0x37
|
||||
#define WRITE_DATA_DST_SEL(x) ((x) << 8)
|
||||
/* 0 - register
|
||||
* 1 - memory (sync - via GRBM)
|
||||
* 2 - gl2
|
||||
* 3 - gds
|
||||
* 4 - reserved
|
||||
* 5 - memory (async - direct)
|
||||
*/
|
||||
#define WR_ONE_ADDR (1 << 16)
|
||||
#define WR_CONFIRM (1 << 20)
|
||||
#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
|
||||
/* 0 - me
|
||||
* 1 - pfp
|
||||
* 2 - ce
|
||||
*/
|
||||
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
|
||||
#define PACKET3_MEM_SEMAPHORE 0x39
|
||||
# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
|
||||
# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */
|
||||
# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
|
||||
# define PACKET3_SEM_SEL_WAIT (0x7 << 29)
|
||||
#define PACKET3_DRAW_INDEX_MULTI_INST 0x3A
|
||||
#define PACKET3_COPY_DW 0x3B
|
||||
#define PACKET3_WAIT_REG_MEM 0x3C
|
||||
#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
|
||||
/* 0 - always
|
||||
* 1 - <
|
||||
* 2 - <=
|
||||
* 3 - ==
|
||||
* 4 - !=
|
||||
* 5 - >=
|
||||
* 6 - >
|
||||
*/
|
||||
#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
|
||||
/* 0 - reg
|
||||
* 1 - mem
|
||||
*/
|
||||
#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
|
||||
/* 0 - wait_reg_mem
|
||||
* 1 - wr_wait_wr_reg
|
||||
*/
|
||||
#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
|
||||
/* 0 - me
|
||||
* 1 - pfp
|
||||
*/
|
||||
#define PACKET3_INDIRECT_BUFFER 0x3F
|
||||
#define INDIRECT_BUFFER_VALID (1 << 23)
|
||||
#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
* 2 - Bypass
|
||||
*/
|
||||
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
|
||||
#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
|
||||
#define PACKET3_COND_INDIRECT_BUFFER 0x3F
|
||||
#define PACKET3_COPY_DATA 0x40
|
||||
#define PACKET3_CP_DMA 0x41
|
||||
#define PACKET3_PFP_SYNC_ME 0x42
|
||||
#define PACKET3_SURFACE_SYNC 0x43
|
||||
#define PACKET3_ME_INITIALIZE 0x44
|
||||
#define PACKET3_COND_WRITE 0x45
|
||||
#define PACKET3_EVENT_WRITE 0x46
|
||||
#define EVENT_TYPE(x) ((x) << 0)
|
||||
#define EVENT_INDEX(x) ((x) << 8)
|
||||
/* 0 - any non-TS event
|
||||
* 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
|
||||
* 2 - SAMPLE_PIPELINESTAT
|
||||
* 3 - SAMPLE_STREAMOUTSTAT*
|
||||
* 4 - *S_PARTIAL_FLUSH
|
||||
*/
|
||||
#define PACKET3_EVENT_WRITE_EOP 0x47
|
||||
#define PACKET3_EVENT_WRITE_EOS 0x48
|
||||
#define PACKET3_RELEASE_MEM 0x49
|
||||
#define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0)
|
||||
#define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GLM_WB (1 << 12)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GLM_INV (1 << 13)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GLV_INV (1 << 14)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL1_INV (1 << 15)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_US (1 << 16)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_RANGE (1 << 17)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_DISCARD (1 << 19)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_INV (1 << 20)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_WB (1 << 21)
|
||||
#define PACKET3_RELEASE_MEM_GCR_SEQ (1 << 22)
|
||||
#define PACKET3_RELEASE_MEM_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - cache_policy__me_release_mem__lru
|
||||
* 1 - cache_policy__me_release_mem__stream
|
||||
* 2 - cache_policy__me_release_mem__noa
|
||||
* 3 - cache_policy__me_release_mem__bypass
|
||||
*/
|
||||
#define PACKET3_RELEASE_MEM_EXECUTE (1 << 28)
|
||||
|
||||
#define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29)
|
||||
/* 0 - discard
|
||||
* 1 - send low 32bit data
|
||||
* 2 - send 64bit data
|
||||
* 3 - send 64bit GPU counter value
|
||||
* 4 - send 64bit sys counter value
|
||||
*/
|
||||
#define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24)
|
||||
/* 0 - none
|
||||
* 1 - interrupt only (DATA_SEL = 0)
|
||||
* 2 - interrupt when data write is confirmed
|
||||
*/
|
||||
#define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16)
|
||||
/* 0 - MC
|
||||
* 1 - TC/L2
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#define PACKET3_PREAMBLE_CNTL 0x4A
|
||||
# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
|
||||
# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
|
||||
#define PACKET3_DMA_DATA 0x50
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. SRC_ADDR_LO or DATA [31:0]
|
||||
* 4. SRC_ADDR_HI [31:0]
|
||||
* 5. DST_ADDR_LO [31:0]
|
||||
* 6. DST_ADDR_HI [7:0]
|
||||
* 7. COMMAND [31:26] | BYTE_COUNT [25:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
|
||||
/* 0 - ME
|
||||
* 1 - PFP
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
|
||||
/* 0 - DST_ADDR using DAS
|
||||
* 1 - GDS
|
||||
* 3 - DST_ADDR using L2
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
|
||||
/* 0 - SRC_ADDR using SAS
|
||||
* 1 - GDS
|
||||
* 2 - DATA
|
||||
* 3 - SRC_ADDR using L2
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_CP_SYNC (1 << 31)
|
||||
/* COMMAND */
|
||||
# define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
|
||||
/* 0 - memory
|
||||
* 1 - register
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
|
||||
/* 0 - memory
|
||||
* 1 - register
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
|
||||
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
|
||||
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
|
||||
#define PACKET3_CONTEXT_REG_RMW 0x51
|
||||
#define PACKET3_GFX_CNTX_UPDATE 0x52
|
||||
#define PACKET3_BLK_CNTX_UPDATE 0x53
|
||||
#define PACKET3_INCR_UPDT_STATE 0x55
|
||||
#define PACKET3_ACQUIRE_MEM 0x58
|
||||
/* 1. HEADER
|
||||
* 2. COHER_CNTL [30:0]
|
||||
* 2.1 ENGINE_SEL [31:31]
|
||||
* 2. COHER_SIZE [31:0]
|
||||
* 3. COHER_SIZE_HI [7:0]
|
||||
* 4. COHER_BASE_LO [31:0]
|
||||
* 5. COHER_BASE_HI [23:0]
|
||||
* 7. POLL_INTERVAL [15:0]
|
||||
* 8. GCR_CNTL [18:0]
|
||||
*/
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x) ((x) << 0)
|
||||
/*
|
||||
* 0:NOP
|
||||
* 1:ALL
|
||||
* 2:RANGE
|
||||
* 3:FIRST_LAST
|
||||
*/
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_RANGE(x) ((x) << 2)
|
||||
/*
|
||||
* 0:ALL
|
||||
* 1:reserved
|
||||
* 2:RANGE
|
||||
* 3:FIRST_LAST
|
||||
*/
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(x) ((x) << 4)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(x) ((x) << 5)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_WB(x) ((x) << 6)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(x) ((x) << 7)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(x) ((x) << 8)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(x) ((x) << 9)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_US(x) ((x) << 10)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_RANGE(x) ((x) << 11)
|
||||
/*
|
||||
* 0:ALL
|
||||
* 1:VOL
|
||||
* 2:RANGE
|
||||
* 3:FIRST_LAST
|
||||
*/
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_DISCARD(x) ((x) << 13)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(x) ((x) << 14)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(x) ((x) << 15)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_SEQ(x) ((x) << 16)
|
||||
/*
|
||||
* 0: PARALLEL
|
||||
* 1: FORWARD
|
||||
* 2: REVERSE
|
||||
*/
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18)
|
||||
#define PACKET3_REWIND 0x59
|
||||
#define PACKET3_INTERRUPT 0x5A
|
||||
#define PACKET3_GEN_PDEPTE 0x5B
|
||||
#define PACKET3_INDIRECT_BUFFER_PASID 0x5C
|
||||
#define PACKET3_PRIME_UTCL2 0x5D
|
||||
#define PACKET3_LOAD_UCONFIG_REG 0x5E
|
||||
#define PACKET3_LOAD_SH_REG 0x5F
|
||||
#define PACKET3_LOAD_CONFIG_REG 0x60
|
||||
#define PACKET3_LOAD_CONTEXT_REG 0x61
|
||||
#define PACKET3_LOAD_COMPUTE_STATE 0x62
|
||||
#define PACKET3_LOAD_SH_REG_INDEX 0x63
|
||||
#define PACKET3_SET_CONFIG_REG 0x68
|
||||
#define PACKET3_SET_CONFIG_REG_START 0x00002000
|
||||
#define PACKET3_SET_CONFIG_REG_END 0x00002c00
|
||||
#define PACKET3_SET_CONTEXT_REG 0x69
|
||||
#define PACKET3_SET_CONTEXT_REG_START 0x0000a000
|
||||
#define PACKET3_SET_CONTEXT_REG_END 0x0000a400
|
||||
#define PACKET3_SET_CONTEXT_REG_INDEX 0x6A
|
||||
#define PACKET3_SET_VGPR_REG_DI_MULTI 0x71
|
||||
#define PACKET3_SET_SH_REG_DI 0x72
|
||||
#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
|
||||
#define PACKET3_SET_SH_REG_DI_MULTI 0x74
|
||||
#define PACKET3_GFX_PIPE_LOCK 0x75
|
||||
#define PACKET3_SET_SH_REG 0x76
|
||||
#define PACKET3_SET_SH_REG_START 0x00002c00
|
||||
#define PACKET3_SET_SH_REG_END 0x00003000
|
||||
#define PACKET3_SET_SH_REG_OFFSET 0x77
|
||||
#define PACKET3_SET_QUEUE_REG 0x78
|
||||
#define PACKET3_SET_UCONFIG_REG 0x79
|
||||
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
|
||||
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
|
||||
#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A
|
||||
#define PACKET3_FORWARD_HEADER 0x7C
|
||||
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
|
||||
#define PACKET3_SCRATCH_RAM_READ 0x7E
|
||||
#define PACKET3_LOAD_CONST_RAM 0x80
|
||||
#define PACKET3_WRITE_CONST_RAM 0x81
|
||||
#define PACKET3_DUMP_CONST_RAM 0x83
|
||||
#define PACKET3_INCREMENT_CE_COUNTER 0x84
|
||||
#define PACKET3_INCREMENT_DE_COUNTER 0x85
|
||||
#define PACKET3_WAIT_ON_CE_COUNTER 0x86
|
||||
#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
|
||||
#define PACKET3_SWITCH_BUFFER 0x8B
|
||||
#define PACKET3_DISPATCH_DRAW_PREAMBLE 0x8C
|
||||
#define PACKET3_DISPATCH_DRAW_PREAMBLE_ACE 0x8C
|
||||
#define PACKET3_DISPATCH_DRAW 0x8D
|
||||
#define PACKET3_DISPATCH_DRAW_ACE 0x8D
|
||||
#define PACKET3_GET_LOD_STATS 0x8E
|
||||
#define PACKET3_DRAW_MULTI_PREAMBLE 0x8F
|
||||
#define PACKET3_FRAME_CONTROL 0x90
|
||||
# define FRAME_TMZ (1 << 0)
|
||||
# define FRAME_CMD(x) ((x) << 28)
|
||||
/*
|
||||
* x=0: tmz_begin
|
||||
* x=1: tmz_end
|
||||
*/
|
||||
#define PACKET3_INDEX_ATTRIBUTES_INDIRECT 0x91
|
||||
#define PACKET3_WAIT_REG_MEM64 0x93
|
||||
#define PACKET3_COND_PREEMPT 0x94
|
||||
#define PACKET3_HDP_FLUSH 0x95
|
||||
#define PACKET3_COPY_DATA_RB 0x96
|
||||
#define PACKET3_INVALIDATE_TLBS 0x98
|
||||
# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
|
||||
# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
|
||||
# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
|
||||
#define PACKET3_AQL_PACKET 0x99
|
||||
#define PACKET3_DMA_DATA_FILL_MULTI 0x9A
|
||||
#define PACKET3_SET_SH_REG_INDEX 0x9B
|
||||
#define PACKET3_DRAW_INDIRECT_COUNT_MULTI 0x9C
|
||||
#define PACKET3_DRAW_INDEX_INDIRECT_COUNT_MULTI 0x9D
|
||||
#define PACKET3_DUMP_CONST_RAM_OFFSET 0x9E
|
||||
#define PACKET3_LOAD_CONTEXT_REG_INDEX 0x9F
|
||||
#define PACKET3_SET_RESOURCES 0xA0
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. QUEUE_MASK_LO [31:0]
|
||||
* 4. QUEUE_MASK_HI [31:0]
|
||||
* 5. GWS_MASK_LO [31:0]
|
||||
* 6. GWS_MASK_HI [31:0]
|
||||
* 7. OAC_MASK [15:0]
|
||||
* 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0]
|
||||
*/
|
||||
# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0)
|
||||
# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16)
|
||||
# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29)
|
||||
#define PACKET3_MAP_PROCESS 0xA1
|
||||
#define PACKET3_MAP_QUEUES 0xA2
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. MQD_ADDR_LO [31:0]
|
||||
* 5. MQD_ADDR_HI [31:0]
|
||||
* 6. WPTR_ADDR_LO [31:0]
|
||||
* 7. WPTR_ADDR_HI [31:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
|
||||
# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8)
|
||||
# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13)
|
||||
# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16)
|
||||
# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18)
|
||||
# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21)
|
||||
# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24)
|
||||
# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
|
||||
# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
|
||||
/* CONTROL2 */
|
||||
# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1)
|
||||
# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2)
|
||||
#define PACKET3_UNMAP_QUEUES 0xA3
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. CONTROL3
|
||||
* 5. CONTROL4
|
||||
* 6. CONTROL5
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0)
|
||||
/* 0 - PREEMPT_QUEUES
|
||||
* 1 - RESET_QUEUES
|
||||
* 2 - DISABLE_PROCESS_QUEUES
|
||||
* 3 - PREEMPT_QUEUES_NO_UNMAP
|
||||
*/
|
||||
# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
|
||||
# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
|
||||
# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
|
||||
/* CONTROL2a */
|
||||
# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0)
|
||||
/* CONTROL2b */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2)
|
||||
/* CONTROL3a */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2)
|
||||
/* CONTROL3b */
|
||||
# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0)
|
||||
/* CONTROL4 */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2)
|
||||
/* CONTROL5 */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2)
|
||||
#define PACKET3_QUERY_STATUS 0xA4
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. ADDR_LO [31:0]
|
||||
* 5. ADDR_HI [31:0]
|
||||
* 6. DATA_LO [31:0]
|
||||
* 7. DATA_HI [31:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0)
|
||||
# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28)
|
||||
# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30)
|
||||
/* CONTROL2a */
|
||||
# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0)
|
||||
/* CONTROL2b */
|
||||
# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
|
||||
# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
|
||||
#define PACKET3_RUN_LIST 0xA5
|
||||
#define PACKET3_MAP_PROCESS_VM 0xA6
|
||||
/* GFX11 */
|
||||
#define PACKET3_SET_Q_PREEMPTION_MODE 0xF0
|
||||
# define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0)
|
||||
# define PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM (1 << 0)
|
||||
|
||||
#endif
|
||||
@@ -1428,3 +1428,473 @@ __all__ = \
|
||||
'union_SDMA_PKT_TIMESTAMP_TAG_HEADER_UNION',
|
||||
'union_SDMA_PKT_TRAP_TAG_HEADER_UNION',
|
||||
'union_SDMA_PKT_TRAP_TAG_INT_CONTEXT_UNION']
|
||||
#/*
|
||||
# * Copyright 2019 Advanced Micro Devices, Inc.
|
||||
# *
|
||||
# * Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# * copy of this software and associated documentation files (the "Software"),
|
||||
# * to deal in the Software without restriction, including without limitation
|
||||
# * the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# * and/or sell copies of the Software, and to permit persons to whom the
|
||||
# * Software is furnished to do so, subject to the following conditions:
|
||||
# *
|
||||
# * The above copyright notice and this permission notice shall be included in
|
||||
# * all copies or substantial portions of the Software.
|
||||
# *
|
||||
# * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# * OTHER DEALINGS IN THE SOFTWARE.
|
||||
# *
|
||||
# */
|
||||
|
||||
#ifndef NVD_H
|
||||
#define NVD_H
|
||||
|
||||
#/**
|
||||
# * Navi's PM4 definitions
|
||||
# */
|
||||
PACKET_TYPE0 = 0
|
||||
PACKET_TYPE1 = 1
|
||||
PACKET_TYPE2 = 2
|
||||
PACKET_TYPE3 = 3
|
||||
|
||||
def CP_PACKET_GET_TYPE(h):
    """Return the 2-bit packet type held in bits 30-31 of header word ``h``."""
    type_bits = h >> 30
    return type_bits & 3
|
||||
def CP_PACKET_GET_COUNT(h):
    """Return the 14-bit count field held in bits 16-29 of header word ``h``."""
    count_bits = h >> 16
    return count_bits & 0x3FFF
|
||||
def CP_PACKET0_GET_REG(h):
    """Return the low 16 bits of header word ``h`` (the register field of a type-0 packet)."""
    reg_mask = 0xFFFF
    return h & reg_mask
|
||||
def CP_PACKET3_GET_OPCODE(h):
    """Return the 8-bit opcode held in bits 8-15 of header word ``h``."""
    opcode_bits = h >> 8
    return opcode_bits & 0xFF
|
||||
def PACKET0(reg, n):
    """Build a type-0 packet header word.

    The result ORs together three fields:
      * ``PACKET_TYPE0`` shifted into bits 30-31,
      * ``reg`` masked to its low 16 bits (bits 0-15),
      * ``n`` masked to 14 bits and shifted into bits 16-29.

    The original backslash-continued one-liner is collapsed into a single
    expression so the definition cannot be split by stray lines; the computed
    value is the same.
    """
    type_field = PACKET_TYPE0 << 30
    reg_field = reg & 0xFFFF
    count_field = (n & 0x3FFF) << 16
    return type_field | reg_field | count_field
|
||||
CP_PACKET2 = 0x80000000
PACKET2_PAD_SHIFT = 0
PACKET2_PAD_MASK = (0x3fffffff << 0)


def PACKET2(v):
    """Build a type-2 packet word carrying ``v`` in its pad field.

    The C macro this was generated from is
    ``CP_PACKET2 | REG_SET(PACKET2_PAD, (v))``.  ``REG_SET`` relies on C token
    pasting (``FIELD##_SHIFT`` / ``FIELD##_MASK``) and has no Python
    counterpart, so the literal translation raised ``NameError`` on every call
    (both ``REG_SET`` and ``PACKET2_PAD`` are undefined).  The expansion is
    written out explicitly here: shift ``v`` by ``PACKET2_PAD_SHIFT`` and keep
    only the bits allowed by ``PACKET2_PAD_MASK``.
    """
    pad_field = (v << PACKET2_PAD_SHIFT) & PACKET2_PAD_MASK
    return CP_PACKET2 | pad_field
|
||||
|
||||
def PACKET3(op, n):
    """Build a type-3 packet header word.

    The result ORs together three fields:
      * ``PACKET_TYPE3`` shifted into bits 30-31,
      * ``op`` masked to 8 bits and shifted into bits 8-15,
      * ``n`` masked to 14 bits and shifted into bits 16-29.

    The original backslash-continued one-liner is collapsed into a single
    expression so the definition cannot be split by stray lines; the computed
    value is the same.
    """
    type_field = PACKET_TYPE3 << 30
    opcode_field = (op & 0xFF) << 8
    count_field = (n & 0x3FFF) << 16
    return type_field | opcode_field | count_field
|
||||
|
||||
def PACKET3_COMPUTE(op, n):
    """Return ``PACKET3(op, n)`` with bit 1 additionally set.

    NOTE(review): bit 1 is presumably the compute shader-type flag of the
    header -- confirm against the PM4 specification.
    """
    header = PACKET3(op, n)
    return header | (1 << 1)
|
||||
|
||||
#/* Packet 3 types */
|
||||
PACKET3_NOP = 0x10
|
||||
PACKET3_SET_BASE = 0x11
|
||||
def PACKET3_BASE_INDEX(x): return ((x) << 0)
|
||||
CE_PARTITION_BASE = 3
|
||||
PACKET3_CLEAR_STATE = 0x12
|
||||
PACKET3_INDEX_BUFFER_SIZE = 0x13
|
||||
PACKET3_DISPATCH_DIRECT = 0x15
|
||||
PACKET3_DISPATCH_INDIRECT = 0x16
|
||||
PACKET3_INDIRECT_BUFFER_END = 0x17
|
||||
PACKET3_INDIRECT_BUFFER_CNST_END = 0x19
|
||||
PACKET3_ATOMIC_GDS = 0x1D
|
||||
PACKET3_ATOMIC_MEM = 0x1E
|
||||
PACKET3_OCCLUSION_QUERY = 0x1F
|
||||
PACKET3_SET_PREDICATION = 0x20
|
||||
PACKET3_REG_RMW = 0x21
|
||||
PACKET3_COND_EXEC = 0x22
|
||||
PACKET3_PRED_EXEC = 0x23
|
||||
PACKET3_DRAW_INDIRECT = 0x24
|
||||
PACKET3_DRAW_INDEX_INDIRECT = 0x25
|
||||
PACKET3_INDEX_BASE = 0x26
|
||||
PACKET3_DRAW_INDEX_2 = 0x27
|
||||
PACKET3_CONTEXT_CONTROL = 0x28
|
||||
PACKET3_INDEX_TYPE = 0x2A
|
||||
PACKET3_DRAW_INDIRECT_MULTI = 0x2C
|
||||
PACKET3_DRAW_INDEX_AUTO = 0x2D
|
||||
PACKET3_NUM_INSTANCES = 0x2F
|
||||
PACKET3_DRAW_INDEX_MULTI_AUTO = 0x30
|
||||
PACKET3_INDIRECT_BUFFER_PRIV = 0x32
|
||||
PACKET3_INDIRECT_BUFFER_CNST = 0x33
|
||||
PACKET3_COND_INDIRECT_BUFFER_CNST = 0x33
|
||||
PACKET3_STRMOUT_BUFFER_UPDATE = 0x34
|
||||
PACKET3_DRAW_INDEX_OFFSET_2 = 0x35
|
||||
PACKET3_DRAW_PREAMBLE = 0x36
|
||||
PACKET3_WRITE_DATA = 0x37
|
||||
def WRITE_DATA_DST_SEL(x):
    """Shift destination selector ``x`` left by 8 bits.

    Selector values: 0 register, 1 memory (sync, via GRBM), 2 gl2, 3 gds,
    4 reserved, 5 memory (async, direct).
    """
    shift = 8
    return x << shift
|
||||
#/* 0 - register
|
||||
# * 1 - memory (sync - via GRBM)
|
||||
# * 2 - gl2
|
||||
# * 3 - gds
|
||||
# * 4 - reserved
|
||||
# * 5 - memory (async - direct)
|
||||
# */
|
||||
WR_ONE_ADDR = (1 << 16)
|
||||
WR_CONFIRM = (1 << 20)
|
||||
def WRITE_DATA_CACHE_POLICY(x):
    """Shift cache-policy value ``x`` left by 25 bits (0 = LRU, 1 = Stream)."""
    shift = 25
    return x << shift
|
||||
#/* 0 - LRU
|
||||
# * 1 - Stream
|
||||
# */
|
||||
def WRITE_DATA_ENGINE_SEL(x):
    """Shift engine selector ``x`` left by 30 bits (0 = me, 1 = pfp, 2 = ce)."""
    shift = 30
    return x << shift
|
||||
#/* 0 - me
|
||||
# * 1 - pfp
|
||||
# * 2 - ce
|
||||
# */
|
||||
PACKET3_DRAW_INDEX_INDIRECT_MULTI = 0x38
|
||||
PACKET3_MEM_SEMAPHORE = 0x39
|
||||
PACKET3_SEM_USE_MAILBOX = (0x1 << 16)
|
||||
PACKET3_SEM_SEL_SIGNAL_TYPE = (0x1 << 20) #/* 0 = increment, 1 = write 1 */
|
||||
PACKET3_SEM_SEL_SIGNAL = (0x6 << 29)
|
||||
PACKET3_SEM_SEL_WAIT = (0x7 << 29)
|
||||
PACKET3_DRAW_INDEX_MULTI_INST = 0x3A
|
||||
PACKET3_COPY_DW = 0x3B
|
||||
PACKET3_WAIT_REG_MEM = 0x3C
|
||||
def WAIT_REG_MEM_FUNCTION(x):
    """Shift compare-function code ``x`` left by 0 bits (value is unchanged).

    Codes: 0 always, 1 <, 2 <=, 3 ==, 4 !=, 5 >=, 6 >.
    """
    shift = 0
    return x << shift
|
||||
#/* 0 - always
|
||||
# * 1 - <
|
||||
# * 2 - <=
|
||||
# * 3 - ==
|
||||
# * 4 - !=
|
||||
# * 5 - >=
|
||||
# * 6 - >
|
||||
# */
|
||||
def WAIT_REG_MEM_MEM_SPACE(x):
    """Shift memory-space selector ``x`` left by 4 bits (0 = reg, 1 = mem)."""
    shift = 4
    return x << shift
|
||||
#/* 0 - reg
|
||||
# * 1 - mem
|
||||
# */
|
||||
def WAIT_REG_MEM_OPERATION(x):
    """Shift operation selector ``x`` left by 6 bits (0 = wait_reg_mem, 1 = wr_wait_wr_reg)."""
    shift = 6
    return x << shift
|
||||
#/* 0 - wait_reg_mem
|
||||
# * 1 - wr_wait_wr_reg
|
||||
# */
|
||||
def WAIT_REG_MEM_ENGINE(x):
    """Shift engine selector ``x`` left by 8 bits (0 = me, 1 = pfp)."""
    shift = 8
    return x << shift
|
||||
#/* 0 - me
|
||||
# * 1 - pfp
|
||||
# */
|
||||
PACKET3_INDIRECT_BUFFER = 0x3F
|
||||
INDIRECT_BUFFER_VALID = (1 << 23)
|
||||
def INDIRECT_BUFFER_CACHE_POLICY(x): return ((x) << 28)
|
||||
#/* 0 - LRU
|
||||
# * 1 - Stream
|
||||
# * 2 - Bypass
|
||||
# */
|
||||
def INDIRECT_BUFFER_PRE_ENB(x): return ((x) << 21)
|
||||
def INDIRECT_BUFFER_PRE_RESUME(x): return ((x) << 30)
|
||||
PACKET3_COND_INDIRECT_BUFFER = 0x3F
|
||||
PACKET3_COPY_DATA = 0x40
|
||||
PACKET3_CP_DMA = 0x41
|
||||
PACKET3_PFP_SYNC_ME = 0x42
|
||||
PACKET3_SURFACE_SYNC = 0x43
|
||||
PACKET3_ME_INITIALIZE = 0x44
|
||||
PACKET3_COND_WRITE = 0x45
|
||||
PACKET3_EVENT_WRITE = 0x46
|
||||
def EVENT_TYPE(x): return ((x) << 0)
|
||||
def EVENT_INDEX(x): return ((x) << 8)
|
||||
#/* 0 - any non-TS event
|
||||
# * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
|
||||
# * 2 - SAMPLE_PIPELINESTAT
|
||||
# * 3 - SAMPLE_STREAMOUTSTAT*
|
||||
# * 4 - *S_PARTIAL_FLUSH
|
||||
# */
|
||||
PACKET3_EVENT_WRITE_EOP = 0x47
|
||||
PACKET3_EVENT_WRITE_EOS = 0x48
|
||||
PACKET3_RELEASE_MEM = 0x49
|
||||
def PACKET3_RELEASE_MEM_EVENT_TYPE(x): return ((x) << 0)
|
||||
def PACKET3_RELEASE_MEM_EVENT_INDEX(x): return ((x) << 8)
|
||||
PACKET3_RELEASE_MEM_GCR_GLM_WB = (1 << 12)
|
||||
PACKET3_RELEASE_MEM_GCR_GLM_INV = (1 << 13)
|
||||
PACKET3_RELEASE_MEM_GCR_GLV_INV = (1 << 14)
|
||||
PACKET3_RELEASE_MEM_GCR_GL1_INV = (1 << 15)
|
||||
PACKET3_RELEASE_MEM_GCR_GL2_US = (1 << 16)
|
||||
PACKET3_RELEASE_MEM_GCR_GL2_RANGE = (1 << 17)
|
||||
PACKET3_RELEASE_MEM_GCR_GL2_DISCARD = (1 << 19)
|
||||
PACKET3_RELEASE_MEM_GCR_GL2_INV = (1 << 20)
|
||||
PACKET3_RELEASE_MEM_GCR_GL2_WB = (1 << 21)
|
||||
PACKET3_RELEASE_MEM_GCR_SEQ = (1 << 22)
|
||||
def PACKET3_RELEASE_MEM_CACHE_POLICY(x): return ((x) << 25)
|
||||
#/* 0 - cache_policy__me_release_mem__lru
|
||||
# * 1 - cache_policy__me_release_mem__stream
|
||||
# * 2 - cache_policy__me_release_mem__noa
|
||||
# * 3 - cache_policy__me_release_mem__bypass
|
||||
# */
|
||||
PACKET3_RELEASE_MEM_EXECUTE = (1 << 28)
|
||||
|
||||
def PACKET3_RELEASE_MEM_DATA_SEL(x): return ((x) << 29)
|
||||
#/* 0 - discard
|
||||
# * 1 - send low 32bit data
|
||||
# * 2 - send 64bit data
|
||||
# * 3 - send 64bit GPU counter value
|
||||
# * 4 - send 64bit sys counter value
|
||||
# */
|
||||
def PACKET3_RELEASE_MEM_INT_SEL(x): return ((x) << 24)
|
||||
#/* 0 - none
|
||||
# * 1 - interrupt only (DATA_SEL = 0)
|
||||
# * 2 - interrupt when data write is confirmed
|
||||
# */
|
||||
def PACKET3_RELEASE_MEM_DST_SEL(x): return ((x) << 16)
|
||||
#/* 0 - MC
|
||||
# * 1 - TC/L2
|
||||
# */
|
||||
|
||||
|
||||
|
||||
PACKET3_PREAMBLE_CNTL = 0x4A
|
||||
PACKET3_PREAMBLE_BEGIN_CLEAR_STATE = (2 << 28)
|
||||
PACKET3_PREAMBLE_END_CLEAR_STATE = (3 << 28)
|
||||
PACKET3_DMA_DATA = 0x50
|
||||
#/* 1. header
|
||||
# * 2. CONTROL
|
||||
# * 3. SRC_ADDR_LO or DATA [31:0]
|
||||
# * 4. SRC_ADDR_HI [31:0]
|
||||
# * 5. DST_ADDR_LO [31:0]
|
||||
# * 6. DST_ADDR_HI [7:0]
|
||||
# * 7. COMMAND [31:26] | BYTE_COUNT [25:0]
|
||||
# */
|
||||
#/* CONTROL */
|
||||
def PACKET3_DMA_DATA_ENGINE(x): return ((x) << 0)
|
||||
#/* 0 - ME
|
||||
# * 1 - PFP
|
||||
# */
|
||||
def PACKET3_DMA_DATA_SRC_CACHE_POLICY(x): return ((x) << 13)
|
||||
#/* 0 - LRU
|
||||
# * 1 - Stream
|
||||
# */
|
||||
def PACKET3_DMA_DATA_DST_SEL(x): return ((x) << 20)
|
||||
#/* 0 - DST_ADDR using DAS
|
||||
# * 1 - GDS
|
||||
# * 3 - DST_ADDR using L2
|
||||
# */
|
||||
def PACKET3_DMA_DATA_DST_CACHE_POLICY(x): return ((x) << 25)
|
||||
#/* 0 - LRU
|
||||
# * 1 - Stream
|
||||
# */
|
||||
def PACKET3_DMA_DATA_SRC_SEL(x): return ((x) << 29)
|
||||
#/* 0 - SRC_ADDR using SAS
|
||||
# * 1 - GDS
|
||||
# * 2 - DATA
|
||||
# * 3 - SRC_ADDR using L2
|
||||
# */
|
||||
PACKET3_DMA_DATA_CP_SYNC = (1 << 31)
|
||||
#/* COMMAND */
|
||||
PACKET3_DMA_DATA_CMD_SAS = (1 << 26)
|
||||
#/* 0 - memory
|
||||
# * 1 - register
|
||||
# */
|
||||
PACKET3_DMA_DATA_CMD_DAS = (1 << 27)
|
||||
#/* 0 - memory
|
||||
# * 1 - register
|
||||
# */
|
||||
PACKET3_DMA_DATA_CMD_SAIC = (1 << 28)
|
||||
PACKET3_DMA_DATA_CMD_DAIC = (1 << 29)
|
||||
PACKET3_DMA_DATA_CMD_RAW_WAIT = (1 << 30)
|
||||
PACKET3_CONTEXT_REG_RMW = 0x51
|
||||
PACKET3_GFX_CNTX_UPDATE = 0x52
|
||||
PACKET3_BLK_CNTX_UPDATE = 0x53
|
||||
PACKET3_INCR_UPDT_STATE = 0x55
|
||||
PACKET3_ACQUIRE_MEM = 0x58
|
||||
#/* 1. HEADER
|
||||
# * 2. COHER_CNTL [30:0]
|
||||
# * 2.1 ENGINE_SEL [31:31]
|
||||
# * 2. COHER_SIZE [31:0]
|
||||
# * 3. COHER_SIZE_HI [7:0]
|
||||
# * 4. COHER_BASE_LO [31:0]
|
||||
# * 5. COHER_BASE_HI [23:0]
|
||||
# * 7. POLL_INTERVAL [15:0]
|
||||
# * 8. GCR_CNTL [18:0]
|
||||
# */
|
||||
def PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x): return ((x) << 0)
|
||||
#/*
|
||||
# * 0:NOP
|
||||
# * 1:ALL
|
||||
# * 2:RANGE
|
||||
# * 3:FIRST_LAST
|
||||
# */
|
||||
def PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_RANGE(x): return ((x) << 2)
|
||||
#/*
|
||||
# * 0:ALL
|
||||
# * 1:reserved
|
||||
# * 2:RANGE
|
||||
# * 3:FIRST_LAST
|
||||
# */
|
||||
def PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(x): return ((x) << 4)
|
||||
def PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(x): return ((x) << 5)
|
||||
def PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_WB(x): return ((x) << 6)
|
||||
def PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(x): return ((x) << 7)
|
||||
def PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(x): return ((x) << 8)
|
||||
def PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(x): return ((x) << 9)
|
||||
def PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_US(x): return ((x) << 10)
|
||||
def PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_RANGE(x): return ((x) << 11)
|
||||
#/*
|
||||
# * 0:ALL
|
||||
# * 1:VOL
|
||||
# * 2:RANGE
|
||||
# * 3:FIRST_LAST
|
||||
# */
|
||||
def PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_DISCARD(x): return ((x) << 13)
|
||||
def PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(x): return ((x) << 14)
|
||||
def PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(x): return ((x) << 15)
|
||||
def PACKET3_ACQUIRE_MEM_GCR_CNTL_SEQ(x): return ((x) << 16)
|
||||
#/*
|
||||
# * 0: PARALLEL
|
||||
# * 1: FORWARD
|
||||
# * 2: REVERSE
|
||||
# */
|
||||
PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA = (1 << 18)
|
||||
PACKET3_REWIND = 0x59
|
||||
PACKET3_INTERRUPT = 0x5A
|
||||
PACKET3_GEN_PDEPTE = 0x5B
|
||||
PACKET3_INDIRECT_BUFFER_PASID = 0x5C
|
||||
PACKET3_PRIME_UTCL2 = 0x5D
|
||||
PACKET3_LOAD_UCONFIG_REG = 0x5E
|
||||
PACKET3_LOAD_SH_REG = 0x5F
|
||||
PACKET3_LOAD_CONFIG_REG = 0x60
|
||||
PACKET3_LOAD_CONTEXT_REG = 0x61
|
||||
PACKET3_LOAD_COMPUTE_STATE = 0x62
|
||||
PACKET3_LOAD_SH_REG_INDEX = 0x63
|
||||
PACKET3_SET_CONFIG_REG = 0x68
|
||||
PACKET3_SET_CONFIG_REG_START = 0x00002000
|
||||
PACKET3_SET_CONFIG_REG_END = 0x00002c00
|
||||
PACKET3_SET_CONTEXT_REG = 0x69
|
||||
PACKET3_SET_CONTEXT_REG_START = 0x0000a000
|
||||
PACKET3_SET_CONTEXT_REG_END = 0x0000a400
|
||||
PACKET3_SET_CONTEXT_REG_INDEX = 0x6A
|
||||
PACKET3_SET_VGPR_REG_DI_MULTI = 0x71
|
||||
PACKET3_SET_SH_REG_DI = 0x72
|
||||
PACKET3_SET_CONTEXT_REG_INDIRECT = 0x73
|
||||
PACKET3_SET_SH_REG_DI_MULTI = 0x74
|
||||
PACKET3_GFX_PIPE_LOCK = 0x75
|
||||
PACKET3_SET_SH_REG = 0x76
|
||||
PACKET3_SET_SH_REG_START = 0x00002c00
|
||||
PACKET3_SET_SH_REG_END = 0x00003000
|
||||
PACKET3_SET_SH_REG_OFFSET = 0x77
|
||||
PACKET3_SET_QUEUE_REG = 0x78
|
||||
PACKET3_SET_UCONFIG_REG = 0x79
|
||||
PACKET3_SET_UCONFIG_REG_START = 0x0000c000
|
||||
PACKET3_SET_UCONFIG_REG_END = 0x0000c400
|
||||
PACKET3_SET_UCONFIG_REG_INDEX = 0x7A
|
||||
PACKET3_FORWARD_HEADER = 0x7C
|
||||
PACKET3_SCRATCH_RAM_WRITE = 0x7D
|
||||
PACKET3_SCRATCH_RAM_READ = 0x7E
|
||||
PACKET3_LOAD_CONST_RAM = 0x80
|
||||
PACKET3_WRITE_CONST_RAM = 0x81
|
||||
PACKET3_DUMP_CONST_RAM = 0x83
|
||||
PACKET3_INCREMENT_CE_COUNTER = 0x84
|
||||
PACKET3_INCREMENT_DE_COUNTER = 0x85
|
||||
PACKET3_WAIT_ON_CE_COUNTER = 0x86
|
||||
PACKET3_WAIT_ON_DE_COUNTER_DIFF = 0x88
|
||||
PACKET3_SWITCH_BUFFER = 0x8B
|
||||
PACKET3_DISPATCH_DRAW_PREAMBLE = 0x8C
|
||||
PACKET3_DISPATCH_DRAW_PREAMBLE_ACE = 0x8C
|
||||
PACKET3_DISPATCH_DRAW = 0x8D
|
||||
PACKET3_DISPATCH_DRAW_ACE = 0x8D
|
||||
PACKET3_GET_LOD_STATS = 0x8E
|
||||
PACKET3_DRAW_MULTI_PREAMBLE = 0x8F
|
||||
PACKET3_FRAME_CONTROL = 0x90
|
||||
FRAME_TMZ = (1 << 0)
|
||||
def FRAME_CMD(x):
    """Return `x` shifted left by 28 bits.

    Going by the name, this positions the CMD value of a FRAME_CONTROL packet;
    no masking is applied, so the caller is responsible for the value's width.
    """
    bit_offset = 28
    return x << bit_offset
|
||||
#/*
|
||||
# * x=0: tmz_begin
|
||||
# * x=1: tmz_end
|
||||
# */
|
||||
PACKET3_INDEX_ATTRIBUTES_INDIRECT = 0x91
|
||||
PACKET3_WAIT_REG_MEM64 = 0x93
|
||||
PACKET3_COND_PREEMPT = 0x94
|
||||
PACKET3_HDP_FLUSH = 0x95
|
||||
PACKET3_COPY_DATA_RB = 0x96
|
||||
PACKET3_INVALIDATE_TLBS = 0x98
|
||||
def PACKET3_INVALIDATE_TLBS_DST_SEL(x):
    """Return `x` shifted left by 0 bits (value is passed through unmasked).

    Going by the name, this is the DST_SEL field of the INVALIDATE_TLBS packet.
    """
    bit_offset = 0
    return x << bit_offset
|
||||
def PACKET3_INVALIDATE_TLBS_ALL_HUB(x):
    """Return `x` shifted left by 4 bits.

    Going by the name, this is the ALL_HUB field of the INVALIDATE_TLBS packet;
    no masking is applied.
    """
    bit_offset = 4
    return x << bit_offset
|
||||
def PACKET3_INVALIDATE_TLBS_PASID(x):
    """Return `x` shifted left by 5 bits.

    Going by the name, this is the PASID field of the INVALIDATE_TLBS packet;
    no masking is applied.
    """
    bit_offset = 5
    return x << bit_offset
|
||||
PACKET3_AQL_PACKET = 0x99
|
||||
PACKET3_DMA_DATA_FILL_MULTI = 0x9A
|
||||
PACKET3_SET_SH_REG_INDEX = 0x9B
|
||||
PACKET3_DRAW_INDIRECT_COUNT_MULTI = 0x9C
|
||||
PACKET3_DRAW_INDEX_INDIRECT_COUNT_MULTI = 0x9D
|
||||
PACKET3_DUMP_CONST_RAM_OFFSET = 0x9E
|
||||
PACKET3_LOAD_CONTEXT_REG_INDEX = 0x9F
|
||||
PACKET3_SET_RESOURCES = 0xA0
|
||||
#/* 1. header
|
||||
# * 2. CONTROL
|
||||
# * 3. QUEUE_MASK_LO [31:0]
|
||||
# * 4. QUEUE_MASK_HI [31:0]
|
||||
# * 5. GWS_MASK_LO [31:0]
|
||||
# * 6. GWS_MASK_HI [31:0]
|
||||
# * 7. OAC_MASK [15:0]
|
||||
# * 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0]
|
||||
# */
|
||||
def PACKET3_SET_RESOURCES_VMID_MASK(x):
    """Return `x` shifted left by 0 bits (value is passed through unmasked).

    Going by the name, this is the VMID_MASK field of the SET_RESOURCES packet.
    """
    bit_offset = 0
    return x << bit_offset
|
||||
def PACKET3_SET_RESOURCES_UNMAP_LATENTY(x):
    """Return `x` shifted left by 16 bits.

    Going by the name, this is the unmap-latency field of the SET_RESOURCES
    packet; no masking is applied.
    NOTE(review): "LATENTY" looks like a misspelling of "LATENCY"; the name is
    kept unchanged because callers may reference it.
    """
    bit_offset = 16
    return x << bit_offset
|
||||
def PACKET3_SET_RESOURCES_QUEUE_TYPE(x):
    """Return `x` shifted left by 29 bits.

    Going by the name, this is the QUEUE_TYPE field of the SET_RESOURCES
    packet; no masking is applied.
    """
    bit_offset = 29
    return x << bit_offset
|
||||
PACKET3_MAP_PROCESS = 0xA1
|
||||
PACKET3_MAP_QUEUES = 0xA2
|
||||
#/* 1. header
|
||||
# * 2. CONTROL
|
||||
# * 3. CONTROL2
|
||||
# * 4. MQD_ADDR_LO [31:0]
|
||||
# * 5. MQD_ADDR_HI [31:0]
|
||||
# * 6. WPTR_ADDR_LO [31:0]
|
||||
# * 7. WPTR_ADDR_HI [31:0]
|
||||
# */
|
||||
#/* CONTROL */
|
||||
def PACKET3_MAP_QUEUES_QUEUE_SEL(x):
    """Return `x` shifted left by 4 bits.

    Going by the name, this is the QUEUE_SEL field of the MAP_QUEUES packet;
    no masking is applied.
    """
    bit_offset = 4
    return x << bit_offset
|
||||
def PACKET3_MAP_QUEUES_VMID(x):
    """Return `x` shifted left by 8 bits.

    Going by the name, this is the VMID field of the MAP_QUEUES packet;
    no masking is applied.
    """
    bit_offset = 8
    return x << bit_offset
|
||||
def PACKET3_MAP_QUEUES_QUEUE(x):
    """Return `x` shifted left by 13 bits.

    Going by the name, this is the QUEUE field of the MAP_QUEUES packet;
    no masking is applied.
    """
    bit_offset = 13
    return x << bit_offset
|
||||
def PACKET3_MAP_QUEUES_PIPE(x):
    """Return `x` shifted left by 16 bits.

    Going by the name, this is the PIPE field of the MAP_QUEUES packet;
    no masking is applied.
    """
    bit_offset = 16
    return x << bit_offset
|
||||
def PACKET3_MAP_QUEUES_ME(x):
    """Return `x` shifted left by 18 bits.

    Going by the name, this is the ME field of the MAP_QUEUES packet;
    no masking is applied.
    """
    bit_offset = 18
    return x << bit_offset
|
||||
def PACKET3_MAP_QUEUES_QUEUE_TYPE(x):
    """Return `x` shifted left by 21 bits.

    Going by the name, this is the QUEUE_TYPE field of the MAP_QUEUES packet;
    no masking is applied.
    """
    bit_offset = 21
    return x << bit_offset
|
||||
def PACKET3_MAP_QUEUES_ALLOC_FORMAT(x):
    """Return `x` shifted left by 24 bits.

    Going by the name, this is the ALLOC_FORMAT field of the MAP_QUEUES packet;
    no masking is applied.
    """
    bit_offset = 24
    return x << bit_offset
|
||||
def PACKET3_MAP_QUEUES_ENGINE_SEL(x):
    """Return `x` shifted left by 26 bits.

    Going by the name, this is the ENGINE_SEL field of the MAP_QUEUES packet;
    no masking is applied.
    """
    bit_offset = 26
    return x << bit_offset
|
||||
def PACKET3_MAP_QUEUES_NUM_QUEUES(x):
    """Return `x` shifted left by 29 bits.

    Going by the name, this is the NUM_QUEUES field of the MAP_QUEUES packet;
    no masking is applied.
    """
    bit_offset = 29
    return x << bit_offset
|
||||
#/* CONTROL2 */
|
||||
def PACKET3_MAP_QUEUES_CHECK_DISABLE(x):
    """Return `x` shifted left by 1 bit.

    Going by the name, this is the CHECK_DISABLE field of the MAP_QUEUES
    packet; no masking is applied.
    """
    bit_offset = 1
    return x << bit_offset
|
||||
def PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x):
    """Return `x` shifted left by 2 bits.

    Going by the name, this is the DOORBELL_OFFSET field of the MAP_QUEUES
    packet; no masking is applied.
    """
    bit_offset = 2
    return x << bit_offset
|
||||
PACKET3_UNMAP_QUEUES = 0xA3
|
||||
#/* 1. header
|
||||
# * 2. CONTROL
|
||||
# * 3. CONTROL2
|
||||
# * 4. CONTROL3
|
||||
# * 5. CONTROL4
|
||||
# * 6. CONTROL5
|
||||
# */
|
||||
#/* CONTROL */
|
||||
def PACKET3_UNMAP_QUEUES_ACTION(x):
    """Return `x` shifted left by 0 bits (value is passed through unmasked).

    Going by the name, this is the ACTION field of the UNMAP_QUEUES packet.
    """
    bit_offset = 0
    return x << bit_offset
|
||||
#/* 0 - PREEMPT_QUEUES
|
||||
# * 1 - RESET_QUEUES
|
||||
# * 2 - DISABLE_PROCESS_QUEUES
|
||||
# * 3 - PREEMPT_QUEUES_NO_UNMAP
|
||||
# */
|
||||
def PACKET3_UNMAP_QUEUES_QUEUE_SEL(x):
    """Return `x` shifted left by 4 bits.

    Going by the name, this is the QUEUE_SEL field of the UNMAP_QUEUES packet;
    no masking is applied.
    """
    bit_offset = 4
    return x << bit_offset
|
||||
def PACKET3_UNMAP_QUEUES_ENGINE_SEL(x):
    """Return `x` shifted left by 26 bits.

    Going by the name, this is the ENGINE_SEL field of the UNMAP_QUEUES packet;
    no masking is applied.
    """
    bit_offset = 26
    return x << bit_offset
|
||||
def PACKET3_UNMAP_QUEUES_NUM_QUEUES(x):
    """Return `x` shifted left by 29 bits.

    Going by the name, this is the NUM_QUEUES field of the UNMAP_QUEUES packet;
    no masking is applied.
    """
    bit_offset = 29
    return x << bit_offset
|
||||
#/* CONTROL2a */
|
||||
def PACKET3_UNMAP_QUEUES_PASID(x):
    """Return `x` shifted left by 0 bits (value is passed through unmasked).

    Going by the name, this is the PASID field of the UNMAP_QUEUES packet.
    """
    bit_offset = 0
    return x << bit_offset
|
||||
#/* CONTROL2b */
|
||||
def PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x):
    """Return `x` shifted left by 2 bits.

    Going by the name, this is the first doorbell-offset field of the
    UNMAP_QUEUES packet; no masking is applied.
    """
    bit_offset = 2
    return x << bit_offset
|
||||
#/* CONTROL3a */
|
||||
def PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x):
    """Return `x` shifted left by 2 bits.

    Going by the name, this is the second doorbell-offset field of the
    UNMAP_QUEUES packet; no masking is applied.
    """
    bit_offset = 2
    return x << bit_offset
|
||||
#/* CONTROL3b */
|
||||
def PACKET3_UNMAP_QUEUES_RB_WPTR(x):
    """Return `x` shifted left by 0 bits (value is passed through unmasked).

    Going by the name, this is the RB_WPTR field of the UNMAP_QUEUES packet.
    """
    bit_offset = 0
    return x << bit_offset
|
||||
#/* CONTROL4 */
|
||||
def PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x):
    """Return `x` shifted left by 2 bits.

    Going by the name, this is the third doorbell-offset field of the
    UNMAP_QUEUES packet; no masking is applied.
    """
    bit_offset = 2
    return x << bit_offset
|
||||
#/* CONTROL5 */
|
||||
def PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x):
    """Return `x` shifted left by 2 bits.

    Going by the name, this is the fourth doorbell-offset field of the
    UNMAP_QUEUES packet; no masking is applied.
    """
    bit_offset = 2
    return x << bit_offset
|
||||
PACKET3_QUERY_STATUS = 0xA4
|
||||
#/* 1. header
|
||||
# * 2. CONTROL
|
||||
# * 3. CONTROL2
|
||||
# * 4. ADDR_LO [31:0]
|
||||
# * 5. ADDR_HI [31:0]
|
||||
# * 6. DATA_LO [31:0]
|
||||
# * 7. DATA_HI [31:0]
|
||||
# */
|
||||
#/* CONTROL */
|
||||
def PACKET3_QUERY_STATUS_CONTEXT_ID(x):
    """Return `x` shifted left by 0 bits (value is passed through unmasked).

    Going by the name, this is the CONTEXT_ID field of the QUERY_STATUS packet.
    """
    bit_offset = 0
    return x << bit_offset
|
||||
def PACKET3_QUERY_STATUS_INTERRUPT_SEL(x):
    """Return `x` shifted left by 28 bits.

    Going by the name, this is the INTERRUPT_SEL field of the QUERY_STATUS
    packet; no masking is applied.
    """
    bit_offset = 28
    return x << bit_offset
|
||||
def PACKET3_QUERY_STATUS_COMMAND(x):
    """Return `x` shifted left by 30 bits.

    Going by the name, this is the COMMAND field of the QUERY_STATUS packet;
    no masking is applied.
    """
    bit_offset = 30
    return x << bit_offset
|
||||
#/* CONTROL2a */
|
||||
def PACKET3_QUERY_STATUS_PASID(x):
    """Return `x` shifted left by 0 bits (value is passed through unmasked).

    Going by the name, this is the PASID field of the QUERY_STATUS packet.
    """
    bit_offset = 0
    return x << bit_offset
|
||||
#/* CONTROL2b */
|
||||
def PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x):
    """Return `x` shifted left by 2 bits.

    Going by the name, this is the DOORBELL_OFFSET field of the QUERY_STATUS
    packet; no masking is applied.
    """
    bit_offset = 2
    return x << bit_offset
|
||||
def PACKET3_QUERY_STATUS_ENG_SEL(x):
    """Return `x` shifted left by 25 bits.

    Going by the name, this is the ENG_SEL field of the QUERY_STATUS packet;
    no masking is applied.
    """
    bit_offset = 25
    return x << bit_offset
|
||||
PACKET3_RUN_LIST = 0xA5
|
||||
PACKET3_MAP_PROCESS_VM = 0xA6
|
||||
#/* GFX11 */
|
||||
PACKET3_SET_Q_PREEMPTION_MODE = 0xF0
|
||||
def PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x):
    """Return `x` shifted left by 0 bits (value is passed through unmasked).

    Going by the name, this is the IB_VMID field of the SET_Q_PREEMPTION_MODE
    packet.
    """
    bit_offset = 0
    return x << bit_offset
|
||||
PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM = (1 << 0)
|
||||
|
||||
#endif
|
||||
@@ -146,6 +146,16 @@ if ctypes.sizeof(ctypes.c_longdouble) == 16:
|
||||
else:
|
||||
c_long_double_t = ctypes.c_ubyte*16
|
||||
|
||||
class FunctionFactoryStub:
    """Stand-in for a shared library that was not supplied to the binding generator.

    Looking up any attribute that normal resolution cannot find returns a ctypes
    function prototype instead of raising AttributeError, so symbol lookups
    against this object always yield something. The returned prototype is not
    backed by native code.
    """

    def __getattr__(self, _):
        # The requested symbol name is ignored; every lookup gets a fresh
        # prototype whose "restype" is an identity callable.
        def passthrough(y):
            return y

        return ctypes.CFUNCTYPE(passthrough)
|
||||
|
||||
# libraries['FIXME_STUB'] explanation
|
||||
# As you did not list (-l libraryname.so) a library that exports this function
|
||||
# This is a non-working stub instead.
|
||||
# You can either re-run clang2py with -l /path/to/library.so
|
||||
# Or fix this manually by editing the commented-out ctypes.CDLL loading below
|
||||
_libraries['FIXME_STUB'] = FunctionFactoryStub() # ctypes.CDLL('FIXME_STUB')
|
||||
|
||||
|
||||
|
||||
@@ -3834,6 +3844,327 @@ struct_hsa_ext_finalizer_1_00_pfn_s._fields_ = [
|
||||
]
|
||||
|
||||
hsa_ext_finalizer_1_00_pfn_t = struct_hsa_ext_finalizer_1_00_pfn_s
|
||||
try:
|
||||
hsa_ven_amd_aqlprofile_version_major = _libraries['FIXME_STUB'].hsa_ven_amd_aqlprofile_version_major
|
||||
hsa_ven_amd_aqlprofile_version_major.restype = uint32_t
|
||||
hsa_ven_amd_aqlprofile_version_major.argtypes = []
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
hsa_ven_amd_aqlprofile_version_minor = _libraries['FIXME_STUB'].hsa_ven_amd_aqlprofile_version_minor
|
||||
hsa_ven_amd_aqlprofile_version_minor.restype = uint32_t
|
||||
hsa_ven_amd_aqlprofile_version_minor.argtypes = []
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
# values for enumeration 'c__EA_hsa_ven_amd_aqlprofile_event_type_t'
|
||||
c__EA_hsa_ven_amd_aqlprofile_event_type_t__enumvalues = {
|
||||
0: 'HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC',
|
||||
1: 'HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE',
|
||||
}
|
||||
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC = 0
|
||||
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE = 1
|
||||
c__EA_hsa_ven_amd_aqlprofile_event_type_t = ctypes.c_uint32 # enum
|
||||
hsa_ven_amd_aqlprofile_event_type_t = c__EA_hsa_ven_amd_aqlprofile_event_type_t
|
||||
hsa_ven_amd_aqlprofile_event_type_t__enumvalues = c__EA_hsa_ven_amd_aqlprofile_event_type_t__enumvalues
|
||||
|
||||
# values for enumeration 'c__EA_hsa_ven_amd_aqlprofile_block_name_t'
|
||||
c__EA_hsa_ven_amd_aqlprofile_block_name_t__enumvalues = {
|
||||
0: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC',
|
||||
1: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF',
|
||||
2: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS',
|
||||
3: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM',
|
||||
4: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE',
|
||||
5: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI',
|
||||
6: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ',
|
||||
7: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS',
|
||||
8: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SRBM',
|
||||
9: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX',
|
||||
10: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA',
|
||||
11: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA',
|
||||
12: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC',
|
||||
13: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP',
|
||||
14: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD',
|
||||
15: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCARB',
|
||||
16: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCHUB',
|
||||
17: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCMCBVM',
|
||||
18: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCSEQ',
|
||||
19: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2',
|
||||
20: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCXBAR',
|
||||
21: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC',
|
||||
22: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2',
|
||||
23: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA',
|
||||
24: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB',
|
||||
25: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA',
|
||||
26: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A',
|
||||
27: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C',
|
||||
28: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A',
|
||||
29: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C',
|
||||
30: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR',
|
||||
31: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GUS',
|
||||
32: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_UMC',
|
||||
33: 'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MMEA',
|
||||
34: 'HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER',
|
||||
}
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC = 0
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF = 1
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS = 2
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM = 3
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE = 4
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI = 5
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ = 6
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS = 7
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SRBM = 8
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX = 9
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA = 10
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA = 11
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC = 12
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP = 13
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD = 14
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCARB = 15
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCHUB = 16
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCMCBVM = 17
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCSEQ = 18
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2 = 19
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCXBAR = 20
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC = 21
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2 = 22
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA = 23
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB = 24
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA = 25
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A = 26
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C = 27
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A = 28
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C = 29
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR = 30
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GUS = 31
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_UMC = 32
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MMEA = 33
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER = 34
|
||||
c__EA_hsa_ven_amd_aqlprofile_block_name_t = ctypes.c_uint32 # enum
|
||||
hsa_ven_amd_aqlprofile_block_name_t = c__EA_hsa_ven_amd_aqlprofile_block_name_t
|
||||
hsa_ven_amd_aqlprofile_block_name_t__enumvalues = c__EA_hsa_ven_amd_aqlprofile_block_name_t__enumvalues
|
||||
class struct_c__SA_hsa_ven_amd_aqlprofile_event_t(Structure):
|
||||
pass
|
||||
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_event_t._pack_ = 1 # source:False
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_event_t._fields_ = [
|
||||
('block_name', hsa_ven_amd_aqlprofile_block_name_t),
|
||||
('block_index', ctypes.c_uint32),
|
||||
('counter_id', ctypes.c_uint32),
|
||||
]
|
||||
|
||||
hsa_ven_amd_aqlprofile_event_t = struct_c__SA_hsa_ven_amd_aqlprofile_event_t
|
||||
try:
|
||||
hsa_ven_amd_aqlprofile_validate_event = _libraries['FIXME_STUB'].hsa_ven_amd_aqlprofile_validate_event
|
||||
hsa_ven_amd_aqlprofile_validate_event.restype = hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_validate_event.argtypes = [hsa_agent_t, ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_event_t), ctypes.POINTER(ctypes.c_bool)]
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
# values for enumeration 'c__EA_hsa_ven_amd_aqlprofile_parameter_name_t'
|
||||
c__EA_hsa_ven_amd_aqlprofile_parameter_name_t__enumvalues = {
|
||||
0: 'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET',
|
||||
1: 'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK',
|
||||
2: 'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK',
|
||||
3: 'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK',
|
||||
4: 'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2',
|
||||
5: 'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK',
|
||||
6: 'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE',
|
||||
7: 'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT',
|
||||
}
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET = 0
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK = 1
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK = 2
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK = 3
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2 = 4
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK = 5
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE = 6
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT = 7
|
||||
c__EA_hsa_ven_amd_aqlprofile_parameter_name_t = ctypes.c_uint32 # enum
|
||||
hsa_ven_amd_aqlprofile_parameter_name_t = c__EA_hsa_ven_amd_aqlprofile_parameter_name_t
|
||||
hsa_ven_amd_aqlprofile_parameter_name_t__enumvalues = c__EA_hsa_ven_amd_aqlprofile_parameter_name_t__enumvalues
|
||||
class struct_c__SA_hsa_ven_amd_aqlprofile_parameter_t(Structure):
|
||||
pass
|
||||
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_parameter_t._pack_ = 1 # source:False
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_parameter_t._fields_ = [
|
||||
('parameter_name', hsa_ven_amd_aqlprofile_parameter_name_t),
|
||||
('value', ctypes.c_uint32),
|
||||
]
|
||||
|
||||
hsa_ven_amd_aqlprofile_parameter_t = struct_c__SA_hsa_ven_amd_aqlprofile_parameter_t
|
||||
class struct_c__SA_hsa_ven_amd_aqlprofile_descriptor_t(Structure):
|
||||
pass
|
||||
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_descriptor_t._pack_ = 1 # source:False
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_descriptor_t._fields_ = [
|
||||
('ptr', ctypes.POINTER(None)),
|
||||
('size', ctypes.c_uint32),
|
||||
('PADDING_0', ctypes.c_ubyte * 4),
|
||||
]
|
||||
|
||||
hsa_ven_amd_aqlprofile_descriptor_t = struct_c__SA_hsa_ven_amd_aqlprofile_descriptor_t
|
||||
class struct_c__SA_hsa_ven_amd_aqlprofile_profile_t(Structure):
|
||||
pass
|
||||
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_profile_t._pack_ = 1 # source:False
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_profile_t._fields_ = [
|
||||
('agent', hsa_agent_t),
|
||||
('type', hsa_ven_amd_aqlprofile_event_type_t),
|
||||
('PADDING_0', ctypes.c_ubyte * 4),
|
||||
('events', ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_event_t)),
|
||||
('event_count', ctypes.c_uint32),
|
||||
('PADDING_1', ctypes.c_ubyte * 4),
|
||||
('parameters', ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_parameter_t)),
|
||||
('parameter_count', ctypes.c_uint32),
|
||||
('PADDING_2', ctypes.c_ubyte * 4),
|
||||
('output_buffer', hsa_ven_amd_aqlprofile_descriptor_t),
|
||||
('command_buffer', hsa_ven_amd_aqlprofile_descriptor_t),
|
||||
]
|
||||
|
||||
hsa_ven_amd_aqlprofile_profile_t = struct_c__SA_hsa_ven_amd_aqlprofile_profile_t
|
||||
class struct_c__SA_hsa_ext_amd_aql_pm4_packet_t(Structure):
|
||||
pass
|
||||
|
||||
struct_c__SA_hsa_ext_amd_aql_pm4_packet_t._pack_ = 1 # source:False
|
||||
struct_c__SA_hsa_ext_amd_aql_pm4_packet_t._fields_ = [
|
||||
('header', ctypes.c_uint16),
|
||||
('pm4_command', ctypes.c_uint16 * 27),
|
||||
('completion_signal', hsa_signal_t),
|
||||
]
|
||||
|
||||
hsa_ext_amd_aql_pm4_packet_t = struct_c__SA_hsa_ext_amd_aql_pm4_packet_t
|
||||
try:
|
||||
hsa_ven_amd_aqlprofile_start = _libraries['FIXME_STUB'].hsa_ven_amd_aqlprofile_start
|
||||
hsa_ven_amd_aqlprofile_start.restype = hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_start.argtypes = [ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_profile_t), ctypes.POINTER(struct_c__SA_hsa_ext_amd_aql_pm4_packet_t)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
hsa_ven_amd_aqlprofile_stop = _libraries['FIXME_STUB'].hsa_ven_amd_aqlprofile_stop
|
||||
hsa_ven_amd_aqlprofile_stop.restype = hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_stop.argtypes = [ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_profile_t), ctypes.POINTER(struct_c__SA_hsa_ext_amd_aql_pm4_packet_t)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
hsa_ven_amd_aqlprofile_read = _libraries['FIXME_STUB'].hsa_ven_amd_aqlprofile_read
|
||||
hsa_ven_amd_aqlprofile_read.restype = hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_read.argtypes = [ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_profile_t), ctypes.POINTER(struct_c__SA_hsa_ext_amd_aql_pm4_packet_t)]
|
||||
except AttributeError:
|
||||
pass
|
||||
HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE = 192 # Variable ctypes.c_uint32
|
||||
try:
|
||||
hsa_ven_amd_aqlprofile_legacy_get_pm4 = _libraries['FIXME_STUB'].hsa_ven_amd_aqlprofile_legacy_get_pm4
|
||||
hsa_ven_amd_aqlprofile_legacy_get_pm4.restype = hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_legacy_get_pm4.argtypes = [ctypes.POINTER(struct_c__SA_hsa_ext_amd_aql_pm4_packet_t), ctypes.POINTER(None)]
|
||||
except AttributeError:
|
||||
pass
|
||||
class struct_c__SA_hsa_ven_amd_aqlprofile_info_data_t(Structure):
|
||||
pass
|
||||
|
||||
class union_c__SA_hsa_ven_amd_aqlprofile_info_data_t_0(Union):
|
||||
pass
|
||||
|
||||
class struct_c__SA_hsa_ven_amd_aqlprofile_info_data_t_0_pmc_data(Structure):
|
||||
pass
|
||||
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_info_data_t_0_pmc_data._pack_ = 1 # source:False
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_info_data_t_0_pmc_data._fields_ = [
|
||||
('event', hsa_ven_amd_aqlprofile_event_t),
|
||||
('PADDING_0', ctypes.c_ubyte * 4),
|
||||
('result', ctypes.c_uint64),
|
||||
]
|
||||
|
||||
union_c__SA_hsa_ven_amd_aqlprofile_info_data_t_0._pack_ = 1 # source:False
|
||||
union_c__SA_hsa_ven_amd_aqlprofile_info_data_t_0._fields_ = [
|
||||
('pmc_data', struct_c__SA_hsa_ven_amd_aqlprofile_info_data_t_0_pmc_data),
|
||||
('trace_data', hsa_ven_amd_aqlprofile_descriptor_t),
|
||||
('PADDING_0', ctypes.c_ubyte * 8),
|
||||
]
|
||||
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_info_data_t._pack_ = 1 # source:False
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_info_data_t._anonymous_ = ('_0',)
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_info_data_t._fields_ = [
|
||||
('sample_id', ctypes.c_uint32),
|
||||
('PADDING_0', ctypes.c_ubyte * 4),
|
||||
('_0', union_c__SA_hsa_ven_amd_aqlprofile_info_data_t_0),
|
||||
]
|
||||
|
||||
hsa_ven_amd_aqlprofile_info_data_t = struct_c__SA_hsa_ven_amd_aqlprofile_info_data_t
|
||||
class struct_c__SA_hsa_ven_amd_aqlprofile_id_query_t(Structure):
|
||||
pass
|
||||
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_id_query_t._pack_ = 1 # source:False
|
||||
struct_c__SA_hsa_ven_amd_aqlprofile_id_query_t._fields_ = [
|
||||
('name', ctypes.POINTER(ctypes.c_char)),
|
||||
('id', ctypes.c_uint32),
|
||||
('instance_count', ctypes.c_uint32),
|
||||
]
|
||||
|
||||
hsa_ven_amd_aqlprofile_id_query_t = struct_c__SA_hsa_ven_amd_aqlprofile_id_query_t
|
||||
|
||||
# values for enumeration 'c__EA_hsa_ven_amd_aqlprofile_info_type_t'
|
||||
c__EA_hsa_ven_amd_aqlprofile_info_type_t__enumvalues = {
|
||||
0: 'HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE',
|
||||
1: 'HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE',
|
||||
2: 'HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA',
|
||||
3: 'HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA',
|
||||
4: 'HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS',
|
||||
5: 'HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID',
|
||||
6: 'HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD',
|
||||
7: 'HSA_VEN_AMD_AQLPROFILE_INFO_DISABLE_CMD',
|
||||
}
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE = 0
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE = 1
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA = 2
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA = 3
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS = 4
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID = 5
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD = 6
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_DISABLE_CMD = 7
|
||||
c__EA_hsa_ven_amd_aqlprofile_info_type_t = ctypes.c_uint32 # enum
|
||||
hsa_ven_amd_aqlprofile_info_type_t = c__EA_hsa_ven_amd_aqlprofile_info_type_t
|
||||
hsa_ven_amd_aqlprofile_info_type_t__enumvalues = c__EA_hsa_ven_amd_aqlprofile_info_type_t__enumvalues
|
||||
hsa_ven_amd_aqlprofile_data_callback_t = ctypes.CFUNCTYPE(c__EA_hsa_status_t, c__EA_hsa_ven_amd_aqlprofile_info_type_t, ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_info_data_t), ctypes.POINTER(None))
|
||||
try:
|
||||
hsa_ven_amd_aqlprofile_get_info = _libraries['FIXME_STUB'].hsa_ven_amd_aqlprofile_get_info
|
||||
hsa_ven_amd_aqlprofile_get_info.restype = hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_get_info.argtypes = [ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_profile_t), hsa_ven_amd_aqlprofile_info_type_t, ctypes.POINTER(None)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
hsa_ven_amd_aqlprofile_iterate_data = _libraries['FIXME_STUB'].hsa_ven_amd_aqlprofile_iterate_data
|
||||
hsa_ven_amd_aqlprofile_iterate_data.restype = hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_iterate_data.argtypes = [ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_profile_t), hsa_ven_amd_aqlprofile_data_callback_t, ctypes.POINTER(None)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
hsa_ven_amd_aqlprofile_error_string = _libraries['FIXME_STUB'].hsa_ven_amd_aqlprofile_error_string
|
||||
hsa_ven_amd_aqlprofile_error_string.restype = hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_error_string.argtypes = [ctypes.POINTER(ctypes.POINTER(ctypes.c_char))]
|
||||
except AttributeError:
|
||||
pass
|
||||
kAqlProfileLib = 'libhsa-amd-aqlprofile64.so' # Variable ctypes.c_char * 27
|
||||
class struct_hsa_ven_amd_aqlprofile_1_00_pfn_s(Structure):
|
||||
pass
|
||||
|
||||
struct_hsa_ven_amd_aqlprofile_1_00_pfn_s._pack_ = 1 # source:False
|
||||
struct_hsa_ven_amd_aqlprofile_1_00_pfn_s._fields_ = [
|
||||
('hsa_ven_amd_aqlprofile_version_major', ctypes.CFUNCTYPE(ctypes.c_uint32)),
|
||||
('hsa_ven_amd_aqlprofile_version_minor', ctypes.CFUNCTYPE(ctypes.c_uint32)),
|
||||
('hsa_ven_amd_aqlprofile_error_string', ctypes.CFUNCTYPE(c__EA_hsa_status_t, ctypes.POINTER(ctypes.POINTER(ctypes.c_char)))),
|
||||
('hsa_ven_amd_aqlprofile_validate_event', ctypes.CFUNCTYPE(c__EA_hsa_status_t, struct_hsa_agent_s, ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_event_t), ctypes.POINTER(ctypes.c_bool))),
|
||||
('hsa_ven_amd_aqlprofile_start', ctypes.CFUNCTYPE(c__EA_hsa_status_t, ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_profile_t), ctypes.POINTER(struct_c__SA_hsa_ext_amd_aql_pm4_packet_t))),
|
||||
('hsa_ven_amd_aqlprofile_stop', ctypes.CFUNCTYPE(c__EA_hsa_status_t, ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_profile_t), ctypes.POINTER(struct_c__SA_hsa_ext_amd_aql_pm4_packet_t))),
|
||||
('hsa_ven_amd_aqlprofile_read', ctypes.CFUNCTYPE(c__EA_hsa_status_t, ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_profile_t), ctypes.POINTER(struct_c__SA_hsa_ext_amd_aql_pm4_packet_t))),
|
||||
('hsa_ven_amd_aqlprofile_legacy_get_pm4', ctypes.CFUNCTYPE(c__EA_hsa_status_t, ctypes.POINTER(struct_c__SA_hsa_ext_amd_aql_pm4_packet_t), ctypes.POINTER(None))),
|
||||
('hsa_ven_amd_aqlprofile_get_info', ctypes.CFUNCTYPE(c__EA_hsa_status_t, ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_profile_t), c__EA_hsa_ven_amd_aqlprofile_info_type_t, ctypes.POINTER(None))),
|
||||
('hsa_ven_amd_aqlprofile_iterate_data', ctypes.CFUNCTYPE(c__EA_hsa_status_t, ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_profile_t), ctypes.CFUNCTYPE(c__EA_hsa_status_t, c__EA_hsa_ven_amd_aqlprofile_info_type_t, ctypes.POINTER(struct_c__SA_hsa_ven_amd_aqlprofile_info_data_t), ctypes.POINTER(None)), ctypes.POINTER(None))),
|
||||
]
|
||||
|
||||
hsa_ven_amd_aqlprofile_1_00_pfn_t = struct_hsa_ven_amd_aqlprofile_1_00_pfn_s
|
||||
hsa_ven_amd_aqlprofile_pfn_t = struct_hsa_ven_amd_aqlprofile_1_00_pfn_s
|
||||
__all__ = \
|
||||
['AMD_QUEUE_PROPERTIES_ENABLE_PROFILING',
|
||||
'AMD_QUEUE_PROPERTIES_ENABLE_PROFILING_SHIFT',
|
||||
@@ -4221,13 +4552,68 @@ __all__ = \
|
||||
'HSA_SYSTEM_INFO_VERSION_MAJOR', 'HSA_SYSTEM_INFO_VERSION_MINOR',
|
||||
'HSA_VARIABLE_ALLOCATION_AGENT',
|
||||
'HSA_VARIABLE_ALLOCATION_PROGRAM', 'HSA_VARIABLE_SEGMENT_GLOBAL',
|
||||
'HSA_VARIABLE_SEGMENT_READONLY', 'HSA_WAIT_STATE_ACTIVE',
|
||||
'HSA_WAIT_STATE_BLOCKED', 'HSA_WAVEFRONT_INFO_SIZE',
|
||||
'MEMORY_TYPE_NONE', 'MEMORY_TYPE_PINNED',
|
||||
'amd_queue_properties32_t', 'amd_queue_properties_t',
|
||||
'amd_queue_t', 'amd_signal_kind64_t', 'amd_signal_kind_t',
|
||||
'amd_signal_t', 'c__EA_hsa_access_permission_t',
|
||||
'c__EA_hsa_agent_feature_t', 'c__EA_hsa_agent_info_t',
|
||||
'HSA_VARIABLE_SEGMENT_READONLY',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GUS',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCARB',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCHUB',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCMCBVM',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCSEQ',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCXBAR',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MMEA',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SRBM',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD',
|
||||
'HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_UMC',
|
||||
'HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC',
|
||||
'HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE',
|
||||
'HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS',
|
||||
'HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID',
|
||||
'HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE',
|
||||
'HSA_VEN_AMD_AQLPROFILE_INFO_DISABLE_CMD',
|
||||
'HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD',
|
||||
'HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA',
|
||||
'HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE',
|
||||
'HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA',
|
||||
'HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE',
|
||||
'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET',
|
||||
'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT',
|
||||
'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK',
|
||||
'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE',
|
||||
'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK',
|
||||
'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK',
|
||||
'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2',
|
||||
'HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK',
|
||||
'HSA_WAIT_STATE_ACTIVE', 'HSA_WAIT_STATE_BLOCKED',
|
||||
'HSA_WAVEFRONT_INFO_SIZE', 'MEMORY_TYPE_NONE',
|
||||
'MEMORY_TYPE_PINNED', 'amd_queue_properties32_t',
|
||||
'amd_queue_properties_t', 'amd_queue_t', 'amd_signal_kind64_t',
|
||||
'amd_signal_kind_t', 'amd_signal_t',
|
||||
'c__EA_hsa_access_permission_t', 'c__EA_hsa_agent_feature_t',
|
||||
'c__EA_hsa_agent_info_t',
|
||||
'c__EA_hsa_amd_agent_memory_pool_info_t',
|
||||
'c__EA_hsa_amd_copy_direction_t',
|
||||
'c__EA_hsa_amd_hw_exception_reset_cause_t',
|
||||
@@ -4267,6 +4653,10 @@ __all__ = \
|
||||
'c__EA_hsa_status_t', 'c__EA_hsa_symbol_kind_t',
|
||||
'c__EA_hsa_symbol_linkage_t', 'c__EA_hsa_system_info_t',
|
||||
'c__EA_hsa_variable_allocation_t', 'c__EA_hsa_variable_segment_t',
|
||||
'c__EA_hsa_ven_amd_aqlprofile_block_name_t',
|
||||
'c__EA_hsa_ven_amd_aqlprofile_event_type_t',
|
||||
'c__EA_hsa_ven_amd_aqlprofile_info_type_t',
|
||||
'c__EA_hsa_ven_amd_aqlprofile_parameter_name_t',
|
||||
'c__EA_hsa_wait_state_t', 'c__EA_hsa_wavefront_info_t',
|
||||
'c__Ea_HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS',
|
||||
'c__Ea_HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED',
|
||||
@@ -4418,7 +4808,8 @@ __all__ = \
|
||||
'hsa_executable_symbol_info_t__enumvalues',
|
||||
'hsa_executable_symbol_t', 'hsa_executable_t',
|
||||
'hsa_executable_validate', 'hsa_executable_validate_alt',
|
||||
'hsa_ext_control_directives_t', 'hsa_ext_finalizer_1_00_pfn_t',
|
||||
'hsa_ext_amd_aql_pm4_packet_t', 'hsa_ext_control_directives_t',
|
||||
'hsa_ext_finalizer_1_00_pfn_t',
|
||||
'hsa_ext_finalizer_call_convention_t',
|
||||
'hsa_ext_finalizer_call_convention_t__enumvalues',
|
||||
'hsa_ext_image_capability_t',
|
||||
@@ -4562,11 +4953,45 @@ __all__ = \
|
||||
'hsa_system_major_extension_supported',
|
||||
'hsa_variable_allocation_t',
|
||||
'hsa_variable_allocation_t__enumvalues', 'hsa_variable_segment_t',
|
||||
'hsa_variable_segment_t__enumvalues', 'hsa_wait_state_t',
|
||||
'hsa_variable_segment_t__enumvalues',
|
||||
'hsa_ven_amd_aqlprofile_1_00_pfn_t',
|
||||
'hsa_ven_amd_aqlprofile_block_name_t',
|
||||
'hsa_ven_amd_aqlprofile_block_name_t__enumvalues',
|
||||
'hsa_ven_amd_aqlprofile_data_callback_t',
|
||||
'hsa_ven_amd_aqlprofile_descriptor_t',
|
||||
'hsa_ven_amd_aqlprofile_error_string',
|
||||
'hsa_ven_amd_aqlprofile_event_t',
|
||||
'hsa_ven_amd_aqlprofile_event_type_t',
|
||||
'hsa_ven_amd_aqlprofile_event_type_t__enumvalues',
|
||||
'hsa_ven_amd_aqlprofile_get_info',
|
||||
'hsa_ven_amd_aqlprofile_id_query_t',
|
||||
'hsa_ven_amd_aqlprofile_info_data_t',
|
||||
'hsa_ven_amd_aqlprofile_info_type_t',
|
||||
'hsa_ven_amd_aqlprofile_info_type_t__enumvalues',
|
||||
'hsa_ven_amd_aqlprofile_iterate_data',
|
||||
'hsa_ven_amd_aqlprofile_legacy_get_pm4',
|
||||
'hsa_ven_amd_aqlprofile_parameter_name_t',
|
||||
'hsa_ven_amd_aqlprofile_parameter_name_t__enumvalues',
|
||||
'hsa_ven_amd_aqlprofile_parameter_t',
|
||||
'hsa_ven_amd_aqlprofile_pfn_t',
|
||||
'hsa_ven_amd_aqlprofile_profile_t', 'hsa_ven_amd_aqlprofile_read',
|
||||
'hsa_ven_amd_aqlprofile_start', 'hsa_ven_amd_aqlprofile_stop',
|
||||
'hsa_ven_amd_aqlprofile_validate_event',
|
||||
'hsa_ven_amd_aqlprofile_version_major',
|
||||
'hsa_ven_amd_aqlprofile_version_minor', 'hsa_wait_state_t',
|
||||
'hsa_wait_state_t__enumvalues', 'hsa_wavefront_get_info',
|
||||
'hsa_wavefront_info_t', 'hsa_wavefront_info_t__enumvalues',
|
||||
'hsa_wavefront_t', 'int32_t', 'size_t', 'struct_BrigModuleHeader',
|
||||
'struct_amd_queue_s', 'struct_amd_signal_s',
|
||||
'hsa_wavefront_t', 'int32_t', 'kAqlProfileLib', 'size_t',
|
||||
'struct_BrigModuleHeader', 'struct_amd_queue_s',
|
||||
'struct_amd_signal_s',
|
||||
'struct_c__SA_hsa_ext_amd_aql_pm4_packet_t',
|
||||
'struct_c__SA_hsa_ven_amd_aqlprofile_descriptor_t',
|
||||
'struct_c__SA_hsa_ven_amd_aqlprofile_event_t',
|
||||
'struct_c__SA_hsa_ven_amd_aqlprofile_id_query_t',
|
||||
'struct_c__SA_hsa_ven_amd_aqlprofile_info_data_t',
|
||||
'struct_c__SA_hsa_ven_amd_aqlprofile_info_data_t_0_pmc_data',
|
||||
'struct_c__SA_hsa_ven_amd_aqlprofile_parameter_t',
|
||||
'struct_c__SA_hsa_ven_amd_aqlprofile_profile_t',
|
||||
'struct_hsa_agent_dispatch_packet_s', 'struct_hsa_agent_s',
|
||||
'struct_hsa_amd_barrier_value_packet_s', 'struct_hsa_amd_event_s',
|
||||
'struct_hsa_amd_gpu_hw_exception_info_s',
|
||||
@@ -4599,6 +5024,8 @@ __all__ = \
|
||||
'struct_hsa_loaded_code_object_s', 'struct_hsa_pitched_ptr_s',
|
||||
'struct_hsa_queue_s', 'struct_hsa_region_s',
|
||||
'struct_hsa_signal_group_s', 'struct_hsa_signal_s',
|
||||
'struct_hsa_ven_amd_aqlprofile_1_00_pfn_s',
|
||||
'struct_hsa_wavefront_s', 'uint16_t', 'uint32_t', 'uint64_t',
|
||||
'union_amd_signal_s_0', 'union_amd_signal_s_1',
|
||||
'union_c__SA_hsa_ven_amd_aqlprofile_info_data_t_0',
|
||||
'union_hsa_amd_event_s_0']
|
||||
|
||||
@@ -1,18 +1,20 @@
|
||||
from __future__ import annotations
|
||||
from typing import Tuple
|
||||
import os, fcntl, ctypes, functools, re, pathlib, mmap, struct
|
||||
import os, fcntl, ctypes, functools, re, pathlib, mmap, struct, errno
|
||||
from tinygrad.device import Compiled, LRUAllocator, Compiler, BufferOptions, CompilerOptions
|
||||
from tinygrad.helpers import getenv, from_mv, init_c_struct_t, to_mv, round_up
|
||||
from tinygrad.renderer.cstyle import HIPRenderer
|
||||
from tinygrad.runtime.driver.hip_comgr import compile_hip
|
||||
import tinygrad.runtime.autogen.kfd as kfd
|
||||
import tinygrad.runtime.autogen.hsa as hsa
|
||||
import tinygrad.runtime.autogen.amd_sdma as amd_sdma
|
||||
import tinygrad.runtime.autogen.amd_gpu as amd_gpu
|
||||
if getenv("IOCTL"): import extra.hip_gpu_driver.hip_ioctl # noqa: F401
|
||||
|
||||
# Bind the C library and declare the mmap/munmap prototypes explicitly.
# ctypes assumes a C int return type unless told otherwise, so the pointer
# returned by mmap has to be declared as c_void_p.
libc = ctypes.CDLL("libc.so.6")
libc.mmap.restype = ctypes.c_void_p
libc.mmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long]
libc.munmap.restype = ctypes.c_int
libc.munmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t]
|
||||
|
||||
def node_sysfs_path(node_id, file):
  """Return the sysfs path of `file` inside the KFD topology directory of node `node_id`."""
  return f"/sys/devices/virtual/kfd/kfd/topology/nodes/{node_id}/{file}"
|
||||
|
||||
@@ -40,7 +42,7 @@ kio = ioctls_from_header()
|
||||
def create_sdma_packets():
|
||||
# TODO: clean this up, if we want to keep it
|
||||
structs = {}
|
||||
for name,pkt in [(name,s) for name,s in amd_sdma.__dict__.items() if name.startswith("struct_SDMA_PKT_") and name.endswith("_TAG")]:
|
||||
for name,pkt in [(name,s) for name,s in amd_gpu.__dict__.items() if name.startswith("struct_SDMA_PKT_") and name.endswith("_TAG")]:
|
||||
names = set()
|
||||
fields = []
|
||||
for pkt_fields in pkt._fields_:
|
||||
@@ -72,6 +74,8 @@ class KFDCompiler(Compiler):
|
||||
AQL_PACKET_SIZE = ctypes.sizeof(hsa.hsa_kernel_dispatch_packet_t)
|
||||
SDMA_MAX_COPY_SIZE = 0x400000
|
||||
|
||||
VENDOR_HEADER = hsa.HSA_PACKET_TYPE_VENDOR_SPECIFIC << hsa.HSA_PACKET_HEADER_TYPE
|
||||
|
||||
DISPATCH_KERNEL_SETUP = 3 << hsa.HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS
|
||||
DISPATCH_KERNEL_HEADER = 1 << hsa.HSA_PACKET_HEADER_BARRIER
|
||||
DISPATCH_KERNEL_HEADER |= hsa.HSA_FENCE_SCOPE_SYSTEM << hsa.HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE
|
||||
@@ -96,6 +100,8 @@ class KFDProgram:
|
||||
for _, sh_type, sh_flags, sh_addr, sh_offset, sh_size, _, _, _ in sections:
|
||||
if sh_type == SHT_PROGBITS and sh_flags & SHF_ALLOC: lib_gpu_view[sh_addr:sh_addr+sh_size] = self.lib[sh_offset:sh_offset+sh_size]
|
||||
|
||||
self.device._submit_cache_inv(gli=2)
|
||||
|
||||
entry_point = min(sh[3] for sh in sections if sh[1] == SHT_PROGBITS and sh[2] & SHF_ALLOC)
|
||||
self.handle = self.lib_gpu.va_addr + entry_point
|
||||
self.group_segment_size = lib_gpu_view.cast("I")[entry_point//4]
|
||||
@@ -105,7 +111,8 @@ class KFDProgram:
|
||||
f"{self.private_segment_size=} > {self.device.max_private_segment_size=}"
|
||||
|
||||
# NOTE: no programs are ever freed
|
||||
def __del__(self):
  """Release the program's GPU buffer through the owning device, if one was ever attached."""
  # `lib_gpu` may be missing on this object (e.g. if construction stopped before it
  # was assigned); in that case there is nothing to free.
  if not hasattr(self, 'lib_gpu'): return
  self.device._gpu_free(self.lib_gpu)
|
||||
|
||||
def __call__(self, *args, global_size:Tuple[int,int,int]=(1,1,1), local_size:Tuple[int,int,int]=(1,1,1), vals:Tuple[int, ...]=(), wait=False):
|
||||
if not hasattr(self, "args_struct_t"):
|
||||
@@ -150,8 +157,15 @@ class KFDAllocator(LRUAllocator):
|
||||
super().__init__()
|
||||
|
||||
def _alloc(self, size:int, options:BufferOptions):
  """Allocate `size` bytes via `self.device._gpu_alloc` and return its result.

  Buffers with `options.host` set are requested with the USERPTR flag, all others
  with the VRAM flag; both are allocated with `public=True`.

  Raises:
    MemoryError: if the allocation fails with an OSError whose errno is ENOMEM.
    OSError: any other OSError is propagated unchanged.
  """
  try:
    flags = kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR if options.host else kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM
    return self.device._gpu_alloc(size, flags, public=True)
  except OSError as err:
    # Only out-of-memory is translated; presumably so the allocator's callers can
    # handle exhaustion as a MemoryError -- confirm against LRUAllocator.
    if err.errno != errno.ENOMEM: raise
    raise MemoryError("Cannot allocate memory") from err
|
||||
|
||||
def _free(self, gpumem, options:BufferOptions):
  """Hand `gpumem` back to the owning device for release; `options` is not consulted."""
  release = self.device._gpu_free
  release(gpumem)
|
||||
|
||||
def copyin(self, dest, src: memoryview):
|
||||
# TODO: need to make the address visible to gpu and pass it directly to sdma.
|
||||
@@ -194,11 +208,18 @@ class KFDDevice(Compiled):
|
||||
assert buf != 0xffffffffffffffff
|
||||
assert addr == buf == mem.va_addr
|
||||
if map_to_gpu:
|
||||
arr = (ctypes.c_int32 * 1)(self.gpu_id)
|
||||
stm = kio.map_memory_to_gpu(self.kfd, handle=mem.handle, device_ids_array_ptr=ctypes.addressof(arr), n_devices=1)
|
||||
mem.__setattr__("mapped_gpu_ids", (ctypes.c_int32 * 1)(self.gpu_id))
|
||||
stm = kio.map_memory_to_gpu(self.kfd, handle=mem.handle, device_ids_array_ptr=ctypes.addressof(gpus:=mem.mapped_gpu_ids), n_devices=len(gpus))
|
||||
assert stm.n_success == 1
|
||||
return mem
|
||||
|
||||
def _gpu_free(self, mem):
  """Tear down a buffer: unmap it from its GPUs, unmap the CPU range, then free the KFD handle.

  `mem` must expose `handle`, `va_addr` and `size`. If it also carries a
  `mapped_gpu_ids` ctypes array, the buffer is first unmapped from those GPUs and
  every one of them is expected to succeed.
  """
  mapped = getattr(mem, "mapped_gpu_ids", None)
  if mapped is not None:
    n_mapped = len(mapped)
    res = kio.unmap_memory_from_gpu(self.kfd, handle=mem.handle, device_ids_array_ptr=ctypes.addressof(mapped), n_devices=n_mapped)
    assert res.n_success == n_mapped
  # NOTE: the munmap return value is not checked here
  libc.munmap(mem.va_addr, mem.size)
  kio.free_memory_of_gpu(self.kfd, handle=mem.handle)
|
||||
|
||||
def __init__(self, device:str=""):
|
||||
if KFDDevice.kfd == -1: KFDDevice.kfd = os.open("/dev/kfd", os.O_RDWR)
|
||||
self.device_id = int(device.split(":")[1]) if ":" in device else 0
|
||||
@@ -215,6 +236,7 @@ class KFDDevice(Compiled):
|
||||
self.signals_page = self._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, uncached=True)
|
||||
self.gart = self._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT, uncached=True)
|
||||
self.kernargs = self._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
|
||||
self.pm4_indirect_buf = self._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, uncached=True)
|
||||
self.ctx_save_restore_address = self._gpu_alloc(0x2C02000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
|
||||
|
||||
self.completion_signal = hsa.amd_signal_t.from_address(self.signals_page.va_addr)
|
||||
@@ -274,12 +296,18 @@ class KFDDevice(Compiled):
|
||||
|
||||
# prebuilt packets
|
||||
self.sdma_flush_hdp_pkt = sdma_pkts.hdp_flush(0x8, 0x0, 0x80000000, 0x0, 0x0, 0x0)
|
||||
self.sdma_cache_inv = sdma_pkts.gcr(op=amd_sdma.SDMA_OP_GCR, sub_op=amd_sdma.SDMA_SUBOP_USER_GCR, GCR_CONTROL_GL2_WB=1, GCR_CONTROL_GLK_WB=1,
|
||||
self.sdma_cache_inv = sdma_pkts.gcr(op=amd_gpu.SDMA_OP_GCR, sub_op=amd_gpu.SDMA_SUBOP_USER_GCR, GCR_CONTROL_GL2_WB=1, GCR_CONTROL_GLK_WB=1,
|
||||
GCR_CONTROL_GL2_INV=1, GCR_CONTROL_GL1_INV=1, GCR_CONTROL_GLV_INV=1, GCR_CONTROL_GLK_INV=1,
|
||||
GCR_CONTROL_GL2_RANGE=0)
|
||||
self.sdma_cache_wb = sdma_pkts.gcr(op=amd_sdma.SDMA_OP_GCR, sub_op=amd_sdma.SDMA_SUBOP_USER_GCR, GCR_CONTROL_GL2_WB=1, GCR_CONTROL_GLK_WB=1,
|
||||
self.sdma_cache_wb = sdma_pkts.gcr(op=amd_gpu.SDMA_OP_GCR, sub_op=amd_gpu.SDMA_SUBOP_USER_GCR, GCR_CONTROL_GL2_WB=1, GCR_CONTROL_GLK_WB=1,
|
||||
GCR_CONTROL_GL2_RANGE=0)
|
||||
|
||||
pm4_indirect_cmd = (ctypes.c_uint32*13)(amd_gpu.PACKET3(amd_gpu.PACKET3_INDIRECT_BUFFER, 2), self.pm4_indirect_buf.va_addr & 0xffffffff,
|
||||
(self.pm4_indirect_buf.va_addr>>32) & 0xffffffff, 8 | amd_gpu.INDIRECT_BUFFER_VALID, 0xa)
|
||||
ctypes.memmove(ctypes.addressof(pm4_cmds:=(ctypes.c_uint16*27)(1))+2, ctypes.addressof(pm4_indirect_cmd), ctypes.sizeof(pm4_indirect_cmd))
|
||||
self.pm4_packet = hsa.hsa_ext_amd_aql_pm4_packet_t(header=VENDOR_HEADER, pm4_command=pm4_cmds,
|
||||
completion_signal=hsa.hsa_signal_t(ctypes.addressof(self.completion_signal)))
|
||||
|
||||
# Helpers
|
||||
map_uptr2gpu_struct_t = init_c_struct_t(tuple(kfd.struct_kfd_ioctl_svm_args._fields_[:-1]+[('attrs', kfd.struct_kfd_ioctl_svm_attribute*2)])) # type: ignore
|
||||
self.map_uptr2gpu_struct = map_uptr2gpu_struct_t(nattr=2, op=0x0)
|
||||
@@ -302,11 +330,11 @@ class KFDDevice(Compiled):
|
||||
# NOTE: we check only low 32 bits to be zeroed, we don't use higher values for signals
|
||||
for sig in wait_signals:
|
||||
poll_addr = ctypes.addressof(sig) + getattr(hsa.amd_signal_t, 'value').offset
|
||||
blit_sdma_command(sdma_pkts.poll_regmem(op=amd_sdma.SDMA_OP_POLL_REGMEM, mem_poll=1, func=0x3, addr=poll_addr,
|
||||
blit_sdma_command(sdma_pkts.poll_regmem(op=amd_gpu.SDMA_OP_POLL_REGMEM, mem_poll=1, func=0x3, addr=poll_addr,
|
||||
value=0, mask=0xffffffff, interval=0x04, retry_count=0xfff))
|
||||
|
||||
if completion_signal is not None:
|
||||
blit_sdma_command(sdma_pkts.timestamp(op=amd_sdma.SDMA_OP_TIMESTAMP, sub_op=amd_sdma.SDMA_SUBOP_TIMESTAMP_GET_GLOBAL,
|
||||
blit_sdma_command(sdma_pkts.timestamp(op=amd_gpu.SDMA_OP_TIMESTAMP, sub_op=amd_gpu.SDMA_SUBOP_TIMESTAMP_GET_GLOBAL,
|
||||
addr=ctypes.addressof(completion_signal) + getattr(hsa.amd_signal_t, 'start_ts').offset))
|
||||
blit_sdma_command(self.sdma_flush_hdp_pkt)
|
||||
blit_sdma_command(self.sdma_cache_inv)
|
||||
@@ -315,22 +343,43 @@ class KFDDevice(Compiled):
|
||||
copies_commands = (copy_size + SDMA_MAX_COPY_SIZE - 1) // SDMA_MAX_COPY_SIZE
|
||||
for _ in range(copies_commands):
|
||||
step_copy_size = min(copy_size - copied, SDMA_MAX_COPY_SIZE)
|
||||
blit_sdma_command(sdma_pkts.copy_linear(op=amd_sdma.SDMA_OP_COPY, sub_op=amd_sdma.SDMA_SUBOP_COPY_LINEAR,
|
||||
blit_sdma_command(sdma_pkts.copy_linear(op=amd_gpu.SDMA_OP_COPY, sub_op=amd_gpu.SDMA_SUBOP_COPY_LINEAR,
|
||||
count=step_copy_size-1, src_addr=src+copied, dst_addr=dest+copied))
|
||||
copied += step_copy_size
|
||||
|
||||
blit_sdma_command(self.sdma_cache_wb)
|
||||
if completion_signal is not None:
|
||||
blit_sdma_command(sdma_pkts.timestamp(op=amd_sdma.SDMA_OP_TIMESTAMP, sub_op=amd_sdma.SDMA_SUBOP_TIMESTAMP_GET_GLOBAL,
|
||||
blit_sdma_command(sdma_pkts.timestamp(op=amd_gpu.SDMA_OP_TIMESTAMP, sub_op=amd_gpu.SDMA_SUBOP_TIMESTAMP_GET_GLOBAL,
|
||||
addr=ctypes.addressof(completion_signal) + getattr(hsa.amd_signal_t, 'end_ts').offset))
|
||||
|
||||
if completion_signal is not None:
|
||||
signal_addr = ctypes.addressof(completion_signal) + getattr(hsa.amd_signal_t, 'value').offset
|
||||
blit_sdma_command(sdma_pkts.atomic(op=amd_sdma.SDMA_OP_ATOMIC, operation=amd_sdma.SDMA_ATOMIC_ADD64, addr=signal_addr, src_data=(1<<64)-1))
|
||||
blit_sdma_command(sdma_pkts.atomic(op=amd_gpu.SDMA_OP_ATOMIC, operation=amd_gpu.SDMA_ATOMIC_ADD64, addr=signal_addr, src_data=(1<<64)-1))
|
||||
if completion_signal.event_mailbox_ptr != 0:
|
||||
blit_sdma_command(sdma_pkts.fence(op=amd_sdma.SDMA_OP_FENCE, mtype=3, addr=completion_signal.event_mailbox_ptr,
|
||||
blit_sdma_command(sdma_pkts.fence(op=amd_gpu.SDMA_OP_FENCE, mtype=3, addr=completion_signal.event_mailbox_ptr,
|
||||
data=completion_signal.event_id))
|
||||
blit_sdma_command(sdma_pkts.trap(op=amd_sdma.SDMA_OP_TRAP, int_ctx=completion_signal.event_id))
|
||||
blit_sdma_command(sdma_pkts.trap(op=amd_gpu.SDMA_OP_TRAP, int_ctx=completion_signal.event_id))
|
||||
|
||||
self.sdma_write_pointer[0] = self.sdma_doorbell_value
|
||||
self.sdma_doorbell[0] = self.sdma_doorbell_value
|
||||
|
||||
def _submit_cache_inv(self, addr=0x0, sz=(1 << 64)-1, gli=0, glv=0, glk=0, gl1=0, gl2=0):
  """Write an ACQUIRE_MEM PM4 packet into the indirect buffer, submit it on the AQL queue and wait.

  `addr` and `sz` are split into low/high dwords and written into the packet
  (defaults: address 0, size 2**64-1). `gli`, `glv`, `glk`, `gl1` and `gl2` are fed
  to the matching PACKET3_ACQUIRE_MEM_GCR_CNTL_*_INV helpers and OR-ed together
  into the last dword. The method asserts that the queue's read and write dispatch
  ids match after waiting on the completion signal's event.
  """
  ib_view = to_mv(self.pm4_indirect_buf.va_addr, 0x1000).cast("I")

  header = amd_gpu.PACKET3(amd_gpu.PACKET3_ACQUIRE_MEM, 6)
  gcr_cntl = amd_gpu.PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(gli)
  gcr_cntl |= amd_gpu.PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(glk)
  gcr_cntl |= amd_gpu.PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(glv)
  gcr_cntl |= amd_gpu.PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(gl1)
  gcr_cntl |= amd_gpu.PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(gl2)

  # eight dwords in total: header, 0, size lo/hi, address lo/hi, 0, cache-control bits
  dwords = (header, 0, sz & 0xffffffff, (sz >> 32) & 0xff, addr & 0xffffffff, (addr >> 32) & 0xffffff, 0, gcr_cntl)
  for idx, dword in enumerate(dwords): ib_view[idx] = dword

  # copy the prebuilt PM4 AQL packet into the ring slot selected by the current doorbell value
  slot_offset = (self.aql_doorbell_value * AQL_PACKET_SIZE) % self.aql_ring.size
  ctypes.memmove(self.aql_ring.va_addr + slot_offset, ctypes.addressof(self.pm4_packet), AQL_PACKET_SIZE)

  # publish the new write index first, then ring the doorbell, then advance the local counter
  ticket = self.aql_doorbell_value
  self.amd_aql_queue.write_dispatch_id = ticket + 1
  self.aql_doorbell[0] = ticket
  self.aql_doorbell_value = ticket + 1

  # wait on the event attached to the completion signal
  events = (kfd.struct_kfd_event_data * 1)()
  events[0].event_id = self.completion_signal.event_id
  kio.wait_events(KFDDevice.kfd, events_ptr=ctypes.addressof(events), num_events=1, wait_for_all=1, timeout=1000)

  assert (wp:=self.amd_aql_queue.write_dispatch_id) == (rp:=self.amd_aql_queue.read_dispatch_id), f"didn't run {wp} != {rp}"
|
||||
|
||||
Reference in New Issue
Block a user