diff --git a/extra/gemm/asm/gemm.s b/extra/gemm/asm/gemm.s index 5d0129111d..56acb3c006 100644 --- a/extra/gemm/asm/gemm.s +++ b/extra/gemm/asm/gemm.s @@ -5,16 +5,17 @@ .type gemm,@function gemm: - // ** global buffers + // ** global buffers s_load_dwordx2 s[28:29], s[0:1], 0x0 // C s_load_dwordx4 s[32:35], s[0:1], 0x8 // A, B - // ** other inputs to the kernel - // info + s_waitcnt lgkmcnt(0) + // ** other kernel args + // info s_mov_b32 s51, 0x00000001 // gemm_info = 1 s_mov_b32 s53, 0x00000001 // kernel_info0 = 1 s_mov_b32 s11, 0x40010020 // kernel_info1 = 0x40010020 s_mov_b32 s54, 0x00000400 // numWG = 1024 - // sizes / strides + // sizes / strides s_mov_b32 s24, 0x00002000 // sizesFree0 = M = 8192 s_mov_b32 s25, 0x00002000 // sizesFree1 = N = 8192 s_mov_b32 s26, 0x00000001 // sizesFree2 = BATCH = 1 @@ -28,15 +29,10 @@ gemm: s_mov_b32 s41, 0x00000000 // strideA1 s_mov_b32 s42, 0x00002000 // strideB0 s_mov_b32 s43, 0x00000000 // strideB1 - // scalars - s_mov_b32 s44, 0x3F800000 // alpha = 1.0f - s_mov_b32 s45, 0x00000000 // beta = 0.0f - - // ** workgroup mapping + // ** workgroup mapping s_lshr_b32 s52, s51, 30 // 000000002924: 8F349E33 s_and_b32 s51, 0x3fffffff, s51 // 000000002928: 863333FF 3FFFFFFF s_cmp_eq_u32 s52, 0 // 000000002930: BF068034 - s_waitcnt lgkmcnt(0) // 000000002958: BF8CC07F s_and_b32 s10, s53, 0xffff0000 // 000000002A70: 860AFF35 FFFF0000 s_lshr_b32 s10, s10, 16 // 000000002A78: 8F0A900A s_and_b32 s50, s53, 0xffff // 000000002A7C: 8632FF35 0000FFFF @@ -193,11 +189,6 @@ label_skip_WGMXCC: s_subb_u32 s33, s33, 0 // 000000002E40: 82A18021 s_sub_u32 s34, s34, 16 // 000000002E44: 80A29022 s_subb_u32 s35, s35, 0 // 000000002E48: 82A38023 - v_cmp_eq_f32_e64 vcc, s44, 0 // 000000002E4C: D042006A 0001002C - s_cbranch_vccz label_AlphaNonZero // 000000002E54: BF860001 - s_mov_b32 s27, 0 // 000000002E58: BE9B0080 - -label_AlphaNonZero: s_and_b32 s84, s50, 0x3fff // 000000002E5C: 8654FF32 00003FFF s_cmp_eq_u32 s84, 1 // 000000002E64: BF068154 
s_cbranch_scc1 label_GSU // 000000002E68: BF850037 @@ -1283,24 +1274,14 @@ label_LoopEndL: label_toPGR1: s_and_b32 s8, s50, 0x3fff // 000000004730: 8608FF32 00003FFF - s_cmp_eq_u32 s8, 1 // 000000004738: BF068108 - s_cbranch_scc0 label_GSU_3 // 00000000473C: BF8404FB - s_cmpk_eq_u32 s45, 0x0 // 000000004740: B42D0000 - s_cbranch_scc0 label_GSU_3 // 000000004744: BF8404F9 - s_cmp_eq_u32 s44, 1.0 // 000000004748: BF06F22C - s_cbranch_scc0 label_GSU_3 // 00000000474C: BF8404F7 s_and_b32 s84, 0xff, s24 // 000000004750: 865418FF 000000FF s_add_u32 s85, -1, s14 // 000000004758: 80550EC1 s_cmp_ge_u32 s2, s85 // 00000000475C: BF095502 s_cselect_b32 s84, s84, 0 // 000000004760: 85548054 - s_cmpk_gt_u32 s84, 0x0 // 000000004764: B5540000 - s_cbranch_scc1 label_GSU_3 // 000000004768: BF8504F0 s_and_b32 s84, 0xff, s25 // 00000000476C: 865419FF 000000FF s_add_u32 s85, -1, s15 // 000000004774: 80550FC1 s_cmp_ge_u32 s3, s85 // 000000004778: BF095503 s_cselect_b32 s84, s84, 0 // 00000000477C: 85548054 - s_cmpk_gt_u32 s84, 0x0 // 000000004780: B5540000 - s_cbranch_scc1 label_GSU_3 // 000000004784: BF8504E9 v_mfma_f32_16x16x32_bf16 a[0:3], v[68:71], v[4:7], a[0:3] // 000000004788: D3B58000 04020944 ds_read_b128 v[36:39], v2 offset:64 // 000000004790: D9FE0040 24000002 v_mfma_f32_16x16x32_bf16 a[4:7], v[68:71], v[8:11], a[4:7] // 000000004798: D3B58004 04121144 @@ -1987,156 +1968,6 @@ label_GW_B0_E0: s_nop 0 // 000000005B20: BF800000 s_branch label_GW_End // 000000005B24: BF820000 -label_GSU_3: -label_OptNLL_End: - s_waitcnt lgkmcnt(7) // 000000005B2C: BF8CC77F - v_mfma_f32_16x16x32_bf16 a[0:3], v[68:71], v[4:7], a[0:3] // 000000005B30: D3B58000 04020944 - ds_read_b128 v[36:39], v2 offset:64 // 000000005B38: D9FE0040 24000002 - v_mfma_f32_16x16x32_bf16 a[4:7], v[68:71], v[8:11], a[4:7] // 000000005B40: D3B58004 04121144 - ds_read_b128 v[100:103], v3 offset:64 // 000000005B48: D9FE0040 64000003 - v_mfma_f32_16x16x32_bf16 a[8:11], v[68:71], v[12:15], a[8:11]// 000000005B50: D3B58008 
04221944 - ds_read_b128 v[40:43], v2 offset:192 // 000000005B58: D9FE00C0 28000002 - v_mfma_f32_16x16x32_bf16 a[12:15], v[68:71], v[16:19], a[12:15]// 000000005B60: D3B5800C 04322144 - ds_read_b128 v[44:47], v2 offset:320 // 000000005B68: D9FE0140 2C000002 - v_mfma_f32_16x16x32_bf16 a[16:19], v[68:71], v[20:23], a[16:19]// 000000005B70: D3B58010 04422944 - ds_read_b128 v[48:51], v2 offset:448 // 000000005B78: D9FE01C0 30000002 - v_mfma_f32_16x16x32_bf16 a[20:23], v[68:71], v[24:27], a[20:23]// 000000005B80: D3B58014 04523144 - ds_read_b128 v[52:55], v2 offset:576 // 000000005B88: D9FE0240 34000002 - v_mfma_f32_16x16x32_bf16 a[24:27], v[68:71], v[28:31], a[24:27]// 000000005B90: D3B58018 04623944 - ds_read_b128 v[56:59], v2 offset:704 // 000000005B98: D9FE02C0 38000002 - v_mfma_f32_16x16x32_bf16 a[28:31], v[68:71], v[32:35], a[28:31]// 000000005BA0: D3B5801C 04724144 - ds_read_b128 v[60:63], v2 offset:832 // 000000005BA8: D9FE0340 3C000002 - s_waitcnt lgkmcnt(8) // 000000005BB0: BF8CC87F - v_mfma_f32_16x16x32_bf16 a[32:35], v[72:75], v[4:7], a[32:35]// 000000005BB4: D3B58020 04820948 - ds_read_b128 v[64:67], v2 offset:960 // 000000005BBC: D9FE03C0 40000002 - v_mfma_f32_16x16x32_bf16 a[36:39], v[72:75], v[8:11], a[36:39]// 000000005BC4: D3B58024 04921148 - ds_read_b128 v[104:107], v3 offset:192 // 000000005BCC: D9FE00C0 68000003 - v_mfma_f32_16x16x32_bf16 a[40:43], v[72:75], v[12:15], a[40:43]// 000000005BD4: D3B58028 04A21948 - ds_read_b128 v[108:111], v3 offset:320 // 000000005BDC: D9FE0140 6C000003 - v_mfma_f32_16x16x32_bf16 a[44:47], v[72:75], v[16:19], a[44:47]// 000000005BE4: D3B5802C 04B22148 - ds_read_b128 v[112:115], v3 offset:448 // 000000005BEC: D9FE01C0 70000003 - v_mfma_f32_16x16x32_bf16 a[48:51], v[72:75], v[20:23], a[48:51]// 000000005BF4: D3B58030 04C22948 - ds_read_b128 v[116:119], v3 offset:576 // 000000005BFC: D9FE0240 74000003 - v_mfma_f32_16x16x32_bf16 a[52:55], v[72:75], v[24:27], a[52:55]// 000000005C04: D3B58034 04D23148 - ds_read_b128 
v[120:123], v3 offset:704 // 000000005C0C: D9FE02C0 78000003 - v_mfma_f32_16x16x32_bf16 a[56:59], v[72:75], v[28:31], a[56:59]// 000000005C14: D3B58038 04E23948 - ds_read_b128 v[124:127], v3 offset:832 // 000000005C1C: D9FE0340 7C000003 - v_mfma_f32_16x16x32_bf16 a[60:63], v[72:75], v[32:35], a[60:63]// 000000005C24: D3B5803C 04F24148 - ds_read_b128 v[128:131], v3 offset:960 // 000000005C2C: D9FE03C0 80000003 - v_mfma_f32_16x16x32_bf16 a[64:67], v[76:79], v[4:7], a[64:67]// 000000005C34: D3B58040 0502094C - v_mfma_f32_16x16x32_bf16 a[68:71], v[76:79], v[8:11], a[68:71]// 000000005C3C: D3B58044 0512114C - v_mfma_f32_16x16x32_bf16 a[72:75], v[76:79], v[12:15], a[72:75]// 000000005C44: D3B58048 0522194C - v_mfma_f32_16x16x32_bf16 a[76:79], v[76:79], v[16:19], a[76:79]// 000000005C4C: D3B5804C 0532214C - v_mfma_f32_16x16x32_bf16 a[80:83], v[76:79], v[20:23], a[80:83]// 000000005C54: D3B58050 0542294C - v_mfma_f32_16x16x32_bf16 a[84:87], v[76:79], v[24:27], a[84:87]// 000000005C5C: D3B58054 0552314C - v_mfma_f32_16x16x32_bf16 a[88:91], v[76:79], v[28:31], a[88:91]// 000000005C64: D3B58058 0562394C - v_mfma_f32_16x16x32_bf16 a[92:95], v[76:79], v[32:35], a[92:95]// 000000005C6C: D3B5805C 0572414C - v_mfma_f32_16x16x32_bf16 a[96:99], v[80:83], v[4:7], a[96:99]// 000000005C74: D3B58060 05820950 - v_mfma_f32_16x16x32_bf16 a[100:103], v[80:83], v[8:11], a[100:103]// 000000005C7C: D3B58064 05921150 - v_mfma_f32_16x16x32_bf16 a[104:107], v[80:83], v[12:15], a[104:107]// 000000005C84: D3B58068 05A21950 - v_mfma_f32_16x16x32_bf16 a[108:111], v[80:83], v[16:19], a[108:111]// 000000005C8C: D3B5806C 05B22150 - v_mfma_f32_16x16x32_bf16 a[112:115], v[80:83], v[20:23], a[112:115]// 000000005C94: D3B58070 05C22950 - v_mfma_f32_16x16x32_bf16 a[116:119], v[80:83], v[24:27], a[116:119]// 000000005C9C: D3B58074 05D23150 - v_mfma_f32_16x16x32_bf16 a[120:123], v[80:83], v[28:31], a[120:123]// 000000005CA4: D3B58078 05E23950 - v_mfma_f32_16x16x32_bf16 a[124:127], v[80:83], v[32:35], 
a[124:127]// 000000005CAC: D3B5807C 05F24150 - v_mfma_f32_16x16x32_bf16 a[128:131], v[84:87], v[4:7], a[128:131]// 000000005CB4: D3B58080 06020954 - v_mfma_f32_16x16x32_bf16 a[132:135], v[84:87], v[8:11], a[132:135]// 000000005CBC: D3B58084 06121154 - v_mfma_f32_16x16x32_bf16 a[136:139], v[84:87], v[12:15], a[136:139]// 000000005CC4: D3B58088 06221954 - v_mfma_f32_16x16x32_bf16 a[140:143], v[84:87], v[16:19], a[140:143]// 000000005CCC: D3B5808C 06322154 - v_mfma_f32_16x16x32_bf16 a[144:147], v[84:87], v[20:23], a[144:147]// 000000005CD4: D3B58090 06422954 - v_mfma_f32_16x16x32_bf16 a[148:151], v[84:87], v[24:27], a[148:151]// 000000005CDC: D3B58094 06523154 - v_mfma_f32_16x16x32_bf16 a[152:155], v[84:87], v[28:31], a[152:155]// 000000005CE4: D3B58098 06623954 - v_mfma_f32_16x16x32_bf16 a[156:159], v[84:87], v[32:35], a[156:159]// 000000005CEC: D3B5809C 06724154 - v_mfma_f32_16x16x32_bf16 a[160:163], v[88:91], v[4:7], a[160:163]// 000000005CF4: D3B580A0 06820958 - v_mfma_f32_16x16x32_bf16 a[164:167], v[88:91], v[8:11], a[164:167]// 000000005CFC: D3B580A4 06921158 - v_mfma_f32_16x16x32_bf16 a[168:171], v[88:91], v[12:15], a[168:171]// 000000005D04: D3B580A8 06A21958 - v_mfma_f32_16x16x32_bf16 a[172:175], v[88:91], v[16:19], a[172:175]// 000000005D0C: D3B580AC 06B22158 - v_mfma_f32_16x16x32_bf16 a[176:179], v[88:91], v[20:23], a[176:179]// 000000005D14: D3B580B0 06C22958 - v_mfma_f32_16x16x32_bf16 a[180:183], v[88:91], v[24:27], a[180:183]// 000000005D1C: D3B580B4 06D23158 - v_mfma_f32_16x16x32_bf16 a[184:187], v[88:91], v[28:31], a[184:187]// 000000005D24: D3B580B8 06E23958 - v_mfma_f32_16x16x32_bf16 a[188:191], v[88:91], v[32:35], a[188:191]// 000000005D2C: D3B580BC 06F24158 - v_mfma_f32_16x16x32_bf16 a[192:195], v[92:95], v[4:7], a[192:195]// 000000005D34: D3B580C0 0702095C - v_mfma_f32_16x16x32_bf16 a[196:199], v[92:95], v[8:11], a[196:199]// 000000005D3C: D3B580C4 0712115C - v_mfma_f32_16x16x32_bf16 a[200:203], v[92:95], v[12:15], a[200:203]// 000000005D44: 
D3B580C8 0722195C - v_mfma_f32_16x16x32_bf16 a[204:207], v[92:95], v[16:19], a[204:207]// 000000005D4C: D3B580CC 0732215C - v_mfma_f32_16x16x32_bf16 a[208:211], v[92:95], v[20:23], a[208:211]// 000000005D54: D3B580D0 0742295C - v_mfma_f32_16x16x32_bf16 a[212:215], v[92:95], v[24:27], a[212:215]// 000000005D5C: D3B580D4 0752315C - v_mfma_f32_16x16x32_bf16 a[216:219], v[92:95], v[28:31], a[216:219]// 000000005D64: D3B580D8 0762395C - v_mfma_f32_16x16x32_bf16 a[220:223], v[92:95], v[32:35], a[220:223]// 000000005D6C: D3B580DC 0772415C - v_mfma_f32_16x16x32_bf16 a[224:227], v[96:99], v[4:7], a[224:227]// 000000005D74: D3B580E0 07820960 - v_mfma_f32_16x16x32_bf16 a[228:231], v[96:99], v[8:11], a[228:231]// 000000005D7C: D3B580E4 07921160 - v_mfma_f32_16x16x32_bf16 a[232:235], v[96:99], v[12:15], a[232:235]// 000000005D84: D3B580E8 07A21960 - v_mfma_f32_16x16x32_bf16 a[236:239], v[96:99], v[16:19], a[236:239]// 000000005D8C: D3B580EC 07B22160 - v_mfma_f32_16x16x32_bf16 a[240:243], v[96:99], v[20:23], a[240:243]// 000000005D94: D3B580F0 07C22960 - v_mfma_f32_16x16x32_bf16 a[244:247], v[96:99], v[24:27], a[244:247]// 000000005D9C: D3B580F4 07D23160 - v_mfma_f32_16x16x32_bf16 a[248:251], v[96:99], v[28:31], a[248:251]// 000000005DA4: D3B580F8 07E23960 - v_mfma_f32_16x16x32_bf16 a[252:255], v[96:99], v[32:35], a[252:255]// 000000005DAC: D3B580FC 07F24160 - s_waitcnt lgkmcnt(0) // 000000005DB4: BF8CC07F - v_mfma_f32_16x16x32_bf16 a[0:3], v[100:103], v[36:39], a[0:3]// 000000005DB8: D3B58000 04024964 - v_mfma_f32_16x16x32_bf16 a[4:7], v[100:103], v[40:43], a[4:7]// 000000005DC0: D3B58004 04125164 - v_mfma_f32_16x16x32_bf16 a[8:11], v[100:103], v[44:47], a[8:11]// 000000005DC8: D3B58008 04225964 - v_mfma_f32_16x16x32_bf16 a[12:15], v[100:103], v[48:51], a[12:15]// 000000005DD0: D3B5800C 04326164 - v_mfma_f32_16x16x32_bf16 a[16:19], v[100:103], v[52:55], a[16:19]// 000000005DD8: D3B58010 04426964 - v_mfma_f32_16x16x32_bf16 a[20:23], v[100:103], v[56:59], a[20:23]// 000000005DE0: 
D3B58014 04527164 - v_mfma_f32_16x16x32_bf16 a[24:27], v[100:103], v[60:63], a[24:27]// 000000005DE8: D3B58018 04627964 - v_mfma_f32_16x16x32_bf16 a[28:31], v[100:103], v[64:67], a[28:31]// 000000005DF0: D3B5801C 04728164 - v_mfma_f32_16x16x32_bf16 a[32:35], v[104:107], v[36:39], a[32:35]// 000000005DF8: D3B58020 04824968 - v_mfma_f32_16x16x32_bf16 a[36:39], v[104:107], v[40:43], a[36:39]// 000000005E00: D3B58024 04925168 - v_mfma_f32_16x16x32_bf16 a[40:43], v[104:107], v[44:47], a[40:43]// 000000005E08: D3B58028 04A25968 - v_mfma_f32_16x16x32_bf16 a[44:47], v[104:107], v[48:51], a[44:47]// 000000005E10: D3B5802C 04B26168 - v_mfma_f32_16x16x32_bf16 a[48:51], v[104:107], v[52:55], a[48:51]// 000000005E18: D3B58030 04C26968 - v_mfma_f32_16x16x32_bf16 a[52:55], v[104:107], v[56:59], a[52:55]// 000000005E20: D3B58034 04D27168 - v_mfma_f32_16x16x32_bf16 a[56:59], v[104:107], v[60:63], a[56:59]// 000000005E28: D3B58038 04E27968 - v_mfma_f32_16x16x32_bf16 a[60:63], v[104:107], v[64:67], a[60:63]// 000000005E30: D3B5803C 04F28168 - v_mfma_f32_16x16x32_bf16 a[64:67], v[108:111], v[36:39], a[64:67]// 000000005E38: D3B58040 0502496C - v_mfma_f32_16x16x32_bf16 a[68:71], v[108:111], v[40:43], a[68:71]// 000000005E40: D3B58044 0512516C - v_mfma_f32_16x16x32_bf16 a[72:75], v[108:111], v[44:47], a[72:75]// 000000005E48: D3B58048 0522596C - v_mfma_f32_16x16x32_bf16 a[76:79], v[108:111], v[48:51], a[76:79]// 000000005E50: D3B5804C 0532616C - v_mfma_f32_16x16x32_bf16 a[80:83], v[108:111], v[52:55], a[80:83]// 000000005E58: D3B58050 0542696C - v_mfma_f32_16x16x32_bf16 a[84:87], v[108:111], v[56:59], a[84:87]// 000000005E60: D3B58054 0552716C - v_mfma_f32_16x16x32_bf16 a[88:91], v[108:111], v[60:63], a[88:91]// 000000005E68: D3B58058 0562796C - v_mfma_f32_16x16x32_bf16 a[92:95], v[108:111], v[64:67], a[92:95]// 000000005E70: D3B5805C 0572816C - v_mfma_f32_16x16x32_bf16 a[96:99], v[112:115], v[36:39], a[96:99]// 000000005E78: D3B58060 05824970 - v_mfma_f32_16x16x32_bf16 a[100:103], 
v[112:115], v[40:43], a[100:103]// 000000005E80: D3B58064 05925170 - v_mfma_f32_16x16x32_bf16 a[104:107], v[112:115], v[44:47], a[104:107]// 000000005E88: D3B58068 05A25970 - v_mfma_f32_16x16x32_bf16 a[108:111], v[112:115], v[48:51], a[108:111]// 000000005E90: D3B5806C 05B26170 - v_mfma_f32_16x16x32_bf16 a[112:115], v[112:115], v[52:55], a[112:115]// 000000005E98: D3B58070 05C26970 - v_mfma_f32_16x16x32_bf16 a[116:119], v[112:115], v[56:59], a[116:119]// 000000005EA0: D3B58074 05D27170 - v_mfma_f32_16x16x32_bf16 a[120:123], v[112:115], v[60:63], a[120:123]// 000000005EA8: D3B58078 05E27970 - v_mfma_f32_16x16x32_bf16 a[124:127], v[112:115], v[64:67], a[124:127]// 000000005EB0: D3B5807C 05F28170 - v_mfma_f32_16x16x32_bf16 a[128:131], v[116:119], v[36:39], a[128:131]// 000000005EB8: D3B58080 06024974 - v_mfma_f32_16x16x32_bf16 a[132:135], v[116:119], v[40:43], a[132:135]// 000000005EC0: D3B58084 06125174 - v_mfma_f32_16x16x32_bf16 a[136:139], v[116:119], v[44:47], a[136:139]// 000000005EC8: D3B58088 06225974 - v_mfma_f32_16x16x32_bf16 a[140:143], v[116:119], v[48:51], a[140:143]// 000000005ED0: D3B5808C 06326174 - v_mfma_f32_16x16x32_bf16 a[144:147], v[116:119], v[52:55], a[144:147]// 000000005ED8: D3B58090 06426974 - v_mfma_f32_16x16x32_bf16 a[148:151], v[116:119], v[56:59], a[148:151]// 000000005EE0: D3B58094 06527174 - v_mfma_f32_16x16x32_bf16 a[152:155], v[116:119], v[60:63], a[152:155]// 000000005EE8: D3B58098 06627974 - v_mfma_f32_16x16x32_bf16 a[156:159], v[116:119], v[64:67], a[156:159]// 000000005EF0: D3B5809C 06728174 - v_mfma_f32_16x16x32_bf16 a[160:163], v[120:123], v[36:39], a[160:163]// 000000005EF8: D3B580A0 06824978 - v_mfma_f32_16x16x32_bf16 a[164:167], v[120:123], v[40:43], a[164:167]// 000000005F00: D3B580A4 06925178 - v_mfma_f32_16x16x32_bf16 a[168:171], v[120:123], v[44:47], a[168:171]// 000000005F08: D3B580A8 06A25978 - v_mfma_f32_16x16x32_bf16 a[172:175], v[120:123], v[48:51], a[172:175]// 000000005F10: D3B580AC 06B26178 - 
v_mfma_f32_16x16x32_bf16 a[176:179], v[120:123], v[52:55], a[176:179]// 000000005F18: D3B580B0 06C26978 - v_mfma_f32_16x16x32_bf16 a[180:183], v[120:123], v[56:59], a[180:183]// 000000005F20: D3B580B4 06D27178 - v_mfma_f32_16x16x32_bf16 a[184:187], v[120:123], v[60:63], a[184:187]// 000000005F28: D3B580B8 06E27978 - v_mfma_f32_16x16x32_bf16 a[188:191], v[120:123], v[64:67], a[188:191]// 000000005F30: D3B580BC 06F28178 - v_mfma_f32_16x16x32_bf16 a[192:195], v[124:127], v[36:39], a[192:195]// 000000005F38: D3B580C0 0702497C - v_mfma_f32_16x16x32_bf16 a[196:199], v[124:127], v[40:43], a[196:199]// 000000005F40: D3B580C4 0712517C - v_mfma_f32_16x16x32_bf16 a[200:203], v[124:127], v[44:47], a[200:203]// 000000005F48: D3B580C8 0722597C - v_mfma_f32_16x16x32_bf16 a[204:207], v[124:127], v[48:51], a[204:207]// 000000005F50: D3B580CC 0732617C - v_mfma_f32_16x16x32_bf16 a[208:211], v[124:127], v[52:55], a[208:211]// 000000005F58: D3B580D0 0742697C - v_mfma_f32_16x16x32_bf16 a[212:215], v[124:127], v[56:59], a[212:215]// 000000005F60: D3B580D4 0752717C - v_mfma_f32_16x16x32_bf16 a[216:219], v[124:127], v[60:63], a[216:219]// 000000005F68: D3B580D8 0762797C - v_mfma_f32_16x16x32_bf16 a[220:223], v[124:127], v[64:67], a[220:223]// 000000005F70: D3B580DC 0772817C - v_mfma_f32_16x16x32_bf16 a[224:227], v[128:131], v[36:39], a[224:227]// 000000005F78: D3B580E0 07824980 - v_mfma_f32_16x16x32_bf16 a[228:231], v[128:131], v[40:43], a[228:231]// 000000005F80: D3B580E4 07925180 - v_mfma_f32_16x16x32_bf16 a[232:235], v[128:131], v[44:47], a[232:235]// 000000005F88: D3B580E8 07A25980 - v_mfma_f32_16x16x32_bf16 a[236:239], v[128:131], v[48:51], a[236:239]// 000000005F90: D3B580EC 07B26180 - v_mfma_f32_16x16x32_bf16 a[240:243], v[128:131], v[52:55], a[240:243]// 000000005F98: D3B580F0 07C26980 - v_mfma_f32_16x16x32_bf16 a[244:247], v[128:131], v[56:59], a[244:247]// 000000005FA0: D3B580F4 07D27180 - v_mfma_f32_16x16x32_bf16 a[248:251], v[128:131], v[60:63], a[248:251]// 000000005FA8: 
D3B580F8 07E27980 - v_mfma_f32_16x16x32_bf16 a[252:255], v[128:131], v[64:67], a[252:255]// 000000005FB0: D3B580FC 07F28180 - label_toPGR1end_OrdNLL: v_lshrrev_b32_e32 v8, 6, v134 // 000000005FB8: 20110C86 v_lshrrev_b32_e32 v9, 1, v8 // 000000005FBC: 20121081 @@ -4715,10554 +4546,16 @@ label_GW_End_1: s_setpc_b64 s[30:31] // 00000000BB94: BE801D1E label_GSU_4: - s_cmpk_eq_u32 s45, 0x0 // 00000000BB98: B42D0000 - s_cbranch_scc0 label_GW_Beta_2 // 00000000BB9C: BF841D7F s_and_b32 s30, 0xff, s24 // 00000000BBA0: 861E18FF 000000FF s_add_u32 s31, -1, s14 // 00000000BBA8: 801F0EC1 s_cmp_ge_u32 s2, s31 // 00000000BBAC: BF091F02 s_cselect_b32 s30, s30, 0 // 00000000BBB0: 851E801E - s_cmpk_gt_u32 s30, 0x0 // 00000000BBB4: B51E0000 - s_cbranch_scc1 label_GW_B0_E1_M_1 // 00000000BBB8: BF850AE2 s_and_b32 s30, 0xff, s25 // 00000000BBBC: 861E19FF 000000FF s_add_u32 s31, -1, s15 // 00000000BBC4: 801F0FC1 s_cmp_ge_u32 s3, s31 // 00000000BBC8: BF091F03 s_cselect_b32 s30, s30, 0 // 00000000BBCC: 851E801E - s_cmpk_gt_u32 s30, 0x0 // 00000000BBD0: B51E0000 - s_cbranch_scc1 label_GW_B0_E1_N_1 // 00000000BBD4: BF8504AE -label_GW_B0_E0_2: - v_add_lshl_u32 v15, v7, v4, 1 // 00000000BBD8: D1FE000F 02060907 - v_accvgpr_read_b32 v24, a0 // 00000000BBE0: D3D84018 18000100 - v_accvgpr_read_b32 v25, a4 // 00000000BBE8: D3D84019 18000104 - v_accvgpr_read_b32 v26, a8 // 00000000BBF0: D3D8401A 18000108 - v_accvgpr_read_b32 v27, a12 // 00000000BBF8: D3D8401B 1800010C - v_accvgpr_read_b32 v28, a16 // 00000000BC00: D3D8401C 18000110 - v_accvgpr_read_b32 v29, a20 // 00000000BC08: D3D8401D 18000114 - v_accvgpr_read_b32 v30, a24 // 00000000BC10: D3D8401E 18000118 - v_accvgpr_read_b32 v31, a28 // 00000000BC18: D3D8401F 1800011C - v_accvgpr_read_b32 v32, a32 // 00000000BC20: D3D84020 18000120 - v_accvgpr_read_b32 v33, a36 // 00000000BC28: D3D84021 18000124 - v_accvgpr_read_b32 v34, a40 // 00000000BC30: D3D84022 18000128 - v_accvgpr_read_b32 v35, a44 // 00000000BC38: D3D84023 1800012C - v_accvgpr_read_b32 
v36, a48 // 00000000BC40: D3D84024 18000130 - v_accvgpr_read_b32 v37, a52 // 00000000BC48: D3D84025 18000134 - v_accvgpr_read_b32 v38, a56 // 00000000BC50: D3D84026 18000138 - v_accvgpr_read_b32 v39, a60 // 00000000BC58: D3D84027 1800013C - v_accvgpr_read_b32 v40, a64 // 00000000BC60: D3D84028 18000140 - v_accvgpr_read_b32 v41, a68 // 00000000BC68: D3D84029 18000144 - v_accvgpr_read_b32 v42, a72 // 00000000BC70: D3D8402A 18000148 - v_accvgpr_read_b32 v43, a76 // 00000000BC78: D3D8402B 1800014C - v_accvgpr_read_b32 v44, a80 // 00000000BC80: D3D8402C 18000150 - v_accvgpr_read_b32 v45, a84 // 00000000BC88: D3D8402D 18000154 - v_accvgpr_read_b32 v46, a88 // 00000000BC90: D3D8402E 18000158 - v_accvgpr_read_b32 v47, a92 // 00000000BC98: D3D8402F 1800015C - v_accvgpr_read_b32 v48, a96 // 00000000BCA0: D3D84030 18000160 - v_accvgpr_read_b32 v49, a100 // 00000000BCA8: D3D84031 18000164 - v_accvgpr_read_b32 v50, a104 // 00000000BCB0: D3D84032 18000168 - v_accvgpr_read_b32 v51, a108 // 00000000BCB8: D3D84033 1800016C - v_accvgpr_read_b32 v52, a112 // 00000000BCC0: D3D84034 18000170 - v_accvgpr_read_b32 v53, a116 // 00000000BCC8: D3D84035 18000174 - v_accvgpr_read_b32 v54, a120 // 00000000BCD0: D3D84036 18000178 - v_accvgpr_read_b32 v55, a124 // 00000000BCD8: D3D84037 1800017C - v_accvgpr_read_b32 v56, a128 // 00000000BCE0: D3D84038 18000180 - v_accvgpr_read_b32 v57, a132 // 00000000BCE8: D3D84039 18000184 - v_accvgpr_read_b32 v58, a136 // 00000000BCF0: D3D8403A 18000188 - v_accvgpr_read_b32 v59, a140 // 00000000BCF8: D3D8403B 1800018C - v_accvgpr_read_b32 v60, a144 // 00000000BD00: D3D8403C 18000190 - v_accvgpr_read_b32 v61, a148 // 00000000BD08: D3D8403D 18000194 - v_accvgpr_read_b32 v62, a152 // 00000000BD10: D3D8403E 18000198 - v_accvgpr_read_b32 v63, a156 // 00000000BD18: D3D8403F 1800019C - v_accvgpr_read_b32 v64, a160 // 00000000BD20: D3D84040 180001A0 - v_accvgpr_read_b32 v65, a164 // 00000000BD28: D3D84041 180001A4 - v_accvgpr_read_b32 v66, a168 // 00000000BD30: 
D3D84042 180001A8 - v_accvgpr_read_b32 v67, a172 // 00000000BD38: D3D84043 180001AC - v_accvgpr_read_b32 v68, a176 // 00000000BD40: D3D84044 180001B0 - v_accvgpr_read_b32 v69, a180 // 00000000BD48: D3D84045 180001B4 - v_accvgpr_read_b32 v70, a184 // 00000000BD50: D3D84046 180001B8 - v_accvgpr_read_b32 v71, a188 // 00000000BD58: D3D84047 180001BC - v_accvgpr_read_b32 v72, a192 // 00000000BD60: D3D84048 180001C0 - v_accvgpr_read_b32 v73, a196 // 00000000BD68: D3D84049 180001C4 - v_accvgpr_read_b32 v74, a200 // 00000000BD70: D3D8404A 180001C8 - v_accvgpr_read_b32 v75, a204 // 00000000BD78: D3D8404B 180001CC - v_accvgpr_read_b32 v76, a208 // 00000000BD80: D3D8404C 180001D0 - v_accvgpr_read_b32 v77, a212 // 00000000BD88: D3D8404D 180001D4 - v_accvgpr_read_b32 v78, a216 // 00000000BD90: D3D8404E 180001D8 - v_accvgpr_read_b32 v79, a220 // 00000000BD98: D3D8404F 180001DC - v_accvgpr_read_b32 v80, a224 // 00000000BDA0: D3D84050 180001E0 - v_accvgpr_read_b32 v81, a228 // 00000000BDA8: D3D84051 180001E4 - v_accvgpr_read_b32 v82, a232 // 00000000BDB0: D3D84052 180001E8 - v_accvgpr_read_b32 v83, a236 // 00000000BDB8: D3D84053 180001EC - v_accvgpr_read_b32 v84, a240 // 00000000BDC0: D3D84054 180001F0 - v_accvgpr_read_b32 v85, a244 // 00000000BDC8: D3D84055 180001F4 - v_accvgpr_read_b32 v86, a248 // 00000000BDD0: D3D84056 180001F8 - v_accvgpr_read_b32 v87, a252 // 00000000BDD8: D3D84057 180001FC - v_accvgpr_read_b32 v88, a1 // 00000000BDE0: D3D84058 18000101 - v_accvgpr_read_b32 v89, a5 // 00000000BDE8: D3D84059 18000105 - v_accvgpr_read_b32 v90, a9 // 00000000BDF0: D3D8405A 18000109 - v_accvgpr_read_b32 v91, a13 // 00000000BDF8: D3D8405B 1800010D - v_accvgpr_read_b32 v92, a17 // 00000000BE00: D3D8405C 18000111 - v_accvgpr_read_b32 v93, a21 // 00000000BE08: D3D8405D 18000115 - v_accvgpr_read_b32 v94, a25 // 00000000BE10: D3D8405E 18000119 - v_accvgpr_read_b32 v95, a29 // 00000000BE18: D3D8405F 1800011D - v_accvgpr_read_b32 v96, a33 // 00000000BE20: D3D84060 18000121 - 
v_accvgpr_read_b32 v97, a37 // 00000000BE28: D3D84061 18000125 - v_accvgpr_read_b32 v98, a41 // 00000000BE30: D3D84062 18000129 - v_accvgpr_read_b32 v99, a45 // 00000000BE38: D3D84063 1800012D - v_accvgpr_read_b32 v100, a49 // 00000000BE40: D3D84064 18000131 - v_accvgpr_read_b32 v101, a53 // 00000000BE48: D3D84065 18000135 - v_accvgpr_read_b32 v102, a57 // 00000000BE50: D3D84066 18000139 - v_accvgpr_read_b32 v103, a61 // 00000000BE58: D3D84067 1800013D - v_accvgpr_read_b32 v104, a65 // 00000000BE60: D3D84068 18000141 - v_accvgpr_read_b32 v105, a69 // 00000000BE68: D3D84069 18000145 - v_accvgpr_read_b32 v106, a73 // 00000000BE70: D3D8406A 18000149 - v_accvgpr_read_b32 v107, a77 // 00000000BE78: D3D8406B 1800014D - v_accvgpr_read_b32 v108, a81 // 00000000BE80: D3D8406C 18000151 - v_accvgpr_read_b32 v109, a85 // 00000000BE88: D3D8406D 18000155 - v_accvgpr_read_b32 v110, a89 // 00000000BE90: D3D8406E 18000159 - v_accvgpr_read_b32 v111, a93 // 00000000BE98: D3D8406F 1800015D - v_accvgpr_read_b32 v112, a97 // 00000000BEA0: D3D84070 18000161 - v_accvgpr_read_b32 v113, a101 // 00000000BEA8: D3D84071 18000165 - v_accvgpr_read_b32 v114, a105 // 00000000BEB0: D3D84072 18000169 - v_accvgpr_read_b32 v115, a109 // 00000000BEB8: D3D84073 1800016D - v_accvgpr_read_b32 v116, a113 // 00000000BEC0: D3D84074 18000171 - v_accvgpr_read_b32 v117, a117 // 00000000BEC8: D3D84075 18000175 - v_accvgpr_read_b32 v118, a121 // 00000000BED0: D3D84076 18000179 - v_accvgpr_read_b32 v119, a125 // 00000000BED8: D3D84077 1800017D - v_accvgpr_read_b32 v120, a129 // 00000000BEE0: D3D84078 18000181 - v_accvgpr_read_b32 v121, a133 // 00000000BEE8: D3D84079 18000185 - v_accvgpr_read_b32 v122, a137 // 00000000BEF0: D3D8407A 18000189 - v_accvgpr_read_b32 v123, a141 // 00000000BEF8: D3D8407B 1800018D - v_accvgpr_read_b32 v124, a145 // 00000000BF00: D3D8407C 18000191 - v_accvgpr_read_b32 v125, a149 // 00000000BF08: D3D8407D 18000195 - v_accvgpr_read_b32 v126, a153 // 00000000BF10: D3D8407E 18000199 - 
v_accvgpr_read_b32 v127, a157 // 00000000BF18: D3D8407F 1800019D - v_accvgpr_read_b32 v136, a161 // 00000000BF20: D3D84088 180001A1 - v_accvgpr_read_b32 v137, a165 // 00000000BF28: D3D84089 180001A5 - v_accvgpr_read_b32 v138, a169 // 00000000BF30: D3D8408A 180001A9 - v_accvgpr_read_b32 v139, a173 // 00000000BF38: D3D8408B 180001AD - v_accvgpr_read_b32 v140, a177 // 00000000BF40: D3D8408C 180001B1 - v_accvgpr_read_b32 v141, a181 // 00000000BF48: D3D8408D 180001B5 - v_accvgpr_read_b32 v142, a185 // 00000000BF50: D3D8408E 180001B9 - v_accvgpr_read_b32 v143, a189 // 00000000BF58: D3D8408F 180001BD - v_accvgpr_read_b32 v144, a193 // 00000000BF60: D3D84090 180001C1 - v_accvgpr_read_b32 v145, a197 // 00000000BF68: D3D84091 180001C5 - v_accvgpr_read_b32 v146, a201 // 00000000BF70: D3D84092 180001C9 - v_accvgpr_read_b32 v147, a205 // 00000000BF78: D3D84093 180001CD - v_accvgpr_read_b32 v148, a209 // 00000000BF80: D3D84094 180001D1 - v_accvgpr_read_b32 v149, a213 // 00000000BF88: D3D84095 180001D5 - v_accvgpr_read_b32 v150, a217 // 00000000BF90: D3D84096 180001D9 - v_accvgpr_read_b32 v151, a221 // 00000000BF98: D3D84097 180001DD - v_accvgpr_read_b32 v152, a225 // 00000000BFA0: D3D84098 180001E1 - v_accvgpr_read_b32 v153, a229 // 00000000BFA8: D3D84099 180001E5 - v_accvgpr_read_b32 v154, a233 // 00000000BFB0: D3D8409A 180001E9 - v_accvgpr_read_b32 v155, a237 // 00000000BFB8: D3D8409B 180001ED - v_accvgpr_read_b32 v156, a241 // 00000000BFC0: D3D8409C 180001F1 - v_accvgpr_read_b32 v157, a245 // 00000000BFC8: D3D8409D 180001F5 - v_accvgpr_read_b32 v158, a249 // 00000000BFD0: D3D8409E 180001F9 - v_accvgpr_read_b32 v159, a253 // 00000000BFD8: D3D8409F 180001FD - v_accvgpr_read_b32 v160, a2 // 00000000BFE0: D3D840A0 18000102 - v_accvgpr_read_b32 v161, a6 // 00000000BFE8: D3D840A1 18000106 - v_accvgpr_read_b32 v162, a10 // 00000000BFF0: D3D840A2 1800010A - v_accvgpr_read_b32 v163, a14 // 00000000BFF8: D3D840A3 1800010E - v_accvgpr_read_b32 v164, a18 // 00000000C000: D3D840A4 
18000112 - v_accvgpr_read_b32 v165, a22 // 00000000C008: D3D840A5 18000116 - v_accvgpr_read_b32 v166, a26 // 00000000C010: D3D840A6 1800011A - v_accvgpr_read_b32 v167, a30 // 00000000C018: D3D840A7 1800011E - v_accvgpr_read_b32 v168, a34 // 00000000C020: D3D840A8 18000122 - v_accvgpr_read_b32 v169, a38 // 00000000C028: D3D840A9 18000126 - v_accvgpr_read_b32 v170, a42 // 00000000C030: D3D840AA 1800012A - v_accvgpr_read_b32 v171, a46 // 00000000C038: D3D840AB 1800012E - v_accvgpr_read_b32 v172, a50 // 00000000C040: D3D840AC 18000132 - v_accvgpr_read_b32 v173, a54 // 00000000C048: D3D840AD 18000136 - v_accvgpr_read_b32 v174, a58 // 00000000C050: D3D840AE 1800013A - v_accvgpr_read_b32 v175, a62 // 00000000C058: D3D840AF 1800013E - v_accvgpr_read_b32 v176, a66 // 00000000C060: D3D840B0 18000142 - v_accvgpr_read_b32 v177, a70 // 00000000C068: D3D840B1 18000146 - v_accvgpr_read_b32 v178, a74 // 00000000C070: D3D840B2 1800014A - v_accvgpr_read_b32 v179, a78 // 00000000C078: D3D840B3 1800014E - v_accvgpr_read_b32 v180, a82 // 00000000C080: D3D840B4 18000152 - v_accvgpr_read_b32 v181, a86 // 00000000C088: D3D840B5 18000156 - v_accvgpr_read_b32 v182, a90 // 00000000C090: D3D840B6 1800015A - v_accvgpr_read_b32 v183, a94 // 00000000C098: D3D840B7 1800015E - v_accvgpr_read_b32 v184, a98 // 00000000C0A0: D3D840B8 18000162 - v_accvgpr_read_b32 v185, a102 // 00000000C0A8: D3D840B9 18000166 - v_accvgpr_read_b32 v186, a106 // 00000000C0B0: D3D840BA 1800016A - v_accvgpr_read_b32 v187, a110 // 00000000C0B8: D3D840BB 1800016E - v_accvgpr_read_b32 v188, a114 // 00000000C0C0: D3D840BC 18000172 - v_accvgpr_read_b32 v189, a118 // 00000000C0C8: D3D840BD 18000176 - v_accvgpr_read_b32 v190, a122 // 00000000C0D0: D3D840BE 1800017A - v_accvgpr_read_b32 v191, a126 // 00000000C0D8: D3D840BF 1800017E - v_accvgpr_read_b32 v192, a130 // 00000000C0E0: D3D840C0 18000182 - v_accvgpr_read_b32 v193, a134 // 00000000C0E8: D3D840C1 18000186 - v_accvgpr_read_b32 v194, a138 // 00000000C0F0: D3D840C2 1800018A 
- v_accvgpr_read_b32 v195, a142 // 00000000C0F8: D3D840C3 1800018E - v_accvgpr_read_b32 v196, a146 // 00000000C100: D3D840C4 18000192 - v_accvgpr_read_b32 v197, a150 // 00000000C108: D3D840C5 18000196 - v_accvgpr_read_b32 v198, a154 // 00000000C110: D3D840C6 1800019A - v_accvgpr_read_b32 v199, a158 // 00000000C118: D3D840C7 1800019E - v_accvgpr_read_b32 v200, a162 // 00000000C120: D3D840C8 180001A2 - v_accvgpr_read_b32 v201, a166 // 00000000C128: D3D840C9 180001A6 - v_accvgpr_read_b32 v202, a170 // 00000000C130: D3D840CA 180001AA - v_accvgpr_read_b32 v203, a174 // 00000000C138: D3D840CB 180001AE - v_accvgpr_read_b32 v204, a178 // 00000000C140: D3D840CC 180001B2 - v_accvgpr_read_b32 v205, a182 // 00000000C148: D3D840CD 180001B6 - v_accvgpr_read_b32 v206, a186 // 00000000C150: D3D840CE 180001BA - v_accvgpr_read_b32 v207, a190 // 00000000C158: D3D840CF 180001BE - v_accvgpr_read_b32 v208, a194 // 00000000C160: D3D840D0 180001C2 - v_accvgpr_read_b32 v209, a198 // 00000000C168: D3D840D1 180001C6 - v_accvgpr_read_b32 v210, a202 // 00000000C170: D3D840D2 180001CA - v_accvgpr_read_b32 v211, a206 // 00000000C178: D3D840D3 180001CE - v_accvgpr_read_b32 v212, a210 // 00000000C180: D3D840D4 180001D2 - v_accvgpr_read_b32 v213, a214 // 00000000C188: D3D840D5 180001D6 - v_accvgpr_read_b32 v214, a218 // 00000000C190: D3D840D6 180001DA - v_accvgpr_read_b32 v215, a222 // 00000000C198: D3D840D7 180001DE - v_accvgpr_read_b32 v216, a226 // 00000000C1A0: D3D840D8 180001E2 - v_accvgpr_read_b32 v217, a230 // 00000000C1A8: D3D840D9 180001E6 - v_accvgpr_read_b32 v218, a234 // 00000000C1B0: D3D840DA 180001EA - v_accvgpr_read_b32 v219, a238 // 00000000C1B8: D3D840DB 180001EE - v_accvgpr_read_b32 v220, a242 // 00000000C1C0: D3D840DC 180001F2 - v_accvgpr_read_b32 v221, a246 // 00000000C1C8: D3D840DD 180001F6 - v_accvgpr_read_b32 v222, a250 // 00000000C1D0: D3D840DE 180001FA - v_accvgpr_read_b32 v223, a254 // 00000000C1D8: D3D840DF 180001FE - v_accvgpr_read_b32 v224, a3 // 00000000C1E0: D3D840E0 
18000103 - v_accvgpr_read_b32 v225, a7 // 00000000C1E8: D3D840E1 18000107 - v_accvgpr_read_b32 v226, a11 // 00000000C1F0: D3D840E2 1800010B - v_accvgpr_read_b32 v227, a15 // 00000000C1F8: D3D840E3 1800010F - v_accvgpr_read_b32 v228, a19 // 00000000C200: D3D840E4 18000113 - v_accvgpr_read_b32 v229, a23 // 00000000C208: D3D840E5 18000117 - v_accvgpr_read_b32 v230, a27 // 00000000C210: D3D840E6 1800011B - v_accvgpr_read_b32 v231, a31 // 00000000C218: D3D840E7 1800011F - v_accvgpr_read_b32 v232, a35 // 00000000C220: D3D840E8 18000123 - v_accvgpr_read_b32 v233, a39 // 00000000C228: D3D840E9 18000127 - v_accvgpr_read_b32 v234, a43 // 00000000C230: D3D840EA 1800012B - v_accvgpr_read_b32 v235, a47 // 00000000C238: D3D840EB 1800012F - v_accvgpr_read_b32 v236, a51 // 00000000C240: D3D840EC 18000133 - v_accvgpr_read_b32 v237, a55 // 00000000C248: D3D840ED 18000137 - v_accvgpr_read_b32 v238, a59 // 00000000C250: D3D840EE 1800013B - v_accvgpr_read_b32 v239, a63 // 00000000C258: D3D840EF 1800013F - v_pk_mul_f32 v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 00000000C260: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 00000000C268: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 00000000C270: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] op_sel_hi:[0,1] // 00000000C278: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 00000000C280: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 00000000C288: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 00000000C290: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 00000000C298: D3B14026 10024C2C - v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 00000000C2A0: D3B14028 1002502C - v_pk_mul_f32 v[42:43], s[44:45], v[42:43] op_sel_hi:[0,1] // 00000000C2A8: D3B1402A 1002542C - v_pk_mul_f32 v[44:45], s[44:45], v[44:45] 
op_sel_hi:[0,1] // 00000000C2B0: D3B1402C 1002582C - v_pk_mul_f32 v[46:47], s[44:45], v[46:47] op_sel_hi:[0,1] // 00000000C2B8: D3B1402E 10025C2C - v_pk_mul_f32 v[48:49], s[44:45], v[48:49] op_sel_hi:[0,1] // 00000000C2C0: D3B14030 1002602C - v_pk_mul_f32 v[50:51], s[44:45], v[50:51] op_sel_hi:[0,1] // 00000000C2C8: D3B14032 1002642C - v_pk_mul_f32 v[52:53], s[44:45], v[52:53] op_sel_hi:[0,1] // 00000000C2D0: D3B14034 1002682C - v_pk_mul_f32 v[54:55], s[44:45], v[54:55] op_sel_hi:[0,1] // 00000000C2D8: D3B14036 10026C2C - v_pk_mul_f32 v[56:57], s[44:45], v[56:57] op_sel_hi:[0,1] // 00000000C2E0: D3B14038 1002702C - v_pk_mul_f32 v[58:59], s[44:45], v[58:59] op_sel_hi:[0,1] // 00000000C2E8: D3B1403A 1002742C - v_pk_mul_f32 v[60:61], s[44:45], v[60:61] op_sel_hi:[0,1] // 00000000C2F0: D3B1403C 1002782C - v_pk_mul_f32 v[62:63], s[44:45], v[62:63] op_sel_hi:[0,1] // 00000000C2F8: D3B1403E 10027C2C - v_pk_mul_f32 v[64:65], s[44:45], v[64:65] op_sel_hi:[0,1] // 00000000C300: D3B14040 1002802C - v_pk_mul_f32 v[66:67], s[44:45], v[66:67] op_sel_hi:[0,1] // 00000000C308: D3B14042 1002842C - v_pk_mul_f32 v[68:69], s[44:45], v[68:69] op_sel_hi:[0,1] // 00000000C310: D3B14044 1002882C - v_pk_mul_f32 v[70:71], s[44:45], v[70:71] op_sel_hi:[0,1] // 00000000C318: D3B14046 10028C2C - v_pk_mul_f32 v[72:73], s[44:45], v[72:73] op_sel_hi:[0,1] // 00000000C320: D3B14048 1002902C - v_pk_mul_f32 v[74:75], s[44:45], v[74:75] op_sel_hi:[0,1] // 00000000C328: D3B1404A 1002942C - v_pk_mul_f32 v[76:77], s[44:45], v[76:77] op_sel_hi:[0,1] // 00000000C330: D3B1404C 1002982C - v_pk_mul_f32 v[78:79], s[44:45], v[78:79] op_sel_hi:[0,1] // 00000000C338: D3B1404E 10029C2C - v_pk_mul_f32 v[80:81], s[44:45], v[80:81] op_sel_hi:[0,1] // 00000000C340: D3B14050 1002A02C - v_pk_mul_f32 v[82:83], s[44:45], v[82:83] op_sel_hi:[0,1] // 00000000C348: D3B14052 1002A42C - v_pk_mul_f32 v[84:85], s[44:45], v[84:85] op_sel_hi:[0,1] // 00000000C350: D3B14054 1002A82C - v_pk_mul_f32 v[86:87], s[44:45], v[86:87] 
op_sel_hi:[0,1] // 00000000C358: D3B14056 1002AC2C - v_pk_mul_f32 v[88:89], s[44:45], v[88:89] op_sel_hi:[0,1] // 00000000C360: D3B14058 1002B02C - v_pk_mul_f32 v[90:91], s[44:45], v[90:91] op_sel_hi:[0,1] // 00000000C368: D3B1405A 1002B42C - v_pk_mul_f32 v[92:93], s[44:45], v[92:93] op_sel_hi:[0,1] // 00000000C370: D3B1405C 1002B82C - v_pk_mul_f32 v[94:95], s[44:45], v[94:95] op_sel_hi:[0,1] // 00000000C378: D3B1405E 1002BC2C - v_pk_mul_f32 v[96:97], s[44:45], v[96:97] op_sel_hi:[0,1] // 00000000C380: D3B14060 1002C02C - v_pk_mul_f32 v[98:99], s[44:45], v[98:99] op_sel_hi:[0,1] // 00000000C388: D3B14062 1002C42C - v_pk_mul_f32 v[100:101], s[44:45], v[100:101] op_sel_hi:[0,1]// 00000000C390: D3B14064 1002C82C - v_pk_mul_f32 v[102:103], s[44:45], v[102:103] op_sel_hi:[0,1]// 00000000C398: D3B14066 1002CC2C - v_pk_mul_f32 v[104:105], s[44:45], v[104:105] op_sel_hi:[0,1]// 00000000C3A0: D3B14068 1002D02C - v_pk_mul_f32 v[106:107], s[44:45], v[106:107] op_sel_hi:[0,1]// 00000000C3A8: D3B1406A 1002D42C - v_pk_mul_f32 v[108:109], s[44:45], v[108:109] op_sel_hi:[0,1]// 00000000C3B0: D3B1406C 1002D82C - v_pk_mul_f32 v[110:111], s[44:45], v[110:111] op_sel_hi:[0,1]// 00000000C3B8: D3B1406E 1002DC2C - v_pk_mul_f32 v[112:113], s[44:45], v[112:113] op_sel_hi:[0,1]// 00000000C3C0: D3B14070 1002E02C - v_pk_mul_f32 v[114:115], s[44:45], v[114:115] op_sel_hi:[0,1]// 00000000C3C8: D3B14072 1002E42C - v_pk_mul_f32 v[116:117], s[44:45], v[116:117] op_sel_hi:[0,1]// 00000000C3D0: D3B14074 1002E82C - v_pk_mul_f32 v[118:119], s[44:45], v[118:119] op_sel_hi:[0,1]// 00000000C3D8: D3B14076 1002EC2C - v_pk_mul_f32 v[120:121], s[44:45], v[120:121] op_sel_hi:[0,1]// 00000000C3E0: D3B14078 1002F02C - v_pk_mul_f32 v[122:123], s[44:45], v[122:123] op_sel_hi:[0,1]// 00000000C3E8: D3B1407A 1002F42C - v_pk_mul_f32 v[124:125], s[44:45], v[124:125] op_sel_hi:[0,1]// 00000000C3F0: D3B1407C 1002F82C - v_pk_mul_f32 v[126:127], s[44:45], v[126:127] op_sel_hi:[0,1]// 00000000C3F8: D3B1407E 1002FC2C - 
v_pk_mul_f32 v[136:137], s[44:45], v[136:137] op_sel_hi:[0,1]// 00000000C400: D3B14088 1003102C - v_pk_mul_f32 v[138:139], s[44:45], v[138:139] op_sel_hi:[0,1]// 00000000C408: D3B1408A 1003142C - v_pk_mul_f32 v[140:141], s[44:45], v[140:141] op_sel_hi:[0,1]// 00000000C410: D3B1408C 1003182C - v_pk_mul_f32 v[142:143], s[44:45], v[142:143] op_sel_hi:[0,1]// 00000000C418: D3B1408E 10031C2C - v_pk_mul_f32 v[144:145], s[44:45], v[144:145] op_sel_hi:[0,1]// 00000000C420: D3B14090 1003202C - v_pk_mul_f32 v[146:147], s[44:45], v[146:147] op_sel_hi:[0,1]// 00000000C428: D3B14092 1003242C - v_pk_mul_f32 v[148:149], s[44:45], v[148:149] op_sel_hi:[0,1]// 00000000C430: D3B14094 1003282C - v_pk_mul_f32 v[150:151], s[44:45], v[150:151] op_sel_hi:[0,1]// 00000000C438: D3B14096 10032C2C - v_pk_mul_f32 v[152:153], s[44:45], v[152:153] op_sel_hi:[0,1]// 00000000C440: D3B14098 1003302C - v_pk_mul_f32 v[154:155], s[44:45], v[154:155] op_sel_hi:[0,1]// 00000000C448: D3B1409A 1003342C - v_pk_mul_f32 v[156:157], s[44:45], v[156:157] op_sel_hi:[0,1]// 00000000C450: D3B1409C 1003382C - v_pk_mul_f32 v[158:159], s[44:45], v[158:159] op_sel_hi:[0,1]// 00000000C458: D3B1409E 10033C2C - v_pk_mul_f32 v[160:161], s[44:45], v[160:161] op_sel_hi:[0,1]// 00000000C460: D3B140A0 1003402C - v_pk_mul_f32 v[162:163], s[44:45], v[162:163] op_sel_hi:[0,1]// 00000000C468: D3B140A2 1003442C - v_pk_mul_f32 v[164:165], s[44:45], v[164:165] op_sel_hi:[0,1]// 00000000C470: D3B140A4 1003482C - v_pk_mul_f32 v[166:167], s[44:45], v[166:167] op_sel_hi:[0,1]// 00000000C478: D3B140A6 10034C2C - v_pk_mul_f32 v[168:169], s[44:45], v[168:169] op_sel_hi:[0,1]// 00000000C480: D3B140A8 1003502C - v_pk_mul_f32 v[170:171], s[44:45], v[170:171] op_sel_hi:[0,1]// 00000000C488: D3B140AA 1003542C - v_pk_mul_f32 v[172:173], s[44:45], v[172:173] op_sel_hi:[0,1]// 00000000C490: D3B140AC 1003582C - v_pk_mul_f32 v[174:175], s[44:45], v[174:175] op_sel_hi:[0,1]// 00000000C498: D3B140AE 10035C2C - v_pk_mul_f32 v[176:177], s[44:45], 
v[176:177] op_sel_hi:[0,1]// 00000000C4A0: D3B140B0 1003602C - v_pk_mul_f32 v[178:179], s[44:45], v[178:179] op_sel_hi:[0,1]// 00000000C4A8: D3B140B2 1003642C - v_pk_mul_f32 v[180:181], s[44:45], v[180:181] op_sel_hi:[0,1]// 00000000C4B0: D3B140B4 1003682C - v_pk_mul_f32 v[182:183], s[44:45], v[182:183] op_sel_hi:[0,1]// 00000000C4B8: D3B140B6 10036C2C - v_pk_mul_f32 v[184:185], s[44:45], v[184:185] op_sel_hi:[0,1]// 00000000C4C0: D3B140B8 1003702C - v_pk_mul_f32 v[186:187], s[44:45], v[186:187] op_sel_hi:[0,1]// 00000000C4C8: D3B140BA 1003742C - v_pk_mul_f32 v[188:189], s[44:45], v[188:189] op_sel_hi:[0,1]// 00000000C4D0: D3B140BC 1003782C - v_pk_mul_f32 v[190:191], s[44:45], v[190:191] op_sel_hi:[0,1]// 00000000C4D8: D3B140BE 10037C2C - v_pk_mul_f32 v[192:193], s[44:45], v[192:193] op_sel_hi:[0,1]// 00000000C4E0: D3B140C0 1003802C - v_pk_mul_f32 v[194:195], s[44:45], v[194:195] op_sel_hi:[0,1]// 00000000C4E8: D3B140C2 1003842C - v_pk_mul_f32 v[196:197], s[44:45], v[196:197] op_sel_hi:[0,1]// 00000000C4F0: D3B140C4 1003882C - v_pk_mul_f32 v[198:199], s[44:45], v[198:199] op_sel_hi:[0,1]// 00000000C4F8: D3B140C6 10038C2C - v_pk_mul_f32 v[200:201], s[44:45], v[200:201] op_sel_hi:[0,1]// 00000000C500: D3B140C8 1003902C - v_pk_mul_f32 v[202:203], s[44:45], v[202:203] op_sel_hi:[0,1]// 00000000C508: D3B140CA 1003942C - v_pk_mul_f32 v[204:205], s[44:45], v[204:205] op_sel_hi:[0,1]// 00000000C510: D3B140CC 1003982C - v_pk_mul_f32 v[206:207], s[44:45], v[206:207] op_sel_hi:[0,1]// 00000000C518: D3B140CE 10039C2C - v_pk_mul_f32 v[208:209], s[44:45], v[208:209] op_sel_hi:[0,1]// 00000000C520: D3B140D0 1003A02C - v_pk_mul_f32 v[210:211], s[44:45], v[210:211] op_sel_hi:[0,1]// 00000000C528: D3B140D2 1003A42C - v_pk_mul_f32 v[212:213], s[44:45], v[212:213] op_sel_hi:[0,1]// 00000000C530: D3B140D4 1003A82C - v_pk_mul_f32 v[214:215], s[44:45], v[214:215] op_sel_hi:[0,1]// 00000000C538: D3B140D6 1003AC2C - v_pk_mul_f32 v[216:217], s[44:45], v[216:217] op_sel_hi:[0,1]// 
00000000C540: D3B140D8 1003B02C - v_pk_mul_f32 v[218:219], s[44:45], v[218:219] op_sel_hi:[0,1]// 00000000C548: D3B140DA 1003B42C - v_pk_mul_f32 v[220:221], s[44:45], v[220:221] op_sel_hi:[0,1]// 00000000C550: D3B140DC 1003B82C - v_pk_mul_f32 v[222:223], s[44:45], v[222:223] op_sel_hi:[0,1]// 00000000C558: D3B140DE 1003BC2C - v_pk_mul_f32 v[224:225], s[44:45], v[224:225] op_sel_hi:[0,1]// 00000000C560: D3B140E0 1003C02C - v_pk_mul_f32 v[226:227], s[44:45], v[226:227] op_sel_hi:[0,1]// 00000000C568: D3B140E2 1003C42C - v_pk_mul_f32 v[228:229], s[44:45], v[228:229] op_sel_hi:[0,1]// 00000000C570: D3B140E4 1003C82C - v_pk_mul_f32 v[230:231], s[44:45], v[230:231] op_sel_hi:[0,1]// 00000000C578: D3B140E6 1003CC2C - v_pk_mul_f32 v[232:233], s[44:45], v[232:233] op_sel_hi:[0,1]// 00000000C580: D3B140E8 1003D02C - v_pk_mul_f32 v[234:235], s[44:45], v[234:235] op_sel_hi:[0,1]// 00000000C588: D3B140EA 1003D42C - v_pk_mul_f32 v[236:237], s[44:45], v[236:237] op_sel_hi:[0,1]// 00000000C590: D3B140EC 1003D82C - v_pk_mul_f32 v[238:239], s[44:45], v[238:239] op_sel_hi:[0,1]// 00000000C598: D3B140EE 1003DC2C - v_mov_b32_e32 v12, 0xffff0000 // 00000000C5A0: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 00000000C5A8: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 00000000C5B0: 7E1C02FF 00007FFF - v_cvt_pk_bf16_f32 v24, v24, v25 // 00000000C5B8: D2680018 00023318 - v_cvt_pk_bf16_f32 v25, v26, v27 // 00000000C5C0: D2680019 0002371A - v_cvt_pk_bf16_f32 v26, v28, v29 // 00000000C5C8: D268001A 00023B1C - v_cvt_pk_bf16_f32 v27, v30, v31 // 00000000C5D0: D268001B 00023F1E - buffer_store_dwordx4 v[24:27], v15, s[16:19], 0 offen nt // 00000000C5D8: E07E1000 8004180F - v_cvt_pk_bf16_f32 v32, v32, v33 // 00000000C5E0: D2680020 00024320 - v_cvt_pk_bf16_f32 v33, v34, v35 // 00000000C5E8: D2680021 00024722 - v_cvt_pk_bf16_f32 v34, v36, v37 // 00000000C5F0: D2680022 00024B24 - v_cvt_pk_bf16_f32 v35, v38, v39 // 00000000C5F8: D2680023 00024F26 - s_lshl_b32 s12, s36, 1 // 00000000C600: 
8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C604: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C608: 82118011 - buffer_store_dwordx4 v[32:35], v15, s[16:19], 0 offen nt // 00000000C60C: E07E1000 8004200F - v_cvt_pk_bf16_f32 v40, v40, v41 // 00000000C614: D2680028 00025328 - v_cvt_pk_bf16_f32 v41, v42, v43 // 00000000C61C: D2680029 0002572A - v_cvt_pk_bf16_f32 v42, v44, v45 // 00000000C624: D268002A 00025B2C - v_cvt_pk_bf16_f32 v43, v46, v47 // 00000000C62C: D268002B 00025F2E - s_lshl_b32 s12, s36, 1 // 00000000C634: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C638: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C63C: 82118011 - buffer_store_dwordx4 v[40:43], v15, s[16:19], 0 offen nt // 00000000C640: E07E1000 8004280F - v_cvt_pk_bf16_f32 v48, v48, v49 // 00000000C648: D2680030 00026330 - v_cvt_pk_bf16_f32 v49, v50, v51 // 00000000C650: D2680031 00026732 - v_cvt_pk_bf16_f32 v50, v52, v53 // 00000000C658: D2680032 00026B34 - v_cvt_pk_bf16_f32 v51, v54, v55 // 00000000C660: D2680033 00026F36 - s_lshl_b32 s12, s36, 1 // 00000000C668: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C66C: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C670: 82118011 - buffer_store_dwordx4 v[48:51], v15, s[16:19], 0 offen nt // 00000000C674: E07E1000 8004300F - v_cvt_pk_bf16_f32 v56, v56, v57 // 00000000C67C: D2680038 00027338 - v_cvt_pk_bf16_f32 v57, v58, v59 // 00000000C684: D2680039 0002773A - v_cvt_pk_bf16_f32 v58, v60, v61 // 00000000C68C: D268003A 00027B3C - v_cvt_pk_bf16_f32 v59, v62, v63 // 00000000C694: D268003B 00027F3E - s_lshl_b32 s12, s36, 1 // 00000000C69C: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C6A0: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C6A4: 82118011 - buffer_store_dwordx4 v[56:59], v15, s[16:19], 0 offen nt // 00000000C6A8: E07E1000 8004380F - v_cvt_pk_bf16_f32 v64, v64, v65 // 00000000C6B0: D2680040 00028340 - v_cvt_pk_bf16_f32 v65, v66, v67 // 00000000C6B8: D2680041 00028742 - v_cvt_pk_bf16_f32 v66, v68, v69 // 00000000C6C0: D2680042 00028B44 - 
v_cvt_pk_bf16_f32 v67, v70, v71 // 00000000C6C8: D2680043 00028F46 - s_lshl_b32 s12, s36, 1 // 00000000C6D0: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C6D4: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C6D8: 82118011 - buffer_store_dwordx4 v[64:67], v15, s[16:19], 0 offen nt // 00000000C6DC: E07E1000 8004400F - v_cvt_pk_bf16_f32 v72, v72, v73 // 00000000C6E4: D2680048 00029348 - v_cvt_pk_bf16_f32 v73, v74, v75 // 00000000C6EC: D2680049 0002974A - v_cvt_pk_bf16_f32 v74, v76, v77 // 00000000C6F4: D268004A 00029B4C - v_cvt_pk_bf16_f32 v75, v78, v79 // 00000000C6FC: D268004B 00029F4E - s_lshl_b32 s12, s36, 1 // 00000000C704: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C708: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C70C: 82118011 - buffer_store_dwordx4 v[72:75], v15, s[16:19], 0 offen nt // 00000000C710: E07E1000 8004480F - v_cvt_pk_bf16_f32 v80, v80, v81 // 00000000C718: D2680050 0002A350 - v_cvt_pk_bf16_f32 v81, v82, v83 // 00000000C720: D2680051 0002A752 - v_cvt_pk_bf16_f32 v82, v84, v85 // 00000000C728: D2680052 0002AB54 - v_cvt_pk_bf16_f32 v83, v86, v87 // 00000000C730: D2680053 0002AF56 - s_lshl_b32 s12, s36, 1 // 00000000C738: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C73C: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C740: 82118011 - buffer_store_dwordx4 v[80:83], v15, s[16:19], 0 offen nt // 00000000C744: E07E1000 8004500F - v_cvt_pk_bf16_f32 v88, v88, v89 // 00000000C74C: D2680058 0002B358 - v_cvt_pk_bf16_f32 v89, v90, v91 // 00000000C754: D2680059 0002B75A - v_cvt_pk_bf16_f32 v90, v92, v93 // 00000000C75C: D268005A 0002BB5C - v_cvt_pk_bf16_f32 v91, v94, v95 // 00000000C764: D268005B 0002BF5E - s_lshl_b32 s12, s36, 1 // 00000000C76C: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C770: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C774: 82118011 - buffer_store_dwordx4 v[88:91], v15, s[16:19], 0 offen nt // 00000000C778: E07E1000 8004580F - v_cvt_pk_bf16_f32 v96, v96, v97 // 00000000C780: D2680060 0002C360 - v_cvt_pk_bf16_f32 v97, v98, v99 // 
00000000C788: D2680061 0002C762 - v_cvt_pk_bf16_f32 v98, v100, v101 // 00000000C790: D2680062 0002CB64 - v_cvt_pk_bf16_f32 v99, v102, v103 // 00000000C798: D2680063 0002CF66 - s_lshl_b32 s12, s36, 1 // 00000000C7A0: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C7A4: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C7A8: 82118011 - buffer_store_dwordx4 v[96:99], v15, s[16:19], 0 offen nt // 00000000C7AC: E07E1000 8004600F - v_cvt_pk_bf16_f32 v104, v104, v105 // 00000000C7B4: D2680068 0002D368 - v_cvt_pk_bf16_f32 v105, v106, v107 // 00000000C7BC: D2680069 0002D76A - v_cvt_pk_bf16_f32 v106, v108, v109 // 00000000C7C4: D268006A 0002DB6C - v_cvt_pk_bf16_f32 v107, v110, v111 // 00000000C7CC: D268006B 0002DF6E - s_lshl_b32 s12, s36, 1 // 00000000C7D4: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C7D8: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C7DC: 82118011 - buffer_store_dwordx4 v[104:107], v15, s[16:19], 0 offen nt // 00000000C7E0: E07E1000 8004680F - v_cvt_pk_bf16_f32 v112, v112, v113 // 00000000C7E8: D2680070 0002E370 - v_cvt_pk_bf16_f32 v113, v114, v115 // 00000000C7F0: D2680071 0002E772 - v_cvt_pk_bf16_f32 v114, v116, v117 // 00000000C7F8: D2680072 0002EB74 - v_cvt_pk_bf16_f32 v115, v118, v119 // 00000000C800: D2680073 0002EF76 - s_lshl_b32 s12, s36, 1 // 00000000C808: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C80C: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C810: 82118011 - buffer_store_dwordx4 v[112:115], v15, s[16:19], 0 offen nt // 00000000C814: E07E1000 8004700F - v_cvt_pk_bf16_f32 v120, v120, v121 // 00000000C81C: D2680078 0002F378 - v_cvt_pk_bf16_f32 v121, v122, v123 // 00000000C824: D2680079 0002F77A - v_cvt_pk_bf16_f32 v122, v124, v125 // 00000000C82C: D268007A 0002FB7C - v_cvt_pk_bf16_f32 v123, v126, v127 // 00000000C834: D268007B 0002FF7E - s_lshl_b32 s12, s36, 1 // 00000000C83C: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C840: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C844: 82118011 - buffer_store_dwordx4 v[120:123], v15, s[16:19], 0 offen 
nt // 00000000C848: E07E1000 8004780F - v_cvt_pk_bf16_f32 v136, v136, v137 // 00000000C850: D2680088 00031388 - v_cvt_pk_bf16_f32 v137, v138, v139 // 00000000C858: D2680089 0003178A - v_cvt_pk_bf16_f32 v138, v140, v141 // 00000000C860: D268008A 00031B8C - v_cvt_pk_bf16_f32 v139, v142, v143 // 00000000C868: D268008B 00031F8E - s_lshl_b32 s12, s36, 1 // 00000000C870: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C874: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C878: 82118011 - buffer_store_dwordx4 v[136:139], v15, s[16:19], 0 offen nt // 00000000C87C: E07E1000 8004880F - v_cvt_pk_bf16_f32 v144, v144, v145 // 00000000C884: D2680090 00032390 - v_cvt_pk_bf16_f32 v145, v146, v147 // 00000000C88C: D2680091 00032792 - v_cvt_pk_bf16_f32 v146, v148, v149 // 00000000C894: D2680092 00032B94 - v_cvt_pk_bf16_f32 v147, v150, v151 // 00000000C89C: D2680093 00032F96 - s_lshl_b32 s12, s36, 1 // 00000000C8A4: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C8A8: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C8AC: 82118011 - buffer_store_dwordx4 v[144:147], v15, s[16:19], 0 offen nt // 00000000C8B0: E07E1000 8004900F - v_cvt_pk_bf16_f32 v152, v152, v153 // 00000000C8B8: D2680098 00033398 - v_cvt_pk_bf16_f32 v153, v154, v155 // 00000000C8C0: D2680099 0003379A - v_cvt_pk_bf16_f32 v154, v156, v157 // 00000000C8C8: D268009A 00033B9C - v_cvt_pk_bf16_f32 v155, v158, v159 // 00000000C8D0: D268009B 00033F9E - s_lshl_b32 s12, s36, 1 // 00000000C8D8: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C8DC: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C8E0: 82118011 - buffer_store_dwordx4 v[152:155], v15, s[16:19], 0 offen nt // 00000000C8E4: E07E1000 8004980F - v_cvt_pk_bf16_f32 v160, v160, v161 // 00000000C8EC: D26800A0 000343A0 - v_cvt_pk_bf16_f32 v161, v162, v163 // 00000000C8F4: D26800A1 000347A2 - v_cvt_pk_bf16_f32 v162, v164, v165 // 00000000C8FC: D26800A2 00034BA4 - v_cvt_pk_bf16_f32 v163, v166, v167 // 00000000C904: D26800A3 00034FA6 - s_lshl_b32 s12, s36, 1 // 00000000C90C: 8E0C8124 - 
s_add_u32 s16, s16, s12 // 00000000C910: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C914: 82118011 - buffer_store_dwordx4 v[160:163], v15, s[16:19], 0 offen nt // 00000000C918: E07E1000 8004A00F - v_cvt_pk_bf16_f32 v168, v168, v169 // 00000000C920: D26800A8 000353A8 - v_cvt_pk_bf16_f32 v169, v170, v171 // 00000000C928: D26800A9 000357AA - v_cvt_pk_bf16_f32 v170, v172, v173 // 00000000C930: D26800AA 00035BAC - v_cvt_pk_bf16_f32 v171, v174, v175 // 00000000C938: D26800AB 00035FAE - s_lshl_b32 s12, s36, 1 // 00000000C940: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C944: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C948: 82118011 - buffer_store_dwordx4 v[168:171], v15, s[16:19], 0 offen nt // 00000000C94C: E07E1000 8004A80F - v_cvt_pk_bf16_f32 v176, v176, v177 // 00000000C954: D26800B0 000363B0 - v_cvt_pk_bf16_f32 v177, v178, v179 // 00000000C95C: D26800B1 000367B2 - v_cvt_pk_bf16_f32 v178, v180, v181 // 00000000C964: D26800B2 00036BB4 - v_cvt_pk_bf16_f32 v179, v182, v183 // 00000000C96C: D26800B3 00036FB6 - s_lshl_b32 s12, s36, 1 // 00000000C974: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C978: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C97C: 82118011 - buffer_store_dwordx4 v[176:179], v15, s[16:19], 0 offen nt // 00000000C980: E07E1000 8004B00F - v_cvt_pk_bf16_f32 v184, v184, v185 // 00000000C988: D26800B8 000373B8 - v_cvt_pk_bf16_f32 v185, v186, v187 // 00000000C990: D26800B9 000377BA - v_cvt_pk_bf16_f32 v186, v188, v189 // 00000000C998: D26800BA 00037BBC - v_cvt_pk_bf16_f32 v187, v190, v191 // 00000000C9A0: D26800BB 00037FBE - s_lshl_b32 s12, s36, 1 // 00000000C9A8: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C9AC: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C9B0: 82118011 - buffer_store_dwordx4 v[184:187], v15, s[16:19], 0 offen nt // 00000000C9B4: E07E1000 8004B80F - v_cvt_pk_bf16_f32 v192, v192, v193 // 00000000C9BC: D26800C0 000383C0 - v_cvt_pk_bf16_f32 v193, v194, v195 // 00000000C9C4: D26800C1 000387C2 - v_cvt_pk_bf16_f32 v194, v196, v197 // 
00000000C9CC: D26800C2 00038BC4 - v_cvt_pk_bf16_f32 v195, v198, v199 // 00000000C9D4: D26800C3 00038FC6 - s_lshl_b32 s12, s36, 1 // 00000000C9DC: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000C9E0: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000C9E4: 82118011 - buffer_store_dwordx4 v[192:195], v15, s[16:19], 0 offen nt // 00000000C9E8: E07E1000 8004C00F - v_cvt_pk_bf16_f32 v200, v200, v201 // 00000000C9F0: D26800C8 000393C8 - v_cvt_pk_bf16_f32 v201, v202, v203 // 00000000C9F8: D26800C9 000397CA - v_cvt_pk_bf16_f32 v202, v204, v205 // 00000000CA00: D26800CA 00039BCC - v_cvt_pk_bf16_f32 v203, v206, v207 // 00000000CA08: D26800CB 00039FCE - s_lshl_b32 s12, s36, 1 // 00000000CA10: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000CA14: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000CA18: 82118011 - buffer_store_dwordx4 v[200:203], v15, s[16:19], 0 offen nt // 00000000CA1C: E07E1000 8004C80F - v_cvt_pk_bf16_f32 v208, v208, v209 // 00000000CA24: D26800D0 0003A3D0 - v_cvt_pk_bf16_f32 v209, v210, v211 // 00000000CA2C: D26800D1 0003A7D2 - v_cvt_pk_bf16_f32 v210, v212, v213 // 00000000CA34: D26800D2 0003ABD4 - v_cvt_pk_bf16_f32 v211, v214, v215 // 00000000CA3C: D26800D3 0003AFD6 - s_lshl_b32 s12, s36, 1 // 00000000CA44: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000CA48: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000CA4C: 82118011 - buffer_store_dwordx4 v[208:211], v15, s[16:19], 0 offen nt // 00000000CA50: E07E1000 8004D00F - v_cvt_pk_bf16_f32 v216, v216, v217 // 00000000CA58: D26800D8 0003B3D8 - v_cvt_pk_bf16_f32 v217, v218, v219 // 00000000CA60: D26800D9 0003B7DA - v_cvt_pk_bf16_f32 v218, v220, v221 // 00000000CA68: D26800DA 0003BBDC - v_cvt_pk_bf16_f32 v219, v222, v223 // 00000000CA70: D26800DB 0003BFDE - s_lshl_b32 s12, s36, 1 // 00000000CA78: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000CA7C: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000CA80: 82118011 - buffer_store_dwordx4 v[216:219], v15, s[16:19], 0 offen nt // 00000000CA84: E07E1000 8004D80F - v_cvt_pk_bf16_f32 v224, v224, 
v225 // 00000000CA8C: D26800E0 0003C3E0 - v_cvt_pk_bf16_f32 v225, v226, v227 // 00000000CA94: D26800E1 0003C7E2 - v_cvt_pk_bf16_f32 v226, v228, v229 // 00000000CA9C: D26800E2 0003CBE4 - v_cvt_pk_bf16_f32 v227, v230, v231 // 00000000CAA4: D26800E3 0003CFE6 - s_lshl_b32 s12, s36, 1 // 00000000CAAC: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000CAB0: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000CAB4: 82118011 - buffer_store_dwordx4 v[224:227], v15, s[16:19], 0 offen nt // 00000000CAB8: E07E1000 8004E00F - v_cvt_pk_bf16_f32 v232, v232, v233 // 00000000CAC0: D26800E8 0003D3E8 - v_cvt_pk_bf16_f32 v233, v234, v235 // 00000000CAC8: D26800E9 0003D7EA - v_cvt_pk_bf16_f32 v234, v236, v237 // 00000000CAD0: D26800EA 0003DBEC - v_cvt_pk_bf16_f32 v235, v238, v239 // 00000000CAD8: D26800EB 0003DFEE - s_lshl_b32 s12, s36, 1 // 00000000CAE0: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000CAE4: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000CAE8: 82118011 - buffer_store_dwordx4 v[232:235], v15, s[16:19], 0 offen nt // 00000000CAEC: E07E1000 8004E80F - s_nop 0 // 00000000CAF4: BF800000 - v_accvgpr_read_b32 v24, a67 // 00000000CAF8: D3D84018 18000143 - v_accvgpr_read_b32 v25, a71 // 00000000CB00: D3D84019 18000147 - v_accvgpr_read_b32 v26, a75 // 00000000CB08: D3D8401A 1800014B - v_accvgpr_read_b32 v27, a79 // 00000000CB10: D3D8401B 1800014F - v_accvgpr_read_b32 v28, a83 // 00000000CB18: D3D8401C 18000153 - v_accvgpr_read_b32 v29, a87 // 00000000CB20: D3D8401D 18000157 - v_accvgpr_read_b32 v30, a91 // 00000000CB28: D3D8401E 1800015B - v_accvgpr_read_b32 v31, a95 // 00000000CB30: D3D8401F 1800015F - v_accvgpr_read_b32 v32, a99 // 00000000CB38: D3D84020 18000163 - v_accvgpr_read_b32 v33, a103 // 00000000CB40: D3D84021 18000167 - v_accvgpr_read_b32 v34, a107 // 00000000CB48: D3D84022 1800016B - v_accvgpr_read_b32 v35, a111 // 00000000CB50: D3D84023 1800016F - v_accvgpr_read_b32 v36, a115 // 00000000CB58: D3D84024 18000173 - v_accvgpr_read_b32 v37, a119 // 00000000CB60: D3D84025 18000177 - 
v_accvgpr_read_b32 v38, a123 // 00000000CB68: D3D84026 1800017B - v_accvgpr_read_b32 v39, a127 // 00000000CB70: D3D84027 1800017F - v_accvgpr_read_b32 v40, a131 // 00000000CB78: D3D84028 18000183 - v_accvgpr_read_b32 v41, a135 // 00000000CB80: D3D84029 18000187 - v_accvgpr_read_b32 v42, a139 // 00000000CB88: D3D8402A 1800018B - v_accvgpr_read_b32 v43, a143 // 00000000CB90: D3D8402B 1800018F - v_accvgpr_read_b32 v44, a147 // 00000000CB98: D3D8402C 18000193 - v_accvgpr_read_b32 v45, a151 // 00000000CBA0: D3D8402D 18000197 - v_accvgpr_read_b32 v46, a155 // 00000000CBA8: D3D8402E 1800019B - v_accvgpr_read_b32 v47, a159 // 00000000CBB0: D3D8402F 1800019F - v_accvgpr_read_b32 v48, a163 // 00000000CBB8: D3D84030 180001A3 - v_accvgpr_read_b32 v49, a167 // 00000000CBC0: D3D84031 180001A7 - v_accvgpr_read_b32 v50, a171 // 00000000CBC8: D3D84032 180001AB - v_accvgpr_read_b32 v51, a175 // 00000000CBD0: D3D84033 180001AF - v_accvgpr_read_b32 v52, a179 // 00000000CBD8: D3D84034 180001B3 - v_accvgpr_read_b32 v53, a183 // 00000000CBE0: D3D84035 180001B7 - v_accvgpr_read_b32 v54, a187 // 00000000CBE8: D3D84036 180001BB - v_accvgpr_read_b32 v55, a191 // 00000000CBF0: D3D84037 180001BF - v_accvgpr_read_b32 v56, a195 // 00000000CBF8: D3D84038 180001C3 - v_accvgpr_read_b32 v57, a199 // 00000000CC00: D3D84039 180001C7 - v_accvgpr_read_b32 v58, a203 // 00000000CC08: D3D8403A 180001CB - v_accvgpr_read_b32 v59, a207 // 00000000CC10: D3D8403B 180001CF - v_accvgpr_read_b32 v60, a211 // 00000000CC18: D3D8403C 180001D3 - v_accvgpr_read_b32 v61, a215 // 00000000CC20: D3D8403D 180001D7 - v_accvgpr_read_b32 v62, a219 // 00000000CC28: D3D8403E 180001DB - v_accvgpr_read_b32 v63, a223 // 00000000CC30: D3D8403F 180001DF - v_accvgpr_read_b32 v64, a227 // 00000000CC38: D3D84040 180001E3 - v_accvgpr_read_b32 v65, a231 // 00000000CC40: D3D84041 180001E7 - v_accvgpr_read_b32 v66, a235 // 00000000CC48: D3D84042 180001EB - v_accvgpr_read_b32 v67, a239 // 00000000CC50: D3D84043 180001EF - v_accvgpr_read_b32 
v68, a243 // 00000000CC58: D3D84044 180001F3 - v_accvgpr_read_b32 v69, a247 // 00000000CC60: D3D84045 180001F7 - v_accvgpr_read_b32 v70, a251 // 00000000CC68: D3D84046 180001FB - v_accvgpr_read_b32 v71, a255 // 00000000CC70: D3D84047 180001FF - v_pk_mul_f32 v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 00000000CC78: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 00000000CC80: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 00000000CC88: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] op_sel_hi:[0,1] // 00000000CC90: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 00000000CC98: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 00000000CCA0: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 00000000CCA8: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 00000000CCB0: D3B14026 10024C2C - v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 00000000CCB8: D3B14028 1002502C - v_pk_mul_f32 v[42:43], s[44:45], v[42:43] op_sel_hi:[0,1] // 00000000CCC0: D3B1402A 1002542C - v_pk_mul_f32 v[44:45], s[44:45], v[44:45] op_sel_hi:[0,1] // 00000000CCC8: D3B1402C 1002582C - v_pk_mul_f32 v[46:47], s[44:45], v[46:47] op_sel_hi:[0,1] // 00000000CCD0: D3B1402E 10025C2C - v_pk_mul_f32 v[48:49], s[44:45], v[48:49] op_sel_hi:[0,1] // 00000000CCD8: D3B14030 1002602C - v_pk_mul_f32 v[50:51], s[44:45], v[50:51] op_sel_hi:[0,1] // 00000000CCE0: D3B14032 1002642C - v_pk_mul_f32 v[52:53], s[44:45], v[52:53] op_sel_hi:[0,1] // 00000000CCE8: D3B14034 1002682C - v_pk_mul_f32 v[54:55], s[44:45], v[54:55] op_sel_hi:[0,1] // 00000000CCF0: D3B14036 10026C2C - v_pk_mul_f32 v[56:57], s[44:45], v[56:57] op_sel_hi:[0,1] // 00000000CCF8: D3B14038 1002702C - v_pk_mul_f32 v[58:59], s[44:45], v[58:59] op_sel_hi:[0,1] // 00000000CD00: D3B1403A 1002742C - v_pk_mul_f32 v[60:61], s[44:45], v[60:61] 
op_sel_hi:[0,1] // 00000000CD08: D3B1403C 1002782C - v_pk_mul_f32 v[62:63], s[44:45], v[62:63] op_sel_hi:[0,1] // 00000000CD10: D3B1403E 10027C2C - v_pk_mul_f32 v[64:65], s[44:45], v[64:65] op_sel_hi:[0,1] // 00000000CD18: D3B14040 1002802C - v_pk_mul_f32 v[66:67], s[44:45], v[66:67] op_sel_hi:[0,1] // 00000000CD20: D3B14042 1002842C - v_pk_mul_f32 v[68:69], s[44:45], v[68:69] op_sel_hi:[0,1] // 00000000CD28: D3B14044 1002882C - v_pk_mul_f32 v[70:71], s[44:45], v[70:71] op_sel_hi:[0,1] // 00000000CD30: D3B14046 10028C2C - v_mov_b32_e32 v12, 0xffff0000 // 00000000CD38: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 00000000CD40: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 00000000CD48: 7E1C02FF 00007FFF - v_cvt_pk_bf16_f32 v24, v24, v25 // 00000000CD50: D2680018 00023318 - v_cvt_pk_bf16_f32 v25, v26, v27 // 00000000CD58: D2680019 0002371A - v_cvt_pk_bf16_f32 v26, v28, v29 // 00000000CD60: D268001A 00023B1C - v_cvt_pk_bf16_f32 v27, v30, v31 // 00000000CD68: D268001B 00023F1E - s_lshl_b32 s12, s36, 1 // 00000000CD70: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000CD74: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000CD78: 82118011 - buffer_store_dwordx4 v[24:27], v15, s[16:19], 0 offen nt // 00000000CD7C: E07E1000 8004180F - v_cvt_pk_bf16_f32 v32, v32, v33 // 00000000CD84: D2680020 00024320 - v_cvt_pk_bf16_f32 v33, v34, v35 // 00000000CD8C: D2680021 00024722 - v_cvt_pk_bf16_f32 v34, v36, v37 // 00000000CD94: D2680022 00024B24 - v_cvt_pk_bf16_f32 v35, v38, v39 // 00000000CD9C: D2680023 00024F26 - s_lshl_b32 s12, s36, 1 // 00000000CDA4: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000CDA8: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000CDAC: 82118011 - buffer_store_dwordx4 v[32:35], v15, s[16:19], 0 offen nt // 00000000CDB0: E07E1000 8004200F - v_cvt_pk_bf16_f32 v40, v40, v41 // 00000000CDB8: D2680028 00025328 - v_cvt_pk_bf16_f32 v41, v42, v43 // 00000000CDC0: D2680029 0002572A - v_cvt_pk_bf16_f32 v42, v44, v45 // 00000000CDC8: D268002A 00025B2C - v_cvt_pk_bf16_f32 
v43, v46, v47 // 00000000CDD0: D268002B 00025F2E - s_lshl_b32 s12, s36, 1 // 00000000CDD8: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000CDDC: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000CDE0: 82118011 - buffer_store_dwordx4 v[40:43], v15, s[16:19], 0 offen nt // 00000000CDE4: E07E1000 8004280F - v_cvt_pk_bf16_f32 v48, v48, v49 // 00000000CDEC: D2680030 00026330 - v_cvt_pk_bf16_f32 v49, v50, v51 // 00000000CDF4: D2680031 00026732 - v_cvt_pk_bf16_f32 v50, v52, v53 // 00000000CDFC: D2680032 00026B34 - v_cvt_pk_bf16_f32 v51, v54, v55 // 00000000CE04: D2680033 00026F36 - s_lshl_b32 s12, s36, 1 // 00000000CE0C: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000CE10: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000CE14: 82118011 - buffer_store_dwordx4 v[48:51], v15, s[16:19], 0 offen nt // 00000000CE18: E07E1000 8004300F - v_cvt_pk_bf16_f32 v56, v56, v57 // 00000000CE20: D2680038 00027338 - v_cvt_pk_bf16_f32 v57, v58, v59 // 00000000CE28: D2680039 0002773A - v_cvt_pk_bf16_f32 v58, v60, v61 // 00000000CE30: D268003A 00027B3C - v_cvt_pk_bf16_f32 v59, v62, v63 // 00000000CE38: D268003B 00027F3E - s_lshl_b32 s12, s36, 1 // 00000000CE40: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000CE44: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000CE48: 82118011 - buffer_store_dwordx4 v[56:59], v15, s[16:19], 0 offen nt // 00000000CE4C: E07E1000 8004380F - v_cvt_pk_bf16_f32 v64, v64, v65 // 00000000CE54: D2680040 00028340 - v_cvt_pk_bf16_f32 v65, v66, v67 // 00000000CE5C: D2680041 00028742 - v_cvt_pk_bf16_f32 v66, v68, v69 // 00000000CE64: D2680042 00028B44 - v_cvt_pk_bf16_f32 v67, v70, v71 // 00000000CE6C: D2680043 00028F46 - s_lshl_b32 s12, s36, 1 // 00000000CE74: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000000CE78: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000CE7C: 82118011 - buffer_store_dwordx4 v[64:67], v15, s[16:19], 0 offen nt // 00000000CE80: E07E1000 8004400F - s_nop 0 // 00000000CE88: BF800000 - s_branch label_GW_End_2 // 00000000CE8C: BF8249D0 - -label_GW_B0_E1_N_1: - v_mov_b32_e32 
v10, 0x80000000 // 00000000CE90: 7E1402FF 80000000 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000CE98: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000CEA0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000CEA8: 86A2221E - v_add_lshl_u32 v15, v7, v4, 1 // 00000000CEAC: D1FE000F 02060907 - v_cndmask_b32_e64 v15, v10, v15, s[34:35] // 00000000CEB4: D100000F 008A1F0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000CEBC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000CEC4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000CECC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000CED4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000CEDC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000CEE4: 86A2221E - v_add_lshl_u32 v128, v7, v4, 1 // 00000000CEE8: D1FE0080 02060907 - v_cndmask_b32_e64 v128, v10, v128, s[34:35] // 00000000CEF0: D1000080 008B010A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000CEF8: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000CF00: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000CF08: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000CF10: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000CF18: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000CF20: 86A2221E - v_add_lshl_u32 v129, v7, v4, 1 // 00000000CF24: D1FE0081 02060907 - v_cndmask_b32_e64 v129, v10, v129, s[34:35] // 00000000CF2C: D1000081 008B030A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000CF34: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000CF3C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000CF44: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000CF4C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000CF54: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000CF5C: 86A2221E - v_add_lshl_u32 v130, v7, v4, 1 // 00000000CF60: D1FE0082 02060907 - 
v_cndmask_b32_e64 v130, v10, v130, s[34:35] // 00000000CF68: D1000082 008B050A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000CF70: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000CF78: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000CF80: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000CF88: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000CF90: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000CF98: 86A2221E - v_add_lshl_u32 v131, v7, v4, 1 // 00000000CF9C: D1FE0083 02060907 - v_cndmask_b32_e64 v131, v10, v131, s[34:35] // 00000000CFA4: D1000083 008B070A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000CFAC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000CFB4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000CFBC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000CFC4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000CFCC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000CFD4: 86A2221E - v_add_lshl_u32 v135, v7, v4, 1 // 00000000CFD8: D1FE0087 02060907 - v_cndmask_b32_e64 v135, v10, v135, s[34:35] // 00000000CFE0: D1000087 008B0F0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000CFE8: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000CFF0: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000CFF8: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D000: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D008: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D010: 86A2221E - v_add_lshl_u32 v216, v7, v4, 1 // 00000000D014: D1FE00D8 02060907 - v_cndmask_b32_e64 v216, v10, v216, s[34:35] // 00000000D01C: D10000D8 008BB10A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D024: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D02C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D034: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D03C: 
D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D044: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D04C: 86A2221E - v_add_lshl_u32 v217, v7, v4, 1 // 00000000D050: D1FE00D9 02060907 - v_cndmask_b32_e64 v217, v10, v217, s[34:35] // 00000000D058: D10000D9 008BB30A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D060: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D068: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D070: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D078: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D080: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D088: 86A2221E - v_add_lshl_u32 v218, v7, v4, 1 // 00000000D08C: D1FE00DA 02060907 - v_cndmask_b32_e64 v218, v10, v218, s[34:35] // 00000000D094: D10000DA 008BB50A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D09C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D0A4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D0AC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D0B4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D0BC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D0C4: 86A2221E - v_add_lshl_u32 v219, v7, v4, 1 // 00000000D0C8: D1FE00DB 02060907 - v_cndmask_b32_e64 v219, v10, v219, s[34:35] // 00000000D0D0: D10000DB 008BB70A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D0D8: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D0E0: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D0E8: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D0F0: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D0F8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D100: 86A2221E - v_add_lshl_u32 v220, v7, v4, 1 // 00000000D104: D1FE00DC 02060907 - v_cndmask_b32_e64 v220, v10, v220, s[34:35] // 00000000D10C: D10000DC 008BB90A - v_add_co_u32_e64 v5, vcc, 
v5, 1 // 00000000D114: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D11C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D124: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D12C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D134: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D13C: 86A2221E - v_add_lshl_u32 v221, v7, v4, 1 // 00000000D140: D1FE00DD 02060907 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 00000000D148: D10000DD 008BBB0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D150: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D158: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D160: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D168: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D170: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D178: 86A2221E - v_add_lshl_u32 v222, v7, v4, 1 // 00000000D17C: D1FE00DE 02060907 - v_cndmask_b32_e64 v222, v10, v222, s[34:35] // 00000000D184: D10000DE 008BBD0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D18C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D194: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D19C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D1A4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D1AC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D1B4: 86A2221E - v_add_lshl_u32 v223, v7, v4, 1 // 00000000D1B8: D1FE00DF 02060907 - v_cndmask_b32_e64 v223, v10, v223, s[34:35] // 00000000D1C0: D10000DF 008BBF0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D1C8: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D1D0: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D1D8: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D1E0: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D1E8: D0C90022 00003305 - s_and_b64 s[34:35], 
s[30:31], s[34:35] // 00000000D1F0: 86A2221E - v_add_lshl_u32 v224, v7, v4, 1 // 00000000D1F4: D1FE00E0 02060907 - v_cndmask_b32_e64 v224, v10, v224, s[34:35] // 00000000D1FC: D10000E0 008BC10A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D204: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D20C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D214: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D21C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D224: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D22C: 86A2221E - v_add_lshl_u32 v225, v7, v4, 1 // 00000000D230: D1FE00E1 02060907 - v_cndmask_b32_e64 v225, v10, v225, s[34:35] // 00000000D238: D10000E1 008BC30A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D240: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D248: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D250: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D258: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D260: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D268: 86A2221E - v_add_lshl_u32 v226, v7, v4, 1 // 00000000D26C: D1FE00E2 02060907 - v_cndmask_b32_e64 v226, v10, v226, s[34:35] // 00000000D274: D10000E2 008BC50A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D27C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D284: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D28C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D294: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D29C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D2A4: 86A2221E - v_add_lshl_u32 v227, v7, v4, 1 // 00000000D2A8: D1FE00E3 02060907 - v_cndmask_b32_e64 v227, v10, v227, s[34:35] // 00000000D2B0: D10000E3 008BC70A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D2B8: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D2C0: D1340006 00004D06 - 
v_add_u32_e64 v7, v7, s36 // 00000000D2C8: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D2D0: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D2D8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D2E0: 86A2221E - v_add_lshl_u32 v228, v7, v4, 1 // 00000000D2E4: D1FE00E4 02060907 - v_cndmask_b32_e64 v228, v10, v228, s[34:35] // 00000000D2EC: D10000E4 008BC90A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D2F4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D2FC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D304: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D30C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D314: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D31C: 86A2221E - v_add_lshl_u32 v229, v7, v4, 1 // 00000000D320: D1FE00E5 02060907 - v_cndmask_b32_e64 v229, v10, v229, s[34:35] // 00000000D328: D10000E5 008BCB0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D330: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D338: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D340: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D348: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D350: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D358: 86A2221E - v_add_lshl_u32 v230, v7, v4, 1 // 00000000D35C: D1FE00E6 02060907 - v_cndmask_b32_e64 v230, v10, v230, s[34:35] // 00000000D364: D10000E6 008BCD0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D36C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D374: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D37C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D384: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D38C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D394: 86A2221E - v_add_lshl_u32 v231, v7, v4, 1 // 00000000D398: D1FE00E7 
02060907 - v_cndmask_b32_e64 v231, v10, v231, s[34:35] // 00000000D3A0: D10000E7 008BCF0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D3A8: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D3B0: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D3B8: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D3C0: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D3C8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D3D0: 86A2221E - v_add_lshl_u32 v232, v7, v4, 1 // 00000000D3D4: D1FE00E8 02060907 - v_cndmask_b32_e64 v232, v10, v232, s[34:35] // 00000000D3DC: D10000E8 008BD10A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000D3E4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000D3EC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000D3F4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000D3FC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000D404: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000D40C: 86A2221E - v_add_lshl_u32 v233, v7, v4, 1 // 00000000D410: D1FE00E9 02060907 - v_cndmask_b32_e64 v233, v10, v233, s[34:35] // 00000000D418: D10000E9 008BD30A - v_accvgpr_read_b32 v16, a0 // 00000000D420: D3D84010 18000100 - v_accvgpr_read_b32 v17, a4 // 00000000D428: D3D84011 18000104 - v_accvgpr_read_b32 v18, a8 // 00000000D430: D3D84012 18000108 - v_accvgpr_read_b32 v19, a12 // 00000000D438: D3D84013 1800010C - v_accvgpr_read_b32 v20, a16 // 00000000D440: D3D84014 18000110 - v_accvgpr_read_b32 v21, a20 // 00000000D448: D3D84015 18000114 - v_accvgpr_read_b32 v22, a24 // 00000000D450: D3D84016 18000118 - v_accvgpr_read_b32 v23, a28 // 00000000D458: D3D84017 1800011C - v_accvgpr_read_b32 v24, a32 // 00000000D460: D3D84018 18000120 - v_accvgpr_read_b32 v25, a36 // 00000000D468: D3D84019 18000124 - v_accvgpr_read_b32 v26, a40 // 00000000D470: D3D8401A 18000128 - v_accvgpr_read_b32 v27, a44 // 00000000D478: D3D8401B 1800012C - v_accvgpr_read_b32 
v28, a48 // 00000000D480: D3D8401C 18000130 - v_accvgpr_read_b32 v29, a52 // 00000000D488: D3D8401D 18000134 - v_accvgpr_read_b32 v30, a56 // 00000000D490: D3D8401E 18000138 - v_accvgpr_read_b32 v31, a60 // 00000000D498: D3D8401F 1800013C - v_accvgpr_read_b32 v32, a64 // 00000000D4A0: D3D84020 18000140 - v_accvgpr_read_b32 v33, a68 // 00000000D4A8: D3D84021 18000144 - v_accvgpr_read_b32 v34, a72 // 00000000D4B0: D3D84022 18000148 - v_accvgpr_read_b32 v35, a76 // 00000000D4B8: D3D84023 1800014C - v_accvgpr_read_b32 v36, a80 // 00000000D4C0: D3D84024 18000150 - v_accvgpr_read_b32 v37, a84 // 00000000D4C8: D3D84025 18000154 - v_accvgpr_read_b32 v38, a88 // 00000000D4D0: D3D84026 18000158 - v_accvgpr_read_b32 v39, a92 // 00000000D4D8: D3D84027 1800015C - v_accvgpr_read_b32 v40, a96 // 00000000D4E0: D3D84028 18000160 - v_accvgpr_read_b32 v41, a100 // 00000000D4E8: D3D84029 18000164 - v_accvgpr_read_b32 v42, a104 // 00000000D4F0: D3D8402A 18000168 - v_accvgpr_read_b32 v43, a108 // 00000000D4F8: D3D8402B 1800016C - v_accvgpr_read_b32 v44, a112 // 00000000D500: D3D8402C 18000170 - v_accvgpr_read_b32 v45, a116 // 00000000D508: D3D8402D 18000174 - v_accvgpr_read_b32 v46, a120 // 00000000D510: D3D8402E 18000178 - v_accvgpr_read_b32 v47, a124 // 00000000D518: D3D8402F 1800017C - v_accvgpr_read_b32 v48, a128 // 00000000D520: D3D84030 18000180 - v_accvgpr_read_b32 v49, a132 // 00000000D528: D3D84031 18000184 - v_accvgpr_read_b32 v50, a136 // 00000000D530: D3D84032 18000188 - v_accvgpr_read_b32 v51, a140 // 00000000D538: D3D84033 1800018C - v_accvgpr_read_b32 v52, a144 // 00000000D540: D3D84034 18000190 - v_accvgpr_read_b32 v53, a148 // 00000000D548: D3D84035 18000194 - v_accvgpr_read_b32 v54, a152 // 00000000D550: D3D84036 18000198 - v_accvgpr_read_b32 v55, a156 // 00000000D558: D3D84037 1800019C - v_accvgpr_read_b32 v56, a160 // 00000000D560: D3D84038 180001A0 - v_accvgpr_read_b32 v57, a164 // 00000000D568: D3D84039 180001A4 - v_accvgpr_read_b32 v58, a168 // 00000000D570: 
D3D8403A 180001A8 - v_accvgpr_read_b32 v59, a172 // 00000000D578: D3D8403B 180001AC - v_accvgpr_read_b32 v60, a176 // 00000000D580: D3D8403C 180001B0 - v_accvgpr_read_b32 v61, a180 // 00000000D588: D3D8403D 180001B4 - v_accvgpr_read_b32 v62, a184 // 00000000D590: D3D8403E 180001B8 - v_accvgpr_read_b32 v63, a188 // 00000000D598: D3D8403F 180001BC - v_accvgpr_read_b32 v64, a192 // 00000000D5A0: D3D84040 180001C0 - v_accvgpr_read_b32 v65, a196 // 00000000D5A8: D3D84041 180001C4 - v_accvgpr_read_b32 v66, a200 // 00000000D5B0: D3D84042 180001C8 - v_accvgpr_read_b32 v67, a204 // 00000000D5B8: D3D84043 180001CC - v_accvgpr_read_b32 v68, a208 // 00000000D5C0: D3D84044 180001D0 - v_accvgpr_read_b32 v69, a212 // 00000000D5C8: D3D84045 180001D4 - v_accvgpr_read_b32 v70, a216 // 00000000D5D0: D3D84046 180001D8 - v_accvgpr_read_b32 v71, a220 // 00000000D5D8: D3D84047 180001DC - v_accvgpr_read_b32 v72, a224 // 00000000D5E0: D3D84048 180001E0 - v_accvgpr_read_b32 v73, a228 // 00000000D5E8: D3D84049 180001E4 - v_accvgpr_read_b32 v74, a232 // 00000000D5F0: D3D8404A 180001E8 - v_accvgpr_read_b32 v75, a236 // 00000000D5F8: D3D8404B 180001EC - v_accvgpr_read_b32 v76, a240 // 00000000D600: D3D8404C 180001F0 - v_accvgpr_read_b32 v77, a244 // 00000000D608: D3D8404D 180001F4 - v_accvgpr_read_b32 v78, a248 // 00000000D610: D3D8404E 180001F8 - v_accvgpr_read_b32 v79, a252 // 00000000D618: D3D8404F 180001FC - v_accvgpr_read_b32 v80, a1 // 00000000D620: D3D84050 18000101 - v_accvgpr_read_b32 v81, a5 // 00000000D628: D3D84051 18000105 - v_accvgpr_read_b32 v82, a9 // 00000000D630: D3D84052 18000109 - v_accvgpr_read_b32 v83, a13 // 00000000D638: D3D84053 1800010D - v_accvgpr_read_b32 v84, a17 // 00000000D640: D3D84054 18000111 - v_accvgpr_read_b32 v85, a21 // 00000000D648: D3D84055 18000115 - v_accvgpr_read_b32 v86, a25 // 00000000D650: D3D84056 18000119 - v_accvgpr_read_b32 v87, a29 // 00000000D658: D3D84057 1800011D - v_accvgpr_read_b32 v88, a33 // 00000000D660: D3D84058 18000121 - 
v_accvgpr_read_b32 v89, a37 // 00000000D668: D3D84059 18000125 - v_accvgpr_read_b32 v90, a41 // 00000000D670: D3D8405A 18000129 - v_accvgpr_read_b32 v91, a45 // 00000000D678: D3D8405B 1800012D - v_accvgpr_read_b32 v92, a49 // 00000000D680: D3D8405C 18000131 - v_accvgpr_read_b32 v93, a53 // 00000000D688: D3D8405D 18000135 - v_accvgpr_read_b32 v94, a57 // 00000000D690: D3D8405E 18000139 - v_accvgpr_read_b32 v95, a61 // 00000000D698: D3D8405F 1800013D - v_accvgpr_read_b32 v96, a65 // 00000000D6A0: D3D84060 18000141 - v_accvgpr_read_b32 v97, a69 // 00000000D6A8: D3D84061 18000145 - v_accvgpr_read_b32 v98, a73 // 00000000D6B0: D3D84062 18000149 - v_accvgpr_read_b32 v99, a77 // 00000000D6B8: D3D84063 1800014D - v_accvgpr_read_b32 v100, a81 // 00000000D6C0: D3D84064 18000151 - v_accvgpr_read_b32 v101, a85 // 00000000D6C8: D3D84065 18000155 - v_accvgpr_read_b32 v102, a89 // 00000000D6D0: D3D84066 18000159 - v_accvgpr_read_b32 v103, a93 // 00000000D6D8: D3D84067 1800015D - v_accvgpr_read_b32 v104, a97 // 00000000D6E0: D3D84068 18000161 - v_accvgpr_read_b32 v105, a101 // 00000000D6E8: D3D84069 18000165 - v_accvgpr_read_b32 v106, a105 // 00000000D6F0: D3D8406A 18000169 - v_accvgpr_read_b32 v107, a109 // 00000000D6F8: D3D8406B 1800016D - v_accvgpr_read_b32 v108, a113 // 00000000D700: D3D8406C 18000171 - v_accvgpr_read_b32 v109, a117 // 00000000D708: D3D8406D 18000175 - v_accvgpr_read_b32 v110, a121 // 00000000D710: D3D8406E 18000179 - v_accvgpr_read_b32 v111, a125 // 00000000D718: D3D8406F 1800017D - v_accvgpr_read_b32 v112, a129 // 00000000D720: D3D84070 18000181 - v_accvgpr_read_b32 v113, a133 // 00000000D728: D3D84071 18000185 - v_accvgpr_read_b32 v114, a137 // 00000000D730: D3D84072 18000189 - v_accvgpr_read_b32 v115, a141 // 00000000D738: D3D84073 1800018D - v_accvgpr_read_b32 v116, a145 // 00000000D740: D3D84074 18000191 - v_accvgpr_read_b32 v117, a149 // 00000000D748: D3D84075 18000195 - v_accvgpr_read_b32 v118, a153 // 00000000D750: D3D84076 18000199 - 
v_accvgpr_read_b32 v119, a157 // 00000000D758: D3D84077 1800019D - v_accvgpr_read_b32 v120, a161 // 00000000D760: D3D84078 180001A1 - v_accvgpr_read_b32 v121, a165 // 00000000D768: D3D84079 180001A5 - v_accvgpr_read_b32 v122, a169 // 00000000D770: D3D8407A 180001A9 - v_accvgpr_read_b32 v123, a173 // 00000000D778: D3D8407B 180001AD - v_accvgpr_read_b32 v124, a177 // 00000000D780: D3D8407C 180001B1 - v_accvgpr_read_b32 v125, a181 // 00000000D788: D3D8407D 180001B5 - v_accvgpr_read_b32 v126, a185 // 00000000D790: D3D8407E 180001B9 - v_accvgpr_read_b32 v127, a189 // 00000000D798: D3D8407F 180001BD - v_accvgpr_read_b32 v136, a193 // 00000000D7A0: D3D84088 180001C1 - v_accvgpr_read_b32 v137, a197 // 00000000D7A8: D3D84089 180001C5 - v_accvgpr_read_b32 v138, a201 // 00000000D7B0: D3D8408A 180001C9 - v_accvgpr_read_b32 v139, a205 // 00000000D7B8: D3D8408B 180001CD - v_accvgpr_read_b32 v140, a209 // 00000000D7C0: D3D8408C 180001D1 - v_accvgpr_read_b32 v141, a213 // 00000000D7C8: D3D8408D 180001D5 - v_accvgpr_read_b32 v142, a217 // 00000000D7D0: D3D8408E 180001D9 - v_accvgpr_read_b32 v143, a221 // 00000000D7D8: D3D8408F 180001DD - v_accvgpr_read_b32 v144, a225 // 00000000D7E0: D3D84090 180001E1 - v_accvgpr_read_b32 v145, a229 // 00000000D7E8: D3D84091 180001E5 - v_accvgpr_read_b32 v146, a233 // 00000000D7F0: D3D84092 180001E9 - v_accvgpr_read_b32 v147, a237 // 00000000D7F8: D3D84093 180001ED - v_accvgpr_read_b32 v148, a241 // 00000000D800: D3D84094 180001F1 - v_accvgpr_read_b32 v149, a245 // 00000000D808: D3D84095 180001F5 - v_accvgpr_read_b32 v150, a249 // 00000000D810: D3D84096 180001F9 - v_accvgpr_read_b32 v151, a253 // 00000000D818: D3D84097 180001FD - v_accvgpr_read_b32 v152, a2 // 00000000D820: D3D84098 18000102 - v_accvgpr_read_b32 v153, a6 // 00000000D828: D3D84099 18000106 - v_accvgpr_read_b32 v154, a10 // 00000000D830: D3D8409A 1800010A - v_accvgpr_read_b32 v155, a14 // 00000000D838: D3D8409B 1800010E - v_accvgpr_read_b32 v156, a18 // 00000000D840: D3D8409C 
18000112 - v_accvgpr_read_b32 v157, a22 // 00000000D848: D3D8409D 18000116 - v_accvgpr_read_b32 v158, a26 // 00000000D850: D3D8409E 1800011A - v_accvgpr_read_b32 v159, a30 // 00000000D858: D3D8409F 1800011E - v_accvgpr_read_b32 v160, a34 // 00000000D860: D3D840A0 18000122 - v_accvgpr_read_b32 v161, a38 // 00000000D868: D3D840A1 18000126 - v_accvgpr_read_b32 v162, a42 // 00000000D870: D3D840A2 1800012A - v_accvgpr_read_b32 v163, a46 // 00000000D878: D3D840A3 1800012E - v_accvgpr_read_b32 v164, a50 // 00000000D880: D3D840A4 18000132 - v_accvgpr_read_b32 v165, a54 // 00000000D888: D3D840A5 18000136 - v_accvgpr_read_b32 v166, a58 // 00000000D890: D3D840A6 1800013A - v_accvgpr_read_b32 v167, a62 // 00000000D898: D3D840A7 1800013E - v_accvgpr_read_b32 v168, a66 // 00000000D8A0: D3D840A8 18000142 - v_accvgpr_read_b32 v169, a70 // 00000000D8A8: D3D840A9 18000146 - v_accvgpr_read_b32 v170, a74 // 00000000D8B0: D3D840AA 1800014A - v_accvgpr_read_b32 v171, a78 // 00000000D8B8: D3D840AB 1800014E - v_accvgpr_read_b32 v172, a82 // 00000000D8C0: D3D840AC 18000152 - v_accvgpr_read_b32 v173, a86 // 00000000D8C8: D3D840AD 18000156 - v_accvgpr_read_b32 v174, a90 // 00000000D8D0: D3D840AE 1800015A - v_accvgpr_read_b32 v175, a94 // 00000000D8D8: D3D840AF 1800015E - v_accvgpr_read_b32 v176, a98 // 00000000D8E0: D3D840B0 18000162 - v_accvgpr_read_b32 v177, a102 // 00000000D8E8: D3D840B1 18000166 - v_accvgpr_read_b32 v178, a106 // 00000000D8F0: D3D840B2 1800016A - v_accvgpr_read_b32 v179, a110 // 00000000D8F8: D3D840B3 1800016E - v_accvgpr_read_b32 v180, a114 // 00000000D900: D3D840B4 18000172 - v_accvgpr_read_b32 v181, a118 // 00000000D908: D3D840B5 18000176 - v_accvgpr_read_b32 v182, a122 // 00000000D910: D3D840B6 1800017A - v_accvgpr_read_b32 v183, a126 // 00000000D918: D3D840B7 1800017E - v_accvgpr_read_b32 v184, a130 // 00000000D920: D3D840B8 18000182 - v_accvgpr_read_b32 v185, a134 // 00000000D928: D3D840B9 18000186 - v_accvgpr_read_b32 v186, a138 // 00000000D930: D3D840BA 1800018A 
- v_accvgpr_read_b32 v187, a142 // 00000000D938: D3D840BB 1800018E - v_accvgpr_read_b32 v188, a146 // 00000000D940: D3D840BC 18000192 - v_accvgpr_read_b32 v189, a150 // 00000000D948: D3D840BD 18000196 - v_accvgpr_read_b32 v190, a154 // 00000000D950: D3D840BE 1800019A - v_accvgpr_read_b32 v191, a158 // 00000000D958: D3D840BF 1800019E - v_accvgpr_read_b32 v192, a162 // 00000000D960: D3D840C0 180001A2 - v_accvgpr_read_b32 v193, a166 // 00000000D968: D3D840C1 180001A6 - v_accvgpr_read_b32 v194, a170 // 00000000D970: D3D840C2 180001AA - v_accvgpr_read_b32 v195, a174 // 00000000D978: D3D840C3 180001AE - v_accvgpr_read_b32 v196, a178 // 00000000D980: D3D840C4 180001B2 - v_accvgpr_read_b32 v197, a182 // 00000000D988: D3D840C5 180001B6 - v_accvgpr_read_b32 v198, a186 // 00000000D990: D3D840C6 180001BA - v_accvgpr_read_b32 v199, a190 // 00000000D998: D3D840C7 180001BE - v_accvgpr_read_b32 v200, a194 // 00000000D9A0: D3D840C8 180001C2 - v_accvgpr_read_b32 v201, a198 // 00000000D9A8: D3D840C9 180001C6 - v_accvgpr_read_b32 v202, a202 // 00000000D9B0: D3D840CA 180001CA - v_accvgpr_read_b32 v203, a206 // 00000000D9B8: D3D840CB 180001CE - v_accvgpr_read_b32 v204, a210 // 00000000D9C0: D3D840CC 180001D2 - v_accvgpr_read_b32 v205, a214 // 00000000D9C8: D3D840CD 180001D6 - v_accvgpr_read_b32 v206, a218 // 00000000D9D0: D3D840CE 180001DA - v_accvgpr_read_b32 v207, a222 // 00000000D9D8: D3D840CF 180001DE - v_accvgpr_read_b32 v208, a226 // 00000000D9E0: D3D840D0 180001E2 - v_accvgpr_read_b32 v209, a230 // 00000000D9E8: D3D840D1 180001E6 - v_accvgpr_read_b32 v210, a234 // 00000000D9F0: D3D840D2 180001EA - v_accvgpr_read_b32 v211, a238 // 00000000D9F8: D3D840D3 180001EE - v_accvgpr_read_b32 v212, a242 // 00000000DA00: D3D840D4 180001F2 - v_accvgpr_read_b32 v213, a246 // 00000000DA08: D3D840D5 180001F6 - v_accvgpr_read_b32 v214, a250 // 00000000DA10: D3D840D6 180001FA - v_accvgpr_read_b32 v215, a254 // 00000000DA18: D3D840D7 180001FE - v_pk_mul_f32 v[16:17], s[44:45], v[16:17] 
op_sel_hi:[0,1] // 00000000DA20: D3B14010 1002202C - v_pk_mul_f32 v[18:19], s[44:45], v[18:19] op_sel_hi:[0,1] // 00000000DA28: D3B14012 1002242C - v_pk_mul_f32 v[20:21], s[44:45], v[20:21] op_sel_hi:[0,1] // 00000000DA30: D3B14014 1002282C - v_pk_mul_f32 v[22:23], s[44:45], v[22:23] op_sel_hi:[0,1] // 00000000DA38: D3B14016 10022C2C - v_pk_mul_f32 v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 00000000DA40: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 00000000DA48: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 00000000DA50: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] op_sel_hi:[0,1] // 00000000DA58: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 00000000DA60: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 00000000DA68: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 00000000DA70: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 00000000DA78: D3B14026 10024C2C - v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 00000000DA80: D3B14028 1002502C - v_pk_mul_f32 v[42:43], s[44:45], v[42:43] op_sel_hi:[0,1] // 00000000DA88: D3B1402A 1002542C - v_pk_mul_f32 v[44:45], s[44:45], v[44:45] op_sel_hi:[0,1] // 00000000DA90: D3B1402C 1002582C - v_pk_mul_f32 v[46:47], s[44:45], v[46:47] op_sel_hi:[0,1] // 00000000DA98: D3B1402E 10025C2C - v_pk_mul_f32 v[48:49], s[44:45], v[48:49] op_sel_hi:[0,1] // 00000000DAA0: D3B14030 1002602C - v_pk_mul_f32 v[50:51], s[44:45], v[50:51] op_sel_hi:[0,1] // 00000000DAA8: D3B14032 1002642C - v_pk_mul_f32 v[52:53], s[44:45], v[52:53] op_sel_hi:[0,1] // 00000000DAB0: D3B14034 1002682C - v_pk_mul_f32 v[54:55], s[44:45], v[54:55] op_sel_hi:[0,1] // 00000000DAB8: D3B14036 10026C2C - v_pk_mul_f32 v[56:57], s[44:45], v[56:57] op_sel_hi:[0,1] // 00000000DAC0: D3B14038 1002702C - v_pk_mul_f32 v[58:59], s[44:45], v[58:59] 
op_sel_hi:[0,1] // 00000000DAC8: D3B1403A 1002742C - v_pk_mul_f32 v[60:61], s[44:45], v[60:61] op_sel_hi:[0,1] // 00000000DAD0: D3B1403C 1002782C - v_pk_mul_f32 v[62:63], s[44:45], v[62:63] op_sel_hi:[0,1] // 00000000DAD8: D3B1403E 10027C2C - v_pk_mul_f32 v[64:65], s[44:45], v[64:65] op_sel_hi:[0,1] // 00000000DAE0: D3B14040 1002802C - v_pk_mul_f32 v[66:67], s[44:45], v[66:67] op_sel_hi:[0,1] // 00000000DAE8: D3B14042 1002842C - v_pk_mul_f32 v[68:69], s[44:45], v[68:69] op_sel_hi:[0,1] // 00000000DAF0: D3B14044 1002882C - v_pk_mul_f32 v[70:71], s[44:45], v[70:71] op_sel_hi:[0,1] // 00000000DAF8: D3B14046 10028C2C - v_pk_mul_f32 v[72:73], s[44:45], v[72:73] op_sel_hi:[0,1] // 00000000DB00: D3B14048 1002902C - v_pk_mul_f32 v[74:75], s[44:45], v[74:75] op_sel_hi:[0,1] // 00000000DB08: D3B1404A 1002942C - v_pk_mul_f32 v[76:77], s[44:45], v[76:77] op_sel_hi:[0,1] // 00000000DB10: D3B1404C 1002982C - v_pk_mul_f32 v[78:79], s[44:45], v[78:79] op_sel_hi:[0,1] // 00000000DB18: D3B1404E 10029C2C - v_pk_mul_f32 v[80:81], s[44:45], v[80:81] op_sel_hi:[0,1] // 00000000DB20: D3B14050 1002A02C - v_pk_mul_f32 v[82:83], s[44:45], v[82:83] op_sel_hi:[0,1] // 00000000DB28: D3B14052 1002A42C - v_pk_mul_f32 v[84:85], s[44:45], v[84:85] op_sel_hi:[0,1] // 00000000DB30: D3B14054 1002A82C - v_pk_mul_f32 v[86:87], s[44:45], v[86:87] op_sel_hi:[0,1] // 00000000DB38: D3B14056 1002AC2C - v_pk_mul_f32 v[88:89], s[44:45], v[88:89] op_sel_hi:[0,1] // 00000000DB40: D3B14058 1002B02C - v_pk_mul_f32 v[90:91], s[44:45], v[90:91] op_sel_hi:[0,1] // 00000000DB48: D3B1405A 1002B42C - v_pk_mul_f32 v[92:93], s[44:45], v[92:93] op_sel_hi:[0,1] // 00000000DB50: D3B1405C 1002B82C - v_pk_mul_f32 v[94:95], s[44:45], v[94:95] op_sel_hi:[0,1] // 00000000DB58: D3B1405E 1002BC2C - v_pk_mul_f32 v[96:97], s[44:45], v[96:97] op_sel_hi:[0,1] // 00000000DB60: D3B14060 1002C02C - v_pk_mul_f32 v[98:99], s[44:45], v[98:99] op_sel_hi:[0,1] // 00000000DB68: D3B14062 1002C42C - v_pk_mul_f32 v[100:101], s[44:45], v[100:101] 
op_sel_hi:[0,1]// 00000000DB70: D3B14064 1002C82C - v_pk_mul_f32 v[102:103], s[44:45], v[102:103] op_sel_hi:[0,1]// 00000000DB78: D3B14066 1002CC2C - v_pk_mul_f32 v[104:105], s[44:45], v[104:105] op_sel_hi:[0,1]// 00000000DB80: D3B14068 1002D02C - v_pk_mul_f32 v[106:107], s[44:45], v[106:107] op_sel_hi:[0,1]// 00000000DB88: D3B1406A 1002D42C - v_pk_mul_f32 v[108:109], s[44:45], v[108:109] op_sel_hi:[0,1]// 00000000DB90: D3B1406C 1002D82C - v_pk_mul_f32 v[110:111], s[44:45], v[110:111] op_sel_hi:[0,1]// 00000000DB98: D3B1406E 1002DC2C - v_pk_mul_f32 v[112:113], s[44:45], v[112:113] op_sel_hi:[0,1]// 00000000DBA0: D3B14070 1002E02C - v_pk_mul_f32 v[114:115], s[44:45], v[114:115] op_sel_hi:[0,1]// 00000000DBA8: D3B14072 1002E42C - v_pk_mul_f32 v[116:117], s[44:45], v[116:117] op_sel_hi:[0,1]// 00000000DBB0: D3B14074 1002E82C - v_pk_mul_f32 v[118:119], s[44:45], v[118:119] op_sel_hi:[0,1]// 00000000DBB8: D3B14076 1002EC2C - v_pk_mul_f32 v[120:121], s[44:45], v[120:121] op_sel_hi:[0,1]// 00000000DBC0: D3B14078 1002F02C - v_pk_mul_f32 v[122:123], s[44:45], v[122:123] op_sel_hi:[0,1]// 00000000DBC8: D3B1407A 1002F42C - v_pk_mul_f32 v[124:125], s[44:45], v[124:125] op_sel_hi:[0,1]// 00000000DBD0: D3B1407C 1002F82C - v_pk_mul_f32 v[126:127], s[44:45], v[126:127] op_sel_hi:[0,1]// 00000000DBD8: D3B1407E 1002FC2C - v_pk_mul_f32 v[136:137], s[44:45], v[136:137] op_sel_hi:[0,1]// 00000000DBE0: D3B14088 1003102C - v_pk_mul_f32 v[138:139], s[44:45], v[138:139] op_sel_hi:[0,1]// 00000000DBE8: D3B1408A 1003142C - v_pk_mul_f32 v[140:141], s[44:45], v[140:141] op_sel_hi:[0,1]// 00000000DBF0: D3B1408C 1003182C - v_pk_mul_f32 v[142:143], s[44:45], v[142:143] op_sel_hi:[0,1]// 00000000DBF8: D3B1408E 10031C2C - v_pk_mul_f32 v[144:145], s[44:45], v[144:145] op_sel_hi:[0,1]// 00000000DC00: D3B14090 1003202C - v_pk_mul_f32 v[146:147], s[44:45], v[146:147] op_sel_hi:[0,1]// 00000000DC08: D3B14092 1003242C - v_pk_mul_f32 v[148:149], s[44:45], v[148:149] op_sel_hi:[0,1]// 00000000DC10: 
D3B14094 1003282C - v_pk_mul_f32 v[150:151], s[44:45], v[150:151] op_sel_hi:[0,1]// 00000000DC18: D3B14096 10032C2C - v_pk_mul_f32 v[152:153], s[44:45], v[152:153] op_sel_hi:[0,1]// 00000000DC20: D3B14098 1003302C - v_pk_mul_f32 v[154:155], s[44:45], v[154:155] op_sel_hi:[0,1]// 00000000DC28: D3B1409A 1003342C - v_pk_mul_f32 v[156:157], s[44:45], v[156:157] op_sel_hi:[0,1]// 00000000DC30: D3B1409C 1003382C - v_pk_mul_f32 v[158:159], s[44:45], v[158:159] op_sel_hi:[0,1]// 00000000DC38: D3B1409E 10033C2C - v_pk_mul_f32 v[160:161], s[44:45], v[160:161] op_sel_hi:[0,1]// 00000000DC40: D3B140A0 1003402C - v_pk_mul_f32 v[162:163], s[44:45], v[162:163] op_sel_hi:[0,1]// 00000000DC48: D3B140A2 1003442C - v_pk_mul_f32 v[164:165], s[44:45], v[164:165] op_sel_hi:[0,1]// 00000000DC50: D3B140A4 1003482C - v_pk_mul_f32 v[166:167], s[44:45], v[166:167] op_sel_hi:[0,1]// 00000000DC58: D3B140A6 10034C2C - v_pk_mul_f32 v[168:169], s[44:45], v[168:169] op_sel_hi:[0,1]// 00000000DC60: D3B140A8 1003502C - v_pk_mul_f32 v[170:171], s[44:45], v[170:171] op_sel_hi:[0,1]// 00000000DC68: D3B140AA 1003542C - v_pk_mul_f32 v[172:173], s[44:45], v[172:173] op_sel_hi:[0,1]// 00000000DC70: D3B140AC 1003582C - v_pk_mul_f32 v[174:175], s[44:45], v[174:175] op_sel_hi:[0,1]// 00000000DC78: D3B140AE 10035C2C - v_pk_mul_f32 v[176:177], s[44:45], v[176:177] op_sel_hi:[0,1]// 00000000DC80: D3B140B0 1003602C - v_pk_mul_f32 v[178:179], s[44:45], v[178:179] op_sel_hi:[0,1]// 00000000DC88: D3B140B2 1003642C - v_pk_mul_f32 v[180:181], s[44:45], v[180:181] op_sel_hi:[0,1]// 00000000DC90: D3B140B4 1003682C - v_pk_mul_f32 v[182:183], s[44:45], v[182:183] op_sel_hi:[0,1]// 00000000DC98: D3B140B6 10036C2C - v_pk_mul_f32 v[184:185], s[44:45], v[184:185] op_sel_hi:[0,1]// 00000000DCA0: D3B140B8 1003702C - v_pk_mul_f32 v[186:187], s[44:45], v[186:187] op_sel_hi:[0,1]// 00000000DCA8: D3B140BA 1003742C - v_pk_mul_f32 v[188:189], s[44:45], v[188:189] op_sel_hi:[0,1]// 00000000DCB0: D3B140BC 1003782C - v_pk_mul_f32 
v[190:191], s[44:45], v[190:191] op_sel_hi:[0,1]// 00000000DCB8: D3B140BE 10037C2C - v_pk_mul_f32 v[192:193], s[44:45], v[192:193] op_sel_hi:[0,1]// 00000000DCC0: D3B140C0 1003802C - v_pk_mul_f32 v[194:195], s[44:45], v[194:195] op_sel_hi:[0,1]// 00000000DCC8: D3B140C2 1003842C - v_pk_mul_f32 v[196:197], s[44:45], v[196:197] op_sel_hi:[0,1]// 00000000DCD0: D3B140C4 1003882C - v_pk_mul_f32 v[198:199], s[44:45], v[198:199] op_sel_hi:[0,1]// 00000000DCD8: D3B140C6 10038C2C - v_pk_mul_f32 v[200:201], s[44:45], v[200:201] op_sel_hi:[0,1]// 00000000DCE0: D3B140C8 1003902C - v_pk_mul_f32 v[202:203], s[44:45], v[202:203] op_sel_hi:[0,1]// 00000000DCE8: D3B140CA 1003942C - v_pk_mul_f32 v[204:205], s[44:45], v[204:205] op_sel_hi:[0,1]// 00000000DCF0: D3B140CC 1003982C - v_pk_mul_f32 v[206:207], s[44:45], v[206:207] op_sel_hi:[0,1]// 00000000DCF8: D3B140CE 10039C2C - v_pk_mul_f32 v[208:209], s[44:45], v[208:209] op_sel_hi:[0,1]// 00000000DD00: D3B140D0 1003A02C - v_pk_mul_f32 v[210:211], s[44:45], v[210:211] op_sel_hi:[0,1]// 00000000DD08: D3B140D2 1003A42C - v_pk_mul_f32 v[212:213], s[44:45], v[212:213] op_sel_hi:[0,1]// 00000000DD10: D3B140D4 1003A82C - v_pk_mul_f32 v[214:215], s[44:45], v[214:215] op_sel_hi:[0,1]// 00000000DD18: D3B140D6 1003AC2C - v_mov_b32_e32 v12, 0xffff0000 // 00000000DD20: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 00000000DD28: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 00000000DD30: 7E1C02FF 00007FFF - v_cvt_pk_bf16_f32 v16, v16, v17 // 00000000DD38: D2680010 00022310 - v_cvt_pk_bf16_f32 v17, v18, v19 // 00000000DD40: D2680011 00022712 - v_cvt_pk_bf16_f32 v18, v20, v21 // 00000000DD48: D2680012 00022B14 - v_cvt_pk_bf16_f32 v19, v22, v23 // 00000000DD50: D2680013 00022F16 - buffer_store_dwordx4 v[16:19], v15, s[16:19], 0 offen nt // 00000000DD58: E07E1000 8004100F - v_cvt_pk_bf16_f32 v24, v24, v25 // 00000000DD60: D2680018 00023318 - v_cvt_pk_bf16_f32 v25, v26, v27 // 00000000DD68: D2680019 0002371A - v_cvt_pk_bf16_f32 v26, v28, v29 
// 00000000DD70: D268001A 00023B1C - v_cvt_pk_bf16_f32 v27, v30, v31 // 00000000DD78: D268001B 00023F1E - buffer_store_dwordx4 v[24:27], v128, s[16:19], 0 offen nt // 00000000DD80: E07E1000 80041880 - v_cvt_pk_bf16_f32 v32, v32, v33 // 00000000DD88: D2680020 00024320 - v_cvt_pk_bf16_f32 v33, v34, v35 // 00000000DD90: D2680021 00024722 - v_cvt_pk_bf16_f32 v34, v36, v37 // 00000000DD98: D2680022 00024B24 - v_cvt_pk_bf16_f32 v35, v38, v39 // 00000000DDA0: D2680023 00024F26 - buffer_store_dwordx4 v[32:35], v129, s[16:19], 0 offen nt // 00000000DDA8: E07E1000 80042081 - v_cvt_pk_bf16_f32 v40, v40, v41 // 00000000DDB0: D2680028 00025328 - v_cvt_pk_bf16_f32 v41, v42, v43 // 00000000DDB8: D2680029 0002572A - v_cvt_pk_bf16_f32 v42, v44, v45 // 00000000DDC0: D268002A 00025B2C - v_cvt_pk_bf16_f32 v43, v46, v47 // 00000000DDC8: D268002B 00025F2E - buffer_store_dwordx4 v[40:43], v130, s[16:19], 0 offen nt // 00000000DDD0: E07E1000 80042882 - v_cvt_pk_bf16_f32 v48, v48, v49 // 00000000DDD8: D2680030 00026330 - v_cvt_pk_bf16_f32 v49, v50, v51 // 00000000DDE0: D2680031 00026732 - v_cvt_pk_bf16_f32 v50, v52, v53 // 00000000DDE8: D2680032 00026B34 - v_cvt_pk_bf16_f32 v51, v54, v55 // 00000000DDF0: D2680033 00026F36 - buffer_store_dwordx4 v[48:51], v131, s[16:19], 0 offen nt // 00000000DDF8: E07E1000 80043083 - v_cvt_pk_bf16_f32 v56, v56, v57 // 00000000DE00: D2680038 00027338 - v_cvt_pk_bf16_f32 v57, v58, v59 // 00000000DE08: D2680039 0002773A - v_cvt_pk_bf16_f32 v58, v60, v61 // 00000000DE10: D268003A 00027B3C - v_cvt_pk_bf16_f32 v59, v62, v63 // 00000000DE18: D268003B 00027F3E - buffer_store_dwordx4 v[56:59], v135, s[16:19], 0 offen nt // 00000000DE20: E07E1000 80043887 - v_cvt_pk_bf16_f32 v64, v64, v65 // 00000000DE28: D2680040 00028340 - v_cvt_pk_bf16_f32 v65, v66, v67 // 00000000DE30: D2680041 00028742 - v_cvt_pk_bf16_f32 v66, v68, v69 // 00000000DE38: D2680042 00028B44 - v_cvt_pk_bf16_f32 v67, v70, v71 // 00000000DE40: D2680043 00028F46 - buffer_store_dwordx4 v[64:67], v216, 
s[16:19], 0 offen nt // 00000000DE48: E07E1000 800440D8 - v_cvt_pk_bf16_f32 v72, v72, v73 // 00000000DE50: D2680048 00029348 - v_cvt_pk_bf16_f32 v73, v74, v75 // 00000000DE58: D2680049 0002974A - v_cvt_pk_bf16_f32 v74, v76, v77 // 00000000DE60: D268004A 00029B4C - v_cvt_pk_bf16_f32 v75, v78, v79 // 00000000DE68: D268004B 00029F4E - buffer_store_dwordx4 v[72:75], v217, s[16:19], 0 offen nt // 00000000DE70: E07E1000 800448D9 - v_cvt_pk_bf16_f32 v80, v80, v81 // 00000000DE78: D2680050 0002A350 - v_cvt_pk_bf16_f32 v81, v82, v83 // 00000000DE80: D2680051 0002A752 - v_cvt_pk_bf16_f32 v82, v84, v85 // 00000000DE88: D2680052 0002AB54 - v_cvt_pk_bf16_f32 v83, v86, v87 // 00000000DE90: D2680053 0002AF56 - buffer_store_dwordx4 v[80:83], v218, s[16:19], 0 offen nt // 00000000DE98: E07E1000 800450DA - v_cvt_pk_bf16_f32 v88, v88, v89 // 00000000DEA0: D2680058 0002B358 - v_cvt_pk_bf16_f32 v89, v90, v91 // 00000000DEA8: D2680059 0002B75A - v_cvt_pk_bf16_f32 v90, v92, v93 // 00000000DEB0: D268005A 0002BB5C - v_cvt_pk_bf16_f32 v91, v94, v95 // 00000000DEB8: D268005B 0002BF5E - buffer_store_dwordx4 v[88:91], v219, s[16:19], 0 offen nt // 00000000DEC0: E07E1000 800458DB - v_cvt_pk_bf16_f32 v96, v96, v97 // 00000000DEC8: D2680060 0002C360 - v_cvt_pk_bf16_f32 v97, v98, v99 // 00000000DED0: D2680061 0002C762 - v_cvt_pk_bf16_f32 v98, v100, v101 // 00000000DED8: D2680062 0002CB64 - v_cvt_pk_bf16_f32 v99, v102, v103 // 00000000DEE0: D2680063 0002CF66 - buffer_store_dwordx4 v[96:99], v220, s[16:19], 0 offen nt // 00000000DEE8: E07E1000 800460DC - v_cvt_pk_bf16_f32 v104, v104, v105 // 00000000DEF0: D2680068 0002D368 - v_cvt_pk_bf16_f32 v105, v106, v107 // 00000000DEF8: D2680069 0002D76A - v_cvt_pk_bf16_f32 v106, v108, v109 // 00000000DF00: D268006A 0002DB6C - v_cvt_pk_bf16_f32 v107, v110, v111 // 00000000DF08: D268006B 0002DF6E - buffer_store_dwordx4 v[104:107], v221, s[16:19], 0 offen nt// 00000000DF10: E07E1000 800468DD - v_cvt_pk_bf16_f32 v112, v112, v113 // 00000000DF18: D2680070 0002E370 
- v_cvt_pk_bf16_f32 v113, v114, v115 // 00000000DF20: D2680071 0002E772 - v_cvt_pk_bf16_f32 v114, v116, v117 // 00000000DF28: D2680072 0002EB74 - v_cvt_pk_bf16_f32 v115, v118, v119 // 00000000DF30: D2680073 0002EF76 - buffer_store_dwordx4 v[112:115], v222, s[16:19], 0 offen nt// 00000000DF38: E07E1000 800470DE - v_cvt_pk_bf16_f32 v120, v120, v121 // 00000000DF40: D2680078 0002F378 - v_cvt_pk_bf16_f32 v121, v122, v123 // 00000000DF48: D2680079 0002F77A - v_cvt_pk_bf16_f32 v122, v124, v125 // 00000000DF50: D268007A 0002FB7C - v_cvt_pk_bf16_f32 v123, v126, v127 // 00000000DF58: D268007B 0002FF7E - buffer_store_dwordx4 v[120:123], v223, s[16:19], 0 offen nt// 00000000DF60: E07E1000 800478DF - v_cvt_pk_bf16_f32 v136, v136, v137 // 00000000DF68: D2680088 00031388 - v_cvt_pk_bf16_f32 v137, v138, v139 // 00000000DF70: D2680089 0003178A - v_cvt_pk_bf16_f32 v138, v140, v141 // 00000000DF78: D268008A 00031B8C - v_cvt_pk_bf16_f32 v139, v142, v143 // 00000000DF80: D268008B 00031F8E - buffer_store_dwordx4 v[136:139], v224, s[16:19], 0 offen nt// 00000000DF88: E07E1000 800488E0 - v_cvt_pk_bf16_f32 v144, v144, v145 // 00000000DF90: D2680090 00032390 - v_cvt_pk_bf16_f32 v145, v146, v147 // 00000000DF98: D2680091 00032792 - v_cvt_pk_bf16_f32 v146, v148, v149 // 00000000DFA0: D2680092 00032B94 - v_cvt_pk_bf16_f32 v147, v150, v151 // 00000000DFA8: D2680093 00032F96 - buffer_store_dwordx4 v[144:147], v225, s[16:19], 0 offen nt// 00000000DFB0: E07E1000 800490E1 - v_cvt_pk_bf16_f32 v152, v152, v153 // 00000000DFB8: D2680098 00033398 - v_cvt_pk_bf16_f32 v153, v154, v155 // 00000000DFC0: D2680099 0003379A - v_cvt_pk_bf16_f32 v154, v156, v157 // 00000000DFC8: D268009A 00033B9C - v_cvt_pk_bf16_f32 v155, v158, v159 // 00000000DFD0: D268009B 00033F9E - buffer_store_dwordx4 v[152:155], v226, s[16:19], 0 offen nt// 00000000DFD8: E07E1000 800498E2 - v_cvt_pk_bf16_f32 v160, v160, v161 // 00000000DFE0: D26800A0 000343A0 - v_cvt_pk_bf16_f32 v161, v162, v163 // 00000000DFE8: D26800A1 000347A2 - 
v_cvt_pk_bf16_f32 v162, v164, v165 // 00000000DFF0: D26800A2 00034BA4 - v_cvt_pk_bf16_f32 v163, v166, v167 // 00000000DFF8: D26800A3 00034FA6 - buffer_store_dwordx4 v[160:163], v227, s[16:19], 0 offen nt// 00000000E000: E07E1000 8004A0E3 - v_cvt_pk_bf16_f32 v168, v168, v169 // 00000000E008: D26800A8 000353A8 - v_cvt_pk_bf16_f32 v169, v170, v171 // 00000000E010: D26800A9 000357AA - v_cvt_pk_bf16_f32 v170, v172, v173 // 00000000E018: D26800AA 00035BAC - v_cvt_pk_bf16_f32 v171, v174, v175 // 00000000E020: D26800AB 00035FAE - buffer_store_dwordx4 v[168:171], v228, s[16:19], 0 offen nt// 00000000E028: E07E1000 8004A8E4 - v_cvt_pk_bf16_f32 v176, v176, v177 // 00000000E030: D26800B0 000363B0 - v_cvt_pk_bf16_f32 v177, v178, v179 // 00000000E038: D26800B1 000367B2 - v_cvt_pk_bf16_f32 v178, v180, v181 // 00000000E040: D26800B2 00036BB4 - v_cvt_pk_bf16_f32 v179, v182, v183 // 00000000E048: D26800B3 00036FB6 - buffer_store_dwordx4 v[176:179], v229, s[16:19], 0 offen nt// 00000000E050: E07E1000 8004B0E5 - v_cvt_pk_bf16_f32 v184, v184, v185 // 00000000E058: D26800B8 000373B8 - v_cvt_pk_bf16_f32 v185, v186, v187 // 00000000E060: D26800B9 000377BA - v_cvt_pk_bf16_f32 v186, v188, v189 // 00000000E068: D26800BA 00037BBC - v_cvt_pk_bf16_f32 v187, v190, v191 // 00000000E070: D26800BB 00037FBE - buffer_store_dwordx4 v[184:187], v230, s[16:19], 0 offen nt// 00000000E078: E07E1000 8004B8E6 - v_cvt_pk_bf16_f32 v192, v192, v193 // 00000000E080: D26800C0 000383C0 - v_cvt_pk_bf16_f32 v193, v194, v195 // 00000000E088: D26800C1 000387C2 - v_cvt_pk_bf16_f32 v194, v196, v197 // 00000000E090: D26800C2 00038BC4 - v_cvt_pk_bf16_f32 v195, v198, v199 // 00000000E098: D26800C3 00038FC6 - buffer_store_dwordx4 v[192:195], v231, s[16:19], 0 offen nt// 00000000E0A0: E07E1000 8004C0E7 - v_cvt_pk_bf16_f32 v200, v200, v201 // 00000000E0A8: D26800C8 000393C8 - v_cvt_pk_bf16_f32 v201, v202, v203 // 00000000E0B0: D26800C9 000397CA - v_cvt_pk_bf16_f32 v202, v204, v205 // 00000000E0B8: D26800CA 00039BCC - 
v_cvt_pk_bf16_f32 v203, v206, v207 // 00000000E0C0: D26800CB 00039FCE - buffer_store_dwordx4 v[200:203], v232, s[16:19], 0 offen nt// 00000000E0C8: E07E1000 8004C8E8 - v_cvt_pk_bf16_f32 v208, v208, v209 // 00000000E0D0: D26800D0 0003A3D0 - v_cvt_pk_bf16_f32 v209, v210, v211 // 00000000E0D8: D26800D1 0003A7D2 - v_cvt_pk_bf16_f32 v210, v212, v213 // 00000000E0E0: D26800D2 0003ABD4 - v_cvt_pk_bf16_f32 v211, v214, v215 // 00000000E0E8: D26800D3 0003AFD6 - buffer_store_dwordx4 v[208:211], v233, s[16:19], 0 offen nt// 00000000E0F0: E07E1000 8004D0E9 - s_nop 0 // 00000000E0F8: BF800000 - v_mov_b32_e32 v10, 0x80000000 // 00000000E0FC: 7E1402FF 80000000 - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000E104: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000E10C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000E114: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000E11C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E124: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E12C: 86A2221E - v_add_lshl_u32 v15, v7, v4, 1 // 00000000E130: D1FE000F 02060907 - v_cndmask_b32_e64 v15, v10, v15, s[34:35] // 00000000E138: D100000F 008A1F0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000E140: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000E148: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000E150: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000E158: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E160: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E168: 86A2221E - v_add_lshl_u32 v80, v7, v4, 1 // 00000000E16C: D1FE0050 02060907 - v_cndmask_b32_e64 v80, v10, v80, s[34:35] // 00000000E174: D1000050 008AA10A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000E17C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000E184: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000E18C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 
00000000E194: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E19C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E1A4: 86A2221E - v_add_lshl_u32 v81, v7, v4, 1 // 00000000E1A8: D1FE0051 02060907 - v_cndmask_b32_e64 v81, v10, v81, s[34:35] // 00000000E1B0: D1000051 008AA30A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000E1B8: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000E1C0: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000E1C8: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000E1D0: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E1D8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E1E0: 86A2221E - v_add_lshl_u32 v82, v7, v4, 1 // 00000000E1E4: D1FE0052 02060907 - v_cndmask_b32_e64 v82, v10, v82, s[34:35] // 00000000E1EC: D1000052 008AA50A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000E1F4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000E1FC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000E204: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000E20C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E214: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E21C: 86A2221E - v_add_lshl_u32 v83, v7, v4, 1 // 00000000E220: D1FE0053 02060907 - v_cndmask_b32_e64 v83, v10, v83, s[34:35] // 00000000E228: D1000053 008AA70A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000E230: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000E238: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000E240: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000E248: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E250: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E258: 86A2221E - v_add_lshl_u32 v84, v7, v4, 1 // 00000000E25C: D1FE0054 02060907 - v_cndmask_b32_e64 v84, v10, v84, s[34:35] // 00000000E264: D1000054 008AA90A - v_add_co_u32_e64 v5, 
vcc, v5, 1 // 00000000E26C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000E274: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000E27C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000E284: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E28C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E294: 86A2221E - v_add_lshl_u32 v85, v7, v4, 1 // 00000000E298: D1FE0055 02060907 - v_cndmask_b32_e64 v85, v10, v85, s[34:35] // 00000000E2A0: D1000055 008AAB0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000E2A8: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000E2B0: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000E2B8: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000E2C0: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E2C8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E2D0: 86A2221E - v_add_lshl_u32 v86, v7, v4, 1 // 00000000E2D4: D1FE0056 02060907 - v_cndmask_b32_e64 v86, v10, v86, s[34:35] // 00000000E2DC: D1000056 008AAD0A - v_accvgpr_read_b32 v16, a3 // 00000000E2E4: D3D84010 18000103 - v_accvgpr_read_b32 v17, a7 // 00000000E2EC: D3D84011 18000107 - v_accvgpr_read_b32 v18, a11 // 00000000E2F4: D3D84012 1800010B - v_accvgpr_read_b32 v19, a15 // 00000000E2FC: D3D84013 1800010F - v_accvgpr_read_b32 v20, a19 // 00000000E304: D3D84014 18000113 - v_accvgpr_read_b32 v21, a23 // 00000000E30C: D3D84015 18000117 - v_accvgpr_read_b32 v22, a27 // 00000000E314: D3D84016 1800011B - v_accvgpr_read_b32 v23, a31 // 00000000E31C: D3D84017 1800011F - v_accvgpr_read_b32 v24, a35 // 00000000E324: D3D84018 18000123 - v_accvgpr_read_b32 v25, a39 // 00000000E32C: D3D84019 18000127 - v_accvgpr_read_b32 v26, a43 // 00000000E334: D3D8401A 1800012B - v_accvgpr_read_b32 v27, a47 // 00000000E33C: D3D8401B 1800012F - v_accvgpr_read_b32 v28, a51 // 00000000E344: D3D8401C 18000133 - v_accvgpr_read_b32 v29, a55 // 00000000E34C: D3D8401D 18000137 - 
v_accvgpr_read_b32 v30, a59 // 00000000E354: D3D8401E 1800013B - v_accvgpr_read_b32 v31, a63 // 00000000E35C: D3D8401F 1800013F - v_accvgpr_read_b32 v32, a67 // 00000000E364: D3D84020 18000143 - v_accvgpr_read_b32 v33, a71 // 00000000E36C: D3D84021 18000147 - v_accvgpr_read_b32 v34, a75 // 00000000E374: D3D84022 1800014B - v_accvgpr_read_b32 v35, a79 // 00000000E37C: D3D84023 1800014F - v_accvgpr_read_b32 v36, a83 // 00000000E384: D3D84024 18000153 - v_accvgpr_read_b32 v37, a87 // 00000000E38C: D3D84025 18000157 - v_accvgpr_read_b32 v38, a91 // 00000000E394: D3D84026 1800015B - v_accvgpr_read_b32 v39, a95 // 00000000E39C: D3D84027 1800015F - v_accvgpr_read_b32 v40, a99 // 00000000E3A4: D3D84028 18000163 - v_accvgpr_read_b32 v41, a103 // 00000000E3AC: D3D84029 18000167 - v_accvgpr_read_b32 v42, a107 // 00000000E3B4: D3D8402A 1800016B - v_accvgpr_read_b32 v43, a111 // 00000000E3BC: D3D8402B 1800016F - v_accvgpr_read_b32 v44, a115 // 00000000E3C4: D3D8402C 18000173 - v_accvgpr_read_b32 v45, a119 // 00000000E3CC: D3D8402D 18000177 - v_accvgpr_read_b32 v46, a123 // 00000000E3D4: D3D8402E 1800017B - v_accvgpr_read_b32 v47, a127 // 00000000E3DC: D3D8402F 1800017F - v_accvgpr_read_b32 v48, a131 // 00000000E3E4: D3D84030 18000183 - v_accvgpr_read_b32 v49, a135 // 00000000E3EC: D3D84031 18000187 - v_accvgpr_read_b32 v50, a139 // 00000000E3F4: D3D84032 1800018B - v_accvgpr_read_b32 v51, a143 // 00000000E3FC: D3D84033 1800018F - v_accvgpr_read_b32 v52, a147 // 00000000E404: D3D84034 18000193 - v_accvgpr_read_b32 v53, a151 // 00000000E40C: D3D84035 18000197 - v_accvgpr_read_b32 v54, a155 // 00000000E414: D3D84036 1800019B - v_accvgpr_read_b32 v55, a159 // 00000000E41C: D3D84037 1800019F - v_accvgpr_read_b32 v56, a163 // 00000000E424: D3D84038 180001A3 - v_accvgpr_read_b32 v57, a167 // 00000000E42C: D3D84039 180001A7 - v_accvgpr_read_b32 v58, a171 // 00000000E434: D3D8403A 180001AB - v_accvgpr_read_b32 v59, a175 // 00000000E43C: D3D8403B 180001AF - v_accvgpr_read_b32 v60, a179 
// 00000000E444: D3D8403C 180001B3 - v_accvgpr_read_b32 v61, a183 // 00000000E44C: D3D8403D 180001B7 - v_accvgpr_read_b32 v62, a187 // 00000000E454: D3D8403E 180001BB - v_accvgpr_read_b32 v63, a191 // 00000000E45C: D3D8403F 180001BF - v_accvgpr_read_b32 v64, a195 // 00000000E464: D3D84040 180001C3 - v_accvgpr_read_b32 v65, a199 // 00000000E46C: D3D84041 180001C7 - v_accvgpr_read_b32 v66, a203 // 00000000E474: D3D84042 180001CB - v_accvgpr_read_b32 v67, a207 // 00000000E47C: D3D84043 180001CF - v_accvgpr_read_b32 v68, a211 // 00000000E484: D3D84044 180001D3 - v_accvgpr_read_b32 v69, a215 // 00000000E48C: D3D84045 180001D7 - v_accvgpr_read_b32 v70, a219 // 00000000E494: D3D84046 180001DB - v_accvgpr_read_b32 v71, a223 // 00000000E49C: D3D84047 180001DF - v_accvgpr_read_b32 v72, a227 // 00000000E4A4: D3D84048 180001E3 - v_accvgpr_read_b32 v73, a231 // 00000000E4AC: D3D84049 180001E7 - v_accvgpr_read_b32 v74, a235 // 00000000E4B4: D3D8404A 180001EB - v_accvgpr_read_b32 v75, a239 // 00000000E4BC: D3D8404B 180001EF - v_accvgpr_read_b32 v76, a243 // 00000000E4C4: D3D8404C 180001F3 - v_accvgpr_read_b32 v77, a247 // 00000000E4CC: D3D8404D 180001F7 - v_accvgpr_read_b32 v78, a251 // 00000000E4D4: D3D8404E 180001FB - v_accvgpr_read_b32 v79, a255 // 00000000E4DC: D3D8404F 180001FF - v_pk_mul_f32 v[16:17], s[44:45], v[16:17] op_sel_hi:[0,1] // 00000000E4E4: D3B14010 1002202C - v_pk_mul_f32 v[18:19], s[44:45], v[18:19] op_sel_hi:[0,1] // 00000000E4EC: D3B14012 1002242C - v_pk_mul_f32 v[20:21], s[44:45], v[20:21] op_sel_hi:[0,1] // 00000000E4F4: D3B14014 1002282C - v_pk_mul_f32 v[22:23], s[44:45], v[22:23] op_sel_hi:[0,1] // 00000000E4FC: D3B14016 10022C2C - v_pk_mul_f32 v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 00000000E504: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 00000000E50C: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 00000000E514: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] 
op_sel_hi:[0,1] // 00000000E51C: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 00000000E524: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 00000000E52C: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 00000000E534: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 00000000E53C: D3B14026 10024C2C - v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 00000000E544: D3B14028 1002502C - v_pk_mul_f32 v[42:43], s[44:45], v[42:43] op_sel_hi:[0,1] // 00000000E54C: D3B1402A 1002542C - v_pk_mul_f32 v[44:45], s[44:45], v[44:45] op_sel_hi:[0,1] // 00000000E554: D3B1402C 1002582C - v_pk_mul_f32 v[46:47], s[44:45], v[46:47] op_sel_hi:[0,1] // 00000000E55C: D3B1402E 10025C2C - v_pk_mul_f32 v[48:49], s[44:45], v[48:49] op_sel_hi:[0,1] // 00000000E564: D3B14030 1002602C - v_pk_mul_f32 v[50:51], s[44:45], v[50:51] op_sel_hi:[0,1] // 00000000E56C: D3B14032 1002642C - v_pk_mul_f32 v[52:53], s[44:45], v[52:53] op_sel_hi:[0,1] // 00000000E574: D3B14034 1002682C - v_pk_mul_f32 v[54:55], s[44:45], v[54:55] op_sel_hi:[0,1] // 00000000E57C: D3B14036 10026C2C - v_pk_mul_f32 v[56:57], s[44:45], v[56:57] op_sel_hi:[0,1] // 00000000E584: D3B14038 1002702C - v_pk_mul_f32 v[58:59], s[44:45], v[58:59] op_sel_hi:[0,1] // 00000000E58C: D3B1403A 1002742C - v_pk_mul_f32 v[60:61], s[44:45], v[60:61] op_sel_hi:[0,1] // 00000000E594: D3B1403C 1002782C - v_pk_mul_f32 v[62:63], s[44:45], v[62:63] op_sel_hi:[0,1] // 00000000E59C: D3B1403E 10027C2C - v_pk_mul_f32 v[64:65], s[44:45], v[64:65] op_sel_hi:[0,1] // 00000000E5A4: D3B14040 1002802C - v_pk_mul_f32 v[66:67], s[44:45], v[66:67] op_sel_hi:[0,1] // 00000000E5AC: D3B14042 1002842C - v_pk_mul_f32 v[68:69], s[44:45], v[68:69] op_sel_hi:[0,1] // 00000000E5B4: D3B14044 1002882C - v_pk_mul_f32 v[70:71], s[44:45], v[70:71] op_sel_hi:[0,1] // 00000000E5BC: D3B14046 10028C2C - v_pk_mul_f32 v[72:73], s[44:45], v[72:73] 
op_sel_hi:[0,1] // 00000000E5C4: D3B14048 1002902C - v_pk_mul_f32 v[74:75], s[44:45], v[74:75] op_sel_hi:[0,1] // 00000000E5CC: D3B1404A 1002942C - v_pk_mul_f32 v[76:77], s[44:45], v[76:77] op_sel_hi:[0,1] // 00000000E5D4: D3B1404C 1002982C - v_pk_mul_f32 v[78:79], s[44:45], v[78:79] op_sel_hi:[0,1] // 00000000E5DC: D3B1404E 10029C2C - v_mov_b32_e32 v12, 0xffff0000 // 00000000E5E4: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 00000000E5EC: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 00000000E5F4: 7E1C02FF 00007FFF - v_cvt_pk_bf16_f32 v16, v16, v17 // 00000000E5FC: D2680010 00022310 - v_cvt_pk_bf16_f32 v17, v18, v19 // 00000000E604: D2680011 00022712 - v_cvt_pk_bf16_f32 v18, v20, v21 // 00000000E60C: D2680012 00022B14 - v_cvt_pk_bf16_f32 v19, v22, v23 // 00000000E614: D2680013 00022F16 - buffer_store_dwordx4 v[16:19], v15, s[16:19], 0 offen nt // 00000000E61C: E07E1000 8004100F - v_cvt_pk_bf16_f32 v24, v24, v25 // 00000000E624: D2680018 00023318 - v_cvt_pk_bf16_f32 v25, v26, v27 // 00000000E62C: D2680019 0002371A - v_cvt_pk_bf16_f32 v26, v28, v29 // 00000000E634: D268001A 00023B1C - v_cvt_pk_bf16_f32 v27, v30, v31 // 00000000E63C: D268001B 00023F1E - buffer_store_dwordx4 v[24:27], v80, s[16:19], 0 offen nt // 00000000E644: E07E1000 80041850 - v_cvt_pk_bf16_f32 v32, v32, v33 // 00000000E64C: D2680020 00024320 - v_cvt_pk_bf16_f32 v33, v34, v35 // 00000000E654: D2680021 00024722 - v_cvt_pk_bf16_f32 v34, v36, v37 // 00000000E65C: D2680022 00024B24 - v_cvt_pk_bf16_f32 v35, v38, v39 // 00000000E664: D2680023 00024F26 - buffer_store_dwordx4 v[32:35], v81, s[16:19], 0 offen nt // 00000000E66C: E07E1000 80042051 - v_cvt_pk_bf16_f32 v40, v40, v41 // 00000000E674: D2680028 00025328 - v_cvt_pk_bf16_f32 v41, v42, v43 // 00000000E67C: D2680029 0002572A - v_cvt_pk_bf16_f32 v42, v44, v45 // 00000000E684: D268002A 00025B2C - v_cvt_pk_bf16_f32 v43, v46, v47 // 00000000E68C: D268002B 00025F2E - buffer_store_dwordx4 v[40:43], v82, s[16:19], 0 offen nt // 00000000E694: 
E07E1000 80042852 - v_cvt_pk_bf16_f32 v48, v48, v49 // 00000000E69C: D2680030 00026330 - v_cvt_pk_bf16_f32 v49, v50, v51 // 00000000E6A4: D2680031 00026732 - v_cvt_pk_bf16_f32 v50, v52, v53 // 00000000E6AC: D2680032 00026B34 - v_cvt_pk_bf16_f32 v51, v54, v55 // 00000000E6B4: D2680033 00026F36 - buffer_store_dwordx4 v[48:51], v83, s[16:19], 0 offen nt // 00000000E6BC: E07E1000 80043053 - v_cvt_pk_bf16_f32 v56, v56, v57 // 00000000E6C4: D2680038 00027338 - v_cvt_pk_bf16_f32 v57, v58, v59 // 00000000E6CC: D2680039 0002773A - v_cvt_pk_bf16_f32 v58, v60, v61 // 00000000E6D4: D268003A 00027B3C - v_cvt_pk_bf16_f32 v59, v62, v63 // 00000000E6DC: D268003B 00027F3E - buffer_store_dwordx4 v[56:59], v84, s[16:19], 0 offen nt // 00000000E6E4: E07E1000 80043854 - v_cvt_pk_bf16_f32 v64, v64, v65 // 00000000E6EC: D2680040 00028340 - v_cvt_pk_bf16_f32 v65, v66, v67 // 00000000E6F4: D2680041 00028742 - v_cvt_pk_bf16_f32 v66, v68, v69 // 00000000E6FC: D2680042 00028B44 - v_cvt_pk_bf16_f32 v67, v70, v71 // 00000000E704: D2680043 00028F46 - buffer_store_dwordx4 v[64:67], v85, s[16:19], 0 offen nt // 00000000E70C: E07E1000 80044055 - v_cvt_pk_bf16_f32 v72, v72, v73 // 00000000E714: D2680048 00029348 - v_cvt_pk_bf16_f32 v73, v74, v75 // 00000000E71C: D2680049 0002974A - v_cvt_pk_bf16_f32 v74, v76, v77 // 00000000E724: D268004A 00029B4C - v_cvt_pk_bf16_f32 v75, v78, v79 // 00000000E72C: D268004B 00029F4E - buffer_store_dwordx4 v[72:75], v86, s[16:19], 0 offen nt // 00000000E734: E07E1000 80044856 - s_nop 0 // 00000000E73C: BF800000 - s_branch label_GW_End_2 // 00000000E740: BF8243A3 - -label_GW_B0_E1_M_1: - v_mov_b32_e32 v10, 0x80000000 // 00000000E744: 7E1402FF 80000000 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000E74C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E754: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E75C: 86A2221E - v_add_lshl_u32 v129, v7, v4, 1 // 00000000E760: D1FE0081 02060907 - v_cndmask_b32_e64 v129, v10, v129, 
s[34:35] // 00000000E768: D1000081 008B030A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000E770: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000E778: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E780: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E788: 86A2221E - v_add_lshl_u32 v130, v7, v8, 1 // 00000000E78C: D1FE0082 02061107 - v_cndmask_b32_e64 v130, v10, v130, s[34:35] // 00000000E794: D1000082 008B050A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000E79C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000E7A4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E7AC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E7B4: 86A2221E - v_add_lshl_u32 v131, v7, v8, 1 // 00000000E7B8: D1FE0083 02061107 - v_cndmask_b32_e64 v131, v10, v131, s[34:35] // 00000000E7C0: D1000083 008B070A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000E7C8: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000E7D0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E7D8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E7E0: 86A2221E - v_add_lshl_u32 v135, v7, v8, 1 // 00000000E7E4: D1FE0087 02061107 - v_cndmask_b32_e64 v135, v10, v135, s[34:35] // 00000000E7EC: D1000087 008B0F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000E7F4: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000E7FC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E804: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E80C: 86A2221E - v_add_lshl_u32 v136, v7, v8, 1 // 00000000E810: D1FE0088 02061107 - v_cndmask_b32_e64 v136, v10, v136, s[34:35] // 00000000E818: D1000088 008B110A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000E820: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000E828: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E830: D0C90022 00003305 - s_and_b64 s[34:35], 
s[30:31], s[34:35] // 00000000E838: 86A2221E - v_add_lshl_u32 v137, v7, v8, 1 // 00000000E83C: D1FE0089 02061107 - v_cndmask_b32_e64 v137, v10, v137, s[34:35] // 00000000E844: D1000089 008B130A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000E84C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000E854: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E85C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E864: 86A2221E - v_add_lshl_u32 v138, v7, v8, 1 // 00000000E868: D1FE008A 02061107 - v_cndmask_b32_e64 v138, v10, v138, s[34:35] // 00000000E870: D100008A 008B150A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000E878: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000E880: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E888: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E890: 86A2221E - v_add_lshl_u32 v139, v7, v8, 1 // 00000000E894: D1FE008B 02061107 - v_cndmask_b32_e64 v139, v10, v139, s[34:35] // 00000000E89C: D100008B 008B170A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000E8A4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000E8AC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000E8B4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000E8BC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E8C4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E8CC: 86A2221E - v_add_lshl_u32 v140, v7, v4, 1 // 00000000E8D0: D1FE008C 02060907 - v_cndmask_b32_e64 v140, v10, v140, s[34:35] // 00000000E8D8: D100008C 008B190A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000E8E0: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000E8E8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E8F0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E8F8: 86A2221E - v_add_lshl_u32 v141, v7, v8, 1 // 00000000E8FC: D1FE008D 02061107 - v_cndmask_b32_e64 v141, v10, v141, 
s[34:35] // 00000000E904: D100008D 008B1B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000E90C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000E914: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E91C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E924: 86A2221E - v_add_lshl_u32 v142, v7, v8, 1 // 00000000E928: D1FE008E 02061107 - v_cndmask_b32_e64 v142, v10, v142, s[34:35] // 00000000E930: D100008E 008B1D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000E938: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000E940: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E948: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E950: 86A2221E - v_add_lshl_u32 v143, v7, v8, 1 // 00000000E954: D1FE008F 02061107 - v_cndmask_b32_e64 v143, v10, v143, s[34:35] // 00000000E95C: D100008F 008B1F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000E964: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000E96C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E974: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E97C: 86A2221E - v_add_lshl_u32 v144, v7, v8, 1 // 00000000E980: D1FE0090 02061107 - v_cndmask_b32_e64 v144, v10, v144, s[34:35] // 00000000E988: D1000090 008B210A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000E990: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000E998: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E9A0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000E9A8: 86A2221E - v_add_lshl_u32 v145, v7, v8, 1 // 00000000E9AC: D1FE0091 02061107 - v_cndmask_b32_e64 v145, v10, v145, s[34:35] // 00000000E9B4: D1000091 008B230A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000E9BC: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000E9C4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E9CC: D0C90022 00003305 - s_and_b64 s[34:35], 
s[30:31], s[34:35] // 00000000E9D4: 86A2221E - v_add_lshl_u32 v146, v7, v8, 1 // 00000000E9D8: D1FE0092 02061107 - v_cndmask_b32_e64 v146, v10, v146, s[34:35] // 00000000E9E0: D1000092 008B250A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000E9E8: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000E9F0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000E9F8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EA00: 86A2221E - v_add_lshl_u32 v147, v7, v8, 1 // 00000000EA04: D1FE0093 02061107 - v_cndmask_b32_e64 v147, v10, v147, s[34:35] // 00000000EA0C: D1000093 008B270A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000EA14: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000EA1C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000EA24: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000EA2C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EA34: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EA3C: 86A2221E - v_add_lshl_u32 v148, v7, v4, 1 // 00000000EA40: D1FE0094 02060907 - v_cndmask_b32_e64 v148, v10, v148, s[34:35] // 00000000EA48: D1000094 008B290A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000EA50: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EA58: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EA60: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EA68: 86A2221E - v_add_lshl_u32 v149, v7, v8, 1 // 00000000EA6C: D1FE0095 02061107 - v_cndmask_b32_e64 v149, v10, v149, s[34:35] // 00000000EA74: D1000095 008B2B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000EA7C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EA84: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EA8C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EA94: 86A2221E - v_add_lshl_u32 v150, v7, v8, 1 // 00000000EA98: D1FE0096 02061107 - v_cndmask_b32_e64 v150, v10, v150, 
s[34:35] // 00000000EAA0: D1000096 008B2D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000EAA8: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EAB0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EAB8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EAC0: 86A2221E - v_add_lshl_u32 v151, v7, v8, 1 // 00000000EAC4: D1FE0097 02061107 - v_cndmask_b32_e64 v151, v10, v151, s[34:35] // 00000000EACC: D1000097 008B2F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000EAD4: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EADC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EAE4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EAEC: 86A2221E - v_add_lshl_u32 v152, v7, v8, 1 // 00000000EAF0: D1FE0098 02061107 - v_cndmask_b32_e64 v152, v10, v152, s[34:35] // 00000000EAF8: D1000098 008B310A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000EB00: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EB08: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EB10: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EB18: 86A2221E - v_add_lshl_u32 v153, v7, v8, 1 // 00000000EB1C: D1FE0099 02061107 - v_cndmask_b32_e64 v153, v10, v153, s[34:35] // 00000000EB24: D1000099 008B330A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000EB2C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EB34: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EB3C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EB44: 86A2221E - v_add_lshl_u32 v154, v7, v8, 1 // 00000000EB48: D1FE009A 02061107 - v_cndmask_b32_e64 v154, v10, v154, s[34:35] // 00000000EB50: D100009A 008B350A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000EB58: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EB60: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EB68: D0C90022 00003305 - s_and_b64 s[34:35], 
s[30:31], s[34:35] // 00000000EB70: 86A2221E - v_add_lshl_u32 v155, v7, v8, 1 // 00000000EB74: D1FE009B 02061107 - v_cndmask_b32_e64 v155, v10, v155, s[34:35] // 00000000EB7C: D100009B 008B370A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000EB84: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000EB8C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000EB94: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000EB9C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EBA4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EBAC: 86A2221E - v_add_lshl_u32 v156, v7, v4, 1 // 00000000EBB0: D1FE009C 02060907 - v_cndmask_b32_e64 v156, v10, v156, s[34:35] // 00000000EBB8: D100009C 008B390A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000EBC0: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EBC8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EBD0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EBD8: 86A2221E - v_add_lshl_u32 v157, v7, v8, 1 // 00000000EBDC: D1FE009D 02061107 - v_cndmask_b32_e64 v157, v10, v157, s[34:35] // 00000000EBE4: D100009D 008B3B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000EBEC: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EBF4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EBFC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EC04: 86A2221E - v_add_lshl_u32 v158, v7, v8, 1 // 00000000EC08: D1FE009E 02061107 - v_cndmask_b32_e64 v158, v10, v158, s[34:35] // 00000000EC10: D100009E 008B3D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000EC18: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EC20: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EC28: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EC30: 86A2221E - v_add_lshl_u32 v159, v7, v8, 1 // 00000000EC34: D1FE009F 02061107 - v_cndmask_b32_e64 v159, v10, v159, 
s[34:35] // 00000000EC3C: D100009F 008B3F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000EC44: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EC4C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EC54: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EC5C: 86A2221E - v_add_lshl_u32 v160, v7, v8, 1 // 00000000EC60: D1FE00A0 02061107 - v_cndmask_b32_e64 v160, v10, v160, s[34:35] // 00000000EC68: D10000A0 008B410A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000EC70: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EC78: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EC80: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EC88: 86A2221E - v_add_lshl_u32 v161, v7, v8, 1 // 00000000EC8C: D1FE00A1 02061107 - v_cndmask_b32_e64 v161, v10, v161, s[34:35] // 00000000EC94: D10000A1 008B430A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000EC9C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000ECA4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000ECAC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000ECB4: 86A2221E - v_add_lshl_u32 v162, v7, v8, 1 // 00000000ECB8: D1FE00A2 02061107 - v_cndmask_b32_e64 v162, v10, v162, s[34:35] // 00000000ECC0: D10000A2 008B450A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000ECC8: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000ECD0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000ECD8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000ECE0: 86A2221E - v_add_lshl_u32 v163, v7, v8, 1 // 00000000ECE4: D1FE00A3 02061107 - v_cndmask_b32_e64 v163, v10, v163, s[34:35] // 00000000ECEC: D10000A3 008B470A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000ECF4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000ECFC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000ED04: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 
00000000ED0C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000ED14: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000ED1C: 86A2221E - v_add_lshl_u32 v164, v7, v4, 1 // 00000000ED20: D1FE00A4 02060907 - v_cndmask_b32_e64 v164, v10, v164, s[34:35] // 00000000ED28: D10000A4 008B490A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000ED30: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000ED38: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000ED40: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000ED48: 86A2221E - v_add_lshl_u32 v165, v7, v8, 1 // 00000000ED4C: D1FE00A5 02061107 - v_cndmask_b32_e64 v165, v10, v165, s[34:35] // 00000000ED54: D10000A5 008B4B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000ED5C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000ED64: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000ED6C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000ED74: 86A2221E - v_add_lshl_u32 v166, v7, v8, 1 // 00000000ED78: D1FE00A6 02061107 - v_cndmask_b32_e64 v166, v10, v166, s[34:35] // 00000000ED80: D10000A6 008B4D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000ED88: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000ED90: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000ED98: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EDA0: 86A2221E - v_add_lshl_u32 v167, v7, v8, 1 // 00000000EDA4: D1FE00A7 02061107 - v_cndmask_b32_e64 v167, v10, v167, s[34:35] // 00000000EDAC: D10000A7 008B4F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000EDB4: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EDBC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EDC4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EDCC: 86A2221E - v_add_lshl_u32 v168, v7, v8, 1 // 00000000EDD0: D1FE00A8 02061107 - v_cndmask_b32_e64 v168, v10, v168, 
s[34:35] // 00000000EDD8: D10000A8 008B510A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000EDE0: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EDE8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EDF0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EDF8: 86A2221E - v_add_lshl_u32 v169, v7, v8, 1 // 00000000EDFC: D1FE00A9 02061107 - v_cndmask_b32_e64 v169, v10, v169, s[34:35] // 00000000EE04: D10000A9 008B530A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000EE0C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EE14: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EE1C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EE24: 86A2221E - v_add_lshl_u32 v170, v7, v8, 1 // 00000000EE28: D1FE00AA 02061107 - v_cndmask_b32_e64 v170, v10, v170, s[34:35] // 00000000EE30: D10000AA 008B550A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000EE38: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EE40: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EE48: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EE50: 86A2221E - v_add_lshl_u32 v171, v7, v8, 1 // 00000000EE54: D1FE00AB 02061107 - v_cndmask_b32_e64 v171, v10, v171, s[34:35] // 00000000EE5C: D10000AB 008B570A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000EE64: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000EE6C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000EE74: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000EE7C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EE84: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EE8C: 86A2221E - v_add_lshl_u32 v172, v7, v4, 1 // 00000000EE90: D1FE00AC 02060907 - v_cndmask_b32_e64 v172, v10, v172, s[34:35] // 00000000EE98: D10000AC 008B590A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000EEA0: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 
00000000EEA8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EEB0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EEB8: 86A2221E - v_add_lshl_u32 v173, v7, v8, 1 // 00000000EEBC: D1FE00AD 02061107 - v_cndmask_b32_e64 v173, v10, v173, s[34:35] // 00000000EEC4: D10000AD 008B5B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000EECC: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EED4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EEDC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EEE4: 86A2221E - v_add_lshl_u32 v174, v7, v8, 1 // 00000000EEE8: D1FE00AE 02061107 - v_cndmask_b32_e64 v174, v10, v174, s[34:35] // 00000000EEF0: D10000AE 008B5D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000EEF8: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EF00: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EF08: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EF10: 86A2221E - v_add_lshl_u32 v175, v7, v8, 1 // 00000000EF14: D1FE00AF 02061107 - v_cndmask_b32_e64 v175, v10, v175, s[34:35] // 00000000EF1C: D10000AF 008B5F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000EF24: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EF2C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EF34: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EF3C: 86A2221E - v_add_lshl_u32 v176, v7, v8, 1 // 00000000EF40: D1FE00B0 02061107 - v_cndmask_b32_e64 v176, v10, v176, s[34:35] // 00000000EF48: D10000B0 008B610A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000EF50: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EF58: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EF60: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EF68: 86A2221E - v_add_lshl_u32 v177, v7, v8, 1 // 00000000EF6C: D1FE00B1 02061107 - v_cndmask_b32_e64 v177, v10, v177, 
s[34:35] // 00000000EF74: D10000B1 008B630A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000EF7C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EF84: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EF8C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EF94: 86A2221E - v_add_lshl_u32 v178, v7, v8, 1 // 00000000EF98: D1FE00B2 02061107 - v_cndmask_b32_e64 v178, v10, v178, s[34:35] // 00000000EFA0: D10000B2 008B650A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000EFA8: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000EFB0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EFB8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EFC0: 86A2221E - v_add_lshl_u32 v179, v7, v8, 1 // 00000000EFC4: D1FE00B3 02061107 - v_cndmask_b32_e64 v179, v10, v179, s[34:35] // 00000000EFCC: D10000B3 008B670A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000EFD4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000EFDC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000EFE4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000EFEC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000EFF4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000EFFC: 86A2221E - v_add_lshl_u32 v180, v7, v4, 1 // 00000000F000: D1FE00B4 02060907 - v_cndmask_b32_e64 v180, v10, v180, s[34:35] // 00000000F008: D10000B4 008B690A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000F010: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F018: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F020: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F028: 86A2221E - v_add_lshl_u32 v181, v7, v8, 1 // 00000000F02C: D1FE00B5 02061107 - v_cndmask_b32_e64 v181, v10, v181, s[34:35] // 00000000F034: D10000B5 008B6B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000F03C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 
00000000F044: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F04C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F054: 86A2221E - v_add_lshl_u32 v182, v7, v8, 1 // 00000000F058: D1FE00B6 02061107 - v_cndmask_b32_e64 v182, v10, v182, s[34:35] // 00000000F060: D10000B6 008B6D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000F068: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F070: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F078: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F080: 86A2221E - v_add_lshl_u32 v183, v7, v8, 1 // 00000000F084: D1FE00B7 02061107 - v_cndmask_b32_e64 v183, v10, v183, s[34:35] // 00000000F08C: D10000B7 008B6F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000F094: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F09C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F0A4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F0AC: 86A2221E - v_add_lshl_u32 v184, v7, v8, 1 // 00000000F0B0: D1FE00B8 02061107 - v_cndmask_b32_e64 v184, v10, v184, s[34:35] // 00000000F0B8: D10000B8 008B710A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000F0C0: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F0C8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F0D0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F0D8: 86A2221E - v_add_lshl_u32 v185, v7, v8, 1 // 00000000F0DC: D1FE00B9 02061107 - v_cndmask_b32_e64 v185, v10, v185, s[34:35] // 00000000F0E4: D10000B9 008B730A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000F0EC: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F0F4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F0FC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F104: 86A2221E - v_add_lshl_u32 v186, v7, v8, 1 // 00000000F108: D1FE00BA 02061107 - v_cndmask_b32_e64 v186, v10, v186, 
s[34:35] // 00000000F110: D10000BA 008B750A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000F118: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F120: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F128: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F130: 86A2221E - v_add_lshl_u32 v187, v7, v8, 1 // 00000000F134: D1FE00BB 02061107 - v_cndmask_b32_e64 v187, v10, v187, s[34:35] // 00000000F13C: D10000BB 008B770A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000F144: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000F14C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000F154: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000F15C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F164: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F16C: 86A2221E - v_add_lshl_u32 v188, v7, v4, 1 // 00000000F170: D1FE00BC 02060907 - v_cndmask_b32_e64 v188, v10, v188, s[34:35] // 00000000F178: D10000BC 008B790A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000F180: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F188: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F190: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F198: 86A2221E - v_add_lshl_u32 v189, v7, v8, 1 // 00000000F19C: D1FE00BD 02061107 - v_cndmask_b32_e64 v189, v10, v189, s[34:35] // 00000000F1A4: D10000BD 008B7B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000F1AC: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F1B4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F1BC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F1C4: 86A2221E - v_add_lshl_u32 v190, v7, v8, 1 // 00000000F1C8: D1FE00BE 02061107 - v_cndmask_b32_e64 v190, v10, v190, s[34:35] // 00000000F1D0: D10000BE 008B7D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000F1D8: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 
00000000F1E0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F1E8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F1F0: 86A2221E - v_add_lshl_u32 v191, v7, v8, 1 // 00000000F1F4: D1FE00BF 02061107 - v_cndmask_b32_e64 v191, v10, v191, s[34:35] // 00000000F1FC: D10000BF 008B7F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000F204: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F20C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F214: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F21C: 86A2221E - v_add_lshl_u32 v192, v7, v8, 1 // 00000000F220: D1FE00C0 02061107 - v_cndmask_b32_e64 v192, v10, v192, s[34:35] // 00000000F228: D10000C0 008B810A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000F230: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F238: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F240: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F248: 86A2221E - v_add_lshl_u32 v193, v7, v8, 1 // 00000000F24C: D1FE00C1 02061107 - v_cndmask_b32_e64 v193, v10, v193, s[34:35] // 00000000F254: D10000C1 008B830A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000F25C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F264: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F26C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F274: 86A2221E - v_add_lshl_u32 v194, v7, v8, 1 // 00000000F278: D1FE00C2 02061107 - v_cndmask_b32_e64 v194, v10, v194, s[34:35] // 00000000F280: D10000C2 008B850A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000F288: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F290: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F298: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F2A0: 86A2221E - v_add_lshl_u32 v195, v7, v8, 1 // 00000000F2A4: D1FE00C3 02061107 - v_cndmask_b32_e64 v195, v10, v195, 
s[34:35] // 00000000F2AC: D10000C3 008B870A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000F2B4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000F2BC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000F2C4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000F2CC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F2D4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F2DC: 86A2221E - v_add_lshl_u32 v196, v7, v4, 1 // 00000000F2E0: D1FE00C4 02060907 - v_cndmask_b32_e64 v196, v10, v196, s[34:35] // 00000000F2E8: D10000C4 008B890A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000F2F0: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F2F8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F300: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F308: 86A2221E - v_add_lshl_u32 v197, v7, v8, 1 // 00000000F30C: D1FE00C5 02061107 - v_cndmask_b32_e64 v197, v10, v197, s[34:35] // 00000000F314: D10000C5 008B8B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000F31C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F324: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F32C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F334: 86A2221E - v_add_lshl_u32 v198, v7, v8, 1 // 00000000F338: D1FE00C6 02061107 - v_cndmask_b32_e64 v198, v10, v198, s[34:35] // 00000000F340: D10000C6 008B8D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000F348: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F350: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F358: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F360: 86A2221E - v_add_lshl_u32 v199, v7, v8, 1 // 00000000F364: D1FE00C7 02061107 - v_cndmask_b32_e64 v199, v10, v199, s[34:35] // 00000000F36C: D10000C7 008B8F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000F374: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 
00000000F37C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F384: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F38C: 86A2221E - v_add_lshl_u32 v200, v7, v8, 1 // 00000000F390: D1FE00C8 02061107 - v_cndmask_b32_e64 v200, v10, v200, s[34:35] // 00000000F398: D10000C8 008B910A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000F3A0: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F3A8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F3B0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F3B8: 86A2221E - v_add_lshl_u32 v201, v7, v8, 1 // 00000000F3BC: D1FE00C9 02061107 - v_cndmask_b32_e64 v201, v10, v201, s[34:35] // 00000000F3C4: D10000C9 008B930A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000F3CC: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F3D4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F3DC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F3E4: 86A2221E - v_add_lshl_u32 v202, v7, v8, 1 // 00000000F3E8: D1FE00CA 02061107 - v_cndmask_b32_e64 v202, v10, v202, s[34:35] // 00000000F3F0: D10000CA 008B950A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000F3F8: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F400: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F408: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F410: 86A2221E - v_add_lshl_u32 v203, v7, v8, 1 // 00000000F414: D1FE00CB 02061107 - v_cndmask_b32_e64 v203, v10, v203, s[34:35] // 00000000F41C: D10000CB 008B970A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000F424: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000F42C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000F434: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000F43C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F444: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 
00000000F44C: 86A2221E - v_add_lshl_u32 v204, v7, v4, 1 // 00000000F450: D1FE00CC 02060907 - v_cndmask_b32_e64 v204, v10, v204, s[34:35] // 00000000F458: D10000CC 008B990A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000F460: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F468: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F470: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F478: 86A2221E - v_add_lshl_u32 v205, v7, v8, 1 // 00000000F47C: D1FE00CD 02061107 - v_cndmask_b32_e64 v205, v10, v205, s[34:35] // 00000000F484: D10000CD 008B9B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000F48C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F494: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F49C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F4A4: 86A2221E - v_add_lshl_u32 v206, v7, v8, 1 // 00000000F4A8: D1FE00CE 02061107 - v_cndmask_b32_e64 v206, v10, v206, s[34:35] // 00000000F4B0: D10000CE 008B9D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000F4B8: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F4C0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F4C8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F4D0: 86A2221E - v_add_lshl_u32 v207, v7, v8, 1 // 00000000F4D4: D1FE00CF 02061107 - v_cndmask_b32_e64 v207, v10, v207, s[34:35] // 00000000F4DC: D10000CF 008B9F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000F4E4: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F4EC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F4F4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F4FC: 86A2221E - v_add_lshl_u32 v208, v7, v8, 1 // 00000000F500: D1FE00D0 02061107 - v_cndmask_b32_e64 v208, v10, v208, s[34:35] // 00000000F508: D10000D0 008BA10A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000F510: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 
00000000F518: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F520: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F528: 86A2221E - v_add_lshl_u32 v209, v7, v8, 1 // 00000000F52C: D1FE00D1 02061107 - v_cndmask_b32_e64 v209, v10, v209, s[34:35] // 00000000F534: D10000D1 008BA30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000F53C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F544: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F54C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F554: 86A2221E - v_add_lshl_u32 v210, v7, v8, 1 // 00000000F558: D1FE00D2 02061107 - v_cndmask_b32_e64 v210, v10, v210, s[34:35] // 00000000F560: D10000D2 008BA50A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000F568: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F570: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F578: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F580: 86A2221E - v_add_lshl_u32 v211, v7, v8, 1 // 00000000F584: D1FE00D3 02061107 - v_cndmask_b32_e64 v211, v10, v211, s[34:35] // 00000000F58C: D10000D3 008BA70A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000F594: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000F59C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000F5A4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000F5AC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F5B4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F5BC: 86A2221E - v_add_lshl_u32 v212, v7, v4, 1 // 00000000F5C0: D1FE00D4 02060907 - v_cndmask_b32_e64 v212, v10, v212, s[34:35] // 00000000F5C8: D10000D4 008BA90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000F5D0: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F5D8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F5E0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 
00000000F5E8: 86A2221E - v_add_lshl_u32 v213, v7, v8, 1 // 00000000F5EC: D1FE00D5 02061107 - v_cndmask_b32_e64 v213, v10, v213, s[34:35] // 00000000F5F4: D10000D5 008BAB0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000F5FC: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F604: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F60C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F614: 86A2221E - v_add_lshl_u32 v214, v7, v8, 1 // 00000000F618: D1FE00D6 02061107 - v_cndmask_b32_e64 v214, v10, v214, s[34:35] // 00000000F620: D10000D6 008BAD0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000F628: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F630: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F638: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F640: 86A2221E - v_add_lshl_u32 v215, v7, v8, 1 // 00000000F644: D1FE00D7 02061107 - v_cndmask_b32_e64 v215, v10, v215, s[34:35] // 00000000F64C: D10000D7 008BAF0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000F654: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F65C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F664: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F66C: 86A2221E - v_add_lshl_u32 v216, v7, v8, 1 // 00000000F670: D1FE00D8 02061107 - v_cndmask_b32_e64 v216, v10, v216, s[34:35] // 00000000F678: D10000D8 008BB10A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000F680: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F688: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F690: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F698: 86A2221E - v_add_lshl_u32 v217, v7, v8, 1 // 00000000F69C: D1FE00D9 02061107 - v_cndmask_b32_e64 v217, v10, v217, s[34:35] // 00000000F6A4: D10000D9 008BB30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000F6AC: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 
00000000F6B4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F6BC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F6C4: 86A2221E - v_add_lshl_u32 v218, v7, v8, 1 // 00000000F6C8: D1FE00DA 02061107 - v_cndmask_b32_e64 v218, v10, v218, s[34:35] // 00000000F6D0: D10000DA 008BB50A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000F6D8: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F6E0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F6E8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F6F0: 86A2221E - v_add_lshl_u32 v219, v7, v8, 1 // 00000000F6F4: D1FE00DB 02061107 - v_cndmask_b32_e64 v219, v10, v219, s[34:35] // 00000000F6FC: D10000DB 008BB70A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000F704: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000F70C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000F714: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000F71C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F724: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F72C: 86A2221E - v_add_lshl_u32 v220, v7, v4, 1 // 00000000F730: D1FE00DC 02060907 - v_cndmask_b32_e64 v220, v10, v220, s[34:35] // 00000000F738: D10000DC 008BB90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000F740: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F748: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F750: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F758: 86A2221E - v_add_lshl_u32 v221, v7, v8, 1 // 00000000F75C: D1FE00DD 02061107 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 00000000F764: D10000DD 008BBB0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000F76C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F774: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F77C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 
00000000F784: 86A2221E - v_add_lshl_u32 v222, v7, v8, 1 // 00000000F788: D1FE00DE 02061107 - v_cndmask_b32_e64 v222, v10, v222, s[34:35] // 00000000F790: D10000DE 008BBD0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000F798: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F7A0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F7A8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F7B0: 86A2221E - v_add_lshl_u32 v223, v7, v8, 1 // 00000000F7B4: D1FE00DF 02061107 - v_cndmask_b32_e64 v223, v10, v223, s[34:35] // 00000000F7BC: D10000DF 008BBF0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000F7C4: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F7CC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F7D4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F7DC: 86A2221E - v_add_lshl_u32 v224, v7, v8, 1 // 00000000F7E0: D1FE00E0 02061107 - v_cndmask_b32_e64 v224, v10, v224, s[34:35] // 00000000F7E8: D10000E0 008BC10A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000F7F0: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F7F8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F800: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F808: 86A2221E - v_add_lshl_u32 v225, v7, v8, 1 // 00000000F80C: D1FE00E1 02061107 - v_cndmask_b32_e64 v225, v10, v225, s[34:35] // 00000000F814: D10000E1 008BC30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000F81C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F824: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F82C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F834: 86A2221E - v_add_lshl_u32 v226, v7, v8, 1 // 00000000F838: D1FE00E2 02061107 - v_cndmask_b32_e64 v226, v10, v226, s[34:35] // 00000000F840: D10000E2 008BC50A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000F848: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 
00000000F850: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F858: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F860: 86A2221E - v_add_lshl_u32 v227, v7, v8, 1 // 00000000F864: D1FE00E3 02061107 - v_cndmask_b32_e64 v227, v10, v227, s[34:35] // 00000000F86C: D10000E3 008BC70A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000F874: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000F87C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000F884: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000F88C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F894: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F89C: 86A2221E - v_add_lshl_u32 v228, v7, v4, 1 // 00000000F8A0: D1FE00E4 02060907 - v_cndmask_b32_e64 v228, v10, v228, s[34:35] // 00000000F8A8: D10000E4 008BC90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000F8B0: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F8B8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F8C0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F8C8: 86A2221E - v_add_lshl_u32 v229, v7, v8, 1 // 00000000F8CC: D1FE00E5 02061107 - v_cndmask_b32_e64 v229, v10, v229, s[34:35] // 00000000F8D4: D10000E5 008BCB0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000F8DC: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F8E4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F8EC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F8F4: 86A2221E - v_add_lshl_u32 v230, v7, v8, 1 // 00000000F8F8: D1FE00E6 02061107 - v_cndmask_b32_e64 v230, v10, v230, s[34:35] // 00000000F900: D10000E6 008BCD0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000F908: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F910: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F918: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 
00000000F920: 86A2221E - v_add_lshl_u32 v231, v7, v8, 1 // 00000000F924: D1FE00E7 02061107 - v_cndmask_b32_e64 v231, v10, v231, s[34:35] // 00000000F92C: D10000E7 008BCF0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000F934: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F93C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F944: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F94C: 86A2221E - v_add_lshl_u32 v232, v7, v8, 1 // 00000000F950: D1FE00E8 02061107 - v_cndmask_b32_e64 v232, v10, v232, s[34:35] // 00000000F958: D10000E8 008BD10A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000F960: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F968: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F970: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F978: 86A2221E - v_add_lshl_u32 v233, v7, v8, 1 // 00000000F97C: D1FE00E9 02061107 - v_cndmask_b32_e64 v233, v10, v233, s[34:35] // 00000000F984: D10000E9 008BD30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000F98C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F994: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F99C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F9A4: 86A2221E - v_add_lshl_u32 v234, v7, v8, 1 // 00000000F9A8: D1FE00EA 02061107 - v_cndmask_b32_e64 v234, v10, v234, s[34:35] // 00000000F9B0: D10000EA 008BD50A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000F9B8: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000F9C0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000F9C8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000F9D0: 86A2221E - v_add_lshl_u32 v235, v7, v8, 1 // 00000000F9D4: D1FE00EB 02061107 - v_cndmask_b32_e64 v235, v10, v235, s[34:35] // 00000000F9DC: D10000EB 008BD70A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000F9E4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 
00000000F9EC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000F9F4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000F9FC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000FA04: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000FA0C: 86A2221E - v_add_lshl_u32 v236, v7, v4, 1 // 00000000FA10: D1FE00EC 02060907 - v_cndmask_b32_e64 v236, v10, v236, s[34:35] // 00000000FA18: D10000EC 008BD90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000FA20: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000FA28: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000FA30: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000FA38: 86A2221E - v_add_lshl_u32 v237, v7, v8, 1 // 00000000FA3C: D1FE00ED 02061107 - v_cndmask_b32_e64 v237, v10, v237, s[34:35] // 00000000FA44: D10000ED 008BDB0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000FA4C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000FA54: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000FA5C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000FA64: 86A2221E - v_add_lshl_u32 v238, v7, v8, 1 // 00000000FA68: D1FE00EE 02061107 - v_cndmask_b32_e64 v238, v10, v238, s[34:35] // 00000000FA70: D10000EE 008BDD0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000FA78: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000FA80: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000FA88: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000FA90: 86A2221E - v_add_lshl_u32 v239, v7, v8, 1 // 00000000FA94: D1FE00EF 02061107 - v_cndmask_b32_e64 v239, v10, v239, s[34:35] // 00000000FA9C: D10000EF 008BDF0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000FAA4: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000FAAC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000FAB4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 
00000000FABC: 86A2221E - v_add_lshl_u32 v240, v7, v8, 1 // 00000000FAC0: D1FE00F0 02061107 - v_cndmask_b32_e64 v240, v10, v240, s[34:35] // 00000000FAC8: D10000F0 008BE10A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000FAD0: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000FAD8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000FAE0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000FAE8: 86A2221E - v_add_lshl_u32 v241, v7, v8, 1 // 00000000FAEC: D1FE00F1 02061107 - v_cndmask_b32_e64 v241, v10, v241, s[34:35] // 00000000FAF4: D10000F1 008BE30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000FAFC: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000FB04: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000FB0C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000FB14: 86A2221E - v_add_lshl_u32 v242, v7, v8, 1 // 00000000FB18: D1FE00F2 02061107 - v_cndmask_b32_e64 v242, v10, v242, s[34:35] // 00000000FB20: D10000F2 008BE50A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000FB28: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000FB30: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000FB38: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000FB40: 86A2221E - v_add_lshl_u32 v243, v7, v8, 1 // 00000000FB44: D1FE00F3 02061107 - v_cndmask_b32_e64 v243, v10, v243, s[34:35] // 00000000FB4C: D10000F3 008BE70A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000FB54: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000FB5C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000FB64: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000FB6C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000FB74: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000FB7C: 86A2221E - v_add_lshl_u32 v244, v7, v4, 1 // 00000000FB80: D1FE00F4 02060907 - v_cndmask_b32_e64 v244, v10, v244, s[34:35] // 00000000FB88: 
D10000F4 008BE90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000FB90: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000FB98: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000FBA0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000FBA8: 86A2221E - v_add_lshl_u32 v245, v7, v8, 1 // 00000000FBAC: D1FE00F5 02061107 - v_cndmask_b32_e64 v245, v10, v245, s[34:35] // 00000000FBB4: D10000F5 008BEB0A - v_accvgpr_read_b32 v15, a0 // 00000000FBBC: D3D8400F 18000100 - v_accvgpr_read_b32 v16, a4 // 00000000FBC4: D3D84010 18000104 - v_accvgpr_read_b32 v17, a8 // 00000000FBCC: D3D84011 18000108 - v_accvgpr_read_b32 v18, a12 // 00000000FBD4: D3D84012 1800010C - v_accvgpr_read_b32 v19, a16 // 00000000FBDC: D3D84013 18000110 - v_accvgpr_read_b32 v20, a20 // 00000000FBE4: D3D84014 18000114 - v_accvgpr_read_b32 v21, a24 // 00000000FBEC: D3D84015 18000118 - v_accvgpr_read_b32 v22, a28 // 00000000FBF4: D3D84016 1800011C - v_accvgpr_read_b32 v23, a32 // 00000000FBFC: D3D84017 18000120 - v_accvgpr_read_b32 v24, a36 // 00000000FC04: D3D84018 18000124 - v_accvgpr_read_b32 v25, a40 // 00000000FC0C: D3D84019 18000128 - v_accvgpr_read_b32 v26, a44 // 00000000FC14: D3D8401A 1800012C - v_accvgpr_read_b32 v27, a48 // 00000000FC1C: D3D8401B 18000130 - v_accvgpr_read_b32 v28, a52 // 00000000FC24: D3D8401C 18000134 - v_accvgpr_read_b32 v29, a56 // 00000000FC2C: D3D8401D 18000138 - v_accvgpr_read_b32 v30, a60 // 00000000FC34: D3D8401E 1800013C - v_accvgpr_read_b32 v31, a64 // 00000000FC3C: D3D8401F 18000140 - v_accvgpr_read_b32 v32, a68 // 00000000FC44: D3D84020 18000144 - v_accvgpr_read_b32 v33, a72 // 00000000FC4C: D3D84021 18000148 - v_accvgpr_read_b32 v34, a76 // 00000000FC54: D3D84022 1800014C - v_accvgpr_read_b32 v35, a80 // 00000000FC5C: D3D84023 18000150 - v_accvgpr_read_b32 v36, a84 // 00000000FC64: D3D84024 18000154 - v_accvgpr_read_b32 v37, a88 // 00000000FC6C: D3D84025 18000158 - v_accvgpr_read_b32 v38, a92 // 00000000FC74: D3D84026 
1800015C - v_accvgpr_read_b32 v39, a96 // 00000000FC7C: D3D84027 18000160 - v_accvgpr_read_b32 v40, a100 // 00000000FC84: D3D84028 18000164 - v_accvgpr_read_b32 v41, a104 // 00000000FC8C: D3D84029 18000168 - v_accvgpr_read_b32 v42, a108 // 00000000FC94: D3D8402A 1800016C - v_accvgpr_read_b32 v43, a112 // 00000000FC9C: D3D8402B 18000170 - v_accvgpr_read_b32 v44, a116 // 00000000FCA4: D3D8402C 18000174 - v_accvgpr_read_b32 v45, a120 // 00000000FCAC: D3D8402D 18000178 - v_accvgpr_read_b32 v46, a124 // 00000000FCB4: D3D8402E 1800017C - v_accvgpr_read_b32 v47, a128 // 00000000FCBC: D3D8402F 18000180 - v_accvgpr_read_b32 v48, a132 // 00000000FCC4: D3D84030 18000184 - v_accvgpr_read_b32 v49, a136 // 00000000FCCC: D3D84031 18000188 - v_accvgpr_read_b32 v50, a140 // 00000000FCD4: D3D84032 1800018C - v_accvgpr_read_b32 v51, a144 // 00000000FCDC: D3D84033 18000190 - v_accvgpr_read_b32 v52, a148 // 00000000FCE4: D3D84034 18000194 - v_accvgpr_read_b32 v53, a152 // 00000000FCEC: D3D84035 18000198 - v_accvgpr_read_b32 v54, a156 // 00000000FCF4: D3D84036 1800019C - v_accvgpr_read_b32 v55, a160 // 00000000FCFC: D3D84037 180001A0 - v_accvgpr_read_b32 v56, a164 // 00000000FD04: D3D84038 180001A4 - v_accvgpr_read_b32 v57, a168 // 00000000FD0C: D3D84039 180001A8 - v_accvgpr_read_b32 v58, a172 // 00000000FD14: D3D8403A 180001AC - v_accvgpr_read_b32 v59, a176 // 00000000FD1C: D3D8403B 180001B0 - v_accvgpr_read_b32 v60, a180 // 00000000FD24: D3D8403C 180001B4 - v_accvgpr_read_b32 v61, a184 // 00000000FD2C: D3D8403D 180001B8 - v_accvgpr_read_b32 v62, a188 // 00000000FD34: D3D8403E 180001BC - v_accvgpr_read_b32 v63, a192 // 00000000FD3C: D3D8403F 180001C0 - v_accvgpr_read_b32 v64, a196 // 00000000FD44: D3D84040 180001C4 - v_accvgpr_read_b32 v65, a200 // 00000000FD4C: D3D84041 180001C8 - v_accvgpr_read_b32 v66, a204 // 00000000FD54: D3D84042 180001CC - v_accvgpr_read_b32 v67, a208 // 00000000FD5C: D3D84043 180001D0 - v_accvgpr_read_b32 v68, a212 // 00000000FD64: D3D84044 180001D4 - 
v_accvgpr_read_b32 v69, a216 // 00000000FD6C: D3D84045 180001D8 - v_accvgpr_read_b32 v70, a220 // 00000000FD74: D3D84046 180001DC - v_accvgpr_read_b32 v71, a224 // 00000000FD7C: D3D84047 180001E0 - v_accvgpr_read_b32 v72, a228 // 00000000FD84: D3D84048 180001E4 - v_accvgpr_read_b32 v73, a232 // 00000000FD8C: D3D84049 180001E8 - v_accvgpr_read_b32 v74, a236 // 00000000FD94: D3D8404A 180001EC - v_accvgpr_read_b32 v75, a240 // 00000000FD9C: D3D8404B 180001F0 - v_accvgpr_read_b32 v76, a244 // 00000000FDA4: D3D8404C 180001F4 - v_accvgpr_read_b32 v77, a248 // 00000000FDAC: D3D8404D 180001F8 - v_accvgpr_read_b32 v78, a252 // 00000000FDB4: D3D8404E 180001FC - v_accvgpr_read_b32 v79, a1 // 00000000FDBC: D3D8404F 18000101 - v_accvgpr_read_b32 v80, a5 // 00000000FDC4: D3D84050 18000105 - v_accvgpr_read_b32 v81, a9 // 00000000FDCC: D3D84051 18000109 - v_accvgpr_read_b32 v82, a13 // 00000000FDD4: D3D84052 1800010D - v_accvgpr_read_b32 v83, a17 // 00000000FDDC: D3D84053 18000111 - v_accvgpr_read_b32 v84, a21 // 00000000FDE4: D3D84054 18000115 - v_accvgpr_read_b32 v85, a25 // 00000000FDEC: D3D84055 18000119 - v_accvgpr_read_b32 v86, a29 // 00000000FDF4: D3D84056 1800011D - v_accvgpr_read_b32 v87, a33 // 00000000FDFC: D3D84057 18000121 - v_accvgpr_read_b32 v88, a37 // 00000000FE04: D3D84058 18000125 - v_accvgpr_read_b32 v89, a41 // 00000000FE0C: D3D84059 18000129 - v_accvgpr_read_b32 v90, a45 // 00000000FE14: D3D8405A 1800012D - v_accvgpr_read_b32 v91, a49 // 00000000FE1C: D3D8405B 18000131 - v_accvgpr_read_b32 v92, a53 // 00000000FE24: D3D8405C 18000135 - v_accvgpr_read_b32 v93, a57 // 00000000FE2C: D3D8405D 18000139 - v_accvgpr_read_b32 v94, a61 // 00000000FE34: D3D8405E 1800013D - v_accvgpr_read_b32 v95, a65 // 00000000FE3C: D3D8405F 18000141 - v_accvgpr_read_b32 v96, a69 // 00000000FE44: D3D84060 18000145 - v_accvgpr_read_b32 v97, a73 // 00000000FE4C: D3D84061 18000149 - v_accvgpr_read_b32 v98, a77 // 00000000FE54: D3D84062 1800014D - v_accvgpr_read_b32 v99, a81 // 
00000000FE5C: D3D84063 18000151 - v_accvgpr_read_b32 v100, a85 // 00000000FE64: D3D84064 18000155 - v_accvgpr_read_b32 v101, a89 // 00000000FE6C: D3D84065 18000159 - v_accvgpr_read_b32 v102, a93 // 00000000FE74: D3D84066 1800015D - v_accvgpr_read_b32 v103, a97 // 00000000FE7C: D3D84067 18000161 - v_accvgpr_read_b32 v104, a101 // 00000000FE84: D3D84068 18000165 - v_accvgpr_read_b32 v105, a105 // 00000000FE8C: D3D84069 18000169 - v_accvgpr_read_b32 v106, a109 // 00000000FE94: D3D8406A 1800016D - v_accvgpr_read_b32 v107, a113 // 00000000FE9C: D3D8406B 18000171 - v_accvgpr_read_b32 v108, a117 // 00000000FEA4: D3D8406C 18000175 - v_accvgpr_read_b32 v109, a121 // 00000000FEAC: D3D8406D 18000179 - v_accvgpr_read_b32 v110, a125 // 00000000FEB4: D3D8406E 1800017D - v_accvgpr_read_b32 v111, a129 // 00000000FEBC: D3D8406F 18000181 - v_accvgpr_read_b32 v112, a133 // 00000000FEC4: D3D84070 18000185 - v_accvgpr_read_b32 v113, a137 // 00000000FECC: D3D84071 18000189 - v_accvgpr_read_b32 v114, a141 // 00000000FED4: D3D84072 1800018D - v_accvgpr_read_b32 v115, a145 // 00000000FEDC: D3D84073 18000191 - v_accvgpr_read_b32 v116, a149 // 00000000FEE4: D3D84074 18000195 - v_accvgpr_read_b32 v117, a153 // 00000000FEEC: D3D84075 18000199 - v_accvgpr_read_b32 v118, a157 // 00000000FEF4: D3D84076 1800019D - v_accvgpr_read_b32 v119, a161 // 00000000FEFC: D3D84077 180001A1 - v_accvgpr_read_b32 v120, a165 // 00000000FF04: D3D84078 180001A5 - v_accvgpr_read_b32 v121, a169 // 00000000FF0C: D3D84079 180001A9 - v_accvgpr_read_b32 v122, a173 // 00000000FF14: D3D8407A 180001AD - v_accvgpr_read_b32 v123, a177 // 00000000FF1C: D3D8407B 180001B1 - v_accvgpr_read_b32 v124, a181 // 00000000FF24: D3D8407C 180001B5 - v_accvgpr_read_b32 v125, a185 // 00000000FF2C: D3D8407D 180001B9 - v_accvgpr_read_b32 v126, a189 // 00000000FF34: D3D8407E 180001BD - v_accvgpr_read_b32 v127, a193 // 00000000FF3C: D3D8407F 180001C1 - v_accvgpr_read_b32 v128, a197 // 00000000FF44: D3D84080 180001C5 - v_mul_f32_e32 v15, s44, 
v15 // 00000000FF4C: 0A1E1E2C - v_pk_mul_f32 v[16:17], s[44:45], v[16:17] op_sel_hi:[0,1] // 00000000FF50: D3B14010 1002202C - v_pk_mul_f32 v[18:19], s[44:45], v[18:19] op_sel_hi:[0,1] // 00000000FF58: D3B14012 1002242C - v_pk_mul_f32 v[20:21], s[44:45], v[20:21] op_sel_hi:[0,1] // 00000000FF60: D3B14014 1002282C - v_pk_mul_f32 v[22:23], s[44:45], v[22:23] op_sel_hi:[0,1] // 00000000FF68: D3B14016 10022C2C - v_pk_mul_f32 v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 00000000FF70: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 00000000FF78: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 00000000FF80: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] op_sel_hi:[0,1] // 00000000FF88: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 00000000FF90: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 00000000FF98: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 00000000FFA0: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 00000000FFA8: D3B14026 10024C2C - v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 00000000FFB0: D3B14028 1002502C - v_pk_mul_f32 v[42:43], s[44:45], v[42:43] op_sel_hi:[0,1] // 00000000FFB8: D3B1402A 1002542C - v_pk_mul_f32 v[44:45], s[44:45], v[44:45] op_sel_hi:[0,1] // 00000000FFC0: D3B1402C 1002582C - v_pk_mul_f32 v[46:47], s[44:45], v[46:47] op_sel_hi:[0,1] // 00000000FFC8: D3B1402E 10025C2C - v_pk_mul_f32 v[48:49], s[44:45], v[48:49] op_sel_hi:[0,1] // 00000000FFD0: D3B14030 1002602C - v_pk_mul_f32 v[50:51], s[44:45], v[50:51] op_sel_hi:[0,1] // 00000000FFD8: D3B14032 1002642C - v_pk_mul_f32 v[52:53], s[44:45], v[52:53] op_sel_hi:[0,1] // 00000000FFE0: D3B14034 1002682C - v_pk_mul_f32 v[54:55], s[44:45], v[54:55] op_sel_hi:[0,1] // 00000000FFE8: D3B14036 10026C2C - v_pk_mul_f32 v[56:57], s[44:45], v[56:57] op_sel_hi:[0,1] // 
00000000FFF0: D3B14038 1002702C - v_pk_mul_f32 v[58:59], s[44:45], v[58:59] op_sel_hi:[0,1] // 00000000FFF8: D3B1403A 1002742C - v_pk_mul_f32 v[60:61], s[44:45], v[60:61] op_sel_hi:[0,1] // 000000010000: D3B1403C 1002782C - v_pk_mul_f32 v[62:63], s[44:45], v[62:63] op_sel_hi:[0,1] // 000000010008: D3B1403E 10027C2C - v_pk_mul_f32 v[64:65], s[44:45], v[64:65] op_sel_hi:[0,1] // 000000010010: D3B14040 1002802C - v_pk_mul_f32 v[66:67], s[44:45], v[66:67] op_sel_hi:[0,1] // 000000010018: D3B14042 1002842C - v_pk_mul_f32 v[68:69], s[44:45], v[68:69] op_sel_hi:[0,1] // 000000010020: D3B14044 1002882C - v_pk_mul_f32 v[70:71], s[44:45], v[70:71] op_sel_hi:[0,1] // 000000010028: D3B14046 10028C2C - v_pk_mul_f32 v[72:73], s[44:45], v[72:73] op_sel_hi:[0,1] // 000000010030: D3B14048 1002902C - v_pk_mul_f32 v[74:75], s[44:45], v[74:75] op_sel_hi:[0,1] // 000000010038: D3B1404A 1002942C - v_pk_mul_f32 v[76:77], s[44:45], v[76:77] op_sel_hi:[0,1] // 000000010040: D3B1404C 1002982C - v_pk_mul_f32 v[78:79], s[44:45], v[78:79] op_sel_hi:[0,1] // 000000010048: D3B1404E 10029C2C - v_pk_mul_f32 v[80:81], s[44:45], v[80:81] op_sel_hi:[0,1] // 000000010050: D3B14050 1002A02C - v_pk_mul_f32 v[82:83], s[44:45], v[82:83] op_sel_hi:[0,1] // 000000010058: D3B14052 1002A42C - v_pk_mul_f32 v[84:85], s[44:45], v[84:85] op_sel_hi:[0,1] // 000000010060: D3B14054 1002A82C - v_pk_mul_f32 v[86:87], s[44:45], v[86:87] op_sel_hi:[0,1] // 000000010068: D3B14056 1002AC2C - v_pk_mul_f32 v[88:89], s[44:45], v[88:89] op_sel_hi:[0,1] // 000000010070: D3B14058 1002B02C - v_pk_mul_f32 v[90:91], s[44:45], v[90:91] op_sel_hi:[0,1] // 000000010078: D3B1405A 1002B42C - v_pk_mul_f32 v[92:93], s[44:45], v[92:93] op_sel_hi:[0,1] // 000000010080: D3B1405C 1002B82C - v_pk_mul_f32 v[94:95], s[44:45], v[94:95] op_sel_hi:[0,1] // 000000010088: D3B1405E 1002BC2C - v_pk_mul_f32 v[96:97], s[44:45], v[96:97] op_sel_hi:[0,1] // 000000010090: D3B14060 1002C02C - v_pk_mul_f32 v[98:99], s[44:45], v[98:99] op_sel_hi:[0,1] // 
000000010098: D3B14062 1002C42C - v_pk_mul_f32 v[100:101], s[44:45], v[100:101] op_sel_hi:[0,1]// 0000000100A0: D3B14064 1002C82C - v_pk_mul_f32 v[102:103], s[44:45], v[102:103] op_sel_hi:[0,1]// 0000000100A8: D3B14066 1002CC2C - v_pk_mul_f32 v[104:105], s[44:45], v[104:105] op_sel_hi:[0,1]// 0000000100B0: D3B14068 1002D02C - v_pk_mul_f32 v[106:107], s[44:45], v[106:107] op_sel_hi:[0,1]// 0000000100B8: D3B1406A 1002D42C - v_pk_mul_f32 v[108:109], s[44:45], v[108:109] op_sel_hi:[0,1]// 0000000100C0: D3B1406C 1002D82C - v_pk_mul_f32 v[110:111], s[44:45], v[110:111] op_sel_hi:[0,1]// 0000000100C8: D3B1406E 1002DC2C - v_pk_mul_f32 v[112:113], s[44:45], v[112:113] op_sel_hi:[0,1]// 0000000100D0: D3B14070 1002E02C - v_pk_mul_f32 v[114:115], s[44:45], v[114:115] op_sel_hi:[0,1]// 0000000100D8: D3B14072 1002E42C - v_pk_mul_f32 v[116:117], s[44:45], v[116:117] op_sel_hi:[0,1]// 0000000100E0: D3B14074 1002E82C - v_pk_mul_f32 v[118:119], s[44:45], v[118:119] op_sel_hi:[0,1]// 0000000100E8: D3B14076 1002EC2C - v_pk_mul_f32 v[120:121], s[44:45], v[120:121] op_sel_hi:[0,1]// 0000000100F0: D3B14078 1002F02C - v_pk_mul_f32 v[122:123], s[44:45], v[122:123] op_sel_hi:[0,1]// 0000000100F8: D3B1407A 1002F42C - v_pk_mul_f32 v[124:125], s[44:45], v[124:125] op_sel_hi:[0,1]// 000000010100: D3B1407C 1002F82C - v_pk_mul_f32 v[126:127], s[44:45], v[126:127] op_sel_hi:[0,1]// 000000010108: D3B1407E 1002FC2C - v_mul_f32_e32 v128, s44, v128 // 000000010110: 0B01002C - v_mov_b32_e32 v12, 0xffff0000 // 000000010114: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 00000001011C: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 000000010124: 7E1C02FF 00007FFF - v_cvt_pk_bf16_f32 v15, v15, v15 // 00000001012C: D268000F 00021F0F - buffer_store_short v15, v129, s[16:19], 0 offen nt // 000000010134: E06A1000 80040F81 - v_cvt_pk_bf16_f32 v16, v16, v16 // 00000001013C: D2680010 00022110 - buffer_store_short v16, v130, s[16:19], 0 offen nt // 000000010144: E06A1000 80041082 - v_cvt_pk_bf16_f32 v17, 
v17, v17 // 00000001014C: D2680011 00022311 - buffer_store_short v17, v131, s[16:19], 0 offen nt // 000000010154: E06A1000 80041183 - v_cvt_pk_bf16_f32 v18, v18, v18 // 00000001015C: D2680012 00022512 - buffer_store_short v18, v135, s[16:19], 0 offen nt // 000000010164: E06A1000 80041287 - v_cvt_pk_bf16_f32 v19, v19, v19 // 00000001016C: D2680013 00022713 - buffer_store_short v19, v136, s[16:19], 0 offen nt // 000000010174: E06A1000 80041388 - v_cvt_pk_bf16_f32 v20, v20, v20 // 00000001017C: D2680014 00022914 - buffer_store_short v20, v137, s[16:19], 0 offen nt // 000000010184: E06A1000 80041489 - v_cvt_pk_bf16_f32 v21, v21, v21 // 00000001018C: D2680015 00022B15 - buffer_store_short v21, v138, s[16:19], 0 offen nt // 000000010194: E06A1000 8004158A - v_cvt_pk_bf16_f32 v22, v22, v22 // 00000001019C: D2680016 00022D16 - buffer_store_short v22, v139, s[16:19], 0 offen nt // 0000000101A4: E06A1000 8004168B - v_cvt_pk_bf16_f32 v23, v23, v23 // 0000000101AC: D2680017 00022F17 - buffer_store_short v23, v140, s[16:19], 0 offen nt // 0000000101B4: E06A1000 8004178C - v_cvt_pk_bf16_f32 v24, v24, v24 // 0000000101BC: D2680018 00023118 - buffer_store_short v24, v141, s[16:19], 0 offen nt // 0000000101C4: E06A1000 8004188D - v_cvt_pk_bf16_f32 v25, v25, v25 // 0000000101CC: D2680019 00023319 - buffer_store_short v25, v142, s[16:19], 0 offen nt // 0000000101D4: E06A1000 8004198E - v_cvt_pk_bf16_f32 v26, v26, v26 // 0000000101DC: D268001A 0002351A - buffer_store_short v26, v143, s[16:19], 0 offen nt // 0000000101E4: E06A1000 80041A8F - v_cvt_pk_bf16_f32 v27, v27, v27 // 0000000101EC: D268001B 0002371B - buffer_store_short v27, v144, s[16:19], 0 offen nt // 0000000101F4: E06A1000 80041B90 - v_cvt_pk_bf16_f32 v28, v28, v28 // 0000000101FC: D268001C 0002391C - buffer_store_short v28, v145, s[16:19], 0 offen nt // 000000010204: E06A1000 80041C91 - v_cvt_pk_bf16_f32 v29, v29, v29 // 00000001020C: D268001D 00023B1D - buffer_store_short v29, v146, s[16:19], 0 offen nt // 000000010214: 
E06A1000 80041D92 - v_cvt_pk_bf16_f32 v30, v30, v30 // 00000001021C: D268001E 00023D1E - buffer_store_short v30, v147, s[16:19], 0 offen nt // 000000010224: E06A1000 80041E93 - v_cvt_pk_bf16_f32 v31, v31, v31 // 00000001022C: D268001F 00023F1F - buffer_store_short v31, v148, s[16:19], 0 offen nt // 000000010234: E06A1000 80041F94 - v_cvt_pk_bf16_f32 v32, v32, v32 // 00000001023C: D2680020 00024120 - buffer_store_short v32, v149, s[16:19], 0 offen nt // 000000010244: E06A1000 80042095 - v_cvt_pk_bf16_f32 v33, v33, v33 // 00000001024C: D2680021 00024321 - buffer_store_short v33, v150, s[16:19], 0 offen nt // 000000010254: E06A1000 80042196 - v_cvt_pk_bf16_f32 v34, v34, v34 // 00000001025C: D2680022 00024522 - buffer_store_short v34, v151, s[16:19], 0 offen nt // 000000010264: E06A1000 80042297 - v_cvt_pk_bf16_f32 v35, v35, v35 // 00000001026C: D2680023 00024723 - buffer_store_short v35, v152, s[16:19], 0 offen nt // 000000010274: E06A1000 80042398 - v_cvt_pk_bf16_f32 v36, v36, v36 // 00000001027C: D2680024 00024924 - buffer_store_short v36, v153, s[16:19], 0 offen nt // 000000010284: E06A1000 80042499 - v_cvt_pk_bf16_f32 v37, v37, v37 // 00000001028C: D2680025 00024B25 - buffer_store_short v37, v154, s[16:19], 0 offen nt // 000000010294: E06A1000 8004259A - v_cvt_pk_bf16_f32 v38, v38, v38 // 00000001029C: D2680026 00024D26 - buffer_store_short v38, v155, s[16:19], 0 offen nt // 0000000102A4: E06A1000 8004269B - v_cvt_pk_bf16_f32 v39, v39, v39 // 0000000102AC: D2680027 00024F27 - buffer_store_short v39, v156, s[16:19], 0 offen nt // 0000000102B4: E06A1000 8004279C - v_cvt_pk_bf16_f32 v40, v40, v40 // 0000000102BC: D2680028 00025128 - buffer_store_short v40, v157, s[16:19], 0 offen nt // 0000000102C4: E06A1000 8004289D - v_cvt_pk_bf16_f32 v41, v41, v41 // 0000000102CC: D2680029 00025329 - buffer_store_short v41, v158, s[16:19], 0 offen nt // 0000000102D4: E06A1000 8004299E - v_cvt_pk_bf16_f32 v42, v42, v42 // 0000000102DC: D268002A 0002552A - buffer_store_short v42, 
v159, s[16:19], 0 offen nt // 0000000102E4: E06A1000 80042A9F - v_cvt_pk_bf16_f32 v43, v43, v43 // 0000000102EC: D268002B 0002572B - buffer_store_short v43, v160, s[16:19], 0 offen nt // 0000000102F4: E06A1000 80042BA0 - v_cvt_pk_bf16_f32 v44, v44, v44 // 0000000102FC: D268002C 0002592C - buffer_store_short v44, v161, s[16:19], 0 offen nt // 000000010304: E06A1000 80042CA1 - v_cvt_pk_bf16_f32 v45, v45, v45 // 00000001030C: D268002D 00025B2D - buffer_store_short v45, v162, s[16:19], 0 offen nt // 000000010314: E06A1000 80042DA2 - v_cvt_pk_bf16_f32 v46, v46, v46 // 00000001031C: D268002E 00025D2E - buffer_store_short v46, v163, s[16:19], 0 offen nt // 000000010324: E06A1000 80042EA3 - v_cvt_pk_bf16_f32 v47, v47, v47 // 00000001032C: D268002F 00025F2F - buffer_store_short v47, v164, s[16:19], 0 offen nt // 000000010334: E06A1000 80042FA4 - v_cvt_pk_bf16_f32 v48, v48, v48 // 00000001033C: D2680030 00026130 - buffer_store_short v48, v165, s[16:19], 0 offen nt // 000000010344: E06A1000 800430A5 - v_cvt_pk_bf16_f32 v49, v49, v49 // 00000001034C: D2680031 00026331 - buffer_store_short v49, v166, s[16:19], 0 offen nt // 000000010354: E06A1000 800431A6 - v_cvt_pk_bf16_f32 v50, v50, v50 // 00000001035C: D2680032 00026532 - buffer_store_short v50, v167, s[16:19], 0 offen nt // 000000010364: E06A1000 800432A7 - v_cvt_pk_bf16_f32 v51, v51, v51 // 00000001036C: D2680033 00026733 - buffer_store_short v51, v168, s[16:19], 0 offen nt // 000000010374: E06A1000 800433A8 - v_cvt_pk_bf16_f32 v52, v52, v52 // 00000001037C: D2680034 00026934 - buffer_store_short v52, v169, s[16:19], 0 offen nt // 000000010384: E06A1000 800434A9 - v_cvt_pk_bf16_f32 v53, v53, v53 // 00000001038C: D2680035 00026B35 - buffer_store_short v53, v170, s[16:19], 0 offen nt // 000000010394: E06A1000 800435AA - v_cvt_pk_bf16_f32 v54, v54, v54 // 00000001039C: D2680036 00026D36 - buffer_store_short v54, v171, s[16:19], 0 offen nt // 0000000103A4: E06A1000 800436AB - v_cvt_pk_bf16_f32 v55, v55, v55 // 0000000103AC: 
D2680037 00026F37 - buffer_store_short v55, v172, s[16:19], 0 offen nt // 0000000103B4: E06A1000 800437AC - v_cvt_pk_bf16_f32 v56, v56, v56 // 0000000103BC: D2680038 00027138 - buffer_store_short v56, v173, s[16:19], 0 offen nt // 0000000103C4: E06A1000 800438AD - v_cvt_pk_bf16_f32 v57, v57, v57 // 0000000103CC: D2680039 00027339 - buffer_store_short v57, v174, s[16:19], 0 offen nt // 0000000103D4: E06A1000 800439AE - v_cvt_pk_bf16_f32 v58, v58, v58 // 0000000103DC: D268003A 0002753A - buffer_store_short v58, v175, s[16:19], 0 offen nt // 0000000103E4: E06A1000 80043AAF - v_cvt_pk_bf16_f32 v59, v59, v59 // 0000000103EC: D268003B 0002773B - buffer_store_short v59, v176, s[16:19], 0 offen nt // 0000000103F4: E06A1000 80043BB0 - v_cvt_pk_bf16_f32 v60, v60, v60 // 0000000103FC: D268003C 0002793C - buffer_store_short v60, v177, s[16:19], 0 offen nt // 000000010404: E06A1000 80043CB1 - v_cvt_pk_bf16_f32 v61, v61, v61 // 00000001040C: D268003D 00027B3D - buffer_store_short v61, v178, s[16:19], 0 offen nt // 000000010414: E06A1000 80043DB2 - v_cvt_pk_bf16_f32 v62, v62, v62 // 00000001041C: D268003E 00027D3E - buffer_store_short v62, v179, s[16:19], 0 offen nt // 000000010424: E06A1000 80043EB3 - v_cvt_pk_bf16_f32 v63, v63, v63 // 00000001042C: D268003F 00027F3F - buffer_store_short v63, v180, s[16:19], 0 offen nt // 000000010434: E06A1000 80043FB4 - v_cvt_pk_bf16_f32 v64, v64, v64 // 00000001043C: D2680040 00028140 - buffer_store_short v64, v181, s[16:19], 0 offen nt // 000000010444: E06A1000 800440B5 - v_cvt_pk_bf16_f32 v65, v65, v65 // 00000001044C: D2680041 00028341 - buffer_store_short v65, v182, s[16:19], 0 offen nt // 000000010454: E06A1000 800441B6 - v_cvt_pk_bf16_f32 v66, v66, v66 // 00000001045C: D2680042 00028542 - buffer_store_short v66, v183, s[16:19], 0 offen nt // 000000010464: E06A1000 800442B7 - v_cvt_pk_bf16_f32 v67, v67, v67 // 00000001046C: D2680043 00028743 - buffer_store_short v67, v184, s[16:19], 0 offen nt // 000000010474: E06A1000 800443B8 - 
v_cvt_pk_bf16_f32 v68, v68, v68 // 00000001047C: D2680044 00028944 - buffer_store_short v68, v185, s[16:19], 0 offen nt // 000000010484: E06A1000 800444B9 - v_cvt_pk_bf16_f32 v69, v69, v69 // 00000001048C: D2680045 00028B45 - buffer_store_short v69, v186, s[16:19], 0 offen nt // 000000010494: E06A1000 800445BA - v_cvt_pk_bf16_f32 v70, v70, v70 // 00000001049C: D2680046 00028D46 - buffer_store_short v70, v187, s[16:19], 0 offen nt // 0000000104A4: E06A1000 800446BB - v_cvt_pk_bf16_f32 v71, v71, v71 // 0000000104AC: D2680047 00028F47 - buffer_store_short v71, v188, s[16:19], 0 offen nt // 0000000104B4: E06A1000 800447BC - v_cvt_pk_bf16_f32 v72, v72, v72 // 0000000104BC: D2680048 00029148 - buffer_store_short v72, v189, s[16:19], 0 offen nt // 0000000104C4: E06A1000 800448BD - v_cvt_pk_bf16_f32 v73, v73, v73 // 0000000104CC: D2680049 00029349 - buffer_store_short v73, v190, s[16:19], 0 offen nt // 0000000104D4: E06A1000 800449BE - v_cvt_pk_bf16_f32 v74, v74, v74 // 0000000104DC: D268004A 0002954A - buffer_store_short v74, v191, s[16:19], 0 offen nt // 0000000104E4: E06A1000 80044ABF - v_cvt_pk_bf16_f32 v75, v75, v75 // 0000000104EC: D268004B 0002974B - buffer_store_short v75, v192, s[16:19], 0 offen nt // 0000000104F4: E06A1000 80044BC0 - v_cvt_pk_bf16_f32 v76, v76, v76 // 0000000104FC: D268004C 0002994C - buffer_store_short v76, v193, s[16:19], 0 offen nt // 000000010504: E06A1000 80044CC1 - v_cvt_pk_bf16_f32 v77, v77, v77 // 00000001050C: D268004D 00029B4D - buffer_store_short v77, v194, s[16:19], 0 offen nt // 000000010514: E06A1000 80044DC2 - v_cvt_pk_bf16_f32 v78, v78, v78 // 00000001051C: D268004E 00029D4E - buffer_store_short v78, v195, s[16:19], 0 offen nt // 000000010524: E06A1000 80044EC3 - v_cvt_pk_bf16_f32 v79, v79, v79 // 00000001052C: D268004F 00029F4F - buffer_store_short v79, v196, s[16:19], 0 offen nt // 000000010534: E06A1000 80044FC4 - v_cvt_pk_bf16_f32 v80, v80, v80 // 00000001053C: D2680050 0002A150 - buffer_store_short v80, v197, s[16:19], 0 
offen nt // 000000010544: E06A1000 800450C5 - v_cvt_pk_bf16_f32 v81, v81, v81 // 00000001054C: D2680051 0002A351 - buffer_store_short v81, v198, s[16:19], 0 offen nt // 000000010554: E06A1000 800451C6 - v_cvt_pk_bf16_f32 v82, v82, v82 // 00000001055C: D2680052 0002A552 - buffer_store_short v82, v199, s[16:19], 0 offen nt // 000000010564: E06A1000 800452C7 - v_cvt_pk_bf16_f32 v83, v83, v83 // 00000001056C: D2680053 0002A753 - buffer_store_short v83, v200, s[16:19], 0 offen nt // 000000010574: E06A1000 800453C8 - v_cvt_pk_bf16_f32 v84, v84, v84 // 00000001057C: D2680054 0002A954 - buffer_store_short v84, v201, s[16:19], 0 offen nt // 000000010584: E06A1000 800454C9 - v_cvt_pk_bf16_f32 v85, v85, v85 // 00000001058C: D2680055 0002AB55 - buffer_store_short v85, v202, s[16:19], 0 offen nt // 000000010594: E06A1000 800455CA - v_cvt_pk_bf16_f32 v86, v86, v86 // 00000001059C: D2680056 0002AD56 - buffer_store_short v86, v203, s[16:19], 0 offen nt // 0000000105A4: E06A1000 800456CB - v_cvt_pk_bf16_f32 v87, v87, v87 // 0000000105AC: D2680057 0002AF57 - buffer_store_short v87, v204, s[16:19], 0 offen nt // 0000000105B4: E06A1000 800457CC - v_cvt_pk_bf16_f32 v88, v88, v88 // 0000000105BC: D2680058 0002B158 - buffer_store_short v88, v205, s[16:19], 0 offen nt // 0000000105C4: E06A1000 800458CD - v_cvt_pk_bf16_f32 v89, v89, v89 // 0000000105CC: D2680059 0002B359 - buffer_store_short v89, v206, s[16:19], 0 offen nt // 0000000105D4: E06A1000 800459CE - v_cvt_pk_bf16_f32 v90, v90, v90 // 0000000105DC: D268005A 0002B55A - buffer_store_short v90, v207, s[16:19], 0 offen nt // 0000000105E4: E06A1000 80045ACF - v_cvt_pk_bf16_f32 v91, v91, v91 // 0000000105EC: D268005B 0002B75B - buffer_store_short v91, v208, s[16:19], 0 offen nt // 0000000105F4: E06A1000 80045BD0 - v_cvt_pk_bf16_f32 v92, v92, v92 // 0000000105FC: D268005C 0002B95C - buffer_store_short v92, v209, s[16:19], 0 offen nt // 000000010604: E06A1000 80045CD1 - v_cvt_pk_bf16_f32 v93, v93, v93 // 00000001060C: D268005D 0002BB5D - 
buffer_store_short v93, v210, s[16:19], 0 offen nt // 000000010614: E06A1000 80045DD2 - v_cvt_pk_bf16_f32 v94, v94, v94 // 00000001061C: D268005E 0002BD5E - buffer_store_short v94, v211, s[16:19], 0 offen nt // 000000010624: E06A1000 80045ED3 - v_cvt_pk_bf16_f32 v95, v95, v95 // 00000001062C: D268005F 0002BF5F - buffer_store_short v95, v212, s[16:19], 0 offen nt // 000000010634: E06A1000 80045FD4 - v_cvt_pk_bf16_f32 v96, v96, v96 // 00000001063C: D2680060 0002C160 - buffer_store_short v96, v213, s[16:19], 0 offen nt // 000000010644: E06A1000 800460D5 - v_cvt_pk_bf16_f32 v97, v97, v97 // 00000001064C: D2680061 0002C361 - buffer_store_short v97, v214, s[16:19], 0 offen nt // 000000010654: E06A1000 800461D6 - v_cvt_pk_bf16_f32 v98, v98, v98 // 00000001065C: D2680062 0002C562 - buffer_store_short v98, v215, s[16:19], 0 offen nt // 000000010664: E06A1000 800462D7 - v_cvt_pk_bf16_f32 v99, v99, v99 // 00000001066C: D2680063 0002C763 - buffer_store_short v99, v216, s[16:19], 0 offen nt // 000000010674: E06A1000 800463D8 - v_cvt_pk_bf16_f32 v100, v100, v100 // 00000001067C: D2680064 0002C964 - buffer_store_short v100, v217, s[16:19], 0 offen nt // 000000010684: E06A1000 800464D9 - v_cvt_pk_bf16_f32 v101, v101, v101 // 00000001068C: D2680065 0002CB65 - buffer_store_short v101, v218, s[16:19], 0 offen nt // 000000010694: E06A1000 800465DA - v_cvt_pk_bf16_f32 v102, v102, v102 // 00000001069C: D2680066 0002CD66 - buffer_store_short v102, v219, s[16:19], 0 offen nt // 0000000106A4: E06A1000 800466DB - v_cvt_pk_bf16_f32 v103, v103, v103 // 0000000106AC: D2680067 0002CF67 - buffer_store_short v103, v220, s[16:19], 0 offen nt // 0000000106B4: E06A1000 800467DC - v_cvt_pk_bf16_f32 v104, v104, v104 // 0000000106BC: D2680068 0002D168 - buffer_store_short v104, v221, s[16:19], 0 offen nt // 0000000106C4: E06A1000 800468DD - v_cvt_pk_bf16_f32 v105, v105, v105 // 0000000106CC: D2680069 0002D369 - buffer_store_short v105, v222, s[16:19], 0 offen nt // 0000000106D4: E06A1000 800469DE - 
v_cvt_pk_bf16_f32 v106, v106, v106 // 0000000106DC: D268006A 0002D56A - buffer_store_short v106, v223, s[16:19], 0 offen nt // 0000000106E4: E06A1000 80046ADF - v_cvt_pk_bf16_f32 v107, v107, v107 // 0000000106EC: D268006B 0002D76B - buffer_store_short v107, v224, s[16:19], 0 offen nt // 0000000106F4: E06A1000 80046BE0 - v_cvt_pk_bf16_f32 v108, v108, v108 // 0000000106FC: D268006C 0002D96C - buffer_store_short v108, v225, s[16:19], 0 offen nt // 000000010704: E06A1000 80046CE1 - v_cvt_pk_bf16_f32 v109, v109, v109 // 00000001070C: D268006D 0002DB6D - buffer_store_short v109, v226, s[16:19], 0 offen nt // 000000010714: E06A1000 80046DE2 - v_cvt_pk_bf16_f32 v110, v110, v110 // 00000001071C: D268006E 0002DD6E - buffer_store_short v110, v227, s[16:19], 0 offen nt // 000000010724: E06A1000 80046EE3 - v_cvt_pk_bf16_f32 v111, v111, v111 // 00000001072C: D268006F 0002DF6F - buffer_store_short v111, v228, s[16:19], 0 offen nt // 000000010734: E06A1000 80046FE4 - v_cvt_pk_bf16_f32 v112, v112, v112 // 00000001073C: D2680070 0002E170 - buffer_store_short v112, v229, s[16:19], 0 offen nt // 000000010744: E06A1000 800470E5 - v_cvt_pk_bf16_f32 v113, v113, v113 // 00000001074C: D2680071 0002E371 - buffer_store_short v113, v230, s[16:19], 0 offen nt // 000000010754: E06A1000 800471E6 - v_cvt_pk_bf16_f32 v114, v114, v114 // 00000001075C: D2680072 0002E572 - buffer_store_short v114, v231, s[16:19], 0 offen nt // 000000010764: E06A1000 800472E7 - v_cvt_pk_bf16_f32 v115, v115, v115 // 00000001076C: D2680073 0002E773 - buffer_store_short v115, v232, s[16:19], 0 offen nt // 000000010774: E06A1000 800473E8 - v_cvt_pk_bf16_f32 v116, v116, v116 // 00000001077C: D2680074 0002E974 - buffer_store_short v116, v233, s[16:19], 0 offen nt // 000000010784: E06A1000 800474E9 - v_cvt_pk_bf16_f32 v117, v117, v117 // 00000001078C: D2680075 0002EB75 - buffer_store_short v117, v234, s[16:19], 0 offen nt // 000000010794: E06A1000 800475EA - v_cvt_pk_bf16_f32 v118, v118, v118 // 00000001079C: D2680076 
0002ED76 - buffer_store_short v118, v235, s[16:19], 0 offen nt // 0000000107A4: E06A1000 800476EB - v_cvt_pk_bf16_f32 v119, v119, v119 // 0000000107AC: D2680077 0002EF77 - buffer_store_short v119, v236, s[16:19], 0 offen nt // 0000000107B4: E06A1000 800477EC - v_cvt_pk_bf16_f32 v120, v120, v120 // 0000000107BC: D2680078 0002F178 - buffer_store_short v120, v237, s[16:19], 0 offen nt // 0000000107C4: E06A1000 800478ED - v_cvt_pk_bf16_f32 v121, v121, v121 // 0000000107CC: D2680079 0002F379 - buffer_store_short v121, v238, s[16:19], 0 offen nt // 0000000107D4: E06A1000 800479EE - v_cvt_pk_bf16_f32 v122, v122, v122 // 0000000107DC: D268007A 0002F57A - buffer_store_short v122, v239, s[16:19], 0 offen nt // 0000000107E4: E06A1000 80047AEF - v_cvt_pk_bf16_f32 v123, v123, v123 // 0000000107EC: D268007B 0002F77B - buffer_store_short v123, v240, s[16:19], 0 offen nt // 0000000107F4: E06A1000 80047BF0 - v_cvt_pk_bf16_f32 v124, v124, v124 // 0000000107FC: D268007C 0002F97C - buffer_store_short v124, v241, s[16:19], 0 offen nt // 000000010804: E06A1000 80047CF1 - v_cvt_pk_bf16_f32 v125, v125, v125 // 00000001080C: D268007D 0002FB7D - buffer_store_short v125, v242, s[16:19], 0 offen nt // 000000010814: E06A1000 80047DF2 - v_cvt_pk_bf16_f32 v126, v126, v126 // 00000001081C: D268007E 0002FD7E - buffer_store_short v126, v243, s[16:19], 0 offen nt // 000000010824: E06A1000 80047EF3 - v_cvt_pk_bf16_f32 v127, v127, v127 // 00000001082C: D268007F 0002FF7F - buffer_store_short v127, v244, s[16:19], 0 offen nt // 000000010834: E06A1000 80047FF4 - v_cvt_pk_bf16_f32 v128, v128, v128 // 00000001083C: D2680080 00030180 - buffer_store_short v128, v245, s[16:19], 0 offen nt // 000000010844: E06A1000 800480F5 - s_nop 0 // 00000001084C: BF800000 - v_mov_b32_e32 v10, 0x80000000 // 000000010850: 7E1402FF 80000000 - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000010858: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010860: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 
000000010868: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010870: 86A2221E - v_add_lshl_u32 v129, v7, v8, 1 // 000000010874: D1FE0081 02061107 - v_cndmask_b32_e64 v129, v10, v129, s[34:35] // 00000001087C: D1000081 008B030A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000010884: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001088C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010894: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001089C: 86A2221E - v_add_lshl_u32 v130, v7, v8, 1 // 0000000108A0: D1FE0082 02061107 - v_cndmask_b32_e64 v130, v10, v130, s[34:35] // 0000000108A8: D1000082 008B050A - v_add_co_u32_e64 v8, vcc, v4, 4 // 0000000108B0: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000108B8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000108C0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000108C8: 86A2221E - v_add_lshl_u32 v131, v7, v8, 1 // 0000000108CC: D1FE0083 02061107 - v_cndmask_b32_e64 v131, v10, v131, s[34:35] // 0000000108D4: D1000083 008B070A - v_add_co_u32_e64 v8, vcc, v4, 5 // 0000000108DC: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000108E4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000108EC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000108F4: 86A2221E - v_add_lshl_u32 v135, v7, v8, 1 // 0000000108F8: D1FE0087 02061107 - v_cndmask_b32_e64 v135, v10, v135, s[34:35] // 000000010900: D1000087 008B0F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000010908: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010910: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010918: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010920: 86A2221E - v_add_lshl_u32 v136, v7, v8, 1 // 000000010924: D1FE0088 02061107 - v_cndmask_b32_e64 v136, v10, v136, s[34:35] // 00000001092C: D1000088 008B110A - v_add_co_u32_e64 v8, vcc, v4, 7 
// 000000010934: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001093C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010944: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001094C: 86A2221E - v_add_lshl_u32 v137, v7, v8, 1 // 000000010950: D1FE0089 02061107 - v_cndmask_b32_e64 v137, v10, v137, s[34:35] // 000000010958: D1000089 008B130A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000010960: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000010968: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000010970: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000010978: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010980: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010988: 86A2221E - v_add_lshl_u32 v138, v7, v4, 1 // 00000001098C: D1FE008A 02060907 - v_cndmask_b32_e64 v138, v10, v138, s[34:35] // 000000010994: D100008A 008B150A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001099C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000109A4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000109AC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000109B4: 86A2221E - v_add_lshl_u32 v139, v7, v8, 1 // 0000000109B8: D1FE008B 02061107 - v_cndmask_b32_e64 v139, v10, v139, s[34:35] // 0000000109C0: D100008B 008B170A - v_add_co_u32_e64 v8, vcc, v4, 2 // 0000000109C8: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000109D0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000109D8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000109E0: 86A2221E - v_add_lshl_u32 v140, v7, v8, 1 // 0000000109E4: D1FE008C 02061107 - v_cndmask_b32_e64 v140, v10, v140, s[34:35] // 0000000109EC: D100008C 008B190A - v_add_co_u32_e64 v8, vcc, v4, 3 // 0000000109F4: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000109FC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 
000000010A04: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010A0C: 86A2221E - v_add_lshl_u32 v141, v7, v8, 1 // 000000010A10: D1FE008D 02061107 - v_cndmask_b32_e64 v141, v10, v141, s[34:35] // 000000010A18: D100008D 008B1B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000010A20: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010A28: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010A30: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010A38: 86A2221E - v_add_lshl_u32 v142, v7, v8, 1 // 000000010A3C: D1FE008E 02061107 - v_cndmask_b32_e64 v142, v10, v142, s[34:35] // 000000010A44: D100008E 008B1D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000010A4C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010A54: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010A5C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010A64: 86A2221E - v_add_lshl_u32 v143, v7, v8, 1 // 000000010A68: D1FE008F 02061107 - v_cndmask_b32_e64 v143, v10, v143, s[34:35] // 000000010A70: D100008F 008B1F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000010A78: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010A80: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010A88: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010A90: 86A2221E - v_add_lshl_u32 v144, v7, v8, 1 // 000000010A94: D1FE0090 02061107 - v_cndmask_b32_e64 v144, v10, v144, s[34:35] // 000000010A9C: D1000090 008B210A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000010AA4: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010AAC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010AB4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010ABC: 86A2221E - v_add_lshl_u32 v145, v7, v8, 1 // 000000010AC0: D1FE0091 02061107 - v_cndmask_b32_e64 v145, v10, v145, s[34:35] // 000000010AC8: D1000091 008B230A - v_add_co_u32_e64 v5, vcc, v5, 1 
// 000000010AD0: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000010AD8: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000010AE0: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000010AE8: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010AF0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010AF8: 86A2221E - v_add_lshl_u32 v146, v7, v4, 1 // 000000010AFC: D1FE0092 02060907 - v_cndmask_b32_e64 v146, v10, v146, s[34:35] // 000000010B04: D1000092 008B250A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000010B0C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010B14: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010B1C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010B24: 86A2221E - v_add_lshl_u32 v147, v7, v8, 1 // 000000010B28: D1FE0093 02061107 - v_cndmask_b32_e64 v147, v10, v147, s[34:35] // 000000010B30: D1000093 008B270A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000010B38: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010B40: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010B48: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010B50: 86A2221E - v_add_lshl_u32 v148, v7, v8, 1 // 000000010B54: D1FE0094 02061107 - v_cndmask_b32_e64 v148, v10, v148, s[34:35] // 000000010B5C: D1000094 008B290A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000010B64: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010B6C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010B74: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010B7C: 86A2221E - v_add_lshl_u32 v149, v7, v8, 1 // 000000010B80: D1FE0095 02061107 - v_cndmask_b32_e64 v149, v10, v149, s[34:35] // 000000010B88: D1000095 008B2B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000010B90: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010B98: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 
000000010BA0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010BA8: 86A2221E - v_add_lshl_u32 v150, v7, v8, 1 // 000000010BAC: D1FE0096 02061107 - v_cndmask_b32_e64 v150, v10, v150, s[34:35] // 000000010BB4: D1000096 008B2D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000010BBC: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010BC4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010BCC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010BD4: 86A2221E - v_add_lshl_u32 v151, v7, v8, 1 // 000000010BD8: D1FE0097 02061107 - v_cndmask_b32_e64 v151, v10, v151, s[34:35] // 000000010BE0: D1000097 008B2F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000010BE8: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010BF0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010BF8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010C00: 86A2221E - v_add_lshl_u32 v152, v7, v8, 1 // 000000010C04: D1FE0098 02061107 - v_cndmask_b32_e64 v152, v10, v152, s[34:35] // 000000010C0C: D1000098 008B310A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000010C14: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010C1C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010C24: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010C2C: 86A2221E - v_add_lshl_u32 v153, v7, v8, 1 // 000000010C30: D1FE0099 02061107 - v_cndmask_b32_e64 v153, v10, v153, s[34:35] // 000000010C38: D1000099 008B330A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000010C40: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000010C48: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000010C50: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000010C58: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010C60: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010C68: 86A2221E - v_add_lshl_u32 v154, v7, v4, 1 // 000000010C6C: D1FE009A 
02060907 - v_cndmask_b32_e64 v154, v10, v154, s[34:35] // 000000010C74: D100009A 008B350A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000010C7C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010C84: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010C8C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010C94: 86A2221E - v_add_lshl_u32 v155, v7, v8, 1 // 000000010C98: D1FE009B 02061107 - v_cndmask_b32_e64 v155, v10, v155, s[34:35] // 000000010CA0: D100009B 008B370A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000010CA8: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010CB0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010CB8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010CC0: 86A2221E - v_add_lshl_u32 v156, v7, v8, 1 // 000000010CC4: D1FE009C 02061107 - v_cndmask_b32_e64 v156, v10, v156, s[34:35] // 000000010CCC: D100009C 008B390A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000010CD4: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010CDC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010CE4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010CEC: 86A2221E - v_add_lshl_u32 v157, v7, v8, 1 // 000000010CF0: D1FE009D 02061107 - v_cndmask_b32_e64 v157, v10, v157, s[34:35] // 000000010CF8: D100009D 008B3B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000010D00: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010D08: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010D10: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010D18: 86A2221E - v_add_lshl_u32 v158, v7, v8, 1 // 000000010D1C: D1FE009E 02061107 - v_cndmask_b32_e64 v158, v10, v158, s[34:35] // 000000010D24: D100009E 008B3D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000010D2C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010D34: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 
000000010D3C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010D44: 86A2221E - v_add_lshl_u32 v159, v7, v8, 1 // 000000010D48: D1FE009F 02061107 - v_cndmask_b32_e64 v159, v10, v159, s[34:35] // 000000010D50: D100009F 008B3F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000010D58: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010D60: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010D68: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010D70: 86A2221E - v_add_lshl_u32 v160, v7, v8, 1 // 000000010D74: D1FE00A0 02061107 - v_cndmask_b32_e64 v160, v10, v160, s[34:35] // 000000010D7C: D10000A0 008B410A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000010D84: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010D8C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010D94: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010D9C: 86A2221E - v_add_lshl_u32 v161, v7, v8, 1 // 000000010DA0: D1FE00A1 02061107 - v_cndmask_b32_e64 v161, v10, v161, s[34:35] // 000000010DA8: D10000A1 008B430A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000010DB0: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000010DB8: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000010DC0: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000010DC8: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010DD0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010DD8: 86A2221E - v_add_lshl_u32 v162, v7, v4, 1 // 000000010DDC: D1FE00A2 02060907 - v_cndmask_b32_e64 v162, v10, v162, s[34:35] // 000000010DE4: D10000A2 008B450A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000010DEC: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010DF4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010DFC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010E04: 86A2221E - v_add_lshl_u32 v163, v7, v8, 1 // 000000010E08: D1FE00A3 
02061107 - v_cndmask_b32_e64 v163, v10, v163, s[34:35] // 000000010E10: D10000A3 008B470A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000010E18: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010E20: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010E28: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010E30: 86A2221E - v_add_lshl_u32 v164, v7, v8, 1 // 000000010E34: D1FE00A4 02061107 - v_cndmask_b32_e64 v164, v10, v164, s[34:35] // 000000010E3C: D10000A4 008B490A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000010E44: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010E4C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010E54: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010E5C: 86A2221E - v_add_lshl_u32 v165, v7, v8, 1 // 000000010E60: D1FE00A5 02061107 - v_cndmask_b32_e64 v165, v10, v165, s[34:35] // 000000010E68: D10000A5 008B4B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000010E70: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010E78: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010E80: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010E88: 86A2221E - v_add_lshl_u32 v166, v7, v8, 1 // 000000010E8C: D1FE00A6 02061107 - v_cndmask_b32_e64 v166, v10, v166, s[34:35] // 000000010E94: D10000A6 008B4D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000010E9C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010EA4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010EAC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010EB4: 86A2221E - v_add_lshl_u32 v167, v7, v8, 1 // 000000010EB8: D1FE00A7 02061107 - v_cndmask_b32_e64 v167, v10, v167, s[34:35] // 000000010EC0: D10000A7 008B4F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000010EC8: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010ED0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 
000000010ED8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010EE0: 86A2221E - v_add_lshl_u32 v168, v7, v8, 1 // 000000010EE4: D1FE00A8 02061107 - v_cndmask_b32_e64 v168, v10, v168, s[34:35] // 000000010EEC: D10000A8 008B510A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000010EF4: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010EFC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010F04: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010F0C: 86A2221E - v_add_lshl_u32 v169, v7, v8, 1 // 000000010F10: D1FE00A9 02061107 - v_cndmask_b32_e64 v169, v10, v169, s[34:35] // 000000010F18: D10000A9 008B530A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000010F20: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000010F28: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000010F30: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000010F38: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010F40: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010F48: 86A2221E - v_add_lshl_u32 v170, v7, v4, 1 // 000000010F4C: D1FE00AA 02060907 - v_cndmask_b32_e64 v170, v10, v170, s[34:35] // 000000010F54: D10000AA 008B550A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000010F5C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010F64: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010F6C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010F74: 86A2221E - v_add_lshl_u32 v171, v7, v8, 1 // 000000010F78: D1FE00AB 02061107 - v_cndmask_b32_e64 v171, v10, v171, s[34:35] // 000000010F80: D10000AB 008B570A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000010F88: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010F90: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010F98: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010FA0: 86A2221E - v_add_lshl_u32 v172, v7, v8, 1 // 000000010FA4: D1FE00AC 
02061107 - v_cndmask_b32_e64 v172, v10, v172, s[34:35] // 000000010FAC: D10000AC 008B590A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000010FB4: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010FBC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010FC4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010FCC: 86A2221E - v_add_lshl_u32 v173, v7, v8, 1 // 000000010FD0: D1FE00AD 02061107 - v_cndmask_b32_e64 v173, v10, v173, s[34:35] // 000000010FD8: D10000AD 008B5B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000010FE0: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000010FE8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000010FF0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000010FF8: 86A2221E - v_add_lshl_u32 v174, v7, v8, 1 // 000000010FFC: D1FE00AE 02061107 - v_cndmask_b32_e64 v174, v10, v174, s[34:35] // 000000011004: D10000AE 008B5D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001100C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011014: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001101C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011024: 86A2221E - v_add_lshl_u32 v175, v7, v8, 1 // 000000011028: D1FE00AF 02061107 - v_cndmask_b32_e64 v175, v10, v175, s[34:35] // 000000011030: D10000AF 008B5F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000011038: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011040: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011048: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011050: 86A2221E - v_add_lshl_u32 v176, v7, v8, 1 // 000000011054: D1FE00B0 02061107 - v_cndmask_b32_e64 v176, v10, v176, s[34:35] // 00000001105C: D10000B0 008B610A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000011064: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001106C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 
000000011074: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001107C: 86A2221E - v_add_lshl_u32 v177, v7, v8, 1 // 000000011080: D1FE00B1 02061107 - v_cndmask_b32_e64 v177, v10, v177, s[34:35] // 000000011088: D10000B1 008B630A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000011090: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000011098: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000110A0: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000110A8: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000110B0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000110B8: 86A2221E - v_add_lshl_u32 v178, v7, v4, 1 // 0000000110BC: D1FE00B2 02060907 - v_cndmask_b32_e64 v178, v10, v178, s[34:35] // 0000000110C4: D10000B2 008B650A - v_add_co_u32_e64 v8, vcc, v4, 1 // 0000000110CC: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000110D4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000110DC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000110E4: 86A2221E - v_add_lshl_u32 v179, v7, v8, 1 // 0000000110E8: D1FE00B3 02061107 - v_cndmask_b32_e64 v179, v10, v179, s[34:35] // 0000000110F0: D10000B3 008B670A - v_add_co_u32_e64 v8, vcc, v4, 2 // 0000000110F8: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011100: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011108: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011110: 86A2221E - v_add_lshl_u32 v180, v7, v8, 1 // 000000011114: D1FE00B4 02061107 - v_cndmask_b32_e64 v180, v10, v180, s[34:35] // 00000001111C: D10000B4 008B690A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000011124: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001112C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011134: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001113C: 86A2221E - v_add_lshl_u32 v181, v7, v8, 1 // 000000011140: D1FE00B5 
02061107 - v_cndmask_b32_e64 v181, v10, v181, s[34:35] // 000000011148: D10000B5 008B6B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000011150: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011158: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011160: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011168: 86A2221E - v_add_lshl_u32 v182, v7, v8, 1 // 00000001116C: D1FE00B6 02061107 - v_cndmask_b32_e64 v182, v10, v182, s[34:35] // 000000011174: D10000B6 008B6D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001117C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011184: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001118C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011194: 86A2221E - v_add_lshl_u32 v183, v7, v8, 1 // 000000011198: D1FE00B7 02061107 - v_cndmask_b32_e64 v183, v10, v183, s[34:35] // 0000000111A0: D10000B7 008B6F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 0000000111A8: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000111B0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000111B8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000111C0: 86A2221E - v_add_lshl_u32 v184, v7, v8, 1 // 0000000111C4: D1FE00B8 02061107 - v_cndmask_b32_e64 v184, v10, v184, s[34:35] // 0000000111CC: D10000B8 008B710A - v_add_co_u32_e64 v8, vcc, v4, 7 // 0000000111D4: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000111DC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000111E4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000111EC: 86A2221E - v_add_lshl_u32 v185, v7, v8, 1 // 0000000111F0: D1FE00B9 02061107 - v_cndmask_b32_e64 v185, v10, v185, s[34:35] // 0000000111F8: D10000B9 008B730A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000011200: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000011208: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000011210: D1340007 
00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000011218: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011220: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011228: 86A2221E - v_add_lshl_u32 v186, v7, v4, 1 // 00000001122C: D1FE00BA 02060907 - v_cndmask_b32_e64 v186, v10, v186, s[34:35] // 000000011234: D10000BA 008B750A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001123C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011244: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001124C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011254: 86A2221E - v_add_lshl_u32 v187, v7, v8, 1 // 000000011258: D1FE00BB 02061107 - v_cndmask_b32_e64 v187, v10, v187, s[34:35] // 000000011260: D10000BB 008B770A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000011268: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011270: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011278: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011280: 86A2221E - v_add_lshl_u32 v188, v7, v8, 1 // 000000011284: D1FE00BC 02061107 - v_cndmask_b32_e64 v188, v10, v188, s[34:35] // 00000001128C: D10000BC 008B790A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000011294: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001129C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000112A4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000112AC: 86A2221E - v_add_lshl_u32 v189, v7, v8, 1 // 0000000112B0: D1FE00BD 02061107 - v_cndmask_b32_e64 v189, v10, v189, s[34:35] // 0000000112B8: D10000BD 008B7B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 0000000112C0: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000112C8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000112D0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000112D8: 86A2221E - v_add_lshl_u32 v190, v7, v8, 1 // 0000000112DC: D1FE00BE 
02061107 - v_cndmask_b32_e64 v190, v10, v190, s[34:35] // 0000000112E4: D10000BE 008B7D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 0000000112EC: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000112F4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000112FC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011304: 86A2221E - v_add_lshl_u32 v191, v7, v8, 1 // 000000011308: D1FE00BF 02061107 - v_cndmask_b32_e64 v191, v10, v191, s[34:35] // 000000011310: D10000BF 008B7F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000011318: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011320: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011328: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011330: 86A2221E - v_add_lshl_u32 v192, v7, v8, 1 // 000000011334: D1FE00C0 02061107 - v_cndmask_b32_e64 v192, v10, v192, s[34:35] // 00000001133C: D10000C0 008B810A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000011344: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001134C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011354: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001135C: 86A2221E - v_add_lshl_u32 v193, v7, v8, 1 // 000000011360: D1FE00C1 02061107 - v_cndmask_b32_e64 v193, v10, v193, s[34:35] // 000000011368: D10000C1 008B830A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000011370: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000011378: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000011380: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000011388: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011390: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011398: 86A2221E - v_add_lshl_u32 v194, v7, v4, 1 // 00000001139C: D1FE00C2 02060907 - v_cndmask_b32_e64 v194, v10, v194, s[34:35] // 0000000113A4: D10000C2 008B850A - v_add_co_u32_e64 v8, vcc, v4, 1 // 0000000113AC: D1196A08 
00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000113B4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000113BC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000113C4: 86A2221E - v_add_lshl_u32 v195, v7, v8, 1 // 0000000113C8: D1FE00C3 02061107 - v_cndmask_b32_e64 v195, v10, v195, s[34:35] // 0000000113D0: D10000C3 008B870A - v_add_co_u32_e64 v8, vcc, v4, 2 // 0000000113D8: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000113E0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000113E8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000113F0: 86A2221E - v_add_lshl_u32 v196, v7, v8, 1 // 0000000113F4: D1FE00C4 02061107 - v_cndmask_b32_e64 v196, v10, v196, s[34:35] // 0000000113FC: D10000C4 008B890A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000011404: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001140C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011414: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001141C: 86A2221E - v_add_lshl_u32 v197, v7, v8, 1 // 000000011420: D1FE00C5 02061107 - v_cndmask_b32_e64 v197, v10, v197, s[34:35] // 000000011428: D10000C5 008B8B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000011430: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011438: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011440: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011448: 86A2221E - v_add_lshl_u32 v198, v7, v8, 1 // 00000001144C: D1FE00C6 02061107 - v_cndmask_b32_e64 v198, v10, v198, s[34:35] // 000000011454: D10000C6 008B8D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001145C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011464: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001146C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011474: 86A2221E - v_add_lshl_u32 v199, v7, v8, 1 // 000000011478: D1FE00C7 
02061107 - v_cndmask_b32_e64 v199, v10, v199, s[34:35] // 000000011480: D10000C7 008B8F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000011488: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011490: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011498: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000114A0: 86A2221E - v_add_lshl_u32 v200, v7, v8, 1 // 0000000114A4: D1FE00C8 02061107 - v_cndmask_b32_e64 v200, v10, v200, s[34:35] // 0000000114AC: D10000C8 008B910A - v_add_co_u32_e64 v8, vcc, v4, 7 // 0000000114B4: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000114BC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000114C4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000114CC: 86A2221E - v_add_lshl_u32 v201, v7, v8, 1 // 0000000114D0: D1FE00C9 02061107 - v_cndmask_b32_e64 v201, v10, v201, s[34:35] // 0000000114D8: D10000C9 008B930A - v_add_co_u32_e64 v5, vcc, v5, 1 // 0000000114E0: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 0000000114E8: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000114F0: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000114F8: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011500: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011508: 86A2221E - v_add_lshl_u32 v202, v7, v4, 1 // 00000001150C: D1FE00CA 02060907 - v_cndmask_b32_e64 v202, v10, v202, s[34:35] // 000000011514: D10000CA 008B950A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001151C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011524: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001152C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011534: 86A2221E - v_add_lshl_u32 v203, v7, v8, 1 // 000000011538: D1FE00CB 02061107 - v_cndmask_b32_e64 v203, v10, v203, s[34:35] // 000000011540: D10000CB 008B970A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000011548: D1196A08 
00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011550: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011558: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011560: 86A2221E - v_add_lshl_u32 v204, v7, v8, 1 // 000000011564: D1FE00CC 02061107 - v_cndmask_b32_e64 v204, v10, v204, s[34:35] // 00000001156C: D10000CC 008B990A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000011574: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001157C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011584: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001158C: 86A2221E - v_add_lshl_u32 v205, v7, v8, 1 // 000000011590: D1FE00CD 02061107 - v_cndmask_b32_e64 v205, v10, v205, s[34:35] // 000000011598: D10000CD 008B9B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 0000000115A0: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000115A8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000115B0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000115B8: 86A2221E - v_add_lshl_u32 v206, v7, v8, 1 // 0000000115BC: D1FE00CE 02061107 - v_cndmask_b32_e64 v206, v10, v206, s[34:35] // 0000000115C4: D10000CE 008B9D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 0000000115CC: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000115D4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000115DC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000115E4: 86A2221E - v_add_lshl_u32 v207, v7, v8, 1 // 0000000115E8: D1FE00CF 02061107 - v_cndmask_b32_e64 v207, v10, v207, s[34:35] // 0000000115F0: D10000CF 008B9F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 0000000115F8: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011600: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011608: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011610: 86A2221E - v_add_lshl_u32 v208, v7, v8, 1 // 000000011614: D1FE00D0 
02061107 - v_cndmask_b32_e64 v208, v10, v208, s[34:35] // 00000001161C: D10000D0 008BA10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000011624: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001162C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011634: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001163C: 86A2221E - v_add_lshl_u32 v209, v7, v8, 1 // 000000011640: D1FE00D1 02061107 - v_cndmask_b32_e64 v209, v10, v209, s[34:35] // 000000011648: D10000D1 008BA30A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000011650: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000011658: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000011660: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000011668: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011670: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011678: 86A2221E - v_add_lshl_u32 v210, v7, v4, 1 // 00000001167C: D1FE00D2 02060907 - v_cndmask_b32_e64 v210, v10, v210, s[34:35] // 000000011684: D10000D2 008BA50A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001168C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011694: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001169C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000116A4: 86A2221E - v_add_lshl_u32 v211, v7, v8, 1 // 0000000116A8: D1FE00D3 02061107 - v_cndmask_b32_e64 v211, v10, v211, s[34:35] // 0000000116B0: D10000D3 008BA70A - v_add_co_u32_e64 v8, vcc, v4, 2 // 0000000116B8: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000116C0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000116C8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000116D0: 86A2221E - v_add_lshl_u32 v212, v7, v8, 1 // 0000000116D4: D1FE00D4 02061107 - v_cndmask_b32_e64 v212, v10, v212, s[34:35] // 0000000116DC: D10000D4 008BA90A - v_add_co_u32_e64 v8, vcc, v4, 3 // 0000000116E4: D1196A08 
00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000116EC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000116F4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000116FC: 86A2221E - v_add_lshl_u32 v213, v7, v8, 1 // 000000011700: D1FE00D5 02061107 - v_cndmask_b32_e64 v213, v10, v213, s[34:35] // 000000011708: D10000D5 008BAB0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000011710: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011718: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011720: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011728: 86A2221E - v_add_lshl_u32 v214, v7, v8, 1 // 00000001172C: D1FE00D6 02061107 - v_cndmask_b32_e64 v214, v10, v214, s[34:35] // 000000011734: D10000D6 008BAD0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001173C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011744: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001174C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011754: 86A2221E - v_add_lshl_u32 v215, v7, v8, 1 // 000000011758: D1FE00D7 02061107 - v_cndmask_b32_e64 v215, v10, v215, s[34:35] // 000000011760: D10000D7 008BAF0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000011768: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011770: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011778: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011780: 86A2221E - v_add_lshl_u32 v216, v7, v8, 1 // 000000011784: D1FE00D8 02061107 - v_cndmask_b32_e64 v216, v10, v216, s[34:35] // 00000001178C: D10000D8 008BB10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000011794: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001179C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000117A4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000117AC: 86A2221E - v_add_lshl_u32 v217, v7, v8, 1 // 0000000117B0: D1FE00D9 
02061107 - v_cndmask_b32_e64 v217, v10, v217, s[34:35] // 0000000117B8: D10000D9 008BB30A - v_add_co_u32_e64 v5, vcc, v5, 1 // 0000000117C0: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 0000000117C8: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000117D0: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000117D8: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000117E0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000117E8: 86A2221E - v_add_lshl_u32 v218, v7, v4, 1 // 0000000117EC: D1FE00DA 02060907 - v_cndmask_b32_e64 v218, v10, v218, s[34:35] // 0000000117F4: D10000DA 008BB50A - v_add_co_u32_e64 v8, vcc, v4, 1 // 0000000117FC: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011804: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001180C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011814: 86A2221E - v_add_lshl_u32 v219, v7, v8, 1 // 000000011818: D1FE00DB 02061107 - v_cndmask_b32_e64 v219, v10, v219, s[34:35] // 000000011820: D10000DB 008BB70A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000011828: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011830: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011838: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011840: 86A2221E - v_add_lshl_u32 v220, v7, v8, 1 // 000000011844: D1FE00DC 02061107 - v_cndmask_b32_e64 v220, v10, v220, s[34:35] // 00000001184C: D10000DC 008BB90A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000011854: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001185C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011864: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001186C: 86A2221E - v_add_lshl_u32 v221, v7, v8, 1 // 000000011870: D1FE00DD 02061107 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 000000011878: D10000DD 008BBB0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000011880: D1196A08 
00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011888: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011890: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011898: 86A2221E - v_add_lshl_u32 v222, v7, v8, 1 // 00000001189C: D1FE00DE 02061107 - v_cndmask_b32_e64 v222, v10, v222, s[34:35] // 0000000118A4: D10000DE 008BBD0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 0000000118AC: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000118B4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000118BC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000118C4: 86A2221E - v_add_lshl_u32 v223, v7, v8, 1 // 0000000118C8: D1FE00DF 02061107 - v_cndmask_b32_e64 v223, v10, v223, s[34:35] // 0000000118D0: D10000DF 008BBF0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 0000000118D8: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000118E0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000118E8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000118F0: 86A2221E - v_add_lshl_u32 v224, v7, v8, 1 // 0000000118F4: D1FE00E0 02061107 - v_cndmask_b32_e64 v224, v10, v224, s[34:35] // 0000000118FC: D10000E0 008BC10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000011904: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001190C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011914: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001191C: 86A2221E - v_add_lshl_u32 v225, v7, v8, 1 // 000000011920: D1FE00E1 02061107 - v_cndmask_b32_e64 v225, v10, v225, s[34:35] // 000000011928: D10000E1 008BC30A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000011930: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000011938: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000011940: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000011948: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011950: D0C90022 00003305 - 
s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011958: 86A2221E - v_add_lshl_u32 v226, v7, v4, 1 // 00000001195C: D1FE00E2 02060907 - v_cndmask_b32_e64 v226, v10, v226, s[34:35] // 000000011964: D10000E2 008BC50A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001196C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011974: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001197C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011984: 86A2221E - v_add_lshl_u32 v227, v7, v8, 1 // 000000011988: D1FE00E3 02061107 - v_cndmask_b32_e64 v227, v10, v227, s[34:35] // 000000011990: D10000E3 008BC70A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000011998: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000119A0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000119A8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000119B0: 86A2221E - v_add_lshl_u32 v228, v7, v8, 1 // 0000000119B4: D1FE00E4 02061107 - v_cndmask_b32_e64 v228, v10, v228, s[34:35] // 0000000119BC: D10000E4 008BC90A - v_add_co_u32_e64 v8, vcc, v4, 3 // 0000000119C4: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000119CC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000119D4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000119DC: 86A2221E - v_add_lshl_u32 v229, v7, v8, 1 // 0000000119E0: D1FE00E5 02061107 - v_cndmask_b32_e64 v229, v10, v229, s[34:35] // 0000000119E8: D10000E5 008BCB0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 0000000119F0: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000119F8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011A00: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011A08: 86A2221E - v_add_lshl_u32 v230, v7, v8, 1 // 000000011A0C: D1FE00E6 02061107 - v_cndmask_b32_e64 v230, v10, v230, s[34:35] // 000000011A14: D10000E6 008BCD0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000011A1C: D1196A08 00010B04 
- v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011A24: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011A2C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011A34: 86A2221E - v_add_lshl_u32 v231, v7, v8, 1 // 000000011A38: D1FE00E7 02061107 - v_cndmask_b32_e64 v231, v10, v231, s[34:35] // 000000011A40: D10000E7 008BCF0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000011A48: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011A50: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011A58: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011A60: 86A2221E - v_add_lshl_u32 v232, v7, v8, 1 // 000000011A64: D1FE00E8 02061107 - v_cndmask_b32_e64 v232, v10, v232, s[34:35] // 000000011A6C: D10000E8 008BD10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000011A74: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011A7C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011A84: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011A8C: 86A2221E - v_add_lshl_u32 v233, v7, v8, 1 // 000000011A90: D1FE00E9 02061107 - v_cndmask_b32_e64 v233, v10, v233, s[34:35] // 000000011A98: D10000E9 008BD30A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000011AA0: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000011AA8: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000011AB0: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000011AB8: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011AC0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011AC8: 86A2221E - v_add_lshl_u32 v234, v7, v4, 1 // 000000011ACC: D1FE00EA 02060907 - v_cndmask_b32_e64 v234, v10, v234, s[34:35] // 000000011AD4: D10000EA 008BD50A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000011ADC: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011AE4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011AEC: D0C90022 00003305 - s_and_b64 
s[34:35], s[30:31], s[34:35] // 000000011AF4: 86A2221E - v_add_lshl_u32 v235, v7, v8, 1 // 000000011AF8: D1FE00EB 02061107 - v_cndmask_b32_e64 v235, v10, v235, s[34:35] // 000000011B00: D10000EB 008BD70A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000011B08: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011B10: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011B18: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011B20: 86A2221E - v_add_lshl_u32 v236, v7, v8, 1 // 000000011B24: D1FE00EC 02061107 - v_cndmask_b32_e64 v236, v10, v236, s[34:35] // 000000011B2C: D10000EC 008BD90A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000011B34: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011B3C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011B44: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011B4C: 86A2221E - v_add_lshl_u32 v237, v7, v8, 1 // 000000011B50: D1FE00ED 02061107 - v_cndmask_b32_e64 v237, v10, v237, s[34:35] // 000000011B58: D10000ED 008BDB0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000011B60: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011B68: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011B70: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011B78: 86A2221E - v_add_lshl_u32 v238, v7, v8, 1 // 000000011B7C: D1FE00EE 02061107 - v_cndmask_b32_e64 v238, v10, v238, s[34:35] // 000000011B84: D10000EE 008BDD0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000011B8C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011B94: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011B9C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011BA4: 86A2221E - v_add_lshl_u32 v239, v7, v8, 1 // 000000011BA8: D1FE00EF 02061107 - v_cndmask_b32_e64 v239, v10, v239, s[34:35] // 000000011BB0: D10000EF 008BDF0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000011BB8: D1196A08 00010D04 - 
v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011BC0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011BC8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011BD0: 86A2221E - v_add_lshl_u32 v240, v7, v8, 1 // 000000011BD4: D1FE00F0 02061107 - v_cndmask_b32_e64 v240, v10, v240, s[34:35] // 000000011BDC: D10000F0 008BE10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000011BE4: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011BEC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011BF4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011BFC: 86A2221E - v_add_lshl_u32 v241, v7, v8, 1 // 000000011C00: D1FE00F1 02061107 - v_cndmask_b32_e64 v241, v10, v241, s[34:35] // 000000011C08: D10000F1 008BE30A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000011C10: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000011C18: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000011C20: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000011C28: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011C30: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011C38: 86A2221E - v_add_lshl_u32 v242, v7, v4, 1 // 000000011C3C: D1FE00F2 02060907 - v_cndmask_b32_e64 v242, v10, v242, s[34:35] // 000000011C44: D10000F2 008BE50A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000011C4C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011C54: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011C5C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011C64: 86A2221E - v_add_lshl_u32 v243, v7, v8, 1 // 000000011C68: D1FE00F3 02061107 - v_cndmask_b32_e64 v243, v10, v243, s[34:35] // 000000011C70: D10000F3 008BE70A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000011C78: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011C80: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011C88: D0C90022 00003305 - s_and_b64 
s[34:35], s[30:31], s[34:35] // 000000011C90: 86A2221E - v_add_lshl_u32 v244, v7, v8, 1 // 000000011C94: D1FE00F4 02061107 - v_cndmask_b32_e64 v244, v10, v244, s[34:35] // 000000011C9C: D10000F4 008BE90A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000011CA4: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000011CAC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000011CB4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000011CBC: 86A2221E - v_add_lshl_u32 v245, v7, v8, 1 // 000000011CC0: D1FE00F5 02061107 - v_cndmask_b32_e64 v245, v10, v245, s[34:35] // 000000011CC8: D10000F5 008BEB0A - v_accvgpr_read_b32 v15, a201 // 000000011CD0: D3D8400F 180001C9 - v_accvgpr_read_b32 v16, a205 // 000000011CD8: D3D84010 180001CD - v_accvgpr_read_b32 v17, a209 // 000000011CE0: D3D84011 180001D1 - v_accvgpr_read_b32 v18, a213 // 000000011CE8: D3D84012 180001D5 - v_accvgpr_read_b32 v19, a217 // 000000011CF0: D3D84013 180001D9 - v_accvgpr_read_b32 v20, a221 // 000000011CF8: D3D84014 180001DD - v_accvgpr_read_b32 v21, a225 // 000000011D00: D3D84015 180001E1 - v_accvgpr_read_b32 v22, a229 // 000000011D08: D3D84016 180001E5 - v_accvgpr_read_b32 v23, a233 // 000000011D10: D3D84017 180001E9 - v_accvgpr_read_b32 v24, a237 // 000000011D18: D3D84018 180001ED - v_accvgpr_read_b32 v25, a241 // 000000011D20: D3D84019 180001F1 - v_accvgpr_read_b32 v26, a245 // 000000011D28: D3D8401A 180001F5 - v_accvgpr_read_b32 v27, a249 // 000000011D30: D3D8401B 180001F9 - v_accvgpr_read_b32 v28, a253 // 000000011D38: D3D8401C 180001FD - v_accvgpr_read_b32 v29, a2 // 000000011D40: D3D8401D 18000102 - v_accvgpr_read_b32 v30, a6 // 000000011D48: D3D8401E 18000106 - v_accvgpr_read_b32 v31, a10 // 000000011D50: D3D8401F 1800010A - v_accvgpr_read_b32 v32, a14 // 000000011D58: D3D84020 1800010E - v_accvgpr_read_b32 v33, a18 // 000000011D60: D3D84021 18000112 - v_accvgpr_read_b32 v34, a22 // 000000011D68: D3D84022 18000116 - v_accvgpr_read_b32 v35, a26 // 000000011D70: 
D3D84023 1800011A - v_accvgpr_read_b32 v36, a30 // 000000011D78: D3D84024 1800011E - v_accvgpr_read_b32 v37, a34 // 000000011D80: D3D84025 18000122 - v_accvgpr_read_b32 v38, a38 // 000000011D88: D3D84026 18000126 - v_accvgpr_read_b32 v39, a42 // 000000011D90: D3D84027 1800012A - v_accvgpr_read_b32 v40, a46 // 000000011D98: D3D84028 1800012E - v_accvgpr_read_b32 v41, a50 // 000000011DA0: D3D84029 18000132 - v_accvgpr_read_b32 v42, a54 // 000000011DA8: D3D8402A 18000136 - v_accvgpr_read_b32 v43, a58 // 000000011DB0: D3D8402B 1800013A - v_accvgpr_read_b32 v44, a62 // 000000011DB8: D3D8402C 1800013E - v_accvgpr_read_b32 v45, a66 // 000000011DC0: D3D8402D 18000142 - v_accvgpr_read_b32 v46, a70 // 000000011DC8: D3D8402E 18000146 - v_accvgpr_read_b32 v47, a74 // 000000011DD0: D3D8402F 1800014A - v_accvgpr_read_b32 v48, a78 // 000000011DD8: D3D84030 1800014E - v_accvgpr_read_b32 v49, a82 // 000000011DE0: D3D84031 18000152 - v_accvgpr_read_b32 v50, a86 // 000000011DE8: D3D84032 18000156 - v_accvgpr_read_b32 v51, a90 // 000000011DF0: D3D84033 1800015A - v_accvgpr_read_b32 v52, a94 // 000000011DF8: D3D84034 1800015E - v_accvgpr_read_b32 v53, a98 // 000000011E00: D3D84035 18000162 - v_accvgpr_read_b32 v54, a102 // 000000011E08: D3D84036 18000166 - v_accvgpr_read_b32 v55, a106 // 000000011E10: D3D84037 1800016A - v_accvgpr_read_b32 v56, a110 // 000000011E18: D3D84038 1800016E - v_accvgpr_read_b32 v57, a114 // 000000011E20: D3D84039 18000172 - v_accvgpr_read_b32 v58, a118 // 000000011E28: D3D8403A 18000176 - v_accvgpr_read_b32 v59, a122 // 000000011E30: D3D8403B 1800017A - v_accvgpr_read_b32 v60, a126 // 000000011E38: D3D8403C 1800017E - v_accvgpr_read_b32 v61, a130 // 000000011E40: D3D8403D 18000182 - v_accvgpr_read_b32 v62, a134 // 000000011E48: D3D8403E 18000186 - v_accvgpr_read_b32 v63, a138 // 000000011E50: D3D8403F 1800018A - v_accvgpr_read_b32 v64, a142 // 000000011E58: D3D84040 1800018E - v_accvgpr_read_b32 v65, a146 // 000000011E60: D3D84041 18000192 - 
v_accvgpr_read_b32 v66, a150 // 000000011E68: D3D84042 18000196 - v_accvgpr_read_b32 v67, a154 // 000000011E70: D3D84043 1800019A - v_accvgpr_read_b32 v68, a158 // 000000011E78: D3D84044 1800019E - v_accvgpr_read_b32 v69, a162 // 000000011E80: D3D84045 180001A2 - v_accvgpr_read_b32 v70, a166 // 000000011E88: D3D84046 180001A6 - v_accvgpr_read_b32 v71, a170 // 000000011E90: D3D84047 180001AA - v_accvgpr_read_b32 v72, a174 // 000000011E98: D3D84048 180001AE - v_accvgpr_read_b32 v73, a178 // 000000011EA0: D3D84049 180001B2 - v_accvgpr_read_b32 v74, a182 // 000000011EA8: D3D8404A 180001B6 - v_accvgpr_read_b32 v75, a186 // 000000011EB0: D3D8404B 180001BA - v_accvgpr_read_b32 v76, a190 // 000000011EB8: D3D8404C 180001BE - v_accvgpr_read_b32 v77, a194 // 000000011EC0: D3D8404D 180001C2 - v_accvgpr_read_b32 v78, a198 // 000000011EC8: D3D8404E 180001C6 - v_accvgpr_read_b32 v79, a202 // 000000011ED0: D3D8404F 180001CA - v_accvgpr_read_b32 v80, a206 // 000000011ED8: D3D84050 180001CE - v_accvgpr_read_b32 v81, a210 // 000000011EE0: D3D84051 180001D2 - v_accvgpr_read_b32 v82, a214 // 000000011EE8: D3D84052 180001D6 - v_accvgpr_read_b32 v83, a218 // 000000011EF0: D3D84053 180001DA - v_accvgpr_read_b32 v84, a222 // 000000011EF8: D3D84054 180001DE - v_accvgpr_read_b32 v85, a226 // 000000011F00: D3D84055 180001E2 - v_accvgpr_read_b32 v86, a230 // 000000011F08: D3D84056 180001E6 - v_accvgpr_read_b32 v87, a234 // 000000011F10: D3D84057 180001EA - v_accvgpr_read_b32 v88, a238 // 000000011F18: D3D84058 180001EE - v_accvgpr_read_b32 v89, a242 // 000000011F20: D3D84059 180001F2 - v_accvgpr_read_b32 v90, a246 // 000000011F28: D3D8405A 180001F6 - v_accvgpr_read_b32 v91, a250 // 000000011F30: D3D8405B 180001FA - v_accvgpr_read_b32 v92, a254 // 000000011F38: D3D8405C 180001FE - v_accvgpr_read_b32 v93, a3 // 000000011F40: D3D8405D 18000103 - v_accvgpr_read_b32 v94, a7 // 000000011F48: D3D8405E 18000107 - v_accvgpr_read_b32 v95, a11 // 000000011F50: D3D8405F 1800010B - v_accvgpr_read_b32 v96, 
a15 // 000000011F58: D3D84060 1800010F - v_accvgpr_read_b32 v97, a19 // 000000011F60: D3D84061 18000113 - v_accvgpr_read_b32 v98, a23 // 000000011F68: D3D84062 18000117 - v_accvgpr_read_b32 v99, a27 // 000000011F70: D3D84063 1800011B - v_accvgpr_read_b32 v100, a31 // 000000011F78: D3D84064 1800011F - v_accvgpr_read_b32 v101, a35 // 000000011F80: D3D84065 18000123 - v_accvgpr_read_b32 v102, a39 // 000000011F88: D3D84066 18000127 - v_accvgpr_read_b32 v103, a43 // 000000011F90: D3D84067 1800012B - v_accvgpr_read_b32 v104, a47 // 000000011F98: D3D84068 1800012F - v_accvgpr_read_b32 v105, a51 // 000000011FA0: D3D84069 18000133 - v_accvgpr_read_b32 v106, a55 // 000000011FA8: D3D8406A 18000137 - v_accvgpr_read_b32 v107, a59 // 000000011FB0: D3D8406B 1800013B - v_accvgpr_read_b32 v108, a63 // 000000011FB8: D3D8406C 1800013F - v_accvgpr_read_b32 v109, a67 // 000000011FC0: D3D8406D 18000143 - v_accvgpr_read_b32 v110, a71 // 000000011FC8: D3D8406E 18000147 - v_accvgpr_read_b32 v111, a75 // 000000011FD0: D3D8406F 1800014B - v_accvgpr_read_b32 v112, a79 // 000000011FD8: D3D84070 1800014F - v_accvgpr_read_b32 v113, a83 // 000000011FE0: D3D84071 18000153 - v_accvgpr_read_b32 v114, a87 // 000000011FE8: D3D84072 18000157 - v_accvgpr_read_b32 v115, a91 // 000000011FF0: D3D84073 1800015B - v_accvgpr_read_b32 v116, a95 // 000000011FF8: D3D84074 1800015F - v_accvgpr_read_b32 v117, a99 // 000000012000: D3D84075 18000163 - v_accvgpr_read_b32 v118, a103 // 000000012008: D3D84076 18000167 - v_accvgpr_read_b32 v119, a107 // 000000012010: D3D84077 1800016B - v_accvgpr_read_b32 v120, a111 // 000000012018: D3D84078 1800016F - v_accvgpr_read_b32 v121, a115 // 000000012020: D3D84079 18000173 - v_accvgpr_read_b32 v122, a119 // 000000012028: D3D8407A 18000177 - v_accvgpr_read_b32 v123, a123 // 000000012030: D3D8407B 1800017B - v_accvgpr_read_b32 v124, a127 // 000000012038: D3D8407C 1800017F - v_accvgpr_read_b32 v125, a131 // 000000012040: D3D8407D 18000183 - v_accvgpr_read_b32 v126, a135 // 
000000012048: D3D8407E 18000187 - v_accvgpr_read_b32 v127, a139 // 000000012050: D3D8407F 1800018B - v_accvgpr_read_b32 v128, a143 // 000000012058: D3D84080 1800018F - v_mul_f32_e32 v15, s44, v15 // 000000012060: 0A1E1E2C - v_pk_mul_f32 v[16:17], s[44:45], v[16:17] op_sel_hi:[0,1] // 000000012064: D3B14010 1002202C - v_pk_mul_f32 v[18:19], s[44:45], v[18:19] op_sel_hi:[0,1] // 00000001206C: D3B14012 1002242C - v_pk_mul_f32 v[20:21], s[44:45], v[20:21] op_sel_hi:[0,1] // 000000012074: D3B14014 1002282C - v_pk_mul_f32 v[22:23], s[44:45], v[22:23] op_sel_hi:[0,1] // 00000001207C: D3B14016 10022C2C - v_pk_mul_f32 v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 000000012084: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 00000001208C: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 000000012094: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] op_sel_hi:[0,1] // 00000001209C: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 0000000120A4: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 0000000120AC: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 0000000120B4: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 0000000120BC: D3B14026 10024C2C - v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 0000000120C4: D3B14028 1002502C - v_pk_mul_f32 v[42:43], s[44:45], v[42:43] op_sel_hi:[0,1] // 0000000120CC: D3B1402A 1002542C - v_pk_mul_f32 v[44:45], s[44:45], v[44:45] op_sel_hi:[0,1] // 0000000120D4: D3B1402C 1002582C - v_pk_mul_f32 v[46:47], s[44:45], v[46:47] op_sel_hi:[0,1] // 0000000120DC: D3B1402E 10025C2C - v_pk_mul_f32 v[48:49], s[44:45], v[48:49] op_sel_hi:[0,1] // 0000000120E4: D3B14030 1002602C - v_pk_mul_f32 v[50:51], s[44:45], v[50:51] op_sel_hi:[0,1] // 0000000120EC: D3B14032 1002642C - v_pk_mul_f32 v[52:53], s[44:45], v[52:53] op_sel_hi:[0,1] // 
0000000120F4: D3B14034 1002682C - v_pk_mul_f32 v[54:55], s[44:45], v[54:55] op_sel_hi:[0,1] // 0000000120FC: D3B14036 10026C2C - v_pk_mul_f32 v[56:57], s[44:45], v[56:57] op_sel_hi:[0,1] // 000000012104: D3B14038 1002702C - v_pk_mul_f32 v[58:59], s[44:45], v[58:59] op_sel_hi:[0,1] // 00000001210C: D3B1403A 1002742C - v_pk_mul_f32 v[60:61], s[44:45], v[60:61] op_sel_hi:[0,1] // 000000012114: D3B1403C 1002782C - v_pk_mul_f32 v[62:63], s[44:45], v[62:63] op_sel_hi:[0,1] // 00000001211C: D3B1403E 10027C2C - v_pk_mul_f32 v[64:65], s[44:45], v[64:65] op_sel_hi:[0,1] // 000000012124: D3B14040 1002802C - v_pk_mul_f32 v[66:67], s[44:45], v[66:67] op_sel_hi:[0,1] // 00000001212C: D3B14042 1002842C - v_pk_mul_f32 v[68:69], s[44:45], v[68:69] op_sel_hi:[0,1] // 000000012134: D3B14044 1002882C - v_pk_mul_f32 v[70:71], s[44:45], v[70:71] op_sel_hi:[0,1] // 00000001213C: D3B14046 10028C2C - v_pk_mul_f32 v[72:73], s[44:45], v[72:73] op_sel_hi:[0,1] // 000000012144: D3B14048 1002902C - v_pk_mul_f32 v[74:75], s[44:45], v[74:75] op_sel_hi:[0,1] // 00000001214C: D3B1404A 1002942C - v_pk_mul_f32 v[76:77], s[44:45], v[76:77] op_sel_hi:[0,1] // 000000012154: D3B1404C 1002982C - v_pk_mul_f32 v[78:79], s[44:45], v[78:79] op_sel_hi:[0,1] // 00000001215C: D3B1404E 10029C2C - v_pk_mul_f32 v[80:81], s[44:45], v[80:81] op_sel_hi:[0,1] // 000000012164: D3B14050 1002A02C - v_pk_mul_f32 v[82:83], s[44:45], v[82:83] op_sel_hi:[0,1] // 00000001216C: D3B14052 1002A42C - v_pk_mul_f32 v[84:85], s[44:45], v[84:85] op_sel_hi:[0,1] // 000000012174: D3B14054 1002A82C - v_pk_mul_f32 v[86:87], s[44:45], v[86:87] op_sel_hi:[0,1] // 00000001217C: D3B14056 1002AC2C - v_pk_mul_f32 v[88:89], s[44:45], v[88:89] op_sel_hi:[0,1] // 000000012184: D3B14058 1002B02C - v_pk_mul_f32 v[90:91], s[44:45], v[90:91] op_sel_hi:[0,1] // 00000001218C: D3B1405A 1002B42C - v_pk_mul_f32 v[92:93], s[44:45], v[92:93] op_sel_hi:[0,1] // 000000012194: D3B1405C 1002B82C - v_pk_mul_f32 v[94:95], s[44:45], v[94:95] op_sel_hi:[0,1] // 
00000001219C: D3B1405E 1002BC2C - v_pk_mul_f32 v[96:97], s[44:45], v[96:97] op_sel_hi:[0,1] // 0000000121A4: D3B14060 1002C02C - v_pk_mul_f32 v[98:99], s[44:45], v[98:99] op_sel_hi:[0,1] // 0000000121AC: D3B14062 1002C42C - v_pk_mul_f32 v[100:101], s[44:45], v[100:101] op_sel_hi:[0,1]// 0000000121B4: D3B14064 1002C82C - v_pk_mul_f32 v[102:103], s[44:45], v[102:103] op_sel_hi:[0,1]// 0000000121BC: D3B14066 1002CC2C - v_pk_mul_f32 v[104:105], s[44:45], v[104:105] op_sel_hi:[0,1]// 0000000121C4: D3B14068 1002D02C - v_pk_mul_f32 v[106:107], s[44:45], v[106:107] op_sel_hi:[0,1]// 0000000121CC: D3B1406A 1002D42C - v_pk_mul_f32 v[108:109], s[44:45], v[108:109] op_sel_hi:[0,1]// 0000000121D4: D3B1406C 1002D82C - v_pk_mul_f32 v[110:111], s[44:45], v[110:111] op_sel_hi:[0,1]// 0000000121DC: D3B1406E 1002DC2C - v_pk_mul_f32 v[112:113], s[44:45], v[112:113] op_sel_hi:[0,1]// 0000000121E4: D3B14070 1002E02C - v_pk_mul_f32 v[114:115], s[44:45], v[114:115] op_sel_hi:[0,1]// 0000000121EC: D3B14072 1002E42C - v_pk_mul_f32 v[116:117], s[44:45], v[116:117] op_sel_hi:[0,1]// 0000000121F4: D3B14074 1002E82C - v_pk_mul_f32 v[118:119], s[44:45], v[118:119] op_sel_hi:[0,1]// 0000000121FC: D3B14076 1002EC2C - v_pk_mul_f32 v[120:121], s[44:45], v[120:121] op_sel_hi:[0,1]// 000000012204: D3B14078 1002F02C - v_pk_mul_f32 v[122:123], s[44:45], v[122:123] op_sel_hi:[0,1]// 00000001220C: D3B1407A 1002F42C - v_pk_mul_f32 v[124:125], s[44:45], v[124:125] op_sel_hi:[0,1]// 000000012214: D3B1407C 1002F82C - v_pk_mul_f32 v[126:127], s[44:45], v[126:127] op_sel_hi:[0,1]// 00000001221C: D3B1407E 1002FC2C - v_mul_f32_e32 v128, s44, v128 // 000000012224: 0B01002C - v_mov_b32_e32 v12, 0xffff0000 // 000000012228: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 000000012230: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 000000012238: 7E1C02FF 00007FFF - v_cvt_pk_bf16_f32 v15, v15, v15 // 000000012240: D268000F 00021F0F - buffer_store_short v15, v129, s[16:19], 0 offen nt // 000000012248: E06A1000 
80040F81 - v_cvt_pk_bf16_f32 v16, v16, v16 // 000000012250: D2680010 00022110 - buffer_store_short v16, v130, s[16:19], 0 offen nt // 000000012258: E06A1000 80041082 - v_cvt_pk_bf16_f32 v17, v17, v17 // 000000012260: D2680011 00022311 - buffer_store_short v17, v131, s[16:19], 0 offen nt // 000000012268: E06A1000 80041183 - v_cvt_pk_bf16_f32 v18, v18, v18 // 000000012270: D2680012 00022512 - buffer_store_short v18, v135, s[16:19], 0 offen nt // 000000012278: E06A1000 80041287 - v_cvt_pk_bf16_f32 v19, v19, v19 // 000000012280: D2680013 00022713 - buffer_store_short v19, v136, s[16:19], 0 offen nt // 000000012288: E06A1000 80041388 - v_cvt_pk_bf16_f32 v20, v20, v20 // 000000012290: D2680014 00022914 - buffer_store_short v20, v137, s[16:19], 0 offen nt // 000000012298: E06A1000 80041489 - v_cvt_pk_bf16_f32 v21, v21, v21 // 0000000122A0: D2680015 00022B15 - buffer_store_short v21, v138, s[16:19], 0 offen nt // 0000000122A8: E06A1000 8004158A - v_cvt_pk_bf16_f32 v22, v22, v22 // 0000000122B0: D2680016 00022D16 - buffer_store_short v22, v139, s[16:19], 0 offen nt // 0000000122B8: E06A1000 8004168B - v_cvt_pk_bf16_f32 v23, v23, v23 // 0000000122C0: D2680017 00022F17 - buffer_store_short v23, v140, s[16:19], 0 offen nt // 0000000122C8: E06A1000 8004178C - v_cvt_pk_bf16_f32 v24, v24, v24 // 0000000122D0: D2680018 00023118 - buffer_store_short v24, v141, s[16:19], 0 offen nt // 0000000122D8: E06A1000 8004188D - v_cvt_pk_bf16_f32 v25, v25, v25 // 0000000122E0: D2680019 00023319 - buffer_store_short v25, v142, s[16:19], 0 offen nt // 0000000122E8: E06A1000 8004198E - v_cvt_pk_bf16_f32 v26, v26, v26 // 0000000122F0: D268001A 0002351A - buffer_store_short v26, v143, s[16:19], 0 offen nt // 0000000122F8: E06A1000 80041A8F - v_cvt_pk_bf16_f32 v27, v27, v27 // 000000012300: D268001B 0002371B - buffer_store_short v27, v144, s[16:19], 0 offen nt // 000000012308: E06A1000 80041B90 - v_cvt_pk_bf16_f32 v28, v28, v28 // 000000012310: D268001C 0002391C - buffer_store_short v28, v145, 
s[16:19], 0 offen nt // 000000012318: E06A1000 80041C91 - v_cvt_pk_bf16_f32 v29, v29, v29 // 000000012320: D268001D 00023B1D - buffer_store_short v29, v146, s[16:19], 0 offen nt // 000000012328: E06A1000 80041D92 - v_cvt_pk_bf16_f32 v30, v30, v30 // 000000012330: D268001E 00023D1E - buffer_store_short v30, v147, s[16:19], 0 offen nt // 000000012338: E06A1000 80041E93 - v_cvt_pk_bf16_f32 v31, v31, v31 // 000000012340: D268001F 00023F1F - buffer_store_short v31, v148, s[16:19], 0 offen nt // 000000012348: E06A1000 80041F94 - v_cvt_pk_bf16_f32 v32, v32, v32 // 000000012350: D2680020 00024120 - buffer_store_short v32, v149, s[16:19], 0 offen nt // 000000012358: E06A1000 80042095 - v_cvt_pk_bf16_f32 v33, v33, v33 // 000000012360: D2680021 00024321 - buffer_store_short v33, v150, s[16:19], 0 offen nt // 000000012368: E06A1000 80042196 - v_cvt_pk_bf16_f32 v34, v34, v34 // 000000012370: D2680022 00024522 - buffer_store_short v34, v151, s[16:19], 0 offen nt // 000000012378: E06A1000 80042297 - v_cvt_pk_bf16_f32 v35, v35, v35 // 000000012380: D2680023 00024723 - buffer_store_short v35, v152, s[16:19], 0 offen nt // 000000012388: E06A1000 80042398 - v_cvt_pk_bf16_f32 v36, v36, v36 // 000000012390: D2680024 00024924 - buffer_store_short v36, v153, s[16:19], 0 offen nt // 000000012398: E06A1000 80042499 - v_cvt_pk_bf16_f32 v37, v37, v37 // 0000000123A0: D2680025 00024B25 - buffer_store_short v37, v154, s[16:19], 0 offen nt // 0000000123A8: E06A1000 8004259A - v_cvt_pk_bf16_f32 v38, v38, v38 // 0000000123B0: D2680026 00024D26 - buffer_store_short v38, v155, s[16:19], 0 offen nt // 0000000123B8: E06A1000 8004269B - v_cvt_pk_bf16_f32 v39, v39, v39 // 0000000123C0: D2680027 00024F27 - buffer_store_short v39, v156, s[16:19], 0 offen nt // 0000000123C8: E06A1000 8004279C - v_cvt_pk_bf16_f32 v40, v40, v40 // 0000000123D0: D2680028 00025128 - buffer_store_short v40, v157, s[16:19], 0 offen nt // 0000000123D8: E06A1000 8004289D - v_cvt_pk_bf16_f32 v41, v41, v41 // 0000000123E0: D2680029 
00025329 - buffer_store_short v41, v158, s[16:19], 0 offen nt // 0000000123E8: E06A1000 8004299E - v_cvt_pk_bf16_f32 v42, v42, v42 // 0000000123F0: D268002A 0002552A - buffer_store_short v42, v159, s[16:19], 0 offen nt // 0000000123F8: E06A1000 80042A9F - v_cvt_pk_bf16_f32 v43, v43, v43 // 000000012400: D268002B 0002572B - buffer_store_short v43, v160, s[16:19], 0 offen nt // 000000012408: E06A1000 80042BA0 - v_cvt_pk_bf16_f32 v44, v44, v44 // 000000012410: D268002C 0002592C - buffer_store_short v44, v161, s[16:19], 0 offen nt // 000000012418: E06A1000 80042CA1 - v_cvt_pk_bf16_f32 v45, v45, v45 // 000000012420: D268002D 00025B2D - buffer_store_short v45, v162, s[16:19], 0 offen nt // 000000012428: E06A1000 80042DA2 - v_cvt_pk_bf16_f32 v46, v46, v46 // 000000012430: D268002E 00025D2E - buffer_store_short v46, v163, s[16:19], 0 offen nt // 000000012438: E06A1000 80042EA3 - v_cvt_pk_bf16_f32 v47, v47, v47 // 000000012440: D268002F 00025F2F - buffer_store_short v47, v164, s[16:19], 0 offen nt // 000000012448: E06A1000 80042FA4 - v_cvt_pk_bf16_f32 v48, v48, v48 // 000000012450: D2680030 00026130 - buffer_store_short v48, v165, s[16:19], 0 offen nt // 000000012458: E06A1000 800430A5 - v_cvt_pk_bf16_f32 v49, v49, v49 // 000000012460: D2680031 00026331 - buffer_store_short v49, v166, s[16:19], 0 offen nt // 000000012468: E06A1000 800431A6 - v_cvt_pk_bf16_f32 v50, v50, v50 // 000000012470: D2680032 00026532 - buffer_store_short v50, v167, s[16:19], 0 offen nt // 000000012478: E06A1000 800432A7 - v_cvt_pk_bf16_f32 v51, v51, v51 // 000000012480: D2680033 00026733 - buffer_store_short v51, v168, s[16:19], 0 offen nt // 000000012488: E06A1000 800433A8 - v_cvt_pk_bf16_f32 v52, v52, v52 // 000000012490: D2680034 00026934 - buffer_store_short v52, v169, s[16:19], 0 offen nt // 000000012498: E06A1000 800434A9 - v_cvt_pk_bf16_f32 v53, v53, v53 // 0000000124A0: D2680035 00026B35 - buffer_store_short v53, v170, s[16:19], 0 offen nt // 0000000124A8: E06A1000 800435AA - 
v_cvt_pk_bf16_f32 v54, v54, v54 // 0000000124B0: D2680036 00026D36 - buffer_store_short v54, v171, s[16:19], 0 offen nt // 0000000124B8: E06A1000 800436AB - v_cvt_pk_bf16_f32 v55, v55, v55 // 0000000124C0: D2680037 00026F37 - buffer_store_short v55, v172, s[16:19], 0 offen nt // 0000000124C8: E06A1000 800437AC - v_cvt_pk_bf16_f32 v56, v56, v56 // 0000000124D0: D2680038 00027138 - buffer_store_short v56, v173, s[16:19], 0 offen nt // 0000000124D8: E06A1000 800438AD - v_cvt_pk_bf16_f32 v57, v57, v57 // 0000000124E0: D2680039 00027339 - buffer_store_short v57, v174, s[16:19], 0 offen nt // 0000000124E8: E06A1000 800439AE - v_cvt_pk_bf16_f32 v58, v58, v58 // 0000000124F0: D268003A 0002753A - buffer_store_short v58, v175, s[16:19], 0 offen nt // 0000000124F8: E06A1000 80043AAF - v_cvt_pk_bf16_f32 v59, v59, v59 // 000000012500: D268003B 0002773B - buffer_store_short v59, v176, s[16:19], 0 offen nt // 000000012508: E06A1000 80043BB0 - v_cvt_pk_bf16_f32 v60, v60, v60 // 000000012510: D268003C 0002793C - buffer_store_short v60, v177, s[16:19], 0 offen nt // 000000012518: E06A1000 80043CB1 - v_cvt_pk_bf16_f32 v61, v61, v61 // 000000012520: D268003D 00027B3D - buffer_store_short v61, v178, s[16:19], 0 offen nt // 000000012528: E06A1000 80043DB2 - v_cvt_pk_bf16_f32 v62, v62, v62 // 000000012530: D268003E 00027D3E - buffer_store_short v62, v179, s[16:19], 0 offen nt // 000000012538: E06A1000 80043EB3 - v_cvt_pk_bf16_f32 v63, v63, v63 // 000000012540: D268003F 00027F3F - buffer_store_short v63, v180, s[16:19], 0 offen nt // 000000012548: E06A1000 80043FB4 - v_cvt_pk_bf16_f32 v64, v64, v64 // 000000012550: D2680040 00028140 - buffer_store_short v64, v181, s[16:19], 0 offen nt // 000000012558: E06A1000 800440B5 - v_cvt_pk_bf16_f32 v65, v65, v65 // 000000012560: D2680041 00028341 - buffer_store_short v65, v182, s[16:19], 0 offen nt // 000000012568: E06A1000 800441B6 - v_cvt_pk_bf16_f32 v66, v66, v66 // 000000012570: D2680042 00028542 - buffer_store_short v66, v183, s[16:19], 0 
offen nt // 000000012578: E06A1000 800442B7 - v_cvt_pk_bf16_f32 v67, v67, v67 // 000000012580: D2680043 00028743 - buffer_store_short v67, v184, s[16:19], 0 offen nt // 000000012588: E06A1000 800443B8 - v_cvt_pk_bf16_f32 v68, v68, v68 // 000000012590: D2680044 00028944 - buffer_store_short v68, v185, s[16:19], 0 offen nt // 000000012598: E06A1000 800444B9 - v_cvt_pk_bf16_f32 v69, v69, v69 // 0000000125A0: D2680045 00028B45 - buffer_store_short v69, v186, s[16:19], 0 offen nt // 0000000125A8: E06A1000 800445BA - v_cvt_pk_bf16_f32 v70, v70, v70 // 0000000125B0: D2680046 00028D46 - buffer_store_short v70, v187, s[16:19], 0 offen nt // 0000000125B8: E06A1000 800446BB - v_cvt_pk_bf16_f32 v71, v71, v71 // 0000000125C0: D2680047 00028F47 - buffer_store_short v71, v188, s[16:19], 0 offen nt // 0000000125C8: E06A1000 800447BC - v_cvt_pk_bf16_f32 v72, v72, v72 // 0000000125D0: D2680048 00029148 - buffer_store_short v72, v189, s[16:19], 0 offen nt // 0000000125D8: E06A1000 800448BD - v_cvt_pk_bf16_f32 v73, v73, v73 // 0000000125E0: D2680049 00029349 - buffer_store_short v73, v190, s[16:19], 0 offen nt // 0000000125E8: E06A1000 800449BE - v_cvt_pk_bf16_f32 v74, v74, v74 // 0000000125F0: D268004A 0002954A - buffer_store_short v74, v191, s[16:19], 0 offen nt // 0000000125F8: E06A1000 80044ABF - v_cvt_pk_bf16_f32 v75, v75, v75 // 000000012600: D268004B 0002974B - buffer_store_short v75, v192, s[16:19], 0 offen nt // 000000012608: E06A1000 80044BC0 - v_cvt_pk_bf16_f32 v76, v76, v76 // 000000012610: D268004C 0002994C - buffer_store_short v76, v193, s[16:19], 0 offen nt // 000000012618: E06A1000 80044CC1 - v_cvt_pk_bf16_f32 v77, v77, v77 // 000000012620: D268004D 00029B4D - buffer_store_short v77, v194, s[16:19], 0 offen nt // 000000012628: E06A1000 80044DC2 - v_cvt_pk_bf16_f32 v78, v78, v78 // 000000012630: D268004E 00029D4E - buffer_store_short v78, v195, s[16:19], 0 offen nt // 000000012638: E06A1000 80044EC3 - v_cvt_pk_bf16_f32 v79, v79, v79 // 000000012640: D268004F 00029F4F - 
buffer_store_short v79, v196, s[16:19], 0 offen nt // 000000012648: E06A1000 80044FC4 - v_cvt_pk_bf16_f32 v80, v80, v80 // 000000012650: D2680050 0002A150 - buffer_store_short v80, v197, s[16:19], 0 offen nt // 000000012658: E06A1000 800450C5 - v_cvt_pk_bf16_f32 v81, v81, v81 // 000000012660: D2680051 0002A351 - buffer_store_short v81, v198, s[16:19], 0 offen nt // 000000012668: E06A1000 800451C6 - v_cvt_pk_bf16_f32 v82, v82, v82 // 000000012670: D2680052 0002A552 - buffer_store_short v82, v199, s[16:19], 0 offen nt // 000000012678: E06A1000 800452C7 - v_cvt_pk_bf16_f32 v83, v83, v83 // 000000012680: D2680053 0002A753 - buffer_store_short v83, v200, s[16:19], 0 offen nt // 000000012688: E06A1000 800453C8 - v_cvt_pk_bf16_f32 v84, v84, v84 // 000000012690: D2680054 0002A954 - buffer_store_short v84, v201, s[16:19], 0 offen nt // 000000012698: E06A1000 800454C9 - v_cvt_pk_bf16_f32 v85, v85, v85 // 0000000126A0: D2680055 0002AB55 - buffer_store_short v85, v202, s[16:19], 0 offen nt // 0000000126A8: E06A1000 800455CA - v_cvt_pk_bf16_f32 v86, v86, v86 // 0000000126B0: D2680056 0002AD56 - buffer_store_short v86, v203, s[16:19], 0 offen nt // 0000000126B8: E06A1000 800456CB - v_cvt_pk_bf16_f32 v87, v87, v87 // 0000000126C0: D2680057 0002AF57 - buffer_store_short v87, v204, s[16:19], 0 offen nt // 0000000126C8: E06A1000 800457CC - v_cvt_pk_bf16_f32 v88, v88, v88 // 0000000126D0: D2680058 0002B158 - buffer_store_short v88, v205, s[16:19], 0 offen nt // 0000000126D8: E06A1000 800458CD - v_cvt_pk_bf16_f32 v89, v89, v89 // 0000000126E0: D2680059 0002B359 - buffer_store_short v89, v206, s[16:19], 0 offen nt // 0000000126E8: E06A1000 800459CE - v_cvt_pk_bf16_f32 v90, v90, v90 // 0000000126F0: D268005A 0002B55A - buffer_store_short v90, v207, s[16:19], 0 offen nt // 0000000126F8: E06A1000 80045ACF - v_cvt_pk_bf16_f32 v91, v91, v91 // 000000012700: D268005B 0002B75B - buffer_store_short v91, v208, s[16:19], 0 offen nt // 000000012708: E06A1000 80045BD0 - v_cvt_pk_bf16_f32 v92, v92, 
v92 // 000000012710: D268005C 0002B95C - buffer_store_short v92, v209, s[16:19], 0 offen nt // 000000012718: E06A1000 80045CD1 - v_cvt_pk_bf16_f32 v93, v93, v93 // 000000012720: D268005D 0002BB5D - buffer_store_short v93, v210, s[16:19], 0 offen nt // 000000012728: E06A1000 80045DD2 - v_cvt_pk_bf16_f32 v94, v94, v94 // 000000012730: D268005E 0002BD5E - buffer_store_short v94, v211, s[16:19], 0 offen nt // 000000012738: E06A1000 80045ED3 - v_cvt_pk_bf16_f32 v95, v95, v95 // 000000012740: D268005F 0002BF5F - buffer_store_short v95, v212, s[16:19], 0 offen nt // 000000012748: E06A1000 80045FD4 - v_cvt_pk_bf16_f32 v96, v96, v96 // 000000012750: D2680060 0002C160 - buffer_store_short v96, v213, s[16:19], 0 offen nt // 000000012758: E06A1000 800460D5 - v_cvt_pk_bf16_f32 v97, v97, v97 // 000000012760: D2680061 0002C361 - buffer_store_short v97, v214, s[16:19], 0 offen nt // 000000012768: E06A1000 800461D6 - v_cvt_pk_bf16_f32 v98, v98, v98 // 000000012770: D2680062 0002C562 - buffer_store_short v98, v215, s[16:19], 0 offen nt // 000000012778: E06A1000 800462D7 - v_cvt_pk_bf16_f32 v99, v99, v99 // 000000012780: D2680063 0002C763 - buffer_store_short v99, v216, s[16:19], 0 offen nt // 000000012788: E06A1000 800463D8 - v_cvt_pk_bf16_f32 v100, v100, v100 // 000000012790: D2680064 0002C964 - buffer_store_short v100, v217, s[16:19], 0 offen nt // 000000012798: E06A1000 800464D9 - v_cvt_pk_bf16_f32 v101, v101, v101 // 0000000127A0: D2680065 0002CB65 - buffer_store_short v101, v218, s[16:19], 0 offen nt // 0000000127A8: E06A1000 800465DA - v_cvt_pk_bf16_f32 v102, v102, v102 // 0000000127B0: D2680066 0002CD66 - buffer_store_short v102, v219, s[16:19], 0 offen nt // 0000000127B8: E06A1000 800466DB - v_cvt_pk_bf16_f32 v103, v103, v103 // 0000000127C0: D2680067 0002CF67 - buffer_store_short v103, v220, s[16:19], 0 offen nt // 0000000127C8: E06A1000 800467DC - v_cvt_pk_bf16_f32 v104, v104, v104 // 0000000127D0: D2680068 0002D168 - buffer_store_short v104, v221, s[16:19], 0 offen nt // 
0000000127D8: E06A1000 800468DD - v_cvt_pk_bf16_f32 v105, v105, v105 // 0000000127E0: D2680069 0002D369 - buffer_store_short v105, v222, s[16:19], 0 offen nt // 0000000127E8: E06A1000 800469DE - v_cvt_pk_bf16_f32 v106, v106, v106 // 0000000127F0: D268006A 0002D56A - buffer_store_short v106, v223, s[16:19], 0 offen nt // 0000000127F8: E06A1000 80046ADF - v_cvt_pk_bf16_f32 v107, v107, v107 // 000000012800: D268006B 0002D76B - buffer_store_short v107, v224, s[16:19], 0 offen nt // 000000012808: E06A1000 80046BE0 - v_cvt_pk_bf16_f32 v108, v108, v108 // 000000012810: D268006C 0002D96C - buffer_store_short v108, v225, s[16:19], 0 offen nt // 000000012818: E06A1000 80046CE1 - v_cvt_pk_bf16_f32 v109, v109, v109 // 000000012820: D268006D 0002DB6D - buffer_store_short v109, v226, s[16:19], 0 offen nt // 000000012828: E06A1000 80046DE2 - v_cvt_pk_bf16_f32 v110, v110, v110 // 000000012830: D268006E 0002DD6E - buffer_store_short v110, v227, s[16:19], 0 offen nt // 000000012838: E06A1000 80046EE3 - v_cvt_pk_bf16_f32 v111, v111, v111 // 000000012840: D268006F 0002DF6F - buffer_store_short v111, v228, s[16:19], 0 offen nt // 000000012848: E06A1000 80046FE4 - v_cvt_pk_bf16_f32 v112, v112, v112 // 000000012850: D2680070 0002E170 - buffer_store_short v112, v229, s[16:19], 0 offen nt // 000000012858: E06A1000 800470E5 - v_cvt_pk_bf16_f32 v113, v113, v113 // 000000012860: D2680071 0002E371 - buffer_store_short v113, v230, s[16:19], 0 offen nt // 000000012868: E06A1000 800471E6 - v_cvt_pk_bf16_f32 v114, v114, v114 // 000000012870: D2680072 0002E572 - buffer_store_short v114, v231, s[16:19], 0 offen nt // 000000012878: E06A1000 800472E7 - v_cvt_pk_bf16_f32 v115, v115, v115 // 000000012880: D2680073 0002E773 - buffer_store_short v115, v232, s[16:19], 0 offen nt // 000000012888: E06A1000 800473E8 - v_cvt_pk_bf16_f32 v116, v116, v116 // 000000012890: D2680074 0002E974 - buffer_store_short v116, v233, s[16:19], 0 offen nt // 000000012898: E06A1000 800474E9 - v_cvt_pk_bf16_f32 v117, v117, 
v117 // 0000000128A0: D2680075 0002EB75 - buffer_store_short v117, v234, s[16:19], 0 offen nt // 0000000128A8: E06A1000 800475EA - v_cvt_pk_bf16_f32 v118, v118, v118 // 0000000128B0: D2680076 0002ED76 - buffer_store_short v118, v235, s[16:19], 0 offen nt // 0000000128B8: E06A1000 800476EB - v_cvt_pk_bf16_f32 v119, v119, v119 // 0000000128C0: D2680077 0002EF77 - buffer_store_short v119, v236, s[16:19], 0 offen nt // 0000000128C8: E06A1000 800477EC - v_cvt_pk_bf16_f32 v120, v120, v120 // 0000000128D0: D2680078 0002F178 - buffer_store_short v120, v237, s[16:19], 0 offen nt // 0000000128D8: E06A1000 800478ED - v_cvt_pk_bf16_f32 v121, v121, v121 // 0000000128E0: D2680079 0002F379 - buffer_store_short v121, v238, s[16:19], 0 offen nt // 0000000128E8: E06A1000 800479EE - v_cvt_pk_bf16_f32 v122, v122, v122 // 0000000128F0: D268007A 0002F57A - buffer_store_short v122, v239, s[16:19], 0 offen nt // 0000000128F8: E06A1000 80047AEF - v_cvt_pk_bf16_f32 v123, v123, v123 // 000000012900: D268007B 0002F77B - buffer_store_short v123, v240, s[16:19], 0 offen nt // 000000012908: E06A1000 80047BF0 - v_cvt_pk_bf16_f32 v124, v124, v124 // 000000012910: D268007C 0002F97C - buffer_store_short v124, v241, s[16:19], 0 offen nt // 000000012918: E06A1000 80047CF1 - v_cvt_pk_bf16_f32 v125, v125, v125 // 000000012920: D268007D 0002FB7D - buffer_store_short v125, v242, s[16:19], 0 offen nt // 000000012928: E06A1000 80047DF2 - v_cvt_pk_bf16_f32 v126, v126, v126 // 000000012930: D268007E 0002FD7E - buffer_store_short v126, v243, s[16:19], 0 offen nt // 000000012938: E06A1000 80047EF3 - v_cvt_pk_bf16_f32 v127, v127, v127 // 000000012940: D268007F 0002FF7F - buffer_store_short v127, v244, s[16:19], 0 offen nt // 000000012948: E06A1000 80047FF4 - v_cvt_pk_bf16_f32 v128, v128, v128 // 000000012950: D2680080 00030180 - buffer_store_short v128, v245, s[16:19], 0 offen nt // 000000012958: E06A1000 800480F5 - s_nop 0 // 000000012960: BF800000 - v_mov_b32_e32 v10, 0x80000000 // 000000012964: 7E1402FF 
80000000 - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001296C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012974: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001297C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012984: 86A2221E - v_add_lshl_u32 v43, v7, v8, 1 // 000000012988: D1FE002B 02061107 - v_cndmask_b32_e64 v43, v10, v43, s[34:35] // 000000012990: D100002B 008A570A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000012998: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000129A0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000129A8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000129B0: 86A2221E - v_add_lshl_u32 v44, v7, v8, 1 // 0000000129B4: D1FE002C 02061107 - v_cndmask_b32_e64 v44, v10, v44, s[34:35] // 0000000129BC: D100002C 008A590A - v_add_co_u32_e64 v8, vcc, v4, 6 // 0000000129C4: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000129CC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000129D4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000129DC: 86A2221E - v_add_lshl_u32 v45, v7, v8, 1 // 0000000129E0: D1FE002D 02061107 - v_cndmask_b32_e64 v45, v10, v45, s[34:35] // 0000000129E8: D100002D 008A5B0A - v_add_co_u32_e64 v8, vcc, v4, 7 // 0000000129F0: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000129F8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012A00: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012A08: 86A2221E - v_add_lshl_u32 v46, v7, v8, 1 // 000000012A0C: D1FE002E 02061107 - v_cndmask_b32_e64 v46, v10, v46, s[34:35] // 000000012A14: D100002E 008A5D0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000012A1C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000012A24: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000012A2C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000012A34: D0C9001E 00003104 - v_cmp_lt_u32_e64 
s[34:35], v5, s25 // 000000012A3C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012A44: 86A2221E - v_add_lshl_u32 v47, v7, v4, 1 // 000000012A48: D1FE002F 02060907 - v_cndmask_b32_e64 v47, v10, v47, s[34:35] // 000000012A50: D100002F 008A5F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000012A58: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012A60: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012A68: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012A70: 86A2221E - v_add_lshl_u32 v48, v7, v8, 1 // 000000012A74: D1FE0030 02061107 - v_cndmask_b32_e64 v48, v10, v48, s[34:35] // 000000012A7C: D1000030 008A610A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000012A84: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012A8C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012A94: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012A9C: 86A2221E - v_add_lshl_u32 v49, v7, v8, 1 // 000000012AA0: D1FE0031 02061107 - v_cndmask_b32_e64 v49, v10, v49, s[34:35] // 000000012AA8: D1000031 008A630A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000012AB0: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012AB8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012AC0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012AC8: 86A2221E - v_add_lshl_u32 v50, v7, v8, 1 // 000000012ACC: D1FE0032 02061107 - v_cndmask_b32_e64 v50, v10, v50, s[34:35] // 000000012AD4: D1000032 008A650A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000012ADC: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012AE4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012AEC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012AF4: 86A2221E - v_add_lshl_u32 v51, v7, v8, 1 // 000000012AF8: D1FE0033 02061107 - v_cndmask_b32_e64 v51, v10, v51, s[34:35] // 000000012B00: D1000033 008A670A - v_add_co_u32_e64 v8, vcc, 
v4, 5 // 000000012B08: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012B10: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012B18: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012B20: 86A2221E - v_add_lshl_u32 v52, v7, v8, 1 // 000000012B24: D1FE0034 02061107 - v_cndmask_b32_e64 v52, v10, v52, s[34:35] // 000000012B2C: D1000034 008A690A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000012B34: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012B3C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012B44: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012B4C: 86A2221E - v_add_lshl_u32 v53, v7, v8, 1 // 000000012B50: D1FE0035 02061107 - v_cndmask_b32_e64 v53, v10, v53, s[34:35] // 000000012B58: D1000035 008A6B0A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000012B60: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012B68: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012B70: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012B78: 86A2221E - v_add_lshl_u32 v54, v7, v8, 1 // 000000012B7C: D1FE0036 02061107 - v_cndmask_b32_e64 v54, v10, v54, s[34:35] // 000000012B84: D1000036 008A6D0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000012B8C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000012B94: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000012B9C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000012BA4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012BAC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012BB4: 86A2221E - v_add_lshl_u32 v55, v7, v4, 1 // 000000012BB8: D1FE0037 02060907 - v_cndmask_b32_e64 v55, v10, v55, s[34:35] // 000000012BC0: D1000037 008A6F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000012BC8: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012BD0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012BD8: 
D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012BE0: 86A2221E - v_add_lshl_u32 v56, v7, v8, 1 // 000000012BE4: D1FE0038 02061107 - v_cndmask_b32_e64 v56, v10, v56, s[34:35] // 000000012BEC: D1000038 008A710A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000012BF4: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012BFC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012C04: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012C0C: 86A2221E - v_add_lshl_u32 v57, v7, v8, 1 // 000000012C10: D1FE0039 02061107 - v_cndmask_b32_e64 v57, v10, v57, s[34:35] // 000000012C18: D1000039 008A730A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000012C20: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012C28: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012C30: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012C38: 86A2221E - v_add_lshl_u32 v58, v7, v8, 1 // 000000012C3C: D1FE003A 02061107 - v_cndmask_b32_e64 v58, v10, v58, s[34:35] // 000000012C44: D100003A 008A750A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000012C4C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012C54: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012C5C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012C64: 86A2221E - v_add_lshl_u32 v59, v7, v8, 1 // 000000012C68: D1FE003B 02061107 - v_cndmask_b32_e64 v59, v10, v59, s[34:35] // 000000012C70: D100003B 008A770A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000012C78: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012C80: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012C88: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012C90: 86A2221E - v_add_lshl_u32 v60, v7, v8, 1 // 000000012C94: D1FE003C 02061107 - v_cndmask_b32_e64 v60, v10, v60, s[34:35] // 000000012C9C: D100003C 008A790A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000012CA4: D1196A08 
00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012CAC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012CB4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012CBC: 86A2221E - v_add_lshl_u32 v61, v7, v8, 1 // 000000012CC0: D1FE003D 02061107 - v_cndmask_b32_e64 v61, v10, v61, s[34:35] // 000000012CC8: D100003D 008A7B0A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000012CD0: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012CD8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012CE0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012CE8: 86A2221E - v_add_lshl_u32 v62, v7, v8, 1 // 000000012CEC: D1FE003E 02061107 - v_cndmask_b32_e64 v62, v10, v62, s[34:35] // 000000012CF4: D100003E 008A7D0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000012CFC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000012D04: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000012D0C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000012D14: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012D1C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012D24: 86A2221E - v_add_lshl_u32 v63, v7, v4, 1 // 000000012D28: D1FE003F 02060907 - v_cndmask_b32_e64 v63, v10, v63, s[34:35] // 000000012D30: D100003F 008A7F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000012D38: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012D40: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012D48: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012D50: 86A2221E - v_add_lshl_u32 v64, v7, v8, 1 // 000000012D54: D1FE0040 02061107 - v_cndmask_b32_e64 v64, v10, v64, s[34:35] // 000000012D5C: D1000040 008A810A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000012D64: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012D6C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012D74: D0C90022 00003305 - s_and_b64 
s[34:35], s[30:31], s[34:35] // 000000012D7C: 86A2221E - v_add_lshl_u32 v65, v7, v8, 1 // 000000012D80: D1FE0041 02061107 - v_cndmask_b32_e64 v65, v10, v65, s[34:35] // 000000012D88: D1000041 008A830A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000012D90: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012D98: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012DA0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012DA8: 86A2221E - v_add_lshl_u32 v66, v7, v8, 1 // 000000012DAC: D1FE0042 02061107 - v_cndmask_b32_e64 v66, v10, v66, s[34:35] // 000000012DB4: D1000042 008A850A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000012DBC: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012DC4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012DCC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012DD4: 86A2221E - v_add_lshl_u32 v67, v7, v8, 1 // 000000012DD8: D1FE0043 02061107 - v_cndmask_b32_e64 v67, v10, v67, s[34:35] // 000000012DE0: D1000043 008A870A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000012DE8: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012DF0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012DF8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012E00: 86A2221E - v_add_lshl_u32 v68, v7, v8, 1 // 000000012E04: D1FE0044 02061107 - v_cndmask_b32_e64 v68, v10, v68, s[34:35] // 000000012E0C: D1000044 008A890A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000012E14: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000012E1C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012E24: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012E2C: 86A2221E - v_add_lshl_u32 v69, v7, v8, 1 // 000000012E30: D1FE0045 02061107 - v_cndmask_b32_e64 v69, v10, v69, s[34:35] // 000000012E38: D1000045 008A8B0A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000012E40: D1196A08 00010F04 - v_cmp_lt_u32_e64 
s[30:31], v8, s24 // 000000012E48: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000012E50: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000012E58: 86A2221E - v_add_lshl_u32 v70, v7, v8, 1 // 000000012E5C: D1FE0046 02061107 - v_cndmask_b32_e64 v70, v10, v70, s[34:35] // 000000012E64: D1000046 008A8D0A - v_accvgpr_read_b32 v15, a147 // 000000012E6C: D3D8400F 18000193 - v_accvgpr_read_b32 v16, a151 // 000000012E74: D3D84010 18000197 - v_accvgpr_read_b32 v17, a155 // 000000012E7C: D3D84011 1800019B - v_accvgpr_read_b32 v18, a159 // 000000012E84: D3D84012 1800019F - v_accvgpr_read_b32 v19, a163 // 000000012E8C: D3D84013 180001A3 - v_accvgpr_read_b32 v20, a167 // 000000012E94: D3D84014 180001A7 - v_accvgpr_read_b32 v21, a171 // 000000012E9C: D3D84015 180001AB - v_accvgpr_read_b32 v22, a175 // 000000012EA4: D3D84016 180001AF - v_accvgpr_read_b32 v23, a179 // 000000012EAC: D3D84017 180001B3 - v_accvgpr_read_b32 v24, a183 // 000000012EB4: D3D84018 180001B7 - v_accvgpr_read_b32 v25, a187 // 000000012EBC: D3D84019 180001BB - v_accvgpr_read_b32 v26, a191 // 000000012EC4: D3D8401A 180001BF - v_accvgpr_read_b32 v27, a195 // 000000012ECC: D3D8401B 180001C3 - v_accvgpr_read_b32 v28, a199 // 000000012ED4: D3D8401C 180001C7 - v_accvgpr_read_b32 v29, a203 // 000000012EDC: D3D8401D 180001CB - v_accvgpr_read_b32 v30, a207 // 000000012EE4: D3D8401E 180001CF - v_accvgpr_read_b32 v31, a211 // 000000012EEC: D3D8401F 180001D3 - v_accvgpr_read_b32 v32, a215 // 000000012EF4: D3D84020 180001D7 - v_accvgpr_read_b32 v33, a219 // 000000012EFC: D3D84021 180001DB - v_accvgpr_read_b32 v34, a223 // 000000012F04: D3D84022 180001DF - v_accvgpr_read_b32 v35, a227 // 000000012F0C: D3D84023 180001E3 - v_accvgpr_read_b32 v36, a231 // 000000012F14: D3D84024 180001E7 - v_accvgpr_read_b32 v37, a235 // 000000012F1C: D3D84025 180001EB - v_accvgpr_read_b32 v38, a239 // 000000012F24: D3D84026 180001EF - v_accvgpr_read_b32 v39, a243 // 000000012F2C: D3D84027 180001F3 - 
v_accvgpr_read_b32 v40, a247 // 000000012F34: D3D84028 180001F7 - v_accvgpr_read_b32 v41, a251 // 000000012F3C: D3D84029 180001FB - v_accvgpr_read_b32 v42, a255 // 000000012F44: D3D8402A 180001FF - v_mul_f32_e32 v15, s44, v15 // 000000012F4C: 0A1E1E2C - v_pk_mul_f32 v[16:17], s[44:45], v[16:17] op_sel_hi:[0,1] // 000000012F50: D3B14010 1002202C - v_pk_mul_f32 v[18:19], s[44:45], v[18:19] op_sel_hi:[0,1] // 000000012F58: D3B14012 1002242C - v_pk_mul_f32 v[20:21], s[44:45], v[20:21] op_sel_hi:[0,1] // 000000012F60: D3B14014 1002282C - v_pk_mul_f32 v[22:23], s[44:45], v[22:23] op_sel_hi:[0,1] // 000000012F68: D3B14016 10022C2C - v_pk_mul_f32 v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 000000012F70: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 000000012F78: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 000000012F80: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] op_sel_hi:[0,1] // 000000012F88: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 000000012F90: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 000000012F98: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 000000012FA0: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 000000012FA8: D3B14026 10024C2C - v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 000000012FB0: D3B14028 1002502C - v_mul_f32_e32 v42, s44, v42 // 000000012FB8: 0A54542C - v_mov_b32_e32 v12, 0xffff0000 // 000000012FBC: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 000000012FC4: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 000000012FCC: 7E1C02FF 00007FFF - v_cvt_pk_bf16_f32 v15, v15, v15 // 000000012FD4: D268000F 00021F0F - buffer_store_short v15, v43, s[16:19], 0 offen nt // 000000012FDC: E06A1000 80040F2B - v_cvt_pk_bf16_f32 v16, v16, v16 // 000000012FE4: D2680010 00022110 - buffer_store_short v16, v44, 
s[16:19], 0 offen nt // 000000012FEC: E06A1000 8004102C - v_cvt_pk_bf16_f32 v17, v17, v17 // 000000012FF4: D2680011 00022311 - buffer_store_short v17, v45, s[16:19], 0 offen nt // 000000012FFC: E06A1000 8004112D - v_cvt_pk_bf16_f32 v18, v18, v18 // 000000013004: D2680012 00022512 - buffer_store_short v18, v46, s[16:19], 0 offen nt // 00000001300C: E06A1000 8004122E - v_cvt_pk_bf16_f32 v19, v19, v19 // 000000013014: D2680013 00022713 - buffer_store_short v19, v47, s[16:19], 0 offen nt // 00000001301C: E06A1000 8004132F - v_cvt_pk_bf16_f32 v20, v20, v20 // 000000013024: D2680014 00022914 - buffer_store_short v20, v48, s[16:19], 0 offen nt // 00000001302C: E06A1000 80041430 - v_cvt_pk_bf16_f32 v21, v21, v21 // 000000013034: D2680015 00022B15 - buffer_store_short v21, v49, s[16:19], 0 offen nt // 00000001303C: E06A1000 80041531 - v_cvt_pk_bf16_f32 v22, v22, v22 // 000000013044: D2680016 00022D16 - buffer_store_short v22, v50, s[16:19], 0 offen nt // 00000001304C: E06A1000 80041632 - v_cvt_pk_bf16_f32 v23, v23, v23 // 000000013054: D2680017 00022F17 - buffer_store_short v23, v51, s[16:19], 0 offen nt // 00000001305C: E06A1000 80041733 - v_cvt_pk_bf16_f32 v24, v24, v24 // 000000013064: D2680018 00023118 - buffer_store_short v24, v52, s[16:19], 0 offen nt // 00000001306C: E06A1000 80041834 - v_cvt_pk_bf16_f32 v25, v25, v25 // 000000013074: D2680019 00023319 - buffer_store_short v25, v53, s[16:19], 0 offen nt // 00000001307C: E06A1000 80041935 - v_cvt_pk_bf16_f32 v26, v26, v26 // 000000013084: D268001A 0002351A - buffer_store_short v26, v54, s[16:19], 0 offen nt // 00000001308C: E06A1000 80041A36 - v_cvt_pk_bf16_f32 v27, v27, v27 // 000000013094: D268001B 0002371B - buffer_store_short v27, v55, s[16:19], 0 offen nt // 00000001309C: E06A1000 80041B37 - v_cvt_pk_bf16_f32 v28, v28, v28 // 0000000130A4: D268001C 0002391C - buffer_store_short v28, v56, s[16:19], 0 offen nt // 0000000130AC: E06A1000 80041C38 - v_cvt_pk_bf16_f32 v29, v29, v29 // 0000000130B4: D268001D 00023B1D - 
buffer_store_short v29, v57, s[16:19], 0 offen nt // 0000000130BC: E06A1000 80041D39 - v_cvt_pk_bf16_f32 v30, v30, v30 // 0000000130C4: D268001E 00023D1E - buffer_store_short v30, v58, s[16:19], 0 offen nt // 0000000130CC: E06A1000 80041E3A - v_cvt_pk_bf16_f32 v31, v31, v31 // 0000000130D4: D268001F 00023F1F - buffer_store_short v31, v59, s[16:19], 0 offen nt // 0000000130DC: E06A1000 80041F3B - v_cvt_pk_bf16_f32 v32, v32, v32 // 0000000130E4: D2680020 00024120 - buffer_store_short v32, v60, s[16:19], 0 offen nt // 0000000130EC: E06A1000 8004203C - v_cvt_pk_bf16_f32 v33, v33, v33 // 0000000130F4: D2680021 00024321 - buffer_store_short v33, v61, s[16:19], 0 offen nt // 0000000130FC: E06A1000 8004213D - v_cvt_pk_bf16_f32 v34, v34, v34 // 000000013104: D2680022 00024522 - buffer_store_short v34, v62, s[16:19], 0 offen nt // 00000001310C: E06A1000 8004223E - v_cvt_pk_bf16_f32 v35, v35, v35 // 000000013114: D2680023 00024723 - buffer_store_short v35, v63, s[16:19], 0 offen nt // 00000001311C: E06A1000 8004233F - v_cvt_pk_bf16_f32 v36, v36, v36 // 000000013124: D2680024 00024924 - buffer_store_short v36, v64, s[16:19], 0 offen nt // 00000001312C: E06A1000 80042440 - v_cvt_pk_bf16_f32 v37, v37, v37 // 000000013134: D2680025 00024B25 - buffer_store_short v37, v65, s[16:19], 0 offen nt // 00000001313C: E06A1000 80042541 - v_cvt_pk_bf16_f32 v38, v38, v38 // 000000013144: D2680026 00024D26 - buffer_store_short v38, v66, s[16:19], 0 offen nt // 00000001314C: E06A1000 80042642 - v_cvt_pk_bf16_f32 v39, v39, v39 // 000000013154: D2680027 00024F27 - buffer_store_short v39, v67, s[16:19], 0 offen nt // 00000001315C: E06A1000 80042743 - v_cvt_pk_bf16_f32 v40, v40, v40 // 000000013164: D2680028 00025128 - buffer_store_short v40, v68, s[16:19], 0 offen nt // 00000001316C: E06A1000 80042844 - v_cvt_pk_bf16_f32 v41, v41, v41 // 000000013174: D2680029 00025329 - buffer_store_short v41, v69, s[16:19], 0 offen nt // 00000001317C: E06A1000 80042945 - v_cvt_pk_bf16_f32 v42, v42, v42 // 
000000013184: D268002A 0002552A - buffer_store_short v42, v70, s[16:19], 0 offen nt // 00000001318C: E06A1000 80042A46 - s_nop 0 // 000000013194: BF800000 - s_branch label_GW_End_2 // 000000013198: BF82310D - -label_GW_Beta_2: - s_and_b32 s30, 0xff, s24 // 00000001319C: 861E18FF 000000FF - s_add_u32 s31, -1, s14 // 0000000131A4: 801F0EC1 - s_cmp_ge_u32 s2, s31 // 0000000131A8: BF091F02 - s_cselect_b32 s30, s30, 0 // 0000000131AC: 851E801E - s_cmpk_gt_u32 s30, 0x0 // 0000000131B0: B51E0000 - s_cbranch_scc1 label_GW_B1_E1_M // 0000000131B4: BF851463 - s_and_b32 s30, 0xff, s25 // 0000000131B8: 861E19FF 000000FF - s_add_u32 s31, -1, s15 // 0000000131C0: 801F0FC1 - s_cmp_ge_u32 s3, s31 // 0000000131C4: BF091F03 - s_cselect_b32 s30, s30, 0 // 0000000131C8: 851E801E - s_cmpk_gt_u32 s30, 0x0 // 0000000131CC: B51E0000 - s_cbranch_scc1 label_GW_B1_E1_N // 0000000131D0: BF85096D - -label_GW_B1_E0: - v_add_lshl_u32 v16, v6, v4, 1 // 0000000131D4: D1FE0010 02060906 - buffer_load_dwordx4 v[20:23], v16, s[20:23], 0 offen // 0000000131DC: E05C1000 80051410 - s_lshl_b32 s12, s38, 1 // 0000000131E4: 8E0C8126 - s_add_u32 s20, s20, s12 // 0000000131E8: 80140C14 - s_addc_u32 s21, s21, 0 // 0000000131EC: 82158015 - buffer_load_dwordx4 v[128:131], v16, s[20:23], 0 offen // 0000000131F0: E05C1000 80058010 - s_lshl_b32 s12, s38, 1 // 0000000131F8: 8E0C8126 - s_add_u32 s20, s20, s12 // 0000000131FC: 80140C14 - s_addc_u32 s21, s21, 0 // 000000013200: 82158015 - buffer_load_dwordx4 v[176:179], v16, s[20:23], 0 offen // 000000013204: E05C1000 8005B010 - s_lshl_b32 s12, s38, 1 // 00000001320C: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000013210: 80140C14 - s_addc_u32 s21, s21, 0 // 000000013214: 82158015 - buffer_load_dwordx4 v[180:183], v16, s[20:23], 0 offen // 000000013218: E05C1000 8005B410 - s_lshl_b32 s12, s38, 1 // 000000013220: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000013224: 80140C14 - s_addc_u32 s21, s21, 0 // 000000013228: 82158015 - buffer_load_dwordx4 v[184:187], v16, s[20:23], 0 
offen // 00000001322C: E05C1000 8005B810 - s_lshl_b32 s12, s38, 1 // 000000013234: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000013238: 80140C14 - s_addc_u32 s21, s21, 0 // 00000001323C: 82158015 - buffer_load_dwordx4 v[188:191], v16, s[20:23], 0 offen // 000000013240: E05C1000 8005BC10 - s_lshl_b32 s12, s38, 1 // 000000013248: 8E0C8126 - s_add_u32 s20, s20, s12 // 00000001324C: 80140C14 - s_addc_u32 s21, s21, 0 // 000000013250: 82158015 - buffer_load_dwordx4 v[192:195], v16, s[20:23], 0 offen // 000000013254: E05C1000 8005C010 - s_lshl_b32 s12, s38, 1 // 00000001325C: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000013260: 80140C14 - s_addc_u32 s21, s21, 0 // 000000013264: 82158015 - buffer_load_dwordx4 v[196:199], v16, s[20:23], 0 offen // 000000013268: E05C1000 8005C410 - s_lshl_b32 s12, s38, 1 // 000000013270: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000013274: 80140C14 - s_addc_u32 s21, s21, 0 // 000000013278: 82158015 - buffer_load_dwordx4 v[200:203], v16, s[20:23], 0 offen // 00000001327C: E05C1000 8005C810 - s_lshl_b32 s12, s38, 1 // 000000013284: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000013288: 80140C14 - s_addc_u32 s21, s21, 0 // 00000001328C: 82158015 - buffer_load_dwordx4 v[204:207], v16, s[20:23], 0 offen // 000000013290: E05C1000 8005CC10 - s_lshl_b32 s12, s38, 1 // 000000013298: 8E0C8126 - s_add_u32 s20, s20, s12 // 00000001329C: 80140C14 - s_addc_u32 s21, s21, 0 // 0000000132A0: 82158015 - buffer_load_dwordx4 v[208:211], v16, s[20:23], 0 offen // 0000000132A4: E05C1000 8005D010 - s_lshl_b32 s12, s38, 1 // 0000000132AC: 8E0C8126 - s_add_u32 s20, s20, s12 // 0000000132B0: 80140C14 - s_addc_u32 s21, s21, 0 // 0000000132B4: 82158015 - buffer_load_dwordx4 v[212:215], v16, s[20:23], 0 offen // 0000000132B8: E05C1000 8005D410 - s_lshl_b32 s12, s38, 1 // 0000000132C0: 8E0C8126 - s_add_u32 s20, s20, s12 // 0000000132C4: 80140C14 - s_addc_u32 s21, s21, 0 // 0000000132C8: 82158015 - buffer_load_dwordx4 v[216:219], v16, s[20:23], 0 offen // 0000000132CC: E05C1000 
8005D810 - s_lshl_b32 s12, s38, 1 // 0000000132D4: 8E0C8126 - s_add_u32 s20, s20, s12 // 0000000132D8: 80140C14 - s_addc_u32 s21, s21, 0 // 0000000132DC: 82158015 - buffer_load_dwordx4 v[220:223], v16, s[20:23], 0 offen // 0000000132E0: E05C1000 8005DC10 - s_lshl_b32 s12, s38, 1 // 0000000132E8: 8E0C8126 - s_add_u32 s20, s20, s12 // 0000000132EC: 80140C14 - s_addc_u32 s21, s21, 0 // 0000000132F0: 82158015 - buffer_load_dwordx4 v[224:227], v16, s[20:23], 0 offen // 0000000132F4: E05C1000 8005E010 - s_lshl_b32 s12, s38, 1 // 0000000132FC: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000013300: 80140C14 - s_addc_u32 s21, s21, 0 // 000000013304: 82158015 - buffer_load_dwordx4 v[228:231], v16, s[20:23], 0 offen // 000000013308: E05C1000 8005E410 - s_lshl_b32 s12, s38, 1 // 000000013310: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000013314: 80140C14 - s_addc_u32 s21, s21, 0 // 000000013318: 82158015 - buffer_load_dwordx4 v[232:235], v16, s[20:23], 0 offen // 00000001331C: E05C1000 8005E810 - s_lshl_b32 s12, s38, 1 // 000000013324: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000013328: 80140C14 - s_addc_u32 s21, s21, 0 // 00000001332C: 82158015 - buffer_load_dwordx4 v[236:239], v16, s[20:23], 0 offen // 000000013330: E05C1000 8005EC10 - v_add_lshl_u32 v15, v7, v4, 1 // 000000013338: D1FE000F 02060907 - v_accvgpr_read_b32 v24, a0 // 000000013340: D3D84018 18000100 - v_accvgpr_read_b32 v25, a4 // 000000013348: D3D84019 18000104 - v_accvgpr_read_b32 v26, a8 // 000000013350: D3D8401A 18000108 - v_accvgpr_read_b32 v27, a12 // 000000013358: D3D8401B 1800010C - v_accvgpr_read_b32 v28, a16 // 000000013360: D3D8401C 18000110 - v_accvgpr_read_b32 v29, a20 // 000000013368: D3D8401D 18000114 - v_accvgpr_read_b32 v30, a24 // 000000013370: D3D8401E 18000118 - v_accvgpr_read_b32 v31, a28 // 000000013378: D3D8401F 1800011C - v_accvgpr_read_b32 v32, a32 // 000000013380: D3D84020 18000120 - v_accvgpr_read_b32 v33, a36 // 000000013388: D3D84021 18000124 - v_accvgpr_read_b32 v34, a40 // 000000013390: 
D3D84022 18000128 - v_accvgpr_read_b32 v35, a44 // 000000013398: D3D84023 1800012C - v_accvgpr_read_b32 v36, a48 // 0000000133A0: D3D84024 18000130 - v_accvgpr_read_b32 v37, a52 // 0000000133A8: D3D84025 18000134 - v_accvgpr_read_b32 v38, a56 // 0000000133B0: D3D84026 18000138 - v_accvgpr_read_b32 v39, a60 // 0000000133B8: D3D84027 1800013C - v_accvgpr_read_b32 v40, a64 // 0000000133C0: D3D84028 18000140 - v_accvgpr_read_b32 v41, a68 // 0000000133C8: D3D84029 18000144 - v_accvgpr_read_b32 v42, a72 // 0000000133D0: D3D8402A 18000148 - v_accvgpr_read_b32 v43, a76 // 0000000133D8: D3D8402B 1800014C - v_accvgpr_read_b32 v44, a80 // 0000000133E0: D3D8402C 18000150 - v_accvgpr_read_b32 v45, a84 // 0000000133E8: D3D8402D 18000154 - v_accvgpr_read_b32 v46, a88 // 0000000133F0: D3D8402E 18000158 - v_accvgpr_read_b32 v47, a92 // 0000000133F8: D3D8402F 1800015C - v_accvgpr_read_b32 v48, a96 // 000000013400: D3D84030 18000160 - v_accvgpr_read_b32 v49, a100 // 000000013408: D3D84031 18000164 - v_accvgpr_read_b32 v50, a104 // 000000013410: D3D84032 18000168 - v_accvgpr_read_b32 v51, a108 // 000000013418: D3D84033 1800016C - v_accvgpr_read_b32 v52, a112 // 000000013420: D3D84034 18000170 - v_accvgpr_read_b32 v53, a116 // 000000013428: D3D84035 18000174 - v_accvgpr_read_b32 v54, a120 // 000000013430: D3D84036 18000178 - v_accvgpr_read_b32 v55, a124 // 000000013438: D3D84037 1800017C - v_accvgpr_read_b32 v56, a128 // 000000013440: D3D84038 18000180 - v_accvgpr_read_b32 v57, a132 // 000000013448: D3D84039 18000184 - v_accvgpr_read_b32 v58, a136 // 000000013450: D3D8403A 18000188 - v_accvgpr_read_b32 v59, a140 // 000000013458: D3D8403B 1800018C - v_accvgpr_read_b32 v60, a144 // 000000013460: D3D8403C 18000190 - v_accvgpr_read_b32 v61, a148 // 000000013468: D3D8403D 18000194 - v_accvgpr_read_b32 v62, a152 // 000000013470: D3D8403E 18000198 - v_accvgpr_read_b32 v63, a156 // 000000013478: D3D8403F 1800019C - v_accvgpr_read_b32 v64, a160 // 000000013480: D3D84040 180001A0 - 
v_accvgpr_read_b32 v65, a164 // 000000013488: D3D84041 180001A4 - v_accvgpr_read_b32 v66, a168 // 000000013490: D3D84042 180001A8 - v_accvgpr_read_b32 v67, a172 // 000000013498: D3D84043 180001AC - v_accvgpr_read_b32 v68, a176 // 0000000134A0: D3D84044 180001B0 - v_accvgpr_read_b32 v69, a180 // 0000000134A8: D3D84045 180001B4 - v_accvgpr_read_b32 v70, a184 // 0000000134B0: D3D84046 180001B8 - v_accvgpr_read_b32 v71, a188 // 0000000134B8: D3D84047 180001BC - v_accvgpr_read_b32 v72, a192 // 0000000134C0: D3D84048 180001C0 - v_accvgpr_read_b32 v73, a196 // 0000000134C8: D3D84049 180001C4 - v_accvgpr_read_b32 v74, a200 // 0000000134D0: D3D8404A 180001C8 - v_accvgpr_read_b32 v75, a204 // 0000000134D8: D3D8404B 180001CC - v_accvgpr_read_b32 v76, a208 // 0000000134E0: D3D8404C 180001D0 - v_accvgpr_read_b32 v77, a212 // 0000000134E8: D3D8404D 180001D4 - v_accvgpr_read_b32 v78, a216 // 0000000134F0: D3D8404E 180001D8 - v_accvgpr_read_b32 v79, a220 // 0000000134F8: D3D8404F 180001DC - v_accvgpr_read_b32 v80, a224 // 000000013500: D3D84050 180001E0 - v_accvgpr_read_b32 v81, a228 // 000000013508: D3D84051 180001E4 - v_accvgpr_read_b32 v82, a232 // 000000013510: D3D84052 180001E8 - v_accvgpr_read_b32 v83, a236 // 000000013518: D3D84053 180001EC - v_accvgpr_read_b32 v84, a240 // 000000013520: D3D84054 180001F0 - v_accvgpr_read_b32 v85, a244 // 000000013528: D3D84055 180001F4 - v_accvgpr_read_b32 v86, a248 // 000000013530: D3D84056 180001F8 - v_accvgpr_read_b32 v87, a252 // 000000013538: D3D84057 180001FC - v_accvgpr_read_b32 v88, a1 // 000000013540: D3D84058 18000101 - v_accvgpr_read_b32 v89, a5 // 000000013548: D3D84059 18000105 - v_accvgpr_read_b32 v90, a9 // 000000013550: D3D8405A 18000109 - v_accvgpr_read_b32 v91, a13 // 000000013558: D3D8405B 1800010D - v_accvgpr_read_b32 v92, a17 // 000000013560: D3D8405C 18000111 - v_accvgpr_read_b32 v93, a21 // 000000013568: D3D8405D 18000115 - v_accvgpr_read_b32 v94, a25 // 000000013570: D3D8405E 18000119 - v_accvgpr_read_b32 v95, a29 
// 000000013578: D3D8405F 1800011D - v_accvgpr_read_b32 v96, a33 // 000000013580: D3D84060 18000121 - v_accvgpr_read_b32 v97, a37 // 000000013588: D3D84061 18000125 - v_accvgpr_read_b32 v98, a41 // 000000013590: D3D84062 18000129 - v_accvgpr_read_b32 v99, a45 // 000000013598: D3D84063 1800012D - v_accvgpr_read_b32 v100, a49 // 0000000135A0: D3D84064 18000131 - v_accvgpr_read_b32 v101, a53 // 0000000135A8: D3D84065 18000135 - v_accvgpr_read_b32 v102, a57 // 0000000135B0: D3D84066 18000139 - v_accvgpr_read_b32 v103, a61 // 0000000135B8: D3D84067 1800013D - v_accvgpr_read_b32 v104, a65 // 0000000135C0: D3D84068 18000141 - v_accvgpr_read_b32 v105, a69 // 0000000135C8: D3D84069 18000145 - v_accvgpr_read_b32 v106, a73 // 0000000135D0: D3D8406A 18000149 - v_accvgpr_read_b32 v107, a77 // 0000000135D8: D3D8406B 1800014D - v_accvgpr_read_b32 v108, a81 // 0000000135E0: D3D8406C 18000151 - v_accvgpr_read_b32 v109, a85 // 0000000135E8: D3D8406D 18000155 - v_accvgpr_read_b32 v110, a89 // 0000000135F0: D3D8406E 18000159 - v_accvgpr_read_b32 v111, a93 // 0000000135F8: D3D8406F 1800015D - v_accvgpr_read_b32 v112, a97 // 000000013600: D3D84070 18000161 - v_accvgpr_read_b32 v113, a101 // 000000013608: D3D84071 18000165 - v_accvgpr_read_b32 v114, a105 // 000000013610: D3D84072 18000169 - v_accvgpr_read_b32 v115, a109 // 000000013618: D3D84073 1800016D - v_accvgpr_read_b32 v116, a113 // 000000013620: D3D84074 18000171 - v_accvgpr_read_b32 v117, a117 // 000000013628: D3D84075 18000175 - v_accvgpr_read_b32 v118, a121 // 000000013630: D3D84076 18000179 - v_accvgpr_read_b32 v119, a125 // 000000013638: D3D84077 1800017D - v_accvgpr_read_b32 v120, a129 // 000000013640: D3D84078 18000181 - v_accvgpr_read_b32 v121, a133 // 000000013648: D3D84079 18000185 - v_accvgpr_read_b32 v122, a137 // 000000013650: D3D8407A 18000189 - v_accvgpr_read_b32 v123, a141 // 000000013658: D3D8407B 1800018D - v_accvgpr_read_b32 v124, a145 // 000000013660: D3D8407C 18000191 - v_accvgpr_read_b32 v125, a149 // 
000000013668: D3D8407D 18000195 - v_accvgpr_read_b32 v126, a153 // 000000013670: D3D8407E 18000199 - v_accvgpr_read_b32 v127, a157 // 000000013678: D3D8407F 1800019D - v_accvgpr_read_b32 v136, a161 // 000000013680: D3D84088 180001A1 - v_accvgpr_read_b32 v137, a165 // 000000013688: D3D84089 180001A5 - v_accvgpr_read_b32 v138, a169 // 000000013690: D3D8408A 180001A9 - v_accvgpr_read_b32 v139, a173 // 000000013698: D3D8408B 180001AD - v_accvgpr_read_b32 v140, a177 // 0000000136A0: D3D8408C 180001B1 - v_accvgpr_read_b32 v141, a181 // 0000000136A8: D3D8408D 180001B5 - v_accvgpr_read_b32 v142, a185 // 0000000136B0: D3D8408E 180001B9 - v_accvgpr_read_b32 v143, a189 // 0000000136B8: D3D8408F 180001BD - v_accvgpr_read_b32 v144, a193 // 0000000136C0: D3D84090 180001C1 - v_accvgpr_read_b32 v145, a197 // 0000000136C8: D3D84091 180001C5 - v_accvgpr_read_b32 v146, a201 // 0000000136D0: D3D84092 180001C9 - v_accvgpr_read_b32 v147, a205 // 0000000136D8: D3D84093 180001CD - v_accvgpr_read_b32 v148, a209 // 0000000136E0: D3D84094 180001D1 - v_accvgpr_read_b32 v149, a213 // 0000000136E8: D3D84095 180001D5 - v_accvgpr_read_b32 v150, a217 // 0000000136F0: D3D84096 180001D9 - v_accvgpr_read_b32 v151, a221 // 0000000136F8: D3D84097 180001DD - v_accvgpr_read_b32 v152, a225 // 000000013700: D3D84098 180001E1 - v_accvgpr_read_b32 v153, a229 // 000000013708: D3D84099 180001E5 - v_accvgpr_read_b32 v154, a233 // 000000013710: D3D8409A 180001E9 - v_accvgpr_read_b32 v155, a237 // 000000013718: D3D8409B 180001ED - v_accvgpr_read_b32 v156, a241 // 000000013720: D3D8409C 180001F1 - v_accvgpr_read_b32 v157, a245 // 000000013728: D3D8409D 180001F5 - v_accvgpr_read_b32 v158, a249 // 000000013730: D3D8409E 180001F9 - v_accvgpr_read_b32 v159, a253 // 000000013738: D3D8409F 180001FD - v_accvgpr_read_b32 v160, a2 // 000000013740: D3D840A0 18000102 - v_accvgpr_read_b32 v161, a6 // 000000013748: D3D840A1 18000106 - v_accvgpr_read_b32 v162, a10 // 000000013750: D3D840A2 1800010A - v_accvgpr_read_b32 v163, 
a14 // 000000013758: D3D840A3 1800010E - v_accvgpr_read_b32 v164, a18 // 000000013760: D3D840A4 18000112 - v_accvgpr_read_b32 v165, a22 // 000000013768: D3D840A5 18000116 - v_accvgpr_read_b32 v166, a26 // 000000013770: D3D840A6 1800011A - v_accvgpr_read_b32 v167, a30 // 000000013778: D3D840A7 1800011E - v_accvgpr_read_b32 v168, a34 // 000000013780: D3D840A8 18000122 - v_accvgpr_read_b32 v169, a38 // 000000013788: D3D840A9 18000126 - v_accvgpr_read_b32 v170, a42 // 000000013790: D3D840AA 1800012A - v_accvgpr_read_b32 v171, a46 // 000000013798: D3D840AB 1800012E - v_accvgpr_read_b32 v172, a50 // 0000000137A0: D3D840AC 18000132 - v_accvgpr_read_b32 v173, a54 // 0000000137A8: D3D840AD 18000136 - v_accvgpr_read_b32 v174, a58 // 0000000137B0: D3D840AE 1800013A - v_accvgpr_read_b32 v175, a62 // 0000000137B8: D3D840AF 1800013E - v_pk_mul_f32 v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 0000000137C0: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 0000000137C8: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 0000000137D0: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] op_sel_hi:[0,1] // 0000000137D8: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 0000000137E0: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 0000000137E8: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 0000000137F0: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 0000000137F8: D3B14026 10024C2C - v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 000000013800: D3B14028 1002502C - v_pk_mul_f32 v[42:43], s[44:45], v[42:43] op_sel_hi:[0,1] // 000000013808: D3B1402A 1002542C - v_pk_mul_f32 v[44:45], s[44:45], v[44:45] op_sel_hi:[0,1] // 000000013810: D3B1402C 1002582C - v_pk_mul_f32 v[46:47], s[44:45], v[46:47] op_sel_hi:[0,1] // 000000013818: D3B1402E 10025C2C - v_pk_mul_f32 v[48:49], 
s[44:45], v[48:49] op_sel_hi:[0,1] // 000000013820: D3B14030 1002602C - v_pk_mul_f32 v[50:51], s[44:45], v[50:51] op_sel_hi:[0,1] // 000000013828: D3B14032 1002642C - v_pk_mul_f32 v[52:53], s[44:45], v[52:53] op_sel_hi:[0,1] // 000000013830: D3B14034 1002682C - v_pk_mul_f32 v[54:55], s[44:45], v[54:55] op_sel_hi:[0,1] // 000000013838: D3B14036 10026C2C - v_pk_mul_f32 v[56:57], s[44:45], v[56:57] op_sel_hi:[0,1] // 000000013840: D3B14038 1002702C - v_pk_mul_f32 v[58:59], s[44:45], v[58:59] op_sel_hi:[0,1] // 000000013848: D3B1403A 1002742C - v_pk_mul_f32 v[60:61], s[44:45], v[60:61] op_sel_hi:[0,1] // 000000013850: D3B1403C 1002782C - v_pk_mul_f32 v[62:63], s[44:45], v[62:63] op_sel_hi:[0,1] // 000000013858: D3B1403E 10027C2C - v_pk_mul_f32 v[64:65], s[44:45], v[64:65] op_sel_hi:[0,1] // 000000013860: D3B14040 1002802C - v_pk_mul_f32 v[66:67], s[44:45], v[66:67] op_sel_hi:[0,1] // 000000013868: D3B14042 1002842C - v_pk_mul_f32 v[68:69], s[44:45], v[68:69] op_sel_hi:[0,1] // 000000013870: D3B14044 1002882C - v_pk_mul_f32 v[70:71], s[44:45], v[70:71] op_sel_hi:[0,1] // 000000013878: D3B14046 10028C2C - v_pk_mul_f32 v[72:73], s[44:45], v[72:73] op_sel_hi:[0,1] // 000000013880: D3B14048 1002902C - v_pk_mul_f32 v[74:75], s[44:45], v[74:75] op_sel_hi:[0,1] // 000000013888: D3B1404A 1002942C - v_pk_mul_f32 v[76:77], s[44:45], v[76:77] op_sel_hi:[0,1] // 000000013890: D3B1404C 1002982C - v_pk_mul_f32 v[78:79], s[44:45], v[78:79] op_sel_hi:[0,1] // 000000013898: D3B1404E 10029C2C - v_pk_mul_f32 v[80:81], s[44:45], v[80:81] op_sel_hi:[0,1] // 0000000138A0: D3B14050 1002A02C - v_pk_mul_f32 v[82:83], s[44:45], v[82:83] op_sel_hi:[0,1] // 0000000138A8: D3B14052 1002A42C - v_pk_mul_f32 v[84:85], s[44:45], v[84:85] op_sel_hi:[0,1] // 0000000138B0: D3B14054 1002A82C - v_pk_mul_f32 v[86:87], s[44:45], v[86:87] op_sel_hi:[0,1] // 0000000138B8: D3B14056 1002AC2C - v_pk_mul_f32 v[88:89], s[44:45], v[88:89] op_sel_hi:[0,1] // 0000000138C0: D3B14058 1002B02C - v_pk_mul_f32 v[90:91], 
s[44:45], v[90:91] op_sel_hi:[0,1] // 0000000138C8: D3B1405A 1002B42C - v_pk_mul_f32 v[92:93], s[44:45], v[92:93] op_sel_hi:[0,1] // 0000000138D0: D3B1405C 1002B82C - v_pk_mul_f32 v[94:95], s[44:45], v[94:95] op_sel_hi:[0,1] // 0000000138D8: D3B1405E 1002BC2C - v_pk_mul_f32 v[96:97], s[44:45], v[96:97] op_sel_hi:[0,1] // 0000000138E0: D3B14060 1002C02C - v_pk_mul_f32 v[98:99], s[44:45], v[98:99] op_sel_hi:[0,1] // 0000000138E8: D3B14062 1002C42C - v_pk_mul_f32 v[100:101], s[44:45], v[100:101] op_sel_hi:[0,1]// 0000000138F0: D3B14064 1002C82C - v_pk_mul_f32 v[102:103], s[44:45], v[102:103] op_sel_hi:[0,1]// 0000000138F8: D3B14066 1002CC2C - v_pk_mul_f32 v[104:105], s[44:45], v[104:105] op_sel_hi:[0,1]// 000000013900: D3B14068 1002D02C - v_pk_mul_f32 v[106:107], s[44:45], v[106:107] op_sel_hi:[0,1]// 000000013908: D3B1406A 1002D42C - v_pk_mul_f32 v[108:109], s[44:45], v[108:109] op_sel_hi:[0,1]// 000000013910: D3B1406C 1002D82C - v_pk_mul_f32 v[110:111], s[44:45], v[110:111] op_sel_hi:[0,1]// 000000013918: D3B1406E 1002DC2C - v_pk_mul_f32 v[112:113], s[44:45], v[112:113] op_sel_hi:[0,1]// 000000013920: D3B14070 1002E02C - v_pk_mul_f32 v[114:115], s[44:45], v[114:115] op_sel_hi:[0,1]// 000000013928: D3B14072 1002E42C - v_pk_mul_f32 v[116:117], s[44:45], v[116:117] op_sel_hi:[0,1]// 000000013930: D3B14074 1002E82C - v_pk_mul_f32 v[118:119], s[44:45], v[118:119] op_sel_hi:[0,1]// 000000013938: D3B14076 1002EC2C - v_pk_mul_f32 v[120:121], s[44:45], v[120:121] op_sel_hi:[0,1]// 000000013940: D3B14078 1002F02C - v_pk_mul_f32 v[122:123], s[44:45], v[122:123] op_sel_hi:[0,1]// 000000013948: D3B1407A 1002F42C - v_pk_mul_f32 v[124:125], s[44:45], v[124:125] op_sel_hi:[0,1]// 000000013950: D3B1407C 1002F82C - v_pk_mul_f32 v[126:127], s[44:45], v[126:127] op_sel_hi:[0,1]// 000000013958: D3B1407E 1002FC2C - v_pk_mul_f32 v[136:137], s[44:45], v[136:137] op_sel_hi:[0,1]// 000000013960: D3B14088 1003102C - v_pk_mul_f32 v[138:139], s[44:45], v[138:139] op_sel_hi:[0,1]// 000000013968: 
D3B1408A 1003142C - v_pk_mul_f32 v[140:141], s[44:45], v[140:141] op_sel_hi:[0,1]// 000000013970: D3B1408C 1003182C - v_pk_mul_f32 v[142:143], s[44:45], v[142:143] op_sel_hi:[0,1]// 000000013978: D3B1408E 10031C2C - v_pk_mul_f32 v[144:145], s[44:45], v[144:145] op_sel_hi:[0,1]// 000000013980: D3B14090 1003202C - v_pk_mul_f32 v[146:147], s[44:45], v[146:147] op_sel_hi:[0,1]// 000000013988: D3B14092 1003242C - v_pk_mul_f32 v[148:149], s[44:45], v[148:149] op_sel_hi:[0,1]// 000000013990: D3B14094 1003282C - v_pk_mul_f32 v[150:151], s[44:45], v[150:151] op_sel_hi:[0,1]// 000000013998: D3B14096 10032C2C - v_pk_mul_f32 v[152:153], s[44:45], v[152:153] op_sel_hi:[0,1]// 0000000139A0: D3B14098 1003302C - v_pk_mul_f32 v[154:155], s[44:45], v[154:155] op_sel_hi:[0,1]// 0000000139A8: D3B1409A 1003342C - v_pk_mul_f32 v[156:157], s[44:45], v[156:157] op_sel_hi:[0,1]// 0000000139B0: D3B1409C 1003382C - v_pk_mul_f32 v[158:159], s[44:45], v[158:159] op_sel_hi:[0,1]// 0000000139B8: D3B1409E 10033C2C - v_pk_mul_f32 v[160:161], s[44:45], v[160:161] op_sel_hi:[0,1]// 0000000139C0: D3B140A0 1003402C - v_pk_mul_f32 v[162:163], s[44:45], v[162:163] op_sel_hi:[0,1]// 0000000139C8: D3B140A2 1003442C - v_pk_mul_f32 v[164:165], s[44:45], v[164:165] op_sel_hi:[0,1]// 0000000139D0: D3B140A4 1003482C - v_pk_mul_f32 v[166:167], s[44:45], v[166:167] op_sel_hi:[0,1]// 0000000139D8: D3B140A6 10034C2C - v_pk_mul_f32 v[168:169], s[44:45], v[168:169] op_sel_hi:[0,1]// 0000000139E0: D3B140A8 1003502C - v_pk_mul_f32 v[170:171], s[44:45], v[170:171] op_sel_hi:[0,1]// 0000000139E8: D3B140AA 1003542C - v_pk_mul_f32 v[172:173], s[44:45], v[172:173] op_sel_hi:[0,1]// 0000000139F0: D3B140AC 1003582C - v_pk_mul_f32 v[174:175], s[44:45], v[174:175] op_sel_hi:[0,1]// 0000000139F8: D3B140AE 10035C2C - v_mov_b32_e32 v12, 0xffff0000 // 000000013A00: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 000000013A08: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 000000013A10: 7E1C02FF 00007FFF - s_waitcnt 
vmcnt(17) // 000000013A18: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v20 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013A1C: 7E10B6F9 00041614 - v_fmac_f32_e64 v24, v8, s45 // 000000013A24: D13B0018 00005B08 - v_cvt_f32_bf16_sdwa v8, v20 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013A2C: 7E10B6F9 00051614 - v_fmac_f32_e64 v25, v8, s45 // 000000013A34: D13B0019 00005B08 - v_cvt_f32_bf16_sdwa v8, v21 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013A3C: 7E10B6F9 00041615 - v_fmac_f32_e64 v26, v8, s45 // 000000013A44: D13B001A 00005B08 - v_cvt_f32_bf16_sdwa v8, v21 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013A4C: 7E10B6F9 00051615 - v_fmac_f32_e64 v27, v8, s45 // 000000013A54: D13B001B 00005B08 - v_cvt_f32_bf16_sdwa v8, v22 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013A5C: 7E10B6F9 00041616 - v_fmac_f32_e64 v28, v8, s45 // 000000013A64: D13B001C 00005B08 - v_cvt_f32_bf16_sdwa v8, v22 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013A6C: 7E10B6F9 00051616 - v_fmac_f32_e64 v29, v8, s45 // 000000013A74: D13B001D 00005B08 - v_cvt_f32_bf16_sdwa v8, v23 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013A7C: 7E10B6F9 00041617 - v_fmac_f32_e64 v30, v8, s45 // 000000013A84: D13B001E 00005B08 - v_cvt_f32_bf16_sdwa v8, v23 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013A8C: 7E10B6F9 00051617 - v_fmac_f32_e64 v31, v8, s45 // 000000013A94: D13B001F 00005B08 - v_cvt_pk_bf16_f32 v24, v24, v25 // 000000013A9C: D2680018 00023318 - v_cvt_pk_bf16_f32 v25, v26, v27 // 000000013AA4: D2680019 0002371A - v_cvt_pk_bf16_f32 v26, v28, v29 // 000000013AAC: D268001A 00023B1C - v_cvt_pk_bf16_f32 v27, v30, v31 // 000000013AB4: D268001B 00023F1E - buffer_store_dwordx4 v[24:27], v15, s[16:19], 0 offen nt // 000000013ABC: E07E1000 8004180F - s_waitcnt vmcnt(17) // 000000013AC4: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v128 dst_sel:DWORD 
dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013AC8: 7E10B6F9 00041680 - v_fmac_f32_e64 v32, v8, s45 // 000000013AD0: D13B0020 00005B08 - v_cvt_f32_bf16_sdwa v8, v128 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013AD8: 7E10B6F9 00051680 - v_fmac_f32_e64 v33, v8, s45 // 000000013AE0: D13B0021 00005B08 - v_cvt_f32_bf16_sdwa v8, v129 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013AE8: 7E10B6F9 00041681 - v_fmac_f32_e64 v34, v8, s45 // 000000013AF0: D13B0022 00005B08 - v_cvt_f32_bf16_sdwa v8, v129 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013AF8: 7E10B6F9 00051681 - v_fmac_f32_e64 v35, v8, s45 // 000000013B00: D13B0023 00005B08 - v_cvt_f32_bf16_sdwa v8, v130 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013B08: 7E10B6F9 00041682 - v_fmac_f32_e64 v36, v8, s45 // 000000013B10: D13B0024 00005B08 - v_cvt_f32_bf16_sdwa v8, v130 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013B18: 7E10B6F9 00051682 - v_fmac_f32_e64 v37, v8, s45 // 000000013B20: D13B0025 00005B08 - v_cvt_f32_bf16_sdwa v8, v131 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013B28: 7E10B6F9 00041683 - v_fmac_f32_e64 v38, v8, s45 // 000000013B30: D13B0026 00005B08 - v_cvt_f32_bf16_sdwa v8, v131 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013B38: 7E10B6F9 00051683 - v_fmac_f32_e64 v39, v8, s45 // 000000013B40: D13B0027 00005B08 - v_cvt_pk_bf16_f32 v32, v32, v33 // 000000013B48: D2680020 00024320 - v_cvt_pk_bf16_f32 v33, v34, v35 // 000000013B50: D2680021 00024722 - v_cvt_pk_bf16_f32 v34, v36, v37 // 000000013B58: D2680022 00024B24 - v_cvt_pk_bf16_f32 v35, v38, v39 // 000000013B60: D2680023 00024F26 - s_lshl_b32 s12, s36, 1 // 000000013B68: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000013B6C: 80100C10 - s_addc_u32 s17, s17, 0 // 000000013B70: 82118011 - buffer_store_dwordx4 v[32:35], v15, s[16:19], 0 offen nt // 000000013B74: E07E1000 8004200F - s_waitcnt vmcnt(17) // 
000000013B7C: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v176 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013B80: 7E10B6F9 000416B0 - v_fmac_f32_e64 v40, v8, s45 // 000000013B88: D13B0028 00005B08 - v_cvt_f32_bf16_sdwa v8, v176 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013B90: 7E10B6F9 000516B0 - v_fmac_f32_e64 v41, v8, s45 // 000000013B98: D13B0029 00005B08 - v_cvt_f32_bf16_sdwa v8, v177 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013BA0: 7E10B6F9 000416B1 - v_fmac_f32_e64 v42, v8, s45 // 000000013BA8: D13B002A 00005B08 - v_cvt_f32_bf16_sdwa v8, v177 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013BB0: 7E10B6F9 000516B1 - v_fmac_f32_e64 v43, v8, s45 // 000000013BB8: D13B002B 00005B08 - v_cvt_f32_bf16_sdwa v8, v178 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013BC0: 7E10B6F9 000416B2 - v_fmac_f32_e64 v44, v8, s45 // 000000013BC8: D13B002C 00005B08 - v_cvt_f32_bf16_sdwa v8, v178 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013BD0: 7E10B6F9 000516B2 - v_fmac_f32_e64 v45, v8, s45 // 000000013BD8: D13B002D 00005B08 - v_cvt_f32_bf16_sdwa v8, v179 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013BE0: 7E10B6F9 000416B3 - v_fmac_f32_e64 v46, v8, s45 // 000000013BE8: D13B002E 00005B08 - v_cvt_f32_bf16_sdwa v8, v179 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013BF0: 7E10B6F9 000516B3 - v_fmac_f32_e64 v47, v8, s45 // 000000013BF8: D13B002F 00005B08 - v_cvt_pk_bf16_f32 v40, v40, v41 // 000000013C00: D2680028 00025328 - v_cvt_pk_bf16_f32 v41, v42, v43 // 000000013C08: D2680029 0002572A - v_cvt_pk_bf16_f32 v42, v44, v45 // 000000013C10: D268002A 00025B2C - v_cvt_pk_bf16_f32 v43, v46, v47 // 000000013C18: D268002B 00025F2E - s_lshl_b32 s12, s36, 1 // 000000013C20: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000013C24: 80100C10 - s_addc_u32 s17, s17, 0 // 000000013C28: 82118011 - buffer_store_dwordx4 v[40:43], v15, s[16:19], 0 
offen nt // 000000013C2C: E07E1000 8004280F - s_waitcnt vmcnt(17) // 000000013C34: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v180 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013C38: 7E10B6F9 000416B4 - v_fmac_f32_e64 v48, v8, s45 // 000000013C40: D13B0030 00005B08 - v_cvt_f32_bf16_sdwa v8, v180 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013C48: 7E10B6F9 000516B4 - v_fmac_f32_e64 v49, v8, s45 // 000000013C50: D13B0031 00005B08 - v_cvt_f32_bf16_sdwa v8, v181 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013C58: 7E10B6F9 000416B5 - v_fmac_f32_e64 v50, v8, s45 // 000000013C60: D13B0032 00005B08 - v_cvt_f32_bf16_sdwa v8, v181 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013C68: 7E10B6F9 000516B5 - v_fmac_f32_e64 v51, v8, s45 // 000000013C70: D13B0033 00005B08 - v_cvt_f32_bf16_sdwa v8, v182 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013C78: 7E10B6F9 000416B6 - v_fmac_f32_e64 v52, v8, s45 // 000000013C80: D13B0034 00005B08 - v_cvt_f32_bf16_sdwa v8, v182 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013C88: 7E10B6F9 000516B6 - v_fmac_f32_e64 v53, v8, s45 // 000000013C90: D13B0035 00005B08 - v_cvt_f32_bf16_sdwa v8, v183 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013C98: 7E10B6F9 000416B7 - v_fmac_f32_e64 v54, v8, s45 // 000000013CA0: D13B0036 00005B08 - v_cvt_f32_bf16_sdwa v8, v183 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013CA8: 7E10B6F9 000516B7 - v_fmac_f32_e64 v55, v8, s45 // 000000013CB0: D13B0037 00005B08 - v_cvt_pk_bf16_f32 v48, v48, v49 // 000000013CB8: D2680030 00026330 - v_cvt_pk_bf16_f32 v49, v50, v51 // 000000013CC0: D2680031 00026732 - v_cvt_pk_bf16_f32 v50, v52, v53 // 000000013CC8: D2680032 00026B34 - v_cvt_pk_bf16_f32 v51, v54, v55 // 000000013CD0: D2680033 00026F36 - s_lshl_b32 s12, s36, 1 // 000000013CD8: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000013CDC: 80100C10 - s_addc_u32 s17, s17, 0 // 
000000013CE0: 82118011 - buffer_store_dwordx4 v[48:51], v15, s[16:19], 0 offen nt // 000000013CE4: E07E1000 8004300F - s_waitcnt vmcnt(17) // 000000013CEC: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v184 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013CF0: 7E10B6F9 000416B8 - v_fmac_f32_e64 v56, v8, s45 // 000000013CF8: D13B0038 00005B08 - v_cvt_f32_bf16_sdwa v8, v184 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013D00: 7E10B6F9 000516B8 - v_fmac_f32_e64 v57, v8, s45 // 000000013D08: D13B0039 00005B08 - v_cvt_f32_bf16_sdwa v8, v185 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013D10: 7E10B6F9 000416B9 - v_fmac_f32_e64 v58, v8, s45 // 000000013D18: D13B003A 00005B08 - v_cvt_f32_bf16_sdwa v8, v185 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013D20: 7E10B6F9 000516B9 - v_fmac_f32_e64 v59, v8, s45 // 000000013D28: D13B003B 00005B08 - v_cvt_f32_bf16_sdwa v8, v186 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013D30: 7E10B6F9 000416BA - v_fmac_f32_e64 v60, v8, s45 // 000000013D38: D13B003C 00005B08 - v_cvt_f32_bf16_sdwa v8, v186 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013D40: 7E10B6F9 000516BA - v_fmac_f32_e64 v61, v8, s45 // 000000013D48: D13B003D 00005B08 - v_cvt_f32_bf16_sdwa v8, v187 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013D50: 7E10B6F9 000416BB - v_fmac_f32_e64 v62, v8, s45 // 000000013D58: D13B003E 00005B08 - v_cvt_f32_bf16_sdwa v8, v187 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013D60: 7E10B6F9 000516BB - v_fmac_f32_e64 v63, v8, s45 // 000000013D68: D13B003F 00005B08 - v_cvt_pk_bf16_f32 v56, v56, v57 // 000000013D70: D2680038 00027338 - v_cvt_pk_bf16_f32 v57, v58, v59 // 000000013D78: D2680039 0002773A - v_cvt_pk_bf16_f32 v58, v60, v61 // 000000013D80: D268003A 00027B3C - v_cvt_pk_bf16_f32 v59, v62, v63 // 000000013D88: D268003B 00027F3E - s_lshl_b32 s12, s36, 1 // 000000013D90: 8E0C8124 - s_add_u32 
s16, s16, s12 // 000000013D94: 80100C10 - s_addc_u32 s17, s17, 0 // 000000013D98: 82118011 - buffer_store_dwordx4 v[56:59], v15, s[16:19], 0 offen nt // 000000013D9C: E07E1000 8004380F - s_waitcnt vmcnt(17) // 000000013DA4: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v188 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013DA8: 7E10B6F9 000416BC - v_fmac_f32_e64 v64, v8, s45 // 000000013DB0: D13B0040 00005B08 - v_cvt_f32_bf16_sdwa v8, v188 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013DB8: 7E10B6F9 000516BC - v_fmac_f32_e64 v65, v8, s45 // 000000013DC0: D13B0041 00005B08 - v_cvt_f32_bf16_sdwa v8, v189 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013DC8: 7E10B6F9 000416BD - v_fmac_f32_e64 v66, v8, s45 // 000000013DD0: D13B0042 00005B08 - v_cvt_f32_bf16_sdwa v8, v189 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013DD8: 7E10B6F9 000516BD - v_fmac_f32_e64 v67, v8, s45 // 000000013DE0: D13B0043 00005B08 - v_cvt_f32_bf16_sdwa v8, v190 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013DE8: 7E10B6F9 000416BE - v_fmac_f32_e64 v68, v8, s45 // 000000013DF0: D13B0044 00005B08 - v_cvt_f32_bf16_sdwa v8, v190 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013DF8: 7E10B6F9 000516BE - v_fmac_f32_e64 v69, v8, s45 // 000000013E00: D13B0045 00005B08 - v_cvt_f32_bf16_sdwa v8, v191 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013E08: 7E10B6F9 000416BF - v_fmac_f32_e64 v70, v8, s45 // 000000013E10: D13B0046 00005B08 - v_cvt_f32_bf16_sdwa v8, v191 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013E18: 7E10B6F9 000516BF - v_fmac_f32_e64 v71, v8, s45 // 000000013E20: D13B0047 00005B08 - v_cvt_pk_bf16_f32 v64, v64, v65 // 000000013E28: D2680040 00028340 - v_cvt_pk_bf16_f32 v65, v66, v67 // 000000013E30: D2680041 00028742 - v_cvt_pk_bf16_f32 v66, v68, v69 // 000000013E38: D2680042 00028B44 - v_cvt_pk_bf16_f32 v67, v70, v71 // 000000013E40: D2680043 
00028F46 - s_lshl_b32 s12, s36, 1 // 000000013E48: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000013E4C: 80100C10 - s_addc_u32 s17, s17, 0 // 000000013E50: 82118011 - buffer_store_dwordx4 v[64:67], v15, s[16:19], 0 offen nt // 000000013E54: E07E1000 8004400F - s_waitcnt vmcnt(17) // 000000013E5C: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v192 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013E60: 7E10B6F9 000416C0 - v_fmac_f32_e64 v72, v8, s45 // 000000013E68: D13B0048 00005B08 - v_cvt_f32_bf16_sdwa v8, v192 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013E70: 7E10B6F9 000516C0 - v_fmac_f32_e64 v73, v8, s45 // 000000013E78: D13B0049 00005B08 - v_cvt_f32_bf16_sdwa v8, v193 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013E80: 7E10B6F9 000416C1 - v_fmac_f32_e64 v74, v8, s45 // 000000013E88: D13B004A 00005B08 - v_cvt_f32_bf16_sdwa v8, v193 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013E90: 7E10B6F9 000516C1 - v_fmac_f32_e64 v75, v8, s45 // 000000013E98: D13B004B 00005B08 - v_cvt_f32_bf16_sdwa v8, v194 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013EA0: 7E10B6F9 000416C2 - v_fmac_f32_e64 v76, v8, s45 // 000000013EA8: D13B004C 00005B08 - v_cvt_f32_bf16_sdwa v8, v194 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013EB0: 7E10B6F9 000516C2 - v_fmac_f32_e64 v77, v8, s45 // 000000013EB8: D13B004D 00005B08 - v_cvt_f32_bf16_sdwa v8, v195 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013EC0: 7E10B6F9 000416C3 - v_fmac_f32_e64 v78, v8, s45 // 000000013EC8: D13B004E 00005B08 - v_cvt_f32_bf16_sdwa v8, v195 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013ED0: 7E10B6F9 000516C3 - v_fmac_f32_e64 v79, v8, s45 // 000000013ED8: D13B004F 00005B08 - v_cvt_pk_bf16_f32 v72, v72, v73 // 000000013EE0: D2680048 00029348 - v_cvt_pk_bf16_f32 v73, v74, v75 // 000000013EE8: D2680049 0002974A - v_cvt_pk_bf16_f32 v74, v76, v77 // 000000013EF0: D268004A 
00029B4C - v_cvt_pk_bf16_f32 v75, v78, v79 // 000000013EF8: D268004B 00029F4E - s_lshl_b32 s12, s36, 1 // 000000013F00: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000013F04: 80100C10 - s_addc_u32 s17, s17, 0 // 000000013F08: 82118011 - buffer_store_dwordx4 v[72:75], v15, s[16:19], 0 offen nt // 000000013F0C: E07E1000 8004480F - s_waitcnt vmcnt(17) // 000000013F14: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v196 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013F18: 7E10B6F9 000416C4 - v_fmac_f32_e64 v80, v8, s45 // 000000013F20: D13B0050 00005B08 - v_cvt_f32_bf16_sdwa v8, v196 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013F28: 7E10B6F9 000516C4 - v_fmac_f32_e64 v81, v8, s45 // 000000013F30: D13B0051 00005B08 - v_cvt_f32_bf16_sdwa v8, v197 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013F38: 7E10B6F9 000416C5 - v_fmac_f32_e64 v82, v8, s45 // 000000013F40: D13B0052 00005B08 - v_cvt_f32_bf16_sdwa v8, v197 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013F48: 7E10B6F9 000516C5 - v_fmac_f32_e64 v83, v8, s45 // 000000013F50: D13B0053 00005B08 - v_cvt_f32_bf16_sdwa v8, v198 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013F58: 7E10B6F9 000416C6 - v_fmac_f32_e64 v84, v8, s45 // 000000013F60: D13B0054 00005B08 - v_cvt_f32_bf16_sdwa v8, v198 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013F68: 7E10B6F9 000516C6 - v_fmac_f32_e64 v85, v8, s45 // 000000013F70: D13B0055 00005B08 - v_cvt_f32_bf16_sdwa v8, v199 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013F78: 7E10B6F9 000416C7 - v_fmac_f32_e64 v86, v8, s45 // 000000013F80: D13B0056 00005B08 - v_cvt_f32_bf16_sdwa v8, v199 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013F88: 7E10B6F9 000516C7 - v_fmac_f32_e64 v87, v8, s45 // 000000013F90: D13B0057 00005B08 - v_cvt_pk_bf16_f32 v80, v80, v81 // 000000013F98: D2680050 0002A350 - v_cvt_pk_bf16_f32 v81, v82, v83 // 000000013FA0: D2680051 
0002A752 - v_cvt_pk_bf16_f32 v82, v84, v85 // 000000013FA8: D2680052 0002AB54 - v_cvt_pk_bf16_f32 v83, v86, v87 // 000000013FB0: D2680053 0002AF56 - s_lshl_b32 s12, s36, 1 // 000000013FB8: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000013FBC: 80100C10 - s_addc_u32 s17, s17, 0 // 000000013FC0: 82118011 - buffer_store_dwordx4 v[80:83], v15, s[16:19], 0 offen nt // 000000013FC4: E07E1000 8004500F - s_waitcnt vmcnt(17) // 000000013FCC: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v200 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013FD0: 7E10B6F9 000416C8 - v_fmac_f32_e64 v88, v8, s45 // 000000013FD8: D13B0058 00005B08 - v_cvt_f32_bf16_sdwa v8, v200 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000013FE0: 7E10B6F9 000516C8 - v_fmac_f32_e64 v89, v8, s45 // 000000013FE8: D13B0059 00005B08 - v_cvt_f32_bf16_sdwa v8, v201 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000013FF0: 7E10B6F9 000416C9 - v_fmac_f32_e64 v90, v8, s45 // 000000013FF8: D13B005A 00005B08 - v_cvt_f32_bf16_sdwa v8, v201 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014000: 7E10B6F9 000516C9 - v_fmac_f32_e64 v91, v8, s45 // 000000014008: D13B005B 00005B08 - v_cvt_f32_bf16_sdwa v8, v202 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014010: 7E10B6F9 000416CA - v_fmac_f32_e64 v92, v8, s45 // 000000014018: D13B005C 00005B08 - v_cvt_f32_bf16_sdwa v8, v202 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014020: 7E10B6F9 000516CA - v_fmac_f32_e64 v93, v8, s45 // 000000014028: D13B005D 00005B08 - v_cvt_f32_bf16_sdwa v8, v203 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014030: 7E10B6F9 000416CB - v_fmac_f32_e64 v94, v8, s45 // 000000014038: D13B005E 00005B08 - v_cvt_f32_bf16_sdwa v8, v203 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014040: 7E10B6F9 000516CB - v_fmac_f32_e64 v95, v8, s45 // 000000014048: D13B005F 00005B08 - v_cvt_pk_bf16_f32 v88, v88, v89 // 000000014050: D2680058 
0002B358 - v_cvt_pk_bf16_f32 v89, v90, v91 // 000000014058: D2680059 0002B75A - v_cvt_pk_bf16_f32 v90, v92, v93 // 000000014060: D268005A 0002BB5C - v_cvt_pk_bf16_f32 v91, v94, v95 // 000000014068: D268005B 0002BF5E - s_lshl_b32 s12, s36, 1 // 000000014070: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000014074: 80100C10 - s_addc_u32 s17, s17, 0 // 000000014078: 82118011 - buffer_store_dwordx4 v[88:91], v15, s[16:19], 0 offen nt // 00000001407C: E07E1000 8004580F - s_waitcnt vmcnt(17) // 000000014084: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v204 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014088: 7E10B6F9 000416CC - v_fmac_f32_e64 v96, v8, s45 // 000000014090: D13B0060 00005B08 - v_cvt_f32_bf16_sdwa v8, v204 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014098: 7E10B6F9 000516CC - v_fmac_f32_e64 v97, v8, s45 // 0000000140A0: D13B0061 00005B08 - v_cvt_f32_bf16_sdwa v8, v205 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000140A8: 7E10B6F9 000416CD - v_fmac_f32_e64 v98, v8, s45 // 0000000140B0: D13B0062 00005B08 - v_cvt_f32_bf16_sdwa v8, v205 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000140B8: 7E10B6F9 000516CD - v_fmac_f32_e64 v99, v8, s45 // 0000000140C0: D13B0063 00005B08 - v_cvt_f32_bf16_sdwa v8, v206 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000140C8: 7E10B6F9 000416CE - v_fmac_f32_e64 v100, v8, s45 // 0000000140D0: D13B0064 00005B08 - v_cvt_f32_bf16_sdwa v8, v206 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000140D8: 7E10B6F9 000516CE - v_fmac_f32_e64 v101, v8, s45 // 0000000140E0: D13B0065 00005B08 - v_cvt_f32_bf16_sdwa v8, v207 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000140E8: 7E10B6F9 000416CF - v_fmac_f32_e64 v102, v8, s45 // 0000000140F0: D13B0066 00005B08 - v_cvt_f32_bf16_sdwa v8, v207 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000140F8: 7E10B6F9 000516CF - v_fmac_f32_e64 v103, v8, s45 // 000000014100: 
D13B0067 00005B08 - v_cvt_pk_bf16_f32 v96, v96, v97 // 000000014108: D2680060 0002C360 - v_cvt_pk_bf16_f32 v97, v98, v99 // 000000014110: D2680061 0002C762 - v_cvt_pk_bf16_f32 v98, v100, v101 // 000000014118: D2680062 0002CB64 - v_cvt_pk_bf16_f32 v99, v102, v103 // 000000014120: D2680063 0002CF66 - s_lshl_b32 s12, s36, 1 // 000000014128: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000001412C: 80100C10 - s_addc_u32 s17, s17, 0 // 000000014130: 82118011 - buffer_store_dwordx4 v[96:99], v15, s[16:19], 0 offen nt // 000000014134: E07E1000 8004600F - s_waitcnt vmcnt(17) // 00000001413C: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v208 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014140: 7E10B6F9 000416D0 - v_fmac_f32_e64 v104, v8, s45 // 000000014148: D13B0068 00005B08 - v_cvt_f32_bf16_sdwa v8, v208 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014150: 7E10B6F9 000516D0 - v_fmac_f32_e64 v105, v8, s45 // 000000014158: D13B0069 00005B08 - v_cvt_f32_bf16_sdwa v8, v209 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014160: 7E10B6F9 000416D1 - v_fmac_f32_e64 v106, v8, s45 // 000000014168: D13B006A 00005B08 - v_cvt_f32_bf16_sdwa v8, v209 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014170: 7E10B6F9 000516D1 - v_fmac_f32_e64 v107, v8, s45 // 000000014178: D13B006B 00005B08 - v_cvt_f32_bf16_sdwa v8, v210 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014180: 7E10B6F9 000416D2 - v_fmac_f32_e64 v108, v8, s45 // 000000014188: D13B006C 00005B08 - v_cvt_f32_bf16_sdwa v8, v210 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014190: 7E10B6F9 000516D2 - v_fmac_f32_e64 v109, v8, s45 // 000000014198: D13B006D 00005B08 - v_cvt_f32_bf16_sdwa v8, v211 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000141A0: 7E10B6F9 000416D3 - v_fmac_f32_e64 v110, v8, s45 // 0000000141A8: D13B006E 00005B08 - v_cvt_f32_bf16_sdwa v8, v211 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 
0000000141B0: 7E10B6F9 000516D3 - v_fmac_f32_e64 v111, v8, s45 // 0000000141B8: D13B006F 00005B08 - v_cvt_pk_bf16_f32 v104, v104, v105 // 0000000141C0: D2680068 0002D368 - v_cvt_pk_bf16_f32 v105, v106, v107 // 0000000141C8: D2680069 0002D76A - v_cvt_pk_bf16_f32 v106, v108, v109 // 0000000141D0: D268006A 0002DB6C - v_cvt_pk_bf16_f32 v107, v110, v111 // 0000000141D8: D268006B 0002DF6E - s_lshl_b32 s12, s36, 1 // 0000000141E0: 8E0C8124 - s_add_u32 s16, s16, s12 // 0000000141E4: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000141E8: 82118011 - buffer_store_dwordx4 v[104:107], v15, s[16:19], 0 offen nt // 0000000141EC: E07E1000 8004680F - s_waitcnt vmcnt(17) // 0000000141F4: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v212 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000141F8: 7E10B6F9 000416D4 - v_fmac_f32_e64 v112, v8, s45 // 000000014200: D13B0070 00005B08 - v_cvt_f32_bf16_sdwa v8, v212 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014208: 7E10B6F9 000516D4 - v_fmac_f32_e64 v113, v8, s45 // 000000014210: D13B0071 00005B08 - v_cvt_f32_bf16_sdwa v8, v213 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014218: 7E10B6F9 000416D5 - v_fmac_f32_e64 v114, v8, s45 // 000000014220: D13B0072 00005B08 - v_cvt_f32_bf16_sdwa v8, v213 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014228: 7E10B6F9 000516D5 - v_fmac_f32_e64 v115, v8, s45 // 000000014230: D13B0073 00005B08 - v_cvt_f32_bf16_sdwa v8, v214 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014238: 7E10B6F9 000416D6 - v_fmac_f32_e64 v116, v8, s45 // 000000014240: D13B0074 00005B08 - v_cvt_f32_bf16_sdwa v8, v214 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014248: 7E10B6F9 000516D6 - v_fmac_f32_e64 v117, v8, s45 // 000000014250: D13B0075 00005B08 - v_cvt_f32_bf16_sdwa v8, v215 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014258: 7E10B6F9 000416D7 - v_fmac_f32_e64 v118, v8, s45 // 000000014260: D13B0076 00005B08 
- v_cvt_f32_bf16_sdwa v8, v215 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014268: 7E10B6F9 000516D7 - v_fmac_f32_e64 v119, v8, s45 // 000000014270: D13B0077 00005B08 - v_cvt_pk_bf16_f32 v112, v112, v113 // 000000014278: D2680070 0002E370 - v_cvt_pk_bf16_f32 v113, v114, v115 // 000000014280: D2680071 0002E772 - v_cvt_pk_bf16_f32 v114, v116, v117 // 000000014288: D2680072 0002EB74 - v_cvt_pk_bf16_f32 v115, v118, v119 // 000000014290: D2680073 0002EF76 - s_lshl_b32 s12, s36, 1 // 000000014298: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000001429C: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000142A0: 82118011 - buffer_store_dwordx4 v[112:115], v15, s[16:19], 0 offen nt // 0000000142A4: E07E1000 8004700F - s_waitcnt vmcnt(17) // 0000000142AC: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v216 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000142B0: 7E10B6F9 000416D8 - v_fmac_f32_e64 v120, v8, s45 // 0000000142B8: D13B0078 00005B08 - v_cvt_f32_bf16_sdwa v8, v216 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000142C0: 7E10B6F9 000516D8 - v_fmac_f32_e64 v121, v8, s45 // 0000000142C8: D13B0079 00005B08 - v_cvt_f32_bf16_sdwa v8, v217 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000142D0: 7E10B6F9 000416D9 - v_fmac_f32_e64 v122, v8, s45 // 0000000142D8: D13B007A 00005B08 - v_cvt_f32_bf16_sdwa v8, v217 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000142E0: 7E10B6F9 000516D9 - v_fmac_f32_e64 v123, v8, s45 // 0000000142E8: D13B007B 00005B08 - v_cvt_f32_bf16_sdwa v8, v218 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000142F0: 7E10B6F9 000416DA - v_fmac_f32_e64 v124, v8, s45 // 0000000142F8: D13B007C 00005B08 - v_cvt_f32_bf16_sdwa v8, v218 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014300: 7E10B6F9 000516DA - v_fmac_f32_e64 v125, v8, s45 // 000000014308: D13B007D 00005B08 - v_cvt_f32_bf16_sdwa v8, v219 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 
000000014310: 7E10B6F9 000416DB - v_fmac_f32_e64 v126, v8, s45 // 000000014318: D13B007E 00005B08 - v_cvt_f32_bf16_sdwa v8, v219 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014320: 7E10B6F9 000516DB - v_fmac_f32_e64 v127, v8, s45 // 000000014328: D13B007F 00005B08 - v_cvt_pk_bf16_f32 v120, v120, v121 // 000000014330: D2680078 0002F378 - v_cvt_pk_bf16_f32 v121, v122, v123 // 000000014338: D2680079 0002F77A - v_cvt_pk_bf16_f32 v122, v124, v125 // 000000014340: D268007A 0002FB7C - v_cvt_pk_bf16_f32 v123, v126, v127 // 000000014348: D268007B 0002FF7E - s_lshl_b32 s12, s36, 1 // 000000014350: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000014354: 80100C10 - s_addc_u32 s17, s17, 0 // 000000014358: 82118011 - buffer_store_dwordx4 v[120:123], v15, s[16:19], 0 offen nt // 00000001435C: E07E1000 8004780F - s_waitcnt vmcnt(17) // 000000014364: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v220 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014368: 7E10B6F9 000416DC - v_fmac_f32_e64 v136, v8, s45 // 000000014370: D13B0088 00005B08 - v_cvt_f32_bf16_sdwa v8, v220 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014378: 7E10B6F9 000516DC - v_fmac_f32_e64 v137, v8, s45 // 000000014380: D13B0089 00005B08 - v_cvt_f32_bf16_sdwa v8, v221 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014388: 7E10B6F9 000416DD - v_fmac_f32_e64 v138, v8, s45 // 000000014390: D13B008A 00005B08 - v_cvt_f32_bf16_sdwa v8, v221 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014398: 7E10B6F9 000516DD - v_fmac_f32_e64 v139, v8, s45 // 0000000143A0: D13B008B 00005B08 - v_cvt_f32_bf16_sdwa v8, v222 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000143A8: 7E10B6F9 000416DE - v_fmac_f32_e64 v140, v8, s45 // 0000000143B0: D13B008C 00005B08 - v_cvt_f32_bf16_sdwa v8, v222 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000143B8: 7E10B6F9 000516DE - v_fmac_f32_e64 v141, v8, s45 // 0000000143C0: D13B008D 00005B08 
- v_cvt_f32_bf16_sdwa v8, v223 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000143C8: 7E10B6F9 000416DF - v_fmac_f32_e64 v142, v8, s45 // 0000000143D0: D13B008E 00005B08 - v_cvt_f32_bf16_sdwa v8, v223 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000143D8: 7E10B6F9 000516DF - v_fmac_f32_e64 v143, v8, s45 // 0000000143E0: D13B008F 00005B08 - v_cvt_pk_bf16_f32 v136, v136, v137 // 0000000143E8: D2680088 00031388 - v_cvt_pk_bf16_f32 v137, v138, v139 // 0000000143F0: D2680089 0003178A - v_cvt_pk_bf16_f32 v138, v140, v141 // 0000000143F8: D268008A 00031B8C - v_cvt_pk_bf16_f32 v139, v142, v143 // 000000014400: D268008B 00031F8E - s_lshl_b32 s12, s36, 1 // 000000014408: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000001440C: 80100C10 - s_addc_u32 s17, s17, 0 // 000000014410: 82118011 - buffer_store_dwordx4 v[136:139], v15, s[16:19], 0 offen nt // 000000014414: E07E1000 8004880F - s_waitcnt vmcnt(17) // 00000001441C: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v224 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014420: 7E10B6F9 000416E0 - v_fmac_f32_e64 v144, v8, s45 // 000000014428: D13B0090 00005B08 - v_cvt_f32_bf16_sdwa v8, v224 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014430: 7E10B6F9 000516E0 - v_fmac_f32_e64 v145, v8, s45 // 000000014438: D13B0091 00005B08 - v_cvt_f32_bf16_sdwa v8, v225 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014440: 7E10B6F9 000416E1 - v_fmac_f32_e64 v146, v8, s45 // 000000014448: D13B0092 00005B08 - v_cvt_f32_bf16_sdwa v8, v225 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014450: 7E10B6F9 000516E1 - v_fmac_f32_e64 v147, v8, s45 // 000000014458: D13B0093 00005B08 - v_cvt_f32_bf16_sdwa v8, v226 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014460: 7E10B6F9 000416E2 - v_fmac_f32_e64 v148, v8, s45 // 000000014468: D13B0094 00005B08 - v_cvt_f32_bf16_sdwa v8, v226 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 
000000014470: 7E10B6F9 000516E2 - v_fmac_f32_e64 v149, v8, s45 // 000000014478: D13B0095 00005B08 - v_cvt_f32_bf16_sdwa v8, v227 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014480: 7E10B6F9 000416E3 - v_fmac_f32_e64 v150, v8, s45 // 000000014488: D13B0096 00005B08 - v_cvt_f32_bf16_sdwa v8, v227 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014490: 7E10B6F9 000516E3 - v_fmac_f32_e64 v151, v8, s45 // 000000014498: D13B0097 00005B08 - v_cvt_pk_bf16_f32 v144, v144, v145 // 0000000144A0: D2680090 00032390 - v_cvt_pk_bf16_f32 v145, v146, v147 // 0000000144A8: D2680091 00032792 - v_cvt_pk_bf16_f32 v146, v148, v149 // 0000000144B0: D2680092 00032B94 - v_cvt_pk_bf16_f32 v147, v150, v151 // 0000000144B8: D2680093 00032F96 - s_lshl_b32 s12, s36, 1 // 0000000144C0: 8E0C8124 - s_add_u32 s16, s16, s12 // 0000000144C4: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000144C8: 82118011 - buffer_store_dwordx4 v[144:147], v15, s[16:19], 0 offen nt // 0000000144CC: E07E1000 8004900F - s_waitcnt vmcnt(17) // 0000000144D4: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v228 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000144D8: 7E10B6F9 000416E4 - v_fmac_f32_e64 v152, v8, s45 // 0000000144E0: D13B0098 00005B08 - v_cvt_f32_bf16_sdwa v8, v228 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000144E8: 7E10B6F9 000516E4 - v_fmac_f32_e64 v153, v8, s45 // 0000000144F0: D13B0099 00005B08 - v_cvt_f32_bf16_sdwa v8, v229 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000144F8: 7E10B6F9 000416E5 - v_fmac_f32_e64 v154, v8, s45 // 000000014500: D13B009A 00005B08 - v_cvt_f32_bf16_sdwa v8, v229 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014508: 7E10B6F9 000516E5 - v_fmac_f32_e64 v155, v8, s45 // 000000014510: D13B009B 00005B08 - v_cvt_f32_bf16_sdwa v8, v230 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014518: 7E10B6F9 000416E6 - v_fmac_f32_e64 v156, v8, s45 // 000000014520: D13B009C 00005B08 
- v_cvt_f32_bf16_sdwa v8, v230 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014528: 7E10B6F9 000516E6 - v_fmac_f32_e64 v157, v8, s45 // 000000014530: D13B009D 00005B08 - v_cvt_f32_bf16_sdwa v8, v231 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014538: 7E10B6F9 000416E7 - v_fmac_f32_e64 v158, v8, s45 // 000000014540: D13B009E 00005B08 - v_cvt_f32_bf16_sdwa v8, v231 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014548: 7E10B6F9 000516E7 - v_fmac_f32_e64 v159, v8, s45 // 000000014550: D13B009F 00005B08 - v_cvt_pk_bf16_f32 v152, v152, v153 // 000000014558: D2680098 00033398 - v_cvt_pk_bf16_f32 v153, v154, v155 // 000000014560: D2680099 0003379A - v_cvt_pk_bf16_f32 v154, v156, v157 // 000000014568: D268009A 00033B9C - v_cvt_pk_bf16_f32 v155, v158, v159 // 000000014570: D268009B 00033F9E - s_lshl_b32 s12, s36, 1 // 000000014578: 8E0C8124 - s_add_u32 s16, s16, s12 // 00000001457C: 80100C10 - s_addc_u32 s17, s17, 0 // 000000014580: 82118011 - buffer_store_dwordx4 v[152:155], v15, s[16:19], 0 offen nt // 000000014584: E07E1000 8004980F - s_waitcnt vmcnt(17) // 00000001458C: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v232 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014590: 7E10B6F9 000416E8 - v_fmac_f32_e64 v160, v8, s45 // 000000014598: D13B00A0 00005B08 - v_cvt_f32_bf16_sdwa v8, v232 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000145A0: 7E10B6F9 000516E8 - v_fmac_f32_e64 v161, v8, s45 // 0000000145A8: D13B00A1 00005B08 - v_cvt_f32_bf16_sdwa v8, v233 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000145B0: 7E10B6F9 000416E9 - v_fmac_f32_e64 v162, v8, s45 // 0000000145B8: D13B00A2 00005B08 - v_cvt_f32_bf16_sdwa v8, v233 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000145C0: 7E10B6F9 000516E9 - v_fmac_f32_e64 v163, v8, s45 // 0000000145C8: D13B00A3 00005B08 - v_cvt_f32_bf16_sdwa v8, v234 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 
0000000145D0: 7E10B6F9 000416EA - v_fmac_f32_e64 v164, v8, s45 // 0000000145D8: D13B00A4 00005B08 - v_cvt_f32_bf16_sdwa v8, v234 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000145E0: 7E10B6F9 000516EA - v_fmac_f32_e64 v165, v8, s45 // 0000000145E8: D13B00A5 00005B08 - v_cvt_f32_bf16_sdwa v8, v235 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000145F0: 7E10B6F9 000416EB - v_fmac_f32_e64 v166, v8, s45 // 0000000145F8: D13B00A6 00005B08 - v_cvt_f32_bf16_sdwa v8, v235 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014600: 7E10B6F9 000516EB - v_fmac_f32_e64 v167, v8, s45 // 000000014608: D13B00A7 00005B08 - v_cvt_pk_bf16_f32 v160, v160, v161 // 000000014610: D26800A0 000343A0 - v_cvt_pk_bf16_f32 v161, v162, v163 // 000000014618: D26800A1 000347A2 - v_cvt_pk_bf16_f32 v162, v164, v165 // 000000014620: D26800A2 00034BA4 - v_cvt_pk_bf16_f32 v163, v166, v167 // 000000014628: D26800A3 00034FA6 - s_lshl_b32 s12, s36, 1 // 000000014630: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000014634: 80100C10 - s_addc_u32 s17, s17, 0 // 000000014638: 82118011 - buffer_store_dwordx4 v[160:163], v15, s[16:19], 0 offen nt // 00000001463C: E07E1000 8004A00F - s_waitcnt vmcnt(17) // 000000014644: BF8C4F71 - v_cvt_f32_bf16_sdwa v8, v236 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014648: 7E10B6F9 000416EC - v_fmac_f32_e64 v168, v8, s45 // 000000014650: D13B00A8 00005B08 - v_cvt_f32_bf16_sdwa v8, v236 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014658: 7E10B6F9 000516EC - v_fmac_f32_e64 v169, v8, s45 // 000000014660: D13B00A9 00005B08 - v_cvt_f32_bf16_sdwa v8, v237 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014668: 7E10B6F9 000416ED - v_fmac_f32_e64 v170, v8, s45 // 000000014670: D13B00AA 00005B08 - v_cvt_f32_bf16_sdwa v8, v237 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014678: 7E10B6F9 000516ED - v_fmac_f32_e64 v171, v8, s45 // 000000014680: D13B00AB 00005B08 
- v_cvt_f32_bf16_sdwa v8, v238 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014688: 7E10B6F9 000416EE - v_fmac_f32_e64 v172, v8, s45 // 000000014690: D13B00AC 00005B08 - v_cvt_f32_bf16_sdwa v8, v238 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014698: 7E10B6F9 000516EE - v_fmac_f32_e64 v173, v8, s45 // 0000000146A0: D13B00AD 00005B08 - v_cvt_f32_bf16_sdwa v8, v239 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000146A8: 7E10B6F9 000416EF - v_fmac_f32_e64 v174, v8, s45 // 0000000146B0: D13B00AE 00005B08 - v_cvt_f32_bf16_sdwa v8, v239 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000146B8: 7E10B6F9 000516EF - v_fmac_f32_e64 v175, v8, s45 // 0000000146C0: D13B00AF 00005B08 - v_cvt_pk_bf16_f32 v168, v168, v169 // 0000000146C8: D26800A8 000353A8 - v_cvt_pk_bf16_f32 v169, v170, v171 // 0000000146D0: D26800A9 000357AA - v_cvt_pk_bf16_f32 v170, v172, v173 // 0000000146D8: D26800AA 00035BAC - v_cvt_pk_bf16_f32 v171, v174, v175 // 0000000146E0: D26800AB 00035FAE - s_lshl_b32 s12, s36, 1 // 0000000146E8: 8E0C8124 - s_add_u32 s16, s16, s12 // 0000000146EC: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000146F0: 82118011 - buffer_store_dwordx4 v[168:171], v15, s[16:19], 0 offen nt // 0000000146F4: E07E1000 8004A80F - s_nop 0 // 0000000146FC: BF800000 - s_lshl_b32 s12, s38, 1 // 000000014700: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000014704: 80140C14 - s_addc_u32 s21, s21, 0 // 000000014708: 82158015 - buffer_load_dwordx4 v[20:23], v16, s[20:23], 0 offen // 00000001470C: E05C1000 80051410 - s_lshl_b32 s12, s38, 1 // 000000014714: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000014718: 80140C14 - s_addc_u32 s21, s21, 0 // 00000001471C: 82158015 - buffer_load_dwordx4 v[128:131], v16, s[20:23], 0 offen // 000000014720: E05C1000 80058010 - s_lshl_b32 s12, s38, 1 // 000000014728: 8E0C8126 - s_add_u32 s20, s20, s12 // 00000001472C: 80140C14 - s_addc_u32 s21, s21, 0 // 000000014730: 82158015 - buffer_load_dwordx4 
v[144:147], v16, s[20:23], 0 offen // 000000014734: E05C1000 80059010 - s_lshl_b32 s12, s38, 1 // 00000001473C: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000014740: 80140C14 - s_addc_u32 s21, s21, 0 // 000000014744: 82158015 - buffer_load_dwordx4 v[148:151], v16, s[20:23], 0 offen // 000000014748: E05C1000 80059410 - s_lshl_b32 s12, s38, 1 // 000000014750: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000014754: 80140C14 - s_addc_u32 s21, s21, 0 // 000000014758: 82158015 - buffer_load_dwordx4 v[152:155], v16, s[20:23], 0 offen // 00000001475C: E05C1000 80059810 - s_lshl_b32 s12, s38, 1 // 000000014764: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000014768: 80140C14 - s_addc_u32 s21, s21, 0 // 00000001476C: 82158015 - buffer_load_dwordx4 v[156:159], v16, s[20:23], 0 offen // 000000014770: E05C1000 80059C10 - s_lshl_b32 s12, s38, 1 // 000000014778: 8E0C8126 - s_add_u32 s20, s20, s12 // 00000001477C: 80140C14 - s_addc_u32 s21, s21, 0 // 000000014780: 82158015 - buffer_load_dwordx4 v[160:163], v16, s[20:23], 0 offen // 000000014784: E05C1000 8005A010 - s_lshl_b32 s12, s38, 1 // 00000001478C: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000014790: 80140C14 - s_addc_u32 s21, s21, 0 // 000000014794: 82158015 - buffer_load_dwordx4 v[164:167], v16, s[20:23], 0 offen // 000000014798: E05C1000 8005A410 - s_lshl_b32 s12, s38, 1 // 0000000147A0: 8E0C8126 - s_add_u32 s20, s20, s12 // 0000000147A4: 80140C14 - s_addc_u32 s21, s21, 0 // 0000000147A8: 82158015 - buffer_load_dwordx4 v[168:171], v16, s[20:23], 0 offen // 0000000147AC: E05C1000 8005A810 - s_lshl_b32 s12, s38, 1 // 0000000147B4: 8E0C8126 - s_add_u32 s20, s20, s12 // 0000000147B8: 80140C14 - s_addc_u32 s21, s21, 0 // 0000000147BC: 82158015 - buffer_load_dwordx4 v[172:175], v16, s[20:23], 0 offen // 0000000147C0: E05C1000 8005AC10 - s_lshl_b32 s12, s38, 1 // 0000000147C8: 8E0C8126 - s_add_u32 s20, s20, s12 // 0000000147CC: 80140C14 - s_addc_u32 s21, s21, 0 // 0000000147D0: 82158015 - buffer_load_dwordx4 v[176:179], v16, s[20:23], 0 
offen // 0000000147D4: E05C1000 8005B010 - s_lshl_b32 s12, s38, 1 // 0000000147DC: 8E0C8126 - s_add_u32 s20, s20, s12 // 0000000147E0: 80140C14 - s_addc_u32 s21, s21, 0 // 0000000147E4: 82158015 - buffer_load_dwordx4 v[180:183], v16, s[20:23], 0 offen // 0000000147E8: E05C1000 8005B410 - s_lshl_b32 s12, s38, 1 // 0000000147F0: 8E0C8126 - s_add_u32 s20, s20, s12 // 0000000147F4: 80140C14 - s_addc_u32 s21, s21, 0 // 0000000147F8: 82158015 - buffer_load_dwordx4 v[184:187], v16, s[20:23], 0 offen // 0000000147FC: E05C1000 8005B810 - s_lshl_b32 s12, s38, 1 // 000000014804: 8E0C8126 - s_add_u32 s20, s20, s12 // 000000014808: 80140C14 - s_addc_u32 s21, s21, 0 // 00000001480C: 82158015 - buffer_load_dwordx4 v[188:191], v16, s[20:23], 0 offen // 000000014810: E05C1000 8005BC10 - v_accvgpr_read_b32 v24, a66 // 000000014818: D3D84018 18000142 - v_accvgpr_read_b32 v25, a70 // 000000014820: D3D84019 18000146 - v_accvgpr_read_b32 v26, a74 // 000000014828: D3D8401A 1800014A - v_accvgpr_read_b32 v27, a78 // 000000014830: D3D8401B 1800014E - v_accvgpr_read_b32 v28, a82 // 000000014838: D3D8401C 18000152 - v_accvgpr_read_b32 v29, a86 // 000000014840: D3D8401D 18000156 - v_accvgpr_read_b32 v30, a90 // 000000014848: D3D8401E 1800015A - v_accvgpr_read_b32 v31, a94 // 000000014850: D3D8401F 1800015E - v_accvgpr_read_b32 v32, a98 // 000000014858: D3D84020 18000162 - v_accvgpr_read_b32 v33, a102 // 000000014860: D3D84021 18000166 - v_accvgpr_read_b32 v34, a106 // 000000014868: D3D84022 1800016A - v_accvgpr_read_b32 v35, a110 // 000000014870: D3D84023 1800016E - v_accvgpr_read_b32 v36, a114 // 000000014878: D3D84024 18000172 - v_accvgpr_read_b32 v37, a118 // 000000014880: D3D84025 18000176 - v_accvgpr_read_b32 v38, a122 // 000000014888: D3D84026 1800017A - v_accvgpr_read_b32 v39, a126 // 000000014890: D3D84027 1800017E - v_accvgpr_read_b32 v40, a130 // 000000014898: D3D84028 18000182 - v_accvgpr_read_b32 v41, a134 // 0000000148A0: D3D84029 18000186 - v_accvgpr_read_b32 v42, a138 // 
0000000148A8: D3D8402A 1800018A - v_accvgpr_read_b32 v43, a142 // 0000000148B0: D3D8402B 1800018E - v_accvgpr_read_b32 v44, a146 // 0000000148B8: D3D8402C 18000192 - v_accvgpr_read_b32 v45, a150 // 0000000148C0: D3D8402D 18000196 - v_accvgpr_read_b32 v46, a154 // 0000000148C8: D3D8402E 1800019A - v_accvgpr_read_b32 v47, a158 // 0000000148D0: D3D8402F 1800019E - v_accvgpr_read_b32 v48, a162 // 0000000148D8: D3D84030 180001A2 - v_accvgpr_read_b32 v49, a166 // 0000000148E0: D3D84031 180001A6 - v_accvgpr_read_b32 v50, a170 // 0000000148E8: D3D84032 180001AA - v_accvgpr_read_b32 v51, a174 // 0000000148F0: D3D84033 180001AE - v_accvgpr_read_b32 v52, a178 // 0000000148F8: D3D84034 180001B2 - v_accvgpr_read_b32 v53, a182 // 000000014900: D3D84035 180001B6 - v_accvgpr_read_b32 v54, a186 // 000000014908: D3D84036 180001BA - v_accvgpr_read_b32 v55, a190 // 000000014910: D3D84037 180001BE - v_accvgpr_read_b32 v56, a194 // 000000014918: D3D84038 180001C2 - v_accvgpr_read_b32 v57, a198 // 000000014920: D3D84039 180001C6 - v_accvgpr_read_b32 v58, a202 // 000000014928: D3D8403A 180001CA - v_accvgpr_read_b32 v59, a206 // 000000014930: D3D8403B 180001CE - v_accvgpr_read_b32 v60, a210 // 000000014938: D3D8403C 180001D2 - v_accvgpr_read_b32 v61, a214 // 000000014940: D3D8403D 180001D6 - v_accvgpr_read_b32 v62, a218 // 000000014948: D3D8403E 180001DA - v_accvgpr_read_b32 v63, a222 // 000000014950: D3D8403F 180001DE - v_accvgpr_read_b32 v64, a226 // 000000014958: D3D84040 180001E2 - v_accvgpr_read_b32 v65, a230 // 000000014960: D3D84041 180001E6 - v_accvgpr_read_b32 v66, a234 // 000000014968: D3D84042 180001EA - v_accvgpr_read_b32 v67, a238 // 000000014970: D3D84043 180001EE - v_accvgpr_read_b32 v68, a242 // 000000014978: D3D84044 180001F2 - v_accvgpr_read_b32 v69, a246 // 000000014980: D3D84045 180001F6 - v_accvgpr_read_b32 v70, a250 // 000000014988: D3D84046 180001FA - v_accvgpr_read_b32 v71, a254 // 000000014990: D3D84047 180001FE - v_accvgpr_read_b32 v72, a3 // 000000014998: 
D3D84048 18000103 - v_accvgpr_read_b32 v73, a7 // 0000000149A0: D3D84049 18000107 - v_accvgpr_read_b32 v74, a11 // 0000000149A8: D3D8404A 1800010B - v_accvgpr_read_b32 v75, a15 // 0000000149B0: D3D8404B 1800010F - v_accvgpr_read_b32 v76, a19 // 0000000149B8: D3D8404C 18000113 - v_accvgpr_read_b32 v77, a23 // 0000000149C0: D3D8404D 18000117 - v_accvgpr_read_b32 v78, a27 // 0000000149C8: D3D8404E 1800011B - v_accvgpr_read_b32 v79, a31 // 0000000149D0: D3D8404F 1800011F - v_accvgpr_read_b32 v80, a35 // 0000000149D8: D3D84050 18000123 - v_accvgpr_read_b32 v81, a39 // 0000000149E0: D3D84051 18000127 - v_accvgpr_read_b32 v82, a43 // 0000000149E8: D3D84052 1800012B - v_accvgpr_read_b32 v83, a47 // 0000000149F0: D3D84053 1800012F - v_accvgpr_read_b32 v84, a51 // 0000000149F8: D3D84054 18000133 - v_accvgpr_read_b32 v85, a55 // 000000014A00: D3D84055 18000137 - v_accvgpr_read_b32 v86, a59 // 000000014A08: D3D84056 1800013B - v_accvgpr_read_b32 v87, a63 // 000000014A10: D3D84057 1800013F - v_accvgpr_read_b32 v88, a67 // 000000014A18: D3D84058 18000143 - v_accvgpr_read_b32 v89, a71 // 000000014A20: D3D84059 18000147 - v_accvgpr_read_b32 v90, a75 // 000000014A28: D3D8405A 1800014B - v_accvgpr_read_b32 v91, a79 // 000000014A30: D3D8405B 1800014F - v_accvgpr_read_b32 v92, a83 // 000000014A38: D3D8405C 18000153 - v_accvgpr_read_b32 v93, a87 // 000000014A40: D3D8405D 18000157 - v_accvgpr_read_b32 v94, a91 // 000000014A48: D3D8405E 1800015B - v_accvgpr_read_b32 v95, a95 // 000000014A50: D3D8405F 1800015F - v_accvgpr_read_b32 v96, a99 // 000000014A58: D3D84060 18000163 - v_accvgpr_read_b32 v97, a103 // 000000014A60: D3D84061 18000167 - v_accvgpr_read_b32 v98, a107 // 000000014A68: D3D84062 1800016B - v_accvgpr_read_b32 v99, a111 // 000000014A70: D3D84063 1800016F - v_accvgpr_read_b32 v100, a115 // 000000014A78: D3D84064 18000173 - v_accvgpr_read_b32 v101, a119 // 000000014A80: D3D84065 18000177 - v_accvgpr_read_b32 v102, a123 // 000000014A88: D3D84066 1800017B - v_accvgpr_read_b32 
v103, a127 // 000000014A90: D3D84067 1800017F - v_accvgpr_read_b32 v104, a131 // 000000014A98: D3D84068 18000183 - v_accvgpr_read_b32 v105, a135 // 000000014AA0: D3D84069 18000187 - v_accvgpr_read_b32 v106, a139 // 000000014AA8: D3D8406A 1800018B - v_accvgpr_read_b32 v107, a143 // 000000014AB0: D3D8406B 1800018F - v_accvgpr_read_b32 v108, a147 // 000000014AB8: D3D8406C 18000193 - v_accvgpr_read_b32 v109, a151 // 000000014AC0: D3D8406D 18000197 - v_accvgpr_read_b32 v110, a155 // 000000014AC8: D3D8406E 1800019B - v_accvgpr_read_b32 v111, a159 // 000000014AD0: D3D8406F 1800019F - v_accvgpr_read_b32 v112, a163 // 000000014AD8: D3D84070 180001A3 - v_accvgpr_read_b32 v113, a167 // 000000014AE0: D3D84071 180001A7 - v_accvgpr_read_b32 v114, a171 // 000000014AE8: D3D84072 180001AB - v_accvgpr_read_b32 v115, a175 // 000000014AF0: D3D84073 180001AF - v_accvgpr_read_b32 v116, a179 // 000000014AF8: D3D84074 180001B3 - v_accvgpr_read_b32 v117, a183 // 000000014B00: D3D84075 180001B7 - v_accvgpr_read_b32 v118, a187 // 000000014B08: D3D84076 180001BB - v_accvgpr_read_b32 v119, a191 // 000000014B10: D3D84077 180001BF - v_accvgpr_read_b32 v120, a195 // 000000014B18: D3D84078 180001C3 - v_accvgpr_read_b32 v121, a199 // 000000014B20: D3D84079 180001C7 - v_accvgpr_read_b32 v122, a203 // 000000014B28: D3D8407A 180001CB - v_accvgpr_read_b32 v123, a207 // 000000014B30: D3D8407B 180001CF - v_accvgpr_read_b32 v124, a211 // 000000014B38: D3D8407C 180001D3 - v_accvgpr_read_b32 v125, a215 // 000000014B40: D3D8407D 180001D7 - v_accvgpr_read_b32 v126, a219 // 000000014B48: D3D8407E 180001DB - v_accvgpr_read_b32 v127, a223 // 000000014B50: D3D8407F 180001DF - v_accvgpr_read_b32 v136, a227 // 000000014B58: D3D84088 180001E3 - v_accvgpr_read_b32 v137, a231 // 000000014B60: D3D84089 180001E7 - v_accvgpr_read_b32 v138, a235 // 000000014B68: D3D8408A 180001EB - v_accvgpr_read_b32 v139, a239 // 000000014B70: D3D8408B 180001EF - v_accvgpr_read_b32 v140, a243 // 000000014B78: D3D8408C 180001F3 - 
v_accvgpr_read_b32 v141, a247 // 000000014B80: D3D8408D 180001F7 - v_accvgpr_read_b32 v142, a251 // 000000014B88: D3D8408E 180001FB - v_accvgpr_read_b32 v143, a255 // 000000014B90: D3D8408F 180001FF - v_pk_mul_f32 v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 000000014B98: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 000000014BA0: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 000000014BA8: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] op_sel_hi:[0,1] // 000000014BB0: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 000000014BB8: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 000000014BC0: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 000000014BC8: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 000000014BD0: D3B14026 10024C2C - v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 000000014BD8: D3B14028 1002502C - v_pk_mul_f32 v[42:43], s[44:45], v[42:43] op_sel_hi:[0,1] // 000000014BE0: D3B1402A 1002542C - v_pk_mul_f32 v[44:45], s[44:45], v[44:45] op_sel_hi:[0,1] // 000000014BE8: D3B1402C 1002582C - v_pk_mul_f32 v[46:47], s[44:45], v[46:47] op_sel_hi:[0,1] // 000000014BF0: D3B1402E 10025C2C - v_pk_mul_f32 v[48:49], s[44:45], v[48:49] op_sel_hi:[0,1] // 000000014BF8: D3B14030 1002602C - v_pk_mul_f32 v[50:51], s[44:45], v[50:51] op_sel_hi:[0,1] // 000000014C00: D3B14032 1002642C - v_pk_mul_f32 v[52:53], s[44:45], v[52:53] op_sel_hi:[0,1] // 000000014C08: D3B14034 1002682C - v_pk_mul_f32 v[54:55], s[44:45], v[54:55] op_sel_hi:[0,1] // 000000014C10: D3B14036 10026C2C - v_pk_mul_f32 v[56:57], s[44:45], v[56:57] op_sel_hi:[0,1] // 000000014C18: D3B14038 1002702C - v_pk_mul_f32 v[58:59], s[44:45], v[58:59] op_sel_hi:[0,1] // 000000014C20: D3B1403A 1002742C - v_pk_mul_f32 v[60:61], s[44:45], v[60:61] op_sel_hi:[0,1] // 000000014C28: D3B1403C 
1002782C - v_pk_mul_f32 v[62:63], s[44:45], v[62:63] op_sel_hi:[0,1] // 000000014C30: D3B1403E 10027C2C - v_pk_mul_f32 v[64:65], s[44:45], v[64:65] op_sel_hi:[0,1] // 000000014C38: D3B14040 1002802C - v_pk_mul_f32 v[66:67], s[44:45], v[66:67] op_sel_hi:[0,1] // 000000014C40: D3B14042 1002842C - v_pk_mul_f32 v[68:69], s[44:45], v[68:69] op_sel_hi:[0,1] // 000000014C48: D3B14044 1002882C - v_pk_mul_f32 v[70:71], s[44:45], v[70:71] op_sel_hi:[0,1] // 000000014C50: D3B14046 10028C2C - v_pk_mul_f32 v[72:73], s[44:45], v[72:73] op_sel_hi:[0,1] // 000000014C58: D3B14048 1002902C - v_pk_mul_f32 v[74:75], s[44:45], v[74:75] op_sel_hi:[0,1] // 000000014C60: D3B1404A 1002942C - v_pk_mul_f32 v[76:77], s[44:45], v[76:77] op_sel_hi:[0,1] // 000000014C68: D3B1404C 1002982C - v_pk_mul_f32 v[78:79], s[44:45], v[78:79] op_sel_hi:[0,1] // 000000014C70: D3B1404E 10029C2C - v_pk_mul_f32 v[80:81], s[44:45], v[80:81] op_sel_hi:[0,1] // 000000014C78: D3B14050 1002A02C - v_pk_mul_f32 v[82:83], s[44:45], v[82:83] op_sel_hi:[0,1] // 000000014C80: D3B14052 1002A42C - v_pk_mul_f32 v[84:85], s[44:45], v[84:85] op_sel_hi:[0,1] // 000000014C88: D3B14054 1002A82C - v_pk_mul_f32 v[86:87], s[44:45], v[86:87] op_sel_hi:[0,1] // 000000014C90: D3B14056 1002AC2C - v_pk_mul_f32 v[88:89], s[44:45], v[88:89] op_sel_hi:[0,1] // 000000014C98: D3B14058 1002B02C - v_pk_mul_f32 v[90:91], s[44:45], v[90:91] op_sel_hi:[0,1] // 000000014CA0: D3B1405A 1002B42C - v_pk_mul_f32 v[92:93], s[44:45], v[92:93] op_sel_hi:[0,1] // 000000014CA8: D3B1405C 1002B82C - v_pk_mul_f32 v[94:95], s[44:45], v[94:95] op_sel_hi:[0,1] // 000000014CB0: D3B1405E 1002BC2C - v_pk_mul_f32 v[96:97], s[44:45], v[96:97] op_sel_hi:[0,1] // 000000014CB8: D3B14060 1002C02C - v_pk_mul_f32 v[98:99], s[44:45], v[98:99] op_sel_hi:[0,1] // 000000014CC0: D3B14062 1002C42C - v_pk_mul_f32 v[100:101], s[44:45], v[100:101] op_sel_hi:[0,1]// 000000014CC8: D3B14064 1002C82C - v_pk_mul_f32 v[102:103], s[44:45], v[102:103] op_sel_hi:[0,1]// 000000014CD0: 
D3B14066 1002CC2C - v_pk_mul_f32 v[104:105], s[44:45], v[104:105] op_sel_hi:[0,1]// 000000014CD8: D3B14068 1002D02C - v_pk_mul_f32 v[106:107], s[44:45], v[106:107] op_sel_hi:[0,1]// 000000014CE0: D3B1406A 1002D42C - v_pk_mul_f32 v[108:109], s[44:45], v[108:109] op_sel_hi:[0,1]// 000000014CE8: D3B1406C 1002D82C - v_pk_mul_f32 v[110:111], s[44:45], v[110:111] op_sel_hi:[0,1]// 000000014CF0: D3B1406E 1002DC2C - v_pk_mul_f32 v[112:113], s[44:45], v[112:113] op_sel_hi:[0,1]// 000000014CF8: D3B14070 1002E02C - v_pk_mul_f32 v[114:115], s[44:45], v[114:115] op_sel_hi:[0,1]// 000000014D00: D3B14072 1002E42C - v_pk_mul_f32 v[116:117], s[44:45], v[116:117] op_sel_hi:[0,1]// 000000014D08: D3B14074 1002E82C - v_pk_mul_f32 v[118:119], s[44:45], v[118:119] op_sel_hi:[0,1]// 000000014D10: D3B14076 1002EC2C - v_pk_mul_f32 v[120:121], s[44:45], v[120:121] op_sel_hi:[0,1]// 000000014D18: D3B14078 1002F02C - v_pk_mul_f32 v[122:123], s[44:45], v[122:123] op_sel_hi:[0,1]// 000000014D20: D3B1407A 1002F42C - v_pk_mul_f32 v[124:125], s[44:45], v[124:125] op_sel_hi:[0,1]// 000000014D28: D3B1407C 1002F82C - v_pk_mul_f32 v[126:127], s[44:45], v[126:127] op_sel_hi:[0,1]// 000000014D30: D3B1407E 1002FC2C - v_pk_mul_f32 v[136:137], s[44:45], v[136:137] op_sel_hi:[0,1]// 000000014D38: D3B14088 1003102C - v_pk_mul_f32 v[138:139], s[44:45], v[138:139] op_sel_hi:[0,1]// 000000014D40: D3B1408A 1003142C - v_pk_mul_f32 v[140:141], s[44:45], v[140:141] op_sel_hi:[0,1]// 000000014D48: D3B1408C 1003182C - v_pk_mul_f32 v[142:143], s[44:45], v[142:143] op_sel_hi:[0,1]// 000000014D50: D3B1408E 10031C2C - v_mov_b32_e32 v12, 0xffff0000 // 000000014D58: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 000000014D60: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 000000014D68: 7E1C02FF 00007FFF - s_waitcnt vmcnt(13) // 000000014D70: BF8C0F7D - v_cvt_f32_bf16_sdwa v8, v20 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014D74: 7E10B6F9 00041614 - v_fmac_f32_e64 v24, v8, s45 // 000000014D7C: 
D13B0018 00005B08 - v_cvt_f32_bf16_sdwa v8, v20 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014D84: 7E10B6F9 00051614 - v_fmac_f32_e64 v25, v8, s45 // 000000014D8C: D13B0019 00005B08 - v_cvt_f32_bf16_sdwa v8, v21 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014D94: 7E10B6F9 00041615 - v_fmac_f32_e64 v26, v8, s45 // 000000014D9C: D13B001A 00005B08 - v_cvt_f32_bf16_sdwa v8, v21 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014DA4: 7E10B6F9 00051615 - v_fmac_f32_e64 v27, v8, s45 // 000000014DAC: D13B001B 00005B08 - v_cvt_f32_bf16_sdwa v8, v22 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014DB4: 7E10B6F9 00041616 - v_fmac_f32_e64 v28, v8, s45 // 000000014DBC: D13B001C 00005B08 - v_cvt_f32_bf16_sdwa v8, v22 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014DC4: 7E10B6F9 00051616 - v_fmac_f32_e64 v29, v8, s45 // 000000014DCC: D13B001D 00005B08 - v_cvt_f32_bf16_sdwa v8, v23 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014DD4: 7E10B6F9 00041617 - v_fmac_f32_e64 v30, v8, s45 // 000000014DDC: D13B001E 00005B08 - v_cvt_f32_bf16_sdwa v8, v23 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014DE4: 7E10B6F9 00051617 - v_fmac_f32_e64 v31, v8, s45 // 000000014DEC: D13B001F 00005B08 - v_cvt_pk_bf16_f32 v24, v24, v25 // 000000014DF4: D2680018 00023318 - v_cvt_pk_bf16_f32 v25, v26, v27 // 000000014DFC: D2680019 0002371A - v_cvt_pk_bf16_f32 v26, v28, v29 // 000000014E04: D268001A 00023B1C - v_cvt_pk_bf16_f32 v27, v30, v31 // 000000014E0C: D268001B 00023F1E - s_lshl_b32 s12, s36, 1 // 000000014E14: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000014E18: 80100C10 - s_addc_u32 s17, s17, 0 // 000000014E1C: 82118011 - buffer_store_dwordx4 v[24:27], v15, s[16:19], 0 offen nt // 000000014E20: E07E1000 8004180F - s_waitcnt vmcnt(13) // 000000014E28: BF8C0F7D - v_cvt_f32_bf16_sdwa v8, v128 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014E2C: 
7E10B6F9 00041680 - v_fmac_f32_e64 v32, v8, s45 // 000000014E34: D13B0020 00005B08 - v_cvt_f32_bf16_sdwa v8, v128 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014E3C: 7E10B6F9 00051680 - v_fmac_f32_e64 v33, v8, s45 // 000000014E44: D13B0021 00005B08 - v_cvt_f32_bf16_sdwa v8, v129 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014E4C: 7E10B6F9 00041681 - v_fmac_f32_e64 v34, v8, s45 // 000000014E54: D13B0022 00005B08 - v_cvt_f32_bf16_sdwa v8, v129 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014E5C: 7E10B6F9 00051681 - v_fmac_f32_e64 v35, v8, s45 // 000000014E64: D13B0023 00005B08 - v_cvt_f32_bf16_sdwa v8, v130 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014E6C: 7E10B6F9 00041682 - v_fmac_f32_e64 v36, v8, s45 // 000000014E74: D13B0024 00005B08 - v_cvt_f32_bf16_sdwa v8, v130 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014E7C: 7E10B6F9 00051682 - v_fmac_f32_e64 v37, v8, s45 // 000000014E84: D13B0025 00005B08 - v_cvt_f32_bf16_sdwa v8, v131 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014E8C: 7E10B6F9 00041683 - v_fmac_f32_e64 v38, v8, s45 // 000000014E94: D13B0026 00005B08 - v_cvt_f32_bf16_sdwa v8, v131 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014E9C: 7E10B6F9 00051683 - v_fmac_f32_e64 v39, v8, s45 // 000000014EA4: D13B0027 00005B08 - v_cvt_pk_bf16_f32 v32, v32, v33 // 000000014EAC: D2680020 00024320 - v_cvt_pk_bf16_f32 v33, v34, v35 // 000000014EB4: D2680021 00024722 - v_cvt_pk_bf16_f32 v34, v36, v37 // 000000014EBC: D2680022 00024B24 - v_cvt_pk_bf16_f32 v35, v38, v39 // 000000014EC4: D2680023 00024F26 - s_lshl_b32 s12, s36, 1 // 000000014ECC: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000014ED0: 80100C10 - s_addc_u32 s17, s17, 0 // 000000014ED4: 82118011 - buffer_store_dwordx4 v[32:35], v15, s[16:19], 0 offen nt // 000000014ED8: E07E1000 8004200F - s_waitcnt vmcnt(13) // 000000014EE0: BF8C0F7D - v_cvt_f32_bf16_sdwa v8, v144 
dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014EE4: 7E10B6F9 00041690 - v_fmac_f32_e64 v40, v8, s45 // 000000014EEC: D13B0028 00005B08 - v_cvt_f32_bf16_sdwa v8, v144 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014EF4: 7E10B6F9 00051690 - v_fmac_f32_e64 v41, v8, s45 // 000000014EFC: D13B0029 00005B08 - v_cvt_f32_bf16_sdwa v8, v145 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014F04: 7E10B6F9 00041691 - v_fmac_f32_e64 v42, v8, s45 // 000000014F0C: D13B002A 00005B08 - v_cvt_f32_bf16_sdwa v8, v145 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014F14: 7E10B6F9 00051691 - v_fmac_f32_e64 v43, v8, s45 // 000000014F1C: D13B002B 00005B08 - v_cvt_f32_bf16_sdwa v8, v146 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014F24: 7E10B6F9 00041692 - v_fmac_f32_e64 v44, v8, s45 // 000000014F2C: D13B002C 00005B08 - v_cvt_f32_bf16_sdwa v8, v146 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014F34: 7E10B6F9 00051692 - v_fmac_f32_e64 v45, v8, s45 // 000000014F3C: D13B002D 00005B08 - v_cvt_f32_bf16_sdwa v8, v147 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014F44: 7E10B6F9 00041693 - v_fmac_f32_e64 v46, v8, s45 // 000000014F4C: D13B002E 00005B08 - v_cvt_f32_bf16_sdwa v8, v147 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014F54: 7E10B6F9 00051693 - v_fmac_f32_e64 v47, v8, s45 // 000000014F5C: D13B002F 00005B08 - v_cvt_pk_bf16_f32 v40, v40, v41 // 000000014F64: D2680028 00025328 - v_cvt_pk_bf16_f32 v41, v42, v43 // 000000014F6C: D2680029 0002572A - v_cvt_pk_bf16_f32 v42, v44, v45 // 000000014F74: D268002A 00025B2C - v_cvt_pk_bf16_f32 v43, v46, v47 // 000000014F7C: D268002B 00025F2E - s_lshl_b32 s12, s36, 1 // 000000014F84: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000014F88: 80100C10 - s_addc_u32 s17, s17, 0 // 000000014F8C: 82118011 - buffer_store_dwordx4 v[40:43], v15, s[16:19], 0 offen nt // 000000014F90: E07E1000 8004280F - 
s_waitcnt vmcnt(13) // 000000014F98: BF8C0F7D - v_cvt_f32_bf16_sdwa v8, v148 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014F9C: 7E10B6F9 00041694 - v_fmac_f32_e64 v48, v8, s45 // 000000014FA4: D13B0030 00005B08 - v_cvt_f32_bf16_sdwa v8, v148 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014FAC: 7E10B6F9 00051694 - v_fmac_f32_e64 v49, v8, s45 // 000000014FB4: D13B0031 00005B08 - v_cvt_f32_bf16_sdwa v8, v149 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014FBC: 7E10B6F9 00041695 - v_fmac_f32_e64 v50, v8, s45 // 000000014FC4: D13B0032 00005B08 - v_cvt_f32_bf16_sdwa v8, v149 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014FCC: 7E10B6F9 00051695 - v_fmac_f32_e64 v51, v8, s45 // 000000014FD4: D13B0033 00005B08 - v_cvt_f32_bf16_sdwa v8, v150 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014FDC: 7E10B6F9 00041696 - v_fmac_f32_e64 v52, v8, s45 // 000000014FE4: D13B0034 00005B08 - v_cvt_f32_bf16_sdwa v8, v150 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000014FEC: 7E10B6F9 00051696 - v_fmac_f32_e64 v53, v8, s45 // 000000014FF4: D13B0035 00005B08 - v_cvt_f32_bf16_sdwa v8, v151 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000014FFC: 7E10B6F9 00041697 - v_fmac_f32_e64 v54, v8, s45 // 000000015004: D13B0036 00005B08 - v_cvt_f32_bf16_sdwa v8, v151 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001500C: 7E10B6F9 00051697 - v_fmac_f32_e64 v55, v8, s45 // 000000015014: D13B0037 00005B08 - v_cvt_pk_bf16_f32 v48, v48, v49 // 00000001501C: D2680030 00026330 - v_cvt_pk_bf16_f32 v49, v50, v51 // 000000015024: D2680031 00026732 - v_cvt_pk_bf16_f32 v50, v52, v53 // 00000001502C: D2680032 00026B34 - v_cvt_pk_bf16_f32 v51, v54, v55 // 000000015034: D2680033 00026F36 - s_lshl_b32 s12, s36, 1 // 00000001503C: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000015040: 80100C10 - s_addc_u32 s17, s17, 0 // 000000015044: 82118011 - buffer_store_dwordx4 
v[48:51], v15, s[16:19], 0 offen nt // 000000015048: E07E1000 8004300F - s_waitcnt vmcnt(13) // 000000015050: BF8C0F7D - v_cvt_f32_bf16_sdwa v8, v152 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000015054: 7E10B6F9 00041698 - v_fmac_f32_e64 v56, v8, s45 // 00000001505C: D13B0038 00005B08 - v_cvt_f32_bf16_sdwa v8, v152 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000015064: 7E10B6F9 00051698 - v_fmac_f32_e64 v57, v8, s45 // 00000001506C: D13B0039 00005B08 - v_cvt_f32_bf16_sdwa v8, v153 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000015074: 7E10B6F9 00041699 - v_fmac_f32_e64 v58, v8, s45 // 00000001507C: D13B003A 00005B08 - v_cvt_f32_bf16_sdwa v8, v153 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000015084: 7E10B6F9 00051699 - v_fmac_f32_e64 v59, v8, s45 // 00000001508C: D13B003B 00005B08 - v_cvt_f32_bf16_sdwa v8, v154 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000015094: 7E10B6F9 0004169A - v_fmac_f32_e64 v60, v8, s45 // 00000001509C: D13B003C 00005B08 - v_cvt_f32_bf16_sdwa v8, v154 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000150A4: 7E10B6F9 0005169A - v_fmac_f32_e64 v61, v8, s45 // 0000000150AC: D13B003D 00005B08 - v_cvt_f32_bf16_sdwa v8, v155 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000150B4: 7E10B6F9 0004169B - v_fmac_f32_e64 v62, v8, s45 // 0000000150BC: D13B003E 00005B08 - v_cvt_f32_bf16_sdwa v8, v155 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000150C4: 7E10B6F9 0005169B - v_fmac_f32_e64 v63, v8, s45 // 0000000150CC: D13B003F 00005B08 - v_cvt_pk_bf16_f32 v56, v56, v57 // 0000000150D4: D2680038 00027338 - v_cvt_pk_bf16_f32 v57, v58, v59 // 0000000150DC: D2680039 0002773A - v_cvt_pk_bf16_f32 v58, v60, v61 // 0000000150E4: D268003A 00027B3C - v_cvt_pk_bf16_f32 v59, v62, v63 // 0000000150EC: D268003B 00027F3E - s_lshl_b32 s12, s36, 1 // 0000000150F4: 8E0C8124 - s_add_u32 s16, s16, s12 // 0000000150F8: 80100C10 - 
s_addc_u32 s17, s17, 0 // 0000000150FC: 82118011 - buffer_store_dwordx4 v[56:59], v15, s[16:19], 0 offen nt // 000000015100: E07E1000 8004380F - s_waitcnt vmcnt(13) // 000000015108: BF8C0F7D - v_cvt_f32_bf16_sdwa v8, v156 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001510C: 7E10B6F9 0004169C - v_fmac_f32_e64 v64, v8, s45 // 000000015114: D13B0040 00005B08 - v_cvt_f32_bf16_sdwa v8, v156 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001511C: 7E10B6F9 0005169C - v_fmac_f32_e64 v65, v8, s45 // 000000015124: D13B0041 00005B08 - v_cvt_f32_bf16_sdwa v8, v157 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001512C: 7E10B6F9 0004169D - v_fmac_f32_e64 v66, v8, s45 // 000000015134: D13B0042 00005B08 - v_cvt_f32_bf16_sdwa v8, v157 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001513C: 7E10B6F9 0005169D - v_fmac_f32_e64 v67, v8, s45 // 000000015144: D13B0043 00005B08 - v_cvt_f32_bf16_sdwa v8, v158 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001514C: 7E10B6F9 0004169E - v_fmac_f32_e64 v68, v8, s45 // 000000015154: D13B0044 00005B08 - v_cvt_f32_bf16_sdwa v8, v158 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001515C: 7E10B6F9 0005169E - v_fmac_f32_e64 v69, v8, s45 // 000000015164: D13B0045 00005B08 - v_cvt_f32_bf16_sdwa v8, v159 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001516C: 7E10B6F9 0004169F - v_fmac_f32_e64 v70, v8, s45 // 000000015174: D13B0046 00005B08 - v_cvt_f32_bf16_sdwa v8, v159 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001517C: 7E10B6F9 0005169F - v_fmac_f32_e64 v71, v8, s45 // 000000015184: D13B0047 00005B08 - v_cvt_pk_bf16_f32 v64, v64, v65 // 00000001518C: D2680040 00028340 - v_cvt_pk_bf16_f32 v65, v66, v67 // 000000015194: D2680041 00028742 - v_cvt_pk_bf16_f32 v66, v68, v69 // 00000001519C: D2680042 00028B44 - v_cvt_pk_bf16_f32 v67, v70, v71 // 0000000151A4: D2680043 00028F46 - s_lshl_b32 s12, s36, 1 // 
0000000151AC: 8E0C8124 - s_add_u32 s16, s16, s12 // 0000000151B0: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000151B4: 82118011 - buffer_store_dwordx4 v[64:67], v15, s[16:19], 0 offen nt // 0000000151B8: E07E1000 8004400F - s_waitcnt vmcnt(13) // 0000000151C0: BF8C0F7D - v_cvt_f32_bf16_sdwa v8, v160 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000151C4: 7E10B6F9 000416A0 - v_fmac_f32_e64 v72, v8, s45 // 0000000151CC: D13B0048 00005B08 - v_cvt_f32_bf16_sdwa v8, v160 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000151D4: 7E10B6F9 000516A0 - v_fmac_f32_e64 v73, v8, s45 // 0000000151DC: D13B0049 00005B08 - v_cvt_f32_bf16_sdwa v8, v161 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000151E4: 7E10B6F9 000416A1 - v_fmac_f32_e64 v74, v8, s45 // 0000000151EC: D13B004A 00005B08 - v_cvt_f32_bf16_sdwa v8, v161 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000151F4: 7E10B6F9 000516A1 - v_fmac_f32_e64 v75, v8, s45 // 0000000151FC: D13B004B 00005B08 - v_cvt_f32_bf16_sdwa v8, v162 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000015204: 7E10B6F9 000416A2 - v_fmac_f32_e64 v76, v8, s45 // 00000001520C: D13B004C 00005B08 - v_cvt_f32_bf16_sdwa v8, v162 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000015214: 7E10B6F9 000516A2 - v_fmac_f32_e64 v77, v8, s45 // 00000001521C: D13B004D 00005B08 - v_cvt_f32_bf16_sdwa v8, v163 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000015224: 7E10B6F9 000416A3 - v_fmac_f32_e64 v78, v8, s45 // 00000001522C: D13B004E 00005B08 - v_cvt_f32_bf16_sdwa v8, v163 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000015234: 7E10B6F9 000516A3 - v_fmac_f32_e64 v79, v8, s45 // 00000001523C: D13B004F 00005B08 - v_cvt_pk_bf16_f32 v72, v72, v73 // 000000015244: D2680048 00029348 - v_cvt_pk_bf16_f32 v73, v74, v75 // 00000001524C: D2680049 0002974A - v_cvt_pk_bf16_f32 v74, v76, v77 // 000000015254: D268004A 00029B4C - v_cvt_pk_bf16_f32 v75, 
v78, v79 // 00000001525C: D268004B 00029F4E - s_lshl_b32 s12, s36, 1 // 000000015264: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000015268: 80100C10 - s_addc_u32 s17, s17, 0 // 00000001526C: 82118011 - buffer_store_dwordx4 v[72:75], v15, s[16:19], 0 offen nt // 000000015270: E07E1000 8004480F - s_waitcnt vmcnt(13) // 000000015278: BF8C0F7D - v_cvt_f32_bf16_sdwa v8, v164 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001527C: 7E10B6F9 000416A4 - v_fmac_f32_e64 v80, v8, s45 // 000000015284: D13B0050 00005B08 - v_cvt_f32_bf16_sdwa v8, v164 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001528C: 7E10B6F9 000516A4 - v_fmac_f32_e64 v81, v8, s45 // 000000015294: D13B0051 00005B08 - v_cvt_f32_bf16_sdwa v8, v165 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001529C: 7E10B6F9 000416A5 - v_fmac_f32_e64 v82, v8, s45 // 0000000152A4: D13B0052 00005B08 - v_cvt_f32_bf16_sdwa v8, v165 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000152AC: 7E10B6F9 000516A5 - v_fmac_f32_e64 v83, v8, s45 // 0000000152B4: D13B0053 00005B08 - v_cvt_f32_bf16_sdwa v8, v166 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000152BC: 7E10B6F9 000416A6 - v_fmac_f32_e64 v84, v8, s45 // 0000000152C4: D13B0054 00005B08 - v_cvt_f32_bf16_sdwa v8, v166 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000152CC: 7E10B6F9 000516A6 - v_fmac_f32_e64 v85, v8, s45 // 0000000152D4: D13B0055 00005B08 - v_cvt_f32_bf16_sdwa v8, v167 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000152DC: 7E10B6F9 000416A7 - v_fmac_f32_e64 v86, v8, s45 // 0000000152E4: D13B0056 00005B08 - v_cvt_f32_bf16_sdwa v8, v167 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000152EC: 7E10B6F9 000516A7 - v_fmac_f32_e64 v87, v8, s45 // 0000000152F4: D13B0057 00005B08 - v_cvt_pk_bf16_f32 v80, v80, v81 // 0000000152FC: D2680050 0002A350 - v_cvt_pk_bf16_f32 v81, v82, v83 // 000000015304: D2680051 0002A752 - v_cvt_pk_bf16_f32 v82, 
v84, v85 // 00000001530C: D2680052 0002AB54 - v_cvt_pk_bf16_f32 v83, v86, v87 // 000000015314: D2680053 0002AF56 - s_lshl_b32 s12, s36, 1 // 00000001531C: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000015320: 80100C10 - s_addc_u32 s17, s17, 0 // 000000015324: 82118011 - buffer_store_dwordx4 v[80:83], v15, s[16:19], 0 offen nt // 000000015328: E07E1000 8004500F - s_waitcnt vmcnt(13) // 000000015330: BF8C0F7D - v_cvt_f32_bf16_sdwa v8, v168 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000015334: 7E10B6F9 000416A8 - v_fmac_f32_e64 v88, v8, s45 // 00000001533C: D13B0058 00005B08 - v_cvt_f32_bf16_sdwa v8, v168 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000015344: 7E10B6F9 000516A8 - v_fmac_f32_e64 v89, v8, s45 // 00000001534C: D13B0059 00005B08 - v_cvt_f32_bf16_sdwa v8, v169 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000015354: 7E10B6F9 000416A9 - v_fmac_f32_e64 v90, v8, s45 // 00000001535C: D13B005A 00005B08 - v_cvt_f32_bf16_sdwa v8, v169 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000015364: 7E10B6F9 000516A9 - v_fmac_f32_e64 v91, v8, s45 // 00000001536C: D13B005B 00005B08 - v_cvt_f32_bf16_sdwa v8, v170 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000015374: 7E10B6F9 000416AA - v_fmac_f32_e64 v92, v8, s45 // 00000001537C: D13B005C 00005B08 - v_cvt_f32_bf16_sdwa v8, v170 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000015384: 7E10B6F9 000516AA - v_fmac_f32_e64 v93, v8, s45 // 00000001538C: D13B005D 00005B08 - v_cvt_f32_bf16_sdwa v8, v171 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000015394: 7E10B6F9 000416AB - v_fmac_f32_e64 v94, v8, s45 // 00000001539C: D13B005E 00005B08 - v_cvt_f32_bf16_sdwa v8, v171 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000153A4: 7E10B6F9 000516AB - v_fmac_f32_e64 v95, v8, s45 // 0000000153AC: D13B005F 00005B08 - v_cvt_pk_bf16_f32 v88, v88, v89 // 0000000153B4: D2680058 0002B358 - v_cvt_pk_bf16_f32 v89, 
v90, v91 // 0000000153BC: D2680059 0002B75A - v_cvt_pk_bf16_f32 v90, v92, v93 // 0000000153C4: D268005A 0002BB5C - v_cvt_pk_bf16_f32 v91, v94, v95 // 0000000153CC: D268005B 0002BF5E - s_lshl_b32 s12, s36, 1 // 0000000153D4: 8E0C8124 - s_add_u32 s16, s16, s12 // 0000000153D8: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000153DC: 82118011 - buffer_store_dwordx4 v[88:91], v15, s[16:19], 0 offen nt // 0000000153E0: E07E1000 8004580F - s_waitcnt vmcnt(13) // 0000000153E8: BF8C0F7D - v_cvt_f32_bf16_sdwa v8, v172 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000153EC: 7E10B6F9 000416AC - v_fmac_f32_e64 v96, v8, s45 // 0000000153F4: D13B0060 00005B08 - v_cvt_f32_bf16_sdwa v8, v172 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000153FC: 7E10B6F9 000516AC - v_fmac_f32_e64 v97, v8, s45 // 000000015404: D13B0061 00005B08 - v_cvt_f32_bf16_sdwa v8, v173 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001540C: 7E10B6F9 000416AD - v_fmac_f32_e64 v98, v8, s45 // 000000015414: D13B0062 00005B08 - v_cvt_f32_bf16_sdwa v8, v173 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001541C: 7E10B6F9 000516AD - v_fmac_f32_e64 v99, v8, s45 // 000000015424: D13B0063 00005B08 - v_cvt_f32_bf16_sdwa v8, v174 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001542C: 7E10B6F9 000416AE - v_fmac_f32_e64 v100, v8, s45 // 000000015434: D13B0064 00005B08 - v_cvt_f32_bf16_sdwa v8, v174 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001543C: 7E10B6F9 000516AE - v_fmac_f32_e64 v101, v8, s45 // 000000015444: D13B0065 00005B08 - v_cvt_f32_bf16_sdwa v8, v175 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001544C: 7E10B6F9 000416AF - v_fmac_f32_e64 v102, v8, s45 // 000000015454: D13B0066 00005B08 - v_cvt_f32_bf16_sdwa v8, v175 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001545C: 7E10B6F9 000516AF - v_fmac_f32_e64 v103, v8, s45 // 000000015464: D13B0067 00005B08 - v_cvt_pk_bf16_f32 
v96, v96, v97 // 00000001546C: D2680060 0002C360 - v_cvt_pk_bf16_f32 v97, v98, v99 // 000000015474: D2680061 0002C762 - v_cvt_pk_bf16_f32 v98, v100, v101 // 00000001547C: D2680062 0002CB64 - v_cvt_pk_bf16_f32 v99, v102, v103 // 000000015484: D2680063 0002CF66 - s_lshl_b32 s12, s36, 1 // 00000001548C: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000015490: 80100C10 - s_addc_u32 s17, s17, 0 // 000000015494: 82118011 - buffer_store_dwordx4 v[96:99], v15, s[16:19], 0 offen nt // 000000015498: E07E1000 8004600F - s_waitcnt vmcnt(13) // 0000000154A0: BF8C0F7D - v_cvt_f32_bf16_sdwa v8, v176 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000154A4: 7E10B6F9 000416B0 - v_fmac_f32_e64 v104, v8, s45 // 0000000154AC: D13B0068 00005B08 - v_cvt_f32_bf16_sdwa v8, v176 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000154B4: 7E10B6F9 000516B0 - v_fmac_f32_e64 v105, v8, s45 // 0000000154BC: D13B0069 00005B08 - v_cvt_f32_bf16_sdwa v8, v177 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000154C4: 7E10B6F9 000416B1 - v_fmac_f32_e64 v106, v8, s45 // 0000000154CC: D13B006A 00005B08 - v_cvt_f32_bf16_sdwa v8, v177 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000154D4: 7E10B6F9 000516B1 - v_fmac_f32_e64 v107, v8, s45 // 0000000154DC: D13B006B 00005B08 - v_cvt_f32_bf16_sdwa v8, v178 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000154E4: 7E10B6F9 000416B2 - v_fmac_f32_e64 v108, v8, s45 // 0000000154EC: D13B006C 00005B08 - v_cvt_f32_bf16_sdwa v8, v178 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000154F4: 7E10B6F9 000516B2 - v_fmac_f32_e64 v109, v8, s45 // 0000000154FC: D13B006D 00005B08 - v_cvt_f32_bf16_sdwa v8, v179 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000015504: 7E10B6F9 000416B3 - v_fmac_f32_e64 v110, v8, s45 // 00000001550C: D13B006E 00005B08 - v_cvt_f32_bf16_sdwa v8, v179 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000015514: 7E10B6F9 000516B3 - 
v_fmac_f32_e64 v111, v8, s45 // 00000001551C: D13B006F 00005B08 - v_cvt_pk_bf16_f32 v104, v104, v105 // 000000015524: D2680068 0002D368 - v_cvt_pk_bf16_f32 v105, v106, v107 // 00000001552C: D2680069 0002D76A - v_cvt_pk_bf16_f32 v106, v108, v109 // 000000015534: D268006A 0002DB6C - v_cvt_pk_bf16_f32 v107, v110, v111 // 00000001553C: D268006B 0002DF6E - s_lshl_b32 s12, s36, 1 // 000000015544: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000015548: 80100C10 - s_addc_u32 s17, s17, 0 // 00000001554C: 82118011 - buffer_store_dwordx4 v[104:107], v15, s[16:19], 0 offen nt // 000000015550: E07E1000 8004680F - s_waitcnt vmcnt(13) // 000000015558: BF8C0F7D - v_cvt_f32_bf16_sdwa v8, v180 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001555C: 7E10B6F9 000416B4 - v_fmac_f32_e64 v112, v8, s45 // 000000015564: D13B0070 00005B08 - v_cvt_f32_bf16_sdwa v8, v180 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001556C: 7E10B6F9 000516B4 - v_fmac_f32_e64 v113, v8, s45 // 000000015574: D13B0071 00005B08 - v_cvt_f32_bf16_sdwa v8, v181 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001557C: 7E10B6F9 000416B5 - v_fmac_f32_e64 v114, v8, s45 // 000000015584: D13B0072 00005B08 - v_cvt_f32_bf16_sdwa v8, v181 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001558C: 7E10B6F9 000516B5 - v_fmac_f32_e64 v115, v8, s45 // 000000015594: D13B0073 00005B08 - v_cvt_f32_bf16_sdwa v8, v182 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001559C: 7E10B6F9 000416B6 - v_fmac_f32_e64 v116, v8, s45 // 0000000155A4: D13B0074 00005B08 - v_cvt_f32_bf16_sdwa v8, v182 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000155AC: 7E10B6F9 000516B6 - v_fmac_f32_e64 v117, v8, s45 // 0000000155B4: D13B0075 00005B08 - v_cvt_f32_bf16_sdwa v8, v183 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000155BC: 7E10B6F9 000416B7 - v_fmac_f32_e64 v118, v8, s45 // 0000000155C4: D13B0076 00005B08 - v_cvt_f32_bf16_sdwa v8, v183 
dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000155CC: 7E10B6F9 000516B7 - v_fmac_f32_e64 v119, v8, s45 // 0000000155D4: D13B0077 00005B08 - v_cvt_pk_bf16_f32 v112, v112, v113 // 0000000155DC: D2680070 0002E370 - v_cvt_pk_bf16_f32 v113, v114, v115 // 0000000155E4: D2680071 0002E772 - v_cvt_pk_bf16_f32 v114, v116, v117 // 0000000155EC: D2680072 0002EB74 - v_cvt_pk_bf16_f32 v115, v118, v119 // 0000000155F4: D2680073 0002EF76 - s_lshl_b32 s12, s36, 1 // 0000000155FC: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000015600: 80100C10 - s_addc_u32 s17, s17, 0 // 000000015604: 82118011 - buffer_store_dwordx4 v[112:115], v15, s[16:19], 0 offen nt // 000000015608: E07E1000 8004700F - s_waitcnt vmcnt(13) // 000000015610: BF8C0F7D - v_cvt_f32_bf16_sdwa v8, v184 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000015614: 7E10B6F9 000416B8 - v_fmac_f32_e64 v120, v8, s45 // 00000001561C: D13B0078 00005B08 - v_cvt_f32_bf16_sdwa v8, v184 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000015624: 7E10B6F9 000516B8 - v_fmac_f32_e64 v121, v8, s45 // 00000001562C: D13B0079 00005B08 - v_cvt_f32_bf16_sdwa v8, v185 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000015634: 7E10B6F9 000416B9 - v_fmac_f32_e64 v122, v8, s45 // 00000001563C: D13B007A 00005B08 - v_cvt_f32_bf16_sdwa v8, v185 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000015644: 7E10B6F9 000516B9 - v_fmac_f32_e64 v123, v8, s45 // 00000001564C: D13B007B 00005B08 - v_cvt_f32_bf16_sdwa v8, v186 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000015654: 7E10B6F9 000416BA - v_fmac_f32_e64 v124, v8, s45 // 00000001565C: D13B007C 00005B08 - v_cvt_f32_bf16_sdwa v8, v186 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000015664: 7E10B6F9 000516BA - v_fmac_f32_e64 v125, v8, s45 // 00000001566C: D13B007D 00005B08 - v_cvt_f32_bf16_sdwa v8, v187 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000015674: 7E10B6F9 000416BB - 
v_fmac_f32_e64 v126, v8, s45 // 00000001567C: D13B007E 00005B08 - v_cvt_f32_bf16_sdwa v8, v187 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000015684: 7E10B6F9 000516BB - v_fmac_f32_e64 v127, v8, s45 // 00000001568C: D13B007F 00005B08 - v_cvt_pk_bf16_f32 v120, v120, v121 // 000000015694: D2680078 0002F378 - v_cvt_pk_bf16_f32 v121, v122, v123 // 00000001569C: D2680079 0002F77A - v_cvt_pk_bf16_f32 v122, v124, v125 // 0000000156A4: D268007A 0002FB7C - v_cvt_pk_bf16_f32 v123, v126, v127 // 0000000156AC: D268007B 0002FF7E - s_lshl_b32 s12, s36, 1 // 0000000156B4: 8E0C8124 - s_add_u32 s16, s16, s12 // 0000000156B8: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000156BC: 82118011 - buffer_store_dwordx4 v[120:123], v15, s[16:19], 0 offen nt // 0000000156C0: E07E1000 8004780F - s_waitcnt vmcnt(13) // 0000000156C8: BF8C0F7D - v_cvt_f32_bf16_sdwa v8, v188 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000156CC: 7E10B6F9 000416BC - v_fmac_f32_e64 v136, v8, s45 // 0000000156D4: D13B0088 00005B08 - v_cvt_f32_bf16_sdwa v8, v188 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000156DC: 7E10B6F9 000516BC - v_fmac_f32_e64 v137, v8, s45 // 0000000156E4: D13B0089 00005B08 - v_cvt_f32_bf16_sdwa v8, v189 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000156EC: 7E10B6F9 000416BD - v_fmac_f32_e64 v138, v8, s45 // 0000000156F4: D13B008A 00005B08 - v_cvt_f32_bf16_sdwa v8, v189 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000156FC: 7E10B6F9 000516BD - v_fmac_f32_e64 v139, v8, s45 // 000000015704: D13B008B 00005B08 - v_cvt_f32_bf16_sdwa v8, v190 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001570C: 7E10B6F9 000416BE - v_fmac_f32_e64 v140, v8, s45 // 000000015714: D13B008C 00005B08 - v_cvt_f32_bf16_sdwa v8, v190 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001571C: 7E10B6F9 000516BE - v_fmac_f32_e64 v141, v8, s45 // 000000015724: D13B008D 00005B08 - v_cvt_f32_bf16_sdwa v8, v191 
dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001572C: 7E10B6F9 000416BF - v_fmac_f32_e64 v142, v8, s45 // 000000015734: D13B008E 00005B08 - v_cvt_f32_bf16_sdwa v8, v191 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001573C: 7E10B6F9 000516BF - v_fmac_f32_e64 v143, v8, s45 // 000000015744: D13B008F 00005B08 - v_cvt_pk_bf16_f32 v136, v136, v137 // 00000001574C: D2680088 00031388 - v_cvt_pk_bf16_f32 v137, v138, v139 // 000000015754: D2680089 0003178A - v_cvt_pk_bf16_f32 v138, v140, v141 // 00000001575C: D268008A 00031B8C - v_cvt_pk_bf16_f32 v139, v142, v143 // 000000015764: D268008B 00031F8E - s_lshl_b32 s12, s36, 1 // 00000001576C: 8E0C8124 - s_add_u32 s16, s16, s12 // 000000015770: 80100C10 - s_addc_u32 s17, s17, 0 // 000000015774: 82118011 - buffer_store_dwordx4 v[136:139], v15, s[16:19], 0 offen nt // 000000015778: E07E1000 8004880F - s_nop 0 // 000000015780: BF800000 - s_branch label_GW_End_2 // 000000015784: BF822792 - -label_GW_B1_E1_N: - v_mov_b32_e32 v10, 0x80000000 // 000000015788: 7E1402FF 80000000 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000015790: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000015798: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000157A0: 86A2221E - v_add_lshl_u32 v15, v6, v4, 1 // 0000000157A4: D1FE000F 02060906 - v_cndmask_b32_e64 v15, v10, v15, s[34:35] // 0000000157AC: D100000F 008A1F0A - buffer_load_dwordx4 v[128:131], v15, s[20:23], 0 offen // 0000000157B4: E05C1000 8005800F - v_add_lshl_u32 v15, v7, v4, 1 // 0000000157BC: D1FE000F 02060907 - v_cndmask_b32_e64 v15, v10, v15, s[34:35] // 0000000157C4: D100000F 008A1F0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 0000000157CC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 0000000157D4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000157DC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000157E4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000157EC: D0C90022 00003305 - 
s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000157F4: 86A2221E - v_add_lshl_u32 v135, v6, v4, 1 // 0000000157F8: D1FE0087 02060906 - v_cndmask_b32_e64 v135, v10, v135, s[34:35] // 000000015800: D1000087 008B0F0A - buffer_load_dwordx4 v[152:155], v135, s[20:23], 0 offen // 000000015808: E05C1000 80059887 - v_add_lshl_u32 v135, v7, v4, 1 // 000000015810: D1FE0087 02060907 - v_cndmask_b32_e64 v135, v10, v135, s[34:35] // 000000015818: D1000087 008B0F0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000015820: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000015828: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000015830: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000015838: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000015840: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000015848: 86A2221E - v_add_lshl_u32 v160, v6, v4, 1 // 00000001584C: D1FE00A0 02060906 - v_cndmask_b32_e64 v160, v10, v160, s[34:35] // 000000015854: D10000A0 008B410A - buffer_load_dwordx4 v[156:159], v160, s[20:23], 0 offen // 00000001585C: E05C1000 80059CA0 - v_add_lshl_u32 v160, v7, v4, 1 // 000000015864: D1FE00A0 02060907 - v_cndmask_b32_e64 v160, v10, v160, s[34:35] // 00000001586C: D10000A0 008B410A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000015874: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001587C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000015884: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001588C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000015894: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001589C: 86A2221E - v_add_lshl_u32 v161, v6, v4, 1 // 0000000158A0: D1FE00A1 02060906 - v_cndmask_b32_e64 v161, v10, v161, s[34:35] // 0000000158A8: D10000A1 008B430A - buffer_load_dwordx4 v[164:167], v161, s[20:23], 0 offen // 0000000158B0: E05C1000 8005A4A1 - v_add_lshl_u32 v161, v7, v4, 1 // 0000000158B8: D1FE00A1 02060907 - v_cndmask_b32_e64 v161, v10, 
v161, s[34:35] // 0000000158C0: D10000A1 008B430A - v_add_co_u32_e64 v5, vcc, v5, 1 // 0000000158C8: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 0000000158D0: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000158D8: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000158E0: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000158E8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000158F0: 86A2221E - v_add_lshl_u32 v162, v6, v4, 1 // 0000000158F4: D1FE00A2 02060906 - v_cndmask_b32_e64 v162, v10, v162, s[34:35] // 0000000158FC: D10000A2 008B450A - buffer_load_dwordx4 v[168:171], v162, s[20:23], 0 offen // 000000015904: E05C1000 8005A8A2 - v_add_lshl_u32 v162, v7, v4, 1 // 00000001590C: D1FE00A2 02060907 - v_cndmask_b32_e64 v162, v10, v162, s[34:35] // 000000015914: D10000A2 008B450A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001591C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000015924: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001592C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000015934: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001593C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000015944: 86A2221E - v_add_lshl_u32 v163, v6, v4, 1 // 000000015948: D1FE00A3 02060906 - v_cndmask_b32_e64 v163, v10, v163, s[34:35] // 000000015950: D10000A3 008B470A - buffer_load_dwordx4 v[172:175], v163, s[20:23], 0 offen // 000000015958: E05C1000 8005ACA3 - v_add_lshl_u32 v163, v7, v4, 1 // 000000015960: D1FE00A3 02060907 - v_cndmask_b32_e64 v163, v10, v163, s[34:35] // 000000015968: D10000A3 008B470A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000015970: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000015978: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000015980: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000015988: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000015990: D0C90022 00003305 - s_and_b64 
s[34:35], s[30:31], s[34:35] // 000000015998: 86A2221E - v_add_lshl_u32 v180, v6, v4, 1 // 00000001599C: D1FE00B4 02060906 - v_cndmask_b32_e64 v180, v10, v180, s[34:35] // 0000000159A4: D10000B4 008B690A - buffer_load_dwordx4 v[176:179], v180, s[20:23], 0 offen // 0000000159AC: E05C1000 8005B0B4 - v_add_lshl_u32 v180, v7, v4, 1 // 0000000159B4: D1FE00B4 02060907 - v_cndmask_b32_e64 v180, v10, v180, s[34:35] // 0000000159BC: D10000B4 008B690A - v_add_co_u32_e64 v5, vcc, v5, 1 // 0000000159C4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 0000000159CC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000159D4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000159DC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000159E4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000159EC: 86A2221E - v_add_lshl_u32 v181, v6, v4, 1 // 0000000159F0: D1FE00B5 02060906 - v_cndmask_b32_e64 v181, v10, v181, s[34:35] // 0000000159F8: D10000B5 008B6B0A - buffer_load_dwordx4 v[184:187], v181, s[20:23], 0 offen // 000000015A00: E05C1000 8005B8B5 - v_add_lshl_u32 v181, v7, v4, 1 // 000000015A08: D1FE00B5 02060907 - v_cndmask_b32_e64 v181, v10, v181, s[34:35] // 000000015A10: D10000B5 008B6B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000015A18: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000015A20: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000015A28: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000015A30: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000015A38: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000015A40: 86A2221E - v_add_lshl_u32 v182, v6, v4, 1 // 000000015A44: D1FE00B6 02060906 - v_cndmask_b32_e64 v182, v10, v182, s[34:35] // 000000015A4C: D10000B6 008B6D0A - buffer_load_dwordx4 v[188:191], v182, s[20:23], 0 offen // 000000015A54: E05C1000 8005BCB6 - v_add_lshl_u32 v182, v7, v4, 1 // 000000015A5C: D1FE00B6 02060907 - v_cndmask_b32_e64 v182, v10, v182, 
s[34:35] // 000000015A64: D10000B6 008B6D0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000015A6C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000015A74: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000015A7C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000015A84: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000015A8C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000015A94: 86A2221E - v_add_lshl_u32 v183, v6, v4, 1 // 000000015A98: D1FE00B7 02060906 - v_cndmask_b32_e64 v183, v10, v183, s[34:35] // 000000015AA0: D10000B7 008B6F0A - buffer_load_dwordx4 v[192:195], v183, s[20:23], 0 offen // 000000015AA8: E05C1000 8005C0B7 - v_add_lshl_u32 v183, v7, v4, 1 // 000000015AB0: D1FE00B7 02060907 - v_cndmask_b32_e64 v183, v10, v183, s[34:35] // 000000015AB8: D10000B7 008B6F0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000015AC0: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000015AC8: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000015AD0: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000015AD8: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000015AE0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000015AE8: 86A2221E - v_add_lshl_u32 v200, v6, v4, 1 // 000000015AEC: D1FE00C8 02060906 - v_cndmask_b32_e64 v200, v10, v200, s[34:35] // 000000015AF4: D10000C8 008B910A - buffer_load_dwordx4 v[196:199], v200, s[20:23], 0 offen // 000000015AFC: E05C1000 8005C4C8 - v_add_lshl_u32 v200, v7, v4, 1 // 000000015B04: D1FE00C8 02060907 - v_cndmask_b32_e64 v200, v10, v200, s[34:35] // 000000015B0C: D10000C8 008B910A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000015B14: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000015B1C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000015B24: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000015B2C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000015B34: D0C90022 00003305 - s_and_b64 s[34:35], 
s[30:31], s[34:35] // 000000015B3C: 86A2221E - v_add_lshl_u32 v201, v6, v4, 1 // 000000015B40: D1FE00C9 02060906 - v_cndmask_b32_e64 v201, v10, v201, s[34:35] // 000000015B48: D10000C9 008B930A - buffer_load_dwordx4 v[204:207], v201, s[20:23], 0 offen // 000000015B50: E05C1000 8005CCC9 - v_add_lshl_u32 v201, v7, v4, 1 // 000000015B58: D1FE00C9 02060907 - v_cndmask_b32_e64 v201, v10, v201, s[34:35] // 000000015B60: D10000C9 008B930A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000015B68: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000015B70: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000015B78: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000015B80: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000015B88: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000015B90: 86A2221E - v_add_lshl_u32 v202, v6, v4, 1 // 000000015B94: D1FE00CA 02060906 - v_cndmask_b32_e64 v202, v10, v202, s[34:35] // 000000015B9C: D10000CA 008B950A - buffer_load_dwordx4 v[208:211], v202, s[20:23], 0 offen // 000000015BA4: E05C1000 8005D0CA - v_add_lshl_u32 v202, v7, v4, 1 // 000000015BAC: D1FE00CA 02060907 - v_cndmask_b32_e64 v202, v10, v202, s[34:35] // 000000015BB4: D10000CA 008B950A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000015BBC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000015BC4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000015BCC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000015BD4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000015BDC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000015BE4: 86A2221E - v_add_lshl_u32 v203, v6, v4, 1 // 000000015BE8: D1FE00CB 02060906 - v_cndmask_b32_e64 v203, v10, v203, s[34:35] // 000000015BF0: D10000CB 008B970A - buffer_load_dwordx4 v[212:215], v203, s[20:23], 0 offen // 000000015BF8: E05C1000 8005D4CB - v_add_lshl_u32 v203, v7, v4, 1 // 000000015C00: D1FE00CB 02060907 - v_cndmask_b32_e64 v203, v10, v203, s[34:35] // 
000000015C08: D10000CB 008B970A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000015C10: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000015C18: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000015C20: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000015C28: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000015C30: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000015C38: 86A2221E - v_add_lshl_u32 v220, v6, v4, 1 // 000000015C3C: D1FE00DC 02060906 - v_cndmask_b32_e64 v220, v10, v220, s[34:35] // 000000015C44: D10000DC 008BB90A - buffer_load_dwordx4 v[216:219], v220, s[20:23], 0 offen // 000000015C4C: E05C1000 8005D8DC - v_add_lshl_u32 v220, v7, v4, 1 // 000000015C54: D1FE00DC 02060907 - v_cndmask_b32_e64 v220, v10, v220, s[34:35] // 000000015C5C: D10000DC 008BB90A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000015C64: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000015C6C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000015C74: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000015C7C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000015C84: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000015C8C: 86A2221E - v_add_lshl_u32 v221, v6, v4, 1 // 000000015C90: D1FE00DD 02060906 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 000000015C98: D10000DD 008BBB0A - buffer_load_dwordx4 v[224:227], v221, s[20:23], 0 offen // 000000015CA0: E05C1000 8005E0DD - v_add_lshl_u32 v221, v7, v4, 1 // 000000015CA8: D1FE00DD 02060907 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 000000015CB0: D10000DD 008BBB0A - v_accvgpr_read_b32 v16, a0 // 000000015CB8: D3D84010 18000100 - v_accvgpr_read_b32 v17, a4 // 000000015CC0: D3D84011 18000104 - v_accvgpr_read_b32 v18, a8 // 000000015CC8: D3D84012 18000108 - v_accvgpr_read_b32 v19, a12 // 000000015CD0: D3D84013 1800010C - v_accvgpr_read_b32 v20, a16 // 000000015CD8: D3D84014 18000110 - v_accvgpr_read_b32 v21, a20 // 000000015CE0: 
D3D84015 18000114 - v_accvgpr_read_b32 v22, a24 // 000000015CE8: D3D84016 18000118 - v_accvgpr_read_b32 v23, a28 // 000000015CF0: D3D84017 1800011C - v_accvgpr_read_b32 v24, a32 // 000000015CF8: D3D84018 18000120 - v_accvgpr_read_b32 v25, a36 // 000000015D00: D3D84019 18000124 - v_accvgpr_read_b32 v26, a40 // 000000015D08: D3D8401A 18000128 - v_accvgpr_read_b32 v27, a44 // 000000015D10: D3D8401B 1800012C - v_accvgpr_read_b32 v28, a48 // 000000015D18: D3D8401C 18000130 - v_accvgpr_read_b32 v29, a52 // 000000015D20: D3D8401D 18000134 - v_accvgpr_read_b32 v30, a56 // 000000015D28: D3D8401E 18000138 - v_accvgpr_read_b32 v31, a60 // 000000015D30: D3D8401F 1800013C - v_accvgpr_read_b32 v32, a64 // 000000015D38: D3D84020 18000140 - v_accvgpr_read_b32 v33, a68 // 000000015D40: D3D84021 18000144 - v_accvgpr_read_b32 v34, a72 // 000000015D48: D3D84022 18000148 - v_accvgpr_read_b32 v35, a76 // 000000015D50: D3D84023 1800014C - v_accvgpr_read_b32 v36, a80 // 000000015D58: D3D84024 18000150 - v_accvgpr_read_b32 v37, a84 // 000000015D60: D3D84025 18000154 - v_accvgpr_read_b32 v38, a88 // 000000015D68: D3D84026 18000158 - v_accvgpr_read_b32 v39, a92 // 000000015D70: D3D84027 1800015C - v_accvgpr_read_b32 v40, a96 // 000000015D78: D3D84028 18000160 - v_accvgpr_read_b32 v41, a100 // 000000015D80: D3D84029 18000164 - v_accvgpr_read_b32 v42, a104 // 000000015D88: D3D8402A 18000168 - v_accvgpr_read_b32 v43, a108 // 000000015D90: D3D8402B 1800016C - v_accvgpr_read_b32 v44, a112 // 000000015D98: D3D8402C 18000170 - v_accvgpr_read_b32 v45, a116 // 000000015DA0: D3D8402D 18000174 - v_accvgpr_read_b32 v46, a120 // 000000015DA8: D3D8402E 18000178 - v_accvgpr_read_b32 v47, a124 // 000000015DB0: D3D8402F 1800017C - v_accvgpr_read_b32 v48, a128 // 000000015DB8: D3D84030 18000180 - v_accvgpr_read_b32 v49, a132 // 000000015DC0: D3D84031 18000184 - v_accvgpr_read_b32 v50, a136 // 000000015DC8: D3D84032 18000188 - v_accvgpr_read_b32 v51, a140 // 000000015DD0: D3D84033 1800018C - v_accvgpr_read_b32 
v52, a144 // 000000015DD8: D3D84034 18000190 - v_accvgpr_read_b32 v53, a148 // 000000015DE0: D3D84035 18000194 - v_accvgpr_read_b32 v54, a152 // 000000015DE8: D3D84036 18000198 - v_accvgpr_read_b32 v55, a156 // 000000015DF0: D3D84037 1800019C - v_accvgpr_read_b32 v56, a160 // 000000015DF8: D3D84038 180001A0 - v_accvgpr_read_b32 v57, a164 // 000000015E00: D3D84039 180001A4 - v_accvgpr_read_b32 v58, a168 // 000000015E08: D3D8403A 180001A8 - v_accvgpr_read_b32 v59, a172 // 000000015E10: D3D8403B 180001AC - v_accvgpr_read_b32 v60, a176 // 000000015E18: D3D8403C 180001B0 - v_accvgpr_read_b32 v61, a180 // 000000015E20: D3D8403D 180001B4 - v_accvgpr_read_b32 v62, a184 // 000000015E28: D3D8403E 180001B8 - v_accvgpr_read_b32 v63, a188 // 000000015E30: D3D8403F 180001BC - v_accvgpr_read_b32 v64, a192 // 000000015E38: D3D84040 180001C0 - v_accvgpr_read_b32 v65, a196 // 000000015E40: D3D84041 180001C4 - v_accvgpr_read_b32 v66, a200 // 000000015E48: D3D84042 180001C8 - v_accvgpr_read_b32 v67, a204 // 000000015E50: D3D84043 180001CC - v_accvgpr_read_b32 v68, a208 // 000000015E58: D3D84044 180001D0 - v_accvgpr_read_b32 v69, a212 // 000000015E60: D3D84045 180001D4 - v_accvgpr_read_b32 v70, a216 // 000000015E68: D3D84046 180001D8 - v_accvgpr_read_b32 v71, a220 // 000000015E70: D3D84047 180001DC - v_accvgpr_read_b32 v72, a224 // 000000015E78: D3D84048 180001E0 - v_accvgpr_read_b32 v73, a228 // 000000015E80: D3D84049 180001E4 - v_accvgpr_read_b32 v74, a232 // 000000015E88: D3D8404A 180001E8 - v_accvgpr_read_b32 v75, a236 // 000000015E90: D3D8404B 180001EC - v_accvgpr_read_b32 v76, a240 // 000000015E98: D3D8404C 180001F0 - v_accvgpr_read_b32 v77, a244 // 000000015EA0: D3D8404D 180001F4 - v_accvgpr_read_b32 v78, a248 // 000000015EA8: D3D8404E 180001F8 - v_accvgpr_read_b32 v79, a252 // 000000015EB0: D3D8404F 180001FC - v_accvgpr_read_b32 v80, a1 // 000000015EB8: D3D84050 18000101 - v_accvgpr_read_b32 v81, a5 // 000000015EC0: D3D84051 18000105 - v_accvgpr_read_b32 v82, a9 // 
000000015EC8: D3D84052 18000109 - v_accvgpr_read_b32 v83, a13 // 000000015ED0: D3D84053 1800010D - v_accvgpr_read_b32 v84, a17 // 000000015ED8: D3D84054 18000111 - v_accvgpr_read_b32 v85, a21 // 000000015EE0: D3D84055 18000115 - v_accvgpr_read_b32 v86, a25 // 000000015EE8: D3D84056 18000119 - v_accvgpr_read_b32 v87, a29 // 000000015EF0: D3D84057 1800011D - v_accvgpr_read_b32 v88, a33 // 000000015EF8: D3D84058 18000121 - v_accvgpr_read_b32 v89, a37 // 000000015F00: D3D84059 18000125 - v_accvgpr_read_b32 v90, a41 // 000000015F08: D3D8405A 18000129 - v_accvgpr_read_b32 v91, a45 // 000000015F10: D3D8405B 1800012D - v_accvgpr_read_b32 v92, a49 // 000000015F18: D3D8405C 18000131 - v_accvgpr_read_b32 v93, a53 // 000000015F20: D3D8405D 18000135 - v_accvgpr_read_b32 v94, a57 // 000000015F28: D3D8405E 18000139 - v_accvgpr_read_b32 v95, a61 // 000000015F30: D3D8405F 1800013D - v_accvgpr_read_b32 v96, a65 // 000000015F38: D3D84060 18000141 - v_accvgpr_read_b32 v97, a69 // 000000015F40: D3D84061 18000145 - v_accvgpr_read_b32 v98, a73 // 000000015F48: D3D84062 18000149 - v_accvgpr_read_b32 v99, a77 // 000000015F50: D3D84063 1800014D - v_accvgpr_read_b32 v100, a81 // 000000015F58: D3D84064 18000151 - v_accvgpr_read_b32 v101, a85 // 000000015F60: D3D84065 18000155 - v_accvgpr_read_b32 v102, a89 // 000000015F68: D3D84066 18000159 - v_accvgpr_read_b32 v103, a93 // 000000015F70: D3D84067 1800015D - v_accvgpr_read_b32 v104, a97 // 000000015F78: D3D84068 18000161 - v_accvgpr_read_b32 v105, a101 // 000000015F80: D3D84069 18000165 - v_accvgpr_read_b32 v106, a105 // 000000015F88: D3D8406A 18000169 - v_accvgpr_read_b32 v107, a109 // 000000015F90: D3D8406B 1800016D - v_accvgpr_read_b32 v108, a113 // 000000015F98: D3D8406C 18000171 - v_accvgpr_read_b32 v109, a117 // 000000015FA0: D3D8406D 18000175 - v_accvgpr_read_b32 v110, a121 // 000000015FA8: D3D8406E 18000179 - v_accvgpr_read_b32 v111, a125 // 000000015FB0: D3D8406F 1800017D - v_accvgpr_read_b32 v112, a129 // 000000015FB8: D3D84070 
18000181 - v_accvgpr_read_b32 v113, a133 // 000000015FC0: D3D84071 18000185 - v_accvgpr_read_b32 v114, a137 // 000000015FC8: D3D84072 18000189 - v_accvgpr_read_b32 v115, a141 // 000000015FD0: D3D84073 1800018D - v_accvgpr_read_b32 v116, a145 // 000000015FD8: D3D84074 18000191 - v_accvgpr_read_b32 v117, a149 // 000000015FE0: D3D84075 18000195 - v_accvgpr_read_b32 v118, a153 // 000000015FE8: D3D84076 18000199 - v_accvgpr_read_b32 v119, a157 // 000000015FF0: D3D84077 1800019D - v_accvgpr_read_b32 v120, a161 // 000000015FF8: D3D84078 180001A1 - v_accvgpr_read_b32 v121, a165 // 000000016000: D3D84079 180001A5 - v_accvgpr_read_b32 v122, a169 // 000000016008: D3D8407A 180001A9 - v_accvgpr_read_b32 v123, a173 // 000000016010: D3D8407B 180001AD - v_accvgpr_read_b32 v124, a177 // 000000016018: D3D8407C 180001B1 - v_accvgpr_read_b32 v125, a181 // 000000016020: D3D8407D 180001B5 - v_accvgpr_read_b32 v126, a185 // 000000016028: D3D8407E 180001B9 - v_accvgpr_read_b32 v127, a189 // 000000016030: D3D8407F 180001BD - v_accvgpr_read_b32 v136, a193 // 000000016038: D3D84088 180001C1 - v_accvgpr_read_b32 v137, a197 // 000000016040: D3D84089 180001C5 - v_accvgpr_read_b32 v138, a201 // 000000016048: D3D8408A 180001C9 - v_accvgpr_read_b32 v139, a205 // 000000016050: D3D8408B 180001CD - v_accvgpr_read_b32 v140, a209 // 000000016058: D3D8408C 180001D1 - v_accvgpr_read_b32 v141, a213 // 000000016060: D3D8408D 180001D5 - v_accvgpr_read_b32 v142, a217 // 000000016068: D3D8408E 180001D9 - v_accvgpr_read_b32 v143, a221 // 000000016070: D3D8408F 180001DD - v_accvgpr_read_b32 v144, a225 // 000000016078: D3D84090 180001E1 - v_accvgpr_read_b32 v145, a229 // 000000016080: D3D84091 180001E5 - v_accvgpr_read_b32 v146, a233 // 000000016088: D3D84092 180001E9 - v_accvgpr_read_b32 v147, a237 // 000000016090: D3D84093 180001ED - v_accvgpr_read_b32 v148, a241 // 000000016098: D3D84094 180001F1 - v_accvgpr_read_b32 v149, a245 // 0000000160A0: D3D84095 180001F5 - v_accvgpr_read_b32 v150, a249 // 
0000000160A8: D3D84096 180001F9 - v_accvgpr_read_b32 v151, a253 // 0000000160B0: D3D84097 180001FD - v_pk_mul_f32 v[16:17], s[44:45], v[16:17] op_sel_hi:[0,1] // 0000000160B8: D3B14010 1002202C - v_pk_mul_f32 v[18:19], s[44:45], v[18:19] op_sel_hi:[0,1] // 0000000160C0: D3B14012 1002242C - v_pk_mul_f32 v[20:21], s[44:45], v[20:21] op_sel_hi:[0,1] // 0000000160C8: D3B14014 1002282C - v_pk_mul_f32 v[22:23], s[44:45], v[22:23] op_sel_hi:[0,1] // 0000000160D0: D3B14016 10022C2C - v_pk_mul_f32 v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 0000000160D8: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 0000000160E0: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 0000000160E8: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] op_sel_hi:[0,1] // 0000000160F0: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 0000000160F8: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 000000016100: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 000000016108: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 000000016110: D3B14026 10024C2C - v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 000000016118: D3B14028 1002502C - v_pk_mul_f32 v[42:43], s[44:45], v[42:43] op_sel_hi:[0,1] // 000000016120: D3B1402A 1002542C - v_pk_mul_f32 v[44:45], s[44:45], v[44:45] op_sel_hi:[0,1] // 000000016128: D3B1402C 1002582C - v_pk_mul_f32 v[46:47], s[44:45], v[46:47] op_sel_hi:[0,1] // 000000016130: D3B1402E 10025C2C - v_pk_mul_f32 v[48:49], s[44:45], v[48:49] op_sel_hi:[0,1] // 000000016138: D3B14030 1002602C - v_pk_mul_f32 v[50:51], s[44:45], v[50:51] op_sel_hi:[0,1] // 000000016140: D3B14032 1002642C - v_pk_mul_f32 v[52:53], s[44:45], v[52:53] op_sel_hi:[0,1] // 000000016148: D3B14034 1002682C - v_pk_mul_f32 v[54:55], s[44:45], v[54:55] op_sel_hi:[0,1] // 000000016150: D3B14036 10026C2C 
- v_pk_mul_f32 v[56:57], s[44:45], v[56:57] op_sel_hi:[0,1] // 000000016158: D3B14038 1002702C - v_pk_mul_f32 v[58:59], s[44:45], v[58:59] op_sel_hi:[0,1] // 000000016160: D3B1403A 1002742C - v_pk_mul_f32 v[60:61], s[44:45], v[60:61] op_sel_hi:[0,1] // 000000016168: D3B1403C 1002782C - v_pk_mul_f32 v[62:63], s[44:45], v[62:63] op_sel_hi:[0,1] // 000000016170: D3B1403E 10027C2C - v_pk_mul_f32 v[64:65], s[44:45], v[64:65] op_sel_hi:[0,1] // 000000016178: D3B14040 1002802C - v_pk_mul_f32 v[66:67], s[44:45], v[66:67] op_sel_hi:[0,1] // 000000016180: D3B14042 1002842C - v_pk_mul_f32 v[68:69], s[44:45], v[68:69] op_sel_hi:[0,1] // 000000016188: D3B14044 1002882C - v_pk_mul_f32 v[70:71], s[44:45], v[70:71] op_sel_hi:[0,1] // 000000016190: D3B14046 10028C2C - v_pk_mul_f32 v[72:73], s[44:45], v[72:73] op_sel_hi:[0,1] // 000000016198: D3B14048 1002902C - v_pk_mul_f32 v[74:75], s[44:45], v[74:75] op_sel_hi:[0,1] // 0000000161A0: D3B1404A 1002942C - v_pk_mul_f32 v[76:77], s[44:45], v[76:77] op_sel_hi:[0,1] // 0000000161A8: D3B1404C 1002982C - v_pk_mul_f32 v[78:79], s[44:45], v[78:79] op_sel_hi:[0,1] // 0000000161B0: D3B1404E 10029C2C - v_pk_mul_f32 v[80:81], s[44:45], v[80:81] op_sel_hi:[0,1] // 0000000161B8: D3B14050 1002A02C - v_pk_mul_f32 v[82:83], s[44:45], v[82:83] op_sel_hi:[0,1] // 0000000161C0: D3B14052 1002A42C - v_pk_mul_f32 v[84:85], s[44:45], v[84:85] op_sel_hi:[0,1] // 0000000161C8: D3B14054 1002A82C - v_pk_mul_f32 v[86:87], s[44:45], v[86:87] op_sel_hi:[0,1] // 0000000161D0: D3B14056 1002AC2C - v_pk_mul_f32 v[88:89], s[44:45], v[88:89] op_sel_hi:[0,1] // 0000000161D8: D3B14058 1002B02C - v_pk_mul_f32 v[90:91], s[44:45], v[90:91] op_sel_hi:[0,1] // 0000000161E0: D3B1405A 1002B42C - v_pk_mul_f32 v[92:93], s[44:45], v[92:93] op_sel_hi:[0,1] // 0000000161E8: D3B1405C 1002B82C - v_pk_mul_f32 v[94:95], s[44:45], v[94:95] op_sel_hi:[0,1] // 0000000161F0: D3B1405E 1002BC2C - v_pk_mul_f32 v[96:97], s[44:45], v[96:97] op_sel_hi:[0,1] // 0000000161F8: D3B14060 1002C02C - 
v_pk_mul_f32 v[98:99], s[44:45], v[98:99] op_sel_hi:[0,1] // 000000016200: D3B14062 1002C42C - v_pk_mul_f32 v[100:101], s[44:45], v[100:101] op_sel_hi:[0,1]// 000000016208: D3B14064 1002C82C - v_pk_mul_f32 v[102:103], s[44:45], v[102:103] op_sel_hi:[0,1]// 000000016210: D3B14066 1002CC2C - v_pk_mul_f32 v[104:105], s[44:45], v[104:105] op_sel_hi:[0,1]// 000000016218: D3B14068 1002D02C - v_pk_mul_f32 v[106:107], s[44:45], v[106:107] op_sel_hi:[0,1]// 000000016220: D3B1406A 1002D42C - v_pk_mul_f32 v[108:109], s[44:45], v[108:109] op_sel_hi:[0,1]// 000000016228: D3B1406C 1002D82C - v_pk_mul_f32 v[110:111], s[44:45], v[110:111] op_sel_hi:[0,1]// 000000016230: D3B1406E 1002DC2C - v_pk_mul_f32 v[112:113], s[44:45], v[112:113] op_sel_hi:[0,1]// 000000016238: D3B14070 1002E02C - v_pk_mul_f32 v[114:115], s[44:45], v[114:115] op_sel_hi:[0,1]// 000000016240: D3B14072 1002E42C - v_pk_mul_f32 v[116:117], s[44:45], v[116:117] op_sel_hi:[0,1]// 000000016248: D3B14074 1002E82C - v_pk_mul_f32 v[118:119], s[44:45], v[118:119] op_sel_hi:[0,1]// 000000016250: D3B14076 1002EC2C - v_pk_mul_f32 v[120:121], s[44:45], v[120:121] op_sel_hi:[0,1]// 000000016258: D3B14078 1002F02C - v_pk_mul_f32 v[122:123], s[44:45], v[122:123] op_sel_hi:[0,1]// 000000016260: D3B1407A 1002F42C - v_pk_mul_f32 v[124:125], s[44:45], v[124:125] op_sel_hi:[0,1]// 000000016268: D3B1407C 1002F82C - v_pk_mul_f32 v[126:127], s[44:45], v[126:127] op_sel_hi:[0,1]// 000000016270: D3B1407E 1002FC2C - v_pk_mul_f32 v[136:137], s[44:45], v[136:137] op_sel_hi:[0,1]// 000000016278: D3B14088 1003102C - v_pk_mul_f32 v[138:139], s[44:45], v[138:139] op_sel_hi:[0,1]// 000000016280: D3B1408A 1003142C - v_pk_mul_f32 v[140:141], s[44:45], v[140:141] op_sel_hi:[0,1]// 000000016288: D3B1408C 1003182C - v_pk_mul_f32 v[142:143], s[44:45], v[142:143] op_sel_hi:[0,1]// 000000016290: D3B1408E 10031C2C - v_pk_mul_f32 v[144:145], s[44:45], v[144:145] op_sel_hi:[0,1]// 000000016298: D3B14090 1003202C - v_pk_mul_f32 v[146:147], s[44:45], 
v[146:147] op_sel_hi:[0,1]// 0000000162A0: D3B14092 1003242C - v_pk_mul_f32 v[148:149], s[44:45], v[148:149] op_sel_hi:[0,1]// 0000000162A8: D3B14094 1003282C - v_pk_mul_f32 v[150:151], s[44:45], v[150:151] op_sel_hi:[0,1]// 0000000162B0: D3B14096 10032C2C - s_waitcnt vmcnt(0) // 0000000162B8: BF8C0F70 - v_mov_b32_e32 v12, 0xffff0000 // 0000000162BC: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 0000000162C4: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 0000000162CC: 7E1C02FF 00007FFF - v_cvt_f32_bf16_sdwa v8, v128 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000162D4: 7E10B6F9 00041680 - v_fmac_f32_e64 v16, v8, s45 // 0000000162DC: D13B0010 00005B08 - v_cvt_f32_bf16_sdwa v8, v128 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000162E4: 7E10B6F9 00051680 - v_fmac_f32_e64 v17, v8, s45 // 0000000162EC: D13B0011 00005B08 - v_cvt_f32_bf16_sdwa v8, v129 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000162F4: 7E10B6F9 00041681 - v_fmac_f32_e64 v18, v8, s45 // 0000000162FC: D13B0012 00005B08 - v_cvt_f32_bf16_sdwa v8, v129 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016304: 7E10B6F9 00051681 - v_fmac_f32_e64 v19, v8, s45 // 00000001630C: D13B0013 00005B08 - v_cvt_f32_bf16_sdwa v8, v130 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016314: 7E10B6F9 00041682 - v_fmac_f32_e64 v20, v8, s45 // 00000001631C: D13B0014 00005B08 - v_cvt_f32_bf16_sdwa v8, v130 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016324: 7E10B6F9 00051682 - v_fmac_f32_e64 v21, v8, s45 // 00000001632C: D13B0015 00005B08 - v_cvt_f32_bf16_sdwa v8, v131 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016334: 7E10B6F9 00041683 - v_fmac_f32_e64 v22, v8, s45 // 00000001633C: D13B0016 00005B08 - v_cvt_f32_bf16_sdwa v8, v131 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016344: 7E10B6F9 00051683 - v_fmac_f32_e64 v23, v8, s45 // 00000001634C: D13B0017 00005B08 - 
v_cvt_pk_bf16_f32 v16, v16, v17 // 000000016354: D2680010 00022310 - v_cvt_pk_bf16_f32 v17, v18, v19 // 00000001635C: D2680011 00022712 - v_cvt_pk_bf16_f32 v18, v20, v21 // 000000016364: D2680012 00022B14 - v_cvt_pk_bf16_f32 v19, v22, v23 // 00000001636C: D2680013 00022F16 - buffer_store_dwordx4 v[16:19], v15, s[16:19], 0 offen nt // 000000016374: E07E1000 8004100F - v_cvt_f32_bf16_sdwa v8, v152 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001637C: 7E10B6F9 00041698 - v_fmac_f32_e64 v24, v8, s45 // 000000016384: D13B0018 00005B08 - v_cvt_f32_bf16_sdwa v8, v152 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001638C: 7E10B6F9 00051698 - v_fmac_f32_e64 v25, v8, s45 // 000000016394: D13B0019 00005B08 - v_cvt_f32_bf16_sdwa v8, v153 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001639C: 7E10B6F9 00041699 - v_fmac_f32_e64 v26, v8, s45 // 0000000163A4: D13B001A 00005B08 - v_cvt_f32_bf16_sdwa v8, v153 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000163AC: 7E10B6F9 00051699 - v_fmac_f32_e64 v27, v8, s45 // 0000000163B4: D13B001B 00005B08 - v_cvt_f32_bf16_sdwa v8, v154 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000163BC: 7E10B6F9 0004169A - v_fmac_f32_e64 v28, v8, s45 // 0000000163C4: D13B001C 00005B08 - v_cvt_f32_bf16_sdwa v8, v154 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000163CC: 7E10B6F9 0005169A - v_fmac_f32_e64 v29, v8, s45 // 0000000163D4: D13B001D 00005B08 - v_cvt_f32_bf16_sdwa v8, v155 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000163DC: 7E10B6F9 0004169B - v_fmac_f32_e64 v30, v8, s45 // 0000000163E4: D13B001E 00005B08 - v_cvt_f32_bf16_sdwa v8, v155 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000163EC: 7E10B6F9 0005169B - v_fmac_f32_e64 v31, v8, s45 // 0000000163F4: D13B001F 00005B08 - v_cvt_pk_bf16_f32 v24, v24, v25 // 0000000163FC: D2680018 00023318 - v_cvt_pk_bf16_f32 v25, v26, v27 // 000000016404: D2680019 
0002371A - v_cvt_pk_bf16_f32 v26, v28, v29 // 00000001640C: D268001A 00023B1C - v_cvt_pk_bf16_f32 v27, v30, v31 // 000000016414: D268001B 00023F1E - buffer_store_dwordx4 v[24:27], v135, s[16:19], 0 offen nt // 00000001641C: E07E1000 80041887 - v_cvt_f32_bf16_sdwa v8, v156 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016424: 7E10B6F9 0004169C - v_fmac_f32_e64 v32, v8, s45 // 00000001642C: D13B0020 00005B08 - v_cvt_f32_bf16_sdwa v8, v156 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016434: 7E10B6F9 0005169C - v_fmac_f32_e64 v33, v8, s45 // 00000001643C: D13B0021 00005B08 - v_cvt_f32_bf16_sdwa v8, v157 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016444: 7E10B6F9 0004169D - v_fmac_f32_e64 v34, v8, s45 // 00000001644C: D13B0022 00005B08 - v_cvt_f32_bf16_sdwa v8, v157 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016454: 7E10B6F9 0005169D - v_fmac_f32_e64 v35, v8, s45 // 00000001645C: D13B0023 00005B08 - v_cvt_f32_bf16_sdwa v8, v158 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016464: 7E10B6F9 0004169E - v_fmac_f32_e64 v36, v8, s45 // 00000001646C: D13B0024 00005B08 - v_cvt_f32_bf16_sdwa v8, v158 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016474: 7E10B6F9 0005169E - v_fmac_f32_e64 v37, v8, s45 // 00000001647C: D13B0025 00005B08 - v_cvt_f32_bf16_sdwa v8, v159 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016484: 7E10B6F9 0004169F - v_fmac_f32_e64 v38, v8, s45 // 00000001648C: D13B0026 00005B08 - v_cvt_f32_bf16_sdwa v8, v159 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016494: 7E10B6F9 0005169F - v_fmac_f32_e64 v39, v8, s45 // 00000001649C: D13B0027 00005B08 - v_cvt_pk_bf16_f32 v32, v32, v33 // 0000000164A4: D2680020 00024320 - v_cvt_pk_bf16_f32 v33, v34, v35 // 0000000164AC: D2680021 00024722 - v_cvt_pk_bf16_f32 v34, v36, v37 // 0000000164B4: D2680022 00024B24 - v_cvt_pk_bf16_f32 v35, v38, v39 // 0000000164BC: 
D2680023 00024F26 - buffer_store_dwordx4 v[32:35], v160, s[16:19], 0 offen nt // 0000000164C4: E07E1000 800420A0 - v_cvt_f32_bf16_sdwa v8, v164 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000164CC: 7E10B6F9 000416A4 - v_fmac_f32_e64 v40, v8, s45 // 0000000164D4: D13B0028 00005B08 - v_cvt_f32_bf16_sdwa v8, v164 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000164DC: 7E10B6F9 000516A4 - v_fmac_f32_e64 v41, v8, s45 // 0000000164E4: D13B0029 00005B08 - v_cvt_f32_bf16_sdwa v8, v165 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000164EC: 7E10B6F9 000416A5 - v_fmac_f32_e64 v42, v8, s45 // 0000000164F4: D13B002A 00005B08 - v_cvt_f32_bf16_sdwa v8, v165 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000164FC: 7E10B6F9 000516A5 - v_fmac_f32_e64 v43, v8, s45 // 000000016504: D13B002B 00005B08 - v_cvt_f32_bf16_sdwa v8, v166 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001650C: 7E10B6F9 000416A6 - v_fmac_f32_e64 v44, v8, s45 // 000000016514: D13B002C 00005B08 - v_cvt_f32_bf16_sdwa v8, v166 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001651C: 7E10B6F9 000516A6 - v_fmac_f32_e64 v45, v8, s45 // 000000016524: D13B002D 00005B08 - v_cvt_f32_bf16_sdwa v8, v167 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001652C: 7E10B6F9 000416A7 - v_fmac_f32_e64 v46, v8, s45 // 000000016534: D13B002E 00005B08 - v_cvt_f32_bf16_sdwa v8, v167 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001653C: 7E10B6F9 000516A7 - v_fmac_f32_e64 v47, v8, s45 // 000000016544: D13B002F 00005B08 - v_cvt_pk_bf16_f32 v40, v40, v41 // 00000001654C: D2680028 00025328 - v_cvt_pk_bf16_f32 v41, v42, v43 // 000000016554: D2680029 0002572A - v_cvt_pk_bf16_f32 v42, v44, v45 // 00000001655C: D268002A 00025B2C - v_cvt_pk_bf16_f32 v43, v46, v47 // 000000016564: D268002B 00025F2E - buffer_store_dwordx4 v[40:43], v161, s[16:19], 0 offen nt // 00000001656C: E07E1000 800428A1 - 
v_cvt_f32_bf16_sdwa v8, v168 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016574: 7E10B6F9 000416A8 - v_fmac_f32_e64 v48, v8, s45 // 00000001657C: D13B0030 00005B08 - v_cvt_f32_bf16_sdwa v8, v168 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016584: 7E10B6F9 000516A8 - v_fmac_f32_e64 v49, v8, s45 // 00000001658C: D13B0031 00005B08 - v_cvt_f32_bf16_sdwa v8, v169 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016594: 7E10B6F9 000416A9 - v_fmac_f32_e64 v50, v8, s45 // 00000001659C: D13B0032 00005B08 - v_cvt_f32_bf16_sdwa v8, v169 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000165A4: 7E10B6F9 000516A9 - v_fmac_f32_e64 v51, v8, s45 // 0000000165AC: D13B0033 00005B08 - v_cvt_f32_bf16_sdwa v8, v170 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000165B4: 7E10B6F9 000416AA - v_fmac_f32_e64 v52, v8, s45 // 0000000165BC: D13B0034 00005B08 - v_cvt_f32_bf16_sdwa v8, v170 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000165C4: 7E10B6F9 000516AA - v_fmac_f32_e64 v53, v8, s45 // 0000000165CC: D13B0035 00005B08 - v_cvt_f32_bf16_sdwa v8, v171 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000165D4: 7E10B6F9 000416AB - v_fmac_f32_e64 v54, v8, s45 // 0000000165DC: D13B0036 00005B08 - v_cvt_f32_bf16_sdwa v8, v171 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000165E4: 7E10B6F9 000516AB - v_fmac_f32_e64 v55, v8, s45 // 0000000165EC: D13B0037 00005B08 - v_cvt_pk_bf16_f32 v48, v48, v49 // 0000000165F4: D2680030 00026330 - v_cvt_pk_bf16_f32 v49, v50, v51 // 0000000165FC: D2680031 00026732 - v_cvt_pk_bf16_f32 v50, v52, v53 // 000000016604: D2680032 00026B34 - v_cvt_pk_bf16_f32 v51, v54, v55 // 00000001660C: D2680033 00026F36 - buffer_store_dwordx4 v[48:51], v162, s[16:19], 0 offen nt // 000000016614: E07E1000 800430A2 - v_cvt_f32_bf16_sdwa v8, v172 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001661C: 7E10B6F9 000416AC - 
v_fmac_f32_e64 v56, v8, s45 // 000000016624: D13B0038 00005B08 - v_cvt_f32_bf16_sdwa v8, v172 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001662C: 7E10B6F9 000516AC - v_fmac_f32_e64 v57, v8, s45 // 000000016634: D13B0039 00005B08 - v_cvt_f32_bf16_sdwa v8, v173 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001663C: 7E10B6F9 000416AD - v_fmac_f32_e64 v58, v8, s45 // 000000016644: D13B003A 00005B08 - v_cvt_f32_bf16_sdwa v8, v173 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001664C: 7E10B6F9 000516AD - v_fmac_f32_e64 v59, v8, s45 // 000000016654: D13B003B 00005B08 - v_cvt_f32_bf16_sdwa v8, v174 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001665C: 7E10B6F9 000416AE - v_fmac_f32_e64 v60, v8, s45 // 000000016664: D13B003C 00005B08 - v_cvt_f32_bf16_sdwa v8, v174 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001666C: 7E10B6F9 000516AE - v_fmac_f32_e64 v61, v8, s45 // 000000016674: D13B003D 00005B08 - v_cvt_f32_bf16_sdwa v8, v175 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001667C: 7E10B6F9 000416AF - v_fmac_f32_e64 v62, v8, s45 // 000000016684: D13B003E 00005B08 - v_cvt_f32_bf16_sdwa v8, v175 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001668C: 7E10B6F9 000516AF - v_fmac_f32_e64 v63, v8, s45 // 000000016694: D13B003F 00005B08 - v_cvt_pk_bf16_f32 v56, v56, v57 // 00000001669C: D2680038 00027338 - v_cvt_pk_bf16_f32 v57, v58, v59 // 0000000166A4: D2680039 0002773A - v_cvt_pk_bf16_f32 v58, v60, v61 // 0000000166AC: D268003A 00027B3C - v_cvt_pk_bf16_f32 v59, v62, v63 // 0000000166B4: D268003B 00027F3E - buffer_store_dwordx4 v[56:59], v163, s[16:19], 0 offen nt // 0000000166BC: E07E1000 800438A3 - v_cvt_f32_bf16_sdwa v8, v176 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000166C4: 7E10B6F9 000416B0 - v_fmac_f32_e64 v64, v8, s45 // 0000000166CC: D13B0040 00005B08 - v_cvt_f32_bf16_sdwa v8, v176 dst_sel:DWORD 
dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000166D4: 7E10B6F9 000516B0 - v_fmac_f32_e64 v65, v8, s45 // 0000000166DC: D13B0041 00005B08 - v_cvt_f32_bf16_sdwa v8, v177 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000166E4: 7E10B6F9 000416B1 - v_fmac_f32_e64 v66, v8, s45 // 0000000166EC: D13B0042 00005B08 - v_cvt_f32_bf16_sdwa v8, v177 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000166F4: 7E10B6F9 000516B1 - v_fmac_f32_e64 v67, v8, s45 // 0000000166FC: D13B0043 00005B08 - v_cvt_f32_bf16_sdwa v8, v178 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016704: 7E10B6F9 000416B2 - v_fmac_f32_e64 v68, v8, s45 // 00000001670C: D13B0044 00005B08 - v_cvt_f32_bf16_sdwa v8, v178 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016714: 7E10B6F9 000516B2 - v_fmac_f32_e64 v69, v8, s45 // 00000001671C: D13B0045 00005B08 - v_cvt_f32_bf16_sdwa v8, v179 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016724: 7E10B6F9 000416B3 - v_fmac_f32_e64 v70, v8, s45 // 00000001672C: D13B0046 00005B08 - v_cvt_f32_bf16_sdwa v8, v179 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016734: 7E10B6F9 000516B3 - v_fmac_f32_e64 v71, v8, s45 // 00000001673C: D13B0047 00005B08 - v_cvt_pk_bf16_f32 v64, v64, v65 // 000000016744: D2680040 00028340 - v_cvt_pk_bf16_f32 v65, v66, v67 // 00000001674C: D2680041 00028742 - v_cvt_pk_bf16_f32 v66, v68, v69 // 000000016754: D2680042 00028B44 - v_cvt_pk_bf16_f32 v67, v70, v71 // 00000001675C: D2680043 00028F46 - buffer_store_dwordx4 v[64:67], v180, s[16:19], 0 offen nt // 000000016764: E07E1000 800440B4 - v_cvt_f32_bf16_sdwa v8, v184 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001676C: 7E10B6F9 000416B8 - v_fmac_f32_e64 v72, v8, s45 // 000000016774: D13B0048 00005B08 - v_cvt_f32_bf16_sdwa v8, v184 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001677C: 7E10B6F9 000516B8 - v_fmac_f32_e64 v73, v8, s45 // 000000016784: D13B0049 
00005B08 - v_cvt_f32_bf16_sdwa v8, v185 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001678C: 7E10B6F9 000416B9 - v_fmac_f32_e64 v74, v8, s45 // 000000016794: D13B004A 00005B08 - v_cvt_f32_bf16_sdwa v8, v185 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001679C: 7E10B6F9 000516B9 - v_fmac_f32_e64 v75, v8, s45 // 0000000167A4: D13B004B 00005B08 - v_cvt_f32_bf16_sdwa v8, v186 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000167AC: 7E10B6F9 000416BA - v_fmac_f32_e64 v76, v8, s45 // 0000000167B4: D13B004C 00005B08 - v_cvt_f32_bf16_sdwa v8, v186 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000167BC: 7E10B6F9 000516BA - v_fmac_f32_e64 v77, v8, s45 // 0000000167C4: D13B004D 00005B08 - v_cvt_f32_bf16_sdwa v8, v187 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000167CC: 7E10B6F9 000416BB - v_fmac_f32_e64 v78, v8, s45 // 0000000167D4: D13B004E 00005B08 - v_cvt_f32_bf16_sdwa v8, v187 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000167DC: 7E10B6F9 000516BB - v_fmac_f32_e64 v79, v8, s45 // 0000000167E4: D13B004F 00005B08 - v_cvt_pk_bf16_f32 v72, v72, v73 // 0000000167EC: D2680048 00029348 - v_cvt_pk_bf16_f32 v73, v74, v75 // 0000000167F4: D2680049 0002974A - v_cvt_pk_bf16_f32 v74, v76, v77 // 0000000167FC: D268004A 00029B4C - v_cvt_pk_bf16_f32 v75, v78, v79 // 000000016804: D268004B 00029F4E - buffer_store_dwordx4 v[72:75], v181, s[16:19], 0 offen nt // 00000001680C: E07E1000 800448B5 - v_cvt_f32_bf16_sdwa v8, v188 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016814: 7E10B6F9 000416BC - v_fmac_f32_e64 v80, v8, s45 // 00000001681C: D13B0050 00005B08 - v_cvt_f32_bf16_sdwa v8, v188 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016824: 7E10B6F9 000516BC - v_fmac_f32_e64 v81, v8, s45 // 00000001682C: D13B0051 00005B08 - v_cvt_f32_bf16_sdwa v8, v189 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016834: 7E10B6F9 000416BD - 
v_fmac_f32_e64 v82, v8, s45 // 00000001683C: D13B0052 00005B08 - v_cvt_f32_bf16_sdwa v8, v189 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016844: 7E10B6F9 000516BD - v_fmac_f32_e64 v83, v8, s45 // 00000001684C: D13B0053 00005B08 - v_cvt_f32_bf16_sdwa v8, v190 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016854: 7E10B6F9 000416BE - v_fmac_f32_e64 v84, v8, s45 // 00000001685C: D13B0054 00005B08 - v_cvt_f32_bf16_sdwa v8, v190 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016864: 7E10B6F9 000516BE - v_fmac_f32_e64 v85, v8, s45 // 00000001686C: D13B0055 00005B08 - v_cvt_f32_bf16_sdwa v8, v191 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016874: 7E10B6F9 000416BF - v_fmac_f32_e64 v86, v8, s45 // 00000001687C: D13B0056 00005B08 - v_cvt_f32_bf16_sdwa v8, v191 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016884: 7E10B6F9 000516BF - v_fmac_f32_e64 v87, v8, s45 // 00000001688C: D13B0057 00005B08 - v_cvt_pk_bf16_f32 v80, v80, v81 // 000000016894: D2680050 0002A350 - v_cvt_pk_bf16_f32 v81, v82, v83 // 00000001689C: D2680051 0002A752 - v_cvt_pk_bf16_f32 v82, v84, v85 // 0000000168A4: D2680052 0002AB54 - v_cvt_pk_bf16_f32 v83, v86, v87 // 0000000168AC: D2680053 0002AF56 - buffer_store_dwordx4 v[80:83], v182, s[16:19], 0 offen nt // 0000000168B4: E07E1000 800450B6 - v_cvt_f32_bf16_sdwa v8, v192 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000168BC: 7E10B6F9 000416C0 - v_fmac_f32_e64 v88, v8, s45 // 0000000168C4: D13B0058 00005B08 - v_cvt_f32_bf16_sdwa v8, v192 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000168CC: 7E10B6F9 000516C0 - v_fmac_f32_e64 v89, v8, s45 // 0000000168D4: D13B0059 00005B08 - v_cvt_f32_bf16_sdwa v8, v193 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000168DC: 7E10B6F9 000416C1 - v_fmac_f32_e64 v90, v8, s45 // 0000000168E4: D13B005A 00005B08 - v_cvt_f32_bf16_sdwa v8, v193 dst_sel:DWORD 
dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000168EC: 7E10B6F9 000516C1 - v_fmac_f32_e64 v91, v8, s45 // 0000000168F4: D13B005B 00005B08 - v_cvt_f32_bf16_sdwa v8, v194 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000168FC: 7E10B6F9 000416C2 - v_fmac_f32_e64 v92, v8, s45 // 000000016904: D13B005C 00005B08 - v_cvt_f32_bf16_sdwa v8, v194 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001690C: 7E10B6F9 000516C2 - v_fmac_f32_e64 v93, v8, s45 // 000000016914: D13B005D 00005B08 - v_cvt_f32_bf16_sdwa v8, v195 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001691C: 7E10B6F9 000416C3 - v_fmac_f32_e64 v94, v8, s45 // 000000016924: D13B005E 00005B08 - v_cvt_f32_bf16_sdwa v8, v195 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001692C: 7E10B6F9 000516C3 - v_fmac_f32_e64 v95, v8, s45 // 000000016934: D13B005F 00005B08 - v_cvt_pk_bf16_f32 v88, v88, v89 // 00000001693C: D2680058 0002B358 - v_cvt_pk_bf16_f32 v89, v90, v91 // 000000016944: D2680059 0002B75A - v_cvt_pk_bf16_f32 v90, v92, v93 // 00000001694C: D268005A 0002BB5C - v_cvt_pk_bf16_f32 v91, v94, v95 // 000000016954: D268005B 0002BF5E - buffer_store_dwordx4 v[88:91], v183, s[16:19], 0 offen nt // 00000001695C: E07E1000 800458B7 - v_cvt_f32_bf16_sdwa v8, v196 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016964: 7E10B6F9 000416C4 - v_fmac_f32_e64 v96, v8, s45 // 00000001696C: D13B0060 00005B08 - v_cvt_f32_bf16_sdwa v8, v196 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016974: 7E10B6F9 000516C4 - v_fmac_f32_e64 v97, v8, s45 // 00000001697C: D13B0061 00005B08 - v_cvt_f32_bf16_sdwa v8, v197 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016984: 7E10B6F9 000416C5 - v_fmac_f32_e64 v98, v8, s45 // 00000001698C: D13B0062 00005B08 - v_cvt_f32_bf16_sdwa v8, v197 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016994: 7E10B6F9 000516C5 - v_fmac_f32_e64 v99, v8, s45 // 00000001699C: D13B0063 
00005B08 - v_cvt_f32_bf16_sdwa v8, v198 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000169A4: 7E10B6F9 000416C6 - v_fmac_f32_e64 v100, v8, s45 // 0000000169AC: D13B0064 00005B08 - v_cvt_f32_bf16_sdwa v8, v198 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000169B4: 7E10B6F9 000516C6 - v_fmac_f32_e64 v101, v8, s45 // 0000000169BC: D13B0065 00005B08 - v_cvt_f32_bf16_sdwa v8, v199 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000169C4: 7E10B6F9 000416C7 - v_fmac_f32_e64 v102, v8, s45 // 0000000169CC: D13B0066 00005B08 - v_cvt_f32_bf16_sdwa v8, v199 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000169D4: 7E10B6F9 000516C7 - v_fmac_f32_e64 v103, v8, s45 // 0000000169DC: D13B0067 00005B08 - v_cvt_pk_bf16_f32 v96, v96, v97 // 0000000169E4: D2680060 0002C360 - v_cvt_pk_bf16_f32 v97, v98, v99 // 0000000169EC: D2680061 0002C762 - v_cvt_pk_bf16_f32 v98, v100, v101 // 0000000169F4: D2680062 0002CB64 - v_cvt_pk_bf16_f32 v99, v102, v103 // 0000000169FC: D2680063 0002CF66 - buffer_store_dwordx4 v[96:99], v200, s[16:19], 0 offen nt // 000000016A04: E07E1000 800460C8 - v_cvt_f32_bf16_sdwa v8, v204 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016A0C: 7E10B6F9 000416CC - v_fmac_f32_e64 v104, v8, s45 // 000000016A14: D13B0068 00005B08 - v_cvt_f32_bf16_sdwa v8, v204 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016A1C: 7E10B6F9 000516CC - v_fmac_f32_e64 v105, v8, s45 // 000000016A24: D13B0069 00005B08 - v_cvt_f32_bf16_sdwa v8, v205 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016A2C: 7E10B6F9 000416CD - v_fmac_f32_e64 v106, v8, s45 // 000000016A34: D13B006A 00005B08 - v_cvt_f32_bf16_sdwa v8, v205 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016A3C: 7E10B6F9 000516CD - v_fmac_f32_e64 v107, v8, s45 // 000000016A44: D13B006B 00005B08 - v_cvt_f32_bf16_sdwa v8, v206 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016A4C: 
7E10B6F9 000416CE - v_fmac_f32_e64 v108, v8, s45 // 000000016A54: D13B006C 00005B08 - v_cvt_f32_bf16_sdwa v8, v206 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016A5C: 7E10B6F9 000516CE - v_fmac_f32_e64 v109, v8, s45 // 000000016A64: D13B006D 00005B08 - v_cvt_f32_bf16_sdwa v8, v207 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016A6C: 7E10B6F9 000416CF - v_fmac_f32_e64 v110, v8, s45 // 000000016A74: D13B006E 00005B08 - v_cvt_f32_bf16_sdwa v8, v207 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016A7C: 7E10B6F9 000516CF - v_fmac_f32_e64 v111, v8, s45 // 000000016A84: D13B006F 00005B08 - v_cvt_pk_bf16_f32 v104, v104, v105 // 000000016A8C: D2680068 0002D368 - v_cvt_pk_bf16_f32 v105, v106, v107 // 000000016A94: D2680069 0002D76A - v_cvt_pk_bf16_f32 v106, v108, v109 // 000000016A9C: D268006A 0002DB6C - v_cvt_pk_bf16_f32 v107, v110, v111 // 000000016AA4: D268006B 0002DF6E - buffer_store_dwordx4 v[104:107], v201, s[16:19], 0 offen nt// 000000016AAC: E07E1000 800468C9 - v_cvt_f32_bf16_sdwa v8, v208 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016AB4: 7E10B6F9 000416D0 - v_fmac_f32_e64 v112, v8, s45 // 000000016ABC: D13B0070 00005B08 - v_cvt_f32_bf16_sdwa v8, v208 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016AC4: 7E10B6F9 000516D0 - v_fmac_f32_e64 v113, v8, s45 // 000000016ACC: D13B0071 00005B08 - v_cvt_f32_bf16_sdwa v8, v209 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016AD4: 7E10B6F9 000416D1 - v_fmac_f32_e64 v114, v8, s45 // 000000016ADC: D13B0072 00005B08 - v_cvt_f32_bf16_sdwa v8, v209 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016AE4: 7E10B6F9 000516D1 - v_fmac_f32_e64 v115, v8, s45 // 000000016AEC: D13B0073 00005B08 - v_cvt_f32_bf16_sdwa v8, v210 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016AF4: 7E10B6F9 000416D2 - v_fmac_f32_e64 v116, v8, s45 // 000000016AFC: D13B0074 00005B08 - v_cvt_f32_bf16_sdwa v8, 
v210 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016B04: 7E10B6F9 000516D2 - v_fmac_f32_e64 v117, v8, s45 // 000000016B0C: D13B0075 00005B08 - v_cvt_f32_bf16_sdwa v8, v211 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016B14: 7E10B6F9 000416D3 - v_fmac_f32_e64 v118, v8, s45 // 000000016B1C: D13B0076 00005B08 - v_cvt_f32_bf16_sdwa v8, v211 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016B24: 7E10B6F9 000516D3 - v_fmac_f32_e64 v119, v8, s45 // 000000016B2C: D13B0077 00005B08 - v_cvt_pk_bf16_f32 v112, v112, v113 // 000000016B34: D2680070 0002E370 - v_cvt_pk_bf16_f32 v113, v114, v115 // 000000016B3C: D2680071 0002E772 - v_cvt_pk_bf16_f32 v114, v116, v117 // 000000016B44: D2680072 0002EB74 - v_cvt_pk_bf16_f32 v115, v118, v119 // 000000016B4C: D2680073 0002EF76 - buffer_store_dwordx4 v[112:115], v202, s[16:19], 0 offen nt// 000000016B54: E07E1000 800470CA - v_cvt_f32_bf16_sdwa v8, v212 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016B5C: 7E10B6F9 000416D4 - v_fmac_f32_e64 v120, v8, s45 // 000000016B64: D13B0078 00005B08 - v_cvt_f32_bf16_sdwa v8, v212 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016B6C: 7E10B6F9 000516D4 - v_fmac_f32_e64 v121, v8, s45 // 000000016B74: D13B0079 00005B08 - v_cvt_f32_bf16_sdwa v8, v213 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016B7C: 7E10B6F9 000416D5 - v_fmac_f32_e64 v122, v8, s45 // 000000016B84: D13B007A 00005B08 - v_cvt_f32_bf16_sdwa v8, v213 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016B8C: 7E10B6F9 000516D5 - v_fmac_f32_e64 v123, v8, s45 // 000000016B94: D13B007B 00005B08 - v_cvt_f32_bf16_sdwa v8, v214 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016B9C: 7E10B6F9 000416D6 - v_fmac_f32_e64 v124, v8, s45 // 000000016BA4: D13B007C 00005B08 - v_cvt_f32_bf16_sdwa v8, v214 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016BAC: 7E10B6F9 000516D6 - 
v_fmac_f32_e64 v125, v8, s45 // 000000016BB4: D13B007D 00005B08 - v_cvt_f32_bf16_sdwa v8, v215 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016BBC: 7E10B6F9 000416D7 - v_fmac_f32_e64 v126, v8, s45 // 000000016BC4: D13B007E 00005B08 - v_cvt_f32_bf16_sdwa v8, v215 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016BCC: 7E10B6F9 000516D7 - v_fmac_f32_e64 v127, v8, s45 // 000000016BD4: D13B007F 00005B08 - v_cvt_pk_bf16_f32 v120, v120, v121 // 000000016BDC: D2680078 0002F378 - v_cvt_pk_bf16_f32 v121, v122, v123 // 000000016BE4: D2680079 0002F77A - v_cvt_pk_bf16_f32 v122, v124, v125 // 000000016BEC: D268007A 0002FB7C - v_cvt_pk_bf16_f32 v123, v126, v127 // 000000016BF4: D268007B 0002FF7E - buffer_store_dwordx4 v[120:123], v203, s[16:19], 0 offen nt// 000000016BFC: E07E1000 800478CB - v_cvt_f32_bf16_sdwa v8, v216 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016C04: 7E10B6F9 000416D8 - v_fmac_f32_e64 v136, v8, s45 // 000000016C0C: D13B0088 00005B08 - v_cvt_f32_bf16_sdwa v8, v216 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016C14: 7E10B6F9 000516D8 - v_fmac_f32_e64 v137, v8, s45 // 000000016C1C: D13B0089 00005B08 - v_cvt_f32_bf16_sdwa v8, v217 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016C24: 7E10B6F9 000416D9 - v_fmac_f32_e64 v138, v8, s45 // 000000016C2C: D13B008A 00005B08 - v_cvt_f32_bf16_sdwa v8, v217 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016C34: 7E10B6F9 000516D9 - v_fmac_f32_e64 v139, v8, s45 // 000000016C3C: D13B008B 00005B08 - v_cvt_f32_bf16_sdwa v8, v218 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016C44: 7E10B6F9 000416DA - v_fmac_f32_e64 v140, v8, s45 // 000000016C4C: D13B008C 00005B08 - v_cvt_f32_bf16_sdwa v8, v218 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016C54: 7E10B6F9 000516DA - v_fmac_f32_e64 v141, v8, s45 // 000000016C5C: D13B008D 00005B08 - v_cvt_f32_bf16_sdwa v8, v219 dst_sel:DWORD 
dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016C64: 7E10B6F9 000416DB - v_fmac_f32_e64 v142, v8, s45 // 000000016C6C: D13B008E 00005B08 - v_cvt_f32_bf16_sdwa v8, v219 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016C74: 7E10B6F9 000516DB - v_fmac_f32_e64 v143, v8, s45 // 000000016C7C: D13B008F 00005B08 - v_cvt_pk_bf16_f32 v136, v136, v137 // 000000016C84: D2680088 00031388 - v_cvt_pk_bf16_f32 v137, v138, v139 // 000000016C8C: D2680089 0003178A - v_cvt_pk_bf16_f32 v138, v140, v141 // 000000016C94: D268008A 00031B8C - v_cvt_pk_bf16_f32 v139, v142, v143 // 000000016C9C: D268008B 00031F8E - buffer_store_dwordx4 v[136:139], v220, s[16:19], 0 offen nt// 000000016CA4: E07E1000 800488DC - v_cvt_f32_bf16_sdwa v8, v224 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016CAC: 7E10B6F9 000416E0 - v_fmac_f32_e64 v144, v8, s45 // 000000016CB4: D13B0090 00005B08 - v_cvt_f32_bf16_sdwa v8, v224 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016CBC: 7E10B6F9 000516E0 - v_fmac_f32_e64 v145, v8, s45 // 000000016CC4: D13B0091 00005B08 - v_cvt_f32_bf16_sdwa v8, v225 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016CCC: 7E10B6F9 000416E1 - v_fmac_f32_e64 v146, v8, s45 // 000000016CD4: D13B0092 00005B08 - v_cvt_f32_bf16_sdwa v8, v225 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016CDC: 7E10B6F9 000516E1 - v_fmac_f32_e64 v147, v8, s45 // 000000016CE4: D13B0093 00005B08 - v_cvt_f32_bf16_sdwa v8, v226 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016CEC: 7E10B6F9 000416E2 - v_fmac_f32_e64 v148, v8, s45 // 000000016CF4: D13B0094 00005B08 - v_cvt_f32_bf16_sdwa v8, v226 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016CFC: 7E10B6F9 000516E2 - v_fmac_f32_e64 v149, v8, s45 // 000000016D04: D13B0095 00005B08 - v_cvt_f32_bf16_sdwa v8, v227 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000016D0C: 7E10B6F9 000416E3 - v_fmac_f32_e64 v150, v8, s45 // 
000000016D14: D13B0096 00005B08 - v_cvt_f32_bf16_sdwa v8, v227 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000016D1C: 7E10B6F9 000516E3 - v_fmac_f32_e64 v151, v8, s45 // 000000016D24: D13B0097 00005B08 - v_cvt_pk_bf16_f32 v144, v144, v145 // 000000016D2C: D2680090 00032390 - v_cvt_pk_bf16_f32 v145, v146, v147 // 000000016D34: D2680091 00032792 - v_cvt_pk_bf16_f32 v146, v148, v149 // 000000016D3C: D2680092 00032B94 - v_cvt_pk_bf16_f32 v147, v150, v151 // 000000016D44: D2680093 00032F96 - buffer_store_dwordx4 v[144:147], v221, s[16:19], 0 offen nt// 000000016D4C: E07E1000 800490DD - s_nop 0 // 000000016D54: BF800000 - v_mov_b32_e32 v10, 0x80000000 // 000000016D58: 7E1402FF 80000000 - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000016D60: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000016D68: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000016D70: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000016D78: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000016D80: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000016D88: 86A2221E - v_add_lshl_u32 v15, v6, v4, 1 // 000000016D8C: D1FE000F 02060906 - v_cndmask_b32_e64 v15, v10, v15, s[34:35] // 000000016D94: D100000F 008A1F0A - buffer_load_dwordx4 v[128:131], v15, s[20:23], 0 offen // 000000016D9C: E05C1000 8005800F - v_add_lshl_u32 v15, v7, v4, 1 // 000000016DA4: D1FE000F 02060907 - v_cndmask_b32_e64 v15, v10, v15, s[34:35] // 000000016DAC: D100000F 008A1F0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000016DB4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000016DBC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000016DC4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000016DCC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000016DD4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000016DDC: 86A2221E - v_add_lshl_u32 v135, v6, v4, 1 // 000000016DE0: D1FE0087 02060906 - v_cndmask_b32_e64 v135, 
v10, v135, s[34:35] // 000000016DE8: D1000087 008B0F0A - buffer_load_dwordx4 v[152:155], v135, s[20:23], 0 offen // 000000016DF0: E05C1000 80059887 - v_add_lshl_u32 v135, v7, v4, 1 // 000000016DF8: D1FE0087 02060907 - v_cndmask_b32_e64 v135, v10, v135, s[34:35] // 000000016E00: D1000087 008B0F0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000016E08: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000016E10: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000016E18: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000016E20: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000016E28: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000016E30: 86A2221E - v_add_lshl_u32 v160, v6, v4, 1 // 000000016E34: D1FE00A0 02060906 - v_cndmask_b32_e64 v160, v10, v160, s[34:35] // 000000016E3C: D10000A0 008B410A - buffer_load_dwordx4 v[156:159], v160, s[20:23], 0 offen // 000000016E44: E05C1000 80059CA0 - v_add_lshl_u32 v160, v7, v4, 1 // 000000016E4C: D1FE00A0 02060907 - v_cndmask_b32_e64 v160, v10, v160, s[34:35] // 000000016E54: D10000A0 008B410A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000016E5C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000016E64: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000016E6C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000016E74: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000016E7C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000016E84: 86A2221E - v_add_lshl_u32 v161, v6, v4, 1 // 000000016E88: D1FE00A1 02060906 - v_cndmask_b32_e64 v161, v10, v161, s[34:35] // 000000016E90: D10000A1 008B430A - buffer_load_dwordx4 v[164:167], v161, s[20:23], 0 offen // 000000016E98: E05C1000 8005A4A1 - v_add_lshl_u32 v161, v7, v4, 1 // 000000016EA0: D1FE00A1 02060907 - v_cndmask_b32_e64 v161, v10, v161, s[34:35] // 000000016EA8: D10000A1 008B430A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000016EB0: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 
000000016EB8: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000016EC0: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000016EC8: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000016ED0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000016ED8: 86A2221E - v_add_lshl_u32 v162, v6, v4, 1 // 000000016EDC: D1FE00A2 02060906 - v_cndmask_b32_e64 v162, v10, v162, s[34:35] // 000000016EE4: D10000A2 008B450A - buffer_load_dwordx4 v[168:171], v162, s[20:23], 0 offen // 000000016EEC: E05C1000 8005A8A2 - v_add_lshl_u32 v162, v7, v4, 1 // 000000016EF4: D1FE00A2 02060907 - v_cndmask_b32_e64 v162, v10, v162, s[34:35] // 000000016EFC: D10000A2 008B450A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000016F04: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000016F0C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000016F14: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000016F1C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000016F24: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000016F2C: 86A2221E - v_add_lshl_u32 v163, v6, v4, 1 // 000000016F30: D1FE00A3 02060906 - v_cndmask_b32_e64 v163, v10, v163, s[34:35] // 000000016F38: D10000A3 008B470A - buffer_load_dwordx4 v[172:175], v163, s[20:23], 0 offen // 000000016F40: E05C1000 8005ACA3 - v_add_lshl_u32 v163, v7, v4, 1 // 000000016F48: D1FE00A3 02060907 - v_cndmask_b32_e64 v163, v10, v163, s[34:35] // 000000016F50: D10000A3 008B470A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000016F58: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000016F60: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000016F68: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000016F70: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000016F78: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000016F80: 86A2221E - v_add_lshl_u32 v180, v6, v4, 1 // 000000016F84: D1FE00B4 02060906 - v_cndmask_b32_e64 v180, v10, 
v180, s[34:35] // 000000016F8C: D10000B4 008B690A - buffer_load_dwordx4 v[176:179], v180, s[20:23], 0 offen // 000000016F94: E05C1000 8005B0B4 - v_add_lshl_u32 v180, v7, v4, 1 // 000000016F9C: D1FE00B4 02060907 - v_cndmask_b32_e64 v180, v10, v180, s[34:35] // 000000016FA4: D10000B4 008B690A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000016FAC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000016FB4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000016FBC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000016FC4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000016FCC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000016FD4: 86A2221E - v_add_lshl_u32 v181, v6, v4, 1 // 000000016FD8: D1FE00B5 02060906 - v_cndmask_b32_e64 v181, v10, v181, s[34:35] // 000000016FE0: D10000B5 008B6B0A - buffer_load_dwordx4 v[184:187], v181, s[20:23], 0 offen // 000000016FE8: E05C1000 8005B8B5 - v_add_lshl_u32 v181, v7, v4, 1 // 000000016FF0: D1FE00B5 02060907 - v_cndmask_b32_e64 v181, v10, v181, s[34:35] // 000000016FF8: D10000B5 008B6B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000017000: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000017008: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000017010: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000017018: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000017020: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000017028: 86A2221E - v_add_lshl_u32 v182, v6, v4, 1 // 00000001702C: D1FE00B6 02060906 - v_cndmask_b32_e64 v182, v10, v182, s[34:35] // 000000017034: D10000B6 008B6D0A - buffer_load_dwordx4 v[188:191], v182, s[20:23], 0 offen // 00000001703C: E05C1000 8005BCB6 - v_add_lshl_u32 v182, v7, v4, 1 // 000000017044: D1FE00B6 02060907 - v_cndmask_b32_e64 v182, v10, v182, s[34:35] // 00000001704C: D10000B6 008B6D0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000017054: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001705C: 
D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000017064: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001706C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000017074: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001707C: 86A2221E - v_add_lshl_u32 v183, v6, v4, 1 // 000000017080: D1FE00B7 02060906 - v_cndmask_b32_e64 v183, v10, v183, s[34:35] // 000000017088: D10000B7 008B6F0A - buffer_load_dwordx4 v[192:195], v183, s[20:23], 0 offen // 000000017090: E05C1000 8005C0B7 - v_add_lshl_u32 v183, v7, v4, 1 // 000000017098: D1FE00B7 02060907 - v_cndmask_b32_e64 v183, v10, v183, s[34:35] // 0000000170A0: D10000B7 008B6F0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 0000000170A8: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 0000000170B0: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000170B8: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000170C0: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000170C8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000170D0: 86A2221E - v_add_lshl_u32 v200, v6, v4, 1 // 0000000170D4: D1FE00C8 02060906 - v_cndmask_b32_e64 v200, v10, v200, s[34:35] // 0000000170DC: D10000C8 008B910A - buffer_load_dwordx4 v[196:199], v200, s[20:23], 0 offen // 0000000170E4: E05C1000 8005C4C8 - v_add_lshl_u32 v200, v7, v4, 1 // 0000000170EC: D1FE00C8 02060907 - v_cndmask_b32_e64 v200, v10, v200, s[34:35] // 0000000170F4: D10000C8 008B910A - v_add_co_u32_e64 v5, vcc, v5, 1 // 0000000170FC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000017104: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001710C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000017114: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001711C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000017124: 86A2221E - v_add_lshl_u32 v201, v6, v4, 1 // 000000017128: D1FE00C9 02060906 - v_cndmask_b32_e64 v201, v10, v201, s[34:35] 
// 000000017130: D10000C9 008B930A - buffer_load_dwordx4 v[204:207], v201, s[20:23], 0 offen // 000000017138: E05C1000 8005CCC9 - v_add_lshl_u32 v201, v7, v4, 1 // 000000017140: D1FE00C9 02060907 - v_cndmask_b32_e64 v201, v10, v201, s[34:35] // 000000017148: D10000C9 008B930A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000017150: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000017158: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000017160: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000017168: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000017170: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000017178: 86A2221E - v_add_lshl_u32 v202, v6, v4, 1 // 00000001717C: D1FE00CA 02060906 - v_cndmask_b32_e64 v202, v10, v202, s[34:35] // 000000017184: D10000CA 008B950A - buffer_load_dwordx4 v[208:211], v202, s[20:23], 0 offen // 00000001718C: E05C1000 8005D0CA - v_add_lshl_u32 v202, v7, v4, 1 // 000000017194: D1FE00CA 02060907 - v_cndmask_b32_e64 v202, v10, v202, s[34:35] // 00000001719C: D10000CA 008B950A - v_add_co_u32_e64 v5, vcc, v5, 1 // 0000000171A4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 0000000171AC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000171B4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000171BC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000171C4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000171CC: 86A2221E - v_add_lshl_u32 v203, v6, v4, 1 // 0000000171D0: D1FE00CB 02060906 - v_cndmask_b32_e64 v203, v10, v203, s[34:35] // 0000000171D8: D10000CB 008B970A - buffer_load_dwordx4 v[212:215], v203, s[20:23], 0 offen // 0000000171E0: E05C1000 8005D4CB - v_add_lshl_u32 v203, v7, v4, 1 // 0000000171E8: D1FE00CB 02060907 - v_cndmask_b32_e64 v203, v10, v203, s[34:35] // 0000000171F0: D10000CB 008B970A - v_add_co_u32_e64 v5, vcc, v5, 1 // 0000000171F8: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000017200: D1340006 
00004D06 - v_add_u32_e64 v7, v7, s36 // 000000017208: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000017210: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000017218: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000017220: 86A2221E - v_add_lshl_u32 v220, v6, v4, 1 // 000000017224: D1FE00DC 02060906 - v_cndmask_b32_e64 v220, v10, v220, s[34:35] // 00000001722C: D10000DC 008BB90A - buffer_load_dwordx4 v[216:219], v220, s[20:23], 0 offen // 000000017234: E05C1000 8005D8DC - v_add_lshl_u32 v220, v7, v4, 1 // 00000001723C: D1FE00DC 02060907 - v_cndmask_b32_e64 v220, v10, v220, s[34:35] // 000000017244: D10000DC 008BB90A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001724C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000017254: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001725C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000017264: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001726C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000017274: 86A2221E - v_add_lshl_u32 v221, v6, v4, 1 // 000000017278: D1FE00DD 02060906 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 000000017280: D10000DD 008BBB0A - buffer_load_dwordx4 v[224:227], v221, s[20:23], 0 offen // 000000017288: E05C1000 8005E0DD - v_add_lshl_u32 v221, v7, v4, 1 // 000000017290: D1FE00DD 02060907 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 000000017298: D10000DD 008BBB0A - v_accvgpr_read_b32 v16, a2 // 0000000172A0: D3D84010 18000102 - v_accvgpr_read_b32 v17, a6 // 0000000172A8: D3D84011 18000106 - v_accvgpr_read_b32 v18, a10 // 0000000172B0: D3D84012 1800010A - v_accvgpr_read_b32 v19, a14 // 0000000172B8: D3D84013 1800010E - v_accvgpr_read_b32 v20, a18 // 0000000172C0: D3D84014 18000112 - v_accvgpr_read_b32 v21, a22 // 0000000172C8: D3D84015 18000116 - v_accvgpr_read_b32 v22, a26 // 0000000172D0: D3D84016 1800011A - v_accvgpr_read_b32 v23, a30 // 0000000172D8: D3D84017 1800011E - 
v_accvgpr_read_b32 v24, a34 // 0000000172E0: D3D84018 18000122 - v_accvgpr_read_b32 v25, a38 // 0000000172E8: D3D84019 18000126 - v_accvgpr_read_b32 v26, a42 // 0000000172F0: D3D8401A 1800012A - v_accvgpr_read_b32 v27, a46 // 0000000172F8: D3D8401B 1800012E - v_accvgpr_read_b32 v28, a50 // 000000017300: D3D8401C 18000132 - v_accvgpr_read_b32 v29, a54 // 000000017308: D3D8401D 18000136 - v_accvgpr_read_b32 v30, a58 // 000000017310: D3D8401E 1800013A - v_accvgpr_read_b32 v31, a62 // 000000017318: D3D8401F 1800013E - v_accvgpr_read_b32 v32, a66 // 000000017320: D3D84020 18000142 - v_accvgpr_read_b32 v33, a70 // 000000017328: D3D84021 18000146 - v_accvgpr_read_b32 v34, a74 // 000000017330: D3D84022 1800014A - v_accvgpr_read_b32 v35, a78 // 000000017338: D3D84023 1800014E - v_accvgpr_read_b32 v36, a82 // 000000017340: D3D84024 18000152 - v_accvgpr_read_b32 v37, a86 // 000000017348: D3D84025 18000156 - v_accvgpr_read_b32 v38, a90 // 000000017350: D3D84026 1800015A - v_accvgpr_read_b32 v39, a94 // 000000017358: D3D84027 1800015E - v_accvgpr_read_b32 v40, a98 // 000000017360: D3D84028 18000162 - v_accvgpr_read_b32 v41, a102 // 000000017368: D3D84029 18000166 - v_accvgpr_read_b32 v42, a106 // 000000017370: D3D8402A 1800016A - v_accvgpr_read_b32 v43, a110 // 000000017378: D3D8402B 1800016E - v_accvgpr_read_b32 v44, a114 // 000000017380: D3D8402C 18000172 - v_accvgpr_read_b32 v45, a118 // 000000017388: D3D8402D 18000176 - v_accvgpr_read_b32 v46, a122 // 000000017390: D3D8402E 1800017A - v_accvgpr_read_b32 v47, a126 // 000000017398: D3D8402F 1800017E - v_accvgpr_read_b32 v48, a130 // 0000000173A0: D3D84030 18000182 - v_accvgpr_read_b32 v49, a134 // 0000000173A8: D3D84031 18000186 - v_accvgpr_read_b32 v50, a138 // 0000000173B0: D3D84032 1800018A - v_accvgpr_read_b32 v51, a142 // 0000000173B8: D3D84033 1800018E - v_accvgpr_read_b32 v52, a146 // 0000000173C0: D3D84034 18000192 - v_accvgpr_read_b32 v53, a150 // 0000000173C8: D3D84035 18000196 - v_accvgpr_read_b32 v54, a154 // 
0000000173D0: D3D84036 1800019A - v_accvgpr_read_b32 v55, a158 // 0000000173D8: D3D84037 1800019E - v_accvgpr_read_b32 v56, a162 // 0000000173E0: D3D84038 180001A2 - v_accvgpr_read_b32 v57, a166 // 0000000173E8: D3D84039 180001A6 - v_accvgpr_read_b32 v58, a170 // 0000000173F0: D3D8403A 180001AA - v_accvgpr_read_b32 v59, a174 // 0000000173F8: D3D8403B 180001AE - v_accvgpr_read_b32 v60, a178 // 000000017400: D3D8403C 180001B2 - v_accvgpr_read_b32 v61, a182 // 000000017408: D3D8403D 180001B6 - v_accvgpr_read_b32 v62, a186 // 000000017410: D3D8403E 180001BA - v_accvgpr_read_b32 v63, a190 // 000000017418: D3D8403F 180001BE - v_accvgpr_read_b32 v64, a194 // 000000017420: D3D84040 180001C2 - v_accvgpr_read_b32 v65, a198 // 000000017428: D3D84041 180001C6 - v_accvgpr_read_b32 v66, a202 // 000000017430: D3D84042 180001CA - v_accvgpr_read_b32 v67, a206 // 000000017438: D3D84043 180001CE - v_accvgpr_read_b32 v68, a210 // 000000017440: D3D84044 180001D2 - v_accvgpr_read_b32 v69, a214 // 000000017448: D3D84045 180001D6 - v_accvgpr_read_b32 v70, a218 // 000000017450: D3D84046 180001DA - v_accvgpr_read_b32 v71, a222 // 000000017458: D3D84047 180001DE - v_accvgpr_read_b32 v72, a226 // 000000017460: D3D84048 180001E2 - v_accvgpr_read_b32 v73, a230 // 000000017468: D3D84049 180001E6 - v_accvgpr_read_b32 v74, a234 // 000000017470: D3D8404A 180001EA - v_accvgpr_read_b32 v75, a238 // 000000017478: D3D8404B 180001EE - v_accvgpr_read_b32 v76, a242 // 000000017480: D3D8404C 180001F2 - v_accvgpr_read_b32 v77, a246 // 000000017488: D3D8404D 180001F6 - v_accvgpr_read_b32 v78, a250 // 000000017490: D3D8404E 180001FA - v_accvgpr_read_b32 v79, a254 // 000000017498: D3D8404F 180001FE - v_accvgpr_read_b32 v80, a3 // 0000000174A0: D3D84050 18000103 - v_accvgpr_read_b32 v81, a7 // 0000000174A8: D3D84051 18000107 - v_accvgpr_read_b32 v82, a11 // 0000000174B0: D3D84052 1800010B - v_accvgpr_read_b32 v83, a15 // 0000000174B8: D3D84053 1800010F - v_accvgpr_read_b32 v84, a19 // 0000000174C0: D3D84054 
18000113 - v_accvgpr_read_b32 v85, a23 // 0000000174C8: D3D84055 18000117 - v_accvgpr_read_b32 v86, a27 // 0000000174D0: D3D84056 1800011B - v_accvgpr_read_b32 v87, a31 // 0000000174D8: D3D84057 1800011F - v_accvgpr_read_b32 v88, a35 // 0000000174E0: D3D84058 18000123 - v_accvgpr_read_b32 v89, a39 // 0000000174E8: D3D84059 18000127 - v_accvgpr_read_b32 v90, a43 // 0000000174F0: D3D8405A 1800012B - v_accvgpr_read_b32 v91, a47 // 0000000174F8: D3D8405B 1800012F - v_accvgpr_read_b32 v92, a51 // 000000017500: D3D8405C 18000133 - v_accvgpr_read_b32 v93, a55 // 000000017508: D3D8405D 18000137 - v_accvgpr_read_b32 v94, a59 // 000000017510: D3D8405E 1800013B - v_accvgpr_read_b32 v95, a63 // 000000017518: D3D8405F 1800013F - v_accvgpr_read_b32 v96, a67 // 000000017520: D3D84060 18000143 - v_accvgpr_read_b32 v97, a71 // 000000017528: D3D84061 18000147 - v_accvgpr_read_b32 v98, a75 // 000000017530: D3D84062 1800014B - v_accvgpr_read_b32 v99, a79 // 000000017538: D3D84063 1800014F - v_accvgpr_read_b32 v100, a83 // 000000017540: D3D84064 18000153 - v_accvgpr_read_b32 v101, a87 // 000000017548: D3D84065 18000157 - v_accvgpr_read_b32 v102, a91 // 000000017550: D3D84066 1800015B - v_accvgpr_read_b32 v103, a95 // 000000017558: D3D84067 1800015F - v_accvgpr_read_b32 v104, a99 // 000000017560: D3D84068 18000163 - v_accvgpr_read_b32 v105, a103 // 000000017568: D3D84069 18000167 - v_accvgpr_read_b32 v106, a107 // 000000017570: D3D8406A 1800016B - v_accvgpr_read_b32 v107, a111 // 000000017578: D3D8406B 1800016F - v_accvgpr_read_b32 v108, a115 // 000000017580: D3D8406C 18000173 - v_accvgpr_read_b32 v109, a119 // 000000017588: D3D8406D 18000177 - v_accvgpr_read_b32 v110, a123 // 000000017590: D3D8406E 1800017B - v_accvgpr_read_b32 v111, a127 // 000000017598: D3D8406F 1800017F - v_accvgpr_read_b32 v112, a131 // 0000000175A0: D3D84070 18000183 - v_accvgpr_read_b32 v113, a135 // 0000000175A8: D3D84071 18000187 - v_accvgpr_read_b32 v114, a139 // 0000000175B0: D3D84072 1800018B - 
v_accvgpr_read_b32 v115, a143 // 0000000175B8: D3D84073 1800018F - v_accvgpr_read_b32 v116, a147 // 0000000175C0: D3D84074 18000193 - v_accvgpr_read_b32 v117, a151 // 0000000175C8: D3D84075 18000197 - v_accvgpr_read_b32 v118, a155 // 0000000175D0: D3D84076 1800019B - v_accvgpr_read_b32 v119, a159 // 0000000175D8: D3D84077 1800019F - v_accvgpr_read_b32 v120, a163 // 0000000175E0: D3D84078 180001A3 - v_accvgpr_read_b32 v121, a167 // 0000000175E8: D3D84079 180001A7 - v_accvgpr_read_b32 v122, a171 // 0000000175F0: D3D8407A 180001AB - v_accvgpr_read_b32 v123, a175 // 0000000175F8: D3D8407B 180001AF - v_accvgpr_read_b32 v124, a179 // 000000017600: D3D8407C 180001B3 - v_accvgpr_read_b32 v125, a183 // 000000017608: D3D8407D 180001B7 - v_accvgpr_read_b32 v126, a187 // 000000017610: D3D8407E 180001BB - v_accvgpr_read_b32 v127, a191 // 000000017618: D3D8407F 180001BF - v_accvgpr_read_b32 v136, a195 // 000000017620: D3D84088 180001C3 - v_accvgpr_read_b32 v137, a199 // 000000017628: D3D84089 180001C7 - v_accvgpr_read_b32 v138, a203 // 000000017630: D3D8408A 180001CB - v_accvgpr_read_b32 v139, a207 // 000000017638: D3D8408B 180001CF - v_accvgpr_read_b32 v140, a211 // 000000017640: D3D8408C 180001D3 - v_accvgpr_read_b32 v141, a215 // 000000017648: D3D8408D 180001D7 - v_accvgpr_read_b32 v142, a219 // 000000017650: D3D8408E 180001DB - v_accvgpr_read_b32 v143, a223 // 000000017658: D3D8408F 180001DF - v_accvgpr_read_b32 v144, a227 // 000000017660: D3D84090 180001E3 - v_accvgpr_read_b32 v145, a231 // 000000017668: D3D84091 180001E7 - v_accvgpr_read_b32 v146, a235 // 000000017670: D3D84092 180001EB - v_accvgpr_read_b32 v147, a239 // 000000017678: D3D84093 180001EF - v_accvgpr_read_b32 v148, a243 // 000000017680: D3D84094 180001F3 - v_accvgpr_read_b32 v149, a247 // 000000017688: D3D84095 180001F7 - v_accvgpr_read_b32 v150, a251 // 000000017690: D3D84096 180001FB - v_accvgpr_read_b32 v151, a255 // 000000017698: D3D84097 180001FF - v_pk_mul_f32 v[16:17], s[44:45], v[16:17] 
op_sel_hi:[0,1] // 0000000176A0: D3B14010 1002202C - v_pk_mul_f32 v[18:19], s[44:45], v[18:19] op_sel_hi:[0,1] // 0000000176A8: D3B14012 1002242C - v_pk_mul_f32 v[20:21], s[44:45], v[20:21] op_sel_hi:[0,1] // 0000000176B0: D3B14014 1002282C - v_pk_mul_f32 v[22:23], s[44:45], v[22:23] op_sel_hi:[0,1] // 0000000176B8: D3B14016 10022C2C - v_pk_mul_f32 v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 0000000176C0: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 0000000176C8: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 0000000176D0: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] op_sel_hi:[0,1] // 0000000176D8: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 0000000176E0: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 0000000176E8: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 0000000176F0: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 0000000176F8: D3B14026 10024C2C - v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 000000017700: D3B14028 1002502C - v_pk_mul_f32 v[42:43], s[44:45], v[42:43] op_sel_hi:[0,1] // 000000017708: D3B1402A 1002542C - v_pk_mul_f32 v[44:45], s[44:45], v[44:45] op_sel_hi:[0,1] // 000000017710: D3B1402C 1002582C - v_pk_mul_f32 v[46:47], s[44:45], v[46:47] op_sel_hi:[0,1] // 000000017718: D3B1402E 10025C2C - v_pk_mul_f32 v[48:49], s[44:45], v[48:49] op_sel_hi:[0,1] // 000000017720: D3B14030 1002602C - v_pk_mul_f32 v[50:51], s[44:45], v[50:51] op_sel_hi:[0,1] // 000000017728: D3B14032 1002642C - v_pk_mul_f32 v[52:53], s[44:45], v[52:53] op_sel_hi:[0,1] // 000000017730: D3B14034 1002682C - v_pk_mul_f32 v[54:55], s[44:45], v[54:55] op_sel_hi:[0,1] // 000000017738: D3B14036 10026C2C - v_pk_mul_f32 v[56:57], s[44:45], v[56:57] op_sel_hi:[0,1] // 000000017740: D3B14038 1002702C - v_pk_mul_f32 v[58:59], s[44:45], v[58:59] 
op_sel_hi:[0,1] // 000000017748: D3B1403A 1002742C - v_pk_mul_f32 v[60:61], s[44:45], v[60:61] op_sel_hi:[0,1] // 000000017750: D3B1403C 1002782C - v_pk_mul_f32 v[62:63], s[44:45], v[62:63] op_sel_hi:[0,1] // 000000017758: D3B1403E 10027C2C - v_pk_mul_f32 v[64:65], s[44:45], v[64:65] op_sel_hi:[0,1] // 000000017760: D3B14040 1002802C - v_pk_mul_f32 v[66:67], s[44:45], v[66:67] op_sel_hi:[0,1] // 000000017768: D3B14042 1002842C - v_pk_mul_f32 v[68:69], s[44:45], v[68:69] op_sel_hi:[0,1] // 000000017770: D3B14044 1002882C - v_pk_mul_f32 v[70:71], s[44:45], v[70:71] op_sel_hi:[0,1] // 000000017778: D3B14046 10028C2C - v_pk_mul_f32 v[72:73], s[44:45], v[72:73] op_sel_hi:[0,1] // 000000017780: D3B14048 1002902C - v_pk_mul_f32 v[74:75], s[44:45], v[74:75] op_sel_hi:[0,1] // 000000017788: D3B1404A 1002942C - v_pk_mul_f32 v[76:77], s[44:45], v[76:77] op_sel_hi:[0,1] // 000000017790: D3B1404C 1002982C - v_pk_mul_f32 v[78:79], s[44:45], v[78:79] op_sel_hi:[0,1] // 000000017798: D3B1404E 10029C2C - v_pk_mul_f32 v[80:81], s[44:45], v[80:81] op_sel_hi:[0,1] // 0000000177A0: D3B14050 1002A02C - v_pk_mul_f32 v[82:83], s[44:45], v[82:83] op_sel_hi:[0,1] // 0000000177A8: D3B14052 1002A42C - v_pk_mul_f32 v[84:85], s[44:45], v[84:85] op_sel_hi:[0,1] // 0000000177B0: D3B14054 1002A82C - v_pk_mul_f32 v[86:87], s[44:45], v[86:87] op_sel_hi:[0,1] // 0000000177B8: D3B14056 1002AC2C - v_pk_mul_f32 v[88:89], s[44:45], v[88:89] op_sel_hi:[0,1] // 0000000177C0: D3B14058 1002B02C - v_pk_mul_f32 v[90:91], s[44:45], v[90:91] op_sel_hi:[0,1] // 0000000177C8: D3B1405A 1002B42C - v_pk_mul_f32 v[92:93], s[44:45], v[92:93] op_sel_hi:[0,1] // 0000000177D0: D3B1405C 1002B82C - v_pk_mul_f32 v[94:95], s[44:45], v[94:95] op_sel_hi:[0,1] // 0000000177D8: D3B1405E 1002BC2C - v_pk_mul_f32 v[96:97], s[44:45], v[96:97] op_sel_hi:[0,1] // 0000000177E0: D3B14060 1002C02C - v_pk_mul_f32 v[98:99], s[44:45], v[98:99] op_sel_hi:[0,1] // 0000000177E8: D3B14062 1002C42C - v_pk_mul_f32 v[100:101], s[44:45], v[100:101] 
op_sel_hi:[0,1]// 0000000177F0: D3B14064 1002C82C - v_pk_mul_f32 v[102:103], s[44:45], v[102:103] op_sel_hi:[0,1]// 0000000177F8: D3B14066 1002CC2C - v_pk_mul_f32 v[104:105], s[44:45], v[104:105] op_sel_hi:[0,1]// 000000017800: D3B14068 1002D02C - v_pk_mul_f32 v[106:107], s[44:45], v[106:107] op_sel_hi:[0,1]// 000000017808: D3B1406A 1002D42C - v_pk_mul_f32 v[108:109], s[44:45], v[108:109] op_sel_hi:[0,1]// 000000017810: D3B1406C 1002D82C - v_pk_mul_f32 v[110:111], s[44:45], v[110:111] op_sel_hi:[0,1]// 000000017818: D3B1406E 1002DC2C - v_pk_mul_f32 v[112:113], s[44:45], v[112:113] op_sel_hi:[0,1]// 000000017820: D3B14070 1002E02C - v_pk_mul_f32 v[114:115], s[44:45], v[114:115] op_sel_hi:[0,1]// 000000017828: D3B14072 1002E42C - v_pk_mul_f32 v[116:117], s[44:45], v[116:117] op_sel_hi:[0,1]// 000000017830: D3B14074 1002E82C - v_pk_mul_f32 v[118:119], s[44:45], v[118:119] op_sel_hi:[0,1]// 000000017838: D3B14076 1002EC2C - v_pk_mul_f32 v[120:121], s[44:45], v[120:121] op_sel_hi:[0,1]// 000000017840: D3B14078 1002F02C - v_pk_mul_f32 v[122:123], s[44:45], v[122:123] op_sel_hi:[0,1]// 000000017848: D3B1407A 1002F42C - v_pk_mul_f32 v[124:125], s[44:45], v[124:125] op_sel_hi:[0,1]// 000000017850: D3B1407C 1002F82C - v_pk_mul_f32 v[126:127], s[44:45], v[126:127] op_sel_hi:[0,1]// 000000017858: D3B1407E 1002FC2C - v_pk_mul_f32 v[136:137], s[44:45], v[136:137] op_sel_hi:[0,1]// 000000017860: D3B14088 1003102C - v_pk_mul_f32 v[138:139], s[44:45], v[138:139] op_sel_hi:[0,1]// 000000017868: D3B1408A 1003142C - v_pk_mul_f32 v[140:141], s[44:45], v[140:141] op_sel_hi:[0,1]// 000000017870: D3B1408C 1003182C - v_pk_mul_f32 v[142:143], s[44:45], v[142:143] op_sel_hi:[0,1]// 000000017878: D3B1408E 10031C2C - v_pk_mul_f32 v[144:145], s[44:45], v[144:145] op_sel_hi:[0,1]// 000000017880: D3B14090 1003202C - v_pk_mul_f32 v[146:147], s[44:45], v[146:147] op_sel_hi:[0,1]// 000000017888: D3B14092 1003242C - v_pk_mul_f32 v[148:149], s[44:45], v[148:149] op_sel_hi:[0,1]// 000000017890: 
D3B14094 1003282C - v_pk_mul_f32 v[150:151], s[44:45], v[150:151] op_sel_hi:[0,1]// 000000017898: D3B14096 10032C2C - s_waitcnt vmcnt(0) // 0000000178A0: BF8C0F70 - v_mov_b32_e32 v12, 0xffff0000 // 0000000178A4: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 0000000178AC: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 0000000178B4: 7E1C02FF 00007FFF - v_cvt_f32_bf16_sdwa v8, v128 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000178BC: 7E10B6F9 00041680 - v_fmac_f32_e64 v16, v8, s45 // 0000000178C4: D13B0010 00005B08 - v_cvt_f32_bf16_sdwa v8, v128 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000178CC: 7E10B6F9 00051680 - v_fmac_f32_e64 v17, v8, s45 // 0000000178D4: D13B0011 00005B08 - v_cvt_f32_bf16_sdwa v8, v129 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000178DC: 7E10B6F9 00041681 - v_fmac_f32_e64 v18, v8, s45 // 0000000178E4: D13B0012 00005B08 - v_cvt_f32_bf16_sdwa v8, v129 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000178EC: 7E10B6F9 00051681 - v_fmac_f32_e64 v19, v8, s45 // 0000000178F4: D13B0013 00005B08 - v_cvt_f32_bf16_sdwa v8, v130 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000178FC: 7E10B6F9 00041682 - v_fmac_f32_e64 v20, v8, s45 // 000000017904: D13B0014 00005B08 - v_cvt_f32_bf16_sdwa v8, v130 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001790C: 7E10B6F9 00051682 - v_fmac_f32_e64 v21, v8, s45 // 000000017914: D13B0015 00005B08 - v_cvt_f32_bf16_sdwa v8, v131 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001791C: 7E10B6F9 00041683 - v_fmac_f32_e64 v22, v8, s45 // 000000017924: D13B0016 00005B08 - v_cvt_f32_bf16_sdwa v8, v131 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001792C: 7E10B6F9 00051683 - v_fmac_f32_e64 v23, v8, s45 // 000000017934: D13B0017 00005B08 - v_cvt_pk_bf16_f32 v16, v16, v17 // 00000001793C: D2680010 00022310 - v_cvt_pk_bf16_f32 v17, v18, v19 // 000000017944: D2680011 00022712 - 
v_cvt_pk_bf16_f32 v18, v20, v21 // 00000001794C: D2680012 00022B14 - v_cvt_pk_bf16_f32 v19, v22, v23 // 000000017954: D2680013 00022F16 - buffer_store_dwordx4 v[16:19], v15, s[16:19], 0 offen nt // 00000001795C: E07E1000 8004100F - v_cvt_f32_bf16_sdwa v8, v152 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017964: 7E10B6F9 00041698 - v_fmac_f32_e64 v24, v8, s45 // 00000001796C: D13B0018 00005B08 - v_cvt_f32_bf16_sdwa v8, v152 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017974: 7E10B6F9 00051698 - v_fmac_f32_e64 v25, v8, s45 // 00000001797C: D13B0019 00005B08 - v_cvt_f32_bf16_sdwa v8, v153 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017984: 7E10B6F9 00041699 - v_fmac_f32_e64 v26, v8, s45 // 00000001798C: D13B001A 00005B08 - v_cvt_f32_bf16_sdwa v8, v153 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017994: 7E10B6F9 00051699 - v_fmac_f32_e64 v27, v8, s45 // 00000001799C: D13B001B 00005B08 - v_cvt_f32_bf16_sdwa v8, v154 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000179A4: 7E10B6F9 0004169A - v_fmac_f32_e64 v28, v8, s45 // 0000000179AC: D13B001C 00005B08 - v_cvt_f32_bf16_sdwa v8, v154 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000179B4: 7E10B6F9 0005169A - v_fmac_f32_e64 v29, v8, s45 // 0000000179BC: D13B001D 00005B08 - v_cvt_f32_bf16_sdwa v8, v155 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000179C4: 7E10B6F9 0004169B - v_fmac_f32_e64 v30, v8, s45 // 0000000179CC: D13B001E 00005B08 - v_cvt_f32_bf16_sdwa v8, v155 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000179D4: 7E10B6F9 0005169B - v_fmac_f32_e64 v31, v8, s45 // 0000000179DC: D13B001F 00005B08 - v_cvt_pk_bf16_f32 v24, v24, v25 // 0000000179E4: D2680018 00023318 - v_cvt_pk_bf16_f32 v25, v26, v27 // 0000000179EC: D2680019 0002371A - v_cvt_pk_bf16_f32 v26, v28, v29 // 0000000179F4: D268001A 00023B1C - v_cvt_pk_bf16_f32 v27, v30, v31 // 0000000179FC: D268001B 
00023F1E - buffer_store_dwordx4 v[24:27], v135, s[16:19], 0 offen nt // 000000017A04: E07E1000 80041887 - v_cvt_f32_bf16_sdwa v8, v156 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017A0C: 7E10B6F9 0004169C - v_fmac_f32_e64 v32, v8, s45 // 000000017A14: D13B0020 00005B08 - v_cvt_f32_bf16_sdwa v8, v156 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017A1C: 7E10B6F9 0005169C - v_fmac_f32_e64 v33, v8, s45 // 000000017A24: D13B0021 00005B08 - v_cvt_f32_bf16_sdwa v8, v157 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017A2C: 7E10B6F9 0004169D - v_fmac_f32_e64 v34, v8, s45 // 000000017A34: D13B0022 00005B08 - v_cvt_f32_bf16_sdwa v8, v157 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017A3C: 7E10B6F9 0005169D - v_fmac_f32_e64 v35, v8, s45 // 000000017A44: D13B0023 00005B08 - v_cvt_f32_bf16_sdwa v8, v158 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017A4C: 7E10B6F9 0004169E - v_fmac_f32_e64 v36, v8, s45 // 000000017A54: D13B0024 00005B08 - v_cvt_f32_bf16_sdwa v8, v158 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017A5C: 7E10B6F9 0005169E - v_fmac_f32_e64 v37, v8, s45 // 000000017A64: D13B0025 00005B08 - v_cvt_f32_bf16_sdwa v8, v159 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017A6C: 7E10B6F9 0004169F - v_fmac_f32_e64 v38, v8, s45 // 000000017A74: D13B0026 00005B08 - v_cvt_f32_bf16_sdwa v8, v159 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017A7C: 7E10B6F9 0005169F - v_fmac_f32_e64 v39, v8, s45 // 000000017A84: D13B0027 00005B08 - v_cvt_pk_bf16_f32 v32, v32, v33 // 000000017A8C: D2680020 00024320 - v_cvt_pk_bf16_f32 v33, v34, v35 // 000000017A94: D2680021 00024722 - v_cvt_pk_bf16_f32 v34, v36, v37 // 000000017A9C: D2680022 00024B24 - v_cvt_pk_bf16_f32 v35, v38, v39 // 000000017AA4: D2680023 00024F26 - buffer_store_dwordx4 v[32:35], v160, s[16:19], 0 offen nt // 000000017AAC: E07E1000 800420A0 - v_cvt_f32_bf16_sdwa v8, 
v164 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017AB4: 7E10B6F9 000416A4 - v_fmac_f32_e64 v40, v8, s45 // 000000017ABC: D13B0028 00005B08 - v_cvt_f32_bf16_sdwa v8, v164 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017AC4: 7E10B6F9 000516A4 - v_fmac_f32_e64 v41, v8, s45 // 000000017ACC: D13B0029 00005B08 - v_cvt_f32_bf16_sdwa v8, v165 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017AD4: 7E10B6F9 000416A5 - v_fmac_f32_e64 v42, v8, s45 // 000000017ADC: D13B002A 00005B08 - v_cvt_f32_bf16_sdwa v8, v165 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017AE4: 7E10B6F9 000516A5 - v_fmac_f32_e64 v43, v8, s45 // 000000017AEC: D13B002B 00005B08 - v_cvt_f32_bf16_sdwa v8, v166 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017AF4: 7E10B6F9 000416A6 - v_fmac_f32_e64 v44, v8, s45 // 000000017AFC: D13B002C 00005B08 - v_cvt_f32_bf16_sdwa v8, v166 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017B04: 7E10B6F9 000516A6 - v_fmac_f32_e64 v45, v8, s45 // 000000017B0C: D13B002D 00005B08 - v_cvt_f32_bf16_sdwa v8, v167 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017B14: 7E10B6F9 000416A7 - v_fmac_f32_e64 v46, v8, s45 // 000000017B1C: D13B002E 00005B08 - v_cvt_f32_bf16_sdwa v8, v167 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017B24: 7E10B6F9 000516A7 - v_fmac_f32_e64 v47, v8, s45 // 000000017B2C: D13B002F 00005B08 - v_cvt_pk_bf16_f32 v40, v40, v41 // 000000017B34: D2680028 00025328 - v_cvt_pk_bf16_f32 v41, v42, v43 // 000000017B3C: D2680029 0002572A - v_cvt_pk_bf16_f32 v42, v44, v45 // 000000017B44: D268002A 00025B2C - v_cvt_pk_bf16_f32 v43, v46, v47 // 000000017B4C: D268002B 00025F2E - buffer_store_dwordx4 v[40:43], v161, s[16:19], 0 offen nt // 000000017B54: E07E1000 800428A1 - v_cvt_f32_bf16_sdwa v8, v168 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017B5C: 7E10B6F9 000416A8 - v_fmac_f32_e64 v48, v8, s45 // 
000000017B64: D13B0030 00005B08 - v_cvt_f32_bf16_sdwa v8, v168 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017B6C: 7E10B6F9 000516A8 - v_fmac_f32_e64 v49, v8, s45 // 000000017B74: D13B0031 00005B08 - v_cvt_f32_bf16_sdwa v8, v169 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017B7C: 7E10B6F9 000416A9 - v_fmac_f32_e64 v50, v8, s45 // 000000017B84: D13B0032 00005B08 - v_cvt_f32_bf16_sdwa v8, v169 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017B8C: 7E10B6F9 000516A9 - v_fmac_f32_e64 v51, v8, s45 // 000000017B94: D13B0033 00005B08 - v_cvt_f32_bf16_sdwa v8, v170 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017B9C: 7E10B6F9 000416AA - v_fmac_f32_e64 v52, v8, s45 // 000000017BA4: D13B0034 00005B08 - v_cvt_f32_bf16_sdwa v8, v170 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017BAC: 7E10B6F9 000516AA - v_fmac_f32_e64 v53, v8, s45 // 000000017BB4: D13B0035 00005B08 - v_cvt_f32_bf16_sdwa v8, v171 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017BBC: 7E10B6F9 000416AB - v_fmac_f32_e64 v54, v8, s45 // 000000017BC4: D13B0036 00005B08 - v_cvt_f32_bf16_sdwa v8, v171 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017BCC: 7E10B6F9 000516AB - v_fmac_f32_e64 v55, v8, s45 // 000000017BD4: D13B0037 00005B08 - v_cvt_pk_bf16_f32 v48, v48, v49 // 000000017BDC: D2680030 00026330 - v_cvt_pk_bf16_f32 v49, v50, v51 // 000000017BE4: D2680031 00026732 - v_cvt_pk_bf16_f32 v50, v52, v53 // 000000017BEC: D2680032 00026B34 - v_cvt_pk_bf16_f32 v51, v54, v55 // 000000017BF4: D2680033 00026F36 - buffer_store_dwordx4 v[48:51], v162, s[16:19], 0 offen nt // 000000017BFC: E07E1000 800430A2 - v_cvt_f32_bf16_sdwa v8, v172 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017C04: 7E10B6F9 000416AC - v_fmac_f32_e64 v56, v8, s45 // 000000017C0C: D13B0038 00005B08 - v_cvt_f32_bf16_sdwa v8, v172 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 
000000017C14: 7E10B6F9 000516AC - v_fmac_f32_e64 v57, v8, s45 // 000000017C1C: D13B0039 00005B08 - v_cvt_f32_bf16_sdwa v8, v173 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017C24: 7E10B6F9 000416AD - v_fmac_f32_e64 v58, v8, s45 // 000000017C2C: D13B003A 00005B08 - v_cvt_f32_bf16_sdwa v8, v173 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017C34: 7E10B6F9 000516AD - v_fmac_f32_e64 v59, v8, s45 // 000000017C3C: D13B003B 00005B08 - v_cvt_f32_bf16_sdwa v8, v174 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017C44: 7E10B6F9 000416AE - v_fmac_f32_e64 v60, v8, s45 // 000000017C4C: D13B003C 00005B08 - v_cvt_f32_bf16_sdwa v8, v174 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017C54: 7E10B6F9 000516AE - v_fmac_f32_e64 v61, v8, s45 // 000000017C5C: D13B003D 00005B08 - v_cvt_f32_bf16_sdwa v8, v175 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017C64: 7E10B6F9 000416AF - v_fmac_f32_e64 v62, v8, s45 // 000000017C6C: D13B003E 00005B08 - v_cvt_f32_bf16_sdwa v8, v175 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017C74: 7E10B6F9 000516AF - v_fmac_f32_e64 v63, v8, s45 // 000000017C7C: D13B003F 00005B08 - v_cvt_pk_bf16_f32 v56, v56, v57 // 000000017C84: D2680038 00027338 - v_cvt_pk_bf16_f32 v57, v58, v59 // 000000017C8C: D2680039 0002773A - v_cvt_pk_bf16_f32 v58, v60, v61 // 000000017C94: D268003A 00027B3C - v_cvt_pk_bf16_f32 v59, v62, v63 // 000000017C9C: D268003B 00027F3E - buffer_store_dwordx4 v[56:59], v163, s[16:19], 0 offen nt // 000000017CA4: E07E1000 800438A3 - v_cvt_f32_bf16_sdwa v8, v176 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017CAC: 7E10B6F9 000416B0 - v_fmac_f32_e64 v64, v8, s45 // 000000017CB4: D13B0040 00005B08 - v_cvt_f32_bf16_sdwa v8, v176 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017CBC: 7E10B6F9 000516B0 - v_fmac_f32_e64 v65, v8, s45 // 000000017CC4: D13B0041 00005B08 - v_cvt_f32_bf16_sdwa v8, v177 
dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017CCC: 7E10B6F9 000416B1 - v_fmac_f32_e64 v66, v8, s45 // 000000017CD4: D13B0042 00005B08 - v_cvt_f32_bf16_sdwa v8, v177 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017CDC: 7E10B6F9 000516B1 - v_fmac_f32_e64 v67, v8, s45 // 000000017CE4: D13B0043 00005B08 - v_cvt_f32_bf16_sdwa v8, v178 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017CEC: 7E10B6F9 000416B2 - v_fmac_f32_e64 v68, v8, s45 // 000000017CF4: D13B0044 00005B08 - v_cvt_f32_bf16_sdwa v8, v178 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017CFC: 7E10B6F9 000516B2 - v_fmac_f32_e64 v69, v8, s45 // 000000017D04: D13B0045 00005B08 - v_cvt_f32_bf16_sdwa v8, v179 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017D0C: 7E10B6F9 000416B3 - v_fmac_f32_e64 v70, v8, s45 // 000000017D14: D13B0046 00005B08 - v_cvt_f32_bf16_sdwa v8, v179 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017D1C: 7E10B6F9 000516B3 - v_fmac_f32_e64 v71, v8, s45 // 000000017D24: D13B0047 00005B08 - v_cvt_pk_bf16_f32 v64, v64, v65 // 000000017D2C: D2680040 00028340 - v_cvt_pk_bf16_f32 v65, v66, v67 // 000000017D34: D2680041 00028742 - v_cvt_pk_bf16_f32 v66, v68, v69 // 000000017D3C: D2680042 00028B44 - v_cvt_pk_bf16_f32 v67, v70, v71 // 000000017D44: D2680043 00028F46 - buffer_store_dwordx4 v[64:67], v180, s[16:19], 0 offen nt // 000000017D4C: E07E1000 800440B4 - v_cvt_f32_bf16_sdwa v8, v184 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017D54: 7E10B6F9 000416B8 - v_fmac_f32_e64 v72, v8, s45 // 000000017D5C: D13B0048 00005B08 - v_cvt_f32_bf16_sdwa v8, v184 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017D64: 7E10B6F9 000516B8 - v_fmac_f32_e64 v73, v8, s45 // 000000017D6C: D13B0049 00005B08 - v_cvt_f32_bf16_sdwa v8, v185 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017D74: 7E10B6F9 000416B9 - v_fmac_f32_e64 v74, v8, s45 // 
000000017D7C: D13B004A 00005B08 - v_cvt_f32_bf16_sdwa v8, v185 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017D84: 7E10B6F9 000516B9 - v_fmac_f32_e64 v75, v8, s45 // 000000017D8C: D13B004B 00005B08 - v_cvt_f32_bf16_sdwa v8, v186 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017D94: 7E10B6F9 000416BA - v_fmac_f32_e64 v76, v8, s45 // 000000017D9C: D13B004C 00005B08 - v_cvt_f32_bf16_sdwa v8, v186 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017DA4: 7E10B6F9 000516BA - v_fmac_f32_e64 v77, v8, s45 // 000000017DAC: D13B004D 00005B08 - v_cvt_f32_bf16_sdwa v8, v187 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017DB4: 7E10B6F9 000416BB - v_fmac_f32_e64 v78, v8, s45 // 000000017DBC: D13B004E 00005B08 - v_cvt_f32_bf16_sdwa v8, v187 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017DC4: 7E10B6F9 000516BB - v_fmac_f32_e64 v79, v8, s45 // 000000017DCC: D13B004F 00005B08 - v_cvt_pk_bf16_f32 v72, v72, v73 // 000000017DD4: D2680048 00029348 - v_cvt_pk_bf16_f32 v73, v74, v75 // 000000017DDC: D2680049 0002974A - v_cvt_pk_bf16_f32 v74, v76, v77 // 000000017DE4: D268004A 00029B4C - v_cvt_pk_bf16_f32 v75, v78, v79 // 000000017DEC: D268004B 00029F4E - buffer_store_dwordx4 v[72:75], v181, s[16:19], 0 offen nt // 000000017DF4: E07E1000 800448B5 - v_cvt_f32_bf16_sdwa v8, v188 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017DFC: 7E10B6F9 000416BC - v_fmac_f32_e64 v80, v8, s45 // 000000017E04: D13B0050 00005B08 - v_cvt_f32_bf16_sdwa v8, v188 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017E0C: 7E10B6F9 000516BC - v_fmac_f32_e64 v81, v8, s45 // 000000017E14: D13B0051 00005B08 - v_cvt_f32_bf16_sdwa v8, v189 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017E1C: 7E10B6F9 000416BD - v_fmac_f32_e64 v82, v8, s45 // 000000017E24: D13B0052 00005B08 - v_cvt_f32_bf16_sdwa v8, v189 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 
000000017E2C: 7E10B6F9 000516BD - v_fmac_f32_e64 v83, v8, s45 // 000000017E34: D13B0053 00005B08 - v_cvt_f32_bf16_sdwa v8, v190 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017E3C: 7E10B6F9 000416BE - v_fmac_f32_e64 v84, v8, s45 // 000000017E44: D13B0054 00005B08 - v_cvt_f32_bf16_sdwa v8, v190 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017E4C: 7E10B6F9 000516BE - v_fmac_f32_e64 v85, v8, s45 // 000000017E54: D13B0055 00005B08 - v_cvt_f32_bf16_sdwa v8, v191 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017E5C: 7E10B6F9 000416BF - v_fmac_f32_e64 v86, v8, s45 // 000000017E64: D13B0056 00005B08 - v_cvt_f32_bf16_sdwa v8, v191 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017E6C: 7E10B6F9 000516BF - v_fmac_f32_e64 v87, v8, s45 // 000000017E74: D13B0057 00005B08 - v_cvt_pk_bf16_f32 v80, v80, v81 // 000000017E7C: D2680050 0002A350 - v_cvt_pk_bf16_f32 v81, v82, v83 // 000000017E84: D2680051 0002A752 - v_cvt_pk_bf16_f32 v82, v84, v85 // 000000017E8C: D2680052 0002AB54 - v_cvt_pk_bf16_f32 v83, v86, v87 // 000000017E94: D2680053 0002AF56 - buffer_store_dwordx4 v[80:83], v182, s[16:19], 0 offen nt // 000000017E9C: E07E1000 800450B6 - v_cvt_f32_bf16_sdwa v8, v192 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017EA4: 7E10B6F9 000416C0 - v_fmac_f32_e64 v88, v8, s45 // 000000017EAC: D13B0058 00005B08 - v_cvt_f32_bf16_sdwa v8, v192 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017EB4: 7E10B6F9 000516C0 - v_fmac_f32_e64 v89, v8, s45 // 000000017EBC: D13B0059 00005B08 - v_cvt_f32_bf16_sdwa v8, v193 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017EC4: 7E10B6F9 000416C1 - v_fmac_f32_e64 v90, v8, s45 // 000000017ECC: D13B005A 00005B08 - v_cvt_f32_bf16_sdwa v8, v193 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017ED4: 7E10B6F9 000516C1 - v_fmac_f32_e64 v91, v8, s45 // 000000017EDC: D13B005B 00005B08 - v_cvt_f32_bf16_sdwa v8, v194 
dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017EE4: 7E10B6F9 000416C2 - v_fmac_f32_e64 v92, v8, s45 // 000000017EEC: D13B005C 00005B08 - v_cvt_f32_bf16_sdwa v8, v194 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017EF4: 7E10B6F9 000516C2 - v_fmac_f32_e64 v93, v8, s45 // 000000017EFC: D13B005D 00005B08 - v_cvt_f32_bf16_sdwa v8, v195 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017F04: 7E10B6F9 000416C3 - v_fmac_f32_e64 v94, v8, s45 // 000000017F0C: D13B005E 00005B08 - v_cvt_f32_bf16_sdwa v8, v195 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017F14: 7E10B6F9 000516C3 - v_fmac_f32_e64 v95, v8, s45 // 000000017F1C: D13B005F 00005B08 - v_cvt_pk_bf16_f32 v88, v88, v89 // 000000017F24: D2680058 0002B358 - v_cvt_pk_bf16_f32 v89, v90, v91 // 000000017F2C: D2680059 0002B75A - v_cvt_pk_bf16_f32 v90, v92, v93 // 000000017F34: D268005A 0002BB5C - v_cvt_pk_bf16_f32 v91, v94, v95 // 000000017F3C: D268005B 0002BF5E - buffer_store_dwordx4 v[88:91], v183, s[16:19], 0 offen nt // 000000017F44: E07E1000 800458B7 - v_cvt_f32_bf16_sdwa v8, v196 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017F4C: 7E10B6F9 000416C4 - v_fmac_f32_e64 v96, v8, s45 // 000000017F54: D13B0060 00005B08 - v_cvt_f32_bf16_sdwa v8, v196 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017F5C: 7E10B6F9 000516C4 - v_fmac_f32_e64 v97, v8, s45 // 000000017F64: D13B0061 00005B08 - v_cvt_f32_bf16_sdwa v8, v197 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017F6C: 7E10B6F9 000416C5 - v_fmac_f32_e64 v98, v8, s45 // 000000017F74: D13B0062 00005B08 - v_cvt_f32_bf16_sdwa v8, v197 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017F7C: 7E10B6F9 000516C5 - v_fmac_f32_e64 v99, v8, s45 // 000000017F84: D13B0063 00005B08 - v_cvt_f32_bf16_sdwa v8, v198 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017F8C: 7E10B6F9 000416C6 - v_fmac_f32_e64 v100, v8, s45 // 
000000017F94: D13B0064 00005B08 - v_cvt_f32_bf16_sdwa v8, v198 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017F9C: 7E10B6F9 000516C6 - v_fmac_f32_e64 v101, v8, s45 // 000000017FA4: D13B0065 00005B08 - v_cvt_f32_bf16_sdwa v8, v199 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017FAC: 7E10B6F9 000416C7 - v_fmac_f32_e64 v102, v8, s45 // 000000017FB4: D13B0066 00005B08 - v_cvt_f32_bf16_sdwa v8, v199 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000017FBC: 7E10B6F9 000516C7 - v_fmac_f32_e64 v103, v8, s45 // 000000017FC4: D13B0067 00005B08 - v_cvt_pk_bf16_f32 v96, v96, v97 // 000000017FCC: D2680060 0002C360 - v_cvt_pk_bf16_f32 v97, v98, v99 // 000000017FD4: D2680061 0002C762 - v_cvt_pk_bf16_f32 v98, v100, v101 // 000000017FDC: D2680062 0002CB64 - v_cvt_pk_bf16_f32 v99, v102, v103 // 000000017FE4: D2680063 0002CF66 - buffer_store_dwordx4 v[96:99], v200, s[16:19], 0 offen nt // 000000017FEC: E07E1000 800460C8 - v_cvt_f32_bf16_sdwa v8, v204 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000017FF4: 7E10B6F9 000416CC - v_fmac_f32_e64 v104, v8, s45 // 000000017FFC: D13B0068 00005B08 - v_cvt_f32_bf16_sdwa v8, v204 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000018004: 7E10B6F9 000516CC - v_fmac_f32_e64 v105, v8, s45 // 00000001800C: D13B0069 00005B08 - v_cvt_f32_bf16_sdwa v8, v205 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000018014: 7E10B6F9 000416CD - v_fmac_f32_e64 v106, v8, s45 // 00000001801C: D13B006A 00005B08 - v_cvt_f32_bf16_sdwa v8, v205 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000018024: 7E10B6F9 000516CD - v_fmac_f32_e64 v107, v8, s45 // 00000001802C: D13B006B 00005B08 - v_cvt_f32_bf16_sdwa v8, v206 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000018034: 7E10B6F9 000416CE - v_fmac_f32_e64 v108, v8, s45 // 00000001803C: D13B006C 00005B08 - v_cvt_f32_bf16_sdwa v8, v206 dst_sel:DWORD dst_unused:UNUSED_PRESERVE 
src0_sel:WORD_1// 000000018044: 7E10B6F9 000516CE - v_fmac_f32_e64 v109, v8, s45 // 00000001804C: D13B006D 00005B08 - v_cvt_f32_bf16_sdwa v8, v207 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000018054: 7E10B6F9 000416CF - v_fmac_f32_e64 v110, v8, s45 // 00000001805C: D13B006E 00005B08 - v_cvt_f32_bf16_sdwa v8, v207 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000018064: 7E10B6F9 000516CF - v_fmac_f32_e64 v111, v8, s45 // 00000001806C: D13B006F 00005B08 - v_cvt_pk_bf16_f32 v104, v104, v105 // 000000018074: D2680068 0002D368 - v_cvt_pk_bf16_f32 v105, v106, v107 // 00000001807C: D2680069 0002D76A - v_cvt_pk_bf16_f32 v106, v108, v109 // 000000018084: D268006A 0002DB6C - v_cvt_pk_bf16_f32 v107, v110, v111 // 00000001808C: D268006B 0002DF6E - buffer_store_dwordx4 v[104:107], v201, s[16:19], 0 offen nt// 000000018094: E07E1000 800468C9 - v_cvt_f32_bf16_sdwa v8, v208 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001809C: 7E10B6F9 000416D0 - v_fmac_f32_e64 v112, v8, s45 // 0000000180A4: D13B0070 00005B08 - v_cvt_f32_bf16_sdwa v8, v208 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000180AC: 7E10B6F9 000516D0 - v_fmac_f32_e64 v113, v8, s45 // 0000000180B4: D13B0071 00005B08 - v_cvt_f32_bf16_sdwa v8, v209 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000180BC: 7E10B6F9 000416D1 - v_fmac_f32_e64 v114, v8, s45 // 0000000180C4: D13B0072 00005B08 - v_cvt_f32_bf16_sdwa v8, v209 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000180CC: 7E10B6F9 000516D1 - v_fmac_f32_e64 v115, v8, s45 // 0000000180D4: D13B0073 00005B08 - v_cvt_f32_bf16_sdwa v8, v210 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000180DC: 7E10B6F9 000416D2 - v_fmac_f32_e64 v116, v8, s45 // 0000000180E4: D13B0074 00005B08 - v_cvt_f32_bf16_sdwa v8, v210 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000180EC: 7E10B6F9 000516D2 - v_fmac_f32_e64 v117, v8, s45 // 0000000180F4: D13B0075 
00005B08 - v_cvt_f32_bf16_sdwa v8, v211 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000180FC: 7E10B6F9 000416D3 - v_fmac_f32_e64 v118, v8, s45 // 000000018104: D13B0076 00005B08 - v_cvt_f32_bf16_sdwa v8, v211 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001810C: 7E10B6F9 000516D3 - v_fmac_f32_e64 v119, v8, s45 // 000000018114: D13B0077 00005B08 - v_cvt_pk_bf16_f32 v112, v112, v113 // 00000001811C: D2680070 0002E370 - v_cvt_pk_bf16_f32 v113, v114, v115 // 000000018124: D2680071 0002E772 - v_cvt_pk_bf16_f32 v114, v116, v117 // 00000001812C: D2680072 0002EB74 - v_cvt_pk_bf16_f32 v115, v118, v119 // 000000018134: D2680073 0002EF76 - buffer_store_dwordx4 v[112:115], v202, s[16:19], 0 offen nt// 00000001813C: E07E1000 800470CA - v_cvt_f32_bf16_sdwa v8, v212 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000018144: 7E10B6F9 000416D4 - v_fmac_f32_e64 v120, v8, s45 // 00000001814C: D13B0078 00005B08 - v_cvt_f32_bf16_sdwa v8, v212 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000018154: 7E10B6F9 000516D4 - v_fmac_f32_e64 v121, v8, s45 // 00000001815C: D13B0079 00005B08 - v_cvt_f32_bf16_sdwa v8, v213 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000018164: 7E10B6F9 000416D5 - v_fmac_f32_e64 v122, v8, s45 // 00000001816C: D13B007A 00005B08 - v_cvt_f32_bf16_sdwa v8, v213 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000018174: 7E10B6F9 000516D5 - v_fmac_f32_e64 v123, v8, s45 // 00000001817C: D13B007B 00005B08 - v_cvt_f32_bf16_sdwa v8, v214 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000018184: 7E10B6F9 000416D6 - v_fmac_f32_e64 v124, v8, s45 // 00000001818C: D13B007C 00005B08 - v_cvt_f32_bf16_sdwa v8, v214 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000018194: 7E10B6F9 000516D6 - v_fmac_f32_e64 v125, v8, s45 // 00000001819C: D13B007D 00005B08 - v_cvt_f32_bf16_sdwa v8, v215 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 
0000000181A4: 7E10B6F9 000416D7 - v_fmac_f32_e64 v126, v8, s45 // 0000000181AC: D13B007E 00005B08 - v_cvt_f32_bf16_sdwa v8, v215 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000181B4: 7E10B6F9 000516D7 - v_fmac_f32_e64 v127, v8, s45 // 0000000181BC: D13B007F 00005B08 - v_cvt_pk_bf16_f32 v120, v120, v121 // 0000000181C4: D2680078 0002F378 - v_cvt_pk_bf16_f32 v121, v122, v123 // 0000000181CC: D2680079 0002F77A - v_cvt_pk_bf16_f32 v122, v124, v125 // 0000000181D4: D268007A 0002FB7C - v_cvt_pk_bf16_f32 v123, v126, v127 // 0000000181DC: D268007B 0002FF7E - buffer_store_dwordx4 v[120:123], v203, s[16:19], 0 offen nt// 0000000181E4: E07E1000 800478CB - v_cvt_f32_bf16_sdwa v8, v216 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000181EC: 7E10B6F9 000416D8 - v_fmac_f32_e64 v136, v8, s45 // 0000000181F4: D13B0088 00005B08 - v_cvt_f32_bf16_sdwa v8, v216 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000181FC: 7E10B6F9 000516D8 - v_fmac_f32_e64 v137, v8, s45 // 000000018204: D13B0089 00005B08 - v_cvt_f32_bf16_sdwa v8, v217 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001820C: 7E10B6F9 000416D9 - v_fmac_f32_e64 v138, v8, s45 // 000000018214: D13B008A 00005B08 - v_cvt_f32_bf16_sdwa v8, v217 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001821C: 7E10B6F9 000516D9 - v_fmac_f32_e64 v139, v8, s45 // 000000018224: D13B008B 00005B08 - v_cvt_f32_bf16_sdwa v8, v218 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001822C: 7E10B6F9 000416DA - v_fmac_f32_e64 v140, v8, s45 // 000000018234: D13B008C 00005B08 - v_cvt_f32_bf16_sdwa v8, v218 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001823C: 7E10B6F9 000516DA - v_fmac_f32_e64 v141, v8, s45 // 000000018244: D13B008D 00005B08 - v_cvt_f32_bf16_sdwa v8, v219 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001824C: 7E10B6F9 000416DB - v_fmac_f32_e64 v142, v8, s45 // 000000018254: D13B008E 00005B08 - 
v_cvt_f32_bf16_sdwa v8, v219 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 00000001825C: 7E10B6F9 000516DB - v_fmac_f32_e64 v143, v8, s45 // 000000018264: D13B008F 00005B08 - v_cvt_pk_bf16_f32 v136, v136, v137 // 00000001826C: D2680088 00031388 - v_cvt_pk_bf16_f32 v137, v138, v139 // 000000018274: D2680089 0003178A - v_cvt_pk_bf16_f32 v138, v140, v141 // 00000001827C: D268008A 00031B8C - v_cvt_pk_bf16_f32 v139, v142, v143 // 000000018284: D268008B 00031F8E - buffer_store_dwordx4 v[136:139], v220, s[16:19], 0 offen nt// 00000001828C: E07E1000 800488DC - v_cvt_f32_bf16_sdwa v8, v224 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000018294: 7E10B6F9 000416E0 - v_fmac_f32_e64 v144, v8, s45 // 00000001829C: D13B0090 00005B08 - v_cvt_f32_bf16_sdwa v8, v224 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000182A4: 7E10B6F9 000516E0 - v_fmac_f32_e64 v145, v8, s45 // 0000000182AC: D13B0091 00005B08 - v_cvt_f32_bf16_sdwa v8, v225 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000182B4: 7E10B6F9 000416E1 - v_fmac_f32_e64 v146, v8, s45 // 0000000182BC: D13B0092 00005B08 - v_cvt_f32_bf16_sdwa v8, v225 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000182C4: 7E10B6F9 000516E1 - v_fmac_f32_e64 v147, v8, s45 // 0000000182CC: D13B0093 00005B08 - v_cvt_f32_bf16_sdwa v8, v226 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000182D4: 7E10B6F9 000416E2 - v_fmac_f32_e64 v148, v8, s45 // 0000000182DC: D13B0094 00005B08 - v_cvt_f32_bf16_sdwa v8, v226 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 0000000182E4: 7E10B6F9 000516E2 - v_fmac_f32_e64 v149, v8, s45 // 0000000182EC: D13B0095 00005B08 - v_cvt_f32_bf16_sdwa v8, v227 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 0000000182F4: 7E10B6F9 000416E3 - v_fmac_f32_e64 v150, v8, s45 // 0000000182FC: D13B0096 00005B08 - v_cvt_f32_bf16_sdwa v8, v227 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1// 000000018304: 7E10B6F9 
000516E3 - v_fmac_f32_e64 v151, v8, s45 // 00000001830C: D13B0097 00005B08 - v_cvt_pk_bf16_f32 v144, v144, v145 // 000000018314: D2680090 00032390 - v_cvt_pk_bf16_f32 v145, v146, v147 // 00000001831C: D2680091 00032792 - v_cvt_pk_bf16_f32 v146, v148, v149 // 000000018324: D2680092 00032B94 - v_cvt_pk_bf16_f32 v147, v150, v151 // 00000001832C: D2680093 00032F96 - buffer_store_dwordx4 v[144:147], v221, s[16:19], 0 offen nt// 000000018334: E07E1000 800490DD - s_nop 0 // 00000001833C: BF800000 - s_branch label_GW_End_2 // 000000018340: BF821CA3 - -label_GW_B1_E1_M: - v_mov_b32_e32 v10, 0x80000000 // 000000018344: 7E1402FF 80000000 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001834C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018354: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001835C: 86A2221E - v_add_lshl_u32 v92, v6, v4, 1 // 000000018360: D1FE005C 02060906 - v_cndmask_b32_e64 v92, v10, v92, s[34:35] // 000000018368: D100005C 008AB90A - buffer_load_short_d16 v91, v92, s[20:23], 0 offen // 000000018370: E0901000 80055B5C - v_add_lshl_u32 v92, v7, v4, 1 // 000000018378: D1FE005C 02060907 - v_cndmask_b32_e64 v92, v10, v92, s[34:35] // 000000018380: D100005C 008AB90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000018388: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018390: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018398: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000183A0: 86A2221E - v_add_lshl_u32 v94, v6, v8, 1 // 0000000183A4: D1FE005E 02061106 - v_cndmask_b32_e64 v94, v10, v94, s[34:35] // 0000000183AC: D100005E 008ABD0A - buffer_load_short_d16 v93, v94, s[20:23], 0 offen // 0000000183B4: E0901000 80055D5E - v_add_lshl_u32 v94, v7, v8, 1 // 0000000183BC: D1FE005E 02061107 - v_cndmask_b32_e64 v94, v10, v94, s[34:35] // 0000000183C4: D100005E 008ABD0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 0000000183CC: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 
0000000183D4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000183DC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000183E4: 86A2221E - v_add_lshl_u32 v96, v6, v8, 1 // 0000000183E8: D1FE0060 02061106 - v_cndmask_b32_e64 v96, v10, v96, s[34:35] // 0000000183F0: D1000060 008AC10A - buffer_load_short_d16 v95, v96, s[20:23], 0 offen // 0000000183F8: E0901000 80055F60 - v_add_lshl_u32 v96, v7, v8, 1 // 000000018400: D1FE0060 02061107 - v_cndmask_b32_e64 v96, v10, v96, s[34:35] // 000000018408: D1000060 008AC10A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000018410: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018418: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018420: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018428: 86A2221E - v_add_lshl_u32 v98, v6, v8, 1 // 00000001842C: D1FE0062 02061106 - v_cndmask_b32_e64 v98, v10, v98, s[34:35] // 000000018434: D1000062 008AC50A - buffer_load_short_d16 v97, v98, s[20:23], 0 offen // 00000001843C: E0901000 80056162 - v_add_lshl_u32 v98, v7, v8, 1 // 000000018444: D1FE0062 02061107 - v_cndmask_b32_e64 v98, v10, v98, s[34:35] // 00000001844C: D1000062 008AC50A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000018454: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001845C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018464: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001846C: 86A2221E - v_add_lshl_u32 v100, v6, v8, 1 // 000000018470: D1FE0064 02061106 - v_cndmask_b32_e64 v100, v10, v100, s[34:35] // 000000018478: D1000064 008AC90A - buffer_load_short_d16 v99, v100, s[20:23], 0 offen // 000000018480: E0901000 80056364 - v_add_lshl_u32 v100, v7, v8, 1 // 000000018488: D1FE0064 02061107 - v_cndmask_b32_e64 v100, v10, v100, s[34:35] // 000000018490: D1000064 008AC90A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000018498: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000184A0: 
D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000184A8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000184B0: 86A2221E - v_add_lshl_u32 v102, v6, v8, 1 // 0000000184B4: D1FE0066 02061106 - v_cndmask_b32_e64 v102, v10, v102, s[34:35] // 0000000184BC: D1000066 008ACD0A - buffer_load_short_d16 v101, v102, s[20:23], 0 offen // 0000000184C4: E0901000 80056566 - v_add_lshl_u32 v102, v7, v8, 1 // 0000000184CC: D1FE0066 02061107 - v_cndmask_b32_e64 v102, v10, v102, s[34:35] // 0000000184D4: D1000066 008ACD0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 0000000184DC: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000184E4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000184EC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000184F4: 86A2221E - v_add_lshl_u32 v104, v6, v8, 1 // 0000000184F8: D1FE0068 02061106 - v_cndmask_b32_e64 v104, v10, v104, s[34:35] // 000000018500: D1000068 008AD10A - buffer_load_short_d16 v103, v104, s[20:23], 0 offen // 000000018508: E0901000 80056768 - v_add_lshl_u32 v104, v7, v8, 1 // 000000018510: D1FE0068 02061107 - v_cndmask_b32_e64 v104, v10, v104, s[34:35] // 000000018518: D1000068 008AD10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000018520: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018528: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018530: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018538: 86A2221E - v_add_lshl_u32 v106, v6, v8, 1 // 00000001853C: D1FE006A 02061106 - v_cndmask_b32_e64 v106, v10, v106, s[34:35] // 000000018544: D100006A 008AD50A - buffer_load_short_d16 v105, v106, s[20:23], 0 offen // 00000001854C: E0901000 8005696A - v_add_lshl_u32 v106, v7, v8, 1 // 000000018554: D1FE006A 02061107 - v_cndmask_b32_e64 v106, v10, v106, s[34:35] // 00000001855C: D100006A 008AD50A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000018564: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001856C: 
D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000018574: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001857C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018584: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001858C: 86A2221E - v_add_lshl_u32 v108, v6, v4, 1 // 000000018590: D1FE006C 02060906 - v_cndmask_b32_e64 v108, v10, v108, s[34:35] // 000000018598: D100006C 008AD90A - buffer_load_short_d16 v107, v108, s[20:23], 0 offen // 0000000185A0: E0901000 80056B6C - v_add_lshl_u32 v108, v7, v4, 1 // 0000000185A8: D1FE006C 02060907 - v_cndmask_b32_e64 v108, v10, v108, s[34:35] // 0000000185B0: D100006C 008AD90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 0000000185B8: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000185C0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000185C8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000185D0: 86A2221E - v_add_lshl_u32 v110, v6, v8, 1 // 0000000185D4: D1FE006E 02061106 - v_cndmask_b32_e64 v110, v10, v110, s[34:35] // 0000000185DC: D100006E 008ADD0A - buffer_load_short_d16 v109, v110, s[20:23], 0 offen // 0000000185E4: E0901000 80056D6E - v_add_lshl_u32 v110, v7, v8, 1 // 0000000185EC: D1FE006E 02061107 - v_cndmask_b32_e64 v110, v10, v110, s[34:35] // 0000000185F4: D100006E 008ADD0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 0000000185FC: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018604: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001860C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018614: 86A2221E - v_add_lshl_u32 v112, v6, v8, 1 // 000000018618: D1FE0070 02061106 - v_cndmask_b32_e64 v112, v10, v112, s[34:35] // 000000018620: D1000070 008AE10A - buffer_load_short_d16 v111, v112, s[20:23], 0 offen // 000000018628: E0901000 80056F70 - v_add_lshl_u32 v112, v7, v8, 1 // 000000018630: D1FE0070 02061107 - v_cndmask_b32_e64 v112, v10, v112, s[34:35] // 000000018638: 
D1000070 008AE10A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000018640: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018648: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018650: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018658: 86A2221E - v_add_lshl_u32 v114, v6, v8, 1 // 00000001865C: D1FE0072 02061106 - v_cndmask_b32_e64 v114, v10, v114, s[34:35] // 000000018664: D1000072 008AE50A - buffer_load_short_d16 v113, v114, s[20:23], 0 offen // 00000001866C: E0901000 80057172 - v_add_lshl_u32 v114, v7, v8, 1 // 000000018674: D1FE0072 02061107 - v_cndmask_b32_e64 v114, v10, v114, s[34:35] // 00000001867C: D1000072 008AE50A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000018684: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001868C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018694: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001869C: 86A2221E - v_add_lshl_u32 v116, v6, v8, 1 // 0000000186A0: D1FE0074 02061106 - v_cndmask_b32_e64 v116, v10, v116, s[34:35] // 0000000186A8: D1000074 008AE90A - buffer_load_short_d16 v115, v116, s[20:23], 0 offen // 0000000186B0: E0901000 80057374 - v_add_lshl_u32 v116, v7, v8, 1 // 0000000186B8: D1FE0074 02061107 - v_cndmask_b32_e64 v116, v10, v116, s[34:35] // 0000000186C0: D1000074 008AE90A - v_add_co_u32_e64 v8, vcc, v4, 5 // 0000000186C8: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000186D0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000186D8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000186E0: 86A2221E - v_add_lshl_u32 v118, v6, v8, 1 // 0000000186E4: D1FE0076 02061106 - v_cndmask_b32_e64 v118, v10, v118, s[34:35] // 0000000186EC: D1000076 008AED0A - buffer_load_short_d16 v117, v118, s[20:23], 0 offen // 0000000186F4: E0901000 80057576 - v_add_lshl_u32 v118, v7, v8, 1 // 0000000186FC: D1FE0076 02061107 - v_cndmask_b32_e64 v118, v10, v118, s[34:35] // 
000000018704: D1000076 008AED0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001870C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018714: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001871C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018724: 86A2221E - v_add_lshl_u32 v120, v6, v8, 1 // 000000018728: D1FE0078 02061106 - v_cndmask_b32_e64 v120, v10, v120, s[34:35] // 000000018730: D1000078 008AF10A - buffer_load_short_d16 v119, v120, s[20:23], 0 offen // 000000018738: E0901000 80057778 - v_add_lshl_u32 v120, v7, v8, 1 // 000000018740: D1FE0078 02061107 - v_cndmask_b32_e64 v120, v10, v120, s[34:35] // 000000018748: D1000078 008AF10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000018750: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018758: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018760: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018768: 86A2221E - v_add_lshl_u32 v122, v6, v8, 1 // 00000001876C: D1FE007A 02061106 - v_cndmask_b32_e64 v122, v10, v122, s[34:35] // 000000018774: D100007A 008AF50A - buffer_load_short_d16 v121, v122, s[20:23], 0 offen // 00000001877C: E0901000 8005797A - v_add_lshl_u32 v122, v7, v8, 1 // 000000018784: D1FE007A 02061107 - v_cndmask_b32_e64 v122, v10, v122, s[34:35] // 00000001878C: D100007A 008AF50A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000018794: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001879C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000187A4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000187AC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000187B4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000187BC: 86A2221E - v_add_lshl_u32 v124, v6, v4, 1 // 0000000187C0: D1FE007C 02060906 - v_cndmask_b32_e64 v124, v10, v124, s[34:35] // 0000000187C8: D100007C 008AF90A - buffer_load_short_d16 v123, v124, s[20:23], 0 offen // 0000000187D0: 
E0901000 80057B7C - v_add_lshl_u32 v124, v7, v4, 1 // 0000000187D8: D1FE007C 02060907 - v_cndmask_b32_e64 v124, v10, v124, s[34:35] // 0000000187E0: D100007C 008AF90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 0000000187E8: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000187F0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000187F8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018800: 86A2221E - v_add_lshl_u32 v126, v6, v8, 1 // 000000018804: D1FE007E 02061106 - v_cndmask_b32_e64 v126, v10, v126, s[34:35] // 00000001880C: D100007E 008AFD0A - buffer_load_short_d16 v125, v126, s[20:23], 0 offen // 000000018814: E0901000 80057D7E - v_add_lshl_u32 v126, v7, v8, 1 // 00000001881C: D1FE007E 02061107 - v_cndmask_b32_e64 v126, v10, v126, s[34:35] // 000000018824: D100007E 008AFD0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001882C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018834: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001883C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018844: 86A2221E - v_add_lshl_u32 v128, v6, v8, 1 // 000000018848: D1FE0080 02061106 - v_cndmask_b32_e64 v128, v10, v128, s[34:35] // 000000018850: D1000080 008B010A - buffer_load_short_d16 v127, v128, s[20:23], 0 offen // 000000018858: E0901000 80057F80 - v_add_lshl_u32 v128, v7, v8, 1 // 000000018860: D1FE0080 02061107 - v_cndmask_b32_e64 v128, v10, v128, s[34:35] // 000000018868: D1000080 008B010A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000018870: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018878: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018880: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018888: 86A2221E - v_add_lshl_u32 v130, v6, v8, 1 // 00000001888C: D1FE0082 02061106 - v_cndmask_b32_e64 v130, v10, v130, s[34:35] // 000000018894: D1000082 008B050A - buffer_load_short_d16 v129, v130, s[20:23], 0 offen // 
00000001889C: E0901000 80058182 - v_add_lshl_u32 v130, v7, v8, 1 // 0000000188A4: D1FE0082 02061107 - v_cndmask_b32_e64 v130, v10, v130, s[34:35] // 0000000188AC: D1000082 008B050A - v_add_co_u32_e64 v8, vcc, v4, 4 // 0000000188B4: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000188BC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000188C4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000188CC: 86A2221E - v_add_lshl_u32 v135, v6, v8, 1 // 0000000188D0: D1FE0087 02061106 - v_cndmask_b32_e64 v135, v10, v135, s[34:35] // 0000000188D8: D1000087 008B0F0A - buffer_load_short_d16 v131, v135, s[20:23], 0 offen // 0000000188E0: E0901000 80058387 - v_add_lshl_u32 v135, v7, v8, 1 // 0000000188E8: D1FE0087 02061107 - v_cndmask_b32_e64 v135, v10, v135, s[34:35] // 0000000188F0: D1000087 008B0F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 0000000188F8: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018900: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018908: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018910: 86A2221E - v_add_lshl_u32 v137, v6, v8, 1 // 000000018914: D1FE0089 02061106 - v_cndmask_b32_e64 v137, v10, v137, s[34:35] // 00000001891C: D1000089 008B130A - buffer_load_short_d16 v136, v137, s[20:23], 0 offen // 000000018924: E0901000 80058889 - v_add_lshl_u32 v137, v7, v8, 1 // 00000001892C: D1FE0089 02061107 - v_cndmask_b32_e64 v137, v10, v137, s[34:35] // 000000018934: D1000089 008B130A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001893C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018944: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001894C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018954: 86A2221E - v_add_lshl_u32 v139, v6, v8, 1 // 000000018958: D1FE008B 02061106 - v_cndmask_b32_e64 v139, v10, v139, s[34:35] // 000000018960: D100008B 008B170A - buffer_load_short_d16 v138, v139, s[20:23], 0 offen 
// 000000018968: E0901000 80058A8B - v_add_lshl_u32 v139, v7, v8, 1 // 000000018970: D1FE008B 02061107 - v_cndmask_b32_e64 v139, v10, v139, s[34:35] // 000000018978: D100008B 008B170A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000018980: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018988: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018990: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018998: 86A2221E - v_add_lshl_u32 v141, v6, v8, 1 // 00000001899C: D1FE008D 02061106 - v_cndmask_b32_e64 v141, v10, v141, s[34:35] // 0000000189A4: D100008D 008B1B0A - buffer_load_short_d16 v140, v141, s[20:23], 0 offen // 0000000189AC: E0901000 80058C8D - v_add_lshl_u32 v141, v7, v8, 1 // 0000000189B4: D1FE008D 02061107 - v_cndmask_b32_e64 v141, v10, v141, s[34:35] // 0000000189BC: D100008D 008B1B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 0000000189C4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 0000000189CC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000189D4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000189DC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000189E4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000189EC: 86A2221E - v_add_lshl_u32 v143, v6, v4, 1 // 0000000189F0: D1FE008F 02060906 - v_cndmask_b32_e64 v143, v10, v143, s[34:35] // 0000000189F8: D100008F 008B1F0A - buffer_load_short_d16 v142, v143, s[20:23], 0 offen // 000000018A00: E0901000 80058E8F - v_add_lshl_u32 v143, v7, v4, 1 // 000000018A08: D1FE008F 02060907 - v_cndmask_b32_e64 v143, v10, v143, s[34:35] // 000000018A10: D100008F 008B1F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000018A18: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018A20: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018A28: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018A30: 86A2221E - v_add_lshl_u32 v145, v6, v8, 1 // 000000018A34: D1FE0091 02061106 - 
v_cndmask_b32_e64 v145, v10, v145, s[34:35] // 000000018A3C: D1000091 008B230A - buffer_load_short_d16 v144, v145, s[20:23], 0 offen // 000000018A44: E0901000 80059091 - v_add_lshl_u32 v145, v7, v8, 1 // 000000018A4C: D1FE0091 02061107 - v_cndmask_b32_e64 v145, v10, v145, s[34:35] // 000000018A54: D1000091 008B230A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000018A5C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018A64: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018A6C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018A74: 86A2221E - v_add_lshl_u32 v147, v6, v8, 1 // 000000018A78: D1FE0093 02061106 - v_cndmask_b32_e64 v147, v10, v147, s[34:35] // 000000018A80: D1000093 008B270A - buffer_load_short_d16 v146, v147, s[20:23], 0 offen // 000000018A88: E0901000 80059293 - v_add_lshl_u32 v147, v7, v8, 1 // 000000018A90: D1FE0093 02061107 - v_cndmask_b32_e64 v147, v10, v147, s[34:35] // 000000018A98: D1000093 008B270A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000018AA0: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018AA8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018AB0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018AB8: 86A2221E - v_add_lshl_u32 v149, v6, v8, 1 // 000000018ABC: D1FE0095 02061106 - v_cndmask_b32_e64 v149, v10, v149, s[34:35] // 000000018AC4: D1000095 008B2B0A - buffer_load_short_d16 v148, v149, s[20:23], 0 offen // 000000018ACC: E0901000 80059495 - v_add_lshl_u32 v149, v7, v8, 1 // 000000018AD4: D1FE0095 02061107 - v_cndmask_b32_e64 v149, v10, v149, s[34:35] // 000000018ADC: D1000095 008B2B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000018AE4: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018AEC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018AF4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018AFC: 86A2221E - v_add_lshl_u32 v151, v6, v8, 1 // 000000018B00: D1FE0097 02061106 
- v_cndmask_b32_e64 v151, v10, v151, s[34:35] // 000000018B08: D1000097 008B2F0A - buffer_load_short_d16 v150, v151, s[20:23], 0 offen // 000000018B10: E0901000 80059697 - v_add_lshl_u32 v151, v7, v8, 1 // 000000018B18: D1FE0097 02061107 - v_cndmask_b32_e64 v151, v10, v151, s[34:35] // 000000018B20: D1000097 008B2F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000018B28: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018B30: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018B38: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018B40: 86A2221E - v_add_lshl_u32 v153, v6, v8, 1 // 000000018B44: D1FE0099 02061106 - v_cndmask_b32_e64 v153, v10, v153, s[34:35] // 000000018B4C: D1000099 008B330A - buffer_load_short_d16 v152, v153, s[20:23], 0 offen // 000000018B54: E0901000 80059899 - v_add_lshl_u32 v153, v7, v8, 1 // 000000018B5C: D1FE0099 02061107 - v_cndmask_b32_e64 v153, v10, v153, s[34:35] // 000000018B64: D1000099 008B330A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000018B6C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018B74: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018B7C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018B84: 86A2221E - v_add_lshl_u32 v155, v6, v8, 1 // 000000018B88: D1FE009B 02061106 - v_cndmask_b32_e64 v155, v10, v155, s[34:35] // 000000018B90: D100009B 008B370A - buffer_load_short_d16 v154, v155, s[20:23], 0 offen // 000000018B98: E0901000 80059A9B - v_add_lshl_u32 v155, v7, v8, 1 // 000000018BA0: D1FE009B 02061107 - v_cndmask_b32_e64 v155, v10, v155, s[34:35] // 000000018BA8: D100009B 008B370A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000018BB0: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018BB8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018BC0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018BC8: 86A2221E - v_add_lshl_u32 v157, v6, v8, 1 // 000000018BCC: D1FE009D 
02061106 - v_cndmask_b32_e64 v157, v10, v157, s[34:35] // 000000018BD4: D100009D 008B3B0A - buffer_load_short_d16 v156, v157, s[20:23], 0 offen // 000000018BDC: E0901000 80059C9D - v_add_lshl_u32 v157, v7, v8, 1 // 000000018BE4: D1FE009D 02061107 - v_cndmask_b32_e64 v157, v10, v157, s[34:35] // 000000018BEC: D100009D 008B3B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000018BF4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000018BFC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000018C04: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000018C0C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018C14: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018C1C: 86A2221E - v_add_lshl_u32 v159, v6, v4, 1 // 000000018C20: D1FE009F 02060906 - v_cndmask_b32_e64 v159, v10, v159, s[34:35] // 000000018C28: D100009F 008B3F0A - buffer_load_short_d16 v158, v159, s[20:23], 0 offen // 000000018C30: E0901000 80059E9F - v_add_lshl_u32 v159, v7, v4, 1 // 000000018C38: D1FE009F 02060907 - v_cndmask_b32_e64 v159, v10, v159, s[34:35] // 000000018C40: D100009F 008B3F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000018C48: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018C50: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018C58: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018C60: 86A2221E - v_add_lshl_u32 v161, v6, v8, 1 // 000000018C64: D1FE00A1 02061106 - v_cndmask_b32_e64 v161, v10, v161, s[34:35] // 000000018C6C: D10000A1 008B430A - buffer_load_short_d16 v160, v161, s[20:23], 0 offen // 000000018C74: E0901000 8005A0A1 - v_add_lshl_u32 v161, v7, v8, 1 // 000000018C7C: D1FE00A1 02061107 - v_cndmask_b32_e64 v161, v10, v161, s[34:35] // 000000018C84: D10000A1 008B430A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000018C8C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018C94: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018C9C: D0C90022 
00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018CA4: 86A2221E - v_add_lshl_u32 v163, v6, v8, 1 // 000000018CA8: D1FE00A3 02061106 - v_cndmask_b32_e64 v163, v10, v163, s[34:35] // 000000018CB0: D10000A3 008B470A - buffer_load_short_d16 v162, v163, s[20:23], 0 offen // 000000018CB8: E0901000 8005A2A3 - v_add_lshl_u32 v163, v7, v8, 1 // 000000018CC0: D1FE00A3 02061107 - v_cndmask_b32_e64 v163, v10, v163, s[34:35] // 000000018CC8: D10000A3 008B470A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000018CD0: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018CD8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018CE0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018CE8: 86A2221E - v_add_lshl_u32 v165, v6, v8, 1 // 000000018CEC: D1FE00A5 02061106 - v_cndmask_b32_e64 v165, v10, v165, s[34:35] // 000000018CF4: D10000A5 008B4B0A - buffer_load_short_d16 v164, v165, s[20:23], 0 offen // 000000018CFC: E0901000 8005A4A5 - v_add_lshl_u32 v165, v7, v8, 1 // 000000018D04: D1FE00A5 02061107 - v_cndmask_b32_e64 v165, v10, v165, s[34:35] // 000000018D0C: D10000A5 008B4B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000018D14: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018D1C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018D24: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018D2C: 86A2221E - v_add_lshl_u32 v167, v6, v8, 1 // 000000018D30: D1FE00A7 02061106 - v_cndmask_b32_e64 v167, v10, v167, s[34:35] // 000000018D38: D10000A7 008B4F0A - buffer_load_short_d16 v166, v167, s[20:23], 0 offen // 000000018D40: E0901000 8005A6A7 - v_add_lshl_u32 v167, v7, v8, 1 // 000000018D48: D1FE00A7 02061107 - v_cndmask_b32_e64 v167, v10, v167, s[34:35] // 000000018D50: D10000A7 008B4F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000018D58: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018D60: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018D68: 
D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018D70: 86A2221E - v_add_lshl_u32 v169, v6, v8, 1 // 000000018D74: D1FE00A9 02061106 - v_cndmask_b32_e64 v169, v10, v169, s[34:35] // 000000018D7C: D10000A9 008B530A - buffer_load_short_d16 v168, v169, s[20:23], 0 offen // 000000018D84: E0901000 8005A8A9 - v_add_lshl_u32 v169, v7, v8, 1 // 000000018D8C: D1FE00A9 02061107 - v_cndmask_b32_e64 v169, v10, v169, s[34:35] // 000000018D94: D10000A9 008B530A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000018D9C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018DA4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018DAC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018DB4: 86A2221E - v_add_lshl_u32 v171, v6, v8, 1 // 000000018DB8: D1FE00AB 02061106 - v_cndmask_b32_e64 v171, v10, v171, s[34:35] // 000000018DC0: D10000AB 008B570A - buffer_load_short_d16 v170, v171, s[20:23], 0 offen // 000000018DC8: E0901000 8005AAAB - v_add_lshl_u32 v171, v7, v8, 1 // 000000018DD0: D1FE00AB 02061107 - v_cndmask_b32_e64 v171, v10, v171, s[34:35] // 000000018DD8: D10000AB 008B570A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000018DE0: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018DE8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018DF0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018DF8: 86A2221E - v_add_lshl_u32 v173, v6, v8, 1 // 000000018DFC: D1FE00AD 02061106 - v_cndmask_b32_e64 v173, v10, v173, s[34:35] // 000000018E04: D10000AD 008B5B0A - buffer_load_short_d16 v172, v173, s[20:23], 0 offen // 000000018E0C: E0901000 8005ACAD - v_add_lshl_u32 v173, v7, v8, 1 // 000000018E14: D1FE00AD 02061107 - v_cndmask_b32_e64 v173, v10, v173, s[34:35] // 000000018E1C: D10000AD 008B5B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000018E24: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000018E2C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000018E34: D1340007 
00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000018E3C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018E44: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018E4C: 86A2221E - v_add_lshl_u32 v175, v6, v4, 1 // 000000018E50: D1FE00AF 02060906 - v_cndmask_b32_e64 v175, v10, v175, s[34:35] // 000000018E58: D10000AF 008B5F0A - buffer_load_short_d16 v174, v175, s[20:23], 0 offen // 000000018E60: E0901000 8005AEAF - v_add_lshl_u32 v175, v7, v4, 1 // 000000018E68: D1FE00AF 02060907 - v_cndmask_b32_e64 v175, v10, v175, s[34:35] // 000000018E70: D10000AF 008B5F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000018E78: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018E80: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018E88: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018E90: 86A2221E - v_add_lshl_u32 v177, v6, v8, 1 // 000000018E94: D1FE00B1 02061106 - v_cndmask_b32_e64 v177, v10, v177, s[34:35] // 000000018E9C: D10000B1 008B630A - buffer_load_short_d16 v176, v177, s[20:23], 0 offen // 000000018EA4: E0901000 8005B0B1 - v_add_lshl_u32 v177, v7, v8, 1 // 000000018EAC: D1FE00B1 02061107 - v_cndmask_b32_e64 v177, v10, v177, s[34:35] // 000000018EB4: D10000B1 008B630A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000018EBC: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018EC4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018ECC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018ED4: 86A2221E - v_add_lshl_u32 v179, v6, v8, 1 // 000000018ED8: D1FE00B3 02061106 - v_cndmask_b32_e64 v179, v10, v179, s[34:35] // 000000018EE0: D10000B3 008B670A - buffer_load_short_d16 v178, v179, s[20:23], 0 offen // 000000018EE8: E0901000 8005B2B3 - v_add_lshl_u32 v179, v7, v8, 1 // 000000018EF0: D1FE00B3 02061107 - v_cndmask_b32_e64 v179, v10, v179, s[34:35] // 000000018EF8: D10000B3 008B670A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000018F00: 
D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018F08: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018F10: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018F18: 86A2221E - v_add_lshl_u32 v181, v6, v8, 1 // 000000018F1C: D1FE00B5 02061106 - v_cndmask_b32_e64 v181, v10, v181, s[34:35] // 000000018F24: D10000B5 008B6B0A - buffer_load_short_d16 v180, v181, s[20:23], 0 offen // 000000018F2C: E0901000 8005B4B5 - v_add_lshl_u32 v181, v7, v8, 1 // 000000018F34: D1FE00B5 02061107 - v_cndmask_b32_e64 v181, v10, v181, s[34:35] // 000000018F3C: D10000B5 008B6B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000018F44: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018F4C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018F54: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018F5C: 86A2221E - v_add_lshl_u32 v183, v6, v8, 1 // 000000018F60: D1FE00B7 02061106 - v_cndmask_b32_e64 v183, v10, v183, s[34:35] // 000000018F68: D10000B7 008B6F0A - buffer_load_short_d16 v182, v183, s[20:23], 0 offen // 000000018F70: E0901000 8005B6B7 - v_add_lshl_u32 v183, v7, v8, 1 // 000000018F78: D1FE00B7 02061107 - v_cndmask_b32_e64 v183, v10, v183, s[34:35] // 000000018F80: D10000B7 008B6F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000018F88: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018F90: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018F98: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018FA0: 86A2221E - v_add_lshl_u32 v185, v6, v8, 1 // 000000018FA4: D1FE00B9 02061106 - v_cndmask_b32_e64 v185, v10, v185, s[34:35] // 000000018FAC: D10000B9 008B730A - buffer_load_short_d16 v184, v185, s[20:23], 0 offen // 000000018FB4: E0901000 8005B8B9 - v_add_lshl_u32 v185, v7, v8, 1 // 000000018FBC: D1FE00B9 02061107 - v_cndmask_b32_e64 v185, v10, v185, s[34:35] // 000000018FC4: D10000B9 008B730A - v_add_co_u32_e64 v8, vcc, v4, 6 // 
000000018FCC: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000018FD4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000018FDC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000018FE4: 86A2221E - v_add_lshl_u32 v187, v6, v8, 1 // 000000018FE8: D1FE00BB 02061106 - v_cndmask_b32_e64 v187, v10, v187, s[34:35] // 000000018FF0: D10000BB 008B770A - buffer_load_short_d16 v186, v187, s[20:23], 0 offen // 000000018FF8: E0901000 8005BABB - v_add_lshl_u32 v187, v7, v8, 1 // 000000019000: D1FE00BB 02061107 - v_cndmask_b32_e64 v187, v10, v187, s[34:35] // 000000019008: D10000BB 008B770A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000019010: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019018: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000019020: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000019028: 86A2221E - v_add_lshl_u32 v189, v6, v8, 1 // 00000001902C: D1FE00BD 02061106 - v_cndmask_b32_e64 v189, v10, v189, s[34:35] // 000000019034: D10000BD 008B7B0A - buffer_load_short_d16 v188, v189, s[20:23], 0 offen // 00000001903C: E0901000 8005BCBD - v_add_lshl_u32 v189, v7, v8, 1 // 000000019044: D1FE00BD 02061107 - v_cndmask_b32_e64 v189, v10, v189, s[34:35] // 00000001904C: D10000BD 008B7B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000019054: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001905C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000019064: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001906C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000019074: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001907C: 86A2221E - v_add_lshl_u32 v191, v6, v4, 1 // 000000019080: D1FE00BF 02060906 - v_cndmask_b32_e64 v191, v10, v191, s[34:35] // 000000019088: D10000BF 008B7F0A - buffer_load_short_d16 v190, v191, s[20:23], 0 offen // 000000019090: E0901000 8005BEBF - v_add_lshl_u32 v191, v7, v4, 1 // 000000019098: D1FE00BF 
02060907 - v_cndmask_b32_e64 v191, v10, v191, s[34:35] // 0000000190A0: D10000BF 008B7F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 0000000190A8: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000190B0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000190B8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000190C0: 86A2221E - v_add_lshl_u32 v193, v6, v8, 1 // 0000000190C4: D1FE00C1 02061106 - v_cndmask_b32_e64 v193, v10, v193, s[34:35] // 0000000190CC: D10000C1 008B830A - buffer_load_short_d16 v192, v193, s[20:23], 0 offen // 0000000190D4: E0901000 8005C0C1 - v_add_lshl_u32 v193, v7, v8, 1 // 0000000190DC: D1FE00C1 02061107 - v_cndmask_b32_e64 v193, v10, v193, s[34:35] // 0000000190E4: D10000C1 008B830A - v_add_co_u32_e64 v8, vcc, v4, 2 // 0000000190EC: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000190F4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000190FC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000019104: 86A2221E - v_add_lshl_u32 v195, v6, v8, 1 // 000000019108: D1FE00C3 02061106 - v_cndmask_b32_e64 v195, v10, v195, s[34:35] // 000000019110: D10000C3 008B870A - buffer_load_short_d16 v194, v195, s[20:23], 0 offen // 000000019118: E0901000 8005C2C3 - v_add_lshl_u32 v195, v7, v8, 1 // 000000019120: D1FE00C3 02061107 - v_cndmask_b32_e64 v195, v10, v195, s[34:35] // 000000019128: D10000C3 008B870A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000019130: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019138: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000019140: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000019148: 86A2221E - v_add_lshl_u32 v197, v6, v8, 1 // 00000001914C: D1FE00C5 02061106 - v_cndmask_b32_e64 v197, v10, v197, s[34:35] // 000000019154: D10000C5 008B8B0A - buffer_load_short_d16 v196, v197, s[20:23], 0 offen // 00000001915C: E0901000 8005C4C5 - v_add_lshl_u32 v197, v7, v8, 1 // 000000019164: 
D1FE00C5 02061107 - v_cndmask_b32_e64 v197, v10, v197, s[34:35] // 00000001916C: D10000C5 008B8B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000019174: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001917C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000019184: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001918C: 86A2221E - v_add_lshl_u32 v199, v6, v8, 1 // 000000019190: D1FE00C7 02061106 - v_cndmask_b32_e64 v199, v10, v199, s[34:35] // 000000019198: D10000C7 008B8F0A - buffer_load_short_d16 v198, v199, s[20:23], 0 offen // 0000000191A0: E0901000 8005C6C7 - v_add_lshl_u32 v199, v7, v8, 1 // 0000000191A8: D1FE00C7 02061107 - v_cndmask_b32_e64 v199, v10, v199, s[34:35] // 0000000191B0: D10000C7 008B8F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 0000000191B8: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000191C0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000191C8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000191D0: 86A2221E - v_add_lshl_u32 v201, v6, v8, 1 // 0000000191D4: D1FE00C9 02061106 - v_cndmask_b32_e64 v201, v10, v201, s[34:35] // 0000000191DC: D10000C9 008B930A - buffer_load_short_d16 v200, v201, s[20:23], 0 offen // 0000000191E4: E0901000 8005C8C9 - v_add_lshl_u32 v201, v7, v8, 1 // 0000000191EC: D1FE00C9 02061107 - v_cndmask_b32_e64 v201, v10, v201, s[34:35] // 0000000191F4: D10000C9 008B930A - v_add_co_u32_e64 v8, vcc, v4, 6 // 0000000191FC: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019204: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001920C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000019214: 86A2221E - v_add_lshl_u32 v203, v6, v8, 1 // 000000019218: D1FE00CB 02061106 - v_cndmask_b32_e64 v203, v10, v203, s[34:35] // 000000019220: D10000CB 008B970A - buffer_load_short_d16 v202, v203, s[20:23], 0 offen // 000000019228: E0901000 8005CACB - v_add_lshl_u32 v203, v7, v8, 1 // 
000000019230: D1FE00CB 02061107 - v_cndmask_b32_e64 v203, v10, v203, s[34:35] // 000000019238: D10000CB 008B970A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000019240: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019248: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000019250: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000019258: 86A2221E - v_add_lshl_u32 v205, v6, v8, 1 // 00000001925C: D1FE00CD 02061106 - v_cndmask_b32_e64 v205, v10, v205, s[34:35] // 000000019264: D10000CD 008B9B0A - buffer_load_short_d16 v204, v205, s[20:23], 0 offen // 00000001926C: E0901000 8005CCCD - v_add_lshl_u32 v205, v7, v8, 1 // 000000019274: D1FE00CD 02061107 - v_cndmask_b32_e64 v205, v10, v205, s[34:35] // 00000001927C: D10000CD 008B9B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000019284: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001928C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000019294: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001929C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000192A4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000192AC: 86A2221E - v_add_lshl_u32 v207, v6, v4, 1 // 0000000192B0: D1FE00CF 02060906 - v_cndmask_b32_e64 v207, v10, v207, s[34:35] // 0000000192B8: D10000CF 008B9F0A - buffer_load_short_d16 v206, v207, s[20:23], 0 offen // 0000000192C0: E0901000 8005CECF - v_add_lshl_u32 v207, v7, v4, 1 // 0000000192C8: D1FE00CF 02060907 - v_cndmask_b32_e64 v207, v10, v207, s[34:35] // 0000000192D0: D10000CF 008B9F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 0000000192D8: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000192E0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000192E8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000192F0: 86A2221E - v_add_lshl_u32 v209, v6, v8, 1 // 0000000192F4: D1FE00D1 02061106 - v_cndmask_b32_e64 v209, v10, v209, s[34:35] // 0000000192FC: D10000D1 
008BA30A - buffer_load_short_d16 v208, v209, s[20:23], 0 offen // 000000019304: E0901000 8005D0D1 - v_add_lshl_u32 v209, v7, v8, 1 // 00000001930C: D1FE00D1 02061107 - v_cndmask_b32_e64 v209, v10, v209, s[34:35] // 000000019314: D10000D1 008BA30A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001931C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019324: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001932C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000019334: 86A2221E - v_add_lshl_u32 v211, v6, v8, 1 // 000000019338: D1FE00D3 02061106 - v_cndmask_b32_e64 v211, v10, v211, s[34:35] // 000000019340: D10000D3 008BA70A - buffer_load_short_d16 v210, v211, s[20:23], 0 offen // 000000019348: E0901000 8005D2D3 - v_add_lshl_u32 v211, v7, v8, 1 // 000000019350: D1FE00D3 02061107 - v_cndmask_b32_e64 v211, v10, v211, s[34:35] // 000000019358: D10000D3 008BA70A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000019360: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019368: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000019370: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000019378: 86A2221E - v_add_lshl_u32 v213, v6, v8, 1 // 00000001937C: D1FE00D5 02061106 - v_cndmask_b32_e64 v213, v10, v213, s[34:35] // 000000019384: D10000D5 008BAB0A - buffer_load_short_d16 v212, v213, s[20:23], 0 offen // 00000001938C: E0901000 8005D4D5 - v_add_lshl_u32 v213, v7, v8, 1 // 000000019394: D1FE00D5 02061107 - v_cndmask_b32_e64 v213, v10, v213, s[34:35] // 00000001939C: D10000D5 008BAB0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 0000000193A4: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000193AC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000193B4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000193BC: 86A2221E - v_add_lshl_u32 v215, v6, v8, 1 // 0000000193C0: D1FE00D7 02061106 - v_cndmask_b32_e64 v215, v10, v215, s[34:35] // 0000000193C8: 
D10000D7 008BAF0A - buffer_load_short_d16 v214, v215, s[20:23], 0 offen // 0000000193D0: E0901000 8005D6D7 - v_add_lshl_u32 v215, v7, v8, 1 // 0000000193D8: D1FE00D7 02061107 - v_cndmask_b32_e64 v215, v10, v215, s[34:35] // 0000000193E0: D10000D7 008BAF0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 0000000193E8: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000193F0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000193F8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000019400: 86A2221E - v_add_lshl_u32 v217, v6, v8, 1 // 000000019404: D1FE00D9 02061106 - v_cndmask_b32_e64 v217, v10, v217, s[34:35] // 00000001940C: D10000D9 008BB30A - buffer_load_short_d16 v216, v217, s[20:23], 0 offen // 000000019414: E0901000 8005D8D9 - v_add_lshl_u32 v217, v7, v8, 1 // 00000001941C: D1FE00D9 02061107 - v_cndmask_b32_e64 v217, v10, v217, s[34:35] // 000000019424: D10000D9 008BB30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001942C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019434: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001943C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000019444: 86A2221E - v_add_lshl_u32 v219, v6, v8, 1 // 000000019448: D1FE00DB 02061106 - v_cndmask_b32_e64 v219, v10, v219, s[34:35] // 000000019450: D10000DB 008BB70A - buffer_load_short_d16 v218, v219, s[20:23], 0 offen // 000000019458: E0901000 8005DADB - v_add_lshl_u32 v219, v7, v8, 1 // 000000019460: D1FE00DB 02061107 - v_cndmask_b32_e64 v219, v10, v219, s[34:35] // 000000019468: D10000DB 008BB70A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000019470: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019478: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000019480: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000019488: 86A2221E - v_add_lshl_u32 v221, v6, v8, 1 // 00000001948C: D1FE00DD 02061106 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 
000000019494: D10000DD 008BBB0A - buffer_load_short_d16 v220, v221, s[20:23], 0 offen // 00000001949C: E0901000 8005DCDD - v_add_lshl_u32 v221, v7, v8, 1 // 0000000194A4: D1FE00DD 02061107 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 0000000194AC: D10000DD 008BBB0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 0000000194B4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 0000000194BC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000194C4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000194CC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000194D4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000194DC: 86A2221E - v_add_lshl_u32 v223, v6, v4, 1 // 0000000194E0: D1FE00DF 02060906 - v_cndmask_b32_e64 v223, v10, v223, s[34:35] // 0000000194E8: D10000DF 008BBF0A - buffer_load_short_d16 v222, v223, s[20:23], 0 offen // 0000000194F0: E0901000 8005DEDF - v_add_lshl_u32 v223, v7, v4, 1 // 0000000194F8: D1FE00DF 02060907 - v_cndmask_b32_e64 v223, v10, v223, s[34:35] // 000000019500: D10000DF 008BBF0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000019508: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019510: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000019518: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000019520: 86A2221E - v_add_lshl_u32 v225, v6, v8, 1 // 000000019524: D1FE00E1 02061106 - v_cndmask_b32_e64 v225, v10, v225, s[34:35] // 00000001952C: D10000E1 008BC30A - buffer_load_short_d16 v224, v225, s[20:23], 0 offen // 000000019534: E0901000 8005E0E1 - v_add_lshl_u32 v225, v7, v8, 1 // 00000001953C: D1FE00E1 02061107 - v_cndmask_b32_e64 v225, v10, v225, s[34:35] // 000000019544: D10000E1 008BC30A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001954C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019554: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001955C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 
000000019564: 86A2221E - v_add_lshl_u32 v227, v6, v8, 1 // 000000019568: D1FE00E3 02061106 - v_cndmask_b32_e64 v227, v10, v227, s[34:35] // 000000019570: D10000E3 008BC70A - buffer_load_short_d16 v226, v227, s[20:23], 0 offen // 000000019578: E0901000 8005E2E3 - v_add_lshl_u32 v227, v7, v8, 1 // 000000019580: D1FE00E3 02061107 - v_cndmask_b32_e64 v227, v10, v227, s[34:35] // 000000019588: D10000E3 008BC70A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000019590: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019598: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000195A0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000195A8: 86A2221E - v_add_lshl_u32 v229, v6, v8, 1 // 0000000195AC: D1FE00E5 02061106 - v_cndmask_b32_e64 v229, v10, v229, s[34:35] // 0000000195B4: D10000E5 008BCB0A - buffer_load_short_d16 v228, v229, s[20:23], 0 offen // 0000000195BC: E0901000 8005E4E5 - v_add_lshl_u32 v229, v7, v8, 1 // 0000000195C4: D1FE00E5 02061107 - v_cndmask_b32_e64 v229, v10, v229, s[34:35] // 0000000195CC: D10000E5 008BCB0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 0000000195D4: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000195DC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000195E4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000195EC: 86A2221E - v_add_lshl_u32 v231, v6, v8, 1 // 0000000195F0: D1FE00E7 02061106 - v_cndmask_b32_e64 v231, v10, v231, s[34:35] // 0000000195F8: D10000E7 008BCF0A - buffer_load_short_d16 v230, v231, s[20:23], 0 offen // 000000019600: E0901000 8005E6E7 - v_add_lshl_u32 v231, v7, v8, 1 // 000000019608: D1FE00E7 02061107 - v_cndmask_b32_e64 v231, v10, v231, s[34:35] // 000000019610: D10000E7 008BCF0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000019618: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019620: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000019628: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] 
// 000000019630: 86A2221E - v_add_lshl_u32 v233, v6, v8, 1 // 000000019634: D1FE00E9 02061106 - v_cndmask_b32_e64 v233, v10, v233, s[34:35] // 00000001963C: D10000E9 008BD30A - buffer_load_short_d16 v232, v233, s[20:23], 0 offen // 000000019644: E0901000 8005E8E9 - v_add_lshl_u32 v233, v7, v8, 1 // 00000001964C: D1FE00E9 02061107 - v_cndmask_b32_e64 v233, v10, v233, s[34:35] // 000000019654: D10000E9 008BD30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001965C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019664: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001966C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000019674: 86A2221E - v_add_lshl_u32 v235, v6, v8, 1 // 000000019678: D1FE00EB 02061106 - v_cndmask_b32_e64 v235, v10, v235, s[34:35] // 000000019680: D10000EB 008BD70A - buffer_load_short_d16 v234, v235, s[20:23], 0 offen // 000000019688: E0901000 8005EAEB - v_add_lshl_u32 v235, v7, v8, 1 // 000000019690: D1FE00EB 02061107 - v_cndmask_b32_e64 v235, v10, v235, s[34:35] // 000000019698: D10000EB 008BD70A - v_add_co_u32_e64 v8, vcc, v4, 7 // 0000000196A0: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000196A8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000196B0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000196B8: 86A2221E - v_add_lshl_u32 v237, v6, v8, 1 // 0000000196BC: D1FE00ED 02061106 - v_cndmask_b32_e64 v237, v10, v237, s[34:35] // 0000000196C4: D10000ED 008BDB0A - buffer_load_short_d16 v236, v237, s[20:23], 0 offen // 0000000196CC: E0901000 8005ECED - v_add_lshl_u32 v237, v7, v8, 1 // 0000000196D4: D1FE00ED 02061107 - v_cndmask_b32_e64 v237, v10, v237, s[34:35] // 0000000196DC: D10000ED 008BDB0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 0000000196E4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 0000000196EC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000196F4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000196FC: 
D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000019704: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001970C: 86A2221E - v_add_lshl_u32 v239, v6, v4, 1 // 000000019710: D1FE00EF 02060906 - v_cndmask_b32_e64 v239, v10, v239, s[34:35] // 000000019718: D10000EF 008BDF0A - buffer_load_short_d16 v238, v239, s[20:23], 0 offen // 000000019720: E0901000 8005EEEF - v_add_lshl_u32 v239, v7, v4, 1 // 000000019728: D1FE00EF 02060907 - v_cndmask_b32_e64 v239, v10, v239, s[34:35] // 000000019730: D10000EF 008BDF0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000019738: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019740: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000019748: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000019750: 86A2221E - v_add_lshl_u32 v241, v6, v8, 1 // 000000019754: D1FE00F1 02061106 - v_cndmask_b32_e64 v241, v10, v241, s[34:35] // 00000001975C: D10000F1 008BE30A - buffer_load_short_d16 v240, v241, s[20:23], 0 offen // 000000019764: E0901000 8005F0F1 - v_add_lshl_u32 v241, v7, v8, 1 // 00000001976C: D1FE00F1 02061107 - v_cndmask_b32_e64 v241, v10, v241, s[34:35] // 000000019774: D10000F1 008BE30A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001977C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000019784: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001978C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000019794: 86A2221E - v_add_lshl_u32 v243, v6, v8, 1 // 000000019798: D1FE00F3 02061106 - v_cndmask_b32_e64 v243, v10, v243, s[34:35] // 0000000197A0: D10000F3 008BE70A - buffer_load_short_d16 v242, v243, s[20:23], 0 offen // 0000000197A8: E0901000 8005F2F3 - v_add_lshl_u32 v243, v7, v8, 1 // 0000000197B0: D1FE00F3 02061107 - v_cndmask_b32_e64 v243, v10, v243, s[34:35] // 0000000197B8: D10000F3 008BE70A - v_add_co_u32_e64 v8, vcc, v4, 3 // 0000000197C0: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 
0000000197C8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000197D0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000197D8: 86A2221E - v_add_lshl_u32 v245, v6, v8, 1 // 0000000197DC: D1FE00F5 02061106 - v_cndmask_b32_e64 v245, v10, v245, s[34:35] // 0000000197E4: D10000F5 008BEB0A - buffer_load_short_d16 v244, v245, s[20:23], 0 offen // 0000000197EC: E0901000 8005F4F5 - v_add_lshl_u32 v245, v7, v8, 1 // 0000000197F4: D1FE00F5 02061107 - v_cndmask_b32_e64 v245, v10, v245, s[34:35] // 0000000197FC: D10000F5 008BEB0A - v_accvgpr_read_b32 v15, a0 // 000000019804: D3D8400F 18000100 - v_accvgpr_read_b32 v16, a4 // 00000001980C: D3D84010 18000104 - v_accvgpr_read_b32 v17, a8 // 000000019814: D3D84011 18000108 - v_accvgpr_read_b32 v18, a12 // 00000001981C: D3D84012 1800010C - v_accvgpr_read_b32 v19, a16 // 000000019824: D3D84013 18000110 - v_accvgpr_read_b32 v20, a20 // 00000001982C: D3D84014 18000114 - v_accvgpr_read_b32 v21, a24 // 000000019834: D3D84015 18000118 - v_accvgpr_read_b32 v22, a28 // 00000001983C: D3D84016 1800011C - v_accvgpr_read_b32 v23, a32 // 000000019844: D3D84017 18000120 - v_accvgpr_read_b32 v24, a36 // 00000001984C: D3D84018 18000124 - v_accvgpr_read_b32 v25, a40 // 000000019854: D3D84019 18000128 - v_accvgpr_read_b32 v26, a44 // 00000001985C: D3D8401A 1800012C - v_accvgpr_read_b32 v27, a48 // 000000019864: D3D8401B 18000130 - v_accvgpr_read_b32 v28, a52 // 00000001986C: D3D8401C 18000134 - v_accvgpr_read_b32 v29, a56 // 000000019874: D3D8401D 18000138 - v_accvgpr_read_b32 v30, a60 // 00000001987C: D3D8401E 1800013C - v_accvgpr_read_b32 v31, a64 // 000000019884: D3D8401F 18000140 - v_accvgpr_read_b32 v32, a68 // 00000001988C: D3D84020 18000144 - v_accvgpr_read_b32 v33, a72 // 000000019894: D3D84021 18000148 - v_accvgpr_read_b32 v34, a76 // 00000001989C: D3D84022 1800014C - v_accvgpr_read_b32 v35, a80 // 0000000198A4: D3D84023 18000150 - v_accvgpr_read_b32 v36, a84 // 0000000198AC: D3D84024 18000154 - 
v_accvgpr_read_b32 v37, a88 // 0000000198B4: D3D84025 18000158 - v_accvgpr_read_b32 v38, a92 // 0000000198BC: D3D84026 1800015C - v_accvgpr_read_b32 v39, a96 // 0000000198C4: D3D84027 18000160 - v_accvgpr_read_b32 v40, a100 // 0000000198CC: D3D84028 18000164 - v_accvgpr_read_b32 v41, a104 // 0000000198D4: D3D84029 18000168 - v_accvgpr_read_b32 v42, a108 // 0000000198DC: D3D8402A 1800016C - v_accvgpr_read_b32 v43, a112 // 0000000198E4: D3D8402B 18000170 - v_accvgpr_read_b32 v44, a116 // 0000000198EC: D3D8402C 18000174 - v_accvgpr_read_b32 v45, a120 // 0000000198F4: D3D8402D 18000178 - v_accvgpr_read_b32 v46, a124 // 0000000198FC: D3D8402E 1800017C - v_accvgpr_read_b32 v47, a128 // 000000019904: D3D8402F 18000180 - v_accvgpr_read_b32 v48, a132 // 00000001990C: D3D84030 18000184 - v_accvgpr_read_b32 v49, a136 // 000000019914: D3D84031 18000188 - v_accvgpr_read_b32 v50, a140 // 00000001991C: D3D84032 1800018C - v_accvgpr_read_b32 v51, a144 // 000000019924: D3D84033 18000190 - v_accvgpr_read_b32 v52, a148 // 00000001992C: D3D84034 18000194 - v_accvgpr_read_b32 v53, a152 // 000000019934: D3D84035 18000198 - v_accvgpr_read_b32 v54, a156 // 00000001993C: D3D84036 1800019C - v_accvgpr_read_b32 v55, a160 // 000000019944: D3D84037 180001A0 - v_accvgpr_read_b32 v56, a164 // 00000001994C: D3D84038 180001A4 - v_accvgpr_read_b32 v57, a168 // 000000019954: D3D84039 180001A8 - v_accvgpr_read_b32 v58, a172 // 00000001995C: D3D8403A 180001AC - v_accvgpr_read_b32 v59, a176 // 000000019964: D3D8403B 180001B0 - v_accvgpr_read_b32 v60, a180 // 00000001996C: D3D8403C 180001B4 - v_accvgpr_read_b32 v61, a184 // 000000019974: D3D8403D 180001B8 - v_accvgpr_read_b32 v62, a188 // 00000001997C: D3D8403E 180001BC - v_accvgpr_read_b32 v63, a192 // 000000019984: D3D8403F 180001C0 - v_accvgpr_read_b32 v64, a196 // 00000001998C: D3D84040 180001C4 - v_accvgpr_read_b32 v65, a200 // 000000019994: D3D84041 180001C8 - v_accvgpr_read_b32 v66, a204 // 00000001999C: D3D84042 180001CC - v_accvgpr_read_b32 
v67, a208 // 0000000199A4: D3D84043 180001D0 - v_accvgpr_read_b32 v68, a212 // 0000000199AC: D3D84044 180001D4 - v_accvgpr_read_b32 v69, a216 // 0000000199B4: D3D84045 180001D8 - v_accvgpr_read_b32 v70, a220 // 0000000199BC: D3D84046 180001DC - v_accvgpr_read_b32 v71, a224 // 0000000199C4: D3D84047 180001E0 - v_accvgpr_read_b32 v72, a228 // 0000000199CC: D3D84048 180001E4 - v_accvgpr_read_b32 v73, a232 // 0000000199D4: D3D84049 180001E8 - v_accvgpr_read_b32 v74, a236 // 0000000199DC: D3D8404A 180001EC - v_accvgpr_read_b32 v75, a240 // 0000000199E4: D3D8404B 180001F0 - v_accvgpr_read_b32 v76, a244 // 0000000199EC: D3D8404C 180001F4 - v_accvgpr_read_b32 v77, a248 // 0000000199F4: D3D8404D 180001F8 - v_accvgpr_read_b32 v78, a252 // 0000000199FC: D3D8404E 180001FC - v_accvgpr_read_b32 v79, a1 // 000000019A04: D3D8404F 18000101 - v_accvgpr_read_b32 v80, a5 // 000000019A0C: D3D84050 18000105 - v_accvgpr_read_b32 v81, a9 // 000000019A14: D3D84051 18000109 - v_accvgpr_read_b32 v82, a13 // 000000019A1C: D3D84052 1800010D - v_accvgpr_read_b32 v83, a17 // 000000019A24: D3D84053 18000111 - v_accvgpr_read_b32 v84, a21 // 000000019A2C: D3D84054 18000115 - v_accvgpr_read_b32 v85, a25 // 000000019A34: D3D84055 18000119 - v_accvgpr_read_b32 v86, a29 // 000000019A3C: D3D84056 1800011D - v_accvgpr_read_b32 v87, a33 // 000000019A44: D3D84057 18000121 - v_accvgpr_read_b32 v88, a37 // 000000019A4C: D3D84058 18000125 - v_accvgpr_read_b32 v89, a41 // 000000019A54: D3D84059 18000129 - v_accvgpr_read_b32 v90, a45 // 000000019A5C: D3D8405A 1800012D - v_mul_f32_e32 v15, s44, v15 // 000000019A64: 0A1E1E2C - v_pk_mul_f32 v[16:17], s[44:45], v[16:17] op_sel_hi:[0,1] // 000000019A68: D3B14010 1002202C - v_pk_mul_f32 v[18:19], s[44:45], v[18:19] op_sel_hi:[0,1] // 000000019A70: D3B14012 1002242C - v_pk_mul_f32 v[20:21], s[44:45], v[20:21] op_sel_hi:[0,1] // 000000019A78: D3B14014 1002282C - v_pk_mul_f32 v[22:23], s[44:45], v[22:23] op_sel_hi:[0,1] // 000000019A80: D3B14016 10022C2C - v_pk_mul_f32 
v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 000000019A88: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 000000019A90: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 000000019A98: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] op_sel_hi:[0,1] // 000000019AA0: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 000000019AA8: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 000000019AB0: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 000000019AB8: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 000000019AC0: D3B14026 10024C2C - v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 000000019AC8: D3B14028 1002502C - v_pk_mul_f32 v[42:43], s[44:45], v[42:43] op_sel_hi:[0,1] // 000000019AD0: D3B1402A 1002542C - v_pk_mul_f32 v[44:45], s[44:45], v[44:45] op_sel_hi:[0,1] // 000000019AD8: D3B1402C 1002582C - v_pk_mul_f32 v[46:47], s[44:45], v[46:47] op_sel_hi:[0,1] // 000000019AE0: D3B1402E 10025C2C - v_pk_mul_f32 v[48:49], s[44:45], v[48:49] op_sel_hi:[0,1] // 000000019AE8: D3B14030 1002602C - v_pk_mul_f32 v[50:51], s[44:45], v[50:51] op_sel_hi:[0,1] // 000000019AF0: D3B14032 1002642C - v_pk_mul_f32 v[52:53], s[44:45], v[52:53] op_sel_hi:[0,1] // 000000019AF8: D3B14034 1002682C - v_pk_mul_f32 v[54:55], s[44:45], v[54:55] op_sel_hi:[0,1] // 000000019B00: D3B14036 10026C2C - v_pk_mul_f32 v[56:57], s[44:45], v[56:57] op_sel_hi:[0,1] // 000000019B08: D3B14038 1002702C - v_pk_mul_f32 v[58:59], s[44:45], v[58:59] op_sel_hi:[0,1] // 000000019B10: D3B1403A 1002742C - v_pk_mul_f32 v[60:61], s[44:45], v[60:61] op_sel_hi:[0,1] // 000000019B18: D3B1403C 1002782C - v_pk_mul_f32 v[62:63], s[44:45], v[62:63] op_sel_hi:[0,1] // 000000019B20: D3B1403E 10027C2C - v_pk_mul_f32 v[64:65], s[44:45], v[64:65] op_sel_hi:[0,1] // 000000019B28: D3B14040 1002802C - v_pk_mul_f32 
v[66:67], s[44:45], v[66:67] op_sel_hi:[0,1] // 000000019B30: D3B14042 1002842C - v_pk_mul_f32 v[68:69], s[44:45], v[68:69] op_sel_hi:[0,1] // 000000019B38: D3B14044 1002882C - v_pk_mul_f32 v[70:71], s[44:45], v[70:71] op_sel_hi:[0,1] // 000000019B40: D3B14046 10028C2C - v_pk_mul_f32 v[72:73], s[44:45], v[72:73] op_sel_hi:[0,1] // 000000019B48: D3B14048 1002902C - v_pk_mul_f32 v[74:75], s[44:45], v[74:75] op_sel_hi:[0,1] // 000000019B50: D3B1404A 1002942C - v_pk_mul_f32 v[76:77], s[44:45], v[76:77] op_sel_hi:[0,1] // 000000019B58: D3B1404C 1002982C - v_pk_mul_f32 v[78:79], s[44:45], v[78:79] op_sel_hi:[0,1] // 000000019B60: D3B1404E 10029C2C - v_pk_mul_f32 v[80:81], s[44:45], v[80:81] op_sel_hi:[0,1] // 000000019B68: D3B14050 1002A02C - v_pk_mul_f32 v[82:83], s[44:45], v[82:83] op_sel_hi:[0,1] // 000000019B70: D3B14052 1002A42C - v_pk_mul_f32 v[84:85], s[44:45], v[84:85] op_sel_hi:[0,1] // 000000019B78: D3B14054 1002A82C - v_pk_mul_f32 v[86:87], s[44:45], v[86:87] op_sel_hi:[0,1] // 000000019B80: D3B14056 1002AC2C - v_pk_mul_f32 v[88:89], s[44:45], v[88:89] op_sel_hi:[0,1] // 000000019B88: D3B14058 1002B02C - v_mul_f32_e32 v90, s44, v90 // 000000019B90: 0AB4B42C - s_waitcnt vmcnt(0) // 000000019B94: BF8C0F70 - v_mov_b32_e32 v12, 0xffff0000 // 000000019B98: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 000000019BA0: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 000000019BA8: 7E1C02FF 00007FFF - v_cvt_f32_bf16_sdwa v8, v91 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019BB0: 7E10B6F9 0004165B - v_fmac_f32_e64 v15, v8, s45 // 000000019BB8: D13B000F 00005B08 - v_cvt_pk_bf16_f32 v15, v15, v15 // 000000019BC0: D268000F 00021F0F - buffer_store_short v15, v92, s[16:19], 0 offen nt // 000000019BC8: E06A1000 80040F5C - v_cvt_f32_bf16_sdwa v8, v93 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019BD0: 7E10B6F9 0004165D - v_fmac_f32_e64 v16, v8, s45 // 000000019BD8: D13B0010 00005B08 - v_cvt_pk_bf16_f32 v16, v16, v16 // 
000000019BE0: D2680010 00022110 - buffer_store_short v16, v94, s[16:19], 0 offen nt // 000000019BE8: E06A1000 8004105E - v_cvt_f32_bf16_sdwa v8, v95 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019BF0: 7E10B6F9 0004165F - v_fmac_f32_e64 v17, v8, s45 // 000000019BF8: D13B0011 00005B08 - v_cvt_pk_bf16_f32 v17, v17, v17 // 000000019C00: D2680011 00022311 - buffer_store_short v17, v96, s[16:19], 0 offen nt // 000000019C08: E06A1000 80041160 - v_cvt_f32_bf16_sdwa v8, v97 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019C10: 7E10B6F9 00041661 - v_fmac_f32_e64 v18, v8, s45 // 000000019C18: D13B0012 00005B08 - v_cvt_pk_bf16_f32 v18, v18, v18 // 000000019C20: D2680012 00022512 - buffer_store_short v18, v98, s[16:19], 0 offen nt // 000000019C28: E06A1000 80041262 - v_cvt_f32_bf16_sdwa v8, v99 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019C30: 7E10B6F9 00041663 - v_fmac_f32_e64 v19, v8, s45 // 000000019C38: D13B0013 00005B08 - v_cvt_pk_bf16_f32 v19, v19, v19 // 000000019C40: D2680013 00022713 - buffer_store_short v19, v100, s[16:19], 0 offen nt // 000000019C48: E06A1000 80041364 - v_cvt_f32_bf16_sdwa v8, v101 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019C50: 7E10B6F9 00041665 - v_fmac_f32_e64 v20, v8, s45 // 000000019C58: D13B0014 00005B08 - v_cvt_pk_bf16_f32 v20, v20, v20 // 000000019C60: D2680014 00022914 - buffer_store_short v20, v102, s[16:19], 0 offen nt // 000000019C68: E06A1000 80041466 - v_cvt_f32_bf16_sdwa v8, v103 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019C70: 7E10B6F9 00041667 - v_fmac_f32_e64 v21, v8, s45 // 000000019C78: D13B0015 00005B08 - v_cvt_pk_bf16_f32 v21, v21, v21 // 000000019C80: D2680015 00022B15 - buffer_store_short v21, v104, s[16:19], 0 offen nt // 000000019C88: E06A1000 80041568 - v_cvt_f32_bf16_sdwa v8, v105 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019C90: 7E10B6F9 00041669 - v_fmac_f32_e64 v22, v8, s45 // 
000000019C98: D13B0016 00005B08 - v_cvt_pk_bf16_f32 v22, v22, v22 // 000000019CA0: D2680016 00022D16 - buffer_store_short v22, v106, s[16:19], 0 offen nt // 000000019CA8: E06A1000 8004166A - v_cvt_f32_bf16_sdwa v8, v107 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019CB0: 7E10B6F9 0004166B - v_fmac_f32_e64 v23, v8, s45 // 000000019CB8: D13B0017 00005B08 - v_cvt_pk_bf16_f32 v23, v23, v23 // 000000019CC0: D2680017 00022F17 - buffer_store_short v23, v108, s[16:19], 0 offen nt // 000000019CC8: E06A1000 8004176C - v_cvt_f32_bf16_sdwa v8, v109 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019CD0: 7E10B6F9 0004166D - v_fmac_f32_e64 v24, v8, s45 // 000000019CD8: D13B0018 00005B08 - v_cvt_pk_bf16_f32 v24, v24, v24 // 000000019CE0: D2680018 00023118 - buffer_store_short v24, v110, s[16:19], 0 offen nt // 000000019CE8: E06A1000 8004186E - v_cvt_f32_bf16_sdwa v8, v111 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019CF0: 7E10B6F9 0004166F - v_fmac_f32_e64 v25, v8, s45 // 000000019CF8: D13B0019 00005B08 - v_cvt_pk_bf16_f32 v25, v25, v25 // 000000019D00: D2680019 00023319 - buffer_store_short v25, v112, s[16:19], 0 offen nt // 000000019D08: E06A1000 80041970 - v_cvt_f32_bf16_sdwa v8, v113 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019D10: 7E10B6F9 00041671 - v_fmac_f32_e64 v26, v8, s45 // 000000019D18: D13B001A 00005B08 - v_cvt_pk_bf16_f32 v26, v26, v26 // 000000019D20: D268001A 0002351A - buffer_store_short v26, v114, s[16:19], 0 offen nt // 000000019D28: E06A1000 80041A72 - v_cvt_f32_bf16_sdwa v8, v115 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019D30: 7E10B6F9 00041673 - v_fmac_f32_e64 v27, v8, s45 // 000000019D38: D13B001B 00005B08 - v_cvt_pk_bf16_f32 v27, v27, v27 // 000000019D40: D268001B 0002371B - buffer_store_short v27, v116, s[16:19], 0 offen nt // 000000019D48: E06A1000 80041B74 - v_cvt_f32_bf16_sdwa v8, v117 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 
000000019D50: 7E10B6F9 00041675 - v_fmac_f32_e64 v28, v8, s45 // 000000019D58: D13B001C 00005B08 - v_cvt_pk_bf16_f32 v28, v28, v28 // 000000019D60: D268001C 0002391C - buffer_store_short v28, v118, s[16:19], 0 offen nt // 000000019D68: E06A1000 80041C76 - v_cvt_f32_bf16_sdwa v8, v119 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019D70: 7E10B6F9 00041677 - v_fmac_f32_e64 v29, v8, s45 // 000000019D78: D13B001D 00005B08 - v_cvt_pk_bf16_f32 v29, v29, v29 // 000000019D80: D268001D 00023B1D - buffer_store_short v29, v120, s[16:19], 0 offen nt // 000000019D88: E06A1000 80041D78 - v_cvt_f32_bf16_sdwa v8, v121 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019D90: 7E10B6F9 00041679 - v_fmac_f32_e64 v30, v8, s45 // 000000019D98: D13B001E 00005B08 - v_cvt_pk_bf16_f32 v30, v30, v30 // 000000019DA0: D268001E 00023D1E - buffer_store_short v30, v122, s[16:19], 0 offen nt // 000000019DA8: E06A1000 80041E7A - v_cvt_f32_bf16_sdwa v8, v123 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019DB0: 7E10B6F9 0004167B - v_fmac_f32_e64 v31, v8, s45 // 000000019DB8: D13B001F 00005B08 - v_cvt_pk_bf16_f32 v31, v31, v31 // 000000019DC0: D268001F 00023F1F - buffer_store_short v31, v124, s[16:19], 0 offen nt // 000000019DC8: E06A1000 80041F7C - v_cvt_f32_bf16_sdwa v8, v125 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019DD0: 7E10B6F9 0004167D - v_fmac_f32_e64 v32, v8, s45 // 000000019DD8: D13B0020 00005B08 - v_cvt_pk_bf16_f32 v32, v32, v32 // 000000019DE0: D2680020 00024120 - buffer_store_short v32, v126, s[16:19], 0 offen nt // 000000019DE8: E06A1000 8004207E - v_cvt_f32_bf16_sdwa v8, v127 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019DF0: 7E10B6F9 0004167F - v_fmac_f32_e64 v33, v8, s45 // 000000019DF8: D13B0021 00005B08 - v_cvt_pk_bf16_f32 v33, v33, v33 // 000000019E00: D2680021 00024321 - buffer_store_short v33, v128, s[16:19], 0 offen nt // 000000019E08: E06A1000 80042180 - v_cvt_f32_bf16_sdwa v8, 
v129 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019E10: 7E10B6F9 00041681 - v_fmac_f32_e64 v34, v8, s45 // 000000019E18: D13B0022 00005B08 - v_cvt_pk_bf16_f32 v34, v34, v34 // 000000019E20: D2680022 00024522 - buffer_store_short v34, v130, s[16:19], 0 offen nt // 000000019E28: E06A1000 80042282 - v_cvt_f32_bf16_sdwa v8, v131 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019E30: 7E10B6F9 00041683 - v_fmac_f32_e64 v35, v8, s45 // 000000019E38: D13B0023 00005B08 - v_cvt_pk_bf16_f32 v35, v35, v35 // 000000019E40: D2680023 00024723 - buffer_store_short v35, v135, s[16:19], 0 offen nt // 000000019E48: E06A1000 80042387 - v_cvt_f32_bf16_sdwa v8, v136 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019E50: 7E10B6F9 00041688 - v_fmac_f32_e64 v36, v8, s45 // 000000019E58: D13B0024 00005B08 - v_cvt_pk_bf16_f32 v36, v36, v36 // 000000019E60: D2680024 00024924 - buffer_store_short v36, v137, s[16:19], 0 offen nt // 000000019E68: E06A1000 80042489 - v_cvt_f32_bf16_sdwa v8, v138 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019E70: 7E10B6F9 0004168A - v_fmac_f32_e64 v37, v8, s45 // 000000019E78: D13B0025 00005B08 - v_cvt_pk_bf16_f32 v37, v37, v37 // 000000019E80: D2680025 00024B25 - buffer_store_short v37, v139, s[16:19], 0 offen nt // 000000019E88: E06A1000 8004258B - v_cvt_f32_bf16_sdwa v8, v140 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019E90: 7E10B6F9 0004168C - v_fmac_f32_e64 v38, v8, s45 // 000000019E98: D13B0026 00005B08 - v_cvt_pk_bf16_f32 v38, v38, v38 // 000000019EA0: D2680026 00024D26 - buffer_store_short v38, v141, s[16:19], 0 offen nt // 000000019EA8: E06A1000 8004268D - v_cvt_f32_bf16_sdwa v8, v142 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019EB0: 7E10B6F9 0004168E - v_fmac_f32_e64 v39, v8, s45 // 000000019EB8: D13B0027 00005B08 - v_cvt_pk_bf16_f32 v39, v39, v39 // 000000019EC0: D2680027 00024F27 - buffer_store_short v39, v143, s[16:19], 0 offen 
nt // 000000019EC8: E06A1000 8004278F - v_cvt_f32_bf16_sdwa v8, v144 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019ED0: 7E10B6F9 00041690 - v_fmac_f32_e64 v40, v8, s45 // 000000019ED8: D13B0028 00005B08 - v_cvt_pk_bf16_f32 v40, v40, v40 // 000000019EE0: D2680028 00025128 - buffer_store_short v40, v145, s[16:19], 0 offen nt // 000000019EE8: E06A1000 80042891 - v_cvt_f32_bf16_sdwa v8, v146 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019EF0: 7E10B6F9 00041692 - v_fmac_f32_e64 v41, v8, s45 // 000000019EF8: D13B0029 00005B08 - v_cvt_pk_bf16_f32 v41, v41, v41 // 000000019F00: D2680029 00025329 - buffer_store_short v41, v147, s[16:19], 0 offen nt // 000000019F08: E06A1000 80042993 - v_cvt_f32_bf16_sdwa v8, v148 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019F10: 7E10B6F9 00041694 - v_fmac_f32_e64 v42, v8, s45 // 000000019F18: D13B002A 00005B08 - v_cvt_pk_bf16_f32 v42, v42, v42 // 000000019F20: D268002A 0002552A - buffer_store_short v42, v149, s[16:19], 0 offen nt // 000000019F28: E06A1000 80042A95 - v_cvt_f32_bf16_sdwa v8, v150 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019F30: 7E10B6F9 00041696 - v_fmac_f32_e64 v43, v8, s45 // 000000019F38: D13B002B 00005B08 - v_cvt_pk_bf16_f32 v43, v43, v43 // 000000019F40: D268002B 0002572B - buffer_store_short v43, v151, s[16:19], 0 offen nt // 000000019F48: E06A1000 80042B97 - v_cvt_f32_bf16_sdwa v8, v152 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019F50: 7E10B6F9 00041698 - v_fmac_f32_e64 v44, v8, s45 // 000000019F58: D13B002C 00005B08 - v_cvt_pk_bf16_f32 v44, v44, v44 // 000000019F60: D268002C 0002592C - buffer_store_short v44, v153, s[16:19], 0 offen nt // 000000019F68: E06A1000 80042C99 - v_cvt_f32_bf16_sdwa v8, v154 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019F70: 7E10B6F9 0004169A - v_fmac_f32_e64 v45, v8, s45 // 000000019F78: D13B002D 00005B08 - v_cvt_pk_bf16_f32 v45, v45, v45 // 000000019F80: 
D268002D 00025B2D - buffer_store_short v45, v155, s[16:19], 0 offen nt // 000000019F88: E06A1000 80042D9B - v_cvt_f32_bf16_sdwa v8, v156 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019F90: 7E10B6F9 0004169C - v_fmac_f32_e64 v46, v8, s45 // 000000019F98: D13B002E 00005B08 - v_cvt_pk_bf16_f32 v46, v46, v46 // 000000019FA0: D268002E 00025D2E - buffer_store_short v46, v157, s[16:19], 0 offen nt // 000000019FA8: E06A1000 80042E9D - v_cvt_f32_bf16_sdwa v8, v158 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019FB0: 7E10B6F9 0004169E - v_fmac_f32_e64 v47, v8, s45 // 000000019FB8: D13B002F 00005B08 - v_cvt_pk_bf16_f32 v47, v47, v47 // 000000019FC0: D268002F 00025F2F - buffer_store_short v47, v159, s[16:19], 0 offen nt // 000000019FC8: E06A1000 80042F9F - v_cvt_f32_bf16_sdwa v8, v160 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019FD0: 7E10B6F9 000416A0 - v_fmac_f32_e64 v48, v8, s45 // 000000019FD8: D13B0030 00005B08 - v_cvt_pk_bf16_f32 v48, v48, v48 // 000000019FE0: D2680030 00026130 - buffer_store_short v48, v161, s[16:19], 0 offen nt // 000000019FE8: E06A1000 800430A1 - v_cvt_f32_bf16_sdwa v8, v162 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 000000019FF0: 7E10B6F9 000416A2 - v_fmac_f32_e64 v49, v8, s45 // 000000019FF8: D13B0031 00005B08 - v_cvt_pk_bf16_f32 v49, v49, v49 // 00000001A000: D2680031 00026331 - buffer_store_short v49, v163, s[16:19], 0 offen nt // 00000001A008: E06A1000 800431A3 - v_cvt_f32_bf16_sdwa v8, v164 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A010: 7E10B6F9 000416A4 - v_fmac_f32_e64 v50, v8, s45 // 00000001A018: D13B0032 00005B08 - v_cvt_pk_bf16_f32 v50, v50, v50 // 00000001A020: D2680032 00026532 - buffer_store_short v50, v165, s[16:19], 0 offen nt // 00000001A028: E06A1000 800432A5 - v_cvt_f32_bf16_sdwa v8, v166 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A030: 7E10B6F9 000416A6 - v_fmac_f32_e64 v51, v8, s45 // 00000001A038: 
D13B0033 00005B08 - v_cvt_pk_bf16_f32 v51, v51, v51 // 00000001A040: D2680033 00026733 - buffer_store_short v51, v167, s[16:19], 0 offen nt // 00000001A048: E06A1000 800433A7 - v_cvt_f32_bf16_sdwa v8, v168 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A050: 7E10B6F9 000416A8 - v_fmac_f32_e64 v52, v8, s45 // 00000001A058: D13B0034 00005B08 - v_cvt_pk_bf16_f32 v52, v52, v52 // 00000001A060: D2680034 00026934 - buffer_store_short v52, v169, s[16:19], 0 offen nt // 00000001A068: E06A1000 800434A9 - v_cvt_f32_bf16_sdwa v8, v170 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A070: 7E10B6F9 000416AA - v_fmac_f32_e64 v53, v8, s45 // 00000001A078: D13B0035 00005B08 - v_cvt_pk_bf16_f32 v53, v53, v53 // 00000001A080: D2680035 00026B35 - buffer_store_short v53, v171, s[16:19], 0 offen nt // 00000001A088: E06A1000 800435AB - v_cvt_f32_bf16_sdwa v8, v172 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A090: 7E10B6F9 000416AC - v_fmac_f32_e64 v54, v8, s45 // 00000001A098: D13B0036 00005B08 - v_cvt_pk_bf16_f32 v54, v54, v54 // 00000001A0A0: D2680036 00026D36 - buffer_store_short v54, v173, s[16:19], 0 offen nt // 00000001A0A8: E06A1000 800436AD - v_cvt_f32_bf16_sdwa v8, v174 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A0B0: 7E10B6F9 000416AE - v_fmac_f32_e64 v55, v8, s45 // 00000001A0B8: D13B0037 00005B08 - v_cvt_pk_bf16_f32 v55, v55, v55 // 00000001A0C0: D2680037 00026F37 - buffer_store_short v55, v175, s[16:19], 0 offen nt // 00000001A0C8: E06A1000 800437AF - v_cvt_f32_bf16_sdwa v8, v176 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A0D0: 7E10B6F9 000416B0 - v_fmac_f32_e64 v56, v8, s45 // 00000001A0D8: D13B0038 00005B08 - v_cvt_pk_bf16_f32 v56, v56, v56 // 00000001A0E0: D2680038 00027138 - buffer_store_short v56, v177, s[16:19], 0 offen nt // 00000001A0E8: E06A1000 800438B1 - v_cvt_f32_bf16_sdwa v8, v178 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A0F0: 
7E10B6F9 000416B2 - v_fmac_f32_e64 v57, v8, s45 // 00000001A0F8: D13B0039 00005B08 - v_cvt_pk_bf16_f32 v57, v57, v57 // 00000001A100: D2680039 00027339 - buffer_store_short v57, v179, s[16:19], 0 offen nt // 00000001A108: E06A1000 800439B3 - v_cvt_f32_bf16_sdwa v8, v180 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A110: 7E10B6F9 000416B4 - v_fmac_f32_e64 v58, v8, s45 // 00000001A118: D13B003A 00005B08 - v_cvt_pk_bf16_f32 v58, v58, v58 // 00000001A120: D268003A 0002753A - buffer_store_short v58, v181, s[16:19], 0 offen nt // 00000001A128: E06A1000 80043AB5 - v_cvt_f32_bf16_sdwa v8, v182 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A130: 7E10B6F9 000416B6 - v_fmac_f32_e64 v59, v8, s45 // 00000001A138: D13B003B 00005B08 - v_cvt_pk_bf16_f32 v59, v59, v59 // 00000001A140: D268003B 0002773B - buffer_store_short v59, v183, s[16:19], 0 offen nt // 00000001A148: E06A1000 80043BB7 - v_cvt_f32_bf16_sdwa v8, v184 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A150: 7E10B6F9 000416B8 - v_fmac_f32_e64 v60, v8, s45 // 00000001A158: D13B003C 00005B08 - v_cvt_pk_bf16_f32 v60, v60, v60 // 00000001A160: D268003C 0002793C - buffer_store_short v60, v185, s[16:19], 0 offen nt // 00000001A168: E06A1000 80043CB9 - v_cvt_f32_bf16_sdwa v8, v186 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A170: 7E10B6F9 000416BA - v_fmac_f32_e64 v61, v8, s45 // 00000001A178: D13B003D 00005B08 - v_cvt_pk_bf16_f32 v61, v61, v61 // 00000001A180: D268003D 00027B3D - buffer_store_short v61, v187, s[16:19], 0 offen nt // 00000001A188: E06A1000 80043DBB - v_cvt_f32_bf16_sdwa v8, v188 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A190: 7E10B6F9 000416BC - v_fmac_f32_e64 v62, v8, s45 // 00000001A198: D13B003E 00005B08 - v_cvt_pk_bf16_f32 v62, v62, v62 // 00000001A1A0: D268003E 00027D3E - buffer_store_short v62, v189, s[16:19], 0 offen nt // 00000001A1A8: E06A1000 80043EBD - v_cvt_f32_bf16_sdwa v8, v190 
dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A1B0: 7E10B6F9 000416BE - v_fmac_f32_e64 v63, v8, s45 // 00000001A1B8: D13B003F 00005B08 - v_cvt_pk_bf16_f32 v63, v63, v63 // 00000001A1C0: D268003F 00027F3F - buffer_store_short v63, v191, s[16:19], 0 offen nt // 00000001A1C8: E06A1000 80043FBF - v_cvt_f32_bf16_sdwa v8, v192 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A1D0: 7E10B6F9 000416C0 - v_fmac_f32_e64 v64, v8, s45 // 00000001A1D8: D13B0040 00005B08 - v_cvt_pk_bf16_f32 v64, v64, v64 // 00000001A1E0: D2680040 00028140 - buffer_store_short v64, v193, s[16:19], 0 offen nt // 00000001A1E8: E06A1000 800440C1 - v_cvt_f32_bf16_sdwa v8, v194 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A1F0: 7E10B6F9 000416C2 - v_fmac_f32_e64 v65, v8, s45 // 00000001A1F8: D13B0041 00005B08 - v_cvt_pk_bf16_f32 v65, v65, v65 // 00000001A200: D2680041 00028341 - buffer_store_short v65, v195, s[16:19], 0 offen nt // 00000001A208: E06A1000 800441C3 - v_cvt_f32_bf16_sdwa v8, v196 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A210: 7E10B6F9 000416C4 - v_fmac_f32_e64 v66, v8, s45 // 00000001A218: D13B0042 00005B08 - v_cvt_pk_bf16_f32 v66, v66, v66 // 00000001A220: D2680042 00028542 - buffer_store_short v66, v197, s[16:19], 0 offen nt // 00000001A228: E06A1000 800442C5 - v_cvt_f32_bf16_sdwa v8, v198 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A230: 7E10B6F9 000416C6 - v_fmac_f32_e64 v67, v8, s45 // 00000001A238: D13B0043 00005B08 - v_cvt_pk_bf16_f32 v67, v67, v67 // 00000001A240: D2680043 00028743 - buffer_store_short v67, v199, s[16:19], 0 offen nt // 00000001A248: E06A1000 800443C7 - v_cvt_f32_bf16_sdwa v8, v200 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A250: 7E10B6F9 000416C8 - v_fmac_f32_e64 v68, v8, s45 // 00000001A258: D13B0044 00005B08 - v_cvt_pk_bf16_f32 v68, v68, v68 // 00000001A260: D2680044 00028944 - buffer_store_short v68, v201, s[16:19], 0 offen nt 
// 00000001A268: E06A1000 800444C9 - v_cvt_f32_bf16_sdwa v8, v202 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A270: 7E10B6F9 000416CA - v_fmac_f32_e64 v69, v8, s45 // 00000001A278: D13B0045 00005B08 - v_cvt_pk_bf16_f32 v69, v69, v69 // 00000001A280: D2680045 00028B45 - buffer_store_short v69, v203, s[16:19], 0 offen nt // 00000001A288: E06A1000 800445CB - v_cvt_f32_bf16_sdwa v8, v204 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A290: 7E10B6F9 000416CC - v_fmac_f32_e64 v70, v8, s45 // 00000001A298: D13B0046 00005B08 - v_cvt_pk_bf16_f32 v70, v70, v70 // 00000001A2A0: D2680046 00028D46 - buffer_store_short v70, v205, s[16:19], 0 offen nt // 00000001A2A8: E06A1000 800446CD - v_cvt_f32_bf16_sdwa v8, v206 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A2B0: 7E10B6F9 000416CE - v_fmac_f32_e64 v71, v8, s45 // 00000001A2B8: D13B0047 00005B08 - v_cvt_pk_bf16_f32 v71, v71, v71 // 00000001A2C0: D2680047 00028F47 - buffer_store_short v71, v207, s[16:19], 0 offen nt // 00000001A2C8: E06A1000 800447CF - v_cvt_f32_bf16_sdwa v8, v208 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A2D0: 7E10B6F9 000416D0 - v_fmac_f32_e64 v72, v8, s45 // 00000001A2D8: D13B0048 00005B08 - v_cvt_pk_bf16_f32 v72, v72, v72 // 00000001A2E0: D2680048 00029148 - buffer_store_short v72, v209, s[16:19], 0 offen nt // 00000001A2E8: E06A1000 800448D1 - v_cvt_f32_bf16_sdwa v8, v210 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A2F0: 7E10B6F9 000416D2 - v_fmac_f32_e64 v73, v8, s45 // 00000001A2F8: D13B0049 00005B08 - v_cvt_pk_bf16_f32 v73, v73, v73 // 00000001A300: D2680049 00029349 - buffer_store_short v73, v211, s[16:19], 0 offen nt // 00000001A308: E06A1000 800449D3 - v_cvt_f32_bf16_sdwa v8, v212 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A310: 7E10B6F9 000416D4 - v_fmac_f32_e64 v74, v8, s45 // 00000001A318: D13B004A 00005B08 - v_cvt_pk_bf16_f32 v74, v74, v74 // 00000001A320: 
D268004A 0002954A - buffer_store_short v74, v213, s[16:19], 0 offen nt // 00000001A328: E06A1000 80044AD5 - v_cvt_f32_bf16_sdwa v8, v214 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A330: 7E10B6F9 000416D6 - v_fmac_f32_e64 v75, v8, s45 // 00000001A338: D13B004B 00005B08 - v_cvt_pk_bf16_f32 v75, v75, v75 // 00000001A340: D268004B 0002974B - buffer_store_short v75, v215, s[16:19], 0 offen nt // 00000001A348: E06A1000 80044BD7 - v_cvt_f32_bf16_sdwa v8, v216 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A350: 7E10B6F9 000416D8 - v_fmac_f32_e64 v76, v8, s45 // 00000001A358: D13B004C 00005B08 - v_cvt_pk_bf16_f32 v76, v76, v76 // 00000001A360: D268004C 0002994C - buffer_store_short v76, v217, s[16:19], 0 offen nt // 00000001A368: E06A1000 80044CD9 - v_cvt_f32_bf16_sdwa v8, v218 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A370: 7E10B6F9 000416DA - v_fmac_f32_e64 v77, v8, s45 // 00000001A378: D13B004D 00005B08 - v_cvt_pk_bf16_f32 v77, v77, v77 // 00000001A380: D268004D 00029B4D - buffer_store_short v77, v219, s[16:19], 0 offen nt // 00000001A388: E06A1000 80044DDB - v_cvt_f32_bf16_sdwa v8, v220 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A390: 7E10B6F9 000416DC - v_fmac_f32_e64 v78, v8, s45 // 00000001A398: D13B004E 00005B08 - v_cvt_pk_bf16_f32 v78, v78, v78 // 00000001A3A0: D268004E 00029D4E - buffer_store_short v78, v221, s[16:19], 0 offen nt // 00000001A3A8: E06A1000 80044EDD - v_cvt_f32_bf16_sdwa v8, v222 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A3B0: 7E10B6F9 000416DE - v_fmac_f32_e64 v79, v8, s45 // 00000001A3B8: D13B004F 00005B08 - v_cvt_pk_bf16_f32 v79, v79, v79 // 00000001A3C0: D268004F 00029F4F - buffer_store_short v79, v223, s[16:19], 0 offen nt // 00000001A3C8: E06A1000 80044FDF - v_cvt_f32_bf16_sdwa v8, v224 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A3D0: 7E10B6F9 000416E0 - v_fmac_f32_e64 v80, v8, s45 // 00000001A3D8: 
D13B0050 00005B08 - v_cvt_pk_bf16_f32 v80, v80, v80 // 00000001A3E0: D2680050 0002A150 - buffer_store_short v80, v225, s[16:19], 0 offen nt // 00000001A3E8: E06A1000 800450E1 - v_cvt_f32_bf16_sdwa v8, v226 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A3F0: 7E10B6F9 000416E2 - v_fmac_f32_e64 v81, v8, s45 // 00000001A3F8: D13B0051 00005B08 - v_cvt_pk_bf16_f32 v81, v81, v81 // 00000001A400: D2680051 0002A351 - buffer_store_short v81, v227, s[16:19], 0 offen nt // 00000001A408: E06A1000 800451E3 - v_cvt_f32_bf16_sdwa v8, v228 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A410: 7E10B6F9 000416E4 - v_fmac_f32_e64 v82, v8, s45 // 00000001A418: D13B0052 00005B08 - v_cvt_pk_bf16_f32 v82, v82, v82 // 00000001A420: D2680052 0002A552 - buffer_store_short v82, v229, s[16:19], 0 offen nt // 00000001A428: E06A1000 800452E5 - v_cvt_f32_bf16_sdwa v8, v230 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A430: 7E10B6F9 000416E6 - v_fmac_f32_e64 v83, v8, s45 // 00000001A438: D13B0053 00005B08 - v_cvt_pk_bf16_f32 v83, v83, v83 // 00000001A440: D2680053 0002A753 - buffer_store_short v83, v231, s[16:19], 0 offen nt // 00000001A448: E06A1000 800453E7 - v_cvt_f32_bf16_sdwa v8, v232 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A450: 7E10B6F9 000416E8 - v_fmac_f32_e64 v84, v8, s45 // 00000001A458: D13B0054 00005B08 - v_cvt_pk_bf16_f32 v84, v84, v84 // 00000001A460: D2680054 0002A954 - buffer_store_short v84, v233, s[16:19], 0 offen nt // 00000001A468: E06A1000 800454E9 - v_cvt_f32_bf16_sdwa v8, v234 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A470: 7E10B6F9 000416EA - v_fmac_f32_e64 v85, v8, s45 // 00000001A478: D13B0055 00005B08 - v_cvt_pk_bf16_f32 v85, v85, v85 // 00000001A480: D2680055 0002AB55 - buffer_store_short v85, v235, s[16:19], 0 offen nt // 00000001A488: E06A1000 800455EB - v_cvt_f32_bf16_sdwa v8, v236 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A490: 
7E10B6F9 000416EC - v_fmac_f32_e64 v86, v8, s45 // 00000001A498: D13B0056 00005B08 - v_cvt_pk_bf16_f32 v86, v86, v86 // 00000001A4A0: D2680056 0002AD56 - buffer_store_short v86, v237, s[16:19], 0 offen nt // 00000001A4A8: E06A1000 800456ED - v_cvt_f32_bf16_sdwa v8, v238 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A4B0: 7E10B6F9 000416EE - v_fmac_f32_e64 v87, v8, s45 // 00000001A4B8: D13B0057 00005B08 - v_cvt_pk_bf16_f32 v87, v87, v87 // 00000001A4C0: D2680057 0002AF57 - buffer_store_short v87, v239, s[16:19], 0 offen nt // 00000001A4C8: E06A1000 800457EF - v_cvt_f32_bf16_sdwa v8, v240 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A4D0: 7E10B6F9 000416F0 - v_fmac_f32_e64 v88, v8, s45 // 00000001A4D8: D13B0058 00005B08 - v_cvt_pk_bf16_f32 v88, v88, v88 // 00000001A4E0: D2680058 0002B158 - buffer_store_short v88, v241, s[16:19], 0 offen nt // 00000001A4E8: E06A1000 800458F1 - v_cvt_f32_bf16_sdwa v8, v242 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A4F0: 7E10B6F9 000416F2 - v_fmac_f32_e64 v89, v8, s45 // 00000001A4F8: D13B0059 00005B08 - v_cvt_pk_bf16_f32 v89, v89, v89 // 00000001A500: D2680059 0002B359 - buffer_store_short v89, v243, s[16:19], 0 offen nt // 00000001A508: E06A1000 800459F3 - v_cvt_f32_bf16_sdwa v8, v244 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001A510: 7E10B6F9 000416F4 - v_fmac_f32_e64 v90, v8, s45 // 00000001A518: D13B005A 00005B08 - v_cvt_pk_bf16_f32 v90, v90, v90 // 00000001A520: D268005A 0002B55A - buffer_store_short v90, v245, s[16:19], 0 offen nt // 00000001A528: E06A1000 80045AF5 - s_nop 0 // 00000001A530: BF800000 - v_mov_b32_e32 v10, 0x80000000 // 00000001A534: 7E1402FF 80000000 - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001A53C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A544: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A54C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A554: 86A2221E 
- v_add_lshl_u32 v92, v6, v8, 1 // 00000001A558: D1FE005C 02061106 - v_cndmask_b32_e64 v92, v10, v92, s[34:35] // 00000001A560: D100005C 008AB90A - buffer_load_short_d16 v91, v92, s[20:23], 0 offen // 00000001A568: E0901000 80055B5C - v_add_lshl_u32 v92, v7, v8, 1 // 00000001A570: D1FE005C 02061107 - v_cndmask_b32_e64 v92, v10, v92, s[34:35] // 00000001A578: D100005C 008AB90A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001A580: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A588: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A590: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A598: 86A2221E - v_add_lshl_u32 v94, v6, v8, 1 // 00000001A59C: D1FE005E 02061106 - v_cndmask_b32_e64 v94, v10, v94, s[34:35] // 00000001A5A4: D100005E 008ABD0A - buffer_load_short_d16 v93, v94, s[20:23], 0 offen // 00000001A5AC: E0901000 80055D5E - v_add_lshl_u32 v94, v7, v8, 1 // 00000001A5B4: D1FE005E 02061107 - v_cndmask_b32_e64 v94, v10, v94, s[34:35] // 00000001A5BC: D100005E 008ABD0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001A5C4: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A5CC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A5D4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A5DC: 86A2221E - v_add_lshl_u32 v96, v6, v8, 1 // 00000001A5E0: D1FE0060 02061106 - v_cndmask_b32_e64 v96, v10, v96, s[34:35] // 00000001A5E8: D1000060 008AC10A - buffer_load_short_d16 v95, v96, s[20:23], 0 offen // 00000001A5F0: E0901000 80055F60 - v_add_lshl_u32 v96, v7, v8, 1 // 00000001A5F8: D1FE0060 02061107 - v_cndmask_b32_e64 v96, v10, v96, s[34:35] // 00000001A600: D1000060 008AC10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001A608: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A610: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A618: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A620: 86A2221E - v_add_lshl_u32 v98, 
v6, v8, 1 // 00000001A624: D1FE0062 02061106 - v_cndmask_b32_e64 v98, v10, v98, s[34:35] // 00000001A62C: D1000062 008AC50A - buffer_load_short_d16 v97, v98, s[20:23], 0 offen // 00000001A634: E0901000 80056162 - v_add_lshl_u32 v98, v7, v8, 1 // 00000001A63C: D1FE0062 02061107 - v_cndmask_b32_e64 v98, v10, v98, s[34:35] // 00000001A644: D1000062 008AC50A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001A64C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001A654: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001A65C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001A664: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A66C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A674: 86A2221E - v_add_lshl_u32 v100, v6, v4, 1 // 00000001A678: D1FE0064 02060906 - v_cndmask_b32_e64 v100, v10, v100, s[34:35] // 00000001A680: D1000064 008AC90A - buffer_load_short_d16 v99, v100, s[20:23], 0 offen // 00000001A688: E0901000 80056364 - v_add_lshl_u32 v100, v7, v4, 1 // 00000001A690: D1FE0064 02060907 - v_cndmask_b32_e64 v100, v10, v100, s[34:35] // 00000001A698: D1000064 008AC90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001A6A0: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A6A8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A6B0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A6B8: 86A2221E - v_add_lshl_u32 v102, v6, v8, 1 // 00000001A6BC: D1FE0066 02061106 - v_cndmask_b32_e64 v102, v10, v102, s[34:35] // 00000001A6C4: D1000066 008ACD0A - buffer_load_short_d16 v101, v102, s[20:23], 0 offen // 00000001A6CC: E0901000 80056566 - v_add_lshl_u32 v102, v7, v8, 1 // 00000001A6D4: D1FE0066 02061107 - v_cndmask_b32_e64 v102, v10, v102, s[34:35] // 00000001A6DC: D1000066 008ACD0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001A6E4: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A6EC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 
00000001A6F4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A6FC: 86A2221E - v_add_lshl_u32 v104, v6, v8, 1 // 00000001A700: D1FE0068 02061106 - v_cndmask_b32_e64 v104, v10, v104, s[34:35] // 00000001A708: D1000068 008AD10A - buffer_load_short_d16 v103, v104, s[20:23], 0 offen // 00000001A710: E0901000 80056768 - v_add_lshl_u32 v104, v7, v8, 1 // 00000001A718: D1FE0068 02061107 - v_cndmask_b32_e64 v104, v10, v104, s[34:35] // 00000001A720: D1000068 008AD10A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001A728: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A730: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A738: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A740: 86A2221E - v_add_lshl_u32 v106, v6, v8, 1 // 00000001A744: D1FE006A 02061106 - v_cndmask_b32_e64 v106, v10, v106, s[34:35] // 00000001A74C: D100006A 008AD50A - buffer_load_short_d16 v105, v106, s[20:23], 0 offen // 00000001A754: E0901000 8005696A - v_add_lshl_u32 v106, v7, v8, 1 // 00000001A75C: D1FE006A 02061107 - v_cndmask_b32_e64 v106, v10, v106, s[34:35] // 00000001A764: D100006A 008AD50A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001A76C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A774: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A77C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A784: 86A2221E - v_add_lshl_u32 v108, v6, v8, 1 // 00000001A788: D1FE006C 02061106 - v_cndmask_b32_e64 v108, v10, v108, s[34:35] // 00000001A790: D100006C 008AD90A - buffer_load_short_d16 v107, v108, s[20:23], 0 offen // 00000001A798: E0901000 80056B6C - v_add_lshl_u32 v108, v7, v8, 1 // 00000001A7A0: D1FE006C 02061107 - v_cndmask_b32_e64 v108, v10, v108, s[34:35] // 00000001A7A8: D100006C 008AD90A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001A7B0: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A7B8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 
// 00000001A7C0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A7C8: 86A2221E - v_add_lshl_u32 v110, v6, v8, 1 // 00000001A7CC: D1FE006E 02061106 - v_cndmask_b32_e64 v110, v10, v110, s[34:35] // 00000001A7D4: D100006E 008ADD0A - buffer_load_short_d16 v109, v110, s[20:23], 0 offen // 00000001A7DC: E0901000 80056D6E - v_add_lshl_u32 v110, v7, v8, 1 // 00000001A7E4: D1FE006E 02061107 - v_cndmask_b32_e64 v110, v10, v110, s[34:35] // 00000001A7EC: D100006E 008ADD0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001A7F4: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A7FC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A804: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A80C: 86A2221E - v_add_lshl_u32 v112, v6, v8, 1 // 00000001A810: D1FE0070 02061106 - v_cndmask_b32_e64 v112, v10, v112, s[34:35] // 00000001A818: D1000070 008AE10A - buffer_load_short_d16 v111, v112, s[20:23], 0 offen // 00000001A820: E0901000 80056F70 - v_add_lshl_u32 v112, v7, v8, 1 // 00000001A828: D1FE0070 02061107 - v_cndmask_b32_e64 v112, v10, v112, s[34:35] // 00000001A830: D1000070 008AE10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001A838: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A840: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A848: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A850: 86A2221E - v_add_lshl_u32 v114, v6, v8, 1 // 00000001A854: D1FE0072 02061106 - v_cndmask_b32_e64 v114, v10, v114, s[34:35] // 00000001A85C: D1000072 008AE50A - buffer_load_short_d16 v113, v114, s[20:23], 0 offen // 00000001A864: E0901000 80057172 - v_add_lshl_u32 v114, v7, v8, 1 // 00000001A86C: D1FE0072 02061107 - v_cndmask_b32_e64 v114, v10, v114, s[34:35] // 00000001A874: D1000072 008AE50A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001A87C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001A884: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001A88C: 
D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001A894: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A89C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A8A4: 86A2221E - v_add_lshl_u32 v116, v6, v4, 1 // 00000001A8A8: D1FE0074 02060906 - v_cndmask_b32_e64 v116, v10, v116, s[34:35] // 00000001A8B0: D1000074 008AE90A - buffer_load_short_d16 v115, v116, s[20:23], 0 offen // 00000001A8B8: E0901000 80057374 - v_add_lshl_u32 v116, v7, v4, 1 // 00000001A8C0: D1FE0074 02060907 - v_cndmask_b32_e64 v116, v10, v116, s[34:35] // 00000001A8C8: D1000074 008AE90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001A8D0: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A8D8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A8E0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A8E8: 86A2221E - v_add_lshl_u32 v118, v6, v8, 1 // 00000001A8EC: D1FE0076 02061106 - v_cndmask_b32_e64 v118, v10, v118, s[34:35] // 00000001A8F4: D1000076 008AED0A - buffer_load_short_d16 v117, v118, s[20:23], 0 offen // 00000001A8FC: E0901000 80057576 - v_add_lshl_u32 v118, v7, v8, 1 // 00000001A904: D1FE0076 02061107 - v_cndmask_b32_e64 v118, v10, v118, s[34:35] // 00000001A90C: D1000076 008AED0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001A914: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A91C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A924: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A92C: 86A2221E - v_add_lshl_u32 v120, v6, v8, 1 // 00000001A930: D1FE0078 02061106 - v_cndmask_b32_e64 v120, v10, v120, s[34:35] // 00000001A938: D1000078 008AF10A - buffer_load_short_d16 v119, v120, s[20:23], 0 offen // 00000001A940: E0901000 80057778 - v_add_lshl_u32 v120, v7, v8, 1 // 00000001A948: D1FE0078 02061107 - v_cndmask_b32_e64 v120, v10, v120, s[34:35] // 00000001A950: D1000078 008AF10A - v_add_co_u32_e64 v8, vcc, v4, 3 // 
00000001A958: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A960: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A968: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A970: 86A2221E - v_add_lshl_u32 v122, v6, v8, 1 // 00000001A974: D1FE007A 02061106 - v_cndmask_b32_e64 v122, v10, v122, s[34:35] // 00000001A97C: D100007A 008AF50A - buffer_load_short_d16 v121, v122, s[20:23], 0 offen // 00000001A984: E0901000 8005797A - v_add_lshl_u32 v122, v7, v8, 1 // 00000001A98C: D1FE007A 02061107 - v_cndmask_b32_e64 v122, v10, v122, s[34:35] // 00000001A994: D100007A 008AF50A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001A99C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A9A4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A9AC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A9B4: 86A2221E - v_add_lshl_u32 v124, v6, v8, 1 // 00000001A9B8: D1FE007C 02061106 - v_cndmask_b32_e64 v124, v10, v124, s[34:35] // 00000001A9C0: D100007C 008AF90A - buffer_load_short_d16 v123, v124, s[20:23], 0 offen // 00000001A9C8: E0901000 80057B7C - v_add_lshl_u32 v124, v7, v8, 1 // 00000001A9D0: D1FE007C 02061107 - v_cndmask_b32_e64 v124, v10, v124, s[34:35] // 00000001A9D8: D100007C 008AF90A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001A9E0: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001A9E8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001A9F0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001A9F8: 86A2221E - v_add_lshl_u32 v126, v6, v8, 1 // 00000001A9FC: D1FE007E 02061106 - v_cndmask_b32_e64 v126, v10, v126, s[34:35] // 00000001AA04: D100007E 008AFD0A - buffer_load_short_d16 v125, v126, s[20:23], 0 offen // 00000001AA0C: E0901000 80057D7E - v_add_lshl_u32 v126, v7, v8, 1 // 00000001AA14: D1FE007E 02061107 - v_cndmask_b32_e64 v126, v10, v126, s[34:35] // 00000001AA1C: D100007E 008AFD0A - v_add_co_u32_e64 v8, vcc, v4, 6 
// 00000001AA24: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AA2C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AA34: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AA3C: 86A2221E - v_add_lshl_u32 v128, v6, v8, 1 // 00000001AA40: D1FE0080 02061106 - v_cndmask_b32_e64 v128, v10, v128, s[34:35] // 00000001AA48: D1000080 008B010A - buffer_load_short_d16 v127, v128, s[20:23], 0 offen // 00000001AA50: E0901000 80057F80 - v_add_lshl_u32 v128, v7, v8, 1 // 00000001AA58: D1FE0080 02061107 - v_cndmask_b32_e64 v128, v10, v128, s[34:35] // 00000001AA60: D1000080 008B010A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001AA68: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AA70: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AA78: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AA80: 86A2221E - v_add_lshl_u32 v130, v6, v8, 1 // 00000001AA84: D1FE0082 02061106 - v_cndmask_b32_e64 v130, v10, v130, s[34:35] // 00000001AA8C: D1000082 008B050A - buffer_load_short_d16 v129, v130, s[20:23], 0 offen // 00000001AA94: E0901000 80058182 - v_add_lshl_u32 v130, v7, v8, 1 // 00000001AA9C: D1FE0082 02061107 - v_cndmask_b32_e64 v130, v10, v130, s[34:35] // 00000001AAA4: D1000082 008B050A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001AAAC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001AAB4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001AABC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001AAC4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AACC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AAD4: 86A2221E - v_add_lshl_u32 v135, v6, v4, 1 // 00000001AAD8: D1FE0087 02060906 - v_cndmask_b32_e64 v135, v10, v135, s[34:35] // 00000001AAE0: D1000087 008B0F0A - buffer_load_short_d16 v131, v135, s[20:23], 0 offen // 00000001AAE8: E0901000 80058387 - v_add_lshl_u32 v135, v7, v4, 1 // 00000001AAF0: 
D1FE0087 02060907 - v_cndmask_b32_e64 v135, v10, v135, s[34:35] // 00000001AAF8: D1000087 008B0F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001AB00: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AB08: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AB10: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AB18: 86A2221E - v_add_lshl_u32 v137, v6, v8, 1 // 00000001AB1C: D1FE0089 02061106 - v_cndmask_b32_e64 v137, v10, v137, s[34:35] // 00000001AB24: D1000089 008B130A - buffer_load_short_d16 v136, v137, s[20:23], 0 offen // 00000001AB2C: E0901000 80058889 - v_add_lshl_u32 v137, v7, v8, 1 // 00000001AB34: D1FE0089 02061107 - v_cndmask_b32_e64 v137, v10, v137, s[34:35] // 00000001AB3C: D1000089 008B130A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001AB44: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AB4C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AB54: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AB5C: 86A2221E - v_add_lshl_u32 v139, v6, v8, 1 // 00000001AB60: D1FE008B 02061106 - v_cndmask_b32_e64 v139, v10, v139, s[34:35] // 00000001AB68: D100008B 008B170A - buffer_load_short_d16 v138, v139, s[20:23], 0 offen // 00000001AB70: E0901000 80058A8B - v_add_lshl_u32 v139, v7, v8, 1 // 00000001AB78: D1FE008B 02061107 - v_cndmask_b32_e64 v139, v10, v139, s[34:35] // 00000001AB80: D100008B 008B170A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001AB88: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AB90: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AB98: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001ABA0: 86A2221E - v_add_lshl_u32 v141, v6, v8, 1 // 00000001ABA4: D1FE008D 02061106 - v_cndmask_b32_e64 v141, v10, v141, s[34:35] // 00000001ABAC: D100008D 008B1B0A - buffer_load_short_d16 v140, v141, s[20:23], 0 offen // 00000001ABB4: E0901000 80058C8D - v_add_lshl_u32 v141, v7, v8, 1 // 
00000001ABBC: D1FE008D 02061107 - v_cndmask_b32_e64 v141, v10, v141, s[34:35] // 00000001ABC4: D100008D 008B1B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001ABCC: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001ABD4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001ABDC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001ABE4: 86A2221E - v_add_lshl_u32 v143, v6, v8, 1 // 00000001ABE8: D1FE008F 02061106 - v_cndmask_b32_e64 v143, v10, v143, s[34:35] // 00000001ABF0: D100008F 008B1F0A - buffer_load_short_d16 v142, v143, s[20:23], 0 offen // 00000001ABF8: E0901000 80058E8F - v_add_lshl_u32 v143, v7, v8, 1 // 00000001AC00: D1FE008F 02061107 - v_cndmask_b32_e64 v143, v10, v143, s[34:35] // 00000001AC08: D100008F 008B1F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001AC10: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AC18: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AC20: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AC28: 86A2221E - v_add_lshl_u32 v145, v6, v8, 1 // 00000001AC2C: D1FE0091 02061106 - v_cndmask_b32_e64 v145, v10, v145, s[34:35] // 00000001AC34: D1000091 008B230A - buffer_load_short_d16 v144, v145, s[20:23], 0 offen // 00000001AC3C: E0901000 80059091 - v_add_lshl_u32 v145, v7, v8, 1 // 00000001AC44: D1FE0091 02061107 - v_cndmask_b32_e64 v145, v10, v145, s[34:35] // 00000001AC4C: D1000091 008B230A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001AC54: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AC5C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AC64: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AC6C: 86A2221E - v_add_lshl_u32 v147, v6, v8, 1 // 00000001AC70: D1FE0093 02061106 - v_cndmask_b32_e64 v147, v10, v147, s[34:35] // 00000001AC78: D1000093 008B270A - buffer_load_short_d16 v146, v147, s[20:23], 0 offen // 00000001AC80: E0901000 80059293 - v_add_lshl_u32 v147, v7, v8, 1 
// 00000001AC88: D1FE0093 02061107 - v_cndmask_b32_e64 v147, v10, v147, s[34:35] // 00000001AC90: D1000093 008B270A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001AC98: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001ACA0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001ACA8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001ACB0: 86A2221E - v_add_lshl_u32 v149, v6, v8, 1 // 00000001ACB4: D1FE0095 02061106 - v_cndmask_b32_e64 v149, v10, v149, s[34:35] // 00000001ACBC: D1000095 008B2B0A - buffer_load_short_d16 v148, v149, s[20:23], 0 offen // 00000001ACC4: E0901000 80059495 - v_add_lshl_u32 v149, v7, v8, 1 // 00000001ACCC: D1FE0095 02061107 - v_cndmask_b32_e64 v149, v10, v149, s[34:35] // 00000001ACD4: D1000095 008B2B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001ACDC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001ACE4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001ACEC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001ACF4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001ACFC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AD04: 86A2221E - v_add_lshl_u32 v151, v6, v4, 1 // 00000001AD08: D1FE0097 02060906 - v_cndmask_b32_e64 v151, v10, v151, s[34:35] // 00000001AD10: D1000097 008B2F0A - buffer_load_short_d16 v150, v151, s[20:23], 0 offen // 00000001AD18: E0901000 80059697 - v_add_lshl_u32 v151, v7, v4, 1 // 00000001AD20: D1FE0097 02060907 - v_cndmask_b32_e64 v151, v10, v151, s[34:35] // 00000001AD28: D1000097 008B2F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001AD30: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AD38: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AD40: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AD48: 86A2221E - v_add_lshl_u32 v153, v6, v8, 1 // 00000001AD4C: D1FE0099 02061106 - v_cndmask_b32_e64 v153, v10, v153, s[34:35] // 00000001AD54: D1000099 
008B330A - buffer_load_short_d16 v152, v153, s[20:23], 0 offen // 00000001AD5C: E0901000 80059899 - v_add_lshl_u32 v153, v7, v8, 1 // 00000001AD64: D1FE0099 02061107 - v_cndmask_b32_e64 v153, v10, v153, s[34:35] // 00000001AD6C: D1000099 008B330A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001AD74: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AD7C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AD84: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AD8C: 86A2221E - v_add_lshl_u32 v155, v6, v8, 1 // 00000001AD90: D1FE009B 02061106 - v_cndmask_b32_e64 v155, v10, v155, s[34:35] // 00000001AD98: D100009B 008B370A - buffer_load_short_d16 v154, v155, s[20:23], 0 offen // 00000001ADA0: E0901000 80059A9B - v_add_lshl_u32 v155, v7, v8, 1 // 00000001ADA8: D1FE009B 02061107 - v_cndmask_b32_e64 v155, v10, v155, s[34:35] // 00000001ADB0: D100009B 008B370A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001ADB8: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001ADC0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001ADC8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001ADD0: 86A2221E - v_add_lshl_u32 v157, v6, v8, 1 // 00000001ADD4: D1FE009D 02061106 - v_cndmask_b32_e64 v157, v10, v157, s[34:35] // 00000001ADDC: D100009D 008B3B0A - buffer_load_short_d16 v156, v157, s[20:23], 0 offen // 00000001ADE4: E0901000 80059C9D - v_add_lshl_u32 v157, v7, v8, 1 // 00000001ADEC: D1FE009D 02061107 - v_cndmask_b32_e64 v157, v10, v157, s[34:35] // 00000001ADF4: D100009D 008B3B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001ADFC: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AE04: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AE0C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AE14: 86A2221E - v_add_lshl_u32 v159, v6, v8, 1 // 00000001AE18: D1FE009F 02061106 - v_cndmask_b32_e64 v159, v10, v159, s[34:35] // 00000001AE20: 
D100009F 008B3F0A - buffer_load_short_d16 v158, v159, s[20:23], 0 offen // 00000001AE28: E0901000 80059E9F - v_add_lshl_u32 v159, v7, v8, 1 // 00000001AE30: D1FE009F 02061107 - v_cndmask_b32_e64 v159, v10, v159, s[34:35] // 00000001AE38: D100009F 008B3F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001AE40: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AE48: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AE50: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AE58: 86A2221E - v_add_lshl_u32 v161, v6, v8, 1 // 00000001AE5C: D1FE00A1 02061106 - v_cndmask_b32_e64 v161, v10, v161, s[34:35] // 00000001AE64: D10000A1 008B430A - buffer_load_short_d16 v160, v161, s[20:23], 0 offen // 00000001AE6C: E0901000 8005A0A1 - v_add_lshl_u32 v161, v7, v8, 1 // 00000001AE74: D1FE00A1 02061107 - v_cndmask_b32_e64 v161, v10, v161, s[34:35] // 00000001AE7C: D10000A1 008B430A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001AE84: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AE8C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AE94: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AE9C: 86A2221E - v_add_lshl_u32 v163, v6, v8, 1 // 00000001AEA0: D1FE00A3 02061106 - v_cndmask_b32_e64 v163, v10, v163, s[34:35] // 00000001AEA8: D10000A3 008B470A - buffer_load_short_d16 v162, v163, s[20:23], 0 offen // 00000001AEB0: E0901000 8005A2A3 - v_add_lshl_u32 v163, v7, v8, 1 // 00000001AEB8: D1FE00A3 02061107 - v_cndmask_b32_e64 v163, v10, v163, s[34:35] // 00000001AEC0: D10000A3 008B470A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001AEC8: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AED0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AED8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AEE0: 86A2221E - v_add_lshl_u32 v165, v6, v8, 1 // 00000001AEE4: D1FE00A5 02061106 - v_cndmask_b32_e64 v165, v10, v165, s[34:35] // 
00000001AEEC: D10000A5 008B4B0A - buffer_load_short_d16 v164, v165, s[20:23], 0 offen // 00000001AEF4: E0901000 8005A4A5 - v_add_lshl_u32 v165, v7, v8, 1 // 00000001AEFC: D1FE00A5 02061107 - v_cndmask_b32_e64 v165, v10, v165, s[34:35] // 00000001AF04: D10000A5 008B4B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001AF0C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001AF14: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001AF1C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001AF24: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AF2C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AF34: 86A2221E - v_add_lshl_u32 v167, v6, v4, 1 // 00000001AF38: D1FE00A7 02060906 - v_cndmask_b32_e64 v167, v10, v167, s[34:35] // 00000001AF40: D10000A7 008B4F0A - buffer_load_short_d16 v166, v167, s[20:23], 0 offen // 00000001AF48: E0901000 8005A6A7 - v_add_lshl_u32 v167, v7, v4, 1 // 00000001AF50: D1FE00A7 02060907 - v_cndmask_b32_e64 v167, v10, v167, s[34:35] // 00000001AF58: D10000A7 008B4F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001AF60: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AF68: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AF70: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001AF78: 86A2221E - v_add_lshl_u32 v169, v6, v8, 1 // 00000001AF7C: D1FE00A9 02061106 - v_cndmask_b32_e64 v169, v10, v169, s[34:35] // 00000001AF84: D10000A9 008B530A - buffer_load_short_d16 v168, v169, s[20:23], 0 offen // 00000001AF8C: E0901000 8005A8A9 - v_add_lshl_u32 v169, v7, v8, 1 // 00000001AF94: D1FE00A9 02061107 - v_cndmask_b32_e64 v169, v10, v169, s[34:35] // 00000001AF9C: D10000A9 008B530A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001AFA4: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AFAC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AFB4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 
00000001AFBC: 86A2221E - v_add_lshl_u32 v171, v6, v8, 1 // 00000001AFC0: D1FE00AB 02061106 - v_cndmask_b32_e64 v171, v10, v171, s[34:35] // 00000001AFC8: D10000AB 008B570A - buffer_load_short_d16 v170, v171, s[20:23], 0 offen // 00000001AFD0: E0901000 8005AAAB - v_add_lshl_u32 v171, v7, v8, 1 // 00000001AFD8: D1FE00AB 02061107 - v_cndmask_b32_e64 v171, v10, v171, s[34:35] // 00000001AFE0: D10000AB 008B570A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001AFE8: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001AFF0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001AFF8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B000: 86A2221E - v_add_lshl_u32 v173, v6, v8, 1 // 00000001B004: D1FE00AD 02061106 - v_cndmask_b32_e64 v173, v10, v173, s[34:35] // 00000001B00C: D10000AD 008B5B0A - buffer_load_short_d16 v172, v173, s[20:23], 0 offen // 00000001B014: E0901000 8005ACAD - v_add_lshl_u32 v173, v7, v8, 1 // 00000001B01C: D1FE00AD 02061107 - v_cndmask_b32_e64 v173, v10, v173, s[34:35] // 00000001B024: D10000AD 008B5B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001B02C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B034: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B03C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B044: 86A2221E - v_add_lshl_u32 v175, v6, v8, 1 // 00000001B048: D1FE00AF 02061106 - v_cndmask_b32_e64 v175, v10, v175, s[34:35] // 00000001B050: D10000AF 008B5F0A - buffer_load_short_d16 v174, v175, s[20:23], 0 offen // 00000001B058: E0901000 8005AEAF - v_add_lshl_u32 v175, v7, v8, 1 // 00000001B060: D1FE00AF 02061107 - v_cndmask_b32_e64 v175, v10, v175, s[34:35] // 00000001B068: D10000AF 008B5F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001B070: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B078: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B080: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] 
// 00000001B088: 86A2221E - v_add_lshl_u32 v177, v6, v8, 1 // 00000001B08C: D1FE00B1 02061106 - v_cndmask_b32_e64 v177, v10, v177, s[34:35] // 00000001B094: D10000B1 008B630A - buffer_load_short_d16 v176, v177, s[20:23], 0 offen // 00000001B09C: E0901000 8005B0B1 - v_add_lshl_u32 v177, v7, v8, 1 // 00000001B0A4: D1FE00B1 02061107 - v_cndmask_b32_e64 v177, v10, v177, s[34:35] // 00000001B0AC: D10000B1 008B630A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001B0B4: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B0BC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B0C4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B0CC: 86A2221E - v_add_lshl_u32 v179, v6, v8, 1 // 00000001B0D0: D1FE00B3 02061106 - v_cndmask_b32_e64 v179, v10, v179, s[34:35] // 00000001B0D8: D10000B3 008B670A - buffer_load_short_d16 v178, v179, s[20:23], 0 offen // 00000001B0E0: E0901000 8005B2B3 - v_add_lshl_u32 v179, v7, v8, 1 // 00000001B0E8: D1FE00B3 02061107 - v_cndmask_b32_e64 v179, v10, v179, s[34:35] // 00000001B0F0: D10000B3 008B670A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001B0F8: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B100: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B108: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B110: 86A2221E - v_add_lshl_u32 v181, v6, v8, 1 // 00000001B114: D1FE00B5 02061106 - v_cndmask_b32_e64 v181, v10, v181, s[34:35] // 00000001B11C: D10000B5 008B6B0A - buffer_load_short_d16 v180, v181, s[20:23], 0 offen // 00000001B124: E0901000 8005B4B5 - v_add_lshl_u32 v181, v7, v8, 1 // 00000001B12C: D1FE00B5 02061107 - v_cndmask_b32_e64 v181, v10, v181, s[34:35] // 00000001B134: D10000B5 008B6B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001B13C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001B144: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001B14C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001B154: 
D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B15C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B164: 86A2221E - v_add_lshl_u32 v183, v6, v4, 1 // 00000001B168: D1FE00B7 02060906 - v_cndmask_b32_e64 v183, v10, v183, s[34:35] // 00000001B170: D10000B7 008B6F0A - buffer_load_short_d16 v182, v183, s[20:23], 0 offen // 00000001B178: E0901000 8005B6B7 - v_add_lshl_u32 v183, v7, v4, 1 // 00000001B180: D1FE00B7 02060907 - v_cndmask_b32_e64 v183, v10, v183, s[34:35] // 00000001B188: D10000B7 008B6F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001B190: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B198: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B1A0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B1A8: 86A2221E - v_add_lshl_u32 v185, v6, v8, 1 // 00000001B1AC: D1FE00B9 02061106 - v_cndmask_b32_e64 v185, v10, v185, s[34:35] // 00000001B1B4: D10000B9 008B730A - buffer_load_short_d16 v184, v185, s[20:23], 0 offen // 00000001B1BC: E0901000 8005B8B9 - v_add_lshl_u32 v185, v7, v8, 1 // 00000001B1C4: D1FE00B9 02061107 - v_cndmask_b32_e64 v185, v10, v185, s[34:35] // 00000001B1CC: D10000B9 008B730A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001B1D4: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B1DC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B1E4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B1EC: 86A2221E - v_add_lshl_u32 v187, v6, v8, 1 // 00000001B1F0: D1FE00BB 02061106 - v_cndmask_b32_e64 v187, v10, v187, s[34:35] // 00000001B1F8: D10000BB 008B770A - buffer_load_short_d16 v186, v187, s[20:23], 0 offen // 00000001B200: E0901000 8005BABB - v_add_lshl_u32 v187, v7, v8, 1 // 00000001B208: D1FE00BB 02061107 - v_cndmask_b32_e64 v187, v10, v187, s[34:35] // 00000001B210: D10000BB 008B770A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001B218: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 
00000001B220: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B228: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B230: 86A2221E - v_add_lshl_u32 v189, v6, v8, 1 // 00000001B234: D1FE00BD 02061106 - v_cndmask_b32_e64 v189, v10, v189, s[34:35] // 00000001B23C: D10000BD 008B7B0A - buffer_load_short_d16 v188, v189, s[20:23], 0 offen // 00000001B244: E0901000 8005BCBD - v_add_lshl_u32 v189, v7, v8, 1 // 00000001B24C: D1FE00BD 02061107 - v_cndmask_b32_e64 v189, v10, v189, s[34:35] // 00000001B254: D10000BD 008B7B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001B25C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B264: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B26C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B274: 86A2221E - v_add_lshl_u32 v191, v6, v8, 1 // 00000001B278: D1FE00BF 02061106 - v_cndmask_b32_e64 v191, v10, v191, s[34:35] // 00000001B280: D10000BF 008B7F0A - buffer_load_short_d16 v190, v191, s[20:23], 0 offen // 00000001B288: E0901000 8005BEBF - v_add_lshl_u32 v191, v7, v8, 1 // 00000001B290: D1FE00BF 02061107 - v_cndmask_b32_e64 v191, v10, v191, s[34:35] // 00000001B298: D10000BF 008B7F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001B2A0: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B2A8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B2B0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B2B8: 86A2221E - v_add_lshl_u32 v193, v6, v8, 1 // 00000001B2BC: D1FE00C1 02061106 - v_cndmask_b32_e64 v193, v10, v193, s[34:35] // 00000001B2C4: D10000C1 008B830A - buffer_load_short_d16 v192, v193, s[20:23], 0 offen // 00000001B2CC: E0901000 8005C0C1 - v_add_lshl_u32 v193, v7, v8, 1 // 00000001B2D4: D1FE00C1 02061107 - v_cndmask_b32_e64 v193, v10, v193, s[34:35] // 00000001B2DC: D10000C1 008B830A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001B2E4: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 
// 00000001B2EC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B2F4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B2FC: 86A2221E - v_add_lshl_u32 v195, v6, v8, 1 // 00000001B300: D1FE00C3 02061106 - v_cndmask_b32_e64 v195, v10, v195, s[34:35] // 00000001B308: D10000C3 008B870A - buffer_load_short_d16 v194, v195, s[20:23], 0 offen // 00000001B310: E0901000 8005C2C3 - v_add_lshl_u32 v195, v7, v8, 1 // 00000001B318: D1FE00C3 02061107 - v_cndmask_b32_e64 v195, v10, v195, s[34:35] // 00000001B320: D10000C3 008B870A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001B328: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B330: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B338: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B340: 86A2221E - v_add_lshl_u32 v197, v6, v8, 1 // 00000001B344: D1FE00C5 02061106 - v_cndmask_b32_e64 v197, v10, v197, s[34:35] // 00000001B34C: D10000C5 008B8B0A - buffer_load_short_d16 v196, v197, s[20:23], 0 offen // 00000001B354: E0901000 8005C4C5 - v_add_lshl_u32 v197, v7, v8, 1 // 00000001B35C: D1FE00C5 02061107 - v_cndmask_b32_e64 v197, v10, v197, s[34:35] // 00000001B364: D10000C5 008B8B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001B36C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001B374: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001B37C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001B384: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B38C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B394: 86A2221E - v_add_lshl_u32 v199, v6, v4, 1 // 00000001B398: D1FE00C7 02060906 - v_cndmask_b32_e64 v199, v10, v199, s[34:35] // 00000001B3A0: D10000C7 008B8F0A - buffer_load_short_d16 v198, v199, s[20:23], 0 offen // 00000001B3A8: E0901000 8005C6C7 - v_add_lshl_u32 v199, v7, v4, 1 // 00000001B3B0: D1FE00C7 02060907 - v_cndmask_b32_e64 v199, v10, v199, s[34:35] // 
00000001B3B8: D10000C7 008B8F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001B3C0: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B3C8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B3D0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B3D8: 86A2221E - v_add_lshl_u32 v201, v6, v8, 1 // 00000001B3DC: D1FE00C9 02061106 - v_cndmask_b32_e64 v201, v10, v201, s[34:35] // 00000001B3E4: D10000C9 008B930A - buffer_load_short_d16 v200, v201, s[20:23], 0 offen // 00000001B3EC: E0901000 8005C8C9 - v_add_lshl_u32 v201, v7, v8, 1 // 00000001B3F4: D1FE00C9 02061107 - v_cndmask_b32_e64 v201, v10, v201, s[34:35] // 00000001B3FC: D10000C9 008B930A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001B404: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B40C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B414: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B41C: 86A2221E - v_add_lshl_u32 v203, v6, v8, 1 // 00000001B420: D1FE00CB 02061106 - v_cndmask_b32_e64 v203, v10, v203, s[34:35] // 00000001B428: D10000CB 008B970A - buffer_load_short_d16 v202, v203, s[20:23], 0 offen // 00000001B430: E0901000 8005CACB - v_add_lshl_u32 v203, v7, v8, 1 // 00000001B438: D1FE00CB 02061107 - v_cndmask_b32_e64 v203, v10, v203, s[34:35] // 00000001B440: D10000CB 008B970A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001B448: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B450: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B458: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B460: 86A2221E - v_add_lshl_u32 v205, v6, v8, 1 // 00000001B464: D1FE00CD 02061106 - v_cndmask_b32_e64 v205, v10, v205, s[34:35] // 00000001B46C: D10000CD 008B9B0A - buffer_load_short_d16 v204, v205, s[20:23], 0 offen // 00000001B474: E0901000 8005CCCD - v_add_lshl_u32 v205, v7, v8, 1 // 00000001B47C: D1FE00CD 02061107 - v_cndmask_b32_e64 v205, v10, v205, s[34:35] 
// 00000001B484: D10000CD 008B9B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001B48C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B494: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B49C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B4A4: 86A2221E - v_add_lshl_u32 v207, v6, v8, 1 // 00000001B4A8: D1FE00CF 02061106 - v_cndmask_b32_e64 v207, v10, v207, s[34:35] // 00000001B4B0: D10000CF 008B9F0A - buffer_load_short_d16 v206, v207, s[20:23], 0 offen // 00000001B4B8: E0901000 8005CECF - v_add_lshl_u32 v207, v7, v8, 1 // 00000001B4C0: D1FE00CF 02061107 - v_cndmask_b32_e64 v207, v10, v207, s[34:35] // 00000001B4C8: D10000CF 008B9F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001B4D0: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B4D8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B4E0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B4E8: 86A2221E - v_add_lshl_u32 v209, v6, v8, 1 // 00000001B4EC: D1FE00D1 02061106 - v_cndmask_b32_e64 v209, v10, v209, s[34:35] // 00000001B4F4: D10000D1 008BA30A - buffer_load_short_d16 v208, v209, s[20:23], 0 offen // 00000001B4FC: E0901000 8005D0D1 - v_add_lshl_u32 v209, v7, v8, 1 // 00000001B504: D1FE00D1 02061107 - v_cndmask_b32_e64 v209, v10, v209, s[34:35] // 00000001B50C: D10000D1 008BA30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001B514: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B51C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B524: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B52C: 86A2221E - v_add_lshl_u32 v211, v6, v8, 1 // 00000001B530: D1FE00D3 02061106 - v_cndmask_b32_e64 v211, v10, v211, s[34:35] // 00000001B538: D10000D3 008BA70A - buffer_load_short_d16 v210, v211, s[20:23], 0 offen // 00000001B540: E0901000 8005D2D3 - v_add_lshl_u32 v211, v7, v8, 1 // 00000001B548: D1FE00D3 02061107 - v_cndmask_b32_e64 v211, v10, v211, 
s[34:35] // 00000001B550: D10000D3 008BA70A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001B558: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B560: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B568: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B570: 86A2221E - v_add_lshl_u32 v213, v6, v8, 1 // 00000001B574: D1FE00D5 02061106 - v_cndmask_b32_e64 v213, v10, v213, s[34:35] // 00000001B57C: D10000D5 008BAB0A - buffer_load_short_d16 v212, v213, s[20:23], 0 offen // 00000001B584: E0901000 8005D4D5 - v_add_lshl_u32 v213, v7, v8, 1 // 00000001B58C: D1FE00D5 02061107 - v_cndmask_b32_e64 v213, v10, v213, s[34:35] // 00000001B594: D10000D5 008BAB0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001B59C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001B5A4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001B5AC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001B5B4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B5BC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B5C4: 86A2221E - v_add_lshl_u32 v215, v6, v4, 1 // 00000001B5C8: D1FE00D7 02060906 - v_cndmask_b32_e64 v215, v10, v215, s[34:35] // 00000001B5D0: D10000D7 008BAF0A - buffer_load_short_d16 v214, v215, s[20:23], 0 offen // 00000001B5D8: E0901000 8005D6D7 - v_add_lshl_u32 v215, v7, v4, 1 // 00000001B5E0: D1FE00D7 02060907 - v_cndmask_b32_e64 v215, v10, v215, s[34:35] // 00000001B5E8: D10000D7 008BAF0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001B5F0: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B5F8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B600: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B608: 86A2221E - v_add_lshl_u32 v217, v6, v8, 1 // 00000001B60C: D1FE00D9 02061106 - v_cndmask_b32_e64 v217, v10, v217, s[34:35] // 00000001B614: D10000D9 008BB30A - buffer_load_short_d16 v216, v217, s[20:23], 0 offen // 
00000001B61C: E0901000 8005D8D9 - v_add_lshl_u32 v217, v7, v8, 1 // 00000001B624: D1FE00D9 02061107 - v_cndmask_b32_e64 v217, v10, v217, s[34:35] // 00000001B62C: D10000D9 008BB30A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001B634: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B63C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B644: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B64C: 86A2221E - v_add_lshl_u32 v219, v6, v8, 1 // 00000001B650: D1FE00DB 02061106 - v_cndmask_b32_e64 v219, v10, v219, s[34:35] // 00000001B658: D10000DB 008BB70A - buffer_load_short_d16 v218, v219, s[20:23], 0 offen // 00000001B660: E0901000 8005DADB - v_add_lshl_u32 v219, v7, v8, 1 // 00000001B668: D1FE00DB 02061107 - v_cndmask_b32_e64 v219, v10, v219, s[34:35] // 00000001B670: D10000DB 008BB70A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001B678: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B680: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B688: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B690: 86A2221E - v_add_lshl_u32 v221, v6, v8, 1 // 00000001B694: D1FE00DD 02061106 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 00000001B69C: D10000DD 008BBB0A - buffer_load_short_d16 v220, v221, s[20:23], 0 offen // 00000001B6A4: E0901000 8005DCDD - v_add_lshl_u32 v221, v7, v8, 1 // 00000001B6AC: D1FE00DD 02061107 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 00000001B6B4: D10000DD 008BBB0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001B6BC: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B6C4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B6CC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B6D4: 86A2221E - v_add_lshl_u32 v223, v6, v8, 1 // 00000001B6D8: D1FE00DF 02061106 - v_cndmask_b32_e64 v223, v10, v223, s[34:35] // 00000001B6E0: D10000DF 008BBF0A - buffer_load_short_d16 v222, v223, s[20:23], 0 offen 
// 00000001B6E8: E0901000 8005DEDF - v_add_lshl_u32 v223, v7, v8, 1 // 00000001B6F0: D1FE00DF 02061107 - v_cndmask_b32_e64 v223, v10, v223, s[34:35] // 00000001B6F8: D10000DF 008BBF0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001B700: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B708: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B710: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B718: 86A2221E - v_add_lshl_u32 v225, v6, v8, 1 // 00000001B71C: D1FE00E1 02061106 - v_cndmask_b32_e64 v225, v10, v225, s[34:35] // 00000001B724: D10000E1 008BC30A - buffer_load_short_d16 v224, v225, s[20:23], 0 offen // 00000001B72C: E0901000 8005E0E1 - v_add_lshl_u32 v225, v7, v8, 1 // 00000001B734: D1FE00E1 02061107 - v_cndmask_b32_e64 v225, v10, v225, s[34:35] // 00000001B73C: D10000E1 008BC30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001B744: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B74C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B754: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B75C: 86A2221E - v_add_lshl_u32 v227, v6, v8, 1 // 00000001B760: D1FE00E3 02061106 - v_cndmask_b32_e64 v227, v10, v227, s[34:35] // 00000001B768: D10000E3 008BC70A - buffer_load_short_d16 v226, v227, s[20:23], 0 offen // 00000001B770: E0901000 8005E2E3 - v_add_lshl_u32 v227, v7, v8, 1 // 00000001B778: D1FE00E3 02061107 - v_cndmask_b32_e64 v227, v10, v227, s[34:35] // 00000001B780: D10000E3 008BC70A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001B788: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B790: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B798: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B7A0: 86A2221E - v_add_lshl_u32 v229, v6, v8, 1 // 00000001B7A4: D1FE00E5 02061106 - v_cndmask_b32_e64 v229, v10, v229, s[34:35] // 00000001B7AC: D10000E5 008BCB0A - buffer_load_short_d16 v228, v229, s[20:23], 0 
offen // 00000001B7B4: E0901000 8005E4E5 - v_add_lshl_u32 v229, v7, v8, 1 // 00000001B7BC: D1FE00E5 02061107 - v_cndmask_b32_e64 v229, v10, v229, s[34:35] // 00000001B7C4: D10000E5 008BCB0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001B7CC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001B7D4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001B7DC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001B7E4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B7EC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B7F4: 86A2221E - v_add_lshl_u32 v231, v6, v4, 1 // 00000001B7F8: D1FE00E7 02060906 - v_cndmask_b32_e64 v231, v10, v231, s[34:35] // 00000001B800: D10000E7 008BCF0A - buffer_load_short_d16 v230, v231, s[20:23], 0 offen // 00000001B808: E0901000 8005E6E7 - v_add_lshl_u32 v231, v7, v4, 1 // 00000001B810: D1FE00E7 02060907 - v_cndmask_b32_e64 v231, v10, v231, s[34:35] // 00000001B818: D10000E7 008BCF0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001B820: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B828: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B830: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B838: 86A2221E - v_add_lshl_u32 v233, v6, v8, 1 // 00000001B83C: D1FE00E9 02061106 - v_cndmask_b32_e64 v233, v10, v233, s[34:35] // 00000001B844: D10000E9 008BD30A - buffer_load_short_d16 v232, v233, s[20:23], 0 offen // 00000001B84C: E0901000 8005E8E9 - v_add_lshl_u32 v233, v7, v8, 1 // 00000001B854: D1FE00E9 02061107 - v_cndmask_b32_e64 v233, v10, v233, s[34:35] // 00000001B85C: D10000E9 008BD30A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001B864: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B86C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B874: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B87C: 86A2221E - v_add_lshl_u32 v235, v6, v8, 1 // 00000001B880: D1FE00EB 02061106 - 
v_cndmask_b32_e64 v235, v10, v235, s[34:35] // 00000001B888: D10000EB 008BD70A - buffer_load_short_d16 v234, v235, s[20:23], 0 offen // 00000001B890: E0901000 8005EAEB - v_add_lshl_u32 v235, v7, v8, 1 // 00000001B898: D1FE00EB 02061107 - v_cndmask_b32_e64 v235, v10, v235, s[34:35] // 00000001B8A0: D10000EB 008BD70A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001B8A8: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B8B0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B8B8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B8C0: 86A2221E - v_add_lshl_u32 v237, v6, v8, 1 // 00000001B8C4: D1FE00ED 02061106 - v_cndmask_b32_e64 v237, v10, v237, s[34:35] // 00000001B8CC: D10000ED 008BDB0A - buffer_load_short_d16 v236, v237, s[20:23], 0 offen // 00000001B8D4: E0901000 8005ECED - v_add_lshl_u32 v237, v7, v8, 1 // 00000001B8DC: D1FE00ED 02061107 - v_cndmask_b32_e64 v237, v10, v237, s[34:35] // 00000001B8E4: D10000ED 008BDB0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001B8EC: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B8F4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B8FC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B904: 86A2221E - v_add_lshl_u32 v239, v6, v8, 1 // 00000001B908: D1FE00EF 02061106 - v_cndmask_b32_e64 v239, v10, v239, s[34:35] // 00000001B910: D10000EF 008BDF0A - buffer_load_short_d16 v238, v239, s[20:23], 0 offen // 00000001B918: E0901000 8005EEEF - v_add_lshl_u32 v239, v7, v8, 1 // 00000001B920: D1FE00EF 02061107 - v_cndmask_b32_e64 v239, v10, v239, s[34:35] // 00000001B928: D10000EF 008BDF0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001B930: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B938: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B940: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B948: 86A2221E - v_add_lshl_u32 v241, v6, v8, 1 // 00000001B94C: D1FE00F1 02061106 
- v_cndmask_b32_e64 v241, v10, v241, s[34:35] // 00000001B954: D10000F1 008BE30A - buffer_load_short_d16 v240, v241, s[20:23], 0 offen // 00000001B95C: E0901000 8005F0F1 - v_add_lshl_u32 v241, v7, v8, 1 // 00000001B964: D1FE00F1 02061107 - v_cndmask_b32_e64 v241, v10, v241, s[34:35] // 00000001B96C: D10000F1 008BE30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001B974: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B97C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B984: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B98C: 86A2221E - v_add_lshl_u32 v243, v6, v8, 1 // 00000001B990: D1FE00F3 02061106 - v_cndmask_b32_e64 v243, v10, v243, s[34:35] // 00000001B998: D10000F3 008BE70A - buffer_load_short_d16 v242, v243, s[20:23], 0 offen // 00000001B9A0: E0901000 8005F2F3 - v_add_lshl_u32 v243, v7, v8, 1 // 00000001B9A8: D1FE00F3 02061107 - v_cndmask_b32_e64 v243, v10, v243, s[34:35] // 00000001B9B0: D10000F3 008BE70A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001B9B8: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001B9C0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001B9C8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001B9D0: 86A2221E - v_add_lshl_u32 v245, v6, v8, 1 // 00000001B9D4: D1FE00F5 02061106 - v_cndmask_b32_e64 v245, v10, v245, s[34:35] // 00000001B9DC: D10000F5 008BEB0A - buffer_load_short_d16 v244, v245, s[20:23], 0 offen // 00000001B9E4: E0901000 8005F4F5 - v_add_lshl_u32 v245, v7, v8, 1 // 00000001B9EC: D1FE00F5 02061107 - v_cndmask_b32_e64 v245, v10, v245, s[34:35] // 00000001B9F4: D10000F5 008BEB0A - v_accvgpr_read_b32 v15, a49 // 00000001B9FC: D3D8400F 18000131 - v_accvgpr_read_b32 v16, a53 // 00000001BA04: D3D84010 18000135 - v_accvgpr_read_b32 v17, a57 // 00000001BA0C: D3D84011 18000139 - v_accvgpr_read_b32 v18, a61 // 00000001BA14: D3D84012 1800013D - v_accvgpr_read_b32 v19, a65 // 00000001BA1C: D3D84013 18000141 - v_accvgpr_read_b32 
v20, a69 // 00000001BA24: D3D84014 18000145 - v_accvgpr_read_b32 v21, a73 // 00000001BA2C: D3D84015 18000149 - v_accvgpr_read_b32 v22, a77 // 00000001BA34: D3D84016 1800014D - v_accvgpr_read_b32 v23, a81 // 00000001BA3C: D3D84017 18000151 - v_accvgpr_read_b32 v24, a85 // 00000001BA44: D3D84018 18000155 - v_accvgpr_read_b32 v25, a89 // 00000001BA4C: D3D84019 18000159 - v_accvgpr_read_b32 v26, a93 // 00000001BA54: D3D8401A 1800015D - v_accvgpr_read_b32 v27, a97 // 00000001BA5C: D3D8401B 18000161 - v_accvgpr_read_b32 v28, a101 // 00000001BA64: D3D8401C 18000165 - v_accvgpr_read_b32 v29, a105 // 00000001BA6C: D3D8401D 18000169 - v_accvgpr_read_b32 v30, a109 // 00000001BA74: D3D8401E 1800016D - v_accvgpr_read_b32 v31, a113 // 00000001BA7C: D3D8401F 18000171 - v_accvgpr_read_b32 v32, a117 // 00000001BA84: D3D84020 18000175 - v_accvgpr_read_b32 v33, a121 // 00000001BA8C: D3D84021 18000179 - v_accvgpr_read_b32 v34, a125 // 00000001BA94: D3D84022 1800017D - v_accvgpr_read_b32 v35, a129 // 00000001BA9C: D3D84023 18000181 - v_accvgpr_read_b32 v36, a133 // 00000001BAA4: D3D84024 18000185 - v_accvgpr_read_b32 v37, a137 // 00000001BAAC: D3D84025 18000189 - v_accvgpr_read_b32 v38, a141 // 00000001BAB4: D3D84026 1800018D - v_accvgpr_read_b32 v39, a145 // 00000001BABC: D3D84027 18000191 - v_accvgpr_read_b32 v40, a149 // 00000001BAC4: D3D84028 18000195 - v_accvgpr_read_b32 v41, a153 // 00000001BACC: D3D84029 18000199 - v_accvgpr_read_b32 v42, a157 // 00000001BAD4: D3D8402A 1800019D - v_accvgpr_read_b32 v43, a161 // 00000001BADC: D3D8402B 180001A1 - v_accvgpr_read_b32 v44, a165 // 00000001BAE4: D3D8402C 180001A5 - v_accvgpr_read_b32 v45, a169 // 00000001BAEC: D3D8402D 180001A9 - v_accvgpr_read_b32 v46, a173 // 00000001BAF4: D3D8402E 180001AD - v_accvgpr_read_b32 v47, a177 // 00000001BAFC: D3D8402F 180001B1 - v_accvgpr_read_b32 v48, a181 // 00000001BB04: D3D84030 180001B5 - v_accvgpr_read_b32 v49, a185 // 00000001BB0C: D3D84031 180001B9 - v_accvgpr_read_b32 v50, a189 // 00000001BB14: 
D3D84032 180001BD - v_accvgpr_read_b32 v51, a193 // 00000001BB1C: D3D84033 180001C1 - v_accvgpr_read_b32 v52, a197 // 00000001BB24: D3D84034 180001C5 - v_accvgpr_read_b32 v53, a201 // 00000001BB2C: D3D84035 180001C9 - v_accvgpr_read_b32 v54, a205 // 00000001BB34: D3D84036 180001CD - v_accvgpr_read_b32 v55, a209 // 00000001BB3C: D3D84037 180001D1 - v_accvgpr_read_b32 v56, a213 // 00000001BB44: D3D84038 180001D5 - v_accvgpr_read_b32 v57, a217 // 00000001BB4C: D3D84039 180001D9 - v_accvgpr_read_b32 v58, a221 // 00000001BB54: D3D8403A 180001DD - v_accvgpr_read_b32 v59, a225 // 00000001BB5C: D3D8403B 180001E1 - v_accvgpr_read_b32 v60, a229 // 00000001BB64: D3D8403C 180001E5 - v_accvgpr_read_b32 v61, a233 // 00000001BB6C: D3D8403D 180001E9 - v_accvgpr_read_b32 v62, a237 // 00000001BB74: D3D8403E 180001ED - v_accvgpr_read_b32 v63, a241 // 00000001BB7C: D3D8403F 180001F1 - v_accvgpr_read_b32 v64, a245 // 00000001BB84: D3D84040 180001F5 - v_accvgpr_read_b32 v65, a249 // 00000001BB8C: D3D84041 180001F9 - v_accvgpr_read_b32 v66, a253 // 00000001BB94: D3D84042 180001FD - v_accvgpr_read_b32 v67, a2 // 00000001BB9C: D3D84043 18000102 - v_accvgpr_read_b32 v68, a6 // 00000001BBA4: D3D84044 18000106 - v_accvgpr_read_b32 v69, a10 // 00000001BBAC: D3D84045 1800010A - v_accvgpr_read_b32 v70, a14 // 00000001BBB4: D3D84046 1800010E - v_accvgpr_read_b32 v71, a18 // 00000001BBBC: D3D84047 18000112 - v_accvgpr_read_b32 v72, a22 // 00000001BBC4: D3D84048 18000116 - v_accvgpr_read_b32 v73, a26 // 00000001BBCC: D3D84049 1800011A - v_accvgpr_read_b32 v74, a30 // 00000001BBD4: D3D8404A 1800011E - v_accvgpr_read_b32 v75, a34 // 00000001BBDC: D3D8404B 18000122 - v_accvgpr_read_b32 v76, a38 // 00000001BBE4: D3D8404C 18000126 - v_accvgpr_read_b32 v77, a42 // 00000001BBEC: D3D8404D 1800012A - v_accvgpr_read_b32 v78, a46 // 00000001BBF4: D3D8404E 1800012E - v_accvgpr_read_b32 v79, a50 // 00000001BBFC: D3D8404F 18000132 - v_accvgpr_read_b32 v80, a54 // 00000001BC04: D3D84050 18000136 - 
v_accvgpr_read_b32 v81, a58 // 00000001BC0C: D3D84051 1800013A - v_accvgpr_read_b32 v82, a62 // 00000001BC14: D3D84052 1800013E - v_accvgpr_read_b32 v83, a66 // 00000001BC1C: D3D84053 18000142 - v_accvgpr_read_b32 v84, a70 // 00000001BC24: D3D84054 18000146 - v_accvgpr_read_b32 v85, a74 // 00000001BC2C: D3D84055 1800014A - v_accvgpr_read_b32 v86, a78 // 00000001BC34: D3D84056 1800014E - v_accvgpr_read_b32 v87, a82 // 00000001BC3C: D3D84057 18000152 - v_accvgpr_read_b32 v88, a86 // 00000001BC44: D3D84058 18000156 - v_accvgpr_read_b32 v89, a90 // 00000001BC4C: D3D84059 1800015A - v_accvgpr_read_b32 v90, a94 // 00000001BC54: D3D8405A 1800015E - v_mul_f32_e32 v15, s44, v15 // 00000001BC5C: 0A1E1E2C - v_pk_mul_f32 v[16:17], s[44:45], v[16:17] op_sel_hi:[0,1] // 00000001BC60: D3B14010 1002202C - v_pk_mul_f32 v[18:19], s[44:45], v[18:19] op_sel_hi:[0,1] // 00000001BC68: D3B14012 1002242C - v_pk_mul_f32 v[20:21], s[44:45], v[20:21] op_sel_hi:[0,1] // 00000001BC70: D3B14014 1002282C - v_pk_mul_f32 v[22:23], s[44:45], v[22:23] op_sel_hi:[0,1] // 00000001BC78: D3B14016 10022C2C - v_pk_mul_f32 v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 00000001BC80: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 00000001BC88: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 00000001BC90: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] op_sel_hi:[0,1] // 00000001BC98: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 00000001BCA0: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 00000001BCA8: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 00000001BCB0: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 00000001BCB8: D3B14026 10024C2C - v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 00000001BCC0: D3B14028 1002502C - v_pk_mul_f32 v[42:43], s[44:45], v[42:43] op_sel_hi:[0,1] 
// 00000001BCC8: D3B1402A 1002542C - v_pk_mul_f32 v[44:45], s[44:45], v[44:45] op_sel_hi:[0,1] // 00000001BCD0: D3B1402C 1002582C - v_pk_mul_f32 v[46:47], s[44:45], v[46:47] op_sel_hi:[0,1] // 00000001BCD8: D3B1402E 10025C2C - v_pk_mul_f32 v[48:49], s[44:45], v[48:49] op_sel_hi:[0,1] // 00000001BCE0: D3B14030 1002602C - v_pk_mul_f32 v[50:51], s[44:45], v[50:51] op_sel_hi:[0,1] // 00000001BCE8: D3B14032 1002642C - v_pk_mul_f32 v[52:53], s[44:45], v[52:53] op_sel_hi:[0,1] // 00000001BCF0: D3B14034 1002682C - v_pk_mul_f32 v[54:55], s[44:45], v[54:55] op_sel_hi:[0,1] // 00000001BCF8: D3B14036 10026C2C - v_pk_mul_f32 v[56:57], s[44:45], v[56:57] op_sel_hi:[0,1] // 00000001BD00: D3B14038 1002702C - v_pk_mul_f32 v[58:59], s[44:45], v[58:59] op_sel_hi:[0,1] // 00000001BD08: D3B1403A 1002742C - v_pk_mul_f32 v[60:61], s[44:45], v[60:61] op_sel_hi:[0,1] // 00000001BD10: D3B1403C 1002782C - v_pk_mul_f32 v[62:63], s[44:45], v[62:63] op_sel_hi:[0,1] // 00000001BD18: D3B1403E 10027C2C - v_pk_mul_f32 v[64:65], s[44:45], v[64:65] op_sel_hi:[0,1] // 00000001BD20: D3B14040 1002802C - v_pk_mul_f32 v[66:67], s[44:45], v[66:67] op_sel_hi:[0,1] // 00000001BD28: D3B14042 1002842C - v_pk_mul_f32 v[68:69], s[44:45], v[68:69] op_sel_hi:[0,1] // 00000001BD30: D3B14044 1002882C - v_pk_mul_f32 v[70:71], s[44:45], v[70:71] op_sel_hi:[0,1] // 00000001BD38: D3B14046 10028C2C - v_pk_mul_f32 v[72:73], s[44:45], v[72:73] op_sel_hi:[0,1] // 00000001BD40: D3B14048 1002902C - v_pk_mul_f32 v[74:75], s[44:45], v[74:75] op_sel_hi:[0,1] // 00000001BD48: D3B1404A 1002942C - v_pk_mul_f32 v[76:77], s[44:45], v[76:77] op_sel_hi:[0,1] // 00000001BD50: D3B1404C 1002982C - v_pk_mul_f32 v[78:79], s[44:45], v[78:79] op_sel_hi:[0,1] // 00000001BD58: D3B1404E 10029C2C - v_pk_mul_f32 v[80:81], s[44:45], v[80:81] op_sel_hi:[0,1] // 00000001BD60: D3B14050 1002A02C - v_pk_mul_f32 v[82:83], s[44:45], v[82:83] op_sel_hi:[0,1] // 00000001BD68: D3B14052 1002A42C - v_pk_mul_f32 v[84:85], s[44:45], v[84:85] op_sel_hi:[0,1] // 
00000001BD70: D3B14054 1002A82C - v_pk_mul_f32 v[86:87], s[44:45], v[86:87] op_sel_hi:[0,1] // 00000001BD78: D3B14056 1002AC2C - v_pk_mul_f32 v[88:89], s[44:45], v[88:89] op_sel_hi:[0,1] // 00000001BD80: D3B14058 1002B02C - v_mul_f32_e32 v90, s44, v90 // 00000001BD88: 0AB4B42C - s_waitcnt vmcnt(0) // 00000001BD8C: BF8C0F70 - v_mov_b32_e32 v12, 0xffff0000 // 00000001BD90: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 00000001BD98: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 00000001BDA0: 7E1C02FF 00007FFF - v_cvt_f32_bf16_sdwa v8, v91 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BDA8: 7E10B6F9 0004165B - v_fmac_f32_e64 v15, v8, s45 // 00000001BDB0: D13B000F 00005B08 - v_cvt_pk_bf16_f32 v15, v15, v15 // 00000001BDB8: D268000F 00021F0F - buffer_store_short v15, v92, s[16:19], 0 offen nt // 00000001BDC0: E06A1000 80040F5C - v_cvt_f32_bf16_sdwa v8, v93 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BDC8: 7E10B6F9 0004165D - v_fmac_f32_e64 v16, v8, s45 // 00000001BDD0: D13B0010 00005B08 - v_cvt_pk_bf16_f32 v16, v16, v16 // 00000001BDD8: D2680010 00022110 - buffer_store_short v16, v94, s[16:19], 0 offen nt // 00000001BDE0: E06A1000 8004105E - v_cvt_f32_bf16_sdwa v8, v95 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BDE8: 7E10B6F9 0004165F - v_fmac_f32_e64 v17, v8, s45 // 00000001BDF0: D13B0011 00005B08 - v_cvt_pk_bf16_f32 v17, v17, v17 // 00000001BDF8: D2680011 00022311 - buffer_store_short v17, v96, s[16:19], 0 offen nt // 00000001BE00: E06A1000 80041160 - v_cvt_f32_bf16_sdwa v8, v97 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BE08: 7E10B6F9 00041661 - v_fmac_f32_e64 v18, v8, s45 // 00000001BE10: D13B0012 00005B08 - v_cvt_pk_bf16_f32 v18, v18, v18 // 00000001BE18: D2680012 00022512 - buffer_store_short v18, v98, s[16:19], 0 offen nt // 00000001BE20: E06A1000 80041262 - v_cvt_f32_bf16_sdwa v8, v99 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BE28: 
7E10B6F9 00041663 - v_fmac_f32_e64 v19, v8, s45 // 00000001BE30: D13B0013 00005B08 - v_cvt_pk_bf16_f32 v19, v19, v19 // 00000001BE38: D2680013 00022713 - buffer_store_short v19, v100, s[16:19], 0 offen nt // 00000001BE40: E06A1000 80041364 - v_cvt_f32_bf16_sdwa v8, v101 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BE48: 7E10B6F9 00041665 - v_fmac_f32_e64 v20, v8, s45 // 00000001BE50: D13B0014 00005B08 - v_cvt_pk_bf16_f32 v20, v20, v20 // 00000001BE58: D2680014 00022914 - buffer_store_short v20, v102, s[16:19], 0 offen nt // 00000001BE60: E06A1000 80041466 - v_cvt_f32_bf16_sdwa v8, v103 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BE68: 7E10B6F9 00041667 - v_fmac_f32_e64 v21, v8, s45 // 00000001BE70: D13B0015 00005B08 - v_cvt_pk_bf16_f32 v21, v21, v21 // 00000001BE78: D2680015 00022B15 - buffer_store_short v21, v104, s[16:19], 0 offen nt // 00000001BE80: E06A1000 80041568 - v_cvt_f32_bf16_sdwa v8, v105 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BE88: 7E10B6F9 00041669 - v_fmac_f32_e64 v22, v8, s45 // 00000001BE90: D13B0016 00005B08 - v_cvt_pk_bf16_f32 v22, v22, v22 // 00000001BE98: D2680016 00022D16 - buffer_store_short v22, v106, s[16:19], 0 offen nt // 00000001BEA0: E06A1000 8004166A - v_cvt_f32_bf16_sdwa v8, v107 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BEA8: 7E10B6F9 0004166B - v_fmac_f32_e64 v23, v8, s45 // 00000001BEB0: D13B0017 00005B08 - v_cvt_pk_bf16_f32 v23, v23, v23 // 00000001BEB8: D2680017 00022F17 - buffer_store_short v23, v108, s[16:19], 0 offen nt // 00000001BEC0: E06A1000 8004176C - v_cvt_f32_bf16_sdwa v8, v109 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BEC8: 7E10B6F9 0004166D - v_fmac_f32_e64 v24, v8, s45 // 00000001BED0: D13B0018 00005B08 - v_cvt_pk_bf16_f32 v24, v24, v24 // 00000001BED8: D2680018 00023118 - buffer_store_short v24, v110, s[16:19], 0 offen nt // 00000001BEE0: E06A1000 8004186E - v_cvt_f32_bf16_sdwa v8, v111 
dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BEE8: 7E10B6F9 0004166F - v_fmac_f32_e64 v25, v8, s45 // 00000001BEF0: D13B0019 00005B08 - v_cvt_pk_bf16_f32 v25, v25, v25 // 00000001BEF8: D2680019 00023319 - buffer_store_short v25, v112, s[16:19], 0 offen nt // 00000001BF00: E06A1000 80041970 - v_cvt_f32_bf16_sdwa v8, v113 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BF08: 7E10B6F9 00041671 - v_fmac_f32_e64 v26, v8, s45 // 00000001BF10: D13B001A 00005B08 - v_cvt_pk_bf16_f32 v26, v26, v26 // 00000001BF18: D268001A 0002351A - buffer_store_short v26, v114, s[16:19], 0 offen nt // 00000001BF20: E06A1000 80041A72 - v_cvt_f32_bf16_sdwa v8, v115 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BF28: 7E10B6F9 00041673 - v_fmac_f32_e64 v27, v8, s45 // 00000001BF30: D13B001B 00005B08 - v_cvt_pk_bf16_f32 v27, v27, v27 // 00000001BF38: D268001B 0002371B - buffer_store_short v27, v116, s[16:19], 0 offen nt // 00000001BF40: E06A1000 80041B74 - v_cvt_f32_bf16_sdwa v8, v117 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BF48: 7E10B6F9 00041675 - v_fmac_f32_e64 v28, v8, s45 // 00000001BF50: D13B001C 00005B08 - v_cvt_pk_bf16_f32 v28, v28, v28 // 00000001BF58: D268001C 0002391C - buffer_store_short v28, v118, s[16:19], 0 offen nt // 00000001BF60: E06A1000 80041C76 - v_cvt_f32_bf16_sdwa v8, v119 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BF68: 7E10B6F9 00041677 - v_fmac_f32_e64 v29, v8, s45 // 00000001BF70: D13B001D 00005B08 - v_cvt_pk_bf16_f32 v29, v29, v29 // 00000001BF78: D268001D 00023B1D - buffer_store_short v29, v120, s[16:19], 0 offen nt // 00000001BF80: E06A1000 80041D78 - v_cvt_f32_bf16_sdwa v8, v121 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BF88: 7E10B6F9 00041679 - v_fmac_f32_e64 v30, v8, s45 // 00000001BF90: D13B001E 00005B08 - v_cvt_pk_bf16_f32 v30, v30, v30 // 00000001BF98: D268001E 00023D1E - buffer_store_short v30, v122, s[16:19], 0 offen nt 
// 00000001BFA0: E06A1000 80041E7A - v_cvt_f32_bf16_sdwa v8, v123 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BFA8: 7E10B6F9 0004167B - v_fmac_f32_e64 v31, v8, s45 // 00000001BFB0: D13B001F 00005B08 - v_cvt_pk_bf16_f32 v31, v31, v31 // 00000001BFB8: D268001F 00023F1F - buffer_store_short v31, v124, s[16:19], 0 offen nt // 00000001BFC0: E06A1000 80041F7C - v_cvt_f32_bf16_sdwa v8, v125 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BFC8: 7E10B6F9 0004167D - v_fmac_f32_e64 v32, v8, s45 // 00000001BFD0: D13B0020 00005B08 - v_cvt_pk_bf16_f32 v32, v32, v32 // 00000001BFD8: D2680020 00024120 - buffer_store_short v32, v126, s[16:19], 0 offen nt // 00000001BFE0: E06A1000 8004207E - v_cvt_f32_bf16_sdwa v8, v127 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001BFE8: 7E10B6F9 0004167F - v_fmac_f32_e64 v33, v8, s45 // 00000001BFF0: D13B0021 00005B08 - v_cvt_pk_bf16_f32 v33, v33, v33 // 00000001BFF8: D2680021 00024321 - buffer_store_short v33, v128, s[16:19], 0 offen nt // 00000001C000: E06A1000 80042180 - v_cvt_f32_bf16_sdwa v8, v129 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C008: 7E10B6F9 00041681 - v_fmac_f32_e64 v34, v8, s45 // 00000001C010: D13B0022 00005B08 - v_cvt_pk_bf16_f32 v34, v34, v34 // 00000001C018: D2680022 00024522 - buffer_store_short v34, v130, s[16:19], 0 offen nt // 00000001C020: E06A1000 80042282 - v_cvt_f32_bf16_sdwa v8, v131 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C028: 7E10B6F9 00041683 - v_fmac_f32_e64 v35, v8, s45 // 00000001C030: D13B0023 00005B08 - v_cvt_pk_bf16_f32 v35, v35, v35 // 00000001C038: D2680023 00024723 - buffer_store_short v35, v135, s[16:19], 0 offen nt // 00000001C040: E06A1000 80042387 - v_cvt_f32_bf16_sdwa v8, v136 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C048: 7E10B6F9 00041688 - v_fmac_f32_e64 v36, v8, s45 // 00000001C050: D13B0024 00005B08 - v_cvt_pk_bf16_f32 v36, v36, v36 // 00000001C058: 
D2680024 00024924 - buffer_store_short v36, v137, s[16:19], 0 offen nt // 00000001C060: E06A1000 80042489 - v_cvt_f32_bf16_sdwa v8, v138 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C068: 7E10B6F9 0004168A - v_fmac_f32_e64 v37, v8, s45 // 00000001C070: D13B0025 00005B08 - v_cvt_pk_bf16_f32 v37, v37, v37 // 00000001C078: D2680025 00024B25 - buffer_store_short v37, v139, s[16:19], 0 offen nt // 00000001C080: E06A1000 8004258B - v_cvt_f32_bf16_sdwa v8, v140 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C088: 7E10B6F9 0004168C - v_fmac_f32_e64 v38, v8, s45 // 00000001C090: D13B0026 00005B08 - v_cvt_pk_bf16_f32 v38, v38, v38 // 00000001C098: D2680026 00024D26 - buffer_store_short v38, v141, s[16:19], 0 offen nt // 00000001C0A0: E06A1000 8004268D - v_cvt_f32_bf16_sdwa v8, v142 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C0A8: 7E10B6F9 0004168E - v_fmac_f32_e64 v39, v8, s45 // 00000001C0B0: D13B0027 00005B08 - v_cvt_pk_bf16_f32 v39, v39, v39 // 00000001C0B8: D2680027 00024F27 - buffer_store_short v39, v143, s[16:19], 0 offen nt // 00000001C0C0: E06A1000 8004278F - v_cvt_f32_bf16_sdwa v8, v144 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C0C8: 7E10B6F9 00041690 - v_fmac_f32_e64 v40, v8, s45 // 00000001C0D0: D13B0028 00005B08 - v_cvt_pk_bf16_f32 v40, v40, v40 // 00000001C0D8: D2680028 00025128 - buffer_store_short v40, v145, s[16:19], 0 offen nt // 00000001C0E0: E06A1000 80042891 - v_cvt_f32_bf16_sdwa v8, v146 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C0E8: 7E10B6F9 00041692 - v_fmac_f32_e64 v41, v8, s45 // 00000001C0F0: D13B0029 00005B08 - v_cvt_pk_bf16_f32 v41, v41, v41 // 00000001C0F8: D2680029 00025329 - buffer_store_short v41, v147, s[16:19], 0 offen nt // 00000001C100: E06A1000 80042993 - v_cvt_f32_bf16_sdwa v8, v148 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C108: 7E10B6F9 00041694 - v_fmac_f32_e64 v42, v8, s45 // 00000001C110: 
D13B002A 00005B08 - v_cvt_pk_bf16_f32 v42, v42, v42 // 00000001C118: D268002A 0002552A - buffer_store_short v42, v149, s[16:19], 0 offen nt // 00000001C120: E06A1000 80042A95 - v_cvt_f32_bf16_sdwa v8, v150 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C128: 7E10B6F9 00041696 - v_fmac_f32_e64 v43, v8, s45 // 00000001C130: D13B002B 00005B08 - v_cvt_pk_bf16_f32 v43, v43, v43 // 00000001C138: D268002B 0002572B - buffer_store_short v43, v151, s[16:19], 0 offen nt // 00000001C140: E06A1000 80042B97 - v_cvt_f32_bf16_sdwa v8, v152 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C148: 7E10B6F9 00041698 - v_fmac_f32_e64 v44, v8, s45 // 00000001C150: D13B002C 00005B08 - v_cvt_pk_bf16_f32 v44, v44, v44 // 00000001C158: D268002C 0002592C - buffer_store_short v44, v153, s[16:19], 0 offen nt // 00000001C160: E06A1000 80042C99 - v_cvt_f32_bf16_sdwa v8, v154 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C168: 7E10B6F9 0004169A - v_fmac_f32_e64 v45, v8, s45 // 00000001C170: D13B002D 00005B08 - v_cvt_pk_bf16_f32 v45, v45, v45 // 00000001C178: D268002D 00025B2D - buffer_store_short v45, v155, s[16:19], 0 offen nt // 00000001C180: E06A1000 80042D9B - v_cvt_f32_bf16_sdwa v8, v156 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C188: 7E10B6F9 0004169C - v_fmac_f32_e64 v46, v8, s45 // 00000001C190: D13B002E 00005B08 - v_cvt_pk_bf16_f32 v46, v46, v46 // 00000001C198: D268002E 00025D2E - buffer_store_short v46, v157, s[16:19], 0 offen nt // 00000001C1A0: E06A1000 80042E9D - v_cvt_f32_bf16_sdwa v8, v158 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C1A8: 7E10B6F9 0004169E - v_fmac_f32_e64 v47, v8, s45 // 00000001C1B0: D13B002F 00005B08 - v_cvt_pk_bf16_f32 v47, v47, v47 // 00000001C1B8: D268002F 00025F2F - buffer_store_short v47, v159, s[16:19], 0 offen nt // 00000001C1C0: E06A1000 80042F9F - v_cvt_f32_bf16_sdwa v8, v160 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C1C8: 
7E10B6F9 000416A0 - v_fmac_f32_e64 v48, v8, s45 // 00000001C1D0: D13B0030 00005B08 - v_cvt_pk_bf16_f32 v48, v48, v48 // 00000001C1D8: D2680030 00026130 - buffer_store_short v48, v161, s[16:19], 0 offen nt // 00000001C1E0: E06A1000 800430A1 - v_cvt_f32_bf16_sdwa v8, v162 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C1E8: 7E10B6F9 000416A2 - v_fmac_f32_e64 v49, v8, s45 // 00000001C1F0: D13B0031 00005B08 - v_cvt_pk_bf16_f32 v49, v49, v49 // 00000001C1F8: D2680031 00026331 - buffer_store_short v49, v163, s[16:19], 0 offen nt // 00000001C200: E06A1000 800431A3 - v_cvt_f32_bf16_sdwa v8, v164 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C208: 7E10B6F9 000416A4 - v_fmac_f32_e64 v50, v8, s45 // 00000001C210: D13B0032 00005B08 - v_cvt_pk_bf16_f32 v50, v50, v50 // 00000001C218: D2680032 00026532 - buffer_store_short v50, v165, s[16:19], 0 offen nt // 00000001C220: E06A1000 800432A5 - v_cvt_f32_bf16_sdwa v8, v166 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C228: 7E10B6F9 000416A6 - v_fmac_f32_e64 v51, v8, s45 // 00000001C230: D13B0033 00005B08 - v_cvt_pk_bf16_f32 v51, v51, v51 // 00000001C238: D2680033 00026733 - buffer_store_short v51, v167, s[16:19], 0 offen nt // 00000001C240: E06A1000 800433A7 - v_cvt_f32_bf16_sdwa v8, v168 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C248: 7E10B6F9 000416A8 - v_fmac_f32_e64 v52, v8, s45 // 00000001C250: D13B0034 00005B08 - v_cvt_pk_bf16_f32 v52, v52, v52 // 00000001C258: D2680034 00026934 - buffer_store_short v52, v169, s[16:19], 0 offen nt // 00000001C260: E06A1000 800434A9 - v_cvt_f32_bf16_sdwa v8, v170 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C268: 7E10B6F9 000416AA - v_fmac_f32_e64 v53, v8, s45 // 00000001C270: D13B0035 00005B08 - v_cvt_pk_bf16_f32 v53, v53, v53 // 00000001C278: D2680035 00026B35 - buffer_store_short v53, v171, s[16:19], 0 offen nt // 00000001C280: E06A1000 800435AB - v_cvt_f32_bf16_sdwa v8, v172 
dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C288: 7E10B6F9 000416AC - v_fmac_f32_e64 v54, v8, s45 // 00000001C290: D13B0036 00005B08 - v_cvt_pk_bf16_f32 v54, v54, v54 // 00000001C298: D2680036 00026D36 - buffer_store_short v54, v173, s[16:19], 0 offen nt // 00000001C2A0: E06A1000 800436AD - v_cvt_f32_bf16_sdwa v8, v174 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C2A8: 7E10B6F9 000416AE - v_fmac_f32_e64 v55, v8, s45 // 00000001C2B0: D13B0037 00005B08 - v_cvt_pk_bf16_f32 v55, v55, v55 // 00000001C2B8: D2680037 00026F37 - buffer_store_short v55, v175, s[16:19], 0 offen nt // 00000001C2C0: E06A1000 800437AF - v_cvt_f32_bf16_sdwa v8, v176 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C2C8: 7E10B6F9 000416B0 - v_fmac_f32_e64 v56, v8, s45 // 00000001C2D0: D13B0038 00005B08 - v_cvt_pk_bf16_f32 v56, v56, v56 // 00000001C2D8: D2680038 00027138 - buffer_store_short v56, v177, s[16:19], 0 offen nt // 00000001C2E0: E06A1000 800438B1 - v_cvt_f32_bf16_sdwa v8, v178 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C2E8: 7E10B6F9 000416B2 - v_fmac_f32_e64 v57, v8, s45 // 00000001C2F0: D13B0039 00005B08 - v_cvt_pk_bf16_f32 v57, v57, v57 // 00000001C2F8: D2680039 00027339 - buffer_store_short v57, v179, s[16:19], 0 offen nt // 00000001C300: E06A1000 800439B3 - v_cvt_f32_bf16_sdwa v8, v180 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C308: 7E10B6F9 000416B4 - v_fmac_f32_e64 v58, v8, s45 // 00000001C310: D13B003A 00005B08 - v_cvt_pk_bf16_f32 v58, v58, v58 // 00000001C318: D268003A 0002753A - buffer_store_short v58, v181, s[16:19], 0 offen nt // 00000001C320: E06A1000 80043AB5 - v_cvt_f32_bf16_sdwa v8, v182 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C328: 7E10B6F9 000416B6 - v_fmac_f32_e64 v59, v8, s45 // 00000001C330: D13B003B 00005B08 - v_cvt_pk_bf16_f32 v59, v59, v59 // 00000001C338: D268003B 0002773B - buffer_store_short v59, v183, s[16:19], 0 offen nt 
// 00000001C340: E06A1000 80043BB7 - v_cvt_f32_bf16_sdwa v8, v184 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C348: 7E10B6F9 000416B8 - v_fmac_f32_e64 v60, v8, s45 // 00000001C350: D13B003C 00005B08 - v_cvt_pk_bf16_f32 v60, v60, v60 // 00000001C358: D268003C 0002793C - buffer_store_short v60, v185, s[16:19], 0 offen nt // 00000001C360: E06A1000 80043CB9 - v_cvt_f32_bf16_sdwa v8, v186 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C368: 7E10B6F9 000416BA - v_fmac_f32_e64 v61, v8, s45 // 00000001C370: D13B003D 00005B08 - v_cvt_pk_bf16_f32 v61, v61, v61 // 00000001C378: D268003D 00027B3D - buffer_store_short v61, v187, s[16:19], 0 offen nt // 00000001C380: E06A1000 80043DBB - v_cvt_f32_bf16_sdwa v8, v188 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C388: 7E10B6F9 000416BC - v_fmac_f32_e64 v62, v8, s45 // 00000001C390: D13B003E 00005B08 - v_cvt_pk_bf16_f32 v62, v62, v62 // 00000001C398: D268003E 00027D3E - buffer_store_short v62, v189, s[16:19], 0 offen nt // 00000001C3A0: E06A1000 80043EBD - v_cvt_f32_bf16_sdwa v8, v190 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C3A8: 7E10B6F9 000416BE - v_fmac_f32_e64 v63, v8, s45 // 00000001C3B0: D13B003F 00005B08 - v_cvt_pk_bf16_f32 v63, v63, v63 // 00000001C3B8: D268003F 00027F3F - buffer_store_short v63, v191, s[16:19], 0 offen nt // 00000001C3C0: E06A1000 80043FBF - v_cvt_f32_bf16_sdwa v8, v192 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C3C8: 7E10B6F9 000416C0 - v_fmac_f32_e64 v64, v8, s45 // 00000001C3D0: D13B0040 00005B08 - v_cvt_pk_bf16_f32 v64, v64, v64 // 00000001C3D8: D2680040 00028140 - buffer_store_short v64, v193, s[16:19], 0 offen nt // 00000001C3E0: E06A1000 800440C1 - v_cvt_f32_bf16_sdwa v8, v194 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C3E8: 7E10B6F9 000416C2 - v_fmac_f32_e64 v65, v8, s45 // 00000001C3F0: D13B0041 00005B08 - v_cvt_pk_bf16_f32 v65, v65, v65 // 00000001C3F8: 
D2680041 00028341 - buffer_store_short v65, v195, s[16:19], 0 offen nt // 00000001C400: E06A1000 800441C3 - v_cvt_f32_bf16_sdwa v8, v196 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C408: 7E10B6F9 000416C4 - v_fmac_f32_e64 v66, v8, s45 // 00000001C410: D13B0042 00005B08 - v_cvt_pk_bf16_f32 v66, v66, v66 // 00000001C418: D2680042 00028542 - buffer_store_short v66, v197, s[16:19], 0 offen nt // 00000001C420: E06A1000 800442C5 - v_cvt_f32_bf16_sdwa v8, v198 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C428: 7E10B6F9 000416C6 - v_fmac_f32_e64 v67, v8, s45 // 00000001C430: D13B0043 00005B08 - v_cvt_pk_bf16_f32 v67, v67, v67 // 00000001C438: D2680043 00028743 - buffer_store_short v67, v199, s[16:19], 0 offen nt // 00000001C440: E06A1000 800443C7 - v_cvt_f32_bf16_sdwa v8, v200 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C448: 7E10B6F9 000416C8 - v_fmac_f32_e64 v68, v8, s45 // 00000001C450: D13B0044 00005B08 - v_cvt_pk_bf16_f32 v68, v68, v68 // 00000001C458: D2680044 00028944 - buffer_store_short v68, v201, s[16:19], 0 offen nt // 00000001C460: E06A1000 800444C9 - v_cvt_f32_bf16_sdwa v8, v202 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C468: 7E10B6F9 000416CA - v_fmac_f32_e64 v69, v8, s45 // 00000001C470: D13B0045 00005B08 - v_cvt_pk_bf16_f32 v69, v69, v69 // 00000001C478: D2680045 00028B45 - buffer_store_short v69, v203, s[16:19], 0 offen nt // 00000001C480: E06A1000 800445CB - v_cvt_f32_bf16_sdwa v8, v204 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C488: 7E10B6F9 000416CC - v_fmac_f32_e64 v70, v8, s45 // 00000001C490: D13B0046 00005B08 - v_cvt_pk_bf16_f32 v70, v70, v70 // 00000001C498: D2680046 00028D46 - buffer_store_short v70, v205, s[16:19], 0 offen nt // 00000001C4A0: E06A1000 800446CD - v_cvt_f32_bf16_sdwa v8, v206 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C4A8: 7E10B6F9 000416CE - v_fmac_f32_e64 v71, v8, s45 // 00000001C4B0: 
D13B0047 00005B08 - v_cvt_pk_bf16_f32 v71, v71, v71 // 00000001C4B8: D2680047 00028F47 - buffer_store_short v71, v207, s[16:19], 0 offen nt // 00000001C4C0: E06A1000 800447CF - v_cvt_f32_bf16_sdwa v8, v208 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C4C8: 7E10B6F9 000416D0 - v_fmac_f32_e64 v72, v8, s45 // 00000001C4D0: D13B0048 00005B08 - v_cvt_pk_bf16_f32 v72, v72, v72 // 00000001C4D8: D2680048 00029148 - buffer_store_short v72, v209, s[16:19], 0 offen nt // 00000001C4E0: E06A1000 800448D1 - v_cvt_f32_bf16_sdwa v8, v210 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C4E8: 7E10B6F9 000416D2 - v_fmac_f32_e64 v73, v8, s45 // 00000001C4F0: D13B0049 00005B08 - v_cvt_pk_bf16_f32 v73, v73, v73 // 00000001C4F8: D2680049 00029349 - buffer_store_short v73, v211, s[16:19], 0 offen nt // 00000001C500: E06A1000 800449D3 - v_cvt_f32_bf16_sdwa v8, v212 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C508: 7E10B6F9 000416D4 - v_fmac_f32_e64 v74, v8, s45 // 00000001C510: D13B004A 00005B08 - v_cvt_pk_bf16_f32 v74, v74, v74 // 00000001C518: D268004A 0002954A - buffer_store_short v74, v213, s[16:19], 0 offen nt // 00000001C520: E06A1000 80044AD5 - v_cvt_f32_bf16_sdwa v8, v214 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C528: 7E10B6F9 000416D6 - v_fmac_f32_e64 v75, v8, s45 // 00000001C530: D13B004B 00005B08 - v_cvt_pk_bf16_f32 v75, v75, v75 // 00000001C538: D268004B 0002974B - buffer_store_short v75, v215, s[16:19], 0 offen nt // 00000001C540: E06A1000 80044BD7 - v_cvt_f32_bf16_sdwa v8, v216 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C548: 7E10B6F9 000416D8 - v_fmac_f32_e64 v76, v8, s45 // 00000001C550: D13B004C 00005B08 - v_cvt_pk_bf16_f32 v76, v76, v76 // 00000001C558: D268004C 0002994C - buffer_store_short v76, v217, s[16:19], 0 offen nt // 00000001C560: E06A1000 80044CD9 - v_cvt_f32_bf16_sdwa v8, v218 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C568: 
7E10B6F9 000416DA - v_fmac_f32_e64 v77, v8, s45 // 00000001C570: D13B004D 00005B08 - v_cvt_pk_bf16_f32 v77, v77, v77 // 00000001C578: D268004D 00029B4D - buffer_store_short v77, v219, s[16:19], 0 offen nt // 00000001C580: E06A1000 80044DDB - v_cvt_f32_bf16_sdwa v8, v220 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C588: 7E10B6F9 000416DC - v_fmac_f32_e64 v78, v8, s45 // 00000001C590: D13B004E 00005B08 - v_cvt_pk_bf16_f32 v78, v78, v78 // 00000001C598: D268004E 00029D4E - buffer_store_short v78, v221, s[16:19], 0 offen nt // 00000001C5A0: E06A1000 80044EDD - v_cvt_f32_bf16_sdwa v8, v222 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C5A8: 7E10B6F9 000416DE - v_fmac_f32_e64 v79, v8, s45 // 00000001C5B0: D13B004F 00005B08 - v_cvt_pk_bf16_f32 v79, v79, v79 // 00000001C5B8: D268004F 00029F4F - buffer_store_short v79, v223, s[16:19], 0 offen nt // 00000001C5C0: E06A1000 80044FDF - v_cvt_f32_bf16_sdwa v8, v224 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C5C8: 7E10B6F9 000416E0 - v_fmac_f32_e64 v80, v8, s45 // 00000001C5D0: D13B0050 00005B08 - v_cvt_pk_bf16_f32 v80, v80, v80 // 00000001C5D8: D2680050 0002A150 - buffer_store_short v80, v225, s[16:19], 0 offen nt // 00000001C5E0: E06A1000 800450E1 - v_cvt_f32_bf16_sdwa v8, v226 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C5E8: 7E10B6F9 000416E2 - v_fmac_f32_e64 v81, v8, s45 // 00000001C5F0: D13B0051 00005B08 - v_cvt_pk_bf16_f32 v81, v81, v81 // 00000001C5F8: D2680051 0002A351 - buffer_store_short v81, v227, s[16:19], 0 offen nt // 00000001C600: E06A1000 800451E3 - v_cvt_f32_bf16_sdwa v8, v228 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C608: 7E10B6F9 000416E4 - v_fmac_f32_e64 v82, v8, s45 // 00000001C610: D13B0052 00005B08 - v_cvt_pk_bf16_f32 v82, v82, v82 // 00000001C618: D2680052 0002A552 - buffer_store_short v82, v229, s[16:19], 0 offen nt // 00000001C620: E06A1000 800452E5 - v_cvt_f32_bf16_sdwa v8, v230 
dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C628: 7E10B6F9 000416E6 - v_fmac_f32_e64 v83, v8, s45 // 00000001C630: D13B0053 00005B08 - v_cvt_pk_bf16_f32 v83, v83, v83 // 00000001C638: D2680053 0002A753 - buffer_store_short v83, v231, s[16:19], 0 offen nt // 00000001C640: E06A1000 800453E7 - v_cvt_f32_bf16_sdwa v8, v232 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C648: 7E10B6F9 000416E8 - v_fmac_f32_e64 v84, v8, s45 // 00000001C650: D13B0054 00005B08 - v_cvt_pk_bf16_f32 v84, v84, v84 // 00000001C658: D2680054 0002A954 - buffer_store_short v84, v233, s[16:19], 0 offen nt // 00000001C660: E06A1000 800454E9 - v_cvt_f32_bf16_sdwa v8, v234 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C668: 7E10B6F9 000416EA - v_fmac_f32_e64 v85, v8, s45 // 00000001C670: D13B0055 00005B08 - v_cvt_pk_bf16_f32 v85, v85, v85 // 00000001C678: D2680055 0002AB55 - buffer_store_short v85, v235, s[16:19], 0 offen nt // 00000001C680: E06A1000 800455EB - v_cvt_f32_bf16_sdwa v8, v236 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C688: 7E10B6F9 000416EC - v_fmac_f32_e64 v86, v8, s45 // 00000001C690: D13B0056 00005B08 - v_cvt_pk_bf16_f32 v86, v86, v86 // 00000001C698: D2680056 0002AD56 - buffer_store_short v86, v237, s[16:19], 0 offen nt // 00000001C6A0: E06A1000 800456ED - v_cvt_f32_bf16_sdwa v8, v238 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C6A8: 7E10B6F9 000416EE - v_fmac_f32_e64 v87, v8, s45 // 00000001C6B0: D13B0057 00005B08 - v_cvt_pk_bf16_f32 v87, v87, v87 // 00000001C6B8: D2680057 0002AF57 - buffer_store_short v87, v239, s[16:19], 0 offen nt // 00000001C6C0: E06A1000 800457EF - v_cvt_f32_bf16_sdwa v8, v240 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C6C8: 7E10B6F9 000416F0 - v_fmac_f32_e64 v88, v8, s45 // 00000001C6D0: D13B0058 00005B08 - v_cvt_pk_bf16_f32 v88, v88, v88 // 00000001C6D8: D2680058 0002B158 - buffer_store_short v88, v241, s[16:19], 0 offen nt 
// 00000001C6E0: E06A1000 800458F1 - v_cvt_f32_bf16_sdwa v8, v242 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C6E8: 7E10B6F9 000416F2 - v_fmac_f32_e64 v89, v8, s45 // 00000001C6F0: D13B0059 00005B08 - v_cvt_pk_bf16_f32 v89, v89, v89 // 00000001C6F8: D2680059 0002B359 - buffer_store_short v89, v243, s[16:19], 0 offen nt // 00000001C700: E06A1000 800459F3 - v_cvt_f32_bf16_sdwa v8, v244 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001C708: 7E10B6F9 000416F4 - v_fmac_f32_e64 v90, v8, s45 // 00000001C710: D13B005A 00005B08 - v_cvt_pk_bf16_f32 v90, v90, v90 // 00000001C718: D268005A 0002B55A - buffer_store_short v90, v245, s[16:19], 0 offen nt // 00000001C720: E06A1000 80045AF5 - s_nop 0 // 00000001C728: BF800000 - v_mov_b32_e32 v10, 0x80000000 // 00000001C72C: 7E1402FF 80000000 - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001C734: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001C73C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001C744: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001C74C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001C754: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001C75C: 86A2221E - v_add_lshl_u32 v92, v6, v4, 1 // 00000001C760: D1FE005C 02060906 - v_cndmask_b32_e64 v92, v10, v92, s[34:35] // 00000001C768: D100005C 008AB90A - buffer_load_short_d16 v91, v92, s[20:23], 0 offen // 00000001C770: E0901000 80055B5C - v_add_lshl_u32 v92, v7, v4, 1 // 00000001C778: D1FE005C 02060907 - v_cndmask_b32_e64 v92, v10, v92, s[34:35] // 00000001C780: D100005C 008AB90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001C788: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001C790: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001C798: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001C7A0: 86A2221E - v_add_lshl_u32 v94, v6, v8, 1 // 00000001C7A4: D1FE005E 02061106 - v_cndmask_b32_e64 v94, v10, v94, 
s[34:35] // 00000001C7AC: D100005E 008ABD0A - buffer_load_short_d16 v93, v94, s[20:23], 0 offen // 00000001C7B4: E0901000 80055D5E - v_add_lshl_u32 v94, v7, v8, 1 // 00000001C7BC: D1FE005E 02061107 - v_cndmask_b32_e64 v94, v10, v94, s[34:35] // 00000001C7C4: D100005E 008ABD0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001C7CC: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001C7D4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001C7DC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001C7E4: 86A2221E - v_add_lshl_u32 v96, v6, v8, 1 // 00000001C7E8: D1FE0060 02061106 - v_cndmask_b32_e64 v96, v10, v96, s[34:35] // 00000001C7F0: D1000060 008AC10A - buffer_load_short_d16 v95, v96, s[20:23], 0 offen // 00000001C7F8: E0901000 80055F60 - v_add_lshl_u32 v96, v7, v8, 1 // 00000001C800: D1FE0060 02061107 - v_cndmask_b32_e64 v96, v10, v96, s[34:35] // 00000001C808: D1000060 008AC10A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001C810: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001C818: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001C820: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001C828: 86A2221E - v_add_lshl_u32 v98, v6, v8, 1 // 00000001C82C: D1FE0062 02061106 - v_cndmask_b32_e64 v98, v10, v98, s[34:35] // 00000001C834: D1000062 008AC50A - buffer_load_short_d16 v97, v98, s[20:23], 0 offen // 00000001C83C: E0901000 80056162 - v_add_lshl_u32 v98, v7, v8, 1 // 00000001C844: D1FE0062 02061107 - v_cndmask_b32_e64 v98, v10, v98, s[34:35] // 00000001C84C: D1000062 008AC50A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001C854: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001C85C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001C864: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001C86C: 86A2221E - v_add_lshl_u32 v100, v6, v8, 1 // 00000001C870: D1FE0064 02061106 - v_cndmask_b32_e64 v100, v10, v100, s[34:35] // 
00000001C878: D1000064 008AC90A - buffer_load_short_d16 v99, v100, s[20:23], 0 offen // 00000001C880: E0901000 80056364 - v_add_lshl_u32 v100, v7, v8, 1 // 00000001C888: D1FE0064 02061107 - v_cndmask_b32_e64 v100, v10, v100, s[34:35] // 00000001C890: D1000064 008AC90A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001C898: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001C8A0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001C8A8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001C8B0: 86A2221E - v_add_lshl_u32 v102, v6, v8, 1 // 00000001C8B4: D1FE0066 02061106 - v_cndmask_b32_e64 v102, v10, v102, s[34:35] // 00000001C8BC: D1000066 008ACD0A - buffer_load_short_d16 v101, v102, s[20:23], 0 offen // 00000001C8C4: E0901000 80056566 - v_add_lshl_u32 v102, v7, v8, 1 // 00000001C8CC: D1FE0066 02061107 - v_cndmask_b32_e64 v102, v10, v102, s[34:35] // 00000001C8D4: D1000066 008ACD0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001C8DC: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001C8E4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001C8EC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001C8F4: 86A2221E - v_add_lshl_u32 v104, v6, v8, 1 // 00000001C8F8: D1FE0068 02061106 - v_cndmask_b32_e64 v104, v10, v104, s[34:35] // 00000001C900: D1000068 008AD10A - buffer_load_short_d16 v103, v104, s[20:23], 0 offen // 00000001C908: E0901000 80056768 - v_add_lshl_u32 v104, v7, v8, 1 // 00000001C910: D1FE0068 02061107 - v_cndmask_b32_e64 v104, v10, v104, s[34:35] // 00000001C918: D1000068 008AD10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001C920: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001C928: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001C930: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001C938: 86A2221E - v_add_lshl_u32 v106, v6, v8, 1 // 00000001C93C: D1FE006A 02061106 - v_cndmask_b32_e64 v106, v10, v106, s[34:35] // 
00000001C944: D100006A 008AD50A - buffer_load_short_d16 v105, v106, s[20:23], 0 offen // 00000001C94C: E0901000 8005696A - v_add_lshl_u32 v106, v7, v8, 1 // 00000001C954: D1FE006A 02061107 - v_cndmask_b32_e64 v106, v10, v106, s[34:35] // 00000001C95C: D100006A 008AD50A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001C964: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001C96C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001C974: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001C97C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001C984: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001C98C: 86A2221E - v_add_lshl_u32 v108, v6, v4, 1 // 00000001C990: D1FE006C 02060906 - v_cndmask_b32_e64 v108, v10, v108, s[34:35] // 00000001C998: D100006C 008AD90A - buffer_load_short_d16 v107, v108, s[20:23], 0 offen // 00000001C9A0: E0901000 80056B6C - v_add_lshl_u32 v108, v7, v4, 1 // 00000001C9A8: D1FE006C 02060907 - v_cndmask_b32_e64 v108, v10, v108, s[34:35] // 00000001C9B0: D100006C 008AD90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001C9B8: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001C9C0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001C9C8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001C9D0: 86A2221E - v_add_lshl_u32 v110, v6, v8, 1 // 00000001C9D4: D1FE006E 02061106 - v_cndmask_b32_e64 v110, v10, v110, s[34:35] // 00000001C9DC: D100006E 008ADD0A - buffer_load_short_d16 v109, v110, s[20:23], 0 offen // 00000001C9E4: E0901000 80056D6E - v_add_lshl_u32 v110, v7, v8, 1 // 00000001C9EC: D1FE006E 02061107 - v_cndmask_b32_e64 v110, v10, v110, s[34:35] // 00000001C9F4: D100006E 008ADD0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001C9FC: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CA04: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CA0C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 
00000001CA14: 86A2221E - v_add_lshl_u32 v112, v6, v8, 1 // 00000001CA18: D1FE0070 02061106 - v_cndmask_b32_e64 v112, v10, v112, s[34:35] // 00000001CA20: D1000070 008AE10A - buffer_load_short_d16 v111, v112, s[20:23], 0 offen // 00000001CA28: E0901000 80056F70 - v_add_lshl_u32 v112, v7, v8, 1 // 00000001CA30: D1FE0070 02061107 - v_cndmask_b32_e64 v112, v10, v112, s[34:35] // 00000001CA38: D1000070 008AE10A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001CA40: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CA48: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CA50: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CA58: 86A2221E - v_add_lshl_u32 v114, v6, v8, 1 // 00000001CA5C: D1FE0072 02061106 - v_cndmask_b32_e64 v114, v10, v114, s[34:35] // 00000001CA64: D1000072 008AE50A - buffer_load_short_d16 v113, v114, s[20:23], 0 offen // 00000001CA6C: E0901000 80057172 - v_add_lshl_u32 v114, v7, v8, 1 // 00000001CA74: D1FE0072 02061107 - v_cndmask_b32_e64 v114, v10, v114, s[34:35] // 00000001CA7C: D1000072 008AE50A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001CA84: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CA8C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CA94: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CA9C: 86A2221E - v_add_lshl_u32 v116, v6, v8, 1 // 00000001CAA0: D1FE0074 02061106 - v_cndmask_b32_e64 v116, v10, v116, s[34:35] // 00000001CAA8: D1000074 008AE90A - buffer_load_short_d16 v115, v116, s[20:23], 0 offen // 00000001CAB0: E0901000 80057374 - v_add_lshl_u32 v116, v7, v8, 1 // 00000001CAB8: D1FE0074 02061107 - v_cndmask_b32_e64 v116, v10, v116, s[34:35] // 00000001CAC0: D1000074 008AE90A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001CAC8: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CAD0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CAD8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] 
// 00000001CAE0: 86A2221E - v_add_lshl_u32 v118, v6, v8, 1 // 00000001CAE4: D1FE0076 02061106 - v_cndmask_b32_e64 v118, v10, v118, s[34:35] // 00000001CAEC: D1000076 008AED0A - buffer_load_short_d16 v117, v118, s[20:23], 0 offen // 00000001CAF4: E0901000 80057576 - v_add_lshl_u32 v118, v7, v8, 1 // 00000001CAFC: D1FE0076 02061107 - v_cndmask_b32_e64 v118, v10, v118, s[34:35] // 00000001CB04: D1000076 008AED0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001CB0C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CB14: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CB1C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CB24: 86A2221E - v_add_lshl_u32 v120, v6, v8, 1 // 00000001CB28: D1FE0078 02061106 - v_cndmask_b32_e64 v120, v10, v120, s[34:35] // 00000001CB30: D1000078 008AF10A - buffer_load_short_d16 v119, v120, s[20:23], 0 offen // 00000001CB38: E0901000 80057778 - v_add_lshl_u32 v120, v7, v8, 1 // 00000001CB40: D1FE0078 02061107 - v_cndmask_b32_e64 v120, v10, v120, s[34:35] // 00000001CB48: D1000078 008AF10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001CB50: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CB58: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CB60: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CB68: 86A2221E - v_add_lshl_u32 v122, v6, v8, 1 // 00000001CB6C: D1FE007A 02061106 - v_cndmask_b32_e64 v122, v10, v122, s[34:35] // 00000001CB74: D100007A 008AF50A - buffer_load_short_d16 v121, v122, s[20:23], 0 offen // 00000001CB7C: E0901000 8005797A - v_add_lshl_u32 v122, v7, v8, 1 // 00000001CB84: D1FE007A 02061107 - v_cndmask_b32_e64 v122, v10, v122, s[34:35] // 00000001CB8C: D100007A 008AF50A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001CB94: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001CB9C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001CBA4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001CBAC: 
D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CBB4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CBBC: 86A2221E - v_add_lshl_u32 v124, v6, v4, 1 // 00000001CBC0: D1FE007C 02060906 - v_cndmask_b32_e64 v124, v10, v124, s[34:35] // 00000001CBC8: D100007C 008AF90A - buffer_load_short_d16 v123, v124, s[20:23], 0 offen // 00000001CBD0: E0901000 80057B7C - v_add_lshl_u32 v124, v7, v4, 1 // 00000001CBD8: D1FE007C 02060907 - v_cndmask_b32_e64 v124, v10, v124, s[34:35] // 00000001CBE0: D100007C 008AF90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001CBE8: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CBF0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CBF8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CC00: 86A2221E - v_add_lshl_u32 v126, v6, v8, 1 // 00000001CC04: D1FE007E 02061106 - v_cndmask_b32_e64 v126, v10, v126, s[34:35] // 00000001CC0C: D100007E 008AFD0A - buffer_load_short_d16 v125, v126, s[20:23], 0 offen // 00000001CC14: E0901000 80057D7E - v_add_lshl_u32 v126, v7, v8, 1 // 00000001CC1C: D1FE007E 02061107 - v_cndmask_b32_e64 v126, v10, v126, s[34:35] // 00000001CC24: D100007E 008AFD0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001CC2C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CC34: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CC3C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CC44: 86A2221E - v_add_lshl_u32 v128, v6, v8, 1 // 00000001CC48: D1FE0080 02061106 - v_cndmask_b32_e64 v128, v10, v128, s[34:35] // 00000001CC50: D1000080 008B010A - buffer_load_short_d16 v127, v128, s[20:23], 0 offen // 00000001CC58: E0901000 80057F80 - v_add_lshl_u32 v128, v7, v8, 1 // 00000001CC60: D1FE0080 02061107 - v_cndmask_b32_e64 v128, v10, v128, s[34:35] // 00000001CC68: D1000080 008B010A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001CC70: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 
00000001CC78: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CC80: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CC88: 86A2221E - v_add_lshl_u32 v130, v6, v8, 1 // 00000001CC8C: D1FE0082 02061106 - v_cndmask_b32_e64 v130, v10, v130, s[34:35] // 00000001CC94: D1000082 008B050A - buffer_load_short_d16 v129, v130, s[20:23], 0 offen // 00000001CC9C: E0901000 80058182 - v_add_lshl_u32 v130, v7, v8, 1 // 00000001CCA4: D1FE0082 02061107 - v_cndmask_b32_e64 v130, v10, v130, s[34:35] // 00000001CCAC: D1000082 008B050A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001CCB4: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CCBC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CCC4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CCCC: 86A2221E - v_add_lshl_u32 v135, v6, v8, 1 // 00000001CCD0: D1FE0087 02061106 - v_cndmask_b32_e64 v135, v10, v135, s[34:35] // 00000001CCD8: D1000087 008B0F0A - buffer_load_short_d16 v131, v135, s[20:23], 0 offen // 00000001CCE0: E0901000 80058387 - v_add_lshl_u32 v135, v7, v8, 1 // 00000001CCE8: D1FE0087 02061107 - v_cndmask_b32_e64 v135, v10, v135, s[34:35] // 00000001CCF0: D1000087 008B0F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001CCF8: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CD00: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CD08: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CD10: 86A2221E - v_add_lshl_u32 v137, v6, v8, 1 // 00000001CD14: D1FE0089 02061106 - v_cndmask_b32_e64 v137, v10, v137, s[34:35] // 00000001CD1C: D1000089 008B130A - buffer_load_short_d16 v136, v137, s[20:23], 0 offen // 00000001CD24: E0901000 80058889 - v_add_lshl_u32 v137, v7, v8, 1 // 00000001CD2C: D1FE0089 02061107 - v_cndmask_b32_e64 v137, v10, v137, s[34:35] // 00000001CD34: D1000089 008B130A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001CD3C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 
// 00000001CD44: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CD4C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CD54: 86A2221E - v_add_lshl_u32 v139, v6, v8, 1 // 00000001CD58: D1FE008B 02061106 - v_cndmask_b32_e64 v139, v10, v139, s[34:35] // 00000001CD60: D100008B 008B170A - buffer_load_short_d16 v138, v139, s[20:23], 0 offen // 00000001CD68: E0901000 80058A8B - v_add_lshl_u32 v139, v7, v8, 1 // 00000001CD70: D1FE008B 02061107 - v_cndmask_b32_e64 v139, v10, v139, s[34:35] // 00000001CD78: D100008B 008B170A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001CD80: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CD88: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CD90: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CD98: 86A2221E - v_add_lshl_u32 v141, v6, v8, 1 // 00000001CD9C: D1FE008D 02061106 - v_cndmask_b32_e64 v141, v10, v141, s[34:35] // 00000001CDA4: D100008D 008B1B0A - buffer_load_short_d16 v140, v141, s[20:23], 0 offen // 00000001CDAC: E0901000 80058C8D - v_add_lshl_u32 v141, v7, v8, 1 // 00000001CDB4: D1FE008D 02061107 - v_cndmask_b32_e64 v141, v10, v141, s[34:35] // 00000001CDBC: D100008D 008B1B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001CDC4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001CDCC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001CDD4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001CDDC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CDE4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CDEC: 86A2221E - v_add_lshl_u32 v143, v6, v4, 1 // 00000001CDF0: D1FE008F 02060906 - v_cndmask_b32_e64 v143, v10, v143, s[34:35] // 00000001CDF8: D100008F 008B1F0A - buffer_load_short_d16 v142, v143, s[20:23], 0 offen // 00000001CE00: E0901000 80058E8F - v_add_lshl_u32 v143, v7, v4, 1 // 00000001CE08: D1FE008F 02060907 - v_cndmask_b32_e64 v143, v10, v143, s[34:35] // 
00000001CE10: D100008F 008B1F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001CE18: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CE20: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CE28: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CE30: 86A2221E - v_add_lshl_u32 v145, v6, v8, 1 // 00000001CE34: D1FE0091 02061106 - v_cndmask_b32_e64 v145, v10, v145, s[34:35] // 00000001CE3C: D1000091 008B230A - buffer_load_short_d16 v144, v145, s[20:23], 0 offen // 00000001CE44: E0901000 80059091 - v_add_lshl_u32 v145, v7, v8, 1 // 00000001CE4C: D1FE0091 02061107 - v_cndmask_b32_e64 v145, v10, v145, s[34:35] // 00000001CE54: D1000091 008B230A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001CE5C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CE64: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CE6C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CE74: 86A2221E - v_add_lshl_u32 v147, v6, v8, 1 // 00000001CE78: D1FE0093 02061106 - v_cndmask_b32_e64 v147, v10, v147, s[34:35] // 00000001CE80: D1000093 008B270A - buffer_load_short_d16 v146, v147, s[20:23], 0 offen // 00000001CE88: E0901000 80059293 - v_add_lshl_u32 v147, v7, v8, 1 // 00000001CE90: D1FE0093 02061107 - v_cndmask_b32_e64 v147, v10, v147, s[34:35] // 00000001CE98: D1000093 008B270A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001CEA0: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CEA8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CEB0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CEB8: 86A2221E - v_add_lshl_u32 v149, v6, v8, 1 // 00000001CEBC: D1FE0095 02061106 - v_cndmask_b32_e64 v149, v10, v149, s[34:35] // 00000001CEC4: D1000095 008B2B0A - buffer_load_short_d16 v148, v149, s[20:23], 0 offen // 00000001CECC: E0901000 80059495 - v_add_lshl_u32 v149, v7, v8, 1 // 00000001CED4: D1FE0095 02061107 - v_cndmask_b32_e64 v149, v10, v149, s[34:35] 
// 00000001CEDC: D1000095 008B2B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001CEE4: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CEEC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CEF4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CEFC: 86A2221E - v_add_lshl_u32 v151, v6, v8, 1 // 00000001CF00: D1FE0097 02061106 - v_cndmask_b32_e64 v151, v10, v151, s[34:35] // 00000001CF08: D1000097 008B2F0A - buffer_load_short_d16 v150, v151, s[20:23], 0 offen // 00000001CF10: E0901000 80059697 - v_add_lshl_u32 v151, v7, v8, 1 // 00000001CF18: D1FE0097 02061107 - v_cndmask_b32_e64 v151, v10, v151, s[34:35] // 00000001CF20: D1000097 008B2F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001CF28: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CF30: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CF38: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CF40: 86A2221E - v_add_lshl_u32 v153, v6, v8, 1 // 00000001CF44: D1FE0099 02061106 - v_cndmask_b32_e64 v153, v10, v153, s[34:35] // 00000001CF4C: D1000099 008B330A - buffer_load_short_d16 v152, v153, s[20:23], 0 offen // 00000001CF54: E0901000 80059899 - v_add_lshl_u32 v153, v7, v8, 1 // 00000001CF5C: D1FE0099 02061107 - v_cndmask_b32_e64 v153, v10, v153, s[34:35] // 00000001CF64: D1000099 008B330A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001CF6C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CF74: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CF7C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CF84: 86A2221E - v_add_lshl_u32 v155, v6, v8, 1 // 00000001CF88: D1FE009B 02061106 - v_cndmask_b32_e64 v155, v10, v155, s[34:35] // 00000001CF90: D100009B 008B370A - buffer_load_short_d16 v154, v155, s[20:23], 0 offen // 00000001CF98: E0901000 80059A9B - v_add_lshl_u32 v155, v7, v8, 1 // 00000001CFA0: D1FE009B 02061107 - v_cndmask_b32_e64 v155, v10, v155, 
s[34:35] // 00000001CFA8: D100009B 008B370A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001CFB0: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001CFB8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001CFC0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001CFC8: 86A2221E - v_add_lshl_u32 v157, v6, v8, 1 // 00000001CFCC: D1FE009D 02061106 - v_cndmask_b32_e64 v157, v10, v157, s[34:35] // 00000001CFD4: D100009D 008B3B0A - buffer_load_short_d16 v156, v157, s[20:23], 0 offen // 00000001CFDC: E0901000 80059C9D - v_add_lshl_u32 v157, v7, v8, 1 // 00000001CFE4: D1FE009D 02061107 - v_cndmask_b32_e64 v157, v10, v157, s[34:35] // 00000001CFEC: D100009D 008B3B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001CFF4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001CFFC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001D004: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001D00C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D014: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D01C: 86A2221E - v_add_lshl_u32 v159, v6, v4, 1 // 00000001D020: D1FE009F 02060906 - v_cndmask_b32_e64 v159, v10, v159, s[34:35] // 00000001D028: D100009F 008B3F0A - buffer_load_short_d16 v158, v159, s[20:23], 0 offen // 00000001D030: E0901000 80059E9F - v_add_lshl_u32 v159, v7, v4, 1 // 00000001D038: D1FE009F 02060907 - v_cndmask_b32_e64 v159, v10, v159, s[34:35] // 00000001D040: D100009F 008B3F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001D048: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D050: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D058: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D060: 86A2221E - v_add_lshl_u32 v161, v6, v8, 1 // 00000001D064: D1FE00A1 02061106 - v_cndmask_b32_e64 v161, v10, v161, s[34:35] // 00000001D06C: D10000A1 008B430A - buffer_load_short_d16 v160, v161, s[20:23], 0 offen // 
00000001D074: E0901000 8005A0A1 - v_add_lshl_u32 v161, v7, v8, 1 // 00000001D07C: D1FE00A1 02061107 - v_cndmask_b32_e64 v161, v10, v161, s[34:35] // 00000001D084: D10000A1 008B430A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001D08C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D094: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D09C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D0A4: 86A2221E - v_add_lshl_u32 v163, v6, v8, 1 // 00000001D0A8: D1FE00A3 02061106 - v_cndmask_b32_e64 v163, v10, v163, s[34:35] // 00000001D0B0: D10000A3 008B470A - buffer_load_short_d16 v162, v163, s[20:23], 0 offen // 00000001D0B8: E0901000 8005A2A3 - v_add_lshl_u32 v163, v7, v8, 1 // 00000001D0C0: D1FE00A3 02061107 - v_cndmask_b32_e64 v163, v10, v163, s[34:35] // 00000001D0C8: D10000A3 008B470A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001D0D0: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D0D8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D0E0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D0E8: 86A2221E - v_add_lshl_u32 v165, v6, v8, 1 // 00000001D0EC: D1FE00A5 02061106 - v_cndmask_b32_e64 v165, v10, v165, s[34:35] // 00000001D0F4: D10000A5 008B4B0A - buffer_load_short_d16 v164, v165, s[20:23], 0 offen // 00000001D0FC: E0901000 8005A4A5 - v_add_lshl_u32 v165, v7, v8, 1 // 00000001D104: D1FE00A5 02061107 - v_cndmask_b32_e64 v165, v10, v165, s[34:35] // 00000001D10C: D10000A5 008B4B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001D114: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D11C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D124: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D12C: 86A2221E - v_add_lshl_u32 v167, v6, v8, 1 // 00000001D130: D1FE00A7 02061106 - v_cndmask_b32_e64 v167, v10, v167, s[34:35] // 00000001D138: D10000A7 008B4F0A - buffer_load_short_d16 v166, v167, s[20:23], 0 offen 
// 00000001D140: E0901000 8005A6A7 - v_add_lshl_u32 v167, v7, v8, 1 // 00000001D148: D1FE00A7 02061107 - v_cndmask_b32_e64 v167, v10, v167, s[34:35] // 00000001D150: D10000A7 008B4F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001D158: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D160: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D168: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D170: 86A2221E - v_add_lshl_u32 v169, v6, v8, 1 // 00000001D174: D1FE00A9 02061106 - v_cndmask_b32_e64 v169, v10, v169, s[34:35] // 00000001D17C: D10000A9 008B530A - buffer_load_short_d16 v168, v169, s[20:23], 0 offen // 00000001D184: E0901000 8005A8A9 - v_add_lshl_u32 v169, v7, v8, 1 // 00000001D18C: D1FE00A9 02061107 - v_cndmask_b32_e64 v169, v10, v169, s[34:35] // 00000001D194: D10000A9 008B530A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001D19C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D1A4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D1AC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D1B4: 86A2221E - v_add_lshl_u32 v171, v6, v8, 1 // 00000001D1B8: D1FE00AB 02061106 - v_cndmask_b32_e64 v171, v10, v171, s[34:35] // 00000001D1C0: D10000AB 008B570A - buffer_load_short_d16 v170, v171, s[20:23], 0 offen // 00000001D1C8: E0901000 8005AAAB - v_add_lshl_u32 v171, v7, v8, 1 // 00000001D1D0: D1FE00AB 02061107 - v_cndmask_b32_e64 v171, v10, v171, s[34:35] // 00000001D1D8: D10000AB 008B570A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001D1E0: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D1E8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D1F0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D1F8: 86A2221E - v_add_lshl_u32 v173, v6, v8, 1 // 00000001D1FC: D1FE00AD 02061106 - v_cndmask_b32_e64 v173, v10, v173, s[34:35] // 00000001D204: D10000AD 008B5B0A - buffer_load_short_d16 v172, v173, s[20:23], 0 
offen // 00000001D20C: E0901000 8005ACAD - v_add_lshl_u32 v173, v7, v8, 1 // 00000001D214: D1FE00AD 02061107 - v_cndmask_b32_e64 v173, v10, v173, s[34:35] // 00000001D21C: D10000AD 008B5B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001D224: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001D22C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001D234: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001D23C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D244: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D24C: 86A2221E - v_add_lshl_u32 v175, v6, v4, 1 // 00000001D250: D1FE00AF 02060906 - v_cndmask_b32_e64 v175, v10, v175, s[34:35] // 00000001D258: D10000AF 008B5F0A - buffer_load_short_d16 v174, v175, s[20:23], 0 offen // 00000001D260: E0901000 8005AEAF - v_add_lshl_u32 v175, v7, v4, 1 // 00000001D268: D1FE00AF 02060907 - v_cndmask_b32_e64 v175, v10, v175, s[34:35] // 00000001D270: D10000AF 008B5F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001D278: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D280: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D288: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D290: 86A2221E - v_add_lshl_u32 v177, v6, v8, 1 // 00000001D294: D1FE00B1 02061106 - v_cndmask_b32_e64 v177, v10, v177, s[34:35] // 00000001D29C: D10000B1 008B630A - buffer_load_short_d16 v176, v177, s[20:23], 0 offen // 00000001D2A4: E0901000 8005B0B1 - v_add_lshl_u32 v177, v7, v8, 1 // 00000001D2AC: D1FE00B1 02061107 - v_cndmask_b32_e64 v177, v10, v177, s[34:35] // 00000001D2B4: D10000B1 008B630A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001D2BC: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D2C4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D2CC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D2D4: 86A2221E - v_add_lshl_u32 v179, v6, v8, 1 // 00000001D2D8: D1FE00B3 02061106 - 
v_cndmask_b32_e64 v179, v10, v179, s[34:35] // 00000001D2E0: D10000B3 008B670A - buffer_load_short_d16 v178, v179, s[20:23], 0 offen // 00000001D2E8: E0901000 8005B2B3 - v_add_lshl_u32 v179, v7, v8, 1 // 00000001D2F0: D1FE00B3 02061107 - v_cndmask_b32_e64 v179, v10, v179, s[34:35] // 00000001D2F8: D10000B3 008B670A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001D300: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D308: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D310: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D318: 86A2221E - v_add_lshl_u32 v181, v6, v8, 1 // 00000001D31C: D1FE00B5 02061106 - v_cndmask_b32_e64 v181, v10, v181, s[34:35] // 00000001D324: D10000B5 008B6B0A - buffer_load_short_d16 v180, v181, s[20:23], 0 offen // 00000001D32C: E0901000 8005B4B5 - v_add_lshl_u32 v181, v7, v8, 1 // 00000001D334: D1FE00B5 02061107 - v_cndmask_b32_e64 v181, v10, v181, s[34:35] // 00000001D33C: D10000B5 008B6B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001D344: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D34C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D354: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D35C: 86A2221E - v_add_lshl_u32 v183, v6, v8, 1 // 00000001D360: D1FE00B7 02061106 - v_cndmask_b32_e64 v183, v10, v183, s[34:35] // 00000001D368: D10000B7 008B6F0A - buffer_load_short_d16 v182, v183, s[20:23], 0 offen // 00000001D370: E0901000 8005B6B7 - v_add_lshl_u32 v183, v7, v8, 1 // 00000001D378: D1FE00B7 02061107 - v_cndmask_b32_e64 v183, v10, v183, s[34:35] // 00000001D380: D10000B7 008B6F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001D388: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D390: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D398: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D3A0: 86A2221E - v_add_lshl_u32 v185, v6, v8, 1 // 00000001D3A4: D1FE00B9 02061106 
- v_cndmask_b32_e64 v185, v10, v185, s[34:35] // 00000001D3AC: D10000B9 008B730A - buffer_load_short_d16 v184, v185, s[20:23], 0 offen // 00000001D3B4: E0901000 8005B8B9 - v_add_lshl_u32 v185, v7, v8, 1 // 00000001D3BC: D1FE00B9 02061107 - v_cndmask_b32_e64 v185, v10, v185, s[34:35] // 00000001D3C4: D10000B9 008B730A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001D3CC: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D3D4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D3DC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D3E4: 86A2221E - v_add_lshl_u32 v187, v6, v8, 1 // 00000001D3E8: D1FE00BB 02061106 - v_cndmask_b32_e64 v187, v10, v187, s[34:35] // 00000001D3F0: D10000BB 008B770A - buffer_load_short_d16 v186, v187, s[20:23], 0 offen // 00000001D3F8: E0901000 8005BABB - v_add_lshl_u32 v187, v7, v8, 1 // 00000001D400: D1FE00BB 02061107 - v_cndmask_b32_e64 v187, v10, v187, s[34:35] // 00000001D408: D10000BB 008B770A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001D410: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D418: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D420: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D428: 86A2221E - v_add_lshl_u32 v189, v6, v8, 1 // 00000001D42C: D1FE00BD 02061106 - v_cndmask_b32_e64 v189, v10, v189, s[34:35] // 00000001D434: D10000BD 008B7B0A - buffer_load_short_d16 v188, v189, s[20:23], 0 offen // 00000001D43C: E0901000 8005BCBD - v_add_lshl_u32 v189, v7, v8, 1 // 00000001D444: D1FE00BD 02061107 - v_cndmask_b32_e64 v189, v10, v189, s[34:35] // 00000001D44C: D10000BD 008B7B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001D454: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001D45C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001D464: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001D46C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D474: D0C90022 00003305 - 
s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D47C: 86A2221E - v_add_lshl_u32 v191, v6, v4, 1 // 00000001D480: D1FE00BF 02060906 - v_cndmask_b32_e64 v191, v10, v191, s[34:35] // 00000001D488: D10000BF 008B7F0A - buffer_load_short_d16 v190, v191, s[20:23], 0 offen // 00000001D490: E0901000 8005BEBF - v_add_lshl_u32 v191, v7, v4, 1 // 00000001D498: D1FE00BF 02060907 - v_cndmask_b32_e64 v191, v10, v191, s[34:35] // 00000001D4A0: D10000BF 008B7F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001D4A8: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D4B0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D4B8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D4C0: 86A2221E - v_add_lshl_u32 v193, v6, v8, 1 // 00000001D4C4: D1FE00C1 02061106 - v_cndmask_b32_e64 v193, v10, v193, s[34:35] // 00000001D4CC: D10000C1 008B830A - buffer_load_short_d16 v192, v193, s[20:23], 0 offen // 00000001D4D4: E0901000 8005C0C1 - v_add_lshl_u32 v193, v7, v8, 1 // 00000001D4DC: D1FE00C1 02061107 - v_cndmask_b32_e64 v193, v10, v193, s[34:35] // 00000001D4E4: D10000C1 008B830A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001D4EC: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D4F4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D4FC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D504: 86A2221E - v_add_lshl_u32 v195, v6, v8, 1 // 00000001D508: D1FE00C3 02061106 - v_cndmask_b32_e64 v195, v10, v195, s[34:35] // 00000001D510: D10000C3 008B870A - buffer_load_short_d16 v194, v195, s[20:23], 0 offen // 00000001D518: E0901000 8005C2C3 - v_add_lshl_u32 v195, v7, v8, 1 // 00000001D520: D1FE00C3 02061107 - v_cndmask_b32_e64 v195, v10, v195, s[34:35] // 00000001D528: D10000C3 008B870A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001D530: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D538: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D540: D0C90022 00003305 
- s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D548: 86A2221E - v_add_lshl_u32 v197, v6, v8, 1 // 00000001D54C: D1FE00C5 02061106 - v_cndmask_b32_e64 v197, v10, v197, s[34:35] // 00000001D554: D10000C5 008B8B0A - buffer_load_short_d16 v196, v197, s[20:23], 0 offen // 00000001D55C: E0901000 8005C4C5 - v_add_lshl_u32 v197, v7, v8, 1 // 00000001D564: D1FE00C5 02061107 - v_cndmask_b32_e64 v197, v10, v197, s[34:35] // 00000001D56C: D10000C5 008B8B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001D574: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D57C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D584: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D58C: 86A2221E - v_add_lshl_u32 v199, v6, v8, 1 // 00000001D590: D1FE00C7 02061106 - v_cndmask_b32_e64 v199, v10, v199, s[34:35] // 00000001D598: D10000C7 008B8F0A - buffer_load_short_d16 v198, v199, s[20:23], 0 offen // 00000001D5A0: E0901000 8005C6C7 - v_add_lshl_u32 v199, v7, v8, 1 // 00000001D5A8: D1FE00C7 02061107 - v_cndmask_b32_e64 v199, v10, v199, s[34:35] // 00000001D5B0: D10000C7 008B8F0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001D5B8: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D5C0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D5C8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D5D0: 86A2221E - v_add_lshl_u32 v201, v6, v8, 1 // 00000001D5D4: D1FE00C9 02061106 - v_cndmask_b32_e64 v201, v10, v201, s[34:35] // 00000001D5DC: D10000C9 008B930A - buffer_load_short_d16 v200, v201, s[20:23], 0 offen // 00000001D5E4: E0901000 8005C8C9 - v_add_lshl_u32 v201, v7, v8, 1 // 00000001D5EC: D1FE00C9 02061107 - v_cndmask_b32_e64 v201, v10, v201, s[34:35] // 00000001D5F4: D10000C9 008B930A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001D5FC: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D604: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D60C: D0C90022 
00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D614: 86A2221E - v_add_lshl_u32 v203, v6, v8, 1 // 00000001D618: D1FE00CB 02061106 - v_cndmask_b32_e64 v203, v10, v203, s[34:35] // 00000001D620: D10000CB 008B970A - buffer_load_short_d16 v202, v203, s[20:23], 0 offen // 00000001D628: E0901000 8005CACB - v_add_lshl_u32 v203, v7, v8, 1 // 00000001D630: D1FE00CB 02061107 - v_cndmask_b32_e64 v203, v10, v203, s[34:35] // 00000001D638: D10000CB 008B970A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001D640: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D648: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D650: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D658: 86A2221E - v_add_lshl_u32 v205, v6, v8, 1 // 00000001D65C: D1FE00CD 02061106 - v_cndmask_b32_e64 v205, v10, v205, s[34:35] // 00000001D664: D10000CD 008B9B0A - buffer_load_short_d16 v204, v205, s[20:23], 0 offen // 00000001D66C: E0901000 8005CCCD - v_add_lshl_u32 v205, v7, v8, 1 // 00000001D674: D1FE00CD 02061107 - v_cndmask_b32_e64 v205, v10, v205, s[34:35] // 00000001D67C: D10000CD 008B9B0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001D684: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001D68C: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001D694: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001D69C: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D6A4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D6AC: 86A2221E - v_add_lshl_u32 v207, v6, v4, 1 // 00000001D6B0: D1FE00CF 02060906 - v_cndmask_b32_e64 v207, v10, v207, s[34:35] // 00000001D6B8: D10000CF 008B9F0A - buffer_load_short_d16 v206, v207, s[20:23], 0 offen // 00000001D6C0: E0901000 8005CECF - v_add_lshl_u32 v207, v7, v4, 1 // 00000001D6C8: D1FE00CF 02060907 - v_cndmask_b32_e64 v207, v10, v207, s[34:35] // 00000001D6D0: D10000CF 008B9F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001D6D8: D1196A08 00010304 - 
v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D6E0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D6E8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D6F0: 86A2221E - v_add_lshl_u32 v209, v6, v8, 1 // 00000001D6F4: D1FE00D1 02061106 - v_cndmask_b32_e64 v209, v10, v209, s[34:35] // 00000001D6FC: D10000D1 008BA30A - buffer_load_short_d16 v208, v209, s[20:23], 0 offen // 00000001D704: E0901000 8005D0D1 - v_add_lshl_u32 v209, v7, v8, 1 // 00000001D70C: D1FE00D1 02061107 - v_cndmask_b32_e64 v209, v10, v209, s[34:35] // 00000001D714: D10000D1 008BA30A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001D71C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D724: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D72C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D734: 86A2221E - v_add_lshl_u32 v211, v6, v8, 1 // 00000001D738: D1FE00D3 02061106 - v_cndmask_b32_e64 v211, v10, v211, s[34:35] // 00000001D740: D10000D3 008BA70A - buffer_load_short_d16 v210, v211, s[20:23], 0 offen // 00000001D748: E0901000 8005D2D3 - v_add_lshl_u32 v211, v7, v8, 1 // 00000001D750: D1FE00D3 02061107 - v_cndmask_b32_e64 v211, v10, v211, s[34:35] // 00000001D758: D10000D3 008BA70A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001D760: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D768: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D770: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D778: 86A2221E - v_add_lshl_u32 v213, v6, v8, 1 // 00000001D77C: D1FE00D5 02061106 - v_cndmask_b32_e64 v213, v10, v213, s[34:35] // 00000001D784: D10000D5 008BAB0A - buffer_load_short_d16 v212, v213, s[20:23], 0 offen // 00000001D78C: E0901000 8005D4D5 - v_add_lshl_u32 v213, v7, v8, 1 // 00000001D794: D1FE00D5 02061107 - v_cndmask_b32_e64 v213, v10, v213, s[34:35] // 00000001D79C: D10000D5 008BAB0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001D7A4: D1196A08 00010904 
- v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D7AC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D7B4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D7BC: 86A2221E - v_add_lshl_u32 v215, v6, v8, 1 // 00000001D7C0: D1FE00D7 02061106 - v_cndmask_b32_e64 v215, v10, v215, s[34:35] // 00000001D7C8: D10000D7 008BAF0A - buffer_load_short_d16 v214, v215, s[20:23], 0 offen // 00000001D7D0: E0901000 8005D6D7 - v_add_lshl_u32 v215, v7, v8, 1 // 00000001D7D8: D1FE00D7 02061107 - v_cndmask_b32_e64 v215, v10, v215, s[34:35] // 00000001D7E0: D10000D7 008BAF0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001D7E8: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D7F0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D7F8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D800: 86A2221E - v_add_lshl_u32 v217, v6, v8, 1 // 00000001D804: D1FE00D9 02061106 - v_cndmask_b32_e64 v217, v10, v217, s[34:35] // 00000001D80C: D10000D9 008BB30A - buffer_load_short_d16 v216, v217, s[20:23], 0 offen // 00000001D814: E0901000 8005D8D9 - v_add_lshl_u32 v217, v7, v8, 1 // 00000001D81C: D1FE00D9 02061107 - v_cndmask_b32_e64 v217, v10, v217, s[34:35] // 00000001D824: D10000D9 008BB30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001D82C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D834: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D83C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D844: 86A2221E - v_add_lshl_u32 v219, v6, v8, 1 // 00000001D848: D1FE00DB 02061106 - v_cndmask_b32_e64 v219, v10, v219, s[34:35] // 00000001D850: D10000DB 008BB70A - buffer_load_short_d16 v218, v219, s[20:23], 0 offen // 00000001D858: E0901000 8005DADB - v_add_lshl_u32 v219, v7, v8, 1 // 00000001D860: D1FE00DB 02061107 - v_cndmask_b32_e64 v219, v10, v219, s[34:35] // 00000001D868: D10000DB 008BB70A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001D870: D1196A08 
00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D878: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D880: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D888: 86A2221E - v_add_lshl_u32 v221, v6, v8, 1 // 00000001D88C: D1FE00DD 02061106 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 00000001D894: D10000DD 008BBB0A - buffer_load_short_d16 v220, v221, s[20:23], 0 offen // 00000001D89C: E0901000 8005DCDD - v_add_lshl_u32 v221, v7, v8, 1 // 00000001D8A4: D1FE00DD 02061107 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 00000001D8AC: D10000DD 008BBB0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001D8B4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001D8BC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001D8C4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001D8CC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D8D4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D8DC: 86A2221E - v_add_lshl_u32 v223, v6, v4, 1 // 00000001D8E0: D1FE00DF 02060906 - v_cndmask_b32_e64 v223, v10, v223, s[34:35] // 00000001D8E8: D10000DF 008BBF0A - buffer_load_short_d16 v222, v223, s[20:23], 0 offen // 00000001D8F0: E0901000 8005DEDF - v_add_lshl_u32 v223, v7, v4, 1 // 00000001D8F8: D1FE00DF 02060907 - v_cndmask_b32_e64 v223, v10, v223, s[34:35] // 00000001D900: D10000DF 008BBF0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001D908: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D910: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D918: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D920: 86A2221E - v_add_lshl_u32 v225, v6, v8, 1 // 00000001D924: D1FE00E1 02061106 - v_cndmask_b32_e64 v225, v10, v225, s[34:35] // 00000001D92C: D10000E1 008BC30A - buffer_load_short_d16 v224, v225, s[20:23], 0 offen // 00000001D934: E0901000 8005E0E1 - v_add_lshl_u32 v225, v7, v8, 1 // 00000001D93C: D1FE00E1 02061107 - 
v_cndmask_b32_e64 v225, v10, v225, s[34:35] // 00000001D944: D10000E1 008BC30A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001D94C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D954: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D95C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D964: 86A2221E - v_add_lshl_u32 v227, v6, v8, 1 // 00000001D968: D1FE00E3 02061106 - v_cndmask_b32_e64 v227, v10, v227, s[34:35] // 00000001D970: D10000E3 008BC70A - buffer_load_short_d16 v226, v227, s[20:23], 0 offen // 00000001D978: E0901000 8005E2E3 - v_add_lshl_u32 v227, v7, v8, 1 // 00000001D980: D1FE00E3 02061107 - v_cndmask_b32_e64 v227, v10, v227, s[34:35] // 00000001D988: D10000E3 008BC70A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001D990: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D998: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D9A0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D9A8: 86A2221E - v_add_lshl_u32 v229, v6, v8, 1 // 00000001D9AC: D1FE00E5 02061106 - v_cndmask_b32_e64 v229, v10, v229, s[34:35] // 00000001D9B4: D10000E5 008BCB0A - buffer_load_short_d16 v228, v229, s[20:23], 0 offen // 00000001D9BC: E0901000 8005E4E5 - v_add_lshl_u32 v229, v7, v8, 1 // 00000001D9C4: D1FE00E5 02061107 - v_cndmask_b32_e64 v229, v10, v229, s[34:35] // 00000001D9CC: D10000E5 008BCB0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001D9D4: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001D9DC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001D9E4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001D9EC: 86A2221E - v_add_lshl_u32 v231, v6, v8, 1 // 00000001D9F0: D1FE00E7 02061106 - v_cndmask_b32_e64 v231, v10, v231, s[34:35] // 00000001D9F8: D10000E7 008BCF0A - buffer_load_short_d16 v230, v231, s[20:23], 0 offen // 00000001DA00: E0901000 8005E6E7 - v_add_lshl_u32 v231, v7, v8, 1 // 00000001DA08: D1FE00E7 02061107 
- v_cndmask_b32_e64 v231, v10, v231, s[34:35] // 00000001DA10: D10000E7 008BCF0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001DA18: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001DA20: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001DA28: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001DA30: 86A2221E - v_add_lshl_u32 v233, v6, v8, 1 // 00000001DA34: D1FE00E9 02061106 - v_cndmask_b32_e64 v233, v10, v233, s[34:35] // 00000001DA3C: D10000E9 008BD30A - buffer_load_short_d16 v232, v233, s[20:23], 0 offen // 00000001DA44: E0901000 8005E8E9 - v_add_lshl_u32 v233, v7, v8, 1 // 00000001DA4C: D1FE00E9 02061107 - v_cndmask_b32_e64 v233, v10, v233, s[34:35] // 00000001DA54: D10000E9 008BD30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001DA5C: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001DA64: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001DA6C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001DA74: 86A2221E - v_add_lshl_u32 v235, v6, v8, 1 // 00000001DA78: D1FE00EB 02061106 - v_cndmask_b32_e64 v235, v10, v235, s[34:35] // 00000001DA80: D10000EB 008BD70A - buffer_load_short_d16 v234, v235, s[20:23], 0 offen // 00000001DA88: E0901000 8005EAEB - v_add_lshl_u32 v235, v7, v8, 1 // 00000001DA90: D1FE00EB 02061107 - v_cndmask_b32_e64 v235, v10, v235, s[34:35] // 00000001DA98: D10000EB 008BD70A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001DAA0: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001DAA8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001DAB0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001DAB8: 86A2221E - v_add_lshl_u32 v237, v6, v8, 1 // 00000001DABC: D1FE00ED 02061106 - v_cndmask_b32_e64 v237, v10, v237, s[34:35] // 00000001DAC4: D10000ED 008BDB0A - buffer_load_short_d16 v236, v237, s[20:23], 0 offen // 00000001DACC: E0901000 8005ECED - v_add_lshl_u32 v237, v7, v8, 1 // 00000001DAD4: D1FE00ED 
02061107 - v_cndmask_b32_e64 v237, v10, v237, s[34:35] // 00000001DADC: D10000ED 008BDB0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001DAE4: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001DAEC: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001DAF4: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001DAFC: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001DB04: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001DB0C: 86A2221E - v_add_lshl_u32 v239, v6, v4, 1 // 00000001DB10: D1FE00EF 02060906 - v_cndmask_b32_e64 v239, v10, v239, s[34:35] // 00000001DB18: D10000EF 008BDF0A - buffer_load_short_d16 v238, v239, s[20:23], 0 offen // 00000001DB20: E0901000 8005EEEF - v_add_lshl_u32 v239, v7, v4, 1 // 00000001DB28: D1FE00EF 02060907 - v_cndmask_b32_e64 v239, v10, v239, s[34:35] // 00000001DB30: D10000EF 008BDF0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001DB38: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001DB40: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001DB48: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001DB50: 86A2221E - v_add_lshl_u32 v241, v6, v8, 1 // 00000001DB54: D1FE00F1 02061106 - v_cndmask_b32_e64 v241, v10, v241, s[34:35] // 00000001DB5C: D10000F1 008BE30A - buffer_load_short_d16 v240, v241, s[20:23], 0 offen // 00000001DB64: E0901000 8005F0F1 - v_add_lshl_u32 v241, v7, v8, 1 // 00000001DB6C: D1FE00F1 02061107 - v_cndmask_b32_e64 v241, v10, v241, s[34:35] // 00000001DB74: D10000F1 008BE30A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001DB7C: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001DB84: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001DB8C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001DB94: 86A2221E - v_add_lshl_u32 v243, v6, v8, 1 // 00000001DB98: D1FE00F3 02061106 - v_cndmask_b32_e64 v243, v10, v243, s[34:35] // 00000001DBA0: D10000F3 008BE70A - 
buffer_load_short_d16 v242, v243, s[20:23], 0 offen // 00000001DBA8: E0901000 8005F2F3 - v_add_lshl_u32 v243, v7, v8, 1 // 00000001DBB0: D1FE00F3 02061107 - v_cndmask_b32_e64 v243, v10, v243, s[34:35] // 00000001DBB8: D10000F3 008BE70A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001DBC0: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001DBC8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001DBD0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001DBD8: 86A2221E - v_add_lshl_u32 v245, v6, v8, 1 // 00000001DBDC: D1FE00F5 02061106 - v_cndmask_b32_e64 v245, v10, v245, s[34:35] // 00000001DBE4: D10000F5 008BEB0A - buffer_load_short_d16 v244, v245, s[20:23], 0 offen // 00000001DBEC: E0901000 8005F4F5 - v_add_lshl_u32 v245, v7, v8, 1 // 00000001DBF4: D1FE00F5 02061107 - v_cndmask_b32_e64 v245, v10, v245, s[34:35] // 00000001DBFC: D10000F5 008BEB0A - v_accvgpr_read_b32 v15, a98 // 00000001DC04: D3D8400F 18000162 - v_accvgpr_read_b32 v16, a102 // 00000001DC0C: D3D84010 18000166 - v_accvgpr_read_b32 v17, a106 // 00000001DC14: D3D84011 1800016A - v_accvgpr_read_b32 v18, a110 // 00000001DC1C: D3D84012 1800016E - v_accvgpr_read_b32 v19, a114 // 00000001DC24: D3D84013 18000172 - v_accvgpr_read_b32 v20, a118 // 00000001DC2C: D3D84014 18000176 - v_accvgpr_read_b32 v21, a122 // 00000001DC34: D3D84015 1800017A - v_accvgpr_read_b32 v22, a126 // 00000001DC3C: D3D84016 1800017E - v_accvgpr_read_b32 v23, a130 // 00000001DC44: D3D84017 18000182 - v_accvgpr_read_b32 v24, a134 // 00000001DC4C: D3D84018 18000186 - v_accvgpr_read_b32 v25, a138 // 00000001DC54: D3D84019 1800018A - v_accvgpr_read_b32 v26, a142 // 00000001DC5C: D3D8401A 1800018E - v_accvgpr_read_b32 v27, a146 // 00000001DC64: D3D8401B 18000192 - v_accvgpr_read_b32 v28, a150 // 00000001DC6C: D3D8401C 18000196 - v_accvgpr_read_b32 v29, a154 // 00000001DC74: D3D8401D 1800019A - v_accvgpr_read_b32 v30, a158 // 00000001DC7C: D3D8401E 1800019E - v_accvgpr_read_b32 v31, a162 // 
00000001DC84: D3D8401F 180001A2 - v_accvgpr_read_b32 v32, a166 // 00000001DC8C: D3D84020 180001A6 - v_accvgpr_read_b32 v33, a170 // 00000001DC94: D3D84021 180001AA - v_accvgpr_read_b32 v34, a174 // 00000001DC9C: D3D84022 180001AE - v_accvgpr_read_b32 v35, a178 // 00000001DCA4: D3D84023 180001B2 - v_accvgpr_read_b32 v36, a182 // 00000001DCAC: D3D84024 180001B6 - v_accvgpr_read_b32 v37, a186 // 00000001DCB4: D3D84025 180001BA - v_accvgpr_read_b32 v38, a190 // 00000001DCBC: D3D84026 180001BE - v_accvgpr_read_b32 v39, a194 // 00000001DCC4: D3D84027 180001C2 - v_accvgpr_read_b32 v40, a198 // 00000001DCCC: D3D84028 180001C6 - v_accvgpr_read_b32 v41, a202 // 00000001DCD4: D3D84029 180001CA - v_accvgpr_read_b32 v42, a206 // 00000001DCDC: D3D8402A 180001CE - v_accvgpr_read_b32 v43, a210 // 00000001DCE4: D3D8402B 180001D2 - v_accvgpr_read_b32 v44, a214 // 00000001DCEC: D3D8402C 180001D6 - v_accvgpr_read_b32 v45, a218 // 00000001DCF4: D3D8402D 180001DA - v_accvgpr_read_b32 v46, a222 // 00000001DCFC: D3D8402E 180001DE - v_accvgpr_read_b32 v47, a226 // 00000001DD04: D3D8402F 180001E2 - v_accvgpr_read_b32 v48, a230 // 00000001DD0C: D3D84030 180001E6 - v_accvgpr_read_b32 v49, a234 // 00000001DD14: D3D84031 180001EA - v_accvgpr_read_b32 v50, a238 // 00000001DD1C: D3D84032 180001EE - v_accvgpr_read_b32 v51, a242 // 00000001DD24: D3D84033 180001F2 - v_accvgpr_read_b32 v52, a246 // 00000001DD2C: D3D84034 180001F6 - v_accvgpr_read_b32 v53, a250 // 00000001DD34: D3D84035 180001FA - v_accvgpr_read_b32 v54, a254 // 00000001DD3C: D3D84036 180001FE - v_accvgpr_read_b32 v55, a3 // 00000001DD44: D3D84037 18000103 - v_accvgpr_read_b32 v56, a7 // 00000001DD4C: D3D84038 18000107 - v_accvgpr_read_b32 v57, a11 // 00000001DD54: D3D84039 1800010B - v_accvgpr_read_b32 v58, a15 // 00000001DD5C: D3D8403A 1800010F - v_accvgpr_read_b32 v59, a19 // 00000001DD64: D3D8403B 18000113 - v_accvgpr_read_b32 v60, a23 // 00000001DD6C: D3D8403C 18000117 - v_accvgpr_read_b32 v61, a27 // 00000001DD74: D3D8403D 
1800011B - v_accvgpr_read_b32 v62, a31 // 00000001DD7C: D3D8403E 1800011F - v_accvgpr_read_b32 v63, a35 // 00000001DD84: D3D8403F 18000123 - v_accvgpr_read_b32 v64, a39 // 00000001DD8C: D3D84040 18000127 - v_accvgpr_read_b32 v65, a43 // 00000001DD94: D3D84041 1800012B - v_accvgpr_read_b32 v66, a47 // 00000001DD9C: D3D84042 1800012F - v_accvgpr_read_b32 v67, a51 // 00000001DDA4: D3D84043 18000133 - v_accvgpr_read_b32 v68, a55 // 00000001DDAC: D3D84044 18000137 - v_accvgpr_read_b32 v69, a59 // 00000001DDB4: D3D84045 1800013B - v_accvgpr_read_b32 v70, a63 // 00000001DDBC: D3D84046 1800013F - v_accvgpr_read_b32 v71, a67 // 00000001DDC4: D3D84047 18000143 - v_accvgpr_read_b32 v72, a71 // 00000001DDCC: D3D84048 18000147 - v_accvgpr_read_b32 v73, a75 // 00000001DDD4: D3D84049 1800014B - v_accvgpr_read_b32 v74, a79 // 00000001DDDC: D3D8404A 1800014F - v_accvgpr_read_b32 v75, a83 // 00000001DDE4: D3D8404B 18000153 - v_accvgpr_read_b32 v76, a87 // 00000001DDEC: D3D8404C 18000157 - v_accvgpr_read_b32 v77, a91 // 00000001DDF4: D3D8404D 1800015B - v_accvgpr_read_b32 v78, a95 // 00000001DDFC: D3D8404E 1800015F - v_accvgpr_read_b32 v79, a99 // 00000001DE04: D3D8404F 18000163 - v_accvgpr_read_b32 v80, a103 // 00000001DE0C: D3D84050 18000167 - v_accvgpr_read_b32 v81, a107 // 00000001DE14: D3D84051 1800016B - v_accvgpr_read_b32 v82, a111 // 00000001DE1C: D3D84052 1800016F - v_accvgpr_read_b32 v83, a115 // 00000001DE24: D3D84053 18000173 - v_accvgpr_read_b32 v84, a119 // 00000001DE2C: D3D84054 18000177 - v_accvgpr_read_b32 v85, a123 // 00000001DE34: D3D84055 1800017B - v_accvgpr_read_b32 v86, a127 // 00000001DE3C: D3D84056 1800017F - v_accvgpr_read_b32 v87, a131 // 00000001DE44: D3D84057 18000183 - v_accvgpr_read_b32 v88, a135 // 00000001DE4C: D3D84058 18000187 - v_accvgpr_read_b32 v89, a139 // 00000001DE54: D3D84059 1800018B - v_accvgpr_read_b32 v90, a143 // 00000001DE5C: D3D8405A 1800018F - v_mul_f32_e32 v15, s44, v15 // 00000001DE64: 0A1E1E2C - v_pk_mul_f32 v[16:17], s[44:45], 
v[16:17] op_sel_hi:[0,1] // 00000001DE68: D3B14010 1002202C - v_pk_mul_f32 v[18:19], s[44:45], v[18:19] op_sel_hi:[0,1] // 00000001DE70: D3B14012 1002242C - v_pk_mul_f32 v[20:21], s[44:45], v[20:21] op_sel_hi:[0,1] // 00000001DE78: D3B14014 1002282C - v_pk_mul_f32 v[22:23], s[44:45], v[22:23] op_sel_hi:[0,1] // 00000001DE80: D3B14016 10022C2C - v_pk_mul_f32 v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 00000001DE88: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 00000001DE90: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 00000001DE98: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] op_sel_hi:[0,1] // 00000001DEA0: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 00000001DEA8: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 00000001DEB0: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 00000001DEB8: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 00000001DEC0: D3B14026 10024C2C - v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 00000001DEC8: D3B14028 1002502C - v_pk_mul_f32 v[42:43], s[44:45], v[42:43] op_sel_hi:[0,1] // 00000001DED0: D3B1402A 1002542C - v_pk_mul_f32 v[44:45], s[44:45], v[44:45] op_sel_hi:[0,1] // 00000001DED8: D3B1402C 1002582C - v_pk_mul_f32 v[46:47], s[44:45], v[46:47] op_sel_hi:[0,1] // 00000001DEE0: D3B1402E 10025C2C - v_pk_mul_f32 v[48:49], s[44:45], v[48:49] op_sel_hi:[0,1] // 00000001DEE8: D3B14030 1002602C - v_pk_mul_f32 v[50:51], s[44:45], v[50:51] op_sel_hi:[0,1] // 00000001DEF0: D3B14032 1002642C - v_pk_mul_f32 v[52:53], s[44:45], v[52:53] op_sel_hi:[0,1] // 00000001DEF8: D3B14034 1002682C - v_pk_mul_f32 v[54:55], s[44:45], v[54:55] op_sel_hi:[0,1] // 00000001DF00: D3B14036 10026C2C - v_pk_mul_f32 v[56:57], s[44:45], v[56:57] op_sel_hi:[0,1] // 00000001DF08: D3B14038 1002702C - v_pk_mul_f32 v[58:59], s[44:45], 
v[58:59] op_sel_hi:[0,1] // 00000001DF10: D3B1403A 1002742C - v_pk_mul_f32 v[60:61], s[44:45], v[60:61] op_sel_hi:[0,1] // 00000001DF18: D3B1403C 1002782C - v_pk_mul_f32 v[62:63], s[44:45], v[62:63] op_sel_hi:[0,1] // 00000001DF20: D3B1403E 10027C2C - v_pk_mul_f32 v[64:65], s[44:45], v[64:65] op_sel_hi:[0,1] // 00000001DF28: D3B14040 1002802C - v_pk_mul_f32 v[66:67], s[44:45], v[66:67] op_sel_hi:[0,1] // 00000001DF30: D3B14042 1002842C - v_pk_mul_f32 v[68:69], s[44:45], v[68:69] op_sel_hi:[0,1] // 00000001DF38: D3B14044 1002882C - v_pk_mul_f32 v[70:71], s[44:45], v[70:71] op_sel_hi:[0,1] // 00000001DF40: D3B14046 10028C2C - v_pk_mul_f32 v[72:73], s[44:45], v[72:73] op_sel_hi:[0,1] // 00000001DF48: D3B14048 1002902C - v_pk_mul_f32 v[74:75], s[44:45], v[74:75] op_sel_hi:[0,1] // 00000001DF50: D3B1404A 1002942C - v_pk_mul_f32 v[76:77], s[44:45], v[76:77] op_sel_hi:[0,1] // 00000001DF58: D3B1404C 1002982C - v_pk_mul_f32 v[78:79], s[44:45], v[78:79] op_sel_hi:[0,1] // 00000001DF60: D3B1404E 10029C2C - v_pk_mul_f32 v[80:81], s[44:45], v[80:81] op_sel_hi:[0,1] // 00000001DF68: D3B14050 1002A02C - v_pk_mul_f32 v[82:83], s[44:45], v[82:83] op_sel_hi:[0,1] // 00000001DF70: D3B14052 1002A42C - v_pk_mul_f32 v[84:85], s[44:45], v[84:85] op_sel_hi:[0,1] // 00000001DF78: D3B14054 1002A82C - v_pk_mul_f32 v[86:87], s[44:45], v[86:87] op_sel_hi:[0,1] // 00000001DF80: D3B14056 1002AC2C - v_pk_mul_f32 v[88:89], s[44:45], v[88:89] op_sel_hi:[0,1] // 00000001DF88: D3B14058 1002B02C - v_mul_f32_e32 v90, s44, v90 // 00000001DF90: 0AB4B42C - s_waitcnt vmcnt(0) // 00000001DF94: BF8C0F70 - v_mov_b32_e32 v12, 0xffff0000 // 00000001DF98: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 00000001DFA0: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 00000001DFA8: 7E1C02FF 00007FFF - v_cvt_f32_bf16_sdwa v8, v91 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001DFB0: 7E10B6F9 0004165B - v_fmac_f32_e64 v15, v8, s45 // 00000001DFB8: D13B000F 00005B08 - v_cvt_pk_bf16_f32 v15, 
v15, v15 // 00000001DFC0: D268000F 00021F0F - buffer_store_short v15, v92, s[16:19], 0 offen nt // 00000001DFC8: E06A1000 80040F5C - v_cvt_f32_bf16_sdwa v8, v93 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001DFD0: 7E10B6F9 0004165D - v_fmac_f32_e64 v16, v8, s45 // 00000001DFD8: D13B0010 00005B08 - v_cvt_pk_bf16_f32 v16, v16, v16 // 00000001DFE0: D2680010 00022110 - buffer_store_short v16, v94, s[16:19], 0 offen nt // 00000001DFE8: E06A1000 8004105E - v_cvt_f32_bf16_sdwa v8, v95 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001DFF0: 7E10B6F9 0004165F - v_fmac_f32_e64 v17, v8, s45 // 00000001DFF8: D13B0011 00005B08 - v_cvt_pk_bf16_f32 v17, v17, v17 // 00000001E000: D2680011 00022311 - buffer_store_short v17, v96, s[16:19], 0 offen nt // 00000001E008: E06A1000 80041160 - v_cvt_f32_bf16_sdwa v8, v97 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E010: 7E10B6F9 00041661 - v_fmac_f32_e64 v18, v8, s45 // 00000001E018: D13B0012 00005B08 - v_cvt_pk_bf16_f32 v18, v18, v18 // 00000001E020: D2680012 00022512 - buffer_store_short v18, v98, s[16:19], 0 offen nt // 00000001E028: E06A1000 80041262 - v_cvt_f32_bf16_sdwa v8, v99 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E030: 7E10B6F9 00041663 - v_fmac_f32_e64 v19, v8, s45 // 00000001E038: D13B0013 00005B08 - v_cvt_pk_bf16_f32 v19, v19, v19 // 00000001E040: D2680013 00022713 - buffer_store_short v19, v100, s[16:19], 0 offen nt // 00000001E048: E06A1000 80041364 - v_cvt_f32_bf16_sdwa v8, v101 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E050: 7E10B6F9 00041665 - v_fmac_f32_e64 v20, v8, s45 // 00000001E058: D13B0014 00005B08 - v_cvt_pk_bf16_f32 v20, v20, v20 // 00000001E060: D2680014 00022914 - buffer_store_short v20, v102, s[16:19], 0 offen nt // 00000001E068: E06A1000 80041466 - v_cvt_f32_bf16_sdwa v8, v103 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E070: 7E10B6F9 00041667 - v_fmac_f32_e64 v21, v8, s45 // 
00000001E078: D13B0015 00005B08 - v_cvt_pk_bf16_f32 v21, v21, v21 // 00000001E080: D2680015 00022B15 - buffer_store_short v21, v104, s[16:19], 0 offen nt // 00000001E088: E06A1000 80041568 - v_cvt_f32_bf16_sdwa v8, v105 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E090: 7E10B6F9 00041669 - v_fmac_f32_e64 v22, v8, s45 // 00000001E098: D13B0016 00005B08 - v_cvt_pk_bf16_f32 v22, v22, v22 // 00000001E0A0: D2680016 00022D16 - buffer_store_short v22, v106, s[16:19], 0 offen nt // 00000001E0A8: E06A1000 8004166A - v_cvt_f32_bf16_sdwa v8, v107 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E0B0: 7E10B6F9 0004166B - v_fmac_f32_e64 v23, v8, s45 // 00000001E0B8: D13B0017 00005B08 - v_cvt_pk_bf16_f32 v23, v23, v23 // 00000001E0C0: D2680017 00022F17 - buffer_store_short v23, v108, s[16:19], 0 offen nt // 00000001E0C8: E06A1000 8004176C - v_cvt_f32_bf16_sdwa v8, v109 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E0D0: 7E10B6F9 0004166D - v_fmac_f32_e64 v24, v8, s45 // 00000001E0D8: D13B0018 00005B08 - v_cvt_pk_bf16_f32 v24, v24, v24 // 00000001E0E0: D2680018 00023118 - buffer_store_short v24, v110, s[16:19], 0 offen nt // 00000001E0E8: E06A1000 8004186E - v_cvt_f32_bf16_sdwa v8, v111 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E0F0: 7E10B6F9 0004166F - v_fmac_f32_e64 v25, v8, s45 // 00000001E0F8: D13B0019 00005B08 - v_cvt_pk_bf16_f32 v25, v25, v25 // 00000001E100: D2680019 00023319 - buffer_store_short v25, v112, s[16:19], 0 offen nt // 00000001E108: E06A1000 80041970 - v_cvt_f32_bf16_sdwa v8, v113 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E110: 7E10B6F9 00041671 - v_fmac_f32_e64 v26, v8, s45 // 00000001E118: D13B001A 00005B08 - v_cvt_pk_bf16_f32 v26, v26, v26 // 00000001E120: D268001A 0002351A - buffer_store_short v26, v114, s[16:19], 0 offen nt // 00000001E128: E06A1000 80041A72 - v_cvt_f32_bf16_sdwa v8, v115 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 
00000001E130: 7E10B6F9 00041673 - v_fmac_f32_e64 v27, v8, s45 // 00000001E138: D13B001B 00005B08 - v_cvt_pk_bf16_f32 v27, v27, v27 // 00000001E140: D268001B 0002371B - buffer_store_short v27, v116, s[16:19], 0 offen nt // 00000001E148: E06A1000 80041B74 - v_cvt_f32_bf16_sdwa v8, v117 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E150: 7E10B6F9 00041675 - v_fmac_f32_e64 v28, v8, s45 // 00000001E158: D13B001C 00005B08 - v_cvt_pk_bf16_f32 v28, v28, v28 // 00000001E160: D268001C 0002391C - buffer_store_short v28, v118, s[16:19], 0 offen nt // 00000001E168: E06A1000 80041C76 - v_cvt_f32_bf16_sdwa v8, v119 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E170: 7E10B6F9 00041677 - v_fmac_f32_e64 v29, v8, s45 // 00000001E178: D13B001D 00005B08 - v_cvt_pk_bf16_f32 v29, v29, v29 // 00000001E180: D268001D 00023B1D - buffer_store_short v29, v120, s[16:19], 0 offen nt // 00000001E188: E06A1000 80041D78 - v_cvt_f32_bf16_sdwa v8, v121 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E190: 7E10B6F9 00041679 - v_fmac_f32_e64 v30, v8, s45 // 00000001E198: D13B001E 00005B08 - v_cvt_pk_bf16_f32 v30, v30, v30 // 00000001E1A0: D268001E 00023D1E - buffer_store_short v30, v122, s[16:19], 0 offen nt // 00000001E1A8: E06A1000 80041E7A - v_cvt_f32_bf16_sdwa v8, v123 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E1B0: 7E10B6F9 0004167B - v_fmac_f32_e64 v31, v8, s45 // 00000001E1B8: D13B001F 00005B08 - v_cvt_pk_bf16_f32 v31, v31, v31 // 00000001E1C0: D268001F 00023F1F - buffer_store_short v31, v124, s[16:19], 0 offen nt // 00000001E1C8: E06A1000 80041F7C - v_cvt_f32_bf16_sdwa v8, v125 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E1D0: 7E10B6F9 0004167D - v_fmac_f32_e64 v32, v8, s45 // 00000001E1D8: D13B0020 00005B08 - v_cvt_pk_bf16_f32 v32, v32, v32 // 00000001E1E0: D2680020 00024120 - buffer_store_short v32, v126, s[16:19], 0 offen nt // 00000001E1E8: E06A1000 8004207E - v_cvt_f32_bf16_sdwa v8, 
v127 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E1F0: 7E10B6F9 0004167F - v_fmac_f32_e64 v33, v8, s45 // 00000001E1F8: D13B0021 00005B08 - v_cvt_pk_bf16_f32 v33, v33, v33 // 00000001E200: D2680021 00024321 - buffer_store_short v33, v128, s[16:19], 0 offen nt // 00000001E208: E06A1000 80042180 - v_cvt_f32_bf16_sdwa v8, v129 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E210: 7E10B6F9 00041681 - v_fmac_f32_e64 v34, v8, s45 // 00000001E218: D13B0022 00005B08 - v_cvt_pk_bf16_f32 v34, v34, v34 // 00000001E220: D2680022 00024522 - buffer_store_short v34, v130, s[16:19], 0 offen nt // 00000001E228: E06A1000 80042282 - v_cvt_f32_bf16_sdwa v8, v131 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E230: 7E10B6F9 00041683 - v_fmac_f32_e64 v35, v8, s45 // 00000001E238: D13B0023 00005B08 - v_cvt_pk_bf16_f32 v35, v35, v35 // 00000001E240: D2680023 00024723 - buffer_store_short v35, v135, s[16:19], 0 offen nt // 00000001E248: E06A1000 80042387 - v_cvt_f32_bf16_sdwa v8, v136 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E250: 7E10B6F9 00041688 - v_fmac_f32_e64 v36, v8, s45 // 00000001E258: D13B0024 00005B08 - v_cvt_pk_bf16_f32 v36, v36, v36 // 00000001E260: D2680024 00024924 - buffer_store_short v36, v137, s[16:19], 0 offen nt // 00000001E268: E06A1000 80042489 - v_cvt_f32_bf16_sdwa v8, v138 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E270: 7E10B6F9 0004168A - v_fmac_f32_e64 v37, v8, s45 // 00000001E278: D13B0025 00005B08 - v_cvt_pk_bf16_f32 v37, v37, v37 // 00000001E280: D2680025 00024B25 - buffer_store_short v37, v139, s[16:19], 0 offen nt // 00000001E288: E06A1000 8004258B - v_cvt_f32_bf16_sdwa v8, v140 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E290: 7E10B6F9 0004168C - v_fmac_f32_e64 v38, v8, s45 // 00000001E298: D13B0026 00005B08 - v_cvt_pk_bf16_f32 v38, v38, v38 // 00000001E2A0: D2680026 00024D26 - buffer_store_short v38, v141, s[16:19], 0 offen 
nt // 00000001E2A8: E06A1000 8004268D - v_cvt_f32_bf16_sdwa v8, v142 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E2B0: 7E10B6F9 0004168E - v_fmac_f32_e64 v39, v8, s45 // 00000001E2B8: D13B0027 00005B08 - v_cvt_pk_bf16_f32 v39, v39, v39 // 00000001E2C0: D2680027 00024F27 - buffer_store_short v39, v143, s[16:19], 0 offen nt // 00000001E2C8: E06A1000 8004278F - v_cvt_f32_bf16_sdwa v8, v144 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E2D0: 7E10B6F9 00041690 - v_fmac_f32_e64 v40, v8, s45 // 00000001E2D8: D13B0028 00005B08 - v_cvt_pk_bf16_f32 v40, v40, v40 // 00000001E2E0: D2680028 00025128 - buffer_store_short v40, v145, s[16:19], 0 offen nt // 00000001E2E8: E06A1000 80042891 - v_cvt_f32_bf16_sdwa v8, v146 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E2F0: 7E10B6F9 00041692 - v_fmac_f32_e64 v41, v8, s45 // 00000001E2F8: D13B0029 00005B08 - v_cvt_pk_bf16_f32 v41, v41, v41 // 00000001E300: D2680029 00025329 - buffer_store_short v41, v147, s[16:19], 0 offen nt // 00000001E308: E06A1000 80042993 - v_cvt_f32_bf16_sdwa v8, v148 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E310: 7E10B6F9 00041694 - v_fmac_f32_e64 v42, v8, s45 // 00000001E318: D13B002A 00005B08 - v_cvt_pk_bf16_f32 v42, v42, v42 // 00000001E320: D268002A 0002552A - buffer_store_short v42, v149, s[16:19], 0 offen nt // 00000001E328: E06A1000 80042A95 - v_cvt_f32_bf16_sdwa v8, v150 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E330: 7E10B6F9 00041696 - v_fmac_f32_e64 v43, v8, s45 // 00000001E338: D13B002B 00005B08 - v_cvt_pk_bf16_f32 v43, v43, v43 // 00000001E340: D268002B 0002572B - buffer_store_short v43, v151, s[16:19], 0 offen nt // 00000001E348: E06A1000 80042B97 - v_cvt_f32_bf16_sdwa v8, v152 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E350: 7E10B6F9 00041698 - v_fmac_f32_e64 v44, v8, s45 // 00000001E358: D13B002C 00005B08 - v_cvt_pk_bf16_f32 v44, v44, v44 // 00000001E360: 
D268002C 0002592C - buffer_store_short v44, v153, s[16:19], 0 offen nt // 00000001E368: E06A1000 80042C99 - v_cvt_f32_bf16_sdwa v8, v154 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E370: 7E10B6F9 0004169A - v_fmac_f32_e64 v45, v8, s45 // 00000001E378: D13B002D 00005B08 - v_cvt_pk_bf16_f32 v45, v45, v45 // 00000001E380: D268002D 00025B2D - buffer_store_short v45, v155, s[16:19], 0 offen nt // 00000001E388: E06A1000 80042D9B - v_cvt_f32_bf16_sdwa v8, v156 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E390: 7E10B6F9 0004169C - v_fmac_f32_e64 v46, v8, s45 // 00000001E398: D13B002E 00005B08 - v_cvt_pk_bf16_f32 v46, v46, v46 // 00000001E3A0: D268002E 00025D2E - buffer_store_short v46, v157, s[16:19], 0 offen nt // 00000001E3A8: E06A1000 80042E9D - v_cvt_f32_bf16_sdwa v8, v158 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E3B0: 7E10B6F9 0004169E - v_fmac_f32_e64 v47, v8, s45 // 00000001E3B8: D13B002F 00005B08 - v_cvt_pk_bf16_f32 v47, v47, v47 // 00000001E3C0: D268002F 00025F2F - buffer_store_short v47, v159, s[16:19], 0 offen nt // 00000001E3C8: E06A1000 80042F9F - v_cvt_f32_bf16_sdwa v8, v160 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E3D0: 7E10B6F9 000416A0 - v_fmac_f32_e64 v48, v8, s45 // 00000001E3D8: D13B0030 00005B08 - v_cvt_pk_bf16_f32 v48, v48, v48 // 00000001E3E0: D2680030 00026130 - buffer_store_short v48, v161, s[16:19], 0 offen nt // 00000001E3E8: E06A1000 800430A1 - v_cvt_f32_bf16_sdwa v8, v162 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E3F0: 7E10B6F9 000416A2 - v_fmac_f32_e64 v49, v8, s45 // 00000001E3F8: D13B0031 00005B08 - v_cvt_pk_bf16_f32 v49, v49, v49 // 00000001E400: D2680031 00026331 - buffer_store_short v49, v163, s[16:19], 0 offen nt // 00000001E408: E06A1000 800431A3 - v_cvt_f32_bf16_sdwa v8, v164 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E410: 7E10B6F9 000416A4 - v_fmac_f32_e64 v50, v8, s45 // 00000001E418: 
D13B0032 00005B08 - v_cvt_pk_bf16_f32 v50, v50, v50 // 00000001E420: D2680032 00026532 - buffer_store_short v50, v165, s[16:19], 0 offen nt // 00000001E428: E06A1000 800432A5 - v_cvt_f32_bf16_sdwa v8, v166 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E430: 7E10B6F9 000416A6 - v_fmac_f32_e64 v51, v8, s45 // 00000001E438: D13B0033 00005B08 - v_cvt_pk_bf16_f32 v51, v51, v51 // 00000001E440: D2680033 00026733 - buffer_store_short v51, v167, s[16:19], 0 offen nt // 00000001E448: E06A1000 800433A7 - v_cvt_f32_bf16_sdwa v8, v168 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E450: 7E10B6F9 000416A8 - v_fmac_f32_e64 v52, v8, s45 // 00000001E458: D13B0034 00005B08 - v_cvt_pk_bf16_f32 v52, v52, v52 // 00000001E460: D2680034 00026934 - buffer_store_short v52, v169, s[16:19], 0 offen nt // 00000001E468: E06A1000 800434A9 - v_cvt_f32_bf16_sdwa v8, v170 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E470: 7E10B6F9 000416AA - v_fmac_f32_e64 v53, v8, s45 // 00000001E478: D13B0035 00005B08 - v_cvt_pk_bf16_f32 v53, v53, v53 // 00000001E480: D2680035 00026B35 - buffer_store_short v53, v171, s[16:19], 0 offen nt // 00000001E488: E06A1000 800435AB - v_cvt_f32_bf16_sdwa v8, v172 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E490: 7E10B6F9 000416AC - v_fmac_f32_e64 v54, v8, s45 // 00000001E498: D13B0036 00005B08 - v_cvt_pk_bf16_f32 v54, v54, v54 // 00000001E4A0: D2680036 00026D36 - buffer_store_short v54, v173, s[16:19], 0 offen nt // 00000001E4A8: E06A1000 800436AD - v_cvt_f32_bf16_sdwa v8, v174 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E4B0: 7E10B6F9 000416AE - v_fmac_f32_e64 v55, v8, s45 // 00000001E4B8: D13B0037 00005B08 - v_cvt_pk_bf16_f32 v55, v55, v55 // 00000001E4C0: D2680037 00026F37 - buffer_store_short v55, v175, s[16:19], 0 offen nt // 00000001E4C8: E06A1000 800437AF - v_cvt_f32_bf16_sdwa v8, v176 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E4D0: 
7E10B6F9 000416B0 - v_fmac_f32_e64 v56, v8, s45 // 00000001E4D8: D13B0038 00005B08 - v_cvt_pk_bf16_f32 v56, v56, v56 // 00000001E4E0: D2680038 00027138 - buffer_store_short v56, v177, s[16:19], 0 offen nt // 00000001E4E8: E06A1000 800438B1 - v_cvt_f32_bf16_sdwa v8, v178 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E4F0: 7E10B6F9 000416B2 - v_fmac_f32_e64 v57, v8, s45 // 00000001E4F8: D13B0039 00005B08 - v_cvt_pk_bf16_f32 v57, v57, v57 // 00000001E500: D2680039 00027339 - buffer_store_short v57, v179, s[16:19], 0 offen nt // 00000001E508: E06A1000 800439B3 - v_cvt_f32_bf16_sdwa v8, v180 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E510: 7E10B6F9 000416B4 - v_fmac_f32_e64 v58, v8, s45 // 00000001E518: D13B003A 00005B08 - v_cvt_pk_bf16_f32 v58, v58, v58 // 00000001E520: D268003A 0002753A - buffer_store_short v58, v181, s[16:19], 0 offen nt // 00000001E528: E06A1000 80043AB5 - v_cvt_f32_bf16_sdwa v8, v182 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E530: 7E10B6F9 000416B6 - v_fmac_f32_e64 v59, v8, s45 // 00000001E538: D13B003B 00005B08 - v_cvt_pk_bf16_f32 v59, v59, v59 // 00000001E540: D268003B 0002773B - buffer_store_short v59, v183, s[16:19], 0 offen nt // 00000001E548: E06A1000 80043BB7 - v_cvt_f32_bf16_sdwa v8, v184 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E550: 7E10B6F9 000416B8 - v_fmac_f32_e64 v60, v8, s45 // 00000001E558: D13B003C 00005B08 - v_cvt_pk_bf16_f32 v60, v60, v60 // 00000001E560: D268003C 0002793C - buffer_store_short v60, v185, s[16:19], 0 offen nt // 00000001E568: E06A1000 80043CB9 - v_cvt_f32_bf16_sdwa v8, v186 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E570: 7E10B6F9 000416BA - v_fmac_f32_e64 v61, v8, s45 // 00000001E578: D13B003D 00005B08 - v_cvt_pk_bf16_f32 v61, v61, v61 // 00000001E580: D268003D 00027B3D - buffer_store_short v61, v187, s[16:19], 0 offen nt // 00000001E588: E06A1000 80043DBB - v_cvt_f32_bf16_sdwa v8, v188 
dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E590: 7E10B6F9 000416BC - v_fmac_f32_e64 v62, v8, s45 // 00000001E598: D13B003E 00005B08 - v_cvt_pk_bf16_f32 v62, v62, v62 // 00000001E5A0: D268003E 00027D3E - buffer_store_short v62, v189, s[16:19], 0 offen nt // 00000001E5A8: E06A1000 80043EBD - v_cvt_f32_bf16_sdwa v8, v190 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E5B0: 7E10B6F9 000416BE - v_fmac_f32_e64 v63, v8, s45 // 00000001E5B8: D13B003F 00005B08 - v_cvt_pk_bf16_f32 v63, v63, v63 // 00000001E5C0: D268003F 00027F3F - buffer_store_short v63, v191, s[16:19], 0 offen nt // 00000001E5C8: E06A1000 80043FBF - v_cvt_f32_bf16_sdwa v8, v192 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E5D0: 7E10B6F9 000416C0 - v_fmac_f32_e64 v64, v8, s45 // 00000001E5D8: D13B0040 00005B08 - v_cvt_pk_bf16_f32 v64, v64, v64 // 00000001E5E0: D2680040 00028140 - buffer_store_short v64, v193, s[16:19], 0 offen nt // 00000001E5E8: E06A1000 800440C1 - v_cvt_f32_bf16_sdwa v8, v194 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E5F0: 7E10B6F9 000416C2 - v_fmac_f32_e64 v65, v8, s45 // 00000001E5F8: D13B0041 00005B08 - v_cvt_pk_bf16_f32 v65, v65, v65 // 00000001E600: D2680041 00028341 - buffer_store_short v65, v195, s[16:19], 0 offen nt // 00000001E608: E06A1000 800441C3 - v_cvt_f32_bf16_sdwa v8, v196 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E610: 7E10B6F9 000416C4 - v_fmac_f32_e64 v66, v8, s45 // 00000001E618: D13B0042 00005B08 - v_cvt_pk_bf16_f32 v66, v66, v66 // 00000001E620: D2680042 00028542 - buffer_store_short v66, v197, s[16:19], 0 offen nt // 00000001E628: E06A1000 800442C5 - v_cvt_f32_bf16_sdwa v8, v198 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E630: 7E10B6F9 000416C6 - v_fmac_f32_e64 v67, v8, s45 // 00000001E638: D13B0043 00005B08 - v_cvt_pk_bf16_f32 v67, v67, v67 // 00000001E640: D2680043 00028743 - buffer_store_short v67, v199, s[16:19], 0 offen nt 
// 00000001E648: E06A1000 800443C7 - v_cvt_f32_bf16_sdwa v8, v200 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E650: 7E10B6F9 000416C8 - v_fmac_f32_e64 v68, v8, s45 // 00000001E658: D13B0044 00005B08 - v_cvt_pk_bf16_f32 v68, v68, v68 // 00000001E660: D2680044 00028944 - buffer_store_short v68, v201, s[16:19], 0 offen nt // 00000001E668: E06A1000 800444C9 - v_cvt_f32_bf16_sdwa v8, v202 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E670: 7E10B6F9 000416CA - v_fmac_f32_e64 v69, v8, s45 // 00000001E678: D13B0045 00005B08 - v_cvt_pk_bf16_f32 v69, v69, v69 // 00000001E680: D2680045 00028B45 - buffer_store_short v69, v203, s[16:19], 0 offen nt // 00000001E688: E06A1000 800445CB - v_cvt_f32_bf16_sdwa v8, v204 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E690: 7E10B6F9 000416CC - v_fmac_f32_e64 v70, v8, s45 // 00000001E698: D13B0046 00005B08 - v_cvt_pk_bf16_f32 v70, v70, v70 // 00000001E6A0: D2680046 00028D46 - buffer_store_short v70, v205, s[16:19], 0 offen nt // 00000001E6A8: E06A1000 800446CD - v_cvt_f32_bf16_sdwa v8, v206 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E6B0: 7E10B6F9 000416CE - v_fmac_f32_e64 v71, v8, s45 // 00000001E6B8: D13B0047 00005B08 - v_cvt_pk_bf16_f32 v71, v71, v71 // 00000001E6C0: D2680047 00028F47 - buffer_store_short v71, v207, s[16:19], 0 offen nt // 00000001E6C8: E06A1000 800447CF - v_cvt_f32_bf16_sdwa v8, v208 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E6D0: 7E10B6F9 000416D0 - v_fmac_f32_e64 v72, v8, s45 // 00000001E6D8: D13B0048 00005B08 - v_cvt_pk_bf16_f32 v72, v72, v72 // 00000001E6E0: D2680048 00029148 - buffer_store_short v72, v209, s[16:19], 0 offen nt // 00000001E6E8: E06A1000 800448D1 - v_cvt_f32_bf16_sdwa v8, v210 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E6F0: 7E10B6F9 000416D2 - v_fmac_f32_e64 v73, v8, s45 // 00000001E6F8: D13B0049 00005B08 - v_cvt_pk_bf16_f32 v73, v73, v73 // 00000001E700: 
D2680049 00029349 - buffer_store_short v73, v211, s[16:19], 0 offen nt // 00000001E708: E06A1000 800449D3 - v_cvt_f32_bf16_sdwa v8, v212 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E710: 7E10B6F9 000416D4 - v_fmac_f32_e64 v74, v8, s45 // 00000001E718: D13B004A 00005B08 - v_cvt_pk_bf16_f32 v74, v74, v74 // 00000001E720: D268004A 0002954A - buffer_store_short v74, v213, s[16:19], 0 offen nt // 00000001E728: E06A1000 80044AD5 - v_cvt_f32_bf16_sdwa v8, v214 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E730: 7E10B6F9 000416D6 - v_fmac_f32_e64 v75, v8, s45 // 00000001E738: D13B004B 00005B08 - v_cvt_pk_bf16_f32 v75, v75, v75 // 00000001E740: D268004B 0002974B - buffer_store_short v75, v215, s[16:19], 0 offen nt // 00000001E748: E06A1000 80044BD7 - v_cvt_f32_bf16_sdwa v8, v216 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E750: 7E10B6F9 000416D8 - v_fmac_f32_e64 v76, v8, s45 // 00000001E758: D13B004C 00005B08 - v_cvt_pk_bf16_f32 v76, v76, v76 // 00000001E760: D268004C 0002994C - buffer_store_short v76, v217, s[16:19], 0 offen nt // 00000001E768: E06A1000 80044CD9 - v_cvt_f32_bf16_sdwa v8, v218 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E770: 7E10B6F9 000416DA - v_fmac_f32_e64 v77, v8, s45 // 00000001E778: D13B004D 00005B08 - v_cvt_pk_bf16_f32 v77, v77, v77 // 00000001E780: D268004D 00029B4D - buffer_store_short v77, v219, s[16:19], 0 offen nt // 00000001E788: E06A1000 80044DDB - v_cvt_f32_bf16_sdwa v8, v220 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E790: 7E10B6F9 000416DC - v_fmac_f32_e64 v78, v8, s45 // 00000001E798: D13B004E 00005B08 - v_cvt_pk_bf16_f32 v78, v78, v78 // 00000001E7A0: D268004E 00029D4E - buffer_store_short v78, v221, s[16:19], 0 offen nt // 00000001E7A8: E06A1000 80044EDD - v_cvt_f32_bf16_sdwa v8, v222 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E7B0: 7E10B6F9 000416DE - v_fmac_f32_e64 v79, v8, s45 // 00000001E7B8: 
D13B004F 00005B08 - v_cvt_pk_bf16_f32 v79, v79, v79 // 00000001E7C0: D268004F 00029F4F - buffer_store_short v79, v223, s[16:19], 0 offen nt // 00000001E7C8: E06A1000 80044FDF - v_cvt_f32_bf16_sdwa v8, v224 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E7D0: 7E10B6F9 000416E0 - v_fmac_f32_e64 v80, v8, s45 // 00000001E7D8: D13B0050 00005B08 - v_cvt_pk_bf16_f32 v80, v80, v80 // 00000001E7E0: D2680050 0002A150 - buffer_store_short v80, v225, s[16:19], 0 offen nt // 00000001E7E8: E06A1000 800450E1 - v_cvt_f32_bf16_sdwa v8, v226 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E7F0: 7E10B6F9 000416E2 - v_fmac_f32_e64 v81, v8, s45 // 00000001E7F8: D13B0051 00005B08 - v_cvt_pk_bf16_f32 v81, v81, v81 // 00000001E800: D2680051 0002A351 - buffer_store_short v81, v227, s[16:19], 0 offen nt // 00000001E808: E06A1000 800451E3 - v_cvt_f32_bf16_sdwa v8, v228 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E810: 7E10B6F9 000416E4 - v_fmac_f32_e64 v82, v8, s45 // 00000001E818: D13B0052 00005B08 - v_cvt_pk_bf16_f32 v82, v82, v82 // 00000001E820: D2680052 0002A552 - buffer_store_short v82, v229, s[16:19], 0 offen nt // 00000001E828: E06A1000 800452E5 - v_cvt_f32_bf16_sdwa v8, v230 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E830: 7E10B6F9 000416E6 - v_fmac_f32_e64 v83, v8, s45 // 00000001E838: D13B0053 00005B08 - v_cvt_pk_bf16_f32 v83, v83, v83 // 00000001E840: D2680053 0002A753 - buffer_store_short v83, v231, s[16:19], 0 offen nt // 00000001E848: E06A1000 800453E7 - v_cvt_f32_bf16_sdwa v8, v232 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E850: 7E10B6F9 000416E8 - v_fmac_f32_e64 v84, v8, s45 // 00000001E858: D13B0054 00005B08 - v_cvt_pk_bf16_f32 v84, v84, v84 // 00000001E860: D2680054 0002A954 - buffer_store_short v84, v233, s[16:19], 0 offen nt // 00000001E868: E06A1000 800454E9 - v_cvt_f32_bf16_sdwa v8, v234 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E870: 
7E10B6F9 000416EA - v_fmac_f32_e64 v85, v8, s45 // 00000001E878: D13B0055 00005B08 - v_cvt_pk_bf16_f32 v85, v85, v85 // 00000001E880: D2680055 0002AB55 - buffer_store_short v85, v235, s[16:19], 0 offen nt // 00000001E888: E06A1000 800455EB - v_cvt_f32_bf16_sdwa v8, v236 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E890: 7E10B6F9 000416EC - v_fmac_f32_e64 v86, v8, s45 // 00000001E898: D13B0056 00005B08 - v_cvt_pk_bf16_f32 v86, v86, v86 // 00000001E8A0: D2680056 0002AD56 - buffer_store_short v86, v237, s[16:19], 0 offen nt // 00000001E8A8: E06A1000 800456ED - v_cvt_f32_bf16_sdwa v8, v238 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E8B0: 7E10B6F9 000416EE - v_fmac_f32_e64 v87, v8, s45 // 00000001E8B8: D13B0057 00005B08 - v_cvt_pk_bf16_f32 v87, v87, v87 // 00000001E8C0: D2680057 0002AF57 - buffer_store_short v87, v239, s[16:19], 0 offen nt // 00000001E8C8: E06A1000 800457EF - v_cvt_f32_bf16_sdwa v8, v240 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E8D0: 7E10B6F9 000416F0 - v_fmac_f32_e64 v88, v8, s45 // 00000001E8D8: D13B0058 00005B08 - v_cvt_pk_bf16_f32 v88, v88, v88 // 00000001E8E0: D2680058 0002B158 - buffer_store_short v88, v241, s[16:19], 0 offen nt // 00000001E8E8: E06A1000 800458F1 - v_cvt_f32_bf16_sdwa v8, v242 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E8F0: 7E10B6F9 000416F2 - v_fmac_f32_e64 v89, v8, s45 // 00000001E8F8: D13B0059 00005B08 - v_cvt_pk_bf16_f32 v89, v89, v89 // 00000001E900: D2680059 0002B359 - buffer_store_short v89, v243, s[16:19], 0 offen nt // 00000001E908: E06A1000 800459F3 - v_cvt_f32_bf16_sdwa v8, v244 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001E910: 7E10B6F9 000416F4 - v_fmac_f32_e64 v90, v8, s45 // 00000001E918: D13B005A 00005B08 - v_cvt_pk_bf16_f32 v90, v90, v90 // 00000001E920: D268005A 0002B55A - buffer_store_short v90, v245, s[16:19], 0 offen nt // 00000001E928: E06A1000 80045AF5 - s_nop 0 // 00000001E930: BF800000 - 
v_mov_b32_e32 v10, 0x80000000 // 00000001E934: 7E1402FF 80000000 - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001E93C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001E944: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001E94C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001E954: 86A2221E - v_add_lshl_u32 v44, v6, v8, 1 // 00000001E958: D1FE002C 02061106 - v_cndmask_b32_e64 v44, v10, v44, s[34:35] // 00000001E960: D100002C 008A590A - buffer_load_short_d16 v43, v44, s[20:23], 0 offen // 00000001E968: E0901000 80052B2C - v_add_lshl_u32 v44, v7, v8, 1 // 00000001E970: D1FE002C 02061107 - v_cndmask_b32_e64 v44, v10, v44, s[34:35] // 00000001E978: D100002C 008A590A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001E980: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001E988: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001E990: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001E998: 86A2221E - v_add_lshl_u32 v46, v6, v8, 1 // 00000001E99C: D1FE002E 02061106 - v_cndmask_b32_e64 v46, v10, v46, s[34:35] // 00000001E9A4: D100002E 008A5D0A - buffer_load_short_d16 v45, v46, s[20:23], 0 offen // 00000001E9AC: E0901000 80052D2E - v_add_lshl_u32 v46, v7, v8, 1 // 00000001E9B4: D1FE002E 02061107 - v_cndmask_b32_e64 v46, v10, v46, s[34:35] // 00000001E9BC: D100002E 008A5D0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001E9C4: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001E9CC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001E9D4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001E9DC: 86A2221E - v_add_lshl_u32 v48, v6, v8, 1 // 00000001E9E0: D1FE0030 02061106 - v_cndmask_b32_e64 v48, v10, v48, s[34:35] // 00000001E9E8: D1000030 008A610A - buffer_load_short_d16 v47, v48, s[20:23], 0 offen // 00000001E9F0: E0901000 80052F30 - v_add_lshl_u32 v48, v7, v8, 1 // 00000001E9F8: D1FE0030 02061107 - v_cndmask_b32_e64 v48, v10, v48, 
s[34:35] // 00000001EA00: D1000030 008A610A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001EA08: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EA10: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EA18: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EA20: 86A2221E - v_add_lshl_u32 v50, v6, v8, 1 // 00000001EA24: D1FE0032 02061106 - v_cndmask_b32_e64 v50, v10, v50, s[34:35] // 00000001EA2C: D1000032 008A650A - buffer_load_short_d16 v49, v50, s[20:23], 0 offen // 00000001EA34: E0901000 80053132 - v_add_lshl_u32 v50, v7, v8, 1 // 00000001EA3C: D1FE0032 02061107 - v_cndmask_b32_e64 v50, v10, v50, s[34:35] // 00000001EA44: D1000032 008A650A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001EA4C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001EA54: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001EA5C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001EA64: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EA6C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EA74: 86A2221E - v_add_lshl_u32 v52, v6, v4, 1 // 00000001EA78: D1FE0034 02060906 - v_cndmask_b32_e64 v52, v10, v52, s[34:35] // 00000001EA80: D1000034 008A690A - buffer_load_short_d16 v51, v52, s[20:23], 0 offen // 00000001EA88: E0901000 80053334 - v_add_lshl_u32 v52, v7, v4, 1 // 00000001EA90: D1FE0034 02060907 - v_cndmask_b32_e64 v52, v10, v52, s[34:35] // 00000001EA98: D1000034 008A690A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001EAA0: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EAA8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EAB0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EAB8: 86A2221E - v_add_lshl_u32 v54, v6, v8, 1 // 00000001EABC: D1FE0036 02061106 - v_cndmask_b32_e64 v54, v10, v54, s[34:35] // 00000001EAC4: D1000036 008A6D0A - buffer_load_short_d16 v53, v54, s[20:23], 0 offen // 00000001EACC: E0901000 
80053536 - v_add_lshl_u32 v54, v7, v8, 1 // 00000001EAD4: D1FE0036 02061107 - v_cndmask_b32_e64 v54, v10, v54, s[34:35] // 00000001EADC: D1000036 008A6D0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001EAE4: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EAEC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EAF4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EAFC: 86A2221E - v_add_lshl_u32 v56, v6, v8, 1 // 00000001EB00: D1FE0038 02061106 - v_cndmask_b32_e64 v56, v10, v56, s[34:35] // 00000001EB08: D1000038 008A710A - buffer_load_short_d16 v55, v56, s[20:23], 0 offen // 00000001EB10: E0901000 80053738 - v_add_lshl_u32 v56, v7, v8, 1 // 00000001EB18: D1FE0038 02061107 - v_cndmask_b32_e64 v56, v10, v56, s[34:35] // 00000001EB20: D1000038 008A710A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001EB28: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EB30: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EB38: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EB40: 86A2221E - v_add_lshl_u32 v58, v6, v8, 1 // 00000001EB44: D1FE003A 02061106 - v_cndmask_b32_e64 v58, v10, v58, s[34:35] // 00000001EB4C: D100003A 008A750A - buffer_load_short_d16 v57, v58, s[20:23], 0 offen // 00000001EB54: E0901000 8005393A - v_add_lshl_u32 v58, v7, v8, 1 // 00000001EB5C: D1FE003A 02061107 - v_cndmask_b32_e64 v58, v10, v58, s[34:35] // 00000001EB64: D100003A 008A750A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001EB6C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EB74: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EB7C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EB84: 86A2221E - v_add_lshl_u32 v60, v6, v8, 1 // 00000001EB88: D1FE003C 02061106 - v_cndmask_b32_e64 v60, v10, v60, s[34:35] // 00000001EB90: D100003C 008A790A - buffer_load_short_d16 v59, v60, s[20:23], 0 offen // 00000001EB98: E0901000 80053B3C - 
v_add_lshl_u32 v60, v7, v8, 1 // 00000001EBA0: D1FE003C 02061107 - v_cndmask_b32_e64 v60, v10, v60, s[34:35] // 00000001EBA8: D100003C 008A790A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001EBB0: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EBB8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EBC0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EBC8: 86A2221E - v_add_lshl_u32 v62, v6, v8, 1 // 00000001EBCC: D1FE003E 02061106 - v_cndmask_b32_e64 v62, v10, v62, s[34:35] // 00000001EBD4: D100003E 008A7D0A - buffer_load_short_d16 v61, v62, s[20:23], 0 offen // 00000001EBDC: E0901000 80053D3E - v_add_lshl_u32 v62, v7, v8, 1 // 00000001EBE4: D1FE003E 02061107 - v_cndmask_b32_e64 v62, v10, v62, s[34:35] // 00000001EBEC: D100003E 008A7D0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001EBF4: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EBFC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EC04: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EC0C: 86A2221E - v_add_lshl_u32 v64, v6, v8, 1 // 00000001EC10: D1FE0040 02061106 - v_cndmask_b32_e64 v64, v10, v64, s[34:35] // 00000001EC18: D1000040 008A810A - buffer_load_short_d16 v63, v64, s[20:23], 0 offen // 00000001EC20: E0901000 80053F40 - v_add_lshl_u32 v64, v7, v8, 1 // 00000001EC28: D1FE0040 02061107 - v_cndmask_b32_e64 v64, v10, v64, s[34:35] // 00000001EC30: D1000040 008A810A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001EC38: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EC40: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EC48: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EC50: 86A2221E - v_add_lshl_u32 v66, v6, v8, 1 // 00000001EC54: D1FE0042 02061106 - v_cndmask_b32_e64 v66, v10, v66, s[34:35] // 00000001EC5C: D1000042 008A850A - buffer_load_short_d16 v65, v66, s[20:23], 0 offen // 00000001EC64: E0901000 80054142 - v_add_lshl_u32 v66, 
v7, v8, 1 // 00000001EC6C: D1FE0042 02061107 - v_cndmask_b32_e64 v66, v10, v66, s[34:35] // 00000001EC74: D1000042 008A850A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001EC7C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001EC84: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001EC8C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001EC94: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EC9C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001ECA4: 86A2221E - v_add_lshl_u32 v68, v6, v4, 1 // 00000001ECA8: D1FE0044 02060906 - v_cndmask_b32_e64 v68, v10, v68, s[34:35] // 00000001ECB0: D1000044 008A890A - buffer_load_short_d16 v67, v68, s[20:23], 0 offen // 00000001ECB8: E0901000 80054344 - v_add_lshl_u32 v68, v7, v4, 1 // 00000001ECC0: D1FE0044 02060907 - v_cndmask_b32_e64 v68, v10, v68, s[34:35] // 00000001ECC8: D1000044 008A890A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001ECD0: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001ECD8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001ECE0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001ECE8: 86A2221E - v_add_lshl_u32 v70, v6, v8, 1 // 00000001ECEC: D1FE0046 02061106 - v_cndmask_b32_e64 v70, v10, v70, s[34:35] // 00000001ECF4: D1000046 008A8D0A - buffer_load_short_d16 v69, v70, s[20:23], 0 offen // 00000001ECFC: E0901000 80054546 - v_add_lshl_u32 v70, v7, v8, 1 // 00000001ED04: D1FE0046 02061107 - v_cndmask_b32_e64 v70, v10, v70, s[34:35] // 00000001ED0C: D1000046 008A8D0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001ED14: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001ED1C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001ED24: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001ED2C: 86A2221E - v_add_lshl_u32 v72, v6, v8, 1 // 00000001ED30: D1FE0048 02061106 - v_cndmask_b32_e64 v72, v10, v72, s[34:35] // 00000001ED38: D1000048 008A910A - 
buffer_load_short_d16 v71, v72, s[20:23], 0 offen // 00000001ED40: E0901000 80054748 - v_add_lshl_u32 v72, v7, v8, 1 // 00000001ED48: D1FE0048 02061107 - v_cndmask_b32_e64 v72, v10, v72, s[34:35] // 00000001ED50: D1000048 008A910A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001ED58: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001ED60: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001ED68: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001ED70: 86A2221E - v_add_lshl_u32 v74, v6, v8, 1 // 00000001ED74: D1FE004A 02061106 - v_cndmask_b32_e64 v74, v10, v74, s[34:35] // 00000001ED7C: D100004A 008A950A - buffer_load_short_d16 v73, v74, s[20:23], 0 offen // 00000001ED84: E0901000 8005494A - v_add_lshl_u32 v74, v7, v8, 1 // 00000001ED8C: D1FE004A 02061107 - v_cndmask_b32_e64 v74, v10, v74, s[34:35] // 00000001ED94: D100004A 008A950A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001ED9C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EDA4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EDAC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EDB4: 86A2221E - v_add_lshl_u32 v76, v6, v8, 1 // 00000001EDB8: D1FE004C 02061106 - v_cndmask_b32_e64 v76, v10, v76, s[34:35] // 00000001EDC0: D100004C 008A990A - buffer_load_short_d16 v75, v76, s[20:23], 0 offen // 00000001EDC8: E0901000 80054B4C - v_add_lshl_u32 v76, v7, v8, 1 // 00000001EDD0: D1FE004C 02061107 - v_cndmask_b32_e64 v76, v10, v76, s[34:35] // 00000001EDD8: D100004C 008A990A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001EDE0: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EDE8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EDF0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EDF8: 86A2221E - v_add_lshl_u32 v78, v6, v8, 1 // 00000001EDFC: D1FE004E 02061106 - v_cndmask_b32_e64 v78, v10, v78, s[34:35] // 00000001EE04: D100004E 008A9D0A - buffer_load_short_d16 
v77, v78, s[20:23], 0 offen // 00000001EE0C: E0901000 80054D4E - v_add_lshl_u32 v78, v7, v8, 1 // 00000001EE14: D1FE004E 02061107 - v_cndmask_b32_e64 v78, v10, v78, s[34:35] // 00000001EE1C: D100004E 008A9D0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001EE24: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EE2C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EE34: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EE3C: 86A2221E - v_add_lshl_u32 v80, v6, v8, 1 // 00000001EE40: D1FE0050 02061106 - v_cndmask_b32_e64 v80, v10, v80, s[34:35] // 00000001EE48: D1000050 008AA10A - buffer_load_short_d16 v79, v80, s[20:23], 0 offen // 00000001EE50: E0901000 80054F50 - v_add_lshl_u32 v80, v7, v8, 1 // 00000001EE58: D1FE0050 02061107 - v_cndmask_b32_e64 v80, v10, v80, s[34:35] // 00000001EE60: D1000050 008AA10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001EE68: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EE70: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EE78: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EE80: 86A2221E - v_add_lshl_u32 v82, v6, v8, 1 // 00000001EE84: D1FE0052 02061106 - v_cndmask_b32_e64 v82, v10, v82, s[34:35] // 00000001EE8C: D1000052 008AA50A - buffer_load_short_d16 v81, v82, s[20:23], 0 offen // 00000001EE94: E0901000 80055152 - v_add_lshl_u32 v82, v7, v8, 1 // 00000001EE9C: D1FE0052 02061107 - v_cndmask_b32_e64 v82, v10, v82, s[34:35] // 00000001EEA4: D1000052 008AA50A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000001EEAC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000001EEB4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000001EEBC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000001EEC4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EECC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EED4: 86A2221E - v_add_lshl_u32 v84, v6, v4, 1 // 00000001EED8: D1FE0054 02060906 
- v_cndmask_b32_e64 v84, v10, v84, s[34:35] // 00000001EEE0: D1000054 008AA90A - buffer_load_short_d16 v83, v84, s[20:23], 0 offen // 00000001EEE8: E0901000 80055354 - v_add_lshl_u32 v84, v7, v4, 1 // 00000001EEF0: D1FE0054 02060907 - v_cndmask_b32_e64 v84, v10, v84, s[34:35] // 00000001EEF8: D1000054 008AA90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000001EF00: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EF08: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EF10: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EF18: 86A2221E - v_add_lshl_u32 v86, v6, v8, 1 // 00000001EF1C: D1FE0056 02061106 - v_cndmask_b32_e64 v86, v10, v86, s[34:35] // 00000001EF24: D1000056 008AAD0A - buffer_load_short_d16 v85, v86, s[20:23], 0 offen // 00000001EF2C: E0901000 80055556 - v_add_lshl_u32 v86, v7, v8, 1 // 00000001EF34: D1FE0056 02061107 - v_cndmask_b32_e64 v86, v10, v86, s[34:35] // 00000001EF3C: D1000056 008AAD0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000001EF44: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EF4C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EF54: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EF5C: 86A2221E - v_add_lshl_u32 v88, v6, v8, 1 // 00000001EF60: D1FE0058 02061106 - v_cndmask_b32_e64 v88, v10, v88, s[34:35] // 00000001EF68: D1000058 008AB10A - buffer_load_short_d16 v87, v88, s[20:23], 0 offen // 00000001EF70: E0901000 80055758 - v_add_lshl_u32 v88, v7, v8, 1 // 00000001EF78: D1FE0058 02061107 - v_cndmask_b32_e64 v88, v10, v88, s[34:35] // 00000001EF80: D1000058 008AB10A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000001EF88: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EF90: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EF98: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EFA0: 86A2221E - v_add_lshl_u32 v90, v6, v8, 1 // 00000001EFA4: D1FE005A 02061106 - v_cndmask_b32_e64 
v90, v10, v90, s[34:35] // 00000001EFAC: D100005A 008AB50A - buffer_load_short_d16 v89, v90, s[20:23], 0 offen // 00000001EFB4: E0901000 8005595A - v_add_lshl_u32 v90, v7, v8, 1 // 00000001EFBC: D1FE005A 02061107 - v_cndmask_b32_e64 v90, v10, v90, s[34:35] // 00000001EFC4: D100005A 008AB50A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000001EFCC: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001EFD4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001EFDC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001EFE4: 86A2221E - v_add_lshl_u32 v92, v6, v8, 1 // 00000001EFE8: D1FE005C 02061106 - v_cndmask_b32_e64 v92, v10, v92, s[34:35] // 00000001EFF0: D100005C 008AB90A - buffer_load_short_d16 v91, v92, s[20:23], 0 offen // 00000001EFF8: E0901000 80055B5C - v_add_lshl_u32 v92, v7, v8, 1 // 00000001F000: D1FE005C 02061107 - v_cndmask_b32_e64 v92, v10, v92, s[34:35] // 00000001F008: D100005C 008AB90A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000001F010: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001F018: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001F020: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001F028: 86A2221E - v_add_lshl_u32 v94, v6, v8, 1 // 00000001F02C: D1FE005E 02061106 - v_cndmask_b32_e64 v94, v10, v94, s[34:35] // 00000001F034: D100005E 008ABD0A - buffer_load_short_d16 v93, v94, s[20:23], 0 offen // 00000001F03C: E0901000 80055D5E - v_add_lshl_u32 v94, v7, v8, 1 // 00000001F044: D1FE005E 02061107 - v_cndmask_b32_e64 v94, v10, v94, s[34:35] // 00000001F04C: D100005E 008ABD0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000001F054: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001F05C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001F064: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001F06C: 86A2221E - v_add_lshl_u32 v96, v6, v8, 1 // 00000001F070: D1FE0060 02061106 - v_cndmask_b32_e64 v96, v10, v96, 
s[34:35] // 00000001F078: D1000060 008AC10A - buffer_load_short_d16 v95, v96, s[20:23], 0 offen // 00000001F080: E0901000 80055F60 - v_add_lshl_u32 v96, v7, v8, 1 // 00000001F088: D1FE0060 02061107 - v_cndmask_b32_e64 v96, v10, v96, s[34:35] // 00000001F090: D1000060 008AC10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000001F098: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000001F0A0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000001F0A8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000001F0B0: 86A2221E - v_add_lshl_u32 v98, v6, v8, 1 // 00000001F0B4: D1FE0062 02061106 - v_cndmask_b32_e64 v98, v10, v98, s[34:35] // 00000001F0BC: D1000062 008AC50A - buffer_load_short_d16 v97, v98, s[20:23], 0 offen // 00000001F0C4: E0901000 80056162 - v_add_lshl_u32 v98, v7, v8, 1 // 00000001F0CC: D1FE0062 02061107 - v_cndmask_b32_e64 v98, v10, v98, s[34:35] // 00000001F0D4: D1000062 008AC50A - v_accvgpr_read_b32 v15, a147 // 00000001F0DC: D3D8400F 18000193 - v_accvgpr_read_b32 v16, a151 // 00000001F0E4: D3D84010 18000197 - v_accvgpr_read_b32 v17, a155 // 00000001F0EC: D3D84011 1800019B - v_accvgpr_read_b32 v18, a159 // 00000001F0F4: D3D84012 1800019F - v_accvgpr_read_b32 v19, a163 // 00000001F0FC: D3D84013 180001A3 - v_accvgpr_read_b32 v20, a167 // 00000001F104: D3D84014 180001A7 - v_accvgpr_read_b32 v21, a171 // 00000001F10C: D3D84015 180001AB - v_accvgpr_read_b32 v22, a175 // 00000001F114: D3D84016 180001AF - v_accvgpr_read_b32 v23, a179 // 00000001F11C: D3D84017 180001B3 - v_accvgpr_read_b32 v24, a183 // 00000001F124: D3D84018 180001B7 - v_accvgpr_read_b32 v25, a187 // 00000001F12C: D3D84019 180001BB - v_accvgpr_read_b32 v26, a191 // 00000001F134: D3D8401A 180001BF - v_accvgpr_read_b32 v27, a195 // 00000001F13C: D3D8401B 180001C3 - v_accvgpr_read_b32 v28, a199 // 00000001F144: D3D8401C 180001C7 - v_accvgpr_read_b32 v29, a203 // 00000001F14C: D3D8401D 180001CB - v_accvgpr_read_b32 v30, a207 // 00000001F154: D3D8401E 180001CF - 
v_accvgpr_read_b32 v31, a211 // 00000001F15C: D3D8401F 180001D3 - v_accvgpr_read_b32 v32, a215 // 00000001F164: D3D84020 180001D7 - v_accvgpr_read_b32 v33, a219 // 00000001F16C: D3D84021 180001DB - v_accvgpr_read_b32 v34, a223 // 00000001F174: D3D84022 180001DF - v_accvgpr_read_b32 v35, a227 // 00000001F17C: D3D84023 180001E3 - v_accvgpr_read_b32 v36, a231 // 00000001F184: D3D84024 180001E7 - v_accvgpr_read_b32 v37, a235 // 00000001F18C: D3D84025 180001EB - v_accvgpr_read_b32 v38, a239 // 00000001F194: D3D84026 180001EF - v_accvgpr_read_b32 v39, a243 // 00000001F19C: D3D84027 180001F3 - v_accvgpr_read_b32 v40, a247 // 00000001F1A4: D3D84028 180001F7 - v_accvgpr_read_b32 v41, a251 // 00000001F1AC: D3D84029 180001FB - v_accvgpr_read_b32 v42, a255 // 00000001F1B4: D3D8402A 180001FF - v_mul_f32_e32 v15, s44, v15 // 00000001F1BC: 0A1E1E2C - v_pk_mul_f32 v[16:17], s[44:45], v[16:17] op_sel_hi:[0,1] // 00000001F1C0: D3B14010 1002202C - v_pk_mul_f32 v[18:19], s[44:45], v[18:19] op_sel_hi:[0,1] // 00000001F1C8: D3B14012 1002242C - v_pk_mul_f32 v[20:21], s[44:45], v[20:21] op_sel_hi:[0,1] // 00000001F1D0: D3B14014 1002282C - v_pk_mul_f32 v[22:23], s[44:45], v[22:23] op_sel_hi:[0,1] // 00000001F1D8: D3B14016 10022C2C - v_pk_mul_f32 v[24:25], s[44:45], v[24:25] op_sel_hi:[0,1] // 00000001F1E0: D3B14018 1002302C - v_pk_mul_f32 v[26:27], s[44:45], v[26:27] op_sel_hi:[0,1] // 00000001F1E8: D3B1401A 1002342C - v_pk_mul_f32 v[28:29], s[44:45], v[28:29] op_sel_hi:[0,1] // 00000001F1F0: D3B1401C 1002382C - v_pk_mul_f32 v[30:31], s[44:45], v[30:31] op_sel_hi:[0,1] // 00000001F1F8: D3B1401E 10023C2C - v_pk_mul_f32 v[32:33], s[44:45], v[32:33] op_sel_hi:[0,1] // 00000001F200: D3B14020 1002402C - v_pk_mul_f32 v[34:35], s[44:45], v[34:35] op_sel_hi:[0,1] // 00000001F208: D3B14022 1002442C - v_pk_mul_f32 v[36:37], s[44:45], v[36:37] op_sel_hi:[0,1] // 00000001F210: D3B14024 1002482C - v_pk_mul_f32 v[38:39], s[44:45], v[38:39] op_sel_hi:[0,1] // 00000001F218: D3B14026 10024C2C - 
v_pk_mul_f32 v[40:41], s[44:45], v[40:41] op_sel_hi:[0,1] // 00000001F220: D3B14028 1002502C - v_mul_f32_e32 v42, s44, v42 // 00000001F228: 0A54542C - s_waitcnt vmcnt(0) // 00000001F22C: BF8C0F70 - v_mov_b32_e32 v12, 0xffff0000 // 00000001F230: 7E1802FF FFFF0000 - v_mov_b32_e32 v13, 0x7fff0000 // 00000001F238: 7E1A02FF 7FFF0000 - v_mov_b32_e32 v14, 0x7fff // 00000001F240: 7E1C02FF 00007FFF - v_cvt_f32_bf16_sdwa v8, v43 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F248: 7E10B6F9 0004162B - v_fmac_f32_e64 v15, v8, s45 // 00000001F250: D13B000F 00005B08 - v_cvt_pk_bf16_f32 v15, v15, v15 // 00000001F258: D268000F 00021F0F - buffer_store_short v15, v44, s[16:19], 0 offen nt // 00000001F260: E06A1000 80040F2C - v_cvt_f32_bf16_sdwa v8, v45 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F268: 7E10B6F9 0004162D - v_fmac_f32_e64 v16, v8, s45 // 00000001F270: D13B0010 00005B08 - v_cvt_pk_bf16_f32 v16, v16, v16 // 00000001F278: D2680010 00022110 - buffer_store_short v16, v46, s[16:19], 0 offen nt // 00000001F280: E06A1000 8004102E - v_cvt_f32_bf16_sdwa v8, v47 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F288: 7E10B6F9 0004162F - v_fmac_f32_e64 v17, v8, s45 // 00000001F290: D13B0011 00005B08 - v_cvt_pk_bf16_f32 v17, v17, v17 // 00000001F298: D2680011 00022311 - buffer_store_short v17, v48, s[16:19], 0 offen nt // 00000001F2A0: E06A1000 80041130 - v_cvt_f32_bf16_sdwa v8, v49 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F2A8: 7E10B6F9 00041631 - v_fmac_f32_e64 v18, v8, s45 // 00000001F2B0: D13B0012 00005B08 - v_cvt_pk_bf16_f32 v18, v18, v18 // 00000001F2B8: D2680012 00022512 - buffer_store_short v18, v50, s[16:19], 0 offen nt // 00000001F2C0: E06A1000 80041232 - v_cvt_f32_bf16_sdwa v8, v51 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F2C8: 7E10B6F9 00041633 - v_fmac_f32_e64 v19, v8, s45 // 00000001F2D0: D13B0013 00005B08 - v_cvt_pk_bf16_f32 v19, v19, v19 // 00000001F2D8: 
D2680013 00022713 - buffer_store_short v19, v52, s[16:19], 0 offen nt // 00000001F2E0: E06A1000 80041334 - v_cvt_f32_bf16_sdwa v8, v53 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F2E8: 7E10B6F9 00041635 - v_fmac_f32_e64 v20, v8, s45 // 00000001F2F0: D13B0014 00005B08 - v_cvt_pk_bf16_f32 v20, v20, v20 // 00000001F2F8: D2680014 00022914 - buffer_store_short v20, v54, s[16:19], 0 offen nt // 00000001F300: E06A1000 80041436 - v_cvt_f32_bf16_sdwa v8, v55 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F308: 7E10B6F9 00041637 - v_fmac_f32_e64 v21, v8, s45 // 00000001F310: D13B0015 00005B08 - v_cvt_pk_bf16_f32 v21, v21, v21 // 00000001F318: D2680015 00022B15 - buffer_store_short v21, v56, s[16:19], 0 offen nt // 00000001F320: E06A1000 80041538 - v_cvt_f32_bf16_sdwa v8, v57 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F328: 7E10B6F9 00041639 - v_fmac_f32_e64 v22, v8, s45 // 00000001F330: D13B0016 00005B08 - v_cvt_pk_bf16_f32 v22, v22, v22 // 00000001F338: D2680016 00022D16 - buffer_store_short v22, v58, s[16:19], 0 offen nt // 00000001F340: E06A1000 8004163A - v_cvt_f32_bf16_sdwa v8, v59 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F348: 7E10B6F9 0004163B - v_fmac_f32_e64 v23, v8, s45 // 00000001F350: D13B0017 00005B08 - v_cvt_pk_bf16_f32 v23, v23, v23 // 00000001F358: D2680017 00022F17 - buffer_store_short v23, v60, s[16:19], 0 offen nt // 00000001F360: E06A1000 8004173C - v_cvt_f32_bf16_sdwa v8, v61 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F368: 7E10B6F9 0004163D - v_fmac_f32_e64 v24, v8, s45 // 00000001F370: D13B0018 00005B08 - v_cvt_pk_bf16_f32 v24, v24, v24 // 00000001F378: D2680018 00023118 - buffer_store_short v24, v62, s[16:19], 0 offen nt // 00000001F380: E06A1000 8004183E - v_cvt_f32_bf16_sdwa v8, v63 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F388: 7E10B6F9 0004163F - v_fmac_f32_e64 v25, v8, s45 // 00000001F390: D13B0019 
00005B08 - v_cvt_pk_bf16_f32 v25, v25, v25 // 00000001F398: D2680019 00023319 - buffer_store_short v25, v64, s[16:19], 0 offen nt // 00000001F3A0: E06A1000 80041940 - v_cvt_f32_bf16_sdwa v8, v65 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F3A8: 7E10B6F9 00041641 - v_fmac_f32_e64 v26, v8, s45 // 00000001F3B0: D13B001A 00005B08 - v_cvt_pk_bf16_f32 v26, v26, v26 // 00000001F3B8: D268001A 0002351A - buffer_store_short v26, v66, s[16:19], 0 offen nt // 00000001F3C0: E06A1000 80041A42 - v_cvt_f32_bf16_sdwa v8, v67 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F3C8: 7E10B6F9 00041643 - v_fmac_f32_e64 v27, v8, s45 // 00000001F3D0: D13B001B 00005B08 - v_cvt_pk_bf16_f32 v27, v27, v27 // 00000001F3D8: D268001B 0002371B - buffer_store_short v27, v68, s[16:19], 0 offen nt // 00000001F3E0: E06A1000 80041B44 - v_cvt_f32_bf16_sdwa v8, v69 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F3E8: 7E10B6F9 00041645 - v_fmac_f32_e64 v28, v8, s45 // 00000001F3F0: D13B001C 00005B08 - v_cvt_pk_bf16_f32 v28, v28, v28 // 00000001F3F8: D268001C 0002391C - buffer_store_short v28, v70, s[16:19], 0 offen nt // 00000001F400: E06A1000 80041C46 - v_cvt_f32_bf16_sdwa v8, v71 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F408: 7E10B6F9 00041647 - v_fmac_f32_e64 v29, v8, s45 // 00000001F410: D13B001D 00005B08 - v_cvt_pk_bf16_f32 v29, v29, v29 // 00000001F418: D268001D 00023B1D - buffer_store_short v29, v72, s[16:19], 0 offen nt // 00000001F420: E06A1000 80041D48 - v_cvt_f32_bf16_sdwa v8, v73 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F428: 7E10B6F9 00041649 - v_fmac_f32_e64 v30, v8, s45 // 00000001F430: D13B001E 00005B08 - v_cvt_pk_bf16_f32 v30, v30, v30 // 00000001F438: D268001E 00023D1E - buffer_store_short v30, v74, s[16:19], 0 offen nt // 00000001F440: E06A1000 80041E4A - v_cvt_f32_bf16_sdwa v8, v75 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F448: 7E10B6F9 0004164B - 
v_fmac_f32_e64 v31, v8, s45 // 00000001F450: D13B001F 00005B08 - v_cvt_pk_bf16_f32 v31, v31, v31 // 00000001F458: D268001F 00023F1F - buffer_store_short v31, v76, s[16:19], 0 offen nt // 00000001F460: E06A1000 80041F4C - v_cvt_f32_bf16_sdwa v8, v77 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F468: 7E10B6F9 0004164D - v_fmac_f32_e64 v32, v8, s45 // 00000001F470: D13B0020 00005B08 - v_cvt_pk_bf16_f32 v32, v32, v32 // 00000001F478: D2680020 00024120 - buffer_store_short v32, v78, s[16:19], 0 offen nt // 00000001F480: E06A1000 8004204E - v_cvt_f32_bf16_sdwa v8, v79 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F488: 7E10B6F9 0004164F - v_fmac_f32_e64 v33, v8, s45 // 00000001F490: D13B0021 00005B08 - v_cvt_pk_bf16_f32 v33, v33, v33 // 00000001F498: D2680021 00024321 - buffer_store_short v33, v80, s[16:19], 0 offen nt // 00000001F4A0: E06A1000 80042150 - v_cvt_f32_bf16_sdwa v8, v81 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F4A8: 7E10B6F9 00041651 - v_fmac_f32_e64 v34, v8, s45 // 00000001F4B0: D13B0022 00005B08 - v_cvt_pk_bf16_f32 v34, v34, v34 // 00000001F4B8: D2680022 00024522 - buffer_store_short v34, v82, s[16:19], 0 offen nt // 00000001F4C0: E06A1000 80042252 - v_cvt_f32_bf16_sdwa v8, v83 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F4C8: 7E10B6F9 00041653 - v_fmac_f32_e64 v35, v8, s45 // 00000001F4D0: D13B0023 00005B08 - v_cvt_pk_bf16_f32 v35, v35, v35 // 00000001F4D8: D2680023 00024723 - buffer_store_short v35, v84, s[16:19], 0 offen nt // 00000001F4E0: E06A1000 80042354 - v_cvt_f32_bf16_sdwa v8, v85 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F4E8: 7E10B6F9 00041655 - v_fmac_f32_e64 v36, v8, s45 // 00000001F4F0: D13B0024 00005B08 - v_cvt_pk_bf16_f32 v36, v36, v36 // 00000001F4F8: D2680024 00024924 - buffer_store_short v36, v86, s[16:19], 0 offen nt // 00000001F500: E06A1000 80042456 - v_cvt_f32_bf16_sdwa v8, v87 dst_sel:DWORD dst_unused:UNUSED_PRESERVE 
src0_sel:WORD_0// 00000001F508: 7E10B6F9 00041657 - v_fmac_f32_e64 v37, v8, s45 // 00000001F510: D13B0025 00005B08 - v_cvt_pk_bf16_f32 v37, v37, v37 // 00000001F518: D2680025 00024B25 - buffer_store_short v37, v88, s[16:19], 0 offen nt // 00000001F520: E06A1000 80042558 - v_cvt_f32_bf16_sdwa v8, v89 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F528: 7E10B6F9 00041659 - v_fmac_f32_e64 v38, v8, s45 // 00000001F530: D13B0026 00005B08 - v_cvt_pk_bf16_f32 v38, v38, v38 // 00000001F538: D2680026 00024D26 - buffer_store_short v38, v90, s[16:19], 0 offen nt // 00000001F540: E06A1000 8004265A - v_cvt_f32_bf16_sdwa v8, v91 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F548: 7E10B6F9 0004165B - v_fmac_f32_e64 v39, v8, s45 // 00000001F550: D13B0027 00005B08 - v_cvt_pk_bf16_f32 v39, v39, v39 // 00000001F558: D2680027 00024F27 - buffer_store_short v39, v92, s[16:19], 0 offen nt // 00000001F560: E06A1000 8004275C - v_cvt_f32_bf16_sdwa v8, v93 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F568: 7E10B6F9 0004165D - v_fmac_f32_e64 v40, v8, s45 // 00000001F570: D13B0028 00005B08 - v_cvt_pk_bf16_f32 v40, v40, v40 // 00000001F578: D2680028 00025128 - buffer_store_short v40, v94, s[16:19], 0 offen nt // 00000001F580: E06A1000 8004285E - v_cvt_f32_bf16_sdwa v8, v95 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F588: 7E10B6F9 0004165F - v_fmac_f32_e64 v41, v8, s45 // 00000001F590: D13B0029 00005B08 - v_cvt_pk_bf16_f32 v41, v41, v41 // 00000001F598: D2680029 00025329 - buffer_store_short v41, v96, s[16:19], 0 offen nt // 00000001F5A0: E06A1000 80042960 - v_cvt_f32_bf16_sdwa v8, v97 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_0// 00000001F5A8: 7E10B6F9 00041661 - v_fmac_f32_e64 v42, v8, s45 // 00000001F5B0: D13B002A 00005B08 - v_cvt_pk_bf16_f32 v42, v42, v42 // 00000001F5B8: D268002A 0002552A - buffer_store_short v42, v98, s[16:19], 0 offen nt // 00000001F5C0: E06A1000 80042A62 - s_nop 0 // 
00000001F5C8: BF800000 - s_branch label_GW_End_2 // 00000001F5CC: BF820000 - -label_GW_End_2: label_GW_End: -label_KernelEnd: end: s_endpgm // 00000001F5D0: BF810000 @@ -15285,7 +4578,6 @@ end: # ---- user SGPR enables (descriptor bits >448) ---- .amdhsa_user_sgpr_kernarg_segment_ptr 1 - .amdhsa_user_sgpr_count 2 .amdhsa_user_sgpr_kernarg_preload_length 0 .amdhsa_user_sgpr_kernarg_preload_offset 0 diff --git a/extra/gemm/asm/test.py b/extra/gemm/asm/test.py index 5c57d4c37d..bc9c75b7c7 100644 --- a/extra/gemm/asm/test.py +++ b/extra/gemm/asm/test.py @@ -9,9 +9,12 @@ from tinygrad.helpers import TracingKey fp = pathlib.Path(__file__).parent/"gemm.s" +N = 8192 +THREADS_PER_WG = 256 +NUM_WG = N//THREADS_PER_WG * N//THREADS_PER_WG + # ** generate inputs on CPU -N = 8192 scale = 10.0 import torch @@ -34,19 +37,19 @@ C_asm.uop.buffer.allocate() # ** run gemms -@track_rewrites(name=lambda *args,ret,**kwargs: TracingKey(ret.name, (ret.function_name,), ret=ret)) -def get_asm_gemm(ast:UOp, fp:pathlib.Path) -> ProgramSpec: - src = fp.read_text() - lib = Device[Device.DEFAULT].compiler.compile(src) - return ProgramSpec("gemm", src, Device.DEFAULT, ast, lib=lib, global_size=[1024, 1, 1], local_size=[256, 1, 1], globals=[0, 1, 2]) - +# baseline tinygrad sched = C_tiny.schedule() assert len(sched) == 1 eis:list[ExecItem] = [sched[-1].lower()] -ast = eis[0].ast -prg = get_asm_gemm(ast, fp) -eis.append(ExecItem(ast, [C_asm.uop.buffer, from_torch(B).uop.buffer, from_torch(A).uop.buffer], prg=CompiledRunner(prg))) +ast = sched[-1].ast +# assembly gemm +@track_rewrites(name=lambda ret: TracingKey(ret.name, (ret.function_name,), ret)) +def get_asm_prg() -> ProgramSpec: + src = fp.read_text() + lib = Device[Device.DEFAULT].compiler.compile(src) + return ProgramSpec("gemm", src, Device.DEFAULT, ast, lib=lib, global_size=[NUM_WG, 1, 1], local_size=[THREADS_PER_WG, 1, 1], globals=[0, 1, 2]) +eis.append(ExecItem(ast, [C_asm.uop.buffer, from_torch(B).uop.buffer, from_torch(A).uop.buffer], 
prg=CompiledRunner(get_asm_prg()))) for ei in eis: et = ei.run(wait=True)