diff --git a/CLAUDE.md b/CLAUDE.md index ac8ed615a3..1161db0235 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -208,3 +208,9 @@ Key patterns to watch (from ResNet50 benchmark): - `vmin==vmax folding`: ~55ms, 0.33% match rate - checks 52K ops but rarely matches Patterns with 0% match rate are workload-specific overhead. They may be useful in other workloads, so don't remove them without understanding their purpose. + +## AMD Performance Counter Profiling + +Set VIZ to `-2` to save performance counters traces for the AMD backend. + +Use the CLI in `./extra/sqtt/roc.py` to explore the trace. diff --git a/extra/gemm/asm/gemm.s b/extra/gemm/asm/gemm.s index fcaaffc070..52e44b9f01 100644 --- a/extra/gemm/asm/gemm.s +++ b/extra/gemm/asm/gemm.s @@ -42,21 +42,6 @@ gemm: s_lshr_b32 s60, s11, 16 // 000000002A94: 8F3C900B s_ff1_i32_b32 s60, s60 // 000000002A98: BEBC103C s_lshr_b32 s61, s11, 22 // 000000002A9C: 8F3D960B - s_cmp_gt_i32 s60, 0 // 000000002AA0: BF02803C - s_cbranch_scc0 label_skip_WGMXCC // 000000002AA4: BF840042 - s_lshr_b32 s57, s54, s60 // 000000002AA8: 8F393C36 - s_lshl_b32 s57, s57, s60 // 000000002AAC: 8E393C39 - s_cmp_ge_u32 s2, s57 // 000000002AB0: BF093902 - s_cbranch_scc1 label_skip_WGMXCC // 000000002AB4: BF85003E - s_lshr_b32 s57, s2, s60 // 000000002AC0: 8F393C02 - s_bfm_b32 s58, s60, 0 // 000000002AC4: 913A803C - s_and_b32 s58, s2, s58 // 000000002AC8: 863A3A02 - s_lshr_b32 s59, s54, s60 // 000000002ACC: 8F3B3C36 - s_mul_i32 s58, s58, s59 // 000000002AD0: 923A3B3A - s_add_u32 s2, s57, s58 // 000000002AD4: 80023A39 - s_branch label_skip_WGMXCC // 000000002AD8: BF820035 - -label_skip_WGMXCC: v_and_b32_e32 v5, 63, v134 // 000000002BB0: 260B0CBF v_and_b32_e32 v4, 15, v5 // 000000002BB4: 26080A8F v_lshlrev_b32_e32 v4, 6, v4 // 000000002BB8: 24080886 @@ -190,193 +175,11 @@ label_skip_WGMXCC: s_sub_u32 s34, s34, 16 // 000000002E44: 80A29022 s_subb_u32 s35, s35, 0 // 000000002E48: 82A38023 s_and_b32 s84, s50, 0x3fff // 000000002E5C: 8654FF32 00003FFF - s_cmp_eq_u32 s84, 1 // 000000002E64: BF068154 - s_cbranch_scc1 label_GSU // 000000002E68: BF850037 - s_and_b32 s84, s50, 0x4000 // 000000002E6C: 8654FF32 00004000 - s_cbranch_scc1 label_GSUWGMRR // 000000002E74: BF85001A - s_and_b32 s84, s50, 0x3fff // 000000002E78: 8654FF32 00003FFF - v_cvt_f32_u32_e32 v10, s84 // 000000002E80: 7E140C54 - v_rcp_iflag_f32_e32 v10, v10 // 000000002E84: 7E14470A - v_cvt_f32_u32_e32 v11, s3 // 000000002E88: 7E160C03 - v_mul_f32_e32 v10, v10, v11 // 000000002E8C: 0A14170A - v_cvt_u32_f32_e32 v10, v10 // 000000002E90: 7E140F0A - v_mul_u32_u24_e64 v11, v10, s84 // 000000002E94: D108000B 0000A90A - v_sub_u32_e32 v11, s3, v11 // 000000002E9C: 6A161603 - v_cmpx_eq_u32_e64 exec, v11, s84 // 000000002EA0: D0DA007E 0000A90B - v_add_u32_e32 v10, 1, v10 // 000000002EA8: 68141481 - v_mov_b32_e32 v11, 0 // 000000002EAC: 7E160280 - s_mov_b64 exec, -1 // 000000002EB0: BEFE01C1 - v_cmpx_gt_u32_e64 exec, v11, s84 // 000000002EB4: D0DC007E 0000A90B - v_sub_u32_e64 v10, v10, 1 // 000000002EBC: D135000A 0001030A - v_mul_u32_u24_e64 v11, v10, s84 // 000000002EC4: D108000B 0000A90A - v_sub_u32_e32 v11, s3, v11 // 000000002ECC: 6A161603 - s_mov_b64 exec, -1 // 000000002ED0: BEFE01C1 - v_readfirstlane_b32 s3, v10 // 000000002ED4: 7E06050A - v_readfirstlane_b32 s6, v11 // 000000002ED8: 7E0C050B - s_branch label_GSUWGMRR_End // 000000002EDC: BF820017 - -label_GSUWGMRR: - v_cvt_f32_u32_e32 v10, s15 // 000000002EE0: 7E140C0F - v_rcp_iflag_f32_e32 v10, v10 // 000000002EE4: 7E14470A - v_cvt_f32_u32_e32 v11, s3 // 000000002EE8: 7E160C03 - v_mul_f32_e32 v10, v10, v11 // 000000002EEC: 0A14170A - v_cvt_u32_f32_e32 v10, v10 // 000000002EF0: 7E140F0A - v_mul_u32_u24_e64 v11, v10, s15 // 000000002EF4: D108000B 00001F0A - v_sub_u32_e32 v11, s3, v11 // 000000002EFC: 6A161603 - v_cmpx_eq_u32_e64 exec, v11, s15 // 000000002F00: D0DA007E 00001F0B - v_add_u32_e32 v10, 1, v10 // 000000002F08: 68141481 - v_mov_b32_e32 v11, 0 // 000000002F0C: 7E160280 - s_mov_b64 exec, -1 // 000000002F10: BEFE01C1 - v_cmpx_gt_u32_e64 exec, v11, s15 // 000000002F14: D0DC007E 00001F0B - v_sub_u32_e64 v10, v10, 1 // 000000002F1C: D135000A 0001030A - v_mul_u32_u24_e64 v11, v10, s15 // 000000002F24: D108000B 00001F0A - v_sub_u32_e32 v11, s3, v11 // 000000002F2C: 6A161603 - s_mov_b64 exec, -1 // 000000002F30: BEFE01C1 - v_readfirstlane_b32 s6, v10 // 000000002F34: 7E0C050A - v_readfirstlane_b32 s3, v11 // 000000002F38: 7E06050B - -label_GSUWGMRR_End: - s_mov_b32 s8, 1 // 000000002F3C: BE880081 - s_mov_b32 s9, 2 // 000000002F40: BE890082 - s_branch label_GSU_End // 000000002F44: BF820003 - -label_GSU: s_mov_b64 s[6:7], 0 // 000000002F48: BE860180 s_mov_b32 s8, 1 // 000000002F4C: BE880081 s_mov_b32 s9, 1 // 000000002F50: BE890081 -label_GSU_End: s_sext_i32_i16 s11, s11 // 000000002F54: BE8B170B - s_cmp_gt_i32 s11, 1 // 000000002F58: BF02810B - s_cbranch_scc1 label_WGMPositive // 000000002F5C: BF85004D - s_cmp_ge_i32 s11, 0 // 000000002F60: BF03800B - s_cbranch_scc1 label_WGM // 000000002F64: BF850094 - s_abs_i32 s11, s11 // 000000002F68: BE8B300B - v_cvt_f32_u32_e32 v10, s11 // 000000002F6C: 7E140C0B - v_rcp_iflag_f32_e32 v10, v10 // 000000002F70: 7E14470A - v_cvt_f32_u32_e32 v11, s2 // 000000002F74: 7E160C02 - v_mul_f32_e32 v10, v10, v11 // 000000002F78: 0A14170A - v_cvt_u32_f32_e32 v10, v10 // 000000002F7C: 7E140F0A - v_mul_u32_u24_e64 v11, v10, s11 // 000000002F80: D108000B 0000170A - v_sub_u32_e32 v11, s2, v11 // 000000002F88: 6A161602 - v_cmpx_eq_u32_e64 exec, v11, s11 // 000000002F8C: D0DA007E 0000170B - v_add_u32_e32 v10, 1, v10 // 000000002F94: 68141481 - s_mov_b64 exec, -1 // 000000002F98: BEFE01C1 - v_cmpx_gt_u32_e64 exec, v11, s11 // 000000002F9C: D0DC007E 0000170B - v_sub_u32_e64 v10, v10, 1 // 000000002FA4: D135000A 0001030A - s_mov_b64 exec, -1 // 000000002FAC: BEFE01C1 - v_readfirstlane_b32 s86, v10 // 000000002FB0: 7EAC050A - s_mul_i32 s87, s86, s11 // 000000002FB4: 92570B56 - s_sub_u32 s87, s2, s87 // 000000002FB8: 80D75702 - s_mul_i32 s87, s87, s15 // 000000002FBC: 92570F57 - s_add_u32 s87, s87, s3 // 000000002FC0: 80570357 - v_cvt_f32_u32_e32 v10, s11 // 000000002FC4: 7E140C0B - v_rcp_iflag_f32_e32 v10, v10 // 000000002FC8: 7E14470A - v_cvt_f32_u32_e32 v11, s14 // 000000002FCC: 7E160C0E - v_mul_f32_e32 v10, v10, v11 // 000000002FD0: 0A14170A - v_cvt_u32_f32_e32 v10, v10 // 000000002FD4: 7E140F0A - v_mul_u32_u24_e64 v11, v10, s11 // 000000002FD8: D108000B 0000170A - v_sub_u32_e32 v11, s14, v11 // 000000002FE0: 6A16160E - v_cmpx_eq_u32_e64 exec, v11, s11 // 000000002FE4: D0DA007E 0000170B - v_add_u32_e32 v10, 1, v10 // 000000002FEC: 68141481 - s_mov_b64 exec, -1 // 000000002FF0: BEFE01C1 - v_cmpx_gt_u32_e64 exec, v11, s11 // 000000002FF4: D0DC007E 0000170B - v_sub_u32_e64 v10, v10, 1 // 000000002FFC: D135000A 0001030A - s_mov_b64 exec, -1 // 000000003004: BEFE01C1 - v_readfirstlane_b32 s84, v10 // 000000003008: 7EA8050A - s_mul_i32 s85, s11, s84 // 00000000300C: 9255540B - s_sub_u32 s85, s14, s85 // 000000003010: 80D5550E - s_cmp_eq_u32 s85, 0 // 000000003014: BF068055 - s_cmov_b32 s85, s11 // 000000003018: BED5020B - s_cmp_ge_u32 s86, s84 // 00000000301C: BF095456 - s_cselect_b32 s84, s85, s11 // 000000003020: 85540B55 - v_cvt_f32_u32_e32 v10, s84 // 000000003024: 7E140C54 - v_rcp_iflag_f32_e32 v10, v10 // 000000003028: 7E14470A - v_cvt_f32_u32_e32 v11, s87 // 00000000302C: 7E160C57 - v_mul_f32_e32 v10, v10, v11 // 000000003030: 0A14170A - v_cvt_u32_f32_e32 v10, v10 // 000000003034: 7E140F0A - v_mul_u32_u24_e64 v11, v10, s84 // 000000003038: D108000B 0000A90A - v_sub_u32_e32 v11, s87, v11 // 000000003040: 6A161657 - v_cmpx_eq_u32_e64 exec, v11, s84 // 000000003044: D0DA007E 0000A90B - v_add_u32_e32 v10, 1, v10 // 00000000304C: 68141481 - v_mov_b32_e32 v11, 0 // 000000003050: 7E160280 - s_mov_b64 exec, -1 // 000000003054: BEFE01C1 - v_cmpx_gt_u32_e64 exec, v11, s84 // 000000003058: D0DC007E 0000A90B - v_sub_u32_e64 v10, v10, 1 // 000000003060: D135000A 0001030A - v_mul_u32_u24_e64 v11, v10, s84 // 000000003068: D108000B 0000A90A - v_sub_u32_e32 v11, s87, v11 // 000000003070: 6A161657 - s_mov_b64 exec, -1 // 000000003074: BEFE01C1 - v_readfirstlane_b32 s3, v10 // 000000003078: 7E06050A - v_readfirstlane_b32 s2, v11 // 00000000307C: 7E04050B - s_mul_i32 s2, s3, s84 // 000000003080: 92025403 - s_sub_u32 s2, s87, s2 // 000000003084: 80820257 - s_mul_i32 s86, s86, s11 // 000000003088: 92560B56 - s_add_u32 s2, s2, s86 // 00000000308C: 80025602 - s_branch label_WGM // 000000003090: BF820049 - -label_WGMPositive: - v_cvt_f32_u32_e32 v10, s11 // 000000003094: 7E140C0B - v_rcp_iflag_f32_e32 v10, v10 // 000000003098: 7E14470A - v_cvt_f32_u32_e32 v11, s3 // 00000000309C: 7E160C03 - v_mul_f32_e32 v10, v10, v11 // 0000000030A0: 0A14170A - v_cvt_u32_f32_e32 v10, v10 // 0000000030A4: 7E140F0A - v_mul_u32_u24_e64 v11, v10, s11 // 0000000030A8: D108000B 0000170A - v_sub_u32_e32 v11, s3, v11 // 0000000030B0: 6A161603 - v_cmpx_eq_u32_e64 exec, v11, s11 // 0000000030B4: D0DA007E 0000170B - v_add_u32_e32 v10, 1, v10 // 0000000030BC: 68141481 - s_mov_b64 exec, -1 // 0000000030C0: BEFE01C1 - v_cmpx_gt_u32_e64 exec, v11, s11 // 0000000030C4: D0DC007E 0000170B - v_sub_u32_e64 v10, v10, 1 // 0000000030CC: D135000A 0001030A - s_mov_b64 exec, -1 // 0000000030D4: BEFE01C1 - v_readfirstlane_b32 s86, v10 // 0000000030D8: 7EAC050A - s_mul_i32 s87, s86, s11 // 0000000030DC: 92570B56 - s_sub_u32 s87, s3, s87 // 0000000030E0: 80D75703 - s_mul_i32 s87, s87, s14 // 0000000030E4: 92570E57 - s_add_u32 s87, s87, s2 // 0000000030E8: 80570257 - v_cvt_f32_u32_e32 v10, s11 // 0000000030EC: 7E140C0B - v_rcp_iflag_f32_e32 v10, v10 // 0000000030F0: 7E14470A - v_cvt_f32_u32_e32 v11, s15 // 0000000030F4: 7E160C0F - v_mul_f32_e32 v10, v10, v11 // 0000000030F8: 0A14170A - v_cvt_u32_f32_e32 v10, v10 // 0000000030FC: 7E140F0A - v_mul_u32_u24_e64 v11, v10, s11 // 000000003100: D108000B 0000170A - v_sub_u32_e32 v11, s15, v11 // 000000003108: 6A16160F - v_cmpx_eq_u32_e64 exec, v11, s11 // 00000000310C: D0DA007E 0000170B - v_add_u32_e32 v10, 1, v10 // 000000003114: 68141481 - s_mov_b64 exec, -1 // 000000003118: BEFE01C1 - v_cmpx_gt_u32_e64 exec, v11, s11 // 00000000311C: D0DC007E 0000170B - v_sub_u32_e64 v10, v10, 1 // 000000003124: D135000A 0001030A - s_mov_b64 exec, -1 // 00000000312C: BEFE01C1 - v_readfirstlane_b32 s84, v10 // 000000003130: 7EA8050A - s_mul_i32 s85, s11, s84 // 000000003134: 9255540B - s_sub_u32 s85, s15, s85 // 000000003138: 80D5550F - s_cmp_eq_u32 s85, 0 // 00000000313C: BF068055 - s_cmov_b32 s85, s11 // 000000003140: BED5020B - s_cmp_ge_u32 s86, s84 // 000000003144: BF095456 - s_cselect_b32 s84, s85, s11 // 000000003148: 85540B55 - v_cvt_f32_u32_e32 v10, s84 // 00000000314C: 7E140C54 - v_rcp_iflag_f32_e32 v10, v10 // 000000003150: 7E14470A - v_cvt_f32_u32_e32 v11, s87 // 000000003154: 7E160C57 - v_mul_f32_e32 v10, v10, v11 // 000000003158: 0A14170A - v_cvt_u32_f32_e32 v10, v10 // 00000000315C: 7E140F0A - v_mul_u32_u24_e64 v11, v10, s84 // 000000003160: D108000B 0000A90A - v_sub_u32_e32 v11, s87, v11 // 000000003168: 6A161657 - v_cmpx_eq_u32_e64 exec, v11, s84 // 00000000316C: D0DA007E 0000A90B - v_add_u32_e32 v10, 1, v10 // 000000003174: 68141481 - v_mov_b32_e32 v11, 0 // 000000003178: 7E160280 - s_mov_b64 exec, -1 // 00000000317C: BEFE01C1 - v_cmpx_gt_u32_e64 exec, v11, s84 // 000000003180: D0DC007E 0000A90B - v_sub_u32_e64 v10, v10, 1 // 000000003188: D135000A 0001030A - v_mul_u32_u24_e64 v11, v10, s84 // 000000003190: D108000B 0000A90A - v_sub_u32_e32 v11, s87, v11 // 000000003198: 6A161657 - s_mov_b64 exec, -1 // 00000000319C: BEFE01C1 - v_readfirstlane_b32 s2, v10 // 0000000031A0: 7E04050A - v_readfirstlane_b32 s3, v11 // 0000000031A4: 7E06050B - s_mul_i32 s3, s2, s84 // 0000000031A8: 92035402 - s_sub_u32 s3, s87, s3 // 0000000031AC: 80830357 - s_mul_i32 s86, s86, s11 // 0000000031B0: 92560B56 - s_add_u32 s3, s3, s86 // 0000000031B4: 80035603 - -label_WGM: v_mul_lo_u32 v10, s40, v4 // 0000000031B8: D285000A 00020828 v_add_co_u32_e32 v0, vcc, v5, v10 // 0000000031C0: 32001505 v_add_u32_e32 v0, 8, v0 // 0000000031C4: 68000088 @@ -421,39 +224,8 @@ label_WGM: s_cbranch_scc1 label_GSUC_A // 000000003298: BF850003 s_mul_hi_u32 s85, 64, s6 // 00000000329C: 965506C0 s_mul_i32 s84, 64, s6 // 0000000032A0: 925406C0 - s_branch label_GSUC_A_End // 0000000032A4: BF820022 label_GSUC_A: - s_lshr_b32 s12, s27, 6 // 0000000032A8: 8F0C861B - s_and_b32 s7, s50, 0x3fff // 0000000032AC: 8607FF32 00003FFF - v_cvt_f32_u32_e32 v4, s7 // 0000000032B4: 7E080C07 - v_rcp_iflag_f32_e32 v4, v4 // 0000000032B8: 7E084704 - v_cvt_f32_u32_e32 v5, s12 // 0000000032BC: 7E0A0C0C - v_mul_f32_e32 v4, v4, v5 // 0000000032C0: 0A080B04 - v_cvt_u32_f32_e32 v4, v4 // 0000000032C4: 7E080F04 - v_mul_u32_u24_e64 v5, v4, s7 // 0000000032C8: D1080005 00000F04 - v_sub_u32_e32 v5, s12, v5 // 0000000032D0: 6A0A0A0C - v_cmpx_eq_u32_e64 exec, v5, s7 // 0000000032D4: D0DA007E 00000F05 - v_add_u32_e32 v4, 1, v4 // 0000000032DC: 68080881 - v_mov_b32_e32 v5, 0 // 0000000032E0: 7E0A0280 - s_mov_b64 exec, -1 // 0000000032E4: BEFE01C1 - v_cmpx_gt_u32_e64 exec, v5, s7 // 0000000032E8: D0DC007E 00000F05 - v_sub_u32_e64 v4, v4, 1 // 0000000032F0: D1350004 00010304 - v_mul_u32_u24_e64 v5, v4, s7 // 0000000032F8: D1080005 00000F04 - v_sub_u32_e32 v5, s12, v5 // 000000003300: 6A0A0A0C - s_mov_b64 exec, -1 // 000000003304: BEFE01C1 - v_readfirstlane_b32 s12, v4 // 000000003308: 7E180504 - v_readfirstlane_b32 s7, v5 // 00000000330C: 7E0E0505 - s_mul_i32 s85, s12, s6 // 000000003310: 9255060C - s_add_u32 s84, 1, s12 // 000000003314: 80540C81 - s_add_u32 s85, s85, s7 // 000000003318: 80550755 - s_mul_i32 s84, s84, s6 // 00000000331C: 92540654 - s_cmp_lt_u32 s6, s7 // 000000003320: BF0A0706 - s_cselect_b32 s84, s84, s85 // 000000003324: 85545554 - s_mul_hi_u32 s85, s84, 64 // 000000003328: 9655C054 - s_mul_i32 s84, s84, 64 // 00000000332C: 9254C054 - -label_GSUC_A_End: s_add_u32 s86, s86, s84 // 000000003330: 80565456 s_addc_u32 s87, s87, s85 // 000000003334: 82575557 s_mov_b64 s[60:61], 1 // 000000003338: BEBC0181 @@ -490,39 +262,8 @@ label_GSUC_A_End: s_cbranch_scc1 label_GSUC_B // 0000000033C4: BF850003 s_mul_hi_u32 s85, 64, s6 // 0000000033C8: 965506C0 s_mul_i32 s84, 64, s6 // 0000000033CC: 925406C0 - s_branch label_GSUC_B_End // 0000000033D0: BF820022 label_GSUC_B: - s_lshr_b32 s12, s27, 6 // 0000000033D4: 8F0C861B - s_and_b32 s7, s50, 0x3fff // 0000000033D8: 8607FF32 00003FFF - v_cvt_f32_u32_e32 v4, s7 // 0000000033E0: 7E080C07 - v_rcp_iflag_f32_e32 v4, v4 // 0000000033E4: 7E084704 - v_cvt_f32_u32_e32 v5, s12 // 0000000033E8: 7E0A0C0C - v_mul_f32_e32 v4, v4, v5 // 0000000033EC: 0A080B04 - v_cvt_u32_f32_e32 v4, v4 // 0000000033F0: 7E080F04 - v_mul_u32_u24_e64 v5, v4, s7 // 0000000033F4: D1080005 00000F04 - v_sub_u32_e32 v5, s12, v5 // 0000000033FC: 6A0A0A0C - v_cmpx_eq_u32_e64 exec, v5, s7 // 000000003400: D0DA007E 00000F05 - v_add_u32_e32 v4, 1, v4 // 000000003408: 68080881 - v_mov_b32_e32 v5, 0 // 00000000340C: 7E0A0280 - s_mov_b64 exec, -1 // 000000003410: BEFE01C1 - v_cmpx_gt_u32_e64 exec, v5, s7 // 000000003414: D0DC007E 00000F05 - v_sub_u32_e64 v4, v4, 1 // 00000000341C: D1350004 00010304 - v_mul_u32_u24_e64 v5, v4, s7 // 000000003424: D1080005 00000F04 - v_sub_u32_e32 v5, s12, v5 // 00000000342C: 6A0A0A0C - s_mov_b64 exec, -1 // 000000003430: BEFE01C1 - v_readfirstlane_b32 s12, v4 // 000000003434: 7E180504 - v_readfirstlane_b32 s7, v5 // 000000003438: 7E0E0505 - s_mul_i32 s85, s12, s6 // 00000000343C: 9255060C - s_add_u32 s84, 1, s12 // 000000003440: 80540C81 - s_add_u32 s85, s85, s7 // 000000003444: 80550755 - s_mul_i32 s84, s84, s6 // 000000003448: 92540654 - s_cmp_lt_u32 s6, s7 // 00000000344C: BF0A0706 - s_cselect_b32 s84, s84, s85 // 000000003450: 85545554 - s_mul_hi_u32 s85, s84, 64 // 000000003454: 9655C054 - s_mul_i32 s84, s84, 64 // 000000003458: 9254C054 - -label_GSUC_B_End: s_add_u32 s86, s86, s84 // 00000000345C: 80565456 s_addc_u32 s87, s87, s85 // 000000003460: 82575557 s_mov_b64 s[62:63], 1 // 000000003464: BEBE0181 @@ -561,32 +302,6 @@ label_GSUC_B_End: s_cselect_b32 s69, 0x80, s85 // 000000003508: 854555FF 00000080 s_lshr_b32 s12, s27, 6 // 000000003510: 8F0C861B s_and_b32 s84, s50, 0x3fff // 000000003514: 8654FF32 00003FFF - s_cmp_eq_u32 s84, 1 // 00000000351C: BF068154 - s_cbranch_scc1 label_GSU_1 // 000000003520: BF85001C - s_and_b32 s7, s50, 0x3fff // 000000003524: 8607FF32 00003FFF - v_cvt_f32_u32_e32 v4, s7 // 00000000352C: 7E080C07 - v_rcp_iflag_f32_e32 v4, v4 // 000000003530: 7E084704 - v_cvt_f32_u32_e32 v5, s12 // 000000003534: 7E0A0C0C - v_mul_f32_e32 v4, v4, v5 // 000000003538: 0A080B04 - v_cvt_u32_f32_e32 v4, v4 // 00000000353C: 7E080F04 - v_mul_u32_u24_e64 v5, v4, s7 // 000000003540: D1080005 00000F04 - v_sub_u32_e32 v5, s12, v5 // 000000003548: 6A0A0A0C - v_cmpx_eq_u32_e64 exec, v5, s7 // 00000000354C: D0DA007E 00000F05 - v_add_u32_e32 v4, 1, v4 // 000000003554: 68080881 - v_mov_b32_e32 v5, 0 // 000000003558: 7E0A0280 - s_mov_b64 exec, -1 // 00000000355C: BEFE01C1 - v_cmpx_gt_u32_e64 exec, v5, s7 // 000000003560: D0DC007E 00000F05 - v_sub_u32_e64 v4, v4, 1 // 000000003568: D1350004 00010304 - v_mul_u32_u24_e64 v5, v4, s7 // 000000003570: D1080005 00000F04 - v_sub_u32_e32 v5, s12, v5 // 000000003578: 6A0A0A0C - s_mov_b64 exec, -1 // 00000000357C: BEFE01C1 - v_readfirstlane_b32 s12, v4 // 000000003580: 7E180504 - v_readfirstlane_b32 s7, v5 // 000000003584: 7E0E0505 - s_add_u32 s84, 1, s12 // 000000003588: 80540C81 - s_cmp_lt_u32 s6, s7 // 00000000358C: BF0A0706 - s_cmov_b32 s12, s84 // 000000003590: BE8C0254 - -label_GSU_1: s_mov_b32 s13, s12 // 000000003594: BE8D000C s_and_b32 s86, s10, 0x1f00 // 000000003598: 8656FF0A 00001F00 s_lshr_b32 s86, s86, 8 // 0000000035A0: 8F568856 @@ -597,46 +312,10 @@ label_GSU_1: label_beginStaggerUIter: s_lshl_b32 s85, s84, s86 // 0000000035B8: 8E555654 s_cmp_ge_u32 s13, s85 // 0000000035BC: BF09550D - s_cbranch_scc1 label_endStaggerUIter // 0000000035C0: BF850002 - s_lshr_b32 s84, s84, 1 // 0000000035C4: 8F548154 - s_branch label_beginStaggerUIter // 0000000035C8: BF82FFFB - -label_endStaggerUIter: s_sub_u32 s85, s84, 1 // 0000000035CC: 80D58154 s_cmp_ge_u32 s84, 1 // 0000000035D0: BF098154 s_cselect_b32 s51, s85, 0 // 0000000035D4: 85338055 - s_cmp_eq_u32 s87, 0 // 0000000035D8: BF068057 - s_cbranch_scc1 label_StaggerUMapping_1 // 0000000035DC: BF850002 - s_mov_b32 s84, s2 // 0000000035E0: BED40002 - s_branch label_staggerInputEnd // 0000000035E4: BF820016 - -label_StaggerUMapping_1: s_cmp_eq_u32 s87, 0x2000 // 0000000035E8: BF06FF57 00002000 - s_cbranch_scc1 label_StaggerUMapping_2 // 0000000035F0: BF850002 - s_mov_b32 s84, s3 // 0000000035F4: BED40003 - s_branch label_staggerInputEnd // 0000000035F8: BF820011 - -label_StaggerUMapping_2: - s_cmp_eq_u32 s87, 0x4000 // 0000000035FC: BF06FF57 00004000 - s_cbranch_scc1 label_StaggerUMapping_3 // 000000003604: BF850002 - s_mov_b32 s84, -1 // 000000003608: BED400C1 - s_branch label_staggerInputEnd // 00000000360C: BF82000C - -label_StaggerUMapping_3: - s_cmp_eq_u32 s87, 0x6000 // 000000003610: BF06FF57 00006000 - s_cbranch_scc1 label_StaggerUMapping_4 // 000000003618: BF850004 - s_mul_i32 s85, s14, s3 // 00000000361C: 9255030E - s_add_u32 s84, s84, s85 // 000000003620: 80545554 - s_add_u32 s84, s84, s2 // 000000003624: 80540254 - s_branch label_staggerInputEnd // 000000003628: BF820005 - -label_StaggerUMapping_4: - s_cmp_eq_u32 s87, 0x8000 // 00000000362C: BF06FF57 00008000 - s_cbranch_scc1 label_staggerInputEnd // 000000003634: BF850002 - s_mov_b32 s84, -1 // 000000003638: BED400C1 - s_branch label_staggerInputEnd // 00000000363C: BF820000 - -label_staggerInputEnd: s_and_b32 s51, s51, s84 // 000000003640: 86335433 s_lshl_b32 s51, s51, s86 // 000000003644: 8E335633 s_mul_hi_i32 s85, s51, s68 // 000000003648: 96D54433 @@ -780,9 +459,6 @@ label_ShadowInitStart: s_lshl_b64 s[84:85], s[84:85], s9 // 000000003970: 8ED40954 s_add_u32 s16, s16, s84 // 000000003974: 80105410 s_addc_u32 s17, s17, s85 // 000000003978: 82115511 - s_and_b32 s84, s50, 0x3fff // 00000000397C: 8654FF32 00003FFF - s_cmp_eq_u32 s84, 1 // 000000003984: BF068154 - s_cbranch_scc1 label_GSU_2 // 000000003988: BF850011 s_mul_hi_u32 s85, s24, s6 // 00000000398C: 96550618 s_mul_i32 s84, s24, s6 // 000000003990: 92540618 s_sub_u32 s86, s25, 1 // 000000003994: 80D68119 @@ -801,15 +477,6 @@ label_ShadowInitStart: s_add_u32 s16, s16, s84 // 0000000039C8: 80105410 s_addc_u32 s17, s17, s85 // 0000000039CC: 82115511 -label_GSU_2: - s_cmp_eq_u32 s12, 0 // 0000000039D0: BF06800C - s_cbranch_scc0 label_NoBranch_T8JHFHKM7BO5OHXW // 0000000039D4: BF840006 - s_getpc_b64 s[84:85] // 0000000039D8: BED41C00 - s_add_i32 s86, 0x25d8, 4 // 0000000039DC: 815684FF 000025D8 - s_add_u32 s84, s84, s86 // 0000000039E4: 80545654 - s_addc_u32 s85, s85, 0 // 0000000039E8: 82558055 - s_setpc_b64 s[84:85] // 0000000039EC: BE801D54 - label_NoBranch_T8JHFHKM7BO5OHXW: s_xor_b32 s46, s48, s46 // 0000000039F0: 882E2E30 s_xor_b32 s47, s49, s47 // 0000000039F4: 882F2F31 @@ -1966,2596 +1633,7 @@ label_GW_B0_E0: s_addc_u32 s17, s17, 0 // 000000005B14: 82118011 buffer_store_dwordx4 v[40:43], v11, s[16:19], 0 offen nt // 000000005B18: E07E1000 8004280B s_nop 0 // 000000005B20: BF800000 - s_branch label_GW_End // 000000005B24: BF820000 -label_toPGR1end_OrdNLL: - v_lshrrev_b32_e32 v8, 6, v134 // 000000005FB8: 20110C86 - v_lshrrev_b32_e32 v9, 1, v8 // 000000005FBC: 20121081 - v_mul_lo_u32 v9, 16, v9 // 000000005FC0: D2850009 00021290 - v_and_b32_e32 v5, 63, v134 // 000000005FC8: 260B0CBF - v_lshrrev_b32_e32 v5, 4, v5 // 000000005FCC: 200A0A84 - v_lshlrev_b32_e32 v5, 2, v5 // 000000005FD0: 240A0A82 - v_add_lshl_u32 v5, v9, v5, 3 // 000000005FD4: D1FE0005 020E0B09 - v_mul_lo_u32 v6, v5, s38 // 000000005FDC: D2850006 00004D05 - v_mul_lo_u32 v7, v5, s36 // 000000005FE4: D2850007 00004905 - v_and_b32_e32 v4, 1, v8 // 000000005FEC: 26081081 - v_mul_lo_u32 v4, 16, v4 // 000000005FF0: D2850004 00020890 - v_and_b32_e32 v9, 15, v134 // 000000005FF8: 26130C8F - v_add_lshl_u32 v4, v9, v4, 3 // 000000005FFC: D1FE0004 020E0909 - s_mul_i32 s8, 0x100, s2 // 000000006004: 920802FF 00000100 - v_add_u32_e32 v4, s8, v4 // 00000000600C: 68080808 - s_mul_i32 s8, 0x100, s3 // 000000006010: 920803FF 00000100 - v_add_u32_e32 v5, s8, v5 // 000000006018: 680A0A08 - s_and_b32 s8, s50, 0x3fff // 00000000601C: 8608FF32 00003FFF - s_cmp_eq_u32 s8, 1 // 000000006024: BF068108 - s_cbranch_scc1 label_GSU_4 // 000000006028: BF8516DB - s_and_b32 s30, 0xff, s24 // 00000000602C: 861E18FF 000000FF - s_add_u32 s31, -1, s14 // 000000006034: 801F0EC1 - s_cmp_ge_u32 s2, s31 // 000000006038: BF091F02 - s_cselect_b32 s30, s30, 0 // 00000000603C: 851E801E - s_cmpk_gt_u32 s30, 0x0 // 000000006040: B51E0000 - s_cbranch_scc1 label_GW_B0_E1_M // 000000006044: BF85074A - s_and_b32 s30, 0xff, s25 // 000000006048: 861E19FF 000000FF - s_add_u32 s31, -1, s15 // 000000006050: 801F0FC1 - s_cmp_ge_u32 s3, s31 // 000000006054: BF091F03 - s_cselect_b32 s30, s30, 0 // 000000006058: 851E801E - s_cmpk_gt_u32 s30, 0x0 // 00000000605C: B51E0000 - -label_GW_B0_E0_1: - v_add_lshl_u32 v15, v7, v4, 2 // 000000006064: D1FE000F 020A0907 - v_accvgpr_read_b32 v24, a0 // 00000000606C: D3D84018 18000100 - v_accvgpr_read_b32 v25, a4 // 000000006074: D3D84019 18000104 - v_accvgpr_read_b32 v26, a8 // 00000000607C: D3D8401A 18000108 - v_accvgpr_read_b32 v27, a12 // 000000006084: D3D8401B 1800010C - v_accvgpr_read_b32 v28, a16 // 00000000608C: D3D8401C 18000110 - v_accvgpr_read_b32 v29, a20 // 000000006094: D3D8401D 18000114 - v_accvgpr_read_b32 v30, a24 // 00000000609C: D3D8401E 18000118 - v_accvgpr_read_b32 v31, a28 // 0000000060A4: D3D8401F 1800011C - v_accvgpr_read_b32 v32, a32 // 0000000060AC: D3D84020 18000120 - v_accvgpr_read_b32 v33, a36 // 0000000060B4: D3D84021 18000124 - v_accvgpr_read_b32 v34, a40 // 0000000060BC: D3D84022 18000128 - v_accvgpr_read_b32 v35, a44 // 0000000060C4: D3D84023 1800012C - v_accvgpr_read_b32 v36, a48 // 0000000060CC: D3D84024 18000130 - v_accvgpr_read_b32 v37, a52 // 0000000060D4: D3D84025 18000134 - v_accvgpr_read_b32 v38, a56 // 0000000060DC: D3D84026 18000138 - v_accvgpr_read_b32 v39, a60 // 0000000060E4: D3D84027 1800013C - v_accvgpr_read_b32 v40, a64 // 0000000060EC: D3D84028 18000140 - v_accvgpr_read_b32 v41, a68 // 0000000060F4: D3D84029 18000144 - v_accvgpr_read_b32 v42, a72 // 0000000060FC: D3D8402A 18000148 - v_accvgpr_read_b32 v43, a76 // 000000006104: D3D8402B 1800014C - v_accvgpr_read_b32 v44, a80 // 00000000610C: D3D8402C 18000150 - v_accvgpr_read_b32 v45, a84 // 000000006114: D3D8402D 18000154 - v_accvgpr_read_b32 v46, a88 // 00000000611C: D3D8402E 18000158 - v_accvgpr_read_b32 v47, a92 // 000000006124: D3D8402F 1800015C - v_accvgpr_read_b32 v48, a96 // 00000000612C: D3D84030 18000160 - v_accvgpr_read_b32 v49, a100 // 000000006134: D3D84031 18000164 - v_accvgpr_read_b32 v50, a104 // 00000000613C: D3D84032 18000168 - v_accvgpr_read_b32 v51, a108 // 000000006144: D3D84033 1800016C - v_accvgpr_read_b32 v52, a112 // 00000000614C: D3D84034 18000170 - v_accvgpr_read_b32 v53, a116 // 000000006154: D3D84035 18000174 - v_accvgpr_read_b32 v54, a120 // 00000000615C: D3D84036 18000178 - v_accvgpr_read_b32 v55, a124 // 000000006164: D3D84037 1800017C - v_accvgpr_read_b32 v56, a128 // 00000000616C: D3D84038 18000180 - v_accvgpr_read_b32 v57, a132 // 000000006174: D3D84039 18000184 - v_accvgpr_read_b32 v58, a136 // 00000000617C: D3D8403A 18000188 - v_accvgpr_read_b32 v59, a140 // 000000006184: D3D8403B 1800018C - v_accvgpr_read_b32 v60, a144 // 00000000618C: D3D8403C 18000190 - v_accvgpr_read_b32 v61, a148 // 000000006194: D3D8403D 18000194 - v_accvgpr_read_b32 v62, a152 // 00000000619C: D3D8403E 18000198 - v_accvgpr_read_b32 v63, a156 // 0000000061A4: D3D8403F 1800019C - v_accvgpr_read_b32 v64, a160 // 0000000061AC: D3D84040 180001A0 - v_accvgpr_read_b32 v65, a164 // 0000000061B4: D3D84041 180001A4 - v_accvgpr_read_b32 v66, a168 // 0000000061BC: D3D84042 180001A8 - v_accvgpr_read_b32 v67, a172 // 0000000061C4: D3D84043 180001AC - v_accvgpr_read_b32 v68, a176 // 0000000061CC: D3D84044 180001B0 - v_accvgpr_read_b32 v69, a180 // 0000000061D4: D3D84045 180001B4 - v_accvgpr_read_b32 v70, a184 // 0000000061DC: D3D84046 180001B8 - v_accvgpr_read_b32 v71, a188 // 0000000061E4: D3D84047 180001BC - v_accvgpr_read_b32 v72, a192 // 0000000061EC: D3D84048 180001C0 - v_accvgpr_read_b32 v73, a196 // 0000000061F4: D3D84049 180001C4 - v_accvgpr_read_b32 v74, a200 // 0000000061FC: D3D8404A 180001C8 - v_accvgpr_read_b32 v75, a204 // 000000006204: D3D8404B 180001CC - v_accvgpr_read_b32 v76, a208 // 00000000620C: D3D8404C 180001D0 - v_accvgpr_read_b32 v77, a212 // 000000006214: D3D8404D 180001D4 - v_accvgpr_read_b32 v78, a216 // 00000000621C: D3D8404E 180001D8 - v_accvgpr_read_b32 v79, a220 // 000000006224: D3D8404F 180001DC - v_accvgpr_read_b32 v80, a224 // 00000000622C: D3D84050 180001E0 - v_accvgpr_read_b32 v81, a228 // 000000006234: D3D84051 180001E4 - v_accvgpr_read_b32 v82, a232 // 00000000623C: D3D84052 180001E8 - v_accvgpr_read_b32 v83, a236 // 000000006244: D3D84053 180001EC - v_accvgpr_read_b32 v84, a240 // 00000000624C: D3D84054 180001F0 - v_accvgpr_read_b32 v85, a244 // 000000006254: D3D84055 180001F4 - v_accvgpr_read_b32 v86, a248 // 00000000625C: D3D84056 180001F8 - v_accvgpr_read_b32 v87, a252 // 000000006264: D3D84057 180001FC - v_accvgpr_read_b32 v88, a1 // 00000000626C: D3D84058 18000101 - v_accvgpr_read_b32 v89, a5 // 000000006274: D3D84059 18000105 - v_accvgpr_read_b32 v90, a9 // 00000000627C: D3D8405A 18000109 - v_accvgpr_read_b32 v91, a13 // 000000006284: D3D8405B 1800010D - v_accvgpr_read_b32 v92, a17 // 00000000628C: D3D8405C 18000111 - v_accvgpr_read_b32 v93, a21 // 000000006294: D3D8405D 18000115 - v_accvgpr_read_b32 v94, a25 // 00000000629C: D3D8405E 18000119 - v_accvgpr_read_b32 v95, a29 // 0000000062A4: D3D8405F 1800011D - v_accvgpr_read_b32 v96, a33 // 0000000062AC: D3D84060 18000121 - v_accvgpr_read_b32 v97, a37 // 0000000062B4: D3D84061 18000125 - v_accvgpr_read_b32 v98, a41 // 0000000062BC: D3D84062 18000129 - v_accvgpr_read_b32 v99, a45 // 0000000062C4: D3D84063 1800012D - v_accvgpr_read_b32 v100, a49 // 0000000062CC: D3D84064 18000131 - v_accvgpr_read_b32 v101, a53 // 0000000062D4: D3D84065 18000135 - v_accvgpr_read_b32 v102, a57 // 0000000062DC: D3D84066 18000139 - v_accvgpr_read_b32 v103, a61 // 0000000062E4: D3D84067 1800013D - v_accvgpr_read_b32 v104, a65 // 0000000062EC: D3D84068 18000141 - v_accvgpr_read_b32 v105, a69 // 0000000062F4: D3D84069 18000145 - v_accvgpr_read_b32 v106, a73 // 0000000062FC: D3D8406A 18000149 - v_accvgpr_read_b32 v107, a77 // 000000006304: D3D8406B 1800014D - v_accvgpr_read_b32 v108, a81 // 00000000630C: D3D8406C 18000151 - v_accvgpr_read_b32 v109, a85 // 000000006314: D3D8406D 18000155 - v_accvgpr_read_b32 v110, a89 // 00000000631C: D3D8406E 18000159 - v_accvgpr_read_b32 v111, a93 // 000000006324: D3D8406F 1800015D - v_accvgpr_read_b32 v112, a97 // 00000000632C: D3D84070 18000161 - v_accvgpr_read_b32 v113, a101 // 000000006334: D3D84071 18000165 - v_accvgpr_read_b32 v114, a105 // 00000000633C: D3D84072 18000169 - v_accvgpr_read_b32 v115, a109 // 000000006344: D3D84073 1800016D - v_accvgpr_read_b32 v116, a113 // 00000000634C: D3D84074 18000171 - v_accvgpr_read_b32 v117, a117 // 000000006354: D3D84075 18000175 - v_accvgpr_read_b32 v118, a121 // 00000000635C: D3D84076 18000179 - v_accvgpr_read_b32 v119, a125 // 000000006364: D3D84077 1800017D - v_accvgpr_read_b32 v120, a129 // 00000000636C: D3D84078 18000181 - v_accvgpr_read_b32 v121, a133 // 000000006374: D3D84079 18000185 - v_accvgpr_read_b32 v122, a137 // 00000000637C: D3D8407A 18000189 - v_accvgpr_read_b32 v123, a141 // 000000006384: D3D8407B 1800018D - v_accvgpr_read_b32 v124, a145 // 00000000638C: D3D8407C 18000191 - v_accvgpr_read_b32 v125, a149 // 000000006394: D3D8407D 18000195 - v_accvgpr_read_b32 v126, a153 // 00000000639C: D3D8407E 18000199 - v_accvgpr_read_b32 v127, a157 // 0000000063A4: D3D8407F 1800019D - v_accvgpr_read_b32 v136, a161 // 0000000063AC: D3D84088 180001A1 - v_accvgpr_read_b32 v137, a165 // 0000000063B4: D3D84089 180001A5 - v_accvgpr_read_b32 v138, a169 // 0000000063BC: D3D8408A 180001A9 - v_accvgpr_read_b32 v139, a173 // 0000000063C4: D3D8408B 180001AD - v_accvgpr_read_b32 v140, a177 // 0000000063CC: D3D8408C 180001B1 - v_accvgpr_read_b32 v141, a181 // 0000000063D4: D3D8408D 180001B5 - v_accvgpr_read_b32 v142, a185 // 0000000063DC: D3D8408E 180001B9 - v_accvgpr_read_b32 v143, a189 // 0000000063E4: D3D8408F 180001BD - v_accvgpr_read_b32 v144, a193 // 0000000063EC: D3D84090 180001C1 - v_accvgpr_read_b32 v145, a197 // 0000000063F4: D3D84091 180001C5 - v_accvgpr_read_b32 v146, a201 // 0000000063FC: D3D84092 180001C9 - v_accvgpr_read_b32 v147, a205 // 000000006404: D3D84093 180001CD - v_accvgpr_read_b32 v148, a209 // 00000000640C: D3D84094 180001D1 - v_accvgpr_read_b32 v149, a213 // 000000006414: D3D84095 180001D5 - v_accvgpr_read_b32 v150, a217 // 00000000641C: D3D84096 180001D9 - v_accvgpr_read_b32 v151, a221 // 000000006424: D3D84097 180001DD - v_accvgpr_read_b32 v152, a225 // 00000000642C: D3D84098 180001E1 - v_accvgpr_read_b32 v153, a229 // 000000006434: D3D84099 180001E5 - v_accvgpr_read_b32 v154, a233 // 00000000643C: D3D8409A 180001E9 - v_accvgpr_read_b32 v155, a237 // 000000006444: D3D8409B 180001ED - v_accvgpr_read_b32 v156, a241 // 00000000644C: D3D8409C 180001F1 - v_accvgpr_read_b32 v157, a245 // 000000006454: D3D8409D 180001F5 - v_accvgpr_read_b32 v158, a249 // 00000000645C: D3D8409E 180001F9 - v_accvgpr_read_b32 v159, a253 // 000000006464: D3D8409F 180001FD - v_accvgpr_read_b32 v160, a2 // 00000000646C: D3D840A0 18000102 - v_accvgpr_read_b32 v161, a6 // 000000006474: D3D840A1 18000106 - v_accvgpr_read_b32 v162, a10 // 00000000647C: D3D840A2 1800010A - v_accvgpr_read_b32 v163, a14 // 000000006484: D3D840A3 1800010E - v_accvgpr_read_b32 v164, a18 // 00000000648C: D3D840A4 18000112 - v_accvgpr_read_b32 v165, a22 // 000000006494: D3D840A5 18000116 - v_accvgpr_read_b32 v166, a26 // 00000000649C: D3D840A6 1800011A - v_accvgpr_read_b32 v167, a30 // 0000000064A4: D3D840A7 1800011E - v_accvgpr_read_b32 v168, a34 // 0000000064AC: D3D840A8 18000122 - v_accvgpr_read_b32 v169, a38 // 0000000064B4: D3D840A9 18000126 - v_accvgpr_read_b32 v170, a42 // 0000000064BC: D3D840AA 1800012A - v_accvgpr_read_b32 v171, a46 // 0000000064C4: D3D840AB 1800012E - v_accvgpr_read_b32 v172, a50 // 0000000064CC: D3D840AC 18000132 - v_accvgpr_read_b32 v173, a54 // 0000000064D4: D3D840AD 18000136 - v_accvgpr_read_b32 v174, a58 // 0000000064DC: D3D840AE 1800013A - v_accvgpr_read_b32 v175, a62 // 0000000064E4: D3D840AF 1800013E - v_accvgpr_read_b32 v176, a66 // 0000000064EC: D3D840B0 18000142 - v_accvgpr_read_b32 v177, a70 // 0000000064F4: D3D840B1 18000146 - v_accvgpr_read_b32 v178, a74 // 0000000064FC: D3D840B2 1800014A - v_accvgpr_read_b32 v179, a78 // 000000006504: D3D840B3 1800014E - v_accvgpr_read_b32 v180, a82 // 00000000650C: D3D840B4 18000152 - v_accvgpr_read_b32 v181, a86 // 000000006514: D3D840B5 18000156 - v_accvgpr_read_b32 v182, a90 // 00000000651C: D3D840B6 1800015A - v_accvgpr_read_b32 v183, a94 // 000000006524: D3D840B7 1800015E - v_accvgpr_read_b32 v184, a98 // 00000000652C: D3D840B8 18000162 - v_accvgpr_read_b32 v185, a102 // 000000006534: D3D840B9 18000166 - v_accvgpr_read_b32 v186, a106 // 00000000653C: D3D840BA 1800016A - v_accvgpr_read_b32 v187, a110 // 000000006544: D3D840BB 1800016E - v_accvgpr_read_b32 v188, a114 // 00000000654C: D3D840BC 18000172 - v_accvgpr_read_b32 v189, a118 // 000000006554: D3D840BD 18000176 - v_accvgpr_read_b32 v190, a122 // 00000000655C: D3D840BE 1800017A - v_accvgpr_read_b32 v191, a126 // 000000006564: D3D840BF 1800017E - v_accvgpr_read_b32 v192, a130 // 00000000656C: D3D840C0 18000182 - v_accvgpr_read_b32 v193, a134 // 000000006574: D3D840C1 18000186 - v_accvgpr_read_b32 v194, a138 // 00000000657C: D3D840C2 1800018A - v_accvgpr_read_b32 v195, a142 // 000000006584: D3D840C3 1800018E - v_accvgpr_read_b32 v196, a146 // 00000000658C: D3D840C4 18000192 - v_accvgpr_read_b32 v197, a150 // 000000006594: D3D840C5 18000196 - v_accvgpr_read_b32 v198, a154 // 00000000659C: D3D840C6 1800019A - v_accvgpr_read_b32 v199, a158 // 0000000065A4: D3D840C7 1800019E - v_accvgpr_read_b32 v200, a162 // 0000000065AC: D3D840C8 180001A2 - v_accvgpr_read_b32 v201, a166 // 0000000065B4: D3D840C9 180001A6 - v_accvgpr_read_b32 v202, a170 // 0000000065BC: D3D840CA 180001AA - v_accvgpr_read_b32 v203, a174 // 0000000065C4: D3D840CB 180001AE - v_accvgpr_read_b32 v204, a178 // 0000000065CC: D3D840CC 180001B2 - v_accvgpr_read_b32 v205, a182 // 0000000065D4: D3D840CD 180001B6 - v_accvgpr_read_b32 v206, a186 // 0000000065DC: D3D840CE 180001BA - v_accvgpr_read_b32 v207, a190 // 0000000065E4: D3D840CF 180001BE - v_accvgpr_read_b32 v208, a194 // 0000000065EC: D3D840D0 180001C2 - v_accvgpr_read_b32 v209, a198 // 0000000065F4: D3D840D1 180001C6 - v_accvgpr_read_b32 v210, a202 // 0000000065FC: D3D840D2 180001CA - v_accvgpr_read_b32 v211, a206 // 000000006604: D3D840D3 180001CE - v_accvgpr_read_b32 v212, a210 // 00000000660C: D3D840D4 180001D2 - v_accvgpr_read_b32 v213, a214 // 000000006614: D3D840D5 180001D6 - v_accvgpr_read_b32 v214, a218 // 00000000661C: D3D840D6 180001DA - v_accvgpr_read_b32 v215, a222 // 000000006624: D3D840D7 180001DE - v_accvgpr_read_b32 v216, a226 // 00000000662C: D3D840D8 180001E2 - v_accvgpr_read_b32 v217, a230 // 000000006634: D3D840D9 180001E6 - v_accvgpr_read_b32 v218, a234 // 00000000663C: D3D840DA 180001EA - v_accvgpr_read_b32 v219, a238 // 000000006644: D3D840DB 180001EE - v_accvgpr_read_b32 v220, a242 // 00000000664C: D3D840DC 180001F2 - v_accvgpr_read_b32 v221, a246 // 000000006654: D3D840DD 180001F6 - v_accvgpr_read_b32 v222, a250 // 00000000665C: D3D840DE 180001FA - v_accvgpr_read_b32 v223, a254 // 000000006664: D3D840DF 180001FE - v_accvgpr_read_b32 v224, a3 // 00000000666C: D3D840E0 18000103 - v_accvgpr_read_b32 v225, a7 // 000000006674: D3D840E1 18000107 - v_accvgpr_read_b32 v226, a11 // 00000000667C: D3D840E2 1800010B - v_accvgpr_read_b32 v227, a15 // 000000006684: D3D840E3 1800010F - v_accvgpr_read_b32 v228, a19 // 00000000668C: D3D840E4 18000113 - v_accvgpr_read_b32 v229, a23 // 000000006694: D3D840E5 18000117 - v_accvgpr_read_b32 v230, a27 // 00000000669C: D3D840E6 1800011B - v_accvgpr_read_b32 v231, a31 // 0000000066A4: D3D840E7 1800011F - v_accvgpr_read_b32 v232, a35 // 0000000066AC: D3D840E8 18000123 - v_accvgpr_read_b32 v233, a39 // 0000000066B4: D3D840E9 18000127 - v_accvgpr_read_b32 v234, a43 // 0000000066BC: D3D840EA 1800012B - v_accvgpr_read_b32 v235, a47 // 0000000066C4: D3D840EB 1800012F - v_accvgpr_read_b32 v236, a51 // 0000000066CC: D3D840EC 18000133 - v_accvgpr_read_b32 v237, a55 // 0000000066D4: D3D840ED 18000137 - v_accvgpr_read_b32 v238, a59 // 0000000066DC: D3D840EE 1800013B - v_accvgpr_read_b32 v239, a63 // 0000000066E4: D3D840EF 1800013F - buffer_store_dwordx4 v[24:27], v15, s[16:19], 0 offen nt // 0000000066EC: E07E1000 8004180F - buffer_store_dwordx4 v[28:31], v15, s[16:19], 0 offen offset:16 nt// 0000000066F4: E07E1010 80041C0F - s_lshl_b32 s12, s36, 2 // 0000000066FC: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006700: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006704: 82118011 - buffer_store_dwordx4 v[32:35], v15, s[16:19], 0 offen nt // 000000006708: E07E1000 8004200F - buffer_store_dwordx4 v[36:39], v15, s[16:19], 0 offen offset:16 nt// 000000006710: E07E1010 8004240F - s_lshl_b32 s12, s36, 2 // 000000006718: 8E0C8224 - s_add_u32 s16, s16, s12 // 00000000671C: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006720: 82118011 - buffer_store_dwordx4 v[40:43], v15, s[16:19], 0 offen nt // 000000006724: E07E1000 8004280F - buffer_store_dwordx4 v[44:47], v15, s[16:19], 0 offen offset:16 nt// 00000000672C: E07E1010 80042C0F - s_lshl_b32 s12, s36, 2 // 000000006734: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006738: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000673C: 82118011 - buffer_store_dwordx4 v[48:51], v15, s[16:19], 0 offen nt // 000000006740: E07E1000 8004300F - buffer_store_dwordx4 v[52:55], v15, s[16:19], 0 offen offset:16 nt// 000000006748: E07E1010 8004340F - s_lshl_b32 s12, s36, 2 // 000000006750: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006754: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006758: 82118011 - buffer_store_dwordx4 v[56:59], v15, s[16:19], 0 offen nt // 00000000675C: E07E1000 8004380F - buffer_store_dwordx4 v[60:63], v15, s[16:19], 0 offen offset:16 nt// 000000006764: E07E1010 80043C0F - s_lshl_b32 s12, s36, 2 // 00000000676C: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006770: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006774: 82118011 - buffer_store_dwordx4 v[64:67], v15, s[16:19], 0 offen nt // 000000006778: E07E1000 8004400F - buffer_store_dwordx4 v[68:71], v15, s[16:19], 0 offen offset:16 nt// 000000006780: E07E1010 8004440F - s_lshl_b32 s12, s36, 2 // 000000006788: 8E0C8224 - s_add_u32 s16, s16, s12 // 00000000678C: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006790: 82118011 - buffer_store_dwordx4 v[72:75], v15, s[16:19], 0 offen nt // 000000006794: E07E1000 8004480F - buffer_store_dwordx4 v[76:79], v15, s[16:19], 0 offen offset:16 nt// 00000000679C: E07E1010 80044C0F - s_lshl_b32 s12, s36, 2 // 0000000067A4: 8E0C8224 - s_add_u32 s16, s16, s12 // 0000000067A8: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000067AC: 82118011 - buffer_store_dwordx4 v[80:83], v15, s[16:19], 0 offen nt // 0000000067B0: E07E1000 8004500F - buffer_store_dwordx4 v[84:87], v15, s[16:19], 0 offen offset:16 nt// 0000000067B8: E07E1010 8004540F - s_lshl_b32 s12, s36, 2 // 0000000067C0: 8E0C8224 - s_add_u32 s16, s16, s12 // 0000000067C4: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000067C8: 82118011 - buffer_store_dwordx4 v[88:91], v15, s[16:19], 0 offen nt // 0000000067CC: E07E1000 8004580F - buffer_store_dwordx4 v[92:95], v15, s[16:19], 0 offen offset:16 nt// 0000000067D4: E07E1010 80045C0F - s_lshl_b32 s12, s36, 2 // 0000000067DC: 8E0C8224 - s_add_u32 s16, s16, s12 // 0000000067E0: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000067E4: 82118011 - buffer_store_dwordx4 v[96:99], v15, s[16:19], 0 offen nt // 0000000067E8: E07E1000 8004600F - buffer_store_dwordx4 v[100:103], v15, s[16:19], 0 offen offset:16 nt// 0000000067F0: E07E1010 8004640F - s_lshl_b32 s12, s36, 2 // 0000000067F8: 8E0C8224 - s_add_u32 s16, s16, s12 // 0000000067FC: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006800: 82118011 - buffer_store_dwordx4 v[104:107], v15, s[16:19], 0 offen nt // 000000006804: E07E1000 8004680F - buffer_store_dwordx4 v[108:111], v15, s[16:19], 0 offen offset:16 nt// 00000000680C: E07E1010 80046C0F - s_lshl_b32 s12, s36, 2 // 000000006814: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006818: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000681C: 82118011 - buffer_store_dwordx4 v[112:115], v15, s[16:19], 0 offen nt // 000000006820: E07E1000 8004700F - buffer_store_dwordx4 v[116:119], v15, s[16:19], 0 offen offset:16 nt// 000000006828: E07E1010 8004740F - s_lshl_b32 s12, s36, 2 // 000000006830: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006834: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006838: 82118011 - buffer_store_dwordx4 v[120:123], v15, s[16:19], 0 offen nt // 00000000683C: E07E1000 8004780F - buffer_store_dwordx4 v[124:127], v15, s[16:19], 0 offen offset:16 nt// 000000006844: E07E1010 80047C0F - s_lshl_b32 s12, s36, 2 // 00000000684C: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006850: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006854: 82118011 - buffer_store_dwordx4 v[136:139], v15, s[16:19], 0 offen nt // 000000006858: E07E1000 8004880F - buffer_store_dwordx4 v[140:143], v15, s[16:19], 0 offen offset:16 nt// 000000006860: E07E1010 80048C0F - s_lshl_b32 s12, s36, 2 // 000000006868: 8E0C8224 - s_add_u32 s16, s16, s12 // 00000000686C: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006870: 82118011 - buffer_store_dwordx4 v[144:147], v15, s[16:19], 0 offen nt // 000000006874: E07E1000 8004900F - buffer_store_dwordx4 v[148:151], v15, s[16:19], 0 offen offset:16 nt// 00000000687C: E07E1010 8004940F - s_lshl_b32 s12, s36, 2 // 000000006884: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006888: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000688C: 82118011 - buffer_store_dwordx4 v[152:155], v15, s[16:19], 0 offen nt // 000000006890: E07E1000 8004980F - buffer_store_dwordx4 v[156:159], v15, s[16:19], 0 offen offset:16 nt// 000000006898: E07E1010 80049C0F - s_lshl_b32 s12, s36, 2 // 0000000068A0: 8E0C8224 - s_add_u32 s16, s16, s12 // 0000000068A4: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000068A8: 82118011 - buffer_store_dwordx4 v[160:163], v15, s[16:19], 0 offen nt // 0000000068AC: E07E1000 8004A00F - buffer_store_dwordx4 v[164:167], v15, s[16:19], 0 offen offset:16 nt// 0000000068B4: E07E1010 8004A40F - s_lshl_b32 s12, s36, 2 // 0000000068BC: 8E0C8224 - s_add_u32 s16, s16, s12 // 0000000068C0: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000068C4: 82118011 - buffer_store_dwordx4 v[168:171], v15, s[16:19], 0 offen nt // 0000000068C8: E07E1000 8004A80F - buffer_store_dwordx4 v[172:175], v15, s[16:19], 0 offen offset:16 nt// 0000000068D0: E07E1010 8004AC0F - s_lshl_b32 s12, s36, 2 // 0000000068D8: 8E0C8224 - s_add_u32 s16, s16, s12 // 0000000068DC: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000068E0: 82118011 - buffer_store_dwordx4 v[176:179], v15, s[16:19], 0 offen nt // 0000000068E4: E07E1000 8004B00F - buffer_store_dwordx4 v[180:183], v15, s[16:19], 0 offen offset:16 nt// 0000000068EC: E07E1010 8004B40F - s_lshl_b32 s12, s36, 2 // 0000000068F4: 8E0C8224 - s_add_u32 s16, s16, s12 // 0000000068F8: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000068FC: 82118011 - buffer_store_dwordx4 v[184:187], v15, s[16:19], 0 offen nt // 000000006900: E07E1000 8004B80F - buffer_store_dwordx4 v[188:191], v15, s[16:19], 0 offen offset:16 nt// 000000006908: E07E1010 8004BC0F - s_lshl_b32 s12, s36, 2 // 000000006910: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006914: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006918: 82118011 - buffer_store_dwordx4 v[192:195], v15, s[16:19], 0 offen nt // 00000000691C: E07E1000 8004C00F - buffer_store_dwordx4 v[196:199], v15, s[16:19], 0 offen offset:16 nt// 000000006924: E07E1010 8004C40F - s_lshl_b32 s12, s36, 2 // 00000000692C: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006930: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006934: 82118011 - buffer_store_dwordx4 v[200:203], v15, s[16:19], 0 offen nt // 000000006938: E07E1000 8004C80F - buffer_store_dwordx4 v[204:207], v15, s[16:19], 0 offen offset:16 nt// 000000006940: E07E1010 8004CC0F - s_lshl_b32 s12, s36, 2 // 000000006948: 8E0C8224 - s_add_u32 s16, s16, s12 // 00000000694C: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006950: 82118011 - buffer_store_dwordx4 v[208:211], v15, s[16:19], 0 offen nt // 000000006954: E07E1000 8004D00F - buffer_store_dwordx4 v[212:215], v15, s[16:19], 0 offen offset:16 nt// 00000000695C: E07E1010 8004D40F - s_lshl_b32 s12, s36, 2 // 000000006964: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006968: 80100C10 - s_addc_u32 s17, s17, 0 // 00000000696C: 82118011 - buffer_store_dwordx4 v[216:219], v15, s[16:19], 0 offen nt // 000000006970: E07E1000 8004D80F - buffer_store_dwordx4 v[220:223], v15, s[16:19], 0 offen offset:16 nt// 000000006978: E07E1010 8004DC0F - s_lshl_b32 s12, s36, 2 // 000000006980: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006984: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006988: 82118011 - buffer_store_dwordx4 v[224:227], v15, s[16:19], 0 offen nt // 00000000698C: E07E1000 8004E00F - buffer_store_dwordx4 v[228:231], v15, s[16:19], 0 offen offset:16 nt// 000000006994: E07E1010 8004E40F - s_lshl_b32 s12, s36, 2 // 00000000699C: 8E0C8224 - s_add_u32 s16, s16, s12 // 0000000069A0: 80100C10 - s_addc_u32 s17, s17, 0 // 0000000069A4: 82118011 - buffer_store_dwordx4 v[232:235], v15, s[16:19], 0 offen nt // 0000000069A8: E07E1000 8004E80F - buffer_store_dwordx4 v[236:239], v15, s[16:19], 0 offen offset:16 nt// 0000000069B0: E07E1010 8004EC0F - s_nop 0 // 0000000069B8: BF800000 - v_accvgpr_read_b32 v24, a67 // 0000000069BC: D3D84018 18000143 - v_accvgpr_read_b32 v25, a71 // 0000000069C4: D3D84019 18000147 - v_accvgpr_read_b32 v26, a75 // 0000000069CC: D3D8401A 1800014B - v_accvgpr_read_b32 v27, a79 // 0000000069D4: D3D8401B 1800014F - v_accvgpr_read_b32 v28, a83 // 0000000069DC: D3D8401C 18000153 - v_accvgpr_read_b32 v29, a87 // 0000000069E4: D3D8401D 18000157 - v_accvgpr_read_b32 v30, a91 // 0000000069EC: D3D8401E 1800015B - v_accvgpr_read_b32 v31, a95 // 0000000069F4: D3D8401F 1800015F - v_accvgpr_read_b32 v32, a99 // 0000000069FC: D3D84020 18000163 - v_accvgpr_read_b32 v33, a103 // 000000006A04: D3D84021 18000167 - v_accvgpr_read_b32 v34, a107 // 000000006A0C: D3D84022 1800016B - v_accvgpr_read_b32 v35, a111 // 000000006A14: D3D84023 1800016F - v_accvgpr_read_b32 v36, a115 // 000000006A1C: D3D84024 18000173 - v_accvgpr_read_b32 v37, a119 // 000000006A24: D3D84025 18000177 - v_accvgpr_read_b32 v38, a123 // 000000006A2C: D3D84026 1800017B - v_accvgpr_read_b32 v39, a127 // 000000006A34: D3D84027 1800017F - v_accvgpr_read_b32 v40, a131 // 000000006A3C: D3D84028 18000183 - v_accvgpr_read_b32 v41, a135 // 000000006A44: D3D84029 18000187 - v_accvgpr_read_b32 v42, a139 // 000000006A4C: D3D8402A 1800018B - v_accvgpr_read_b32 v43, a143 // 000000006A54: D3D8402B 1800018F - v_accvgpr_read_b32 v44, a147 // 000000006A5C: D3D8402C 18000193 - v_accvgpr_read_b32 v45, a151 // 000000006A64: D3D8402D 18000197 - v_accvgpr_read_b32 v46, a155 // 000000006A6C: D3D8402E 1800019B - v_accvgpr_read_b32 v47, a159 // 000000006A74: D3D8402F 1800019F - v_accvgpr_read_b32 v48, a163 // 000000006A7C: D3D84030 180001A3 - v_accvgpr_read_b32 v49, a167 // 000000006A84: D3D84031 180001A7 - v_accvgpr_read_b32 v50, a171 // 000000006A8C: D3D84032 180001AB - v_accvgpr_read_b32 v51, a175 // 000000006A94: D3D84033 180001AF - v_accvgpr_read_b32 v52, a179 // 000000006A9C: D3D84034 180001B3 - v_accvgpr_read_b32 v53, a183 // 000000006AA4: D3D84035 180001B7 - v_accvgpr_read_b32 v54, a187 // 000000006AAC: D3D84036 180001BB - v_accvgpr_read_b32 v55, a191 // 000000006AB4: D3D84037 180001BF - v_accvgpr_read_b32 v56, a195 // 000000006ABC: D3D84038 180001C3 - v_accvgpr_read_b32 v57, a199 // 000000006AC4: D3D84039 180001C7 - v_accvgpr_read_b32 v58, a203 // 000000006ACC: D3D8403A 180001CB - v_accvgpr_read_b32 v59, a207 // 000000006AD4: D3D8403B 180001CF - v_accvgpr_read_b32 v60, a211 // 000000006ADC: D3D8403C 180001D3 - v_accvgpr_read_b32 v61, a215 // 000000006AE4: D3D8403D 180001D7 - v_accvgpr_read_b32 v62, a219 // 000000006AEC: D3D8403E 180001DB - v_accvgpr_read_b32 v63, a223 // 000000006AF4: D3D8403F 180001DF - v_accvgpr_read_b32 v64, a227 // 000000006AFC: D3D84040 180001E3 - v_accvgpr_read_b32 v65, a231 // 000000006B04: D3D84041 180001E7 - v_accvgpr_read_b32 v66, a235 // 000000006B0C: D3D84042 180001EB - v_accvgpr_read_b32 v67, a239 // 000000006B14: D3D84043 180001EF - v_accvgpr_read_b32 v68, a243 // 000000006B1C: D3D84044 180001F3 - v_accvgpr_read_b32 v69, a247 // 000000006B24: D3D84045 180001F7 - v_accvgpr_read_b32 v70, a251 // 000000006B2C: D3D84046 180001FB - v_accvgpr_read_b32 v71, a255 // 000000006B34: D3D84047 180001FF - s_lshl_b32 s12, s36, 2 // 000000006B3C: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006B40: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006B44: 82118011 - buffer_store_dwordx4 v[24:27], v15, s[16:19], 0 offen nt // 000000006B48: E07E1000 8004180F - buffer_store_dwordx4 v[28:31], v15, s[16:19], 0 offen offset:16 nt// 000000006B50: E07E1010 80041C0F - s_lshl_b32 s12, s36, 2 // 000000006B58: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006B5C: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006B60: 82118011 - buffer_store_dwordx4 v[32:35], v15, s[16:19], 0 offen nt // 000000006B64: E07E1000 8004200F - buffer_store_dwordx4 v[36:39], v15, s[16:19], 0 offen offset:16 nt// 000000006B6C: E07E1010 8004240F - s_lshl_b32 s12, s36, 2 // 000000006B74: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006B78: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006B7C: 82118011 - buffer_store_dwordx4 v[40:43], v15, s[16:19], 0 offen nt // 000000006B80: E07E1000 8004280F - buffer_store_dwordx4 v[44:47], v15, s[16:19], 0 offen offset:16 nt// 000000006B88: E07E1010 80042C0F - s_lshl_b32 s12, s36, 2 // 000000006B90: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006B94: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006B98: 82118011 - buffer_store_dwordx4 v[48:51], v15, s[16:19], 0 offen nt // 000000006B9C: E07E1000 8004300F - buffer_store_dwordx4 v[52:55], v15, s[16:19], 0 offen offset:16 nt// 000000006BA4: E07E1010 8004340F - s_lshl_b32 s12, s36, 2 // 000000006BAC: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006BB0: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006BB4: 82118011 - buffer_store_dwordx4 v[56:59], v15, s[16:19], 0 offen nt // 000000006BB8: E07E1000 8004380F - buffer_store_dwordx4 v[60:63], v15, s[16:19], 0 offen offset:16 nt// 000000006BC0: E07E1010 80043C0F - s_lshl_b32 s12, s36, 2 // 000000006BC8: 8E0C8224 - s_add_u32 s16, s16, s12 // 000000006BCC: 80100C10 - s_addc_u32 s17, s17, 0 // 000000006BD0: 82118011 - buffer_store_dwordx4 v[64:67], v15, s[16:19], 0 offen nt // 000000006BD4: E07E1000 8004400F - buffer_store_dwordx4 v[68:71], v15, s[16:19], 0 offen offset:16 nt// 000000006BDC: E07E1010 8004440F - s_nop 0 // 000000006BE4: BF800000 - s_branch label_GW_End_1 // 000000006BE8: BF8213E5 - -label_GW_B0_E1_M: - v_mov_b32_e32 v10, 0x80000000 // 000000007D70: 7E1402FF 80000000 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000007D78: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007D80: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000007D88: 86A2221E - v_add_lshl_u32 v129, v7, v4, 2 // 000000007D8C: D1FE0081 020A0907 - v_cndmask_b32_e64 v129, v10, v129, s[34:35] // 000000007D94: D1000081 008B030A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000007D9C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000007DA4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007DAC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000007DB4: 86A2221E - v_add_lshl_u32 v130, v7, v8, 2 // 000000007DB8: D1FE0082 020A1107 - v_cndmask_b32_e64 v130, v10, v130, s[34:35] // 000000007DC0: D1000082 008B050A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000007DC8: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000007DD0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007DD8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000007DE0: 86A2221E - v_add_lshl_u32 v131, v7, v8, 2 // 000000007DE4: D1FE0083 020A1107 - v_cndmask_b32_e64 v131, v10, v131, s[34:35] // 000000007DEC: D1000083 008B070A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000007DF4: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000007DFC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007E04: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000007E0C: 86A2221E - v_add_lshl_u32 v135, v7, v8, 2 // 000000007E10: D1FE0087 020A1107 - v_cndmask_b32_e64 v135, v10, v135, s[34:35] // 000000007E18: D1000087 008B0F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000007E20: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000007E28: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007E30: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000007E38: 86A2221E - v_add_lshl_u32 v136, v7, v8, 2 // 000000007E3C: D1FE0088 020A1107 - v_cndmask_b32_e64 v136, v10, v136, s[34:35] // 000000007E44: D1000088 008B110A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000007E4C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000007E54: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007E5C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000007E64: 86A2221E - v_add_lshl_u32 v137, v7, v8, 2 // 000000007E68: D1FE0089 020A1107 - v_cndmask_b32_e64 v137, v10, v137, s[34:35] // 000000007E70: D1000089 008B130A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000007E78: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000007E80: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007E88: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000007E90: 86A2221E - v_add_lshl_u32 v138, v7, v8, 2 // 000000007E94: D1FE008A 020A1107 - v_cndmask_b32_e64 v138, v10, v138, s[34:35] // 000000007E9C: D100008A 008B150A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000007EA4: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000007EAC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007EB4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000007EBC: 86A2221E - v_add_lshl_u32 v139, v7, v8, 2 // 000000007EC0: D1FE008B 020A1107 - v_cndmask_b32_e64 v139, v10, v139, s[34:35] // 000000007EC8: D100008B 008B170A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000007ED0: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000007ED8: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000007EE0: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000007EE8: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007EF0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000007EF8: 86A2221E - v_add_lshl_u32 v140, v7, v4, 2 // 000000007EFC: D1FE008C 020A0907 - v_cndmask_b32_e64 v140, v10, v140, s[34:35] // 000000007F04: D100008C 008B190A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000007F0C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000007F14: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007F1C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000007F24: 86A2221E - v_add_lshl_u32 v141, v7, v8, 2 // 000000007F28: D1FE008D 020A1107 - v_cndmask_b32_e64 v141, v10, v141, s[34:35] // 000000007F30: D100008D 008B1B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000007F38: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000007F40: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007F48: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000007F50: 86A2221E - v_add_lshl_u32 v142, v7, v8, 2 // 000000007F54: D1FE008E 020A1107 - v_cndmask_b32_e64 v142, v10, v142, s[34:35] // 000000007F5C: D100008E 008B1D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000007F64: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000007F6C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007F74: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000007F7C: 86A2221E - v_add_lshl_u32 v143, v7, v8, 2 // 000000007F80: D1FE008F 020A1107 - v_cndmask_b32_e64 v143, v10, v143, s[34:35] // 000000007F88: D100008F 008B1F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000007F90: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000007F98: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007FA0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000007FA8: 86A2221E - v_add_lshl_u32 v144, v7, v8, 2 // 000000007FAC: D1FE0090 020A1107 - v_cndmask_b32_e64 v144, v10, v144, s[34:35] // 000000007FB4: D1000090 008B210A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000007FBC: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000007FC4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007FCC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000007FD4: 86A2221E - v_add_lshl_u32 v145, v7, v8, 2 // 000000007FD8: D1FE0091 020A1107 - v_cndmask_b32_e64 v145, v10, v145, s[34:35] // 000000007FE0: D1000091 008B230A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000007FE8: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000007FF0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000007FF8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008000: 86A2221E - v_add_lshl_u32 v146, v7, v8, 2 // 000000008004: D1FE0092 020A1107 - v_cndmask_b32_e64 v146, v10, v146, s[34:35] // 00000000800C: D1000092 008B250A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000008014: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000801C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008024: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000802C: 86A2221E - v_add_lshl_u32 v147, v7, v8, 2 // 000000008030: D1FE0093 020A1107 - v_cndmask_b32_e64 v147, v10, v147, s[34:35] // 000000008038: D1000093 008B270A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000008040: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000008048: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000008050: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000008058: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008060: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008068: 86A2221E - v_add_lshl_u32 v148, v7, v4, 2 // 00000000806C: D1FE0094 020A0907 - v_cndmask_b32_e64 v148, v10, v148, s[34:35] // 000000008074: D1000094 008B290A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000807C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008084: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000808C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008094: 86A2221E - v_add_lshl_u32 v149, v7, v8, 2 // 000000008098: D1FE0095 020A1107 - v_cndmask_b32_e64 v149, v10, v149, s[34:35] // 0000000080A0: D1000095 008B2B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 0000000080A8: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000080B0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000080B8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000080C0: 86A2221E - v_add_lshl_u32 v150, v7, v8, 2 // 0000000080C4: D1FE0096 020A1107 - v_cndmask_b32_e64 v150, v10, v150, s[34:35] // 0000000080CC: D1000096 008B2D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 0000000080D4: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000080DC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000080E4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000080EC: 86A2221E - v_add_lshl_u32 v151, v7, v8, 2 // 0000000080F0: D1FE0097 020A1107 - v_cndmask_b32_e64 v151, v10, v151, s[34:35] // 0000000080F8: D1000097 008B2F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000008100: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008108: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008110: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008118: 86A2221E - v_add_lshl_u32 v152, v7, v8, 2 // 00000000811C: D1FE0098 020A1107 - v_cndmask_b32_e64 v152, v10, v152, s[34:35] // 000000008124: D1000098 008B310A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000812C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008134: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000813C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008144: 86A2221E - v_add_lshl_u32 v153, v7, v8, 2 // 000000008148: D1FE0099 020A1107 - v_cndmask_b32_e64 v153, v10, v153, s[34:35] // 000000008150: D1000099 008B330A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000008158: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008160: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008168: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008170: 86A2221E - v_add_lshl_u32 v154, v7, v8, 2 // 000000008174: D1FE009A 020A1107 - v_cndmask_b32_e64 v154, v10, v154, s[34:35] // 00000000817C: D100009A 008B350A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000008184: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000818C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008194: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000819C: 86A2221E - v_add_lshl_u32 v155, v7, v8, 2 // 0000000081A0: D1FE009B 020A1107 - v_cndmask_b32_e64 v155, v10, v155, s[34:35] // 0000000081A8: D100009B 008B370A - v_add_co_u32_e64 v5, vcc, v5, 1 // 0000000081B0: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 0000000081B8: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000081C0: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000081C8: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000081D0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000081D8: 86A2221E - v_add_lshl_u32 v156, v7, v4, 2 // 0000000081DC: D1FE009C 020A0907 - v_cndmask_b32_e64 v156, v10, v156, s[34:35] // 0000000081E4: D100009C 008B390A - v_add_co_u32_e64 v8, vcc, v4, 1 // 0000000081EC: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000081F4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000081FC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008204: 86A2221E - v_add_lshl_u32 v157, v7, v8, 2 // 000000008208: D1FE009D 020A1107 - v_cndmask_b32_e64 v157, v10, v157, s[34:35] // 000000008210: D100009D 008B3B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000008218: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008220: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008228: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008230: 86A2221E - v_add_lshl_u32 v158, v7, v8, 2 // 000000008234: D1FE009E 020A1107 - v_cndmask_b32_e64 v158, v10, v158, s[34:35] // 00000000823C: D100009E 008B3D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000008244: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000824C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008254: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000825C: 86A2221E - v_add_lshl_u32 v159, v7, v8, 2 // 000000008260: D1FE009F 020A1107 - v_cndmask_b32_e64 v159, v10, v159, s[34:35] // 000000008268: D100009F 008B3F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000008270: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008278: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008280: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008288: 86A2221E - v_add_lshl_u32 v160, v7, v8, 2 // 00000000828C: D1FE00A0 020A1107 - v_cndmask_b32_e64 v160, v10, v160, s[34:35] // 000000008294: D10000A0 008B410A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000829C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000082A4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000082AC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000082B4: 86A2221E - v_add_lshl_u32 v161, v7, v8, 2 // 0000000082B8: D1FE00A1 020A1107 - v_cndmask_b32_e64 v161, v10, v161, s[34:35] // 0000000082C0: D10000A1 008B430A - v_add_co_u32_e64 v8, vcc, v4, 6 // 0000000082C8: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000082D0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000082D8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000082E0: 86A2221E - v_add_lshl_u32 v162, v7, v8, 2 // 0000000082E4: D1FE00A2 020A1107 - v_cndmask_b32_e64 v162, v10, v162, s[34:35] // 0000000082EC: D10000A2 008B450A - v_add_co_u32_e64 v8, vcc, v4, 7 // 0000000082F4: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000082FC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008304: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000830C: 86A2221E - v_add_lshl_u32 v163, v7, v8, 2 // 000000008310: D1FE00A3 020A1107 - v_cndmask_b32_e64 v163, v10, v163, s[34:35] // 000000008318: D10000A3 008B470A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000008320: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000008328: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000008330: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000008338: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008340: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008348: 86A2221E - v_add_lshl_u32 v164, v7, v4, 2 // 00000000834C: D1FE00A4 020A0907 - v_cndmask_b32_e64 v164, v10, v164, s[34:35] // 000000008354: D10000A4 008B490A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000835C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008364: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000836C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008374: 86A2221E - v_add_lshl_u32 v165, v7, v8, 2 // 000000008378: D1FE00A5 020A1107 - v_cndmask_b32_e64 v165, v10, v165, s[34:35] // 000000008380: D10000A5 008B4B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000008388: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008390: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008398: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000083A0: 86A2221E - v_add_lshl_u32 v166, v7, v8, 2 // 0000000083A4: D1FE00A6 020A1107 - v_cndmask_b32_e64 v166, v10, v166, s[34:35] // 0000000083AC: D10000A6 008B4D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 0000000083B4: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000083BC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000083C4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000083CC: 86A2221E - v_add_lshl_u32 v167, v7, v8, 2 // 0000000083D0: D1FE00A7 020A1107 - v_cndmask_b32_e64 v167, v10, v167, s[34:35] // 0000000083D8: D10000A7 008B4F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 0000000083E0: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000083E8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000083F0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000083F8: 86A2221E - v_add_lshl_u32 v168, v7, v8, 2 // 0000000083FC: D1FE00A8 020A1107 - v_cndmask_b32_e64 v168, v10, v168, s[34:35] // 000000008404: D10000A8 008B510A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000840C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008414: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000841C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008424: 86A2221E - v_add_lshl_u32 v169, v7, v8, 2 // 000000008428: D1FE00A9 020A1107 - v_cndmask_b32_e64 v169, v10, v169, s[34:35] // 000000008430: D10000A9 008B530A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000008438: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008440: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008448: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008450: 86A2221E - v_add_lshl_u32 v170, v7, v8, 2 // 000000008454: D1FE00AA 020A1107 - v_cndmask_b32_e64 v170, v10, v170, s[34:35] // 00000000845C: D10000AA 008B550A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000008464: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000846C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008474: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000847C: 86A2221E - v_add_lshl_u32 v171, v7, v8, 2 // 000000008480: D1FE00AB 020A1107 - v_cndmask_b32_e64 v171, v10, v171, s[34:35] // 000000008488: D10000AB 008B570A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000008490: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000008498: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000084A0: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000084A8: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000084B0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000084B8: 86A2221E - v_add_lshl_u32 v172, v7, v4, 2 // 0000000084BC: D1FE00AC 020A0907 - v_cndmask_b32_e64 v172, v10, v172, s[34:35] // 0000000084C4: D10000AC 008B590A - v_add_co_u32_e64 v8, vcc, v4, 1 // 0000000084CC: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000084D4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000084DC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000084E4: 86A2221E - v_add_lshl_u32 v173, v7, v8, 2 // 0000000084E8: D1FE00AD 020A1107 - v_cndmask_b32_e64 v173, v10, v173, s[34:35] // 0000000084F0: D10000AD 008B5B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 0000000084F8: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008500: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008508: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008510: 86A2221E - v_add_lshl_u32 v174, v7, v8, 2 // 000000008514: D1FE00AE 020A1107 - v_cndmask_b32_e64 v174, v10, v174, s[34:35] // 00000000851C: D10000AE 008B5D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000008524: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000852C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008534: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000853C: 86A2221E - v_add_lshl_u32 v175, v7, v8, 2 // 000000008540: D1FE00AF 020A1107 - v_cndmask_b32_e64 v175, v10, v175, s[34:35] // 000000008548: D10000AF 008B5F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000008550: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008558: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008560: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008568: 86A2221E - v_add_lshl_u32 v176, v7, v8, 2 // 00000000856C: D1FE00B0 020A1107 - v_cndmask_b32_e64 v176, v10, v176, s[34:35] // 000000008574: D10000B0 008B610A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000857C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008584: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000858C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008594: 86A2221E - v_add_lshl_u32 v177, v7, v8, 2 // 000000008598: D1FE00B1 020A1107 - v_cndmask_b32_e64 v177, v10, v177, s[34:35] // 0000000085A0: D10000B1 008B630A - v_add_co_u32_e64 v8, vcc, v4, 6 // 0000000085A8: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000085B0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000085B8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000085C0: 86A2221E - v_add_lshl_u32 v178, v7, v8, 2 // 0000000085C4: D1FE00B2 020A1107 - v_cndmask_b32_e64 v178, v10, v178, s[34:35] // 0000000085CC: D10000B2 008B650A - v_add_co_u32_e64 v8, vcc, v4, 7 // 0000000085D4: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000085DC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000085E4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000085EC: 86A2221E - v_add_lshl_u32 v179, v7, v8, 2 // 0000000085F0: D1FE00B3 020A1107 - v_cndmask_b32_e64 v179, v10, v179, s[34:35] // 0000000085F8: D10000B3 008B670A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000008600: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000008608: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000008610: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000008618: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008620: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008628: 86A2221E - v_add_lshl_u32 v180, v7, v4, 2 // 00000000862C: D1FE00B4 020A0907 - v_cndmask_b32_e64 v180, v10, v180, s[34:35] // 000000008634: D10000B4 008B690A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000863C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008644: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000864C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008654: 86A2221E - v_add_lshl_u32 v181, v7, v8, 2 // 000000008658: D1FE00B5 020A1107 - v_cndmask_b32_e64 v181, v10, v181, s[34:35] // 000000008660: D10000B5 008B6B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000008668: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008670: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008678: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008680: 86A2221E - v_add_lshl_u32 v182, v7, v8, 2 // 000000008684: D1FE00B6 020A1107 - v_cndmask_b32_e64 v182, v10, v182, s[34:35] // 00000000868C: D10000B6 008B6D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000008694: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000869C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000086A4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000086AC: 86A2221E - v_add_lshl_u32 v183, v7, v8, 2 // 0000000086B0: D1FE00B7 020A1107 - v_cndmask_b32_e64 v183, v10, v183, s[34:35] // 0000000086B8: D10000B7 008B6F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 0000000086C0: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000086C8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000086D0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000086D8: 86A2221E - v_add_lshl_u32 v184, v7, v8, 2 // 0000000086DC: D1FE00B8 020A1107 - v_cndmask_b32_e64 v184, v10, v184, s[34:35] // 0000000086E4: D10000B8 008B710A - v_add_co_u32_e64 v8, vcc, v4, 5 // 0000000086EC: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000086F4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000086FC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008704: 86A2221E - v_add_lshl_u32 v185, v7, v8, 2 // 000000008708: D1FE00B9 020A1107 - v_cndmask_b32_e64 v185, v10, v185, s[34:35] // 000000008710: D10000B9 008B730A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000008718: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008720: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008728: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008730: 86A2221E - v_add_lshl_u32 v186, v7, v8, 2 // 000000008734: D1FE00BA 020A1107 - v_cndmask_b32_e64 v186, v10, v186, s[34:35] // 00000000873C: D10000BA 008B750A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000008744: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000874C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008754: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000875C: 86A2221E - v_add_lshl_u32 v187, v7, v8, 2 // 000000008760: D1FE00BB 020A1107 - v_cndmask_b32_e64 v187, v10, v187, s[34:35] // 000000008768: D10000BB 008B770A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000008770: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000008778: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000008780: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000008788: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008790: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008798: 86A2221E - v_add_lshl_u32 v188, v7, v4, 2 // 00000000879C: D1FE00BC 020A0907 - v_cndmask_b32_e64 v188, v10, v188, s[34:35] // 0000000087A4: D10000BC 008B790A - v_add_co_u32_e64 v8, vcc, v4, 1 // 0000000087AC: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000087B4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000087BC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000087C4: 86A2221E - v_add_lshl_u32 v189, v7, v8, 2 // 0000000087C8: D1FE00BD 020A1107 - v_cndmask_b32_e64 v189, v10, v189, s[34:35] // 0000000087D0: D10000BD 008B7B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 0000000087D8: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000087E0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000087E8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000087F0: 86A2221E - v_add_lshl_u32 v190, v7, v8, 2 // 0000000087F4: D1FE00BE 020A1107 - v_cndmask_b32_e64 v190, v10, v190, s[34:35] // 0000000087FC: D10000BE 008B7D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000008804: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000880C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008814: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000881C: 86A2221E - v_add_lshl_u32 v191, v7, v8, 2 // 000000008820: D1FE00BF 020A1107 - v_cndmask_b32_e64 v191, v10, v191, s[34:35] // 000000008828: D10000BF 008B7F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000008830: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008838: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008840: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008848: 86A2221E - v_add_lshl_u32 v192, v7, v8, 2 // 00000000884C: D1FE00C0 020A1107 - v_cndmask_b32_e64 v192, v10, v192, s[34:35] // 000000008854: D10000C0 008B810A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000885C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008864: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000886C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008874: 86A2221E - v_add_lshl_u32 v193, v7, v8, 2 // 000000008878: D1FE00C1 020A1107 - v_cndmask_b32_e64 v193, v10, v193, s[34:35] // 000000008880: D10000C1 008B830A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000008888: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008890: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008898: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000088A0: 86A2221E - v_add_lshl_u32 v194, v7, v8, 2 // 0000000088A4: D1FE00C2 020A1107 - v_cndmask_b32_e64 v194, v10, v194, s[34:35] // 0000000088AC: D10000C2 008B850A - v_add_co_u32_e64 v8, vcc, v4, 7 // 0000000088B4: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000088BC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000088C4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000088CC: 86A2221E - v_add_lshl_u32 v195, v7, v8, 2 // 0000000088D0: D1FE00C3 020A1107 - v_cndmask_b32_e64 v195, v10, v195, s[34:35] // 0000000088D8: D10000C3 008B870A - v_add_co_u32_e64 v5, vcc, v5, 1 // 0000000088E0: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 0000000088E8: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 0000000088F0: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 0000000088F8: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008900: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008908: 86A2221E - v_add_lshl_u32 v196, v7, v4, 2 // 00000000890C: D1FE00C4 020A0907 - v_cndmask_b32_e64 v196, v10, v196, s[34:35] // 000000008914: D10000C4 008B890A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000891C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008924: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000892C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008934: 86A2221E - v_add_lshl_u32 v197, v7, v8, 2 // 000000008938: D1FE00C5 020A1107 - v_cndmask_b32_e64 v197, v10, v197, s[34:35] // 000000008940: D10000C5 008B8B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000008948: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008950: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008958: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008960: 86A2221E - v_add_lshl_u32 v198, v7, v8, 2 // 000000008964: D1FE00C6 020A1107 - v_cndmask_b32_e64 v198, v10, v198, s[34:35] // 00000000896C: D10000C6 008B8D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000008974: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000897C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008984: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000898C: 86A2221E - v_add_lshl_u32 v199, v7, v8, 2 // 000000008990: D1FE00C7 020A1107 - v_cndmask_b32_e64 v199, v10, v199, s[34:35] // 000000008998: D10000C7 008B8F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 0000000089A0: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000089A8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000089B0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000089B8: 86A2221E - v_add_lshl_u32 v200, v7, v8, 2 // 0000000089BC: D1FE00C8 020A1107 - v_cndmask_b32_e64 v200, v10, v200, s[34:35] // 0000000089C4: D10000C8 008B910A - v_add_co_u32_e64 v8, vcc, v4, 5 // 0000000089CC: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000089D4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000089DC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000089E4: 86A2221E - v_add_lshl_u32 v201, v7, v8, 2 // 0000000089E8: D1FE00C9 020A1107 - v_cndmask_b32_e64 v201, v10, v201, s[34:35] // 0000000089F0: D10000C9 008B930A - v_add_co_u32_e64 v8, vcc, v4, 6 // 0000000089F8: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008A00: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008A08: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008A10: 86A2221E - v_add_lshl_u32 v202, v7, v8, 2 // 000000008A14: D1FE00CA 020A1107 - v_cndmask_b32_e64 v202, v10, v202, s[34:35] // 000000008A1C: D10000CA 008B950A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000008A24: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008A2C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008A34: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008A3C: 86A2221E - v_add_lshl_u32 v203, v7, v8, 2 // 000000008A40: D1FE00CB 020A1107 - v_cndmask_b32_e64 v203, v10, v203, s[34:35] // 000000008A48: D10000CB 008B970A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000008A50: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000008A58: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000008A60: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000008A68: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008A70: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008A78: 86A2221E - v_add_lshl_u32 v204, v7, v4, 2 // 000000008A7C: D1FE00CC 020A0907 - v_cndmask_b32_e64 v204, v10, v204, s[34:35] // 000000008A84: D10000CC 008B990A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000008A8C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008A94: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008A9C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008AA4: 86A2221E - v_add_lshl_u32 v205, v7, v8, 2 // 000000008AA8: D1FE00CD 020A1107 - v_cndmask_b32_e64 v205, v10, v205, s[34:35] // 000000008AB0: D10000CD 008B9B0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000008AB8: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008AC0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008AC8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008AD0: 86A2221E - v_add_lshl_u32 v206, v7, v8, 2 // 000000008AD4: D1FE00CE 020A1107 - v_cndmask_b32_e64 v206, v10, v206, s[34:35] // 000000008ADC: D10000CE 008B9D0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000008AE4: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008AEC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008AF4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008AFC: 86A2221E - v_add_lshl_u32 v207, v7, v8, 2 // 000000008B00: D1FE00CF 020A1107 - v_cndmask_b32_e64 v207, v10, v207, s[34:35] // 000000008B08: D10000CF 008B9F0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000008B10: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008B18: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008B20: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008B28: 86A2221E - v_add_lshl_u32 v208, v7, v8, 2 // 000000008B2C: D1FE00D0 020A1107 - v_cndmask_b32_e64 v208, v10, v208, s[34:35] // 000000008B34: D10000D0 008BA10A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000008B3C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008B44: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008B4C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008B54: 86A2221E - v_add_lshl_u32 v209, v7, v8, 2 // 000000008B58: D1FE00D1 020A1107 - v_cndmask_b32_e64 v209, v10, v209, s[34:35] // 000000008B60: D10000D1 008BA30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000008B68: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008B70: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008B78: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008B80: 86A2221E - v_add_lshl_u32 v210, v7, v8, 2 // 000000008B84: D1FE00D2 020A1107 - v_cndmask_b32_e64 v210, v10, v210, s[34:35] // 000000008B8C: D10000D2 008BA50A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000008B94: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008B9C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008BA4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008BAC: 86A2221E - v_add_lshl_u32 v211, v7, v8, 2 // 000000008BB0: D1FE00D3 020A1107 - v_cndmask_b32_e64 v211, v10, v211, s[34:35] // 000000008BB8: D10000D3 008BA70A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000008BC0: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000008BC8: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000008BD0: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000008BD8: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008BE0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008BE8: 86A2221E - v_add_lshl_u32 v212, v7, v4, 2 // 000000008BEC: D1FE00D4 020A0907 - v_cndmask_b32_e64 v212, v10, v212, s[34:35] // 000000008BF4: D10000D4 008BA90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000008BFC: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008C04: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008C0C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008C14: 86A2221E - v_add_lshl_u32 v213, v7, v8, 2 // 000000008C18: D1FE00D5 020A1107 - v_cndmask_b32_e64 v213, v10, v213, s[34:35] // 000000008C20: D10000D5 008BAB0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000008C28: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008C30: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008C38: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008C40: 86A2221E - v_add_lshl_u32 v214, v7, v8, 2 // 000000008C44: D1FE00D6 020A1107 - v_cndmask_b32_e64 v214, v10, v214, s[34:35] // 000000008C4C: D10000D6 008BAD0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000008C54: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008C5C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008C64: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008C6C: 86A2221E - v_add_lshl_u32 v215, v7, v8, 2 // 000000008C70: D1FE00D7 020A1107 - v_cndmask_b32_e64 v215, v10, v215, s[34:35] // 000000008C78: D10000D7 008BAF0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000008C80: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008C88: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008C90: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008C98: 86A2221E - v_add_lshl_u32 v216, v7, v8, 2 // 000000008C9C: D1FE00D8 020A1107 - v_cndmask_b32_e64 v216, v10, v216, s[34:35] // 000000008CA4: D10000D8 008BB10A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000008CAC: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008CB4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008CBC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008CC4: 86A2221E - v_add_lshl_u32 v217, v7, v8, 2 // 000000008CC8: D1FE00D9 020A1107 - v_cndmask_b32_e64 v217, v10, v217, s[34:35] // 000000008CD0: D10000D9 008BB30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000008CD8: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008CE0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008CE8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008CF0: 86A2221E - v_add_lshl_u32 v218, v7, v8, 2 // 000000008CF4: D1FE00DA 020A1107 - v_cndmask_b32_e64 v218, v10, v218, s[34:35] // 000000008CFC: D10000DA 008BB50A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000008D04: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008D0C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008D14: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008D1C: 86A2221E - v_add_lshl_u32 v219, v7, v8, 2 // 000000008D20: D1FE00DB 020A1107 - v_cndmask_b32_e64 v219, v10, v219, s[34:35] // 000000008D28: D10000DB 008BB70A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000008D30: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000008D38: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000008D40: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000008D48: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008D50: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008D58: 86A2221E - v_add_lshl_u32 v220, v7, v4, 2 // 000000008D5C: D1FE00DC 020A0907 - v_cndmask_b32_e64 v220, v10, v220, s[34:35] // 000000008D64: D10000DC 008BB90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000008D6C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008D74: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008D7C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008D84: 86A2221E - v_add_lshl_u32 v221, v7, v8, 2 // 000000008D88: D1FE00DD 020A1107 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 000000008D90: D10000DD 008BBB0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000008D98: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008DA0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008DA8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008DB0: 86A2221E - v_add_lshl_u32 v222, v7, v8, 2 // 000000008DB4: D1FE00DE 020A1107 - v_cndmask_b32_e64 v222, v10, v222, s[34:35] // 000000008DBC: D10000DE 008BBD0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000008DC4: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008DCC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008DD4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008DDC: 86A2221E - v_add_lshl_u32 v223, v7, v8, 2 // 000000008DE0: D1FE00DF 020A1107 - v_cndmask_b32_e64 v223, v10, v223, s[34:35] // 000000008DE8: D10000DF 008BBF0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000008DF0: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008DF8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008E00: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008E08: 86A2221E - v_add_lshl_u32 v224, v7, v8, 2 // 000000008E0C: D1FE00E0 020A1107 - v_cndmask_b32_e64 v224, v10, v224, s[34:35] // 000000008E14: D10000E0 008BC10A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000008E1C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008E24: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008E2C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008E34: 86A2221E - v_add_lshl_u32 v225, v7, v8, 2 // 000000008E38: D1FE00E1 020A1107 - v_cndmask_b32_e64 v225, v10, v225, s[34:35] // 000000008E40: D10000E1 008BC30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000008E48: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008E50: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008E58: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008E60: 86A2221E - v_add_lshl_u32 v226, v7, v8, 2 // 000000008E64: D1FE00E2 020A1107 - v_cndmask_b32_e64 v226, v10, v226, s[34:35] // 000000008E6C: D10000E2 008BC50A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000008E74: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008E7C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008E84: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008E8C: 86A2221E - v_add_lshl_u32 v227, v7, v8, 2 // 000000008E90: D1FE00E3 020A1107 - v_cndmask_b32_e64 v227, v10, v227, s[34:35] // 000000008E98: D10000E3 008BC70A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000008EA0: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000008EA8: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000008EB0: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000008EB8: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008EC0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008EC8: 86A2221E - v_add_lshl_u32 v228, v7, v4, 2 // 000000008ECC: D1FE00E4 020A0907 - v_cndmask_b32_e64 v228, v10, v228, s[34:35] // 000000008ED4: D10000E4 008BC90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000008EDC: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008EE4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008EEC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008EF4: 86A2221E - v_add_lshl_u32 v229, v7, v8, 2 // 000000008EF8: D1FE00E5 020A1107 - v_cndmask_b32_e64 v229, v10, v229, s[34:35] // 000000008F00: D10000E5 008BCB0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000008F08: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008F10: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008F18: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008F20: 86A2221E - v_add_lshl_u32 v230, v7, v8, 2 // 000000008F24: D1FE00E6 020A1107 - v_cndmask_b32_e64 v230, v10, v230, s[34:35] // 000000008F2C: D10000E6 008BCD0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000008F34: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008F3C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008F44: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008F4C: 86A2221E - v_add_lshl_u32 v231, v7, v8, 2 // 000000008F50: D1FE00E7 020A1107 - v_cndmask_b32_e64 v231, v10, v231, s[34:35] // 000000008F58: D10000E7 008BCF0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000008F60: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008F68: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008F70: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008F78: 86A2221E - v_add_lshl_u32 v232, v7, v8, 2 // 000000008F7C: D1FE00E8 020A1107 - v_cndmask_b32_e64 v232, v10, v232, s[34:35] // 000000008F84: D10000E8 008BD10A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000008F8C: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008F94: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008F9C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008FA4: 86A2221E - v_add_lshl_u32 v233, v7, v8, 2 // 000000008FA8: D1FE00E9 020A1107 - v_cndmask_b32_e64 v233, v10, v233, s[34:35] // 000000008FB0: D10000E9 008BD30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000008FB8: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008FC0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008FC8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008FD0: 86A2221E - v_add_lshl_u32 v234, v7, v8, 2 // 000000008FD4: D1FE00EA 020A1107 - v_cndmask_b32_e64 v234, v10, v234, s[34:35] // 000000008FDC: D10000EA 008BD50A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000008FE4: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000008FEC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000008FF4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000008FFC: 86A2221E - v_add_lshl_u32 v235, v7, v8, 2 // 000000009000: D1FE00EB 020A1107 - v_cndmask_b32_e64 v235, v10, v235, s[34:35] // 000000009008: D10000EB 008BD70A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000009010: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000009018: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000009020: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000009028: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009030: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009038: 86A2221E - v_add_lshl_u32 v236, v7, v4, 2 // 00000000903C: D1FE00EC 020A0907 - v_cndmask_b32_e64 v236, v10, v236, s[34:35] // 000000009044: D10000EC 008BD90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000904C: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009054: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000905C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009064: 86A2221E - v_add_lshl_u32 v237, v7, v8, 2 // 000000009068: D1FE00ED 020A1107 - v_cndmask_b32_e64 v237, v10, v237, s[34:35] // 000000009070: D10000ED 008BDB0A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000009078: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009080: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009088: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009090: 86A2221E - v_add_lshl_u32 v238, v7, v8, 2 // 000000009094: D1FE00EE 020A1107 - v_cndmask_b32_e64 v238, v10, v238, s[34:35] // 00000000909C: D10000EE 008BDD0A - v_add_co_u32_e64 v8, vcc, v4, 3 // 0000000090A4: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000090AC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000090B4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000090BC: 86A2221E - v_add_lshl_u32 v239, v7, v8, 2 // 0000000090C0: D1FE00EF 020A1107 - v_cndmask_b32_e64 v239, v10, v239, s[34:35] // 0000000090C8: D10000EF 008BDF0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 0000000090D0: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000090D8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000090E0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000090E8: 86A2221E - v_add_lshl_u32 v240, v7, v8, 2 // 0000000090EC: D1FE00F0 020A1107 - v_cndmask_b32_e64 v240, v10, v240, s[34:35] // 0000000090F4: D10000F0 008BE10A - v_add_co_u32_e64 v8, vcc, v4, 5 // 0000000090FC: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009104: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000910C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009114: 86A2221E - v_add_lshl_u32 v241, v7, v8, 2 // 000000009118: D1FE00F1 020A1107 - v_cndmask_b32_e64 v241, v10, v241, s[34:35] // 000000009120: D10000F1 008BE30A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000009128: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009130: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009138: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009140: 86A2221E - v_add_lshl_u32 v242, v7, v8, 2 // 000000009144: D1FE00F2 020A1107 - v_cndmask_b32_e64 v242, v10, v242, s[34:35] // 00000000914C: D10000F2 008BE50A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000009154: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000915C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009164: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000916C: 86A2221E - v_add_lshl_u32 v243, v7, v8, 2 // 000000009170: D1FE00F3 020A1107 - v_cndmask_b32_e64 v243, v10, v243, s[34:35] // 000000009178: D10000F3 008BE70A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000009180: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000009188: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000009190: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000009198: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000091A0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000091A8: 86A2221E - v_add_lshl_u32 v244, v7, v4, 2 // 0000000091AC: D1FE00F4 020A0907 - v_cndmask_b32_e64 v244, v10, v244, s[34:35] // 0000000091B4: D10000F4 008BE90A - v_add_co_u32_e64 v8, vcc, v4, 1 // 0000000091BC: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000091C4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000091CC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000091D4: 86A2221E - v_add_lshl_u32 v245, v7, v8, 2 // 0000000091D8: D1FE00F5 020A1107 - v_cndmask_b32_e64 v245, v10, v245, s[34:35] // 0000000091E0: D10000F5 008BEB0A - v_accvgpr_read_b32 v15, a0 // 0000000091E8: D3D8400F 18000100 - v_accvgpr_read_b32 v16, a4 // 0000000091F0: D3D84010 18000104 - v_accvgpr_read_b32 v17, a8 // 0000000091F8: D3D84011 18000108 - v_accvgpr_read_b32 v18, a12 // 000000009200: D3D84012 1800010C - v_accvgpr_read_b32 v19, a16 // 000000009208: D3D84013 18000110 - v_accvgpr_read_b32 v20, a20 // 000000009210: D3D84014 18000114 - v_accvgpr_read_b32 v21, a24 // 000000009218: D3D84015 18000118 - v_accvgpr_read_b32 v22, a28 // 000000009220: D3D84016 1800011C - v_accvgpr_read_b32 v23, a32 // 000000009228: D3D84017 18000120 - v_accvgpr_read_b32 v24, a36 // 000000009230: D3D84018 18000124 - v_accvgpr_read_b32 v25, a40 // 000000009238: D3D84019 18000128 - v_accvgpr_read_b32 v26, a44 // 000000009240: D3D8401A 1800012C - v_accvgpr_read_b32 v27, a48 // 000000009248: D3D8401B 18000130 - v_accvgpr_read_b32 v28, a52 // 000000009250: D3D8401C 18000134 - v_accvgpr_read_b32 v29, a56 // 000000009258: D3D8401D 18000138 - v_accvgpr_read_b32 v30, a60 // 000000009260: D3D8401E 1800013C - v_accvgpr_read_b32 v31, a64 // 000000009268: D3D8401F 18000140 - v_accvgpr_read_b32 v32, a68 // 000000009270: D3D84020 18000144 - v_accvgpr_read_b32 v33, a72 // 000000009278: D3D84021 18000148 - v_accvgpr_read_b32 v34, a76 // 000000009280: D3D84022 1800014C - v_accvgpr_read_b32 v35, a80 // 000000009288: D3D84023 18000150 - v_accvgpr_read_b32 v36, a84 // 000000009290: D3D84024 18000154 - v_accvgpr_read_b32 v37, a88 // 000000009298: D3D84025 18000158 - v_accvgpr_read_b32 v38, a92 // 0000000092A0: D3D84026 1800015C - v_accvgpr_read_b32 v39, a96 // 0000000092A8: D3D84027 18000160 - v_accvgpr_read_b32 v40, a100 // 0000000092B0: D3D84028 18000164 - v_accvgpr_read_b32 v41, a104 // 0000000092B8: D3D84029 18000168 - v_accvgpr_read_b32 v42, a108 // 0000000092C0: D3D8402A 1800016C - v_accvgpr_read_b32 v43, a112 // 0000000092C8: D3D8402B 18000170 - v_accvgpr_read_b32 v44, a116 // 0000000092D0: D3D8402C 18000174 - v_accvgpr_read_b32 v45, a120 // 0000000092D8: D3D8402D 18000178 - v_accvgpr_read_b32 v46, a124 // 0000000092E0: D3D8402E 1800017C - v_accvgpr_read_b32 v47, a128 // 0000000092E8: D3D8402F 18000180 - v_accvgpr_read_b32 v48, a132 // 0000000092F0: D3D84030 18000184 - v_accvgpr_read_b32 v49, a136 // 0000000092F8: D3D84031 18000188 - v_accvgpr_read_b32 v50, a140 // 000000009300: D3D84032 1800018C - v_accvgpr_read_b32 v51, a144 // 000000009308: D3D84033 18000190 - v_accvgpr_read_b32 v52, a148 // 000000009310: D3D84034 18000194 - v_accvgpr_read_b32 v53, a152 // 000000009318: D3D84035 18000198 - v_accvgpr_read_b32 v54, a156 // 000000009320: D3D84036 1800019C - v_accvgpr_read_b32 v55, a160 // 000000009328: D3D84037 180001A0 - v_accvgpr_read_b32 v56, a164 // 000000009330: D3D84038 180001A4 - v_accvgpr_read_b32 v57, a168 // 000000009338: D3D84039 180001A8 - v_accvgpr_read_b32 v58, a172 // 000000009340: D3D8403A 180001AC - v_accvgpr_read_b32 v59, a176 // 000000009348: D3D8403B 180001B0 - v_accvgpr_read_b32 v60, a180 // 000000009350: D3D8403C 180001B4 - v_accvgpr_read_b32 v61, a184 // 000000009358: D3D8403D 180001B8 - v_accvgpr_read_b32 v62, a188 // 000000009360: D3D8403E 180001BC - v_accvgpr_read_b32 v63, a192 // 000000009368: D3D8403F 180001C0 - v_accvgpr_read_b32 v64, a196 // 000000009370: D3D84040 180001C4 - v_accvgpr_read_b32 v65, a200 // 000000009378: D3D84041 180001C8 - v_accvgpr_read_b32 v66, a204 // 000000009380: D3D84042 180001CC - v_accvgpr_read_b32 v67, a208 // 000000009388: D3D84043 180001D0 - v_accvgpr_read_b32 v68, a212 // 000000009390: D3D84044 180001D4 - v_accvgpr_read_b32 v69, a216 // 000000009398: D3D84045 180001D8 - v_accvgpr_read_b32 v70, a220 // 0000000093A0: D3D84046 180001DC - v_accvgpr_read_b32 v71, a224 // 0000000093A8: D3D84047 180001E0 - v_accvgpr_read_b32 v72, a228 // 0000000093B0: D3D84048 180001E4 - v_accvgpr_read_b32 v73, a232 // 0000000093B8: D3D84049 180001E8 - v_accvgpr_read_b32 v74, a236 // 0000000093C0: D3D8404A 180001EC - v_accvgpr_read_b32 v75, a240 // 0000000093C8: D3D8404B 180001F0 - v_accvgpr_read_b32 v76, a244 // 0000000093D0: D3D8404C 180001F4 - v_accvgpr_read_b32 v77, a248 // 0000000093D8: D3D8404D 180001F8 - v_accvgpr_read_b32 v78, a252 // 0000000093E0: D3D8404E 180001FC - v_accvgpr_read_b32 v79, a1 // 0000000093E8: D3D8404F 18000101 - v_accvgpr_read_b32 v80, a5 // 0000000093F0: D3D84050 18000105 - v_accvgpr_read_b32 v81, a9 // 0000000093F8: D3D84051 18000109 - v_accvgpr_read_b32 v82, a13 // 000000009400: D3D84052 1800010D - v_accvgpr_read_b32 v83, a17 // 000000009408: D3D84053 18000111 - v_accvgpr_read_b32 v84, a21 // 000000009410: D3D84054 18000115 - v_accvgpr_read_b32 v85, a25 // 000000009418: D3D84055 18000119 - v_accvgpr_read_b32 v86, a29 // 000000009420: D3D84056 1800011D - v_accvgpr_read_b32 v87, a33 // 000000009428: D3D84057 18000121 - v_accvgpr_read_b32 v88, a37 // 000000009430: D3D84058 18000125 - v_accvgpr_read_b32 v89, a41 // 000000009438: D3D84059 18000129 - v_accvgpr_read_b32 v90, a45 // 000000009440: D3D8405A 1800012D - v_accvgpr_read_b32 v91, a49 // 000000009448: D3D8405B 18000131 - v_accvgpr_read_b32 v92, a53 // 000000009450: D3D8405C 18000135 - v_accvgpr_read_b32 v93, a57 // 000000009458: D3D8405D 18000139 - v_accvgpr_read_b32 v94, a61 // 000000009460: D3D8405E 1800013D - v_accvgpr_read_b32 v95, a65 // 000000009468: D3D8405F 18000141 - v_accvgpr_read_b32 v96, a69 // 000000009470: D3D84060 18000145 - v_accvgpr_read_b32 v97, a73 // 000000009478: D3D84061 18000149 - v_accvgpr_read_b32 v98, a77 // 000000009480: D3D84062 1800014D - v_accvgpr_read_b32 v99, a81 // 000000009488: D3D84063 18000151 - v_accvgpr_read_b32 v100, a85 // 000000009490: D3D84064 18000155 - v_accvgpr_read_b32 v101, a89 // 000000009498: D3D84065 18000159 - v_accvgpr_read_b32 v102, a93 // 0000000094A0: D3D84066 1800015D - v_accvgpr_read_b32 v103, a97 // 0000000094A8: D3D84067 18000161 - v_accvgpr_read_b32 v104, a101 // 0000000094B0: D3D84068 18000165 - v_accvgpr_read_b32 v105, a105 // 0000000094B8: D3D84069 18000169 - v_accvgpr_read_b32 v106, a109 // 0000000094C0: D3D8406A 1800016D - v_accvgpr_read_b32 v107, a113 // 0000000094C8: D3D8406B 18000171 - v_accvgpr_read_b32 v108, a117 // 0000000094D0: D3D8406C 18000175 - v_accvgpr_read_b32 v109, a121 // 0000000094D8: D3D8406D 18000179 - v_accvgpr_read_b32 v110, a125 // 0000000094E0: D3D8406E 1800017D - v_accvgpr_read_b32 v111, a129 // 0000000094E8: D3D8406F 18000181 - v_accvgpr_read_b32 v112, a133 // 0000000094F0: D3D84070 18000185 - v_accvgpr_read_b32 v113, a137 // 0000000094F8: D3D84071 18000189 - v_accvgpr_read_b32 v114, a141 // 000000009500: D3D84072 1800018D - v_accvgpr_read_b32 v115, a145 // 000000009508: D3D84073 18000191 - v_accvgpr_read_b32 v116, a149 // 000000009510: D3D84074 18000195 - v_accvgpr_read_b32 v117, a153 // 000000009518: D3D84075 18000199 - v_accvgpr_read_b32 v118, a157 // 000000009520: D3D84076 1800019D - v_accvgpr_read_b32 v119, a161 // 000000009528: D3D84077 180001A1 - v_accvgpr_read_b32 v120, a165 // 000000009530: D3D84078 180001A5 - v_accvgpr_read_b32 v121, a169 // 000000009538: D3D84079 180001A9 - v_accvgpr_read_b32 v122, a173 // 000000009540: D3D8407A 180001AD - v_accvgpr_read_b32 v123, a177 // 000000009548: D3D8407B 180001B1 - v_accvgpr_read_b32 v124, a181 // 000000009550: D3D8407C 180001B5 - v_accvgpr_read_b32 v125, a185 // 000000009558: D3D8407D 180001B9 - v_accvgpr_read_b32 v126, a189 // 000000009560: D3D8407E 180001BD - v_accvgpr_read_b32 v127, a193 // 000000009568: D3D8407F 180001C1 - v_accvgpr_read_b32 v128, a197 // 000000009570: D3D84080 180001C5 - buffer_store_dword v15, v129, s[16:19], 0 offen nt // 000000009578: E0721000 80040F81 - buffer_store_dword v16, v130, s[16:19], 0 offen nt // 000000009580: E0721000 80041082 - buffer_store_dword v17, v131, s[16:19], 0 offen nt // 000000009588: E0721000 80041183 - buffer_store_dword v18, v135, s[16:19], 0 offen nt // 000000009590: E0721000 80041287 - buffer_store_dword v19, v136, s[16:19], 0 offen nt // 000000009598: E0721000 80041388 - buffer_store_dword v20, v137, s[16:19], 0 offen nt // 0000000095A0: E0721000 80041489 - buffer_store_dword v21, v138, s[16:19], 0 offen nt // 0000000095A8: E0721000 8004158A - buffer_store_dword v22, v139, s[16:19], 0 offen nt // 0000000095B0: E0721000 8004168B - buffer_store_dword v23, v140, s[16:19], 0 offen nt // 0000000095B8: E0721000 8004178C - buffer_store_dword v24, v141, s[16:19], 0 offen nt // 0000000095C0: E0721000 8004188D - buffer_store_dword v25, v142, s[16:19], 0 offen nt // 0000000095C8: E0721000 8004198E - buffer_store_dword v26, v143, s[16:19], 0 offen nt // 0000000095D0: E0721000 80041A8F - buffer_store_dword v27, v144, s[16:19], 0 offen nt // 0000000095D8: E0721000 80041B90 - buffer_store_dword v28, v145, s[16:19], 0 offen nt // 0000000095E0: E0721000 80041C91 - buffer_store_dword v29, v146, s[16:19], 0 offen nt // 0000000095E8: E0721000 80041D92 - buffer_store_dword v30, v147, s[16:19], 0 offen nt // 0000000095F0: E0721000 80041E93 - buffer_store_dword v31, v148, s[16:19], 0 offen nt // 0000000095F8: E0721000 80041F94 - buffer_store_dword v32, v149, s[16:19], 0 offen nt // 000000009600: E0721000 80042095 - buffer_store_dword v33, v150, s[16:19], 0 offen nt // 000000009608: E0721000 80042196 - buffer_store_dword v34, v151, s[16:19], 0 offen nt // 000000009610: E0721000 80042297 - buffer_store_dword v35, v152, s[16:19], 0 offen nt // 000000009618: E0721000 80042398 - buffer_store_dword v36, v153, s[16:19], 0 offen nt // 000000009620: E0721000 80042499 - buffer_store_dword v37, v154, s[16:19], 0 offen nt // 000000009628: E0721000 8004259A - buffer_store_dword v38, v155, s[16:19], 0 offen nt // 000000009630: E0721000 8004269B - buffer_store_dword v39, v156, s[16:19], 0 offen nt // 000000009638: E0721000 8004279C - buffer_store_dword v40, v157, s[16:19], 0 offen nt // 000000009640: E0721000 8004289D - buffer_store_dword v41, v158, s[16:19], 0 offen nt // 000000009648: E0721000 8004299E - buffer_store_dword v42, v159, s[16:19], 0 offen nt // 000000009650: E0721000 80042A9F - buffer_store_dword v43, v160, s[16:19], 0 offen nt // 000000009658: E0721000 80042BA0 - buffer_store_dword v44, v161, s[16:19], 0 offen nt // 000000009660: E0721000 80042CA1 - buffer_store_dword v45, v162, s[16:19], 0 offen nt // 000000009668: E0721000 80042DA2 - buffer_store_dword v46, v163, s[16:19], 0 offen nt // 000000009670: E0721000 80042EA3 - buffer_store_dword v47, v164, s[16:19], 0 offen nt // 000000009678: E0721000 80042FA4 - buffer_store_dword v48, v165, s[16:19], 0 offen nt // 000000009680: E0721000 800430A5 - buffer_store_dword v49, v166, s[16:19], 0 offen nt // 000000009688: E0721000 800431A6 - buffer_store_dword v50, v167, s[16:19], 0 offen nt // 000000009690: E0721000 800432A7 - buffer_store_dword v51, v168, s[16:19], 0 offen nt // 000000009698: E0721000 800433A8 - buffer_store_dword v52, v169, s[16:19], 0 offen nt // 0000000096A0: E0721000 800434A9 - buffer_store_dword v53, v170, s[16:19], 0 offen nt // 0000000096A8: E0721000 800435AA - buffer_store_dword v54, v171, s[16:19], 0 offen nt // 0000000096B0: E0721000 800436AB - buffer_store_dword v55, v172, s[16:19], 0 offen nt // 0000000096B8: E0721000 800437AC - buffer_store_dword v56, v173, s[16:19], 0 offen nt // 0000000096C0: E0721000 800438AD - buffer_store_dword v57, v174, s[16:19], 0 offen nt // 0000000096C8: E0721000 800439AE - buffer_store_dword v58, v175, s[16:19], 0 offen nt // 0000000096D0: E0721000 80043AAF - buffer_store_dword v59, v176, s[16:19], 0 offen nt // 0000000096D8: E0721000 80043BB0 - buffer_store_dword v60, v177, s[16:19], 0 offen nt // 0000000096E0: E0721000 80043CB1 - buffer_store_dword v61, v178, s[16:19], 0 offen nt // 0000000096E8: E0721000 80043DB2 - buffer_store_dword v62, v179, s[16:19], 0 offen nt // 0000000096F0: E0721000 80043EB3 - buffer_store_dword v63, v180, s[16:19], 0 offen nt // 0000000096F8: E0721000 80043FB4 - buffer_store_dword v64, v181, s[16:19], 0 offen nt // 000000009700: E0721000 800440B5 - buffer_store_dword v65, v182, s[16:19], 0 offen nt // 000000009708: E0721000 800441B6 - buffer_store_dword v66, v183, s[16:19], 0 offen nt // 000000009710: E0721000 800442B7 - buffer_store_dword v67, v184, s[16:19], 0 offen nt // 000000009718: E0721000 800443B8 - buffer_store_dword v68, v185, s[16:19], 0 offen nt // 000000009720: E0721000 800444B9 - buffer_store_dword v69, v186, s[16:19], 0 offen nt // 000000009728: E0721000 800445BA - buffer_store_dword v70, v187, s[16:19], 0 offen nt // 000000009730: E0721000 800446BB - buffer_store_dword v71, v188, s[16:19], 0 offen nt // 000000009738: E0721000 800447BC - buffer_store_dword v72, v189, s[16:19], 0 offen nt // 000000009740: E0721000 800448BD - buffer_store_dword v73, v190, s[16:19], 0 offen nt // 000000009748: E0721000 800449BE - buffer_store_dword v74, v191, s[16:19], 0 offen nt // 000000009750: E0721000 80044ABF - buffer_store_dword v75, v192, s[16:19], 0 offen nt // 000000009758: E0721000 80044BC0 - buffer_store_dword v76, v193, s[16:19], 0 offen nt // 000000009760: E0721000 80044CC1 - buffer_store_dword v77, v194, s[16:19], 0 offen nt // 000000009768: E0721000 80044DC2 - buffer_store_dword v78, v195, s[16:19], 0 offen nt // 000000009770: E0721000 80044EC3 - buffer_store_dword v79, v196, s[16:19], 0 offen nt // 000000009778: E0721000 80044FC4 - buffer_store_dword v80, v197, s[16:19], 0 offen nt // 000000009780: E0721000 800450C5 - buffer_store_dword v81, v198, s[16:19], 0 offen nt // 000000009788: E0721000 800451C6 - buffer_store_dword v82, v199, s[16:19], 0 offen nt // 000000009790: E0721000 800452C7 - buffer_store_dword v83, v200, s[16:19], 0 offen nt // 000000009798: E0721000 800453C8 - buffer_store_dword v84, v201, s[16:19], 0 offen nt // 0000000097A0: E0721000 800454C9 - buffer_store_dword v85, v202, s[16:19], 0 offen nt // 0000000097A8: E0721000 800455CA - buffer_store_dword v86, v203, s[16:19], 0 offen nt // 0000000097B0: E0721000 800456CB - buffer_store_dword v87, v204, s[16:19], 0 offen nt // 0000000097B8: E0721000 800457CC - buffer_store_dword v88, v205, s[16:19], 0 offen nt // 0000000097C0: E0721000 800458CD - buffer_store_dword v89, v206, s[16:19], 0 offen nt // 0000000097C8: E0721000 800459CE - buffer_store_dword v90, v207, s[16:19], 0 offen nt // 0000000097D0: E0721000 80045ACF - buffer_store_dword v91, v208, s[16:19], 0 offen nt // 0000000097D8: E0721000 80045BD0 - buffer_store_dword v92, v209, s[16:19], 0 offen nt // 0000000097E0: E0721000 80045CD1 - buffer_store_dword v93, v210, s[16:19], 0 offen nt // 0000000097E8: E0721000 80045DD2 - buffer_store_dword v94, v211, s[16:19], 0 offen nt // 0000000097F0: E0721000 80045ED3 - buffer_store_dword v95, v212, s[16:19], 0 offen nt // 0000000097F8: E0721000 80045FD4 - buffer_store_dword v96, v213, s[16:19], 0 offen nt // 000000009800: E0721000 800460D5 - buffer_store_dword v97, v214, s[16:19], 0 offen nt // 000000009808: E0721000 800461D6 - buffer_store_dword v98, v215, s[16:19], 0 offen nt // 000000009810: E0721000 800462D7 - buffer_store_dword v99, v216, s[16:19], 0 offen nt // 000000009818: E0721000 800463D8 - buffer_store_dword v100, v217, s[16:19], 0 offen nt // 000000009820: E0721000 800464D9 - buffer_store_dword v101, v218, s[16:19], 0 offen nt // 000000009828: E0721000 800465DA - buffer_store_dword v102, v219, s[16:19], 0 offen nt // 000000009830: E0721000 800466DB - buffer_store_dword v103, v220, s[16:19], 0 offen nt // 000000009838: E0721000 800467DC - buffer_store_dword v104, v221, s[16:19], 0 offen nt // 000000009840: E0721000 800468DD - buffer_store_dword v105, v222, s[16:19], 0 offen nt // 000000009848: E0721000 800469DE - buffer_store_dword v106, v223, s[16:19], 0 offen nt // 000000009850: E0721000 80046ADF - buffer_store_dword v107, v224, s[16:19], 0 offen nt // 000000009858: E0721000 80046BE0 - buffer_store_dword v108, v225, s[16:19], 0 offen nt // 000000009860: E0721000 80046CE1 - buffer_store_dword v109, v226, s[16:19], 0 offen nt // 000000009868: E0721000 80046DE2 - buffer_store_dword v110, v227, s[16:19], 0 offen nt // 000000009870: E0721000 80046EE3 - buffer_store_dword v111, v228, s[16:19], 0 offen nt // 000000009878: E0721000 80046FE4 - buffer_store_dword v112, v229, s[16:19], 0 offen nt // 000000009880: E0721000 800470E5 - buffer_store_dword v113, v230, s[16:19], 0 offen nt // 000000009888: E0721000 800471E6 - buffer_store_dword v114, v231, s[16:19], 0 offen nt // 000000009890: E0721000 800472E7 - buffer_store_dword v115, v232, s[16:19], 0 offen nt // 000000009898: E0721000 800473E8 - buffer_store_dword v116, v233, s[16:19], 0 offen nt // 0000000098A0: E0721000 800474E9 - buffer_store_dword v117, v234, s[16:19], 0 offen nt // 0000000098A8: E0721000 800475EA - buffer_store_dword v118, v235, s[16:19], 0 offen nt // 0000000098B0: E0721000 800476EB - buffer_store_dword v119, v236, s[16:19], 0 offen nt // 0000000098B8: E0721000 800477EC - buffer_store_dword v120, v237, s[16:19], 0 offen nt // 0000000098C0: E0721000 800478ED - buffer_store_dword v121, v238, s[16:19], 0 offen nt // 0000000098C8: E0721000 800479EE - buffer_store_dword v122, v239, s[16:19], 0 offen nt // 0000000098D0: E0721000 80047AEF - buffer_store_dword v123, v240, s[16:19], 0 offen nt // 0000000098D8: E0721000 80047BF0 - buffer_store_dword v124, v241, s[16:19], 0 offen nt // 0000000098E0: E0721000 80047CF1 - buffer_store_dword v125, v242, s[16:19], 0 offen nt // 0000000098E8: E0721000 80047DF2 - buffer_store_dword v126, v243, s[16:19], 0 offen nt // 0000000098F0: E0721000 80047EF3 - buffer_store_dword v127, v244, s[16:19], 0 offen nt // 0000000098F8: E0721000 80047FF4 - buffer_store_dword v128, v245, s[16:19], 0 offen nt // 000000009900: E0721000 800480F5 - s_nop 0 // 000000009908: BF800000 - v_mov_b32_e32 v10, 0x80000000 // 00000000990C: 7E1402FF 80000000 - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000009914: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000991C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009924: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000992C: 86A2221E - v_add_lshl_u32 v129, v7, v8, 2 // 000000009930: D1FE0081 020A1107 - v_cndmask_b32_e64 v129, v10, v129, s[34:35] // 000000009938: D1000081 008B030A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000009940: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009948: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009950: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009958: 86A2221E - v_add_lshl_u32 v130, v7, v8, 2 // 00000000995C: D1FE0082 020A1107 - v_cndmask_b32_e64 v130, v10, v130, s[34:35] // 000000009964: D1000082 008B050A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000996C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009974: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000997C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009984: 86A2221E - v_add_lshl_u32 v131, v7, v8, 2 // 000000009988: D1FE0083 020A1107 - v_cndmask_b32_e64 v131, v10, v131, s[34:35] // 000000009990: D1000083 008B070A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000009998: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000099A0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000099A8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000099B0: 86A2221E - v_add_lshl_u32 v135, v7, v8, 2 // 0000000099B4: D1FE0087 020A1107 - v_cndmask_b32_e64 v135, v10, v135, s[34:35] // 0000000099BC: D1000087 008B0F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 0000000099C4: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000099CC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 0000000099D4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 0000000099DC: 86A2221E - v_add_lshl_u32 v136, v7, v8, 2 // 0000000099E0: D1FE0088 020A1107 - v_cndmask_b32_e64 v136, v10, v136, s[34:35] // 0000000099E8: D1000088 008B110A - v_add_co_u32_e64 v8, vcc, v4, 7 // 0000000099F0: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 0000000099F8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009A00: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009A08: 86A2221E - v_add_lshl_u32 v137, v7, v8, 2 // 000000009A0C: D1FE0089 020A1107 - v_cndmask_b32_e64 v137, v10, v137, s[34:35] // 000000009A14: D1000089 008B130A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000009A1C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000009A24: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000009A2C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000009A34: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009A3C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009A44: 86A2221E - v_add_lshl_u32 v138, v7, v4, 2 // 000000009A48: D1FE008A 020A0907 - v_cndmask_b32_e64 v138, v10, v138, s[34:35] // 000000009A50: D100008A 008B150A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000009A58: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009A60: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009A68: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009A70: 86A2221E - v_add_lshl_u32 v139, v7, v8, 2 // 000000009A74: D1FE008B 020A1107 - v_cndmask_b32_e64 v139, v10, v139, s[34:35] // 000000009A7C: D100008B 008B170A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000009A84: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009A8C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009A94: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009A9C: 86A2221E - v_add_lshl_u32 v140, v7, v8, 2 // 000000009AA0: D1FE008C 020A1107 - v_cndmask_b32_e64 v140, v10, v140, s[34:35] // 000000009AA8: D100008C 008B190A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000009AB0: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009AB8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009AC0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009AC8: 86A2221E - v_add_lshl_u32 v141, v7, v8, 2 // 000000009ACC: D1FE008D 020A1107 - v_cndmask_b32_e64 v141, v10, v141, s[34:35] // 000000009AD4: D100008D 008B1B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000009ADC: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009AE4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009AEC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009AF4: 86A2221E - v_add_lshl_u32 v142, v7, v8, 2 // 000000009AF8: D1FE008E 020A1107 - v_cndmask_b32_e64 v142, v10, v142, s[34:35] // 000000009B00: D100008E 008B1D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000009B08: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009B10: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009B18: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009B20: 86A2221E - v_add_lshl_u32 v143, v7, v8, 2 // 000000009B24: D1FE008F 020A1107 - v_cndmask_b32_e64 v143, v10, v143, s[34:35] // 000000009B2C: D100008F 008B1F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000009B34: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009B3C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009B44: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009B4C: 86A2221E - v_add_lshl_u32 v144, v7, v8, 2 // 000000009B50: D1FE0090 020A1107 - v_cndmask_b32_e64 v144, v10, v144, s[34:35] // 000000009B58: D1000090 008B210A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000009B60: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009B68: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009B70: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009B78: 86A2221E - v_add_lshl_u32 v145, v7, v8, 2 // 000000009B7C: D1FE0091 020A1107 - v_cndmask_b32_e64 v145, v10, v145, s[34:35] // 000000009B84: D1000091 008B230A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000009B8C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000009B94: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000009B9C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000009BA4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009BAC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009BB4: 86A2221E - v_add_lshl_u32 v146, v7, v4, 2 // 000000009BB8: D1FE0092 020A0907 - v_cndmask_b32_e64 v146, v10, v146, s[34:35] // 000000009BC0: D1000092 008B250A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000009BC8: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009BD0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009BD8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009BE0: 86A2221E - v_add_lshl_u32 v147, v7, v8, 2 // 000000009BE4: D1FE0093 020A1107 - v_cndmask_b32_e64 v147, v10, v147, s[34:35] // 000000009BEC: D1000093 008B270A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000009BF4: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009BFC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009C04: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009C0C: 86A2221E - v_add_lshl_u32 v148, v7, v8, 2 // 000000009C10: D1FE0094 020A1107 - v_cndmask_b32_e64 v148, v10, v148, s[34:35] // 000000009C18: D1000094 008B290A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000009C20: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009C28: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009C30: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009C38: 86A2221E - v_add_lshl_u32 v149, v7, v8, 2 // 000000009C3C: D1FE0095 020A1107 - v_cndmask_b32_e64 v149, v10, v149, s[34:35] // 000000009C44: D1000095 008B2B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000009C4C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009C54: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009C5C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009C64: 86A2221E - v_add_lshl_u32 v150, v7, v8, 2 // 000000009C68: D1FE0096 020A1107 - v_cndmask_b32_e64 v150, v10, v150, s[34:35] // 000000009C70: D1000096 008B2D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000009C78: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009C80: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009C88: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009C90: 86A2221E - v_add_lshl_u32 v151, v7, v8, 2 // 000000009C94: D1FE0097 020A1107 - v_cndmask_b32_e64 v151, v10, v151, s[34:35] // 000000009C9C: D1000097 008B2F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000009CA4: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009CAC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009CB4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009CBC: 86A2221E - v_add_lshl_u32 v152, v7, v8, 2 // 000000009CC0: D1FE0098 020A1107 - v_cndmask_b32_e64 v152, v10, v152, s[34:35] // 000000009CC8: D1000098 008B310A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000009CD0: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009CD8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009CE0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009CE8: 86A2221E - v_add_lshl_u32 v153, v7, v8, 2 // 000000009CEC: D1FE0099 020A1107 - v_cndmask_b32_e64 v153, v10, v153, s[34:35] // 000000009CF4: D1000099 008B330A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000009CFC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000009D04: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000009D0C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000009D14: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009D1C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009D24: 86A2221E - v_add_lshl_u32 v154, v7, v4, 2 // 000000009D28: D1FE009A 020A0907 - v_cndmask_b32_e64 v154, v10, v154, s[34:35] // 000000009D30: D100009A 008B350A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000009D38: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009D40: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009D48: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009D50: 86A2221E - v_add_lshl_u32 v155, v7, v8, 2 // 000000009D54: D1FE009B 020A1107 - v_cndmask_b32_e64 v155, v10, v155, s[34:35] // 000000009D5C: D100009B 008B370A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000009D64: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009D6C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009D74: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009D7C: 86A2221E - v_add_lshl_u32 v156, v7, v8, 2 // 000000009D80: D1FE009C 020A1107 - v_cndmask_b32_e64 v156, v10, v156, s[34:35] // 000000009D88: D100009C 008B390A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000009D90: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009D98: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009DA0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009DA8: 86A2221E - v_add_lshl_u32 v157, v7, v8, 2 // 000000009DAC: D1FE009D 020A1107 - v_cndmask_b32_e64 v157, v10, v157, s[34:35] // 000000009DB4: D100009D 008B3B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000009DBC: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009DC4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009DCC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009DD4: 86A2221E - v_add_lshl_u32 v158, v7, v8, 2 // 000000009DD8: D1FE009E 020A1107 - v_cndmask_b32_e64 v158, v10, v158, s[34:35] // 000000009DE0: D100009E 008B3D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000009DE8: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009DF0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009DF8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009E00: 86A2221E - v_add_lshl_u32 v159, v7, v8, 2 // 000000009E04: D1FE009F 020A1107 - v_cndmask_b32_e64 v159, v10, v159, s[34:35] // 000000009E0C: D100009F 008B3F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000009E14: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009E1C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009E24: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009E2C: 86A2221E - v_add_lshl_u32 v160, v7, v8, 2 // 000000009E30: D1FE00A0 020A1107 - v_cndmask_b32_e64 v160, v10, v160, s[34:35] // 000000009E38: D10000A0 008B410A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000009E40: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009E48: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009E50: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009E58: 86A2221E - v_add_lshl_u32 v161, v7, v8, 2 // 000000009E5C: D1FE00A1 020A1107 - v_cndmask_b32_e64 v161, v10, v161, s[34:35] // 000000009E64: D10000A1 008B430A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000009E6C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000009E74: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000009E7C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000009E84: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009E8C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009E94: 86A2221E - v_add_lshl_u32 v162, v7, v4, 2 // 000000009E98: D1FE00A2 020A0907 - v_cndmask_b32_e64 v162, v10, v162, s[34:35] // 000000009EA0: D10000A2 008B450A - v_add_co_u32_e64 v8, vcc, v4, 1 // 000000009EA8: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009EB0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009EB8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009EC0: 86A2221E - v_add_lshl_u32 v163, v7, v8, 2 // 000000009EC4: D1FE00A3 020A1107 - v_cndmask_b32_e64 v163, v10, v163, s[34:35] // 000000009ECC: D10000A3 008B470A - v_add_co_u32_e64 v8, vcc, v4, 2 // 000000009ED4: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009EDC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009EE4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009EEC: 86A2221E - v_add_lshl_u32 v164, v7, v8, 2 // 000000009EF0: D1FE00A4 020A1107 - v_cndmask_b32_e64 v164, v10, v164, s[34:35] // 000000009EF8: D10000A4 008B490A - v_add_co_u32_e64 v8, vcc, v4, 3 // 000000009F00: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009F08: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009F10: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009F18: 86A2221E - v_add_lshl_u32 v165, v7, v8, 2 // 000000009F1C: D1FE00A5 020A1107 - v_cndmask_b32_e64 v165, v10, v165, s[34:35] // 000000009F24: D10000A5 008B4B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 000000009F2C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009F34: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009F3C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009F44: 86A2221E - v_add_lshl_u32 v166, v7, v8, 2 // 000000009F48: D1FE00A6 020A1107 - v_cndmask_b32_e64 v166, v10, v166, s[34:35] // 000000009F50: D10000A6 008B4D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 000000009F58: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009F60: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009F68: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009F70: 86A2221E - v_add_lshl_u32 v167, v7, v8, 2 // 000000009F74: D1FE00A7 020A1107 - v_cndmask_b32_e64 v167, v10, v167, s[34:35] // 000000009F7C: D10000A7 008B4F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 000000009F84: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009F8C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009F94: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009F9C: 86A2221E - v_add_lshl_u32 v168, v7, v8, 2 // 000000009FA0: D1FE00A8 020A1107 - v_cndmask_b32_e64 v168, v10, v168, s[34:35] // 000000009FA8: D10000A8 008B510A - v_add_co_u32_e64 v8, vcc, v4, 7 // 000000009FB0: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 000000009FB8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009FC0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 000000009FC8: 86A2221E - v_add_lshl_u32 v169, v7, v8, 2 // 000000009FCC: D1FE00A9 020A1107 - v_cndmask_b32_e64 v169, v10, v169, s[34:35] // 000000009FD4: D10000A9 008B530A - v_add_co_u32_e64 v5, vcc, v5, 1 // 000000009FDC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 000000009FE4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 000000009FEC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 000000009FF4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 000000009FFC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A004: 86A2221E - v_add_lshl_u32 v170, v7, v4, 2 // 00000000A008: D1FE00AA 020A0907 - v_cndmask_b32_e64 v170, v10, v170, s[34:35] // 00000000A010: D10000AA 008B550A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000A018: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A020: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A028: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A030: 86A2221E - v_add_lshl_u32 v171, v7, v8, 2 // 00000000A034: D1FE00AB 020A1107 - v_cndmask_b32_e64 v171, v10, v171, s[34:35] // 00000000A03C: D10000AB 008B570A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000A044: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A04C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A054: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A05C: 86A2221E - v_add_lshl_u32 v172, v7, v8, 2 // 00000000A060: D1FE00AC 020A1107 - v_cndmask_b32_e64 v172, v10, v172, s[34:35] // 00000000A068: D10000AC 008B590A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000A070: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A078: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A080: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A088: 86A2221E - v_add_lshl_u32 v173, v7, v8, 2 // 00000000A08C: D1FE00AD 020A1107 - v_cndmask_b32_e64 v173, v10, v173, s[34:35] // 00000000A094: D10000AD 008B5B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000A09C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A0A4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A0AC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A0B4: 86A2221E - v_add_lshl_u32 v174, v7, v8, 2 // 00000000A0B8: D1FE00AE 020A1107 - v_cndmask_b32_e64 v174, v10, v174, s[34:35] // 00000000A0C0: D10000AE 008B5D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000A0C8: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A0D0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A0D8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A0E0: 86A2221E - v_add_lshl_u32 v175, v7, v8, 2 // 00000000A0E4: D1FE00AF 020A1107 - v_cndmask_b32_e64 v175, v10, v175, s[34:35] // 00000000A0EC: D10000AF 008B5F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000A0F4: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A0FC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A104: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A10C: 86A2221E - v_add_lshl_u32 v176, v7, v8, 2 // 00000000A110: D1FE00B0 020A1107 - v_cndmask_b32_e64 v176, v10, v176, s[34:35] // 00000000A118: D10000B0 008B610A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000A120: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A128: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A130: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A138: 86A2221E - v_add_lshl_u32 v177, v7, v8, 2 // 00000000A13C: D1FE00B1 020A1107 - v_cndmask_b32_e64 v177, v10, v177, s[34:35] // 00000000A144: D10000B1 008B630A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000A14C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000A154: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000A15C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000A164: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A16C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A174: 86A2221E - v_add_lshl_u32 v178, v7, v4, 2 // 00000000A178: D1FE00B2 020A0907 - v_cndmask_b32_e64 v178, v10, v178, s[34:35] // 00000000A180: D10000B2 008B650A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000A188: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A190: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A198: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A1A0: 86A2221E - v_add_lshl_u32 v179, v7, v8, 2 // 00000000A1A4: D1FE00B3 020A1107 - v_cndmask_b32_e64 v179, v10, v179, s[34:35] // 00000000A1AC: D10000B3 008B670A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000A1B4: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A1BC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A1C4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A1CC: 86A2221E - v_add_lshl_u32 v180, v7, v8, 2 // 00000000A1D0: D1FE00B4 020A1107 - v_cndmask_b32_e64 v180, v10, v180, s[34:35] // 00000000A1D8: D10000B4 008B690A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000A1E0: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A1E8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A1F0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A1F8: 86A2221E - v_add_lshl_u32 v181, v7, v8, 2 // 00000000A1FC: D1FE00B5 020A1107 - v_cndmask_b32_e64 v181, v10, v181, s[34:35] // 00000000A204: D10000B5 008B6B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000A20C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A214: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A21C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A224: 86A2221E - v_add_lshl_u32 v182, v7, v8, 2 // 00000000A228: D1FE00B6 020A1107 - v_cndmask_b32_e64 v182, v10, v182, s[34:35] // 00000000A230: D10000B6 008B6D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000A238: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A240: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A248: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A250: 86A2221E - v_add_lshl_u32 v183, v7, v8, 2 // 00000000A254: D1FE00B7 020A1107 - v_cndmask_b32_e64 v183, v10, v183, s[34:35] // 00000000A25C: D10000B7 008B6F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000A264: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A26C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A274: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A27C: 86A2221E - v_add_lshl_u32 v184, v7, v8, 2 // 00000000A280: D1FE00B8 020A1107 - v_cndmask_b32_e64 v184, v10, v184, s[34:35] // 00000000A288: D10000B8 008B710A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000A290: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A298: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A2A0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A2A8: 86A2221E - v_add_lshl_u32 v185, v7, v8, 2 // 00000000A2AC: D1FE00B9 020A1107 - v_cndmask_b32_e64 v185, v10, v185, s[34:35] // 00000000A2B4: D10000B9 008B730A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000A2BC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000A2C4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000A2CC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000A2D4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A2DC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A2E4: 86A2221E - v_add_lshl_u32 v186, v7, v4, 2 // 00000000A2E8: D1FE00BA 020A0907 - v_cndmask_b32_e64 v186, v10, v186, s[34:35] // 00000000A2F0: D10000BA 008B750A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000A2F8: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A300: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A308: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A310: 86A2221E - v_add_lshl_u32 v187, v7, v8, 2 // 00000000A314: D1FE00BB 020A1107 - v_cndmask_b32_e64 v187, v10, v187, s[34:35] // 00000000A31C: D10000BB 008B770A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000A324: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A32C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A334: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A33C: 86A2221E - v_add_lshl_u32 v188, v7, v8, 2 // 00000000A340: D1FE00BC 020A1107 - v_cndmask_b32_e64 v188, v10, v188, s[34:35] // 00000000A348: D10000BC 008B790A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000A350: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A358: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A360: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A368: 86A2221E - v_add_lshl_u32 v189, v7, v8, 2 // 00000000A36C: D1FE00BD 020A1107 - v_cndmask_b32_e64 v189, v10, v189, s[34:35] // 00000000A374: D10000BD 008B7B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000A37C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A384: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A38C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A394: 86A2221E - v_add_lshl_u32 v190, v7, v8, 2 // 00000000A398: D1FE00BE 020A1107 - v_cndmask_b32_e64 v190, v10, v190, s[34:35] // 00000000A3A0: D10000BE 008B7D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000A3A8: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A3B0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A3B8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A3C0: 86A2221E - v_add_lshl_u32 v191, v7, v8, 2 // 00000000A3C4: D1FE00BF 020A1107 - v_cndmask_b32_e64 v191, v10, v191, s[34:35] // 00000000A3CC: D10000BF 008B7F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000A3D4: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A3DC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A3E4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A3EC: 86A2221E - v_add_lshl_u32 v192, v7, v8, 2 // 00000000A3F0: D1FE00C0 020A1107 - v_cndmask_b32_e64 v192, v10, v192, s[34:35] // 00000000A3F8: D10000C0 008B810A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000A400: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A408: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A410: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A418: 86A2221E - v_add_lshl_u32 v193, v7, v8, 2 // 00000000A41C: D1FE00C1 020A1107 - v_cndmask_b32_e64 v193, v10, v193, s[34:35] // 00000000A424: D10000C1 008B830A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000A42C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000A434: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000A43C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000A444: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A44C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A454: 86A2221E - v_add_lshl_u32 v194, v7, v4, 2 // 00000000A458: D1FE00C2 020A0907 - v_cndmask_b32_e64 v194, v10, v194, s[34:35] // 00000000A460: D10000C2 008B850A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000A468: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A470: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A478: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A480: 86A2221E - v_add_lshl_u32 v195, v7, v8, 2 // 00000000A484: D1FE00C3 020A1107 - v_cndmask_b32_e64 v195, v10, v195, s[34:35] // 00000000A48C: D10000C3 008B870A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000A494: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A49C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A4A4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A4AC: 86A2221E - v_add_lshl_u32 v196, v7, v8, 2 // 00000000A4B0: D1FE00C4 020A1107 - v_cndmask_b32_e64 v196, v10, v196, s[34:35] // 00000000A4B8: D10000C4 008B890A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000A4C0: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A4C8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A4D0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A4D8: 86A2221E - v_add_lshl_u32 v197, v7, v8, 2 // 00000000A4DC: D1FE00C5 020A1107 - v_cndmask_b32_e64 v197, v10, v197, s[34:35] // 00000000A4E4: D10000C5 008B8B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000A4EC: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A4F4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A4FC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A504: 86A2221E - v_add_lshl_u32 v198, v7, v8, 2 // 00000000A508: D1FE00C6 020A1107 - v_cndmask_b32_e64 v198, v10, v198, s[34:35] // 00000000A510: D10000C6 008B8D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000A518: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A520: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A528: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A530: 86A2221E - v_add_lshl_u32 v199, v7, v8, 2 // 00000000A534: D1FE00C7 020A1107 - v_cndmask_b32_e64 v199, v10, v199, s[34:35] // 00000000A53C: D10000C7 008B8F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000A544: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A54C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A554: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A55C: 86A2221E - v_add_lshl_u32 v200, v7, v8, 2 // 00000000A560: D1FE00C8 020A1107 - v_cndmask_b32_e64 v200, v10, v200, s[34:35] // 00000000A568: D10000C8 008B910A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000A570: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A578: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A580: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A588: 86A2221E - v_add_lshl_u32 v201, v7, v8, 2 // 00000000A58C: D1FE00C9 020A1107 - v_cndmask_b32_e64 v201, v10, v201, s[34:35] // 00000000A594: D10000C9 008B930A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000A59C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000A5A4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000A5AC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000A5B4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A5BC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A5C4: 86A2221E - v_add_lshl_u32 v202, v7, v4, 2 // 00000000A5C8: D1FE00CA 020A0907 - v_cndmask_b32_e64 v202, v10, v202, s[34:35] // 00000000A5D0: D10000CA 008B950A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000A5D8: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A5E0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A5E8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A5F0: 86A2221E - v_add_lshl_u32 v203, v7, v8, 2 // 00000000A5F4: D1FE00CB 020A1107 - v_cndmask_b32_e64 v203, v10, v203, s[34:35] // 00000000A5FC: D10000CB 008B970A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000A604: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A60C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A614: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A61C: 86A2221E - v_add_lshl_u32 v204, v7, v8, 2 // 00000000A620: D1FE00CC 020A1107 - v_cndmask_b32_e64 v204, v10, v204, s[34:35] // 00000000A628: D10000CC 008B990A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000A630: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A638: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A640: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A648: 86A2221E - v_add_lshl_u32 v205, v7, v8, 2 // 00000000A64C: D1FE00CD 020A1107 - v_cndmask_b32_e64 v205, v10, v205, s[34:35] // 00000000A654: D10000CD 008B9B0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000A65C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A664: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A66C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A674: 86A2221E - v_add_lshl_u32 v206, v7, v8, 2 // 00000000A678: D1FE00CE 020A1107 - v_cndmask_b32_e64 v206, v10, v206, s[34:35] // 00000000A680: D10000CE 008B9D0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000A688: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A690: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A698: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A6A0: 86A2221E - v_add_lshl_u32 v207, v7, v8, 2 // 00000000A6A4: D1FE00CF 020A1107 - v_cndmask_b32_e64 v207, v10, v207, s[34:35] // 00000000A6AC: D10000CF 008B9F0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000A6B4: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A6BC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A6C4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A6CC: 86A2221E - v_add_lshl_u32 v208, v7, v8, 2 // 00000000A6D0: D1FE00D0 020A1107 - v_cndmask_b32_e64 v208, v10, v208, s[34:35] // 00000000A6D8: D10000D0 008BA10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000A6E0: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A6E8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A6F0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A6F8: 86A2221E - v_add_lshl_u32 v209, v7, v8, 2 // 00000000A6FC: D1FE00D1 020A1107 - v_cndmask_b32_e64 v209, v10, v209, s[34:35] // 00000000A704: D10000D1 008BA30A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000A70C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000A714: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000A71C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000A724: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A72C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A734: 86A2221E - v_add_lshl_u32 v210, v7, v4, 2 // 00000000A738: D1FE00D2 020A0907 - v_cndmask_b32_e64 v210, v10, v210, s[34:35] // 00000000A740: D10000D2 008BA50A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000A748: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A750: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A758: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A760: 86A2221E - v_add_lshl_u32 v211, v7, v8, 2 // 00000000A764: D1FE00D3 020A1107 - v_cndmask_b32_e64 v211, v10, v211, s[34:35] // 00000000A76C: D10000D3 008BA70A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000A774: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A77C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A784: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A78C: 86A2221E - v_add_lshl_u32 v212, v7, v8, 2 // 00000000A790: D1FE00D4 020A1107 - v_cndmask_b32_e64 v212, v10, v212, s[34:35] // 00000000A798: D10000D4 008BA90A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000A7A0: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A7A8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A7B0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A7B8: 86A2221E - v_add_lshl_u32 v213, v7, v8, 2 // 00000000A7BC: D1FE00D5 020A1107 - v_cndmask_b32_e64 v213, v10, v213, s[34:35] // 00000000A7C4: D10000D5 008BAB0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000A7CC: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A7D4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A7DC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A7E4: 86A2221E - v_add_lshl_u32 v214, v7, v8, 2 // 00000000A7E8: D1FE00D6 020A1107 - v_cndmask_b32_e64 v214, v10, v214, s[34:35] // 00000000A7F0: D10000D6 008BAD0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000A7F8: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A800: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A808: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A810: 86A2221E - v_add_lshl_u32 v215, v7, v8, 2 // 00000000A814: D1FE00D7 020A1107 - v_cndmask_b32_e64 v215, v10, v215, s[34:35] // 00000000A81C: D10000D7 008BAF0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000A824: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A82C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A834: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A83C: 86A2221E - v_add_lshl_u32 v216, v7, v8, 2 // 00000000A840: D1FE00D8 020A1107 - v_cndmask_b32_e64 v216, v10, v216, s[34:35] // 00000000A848: D10000D8 008BB10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000A850: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A858: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A860: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A868: 86A2221E - v_add_lshl_u32 v217, v7, v8, 2 // 00000000A86C: D1FE00D9 020A1107 - v_cndmask_b32_e64 v217, v10, v217, s[34:35] // 00000000A874: D10000D9 008BB30A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000A87C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000A884: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000A88C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000A894: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A89C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A8A4: 86A2221E - v_add_lshl_u32 v218, v7, v4, 2 // 00000000A8A8: D1FE00DA 020A0907 - v_cndmask_b32_e64 v218, v10, v218, s[34:35] // 00000000A8B0: D10000DA 008BB50A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000A8B8: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A8C0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A8C8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A8D0: 86A2221E - v_add_lshl_u32 v219, v7, v8, 2 // 00000000A8D4: D1FE00DB 020A1107 - v_cndmask_b32_e64 v219, v10, v219, s[34:35] // 00000000A8DC: D10000DB 008BB70A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000A8E4: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A8EC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A8F4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A8FC: 86A2221E - v_add_lshl_u32 v220, v7, v8, 2 // 00000000A900: D1FE00DC 020A1107 - v_cndmask_b32_e64 v220, v10, v220, s[34:35] // 00000000A908: D10000DC 008BB90A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000A910: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A918: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A920: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A928: 86A2221E - v_add_lshl_u32 v221, v7, v8, 2 // 00000000A92C: D1FE00DD 020A1107 - v_cndmask_b32_e64 v221, v10, v221, s[34:35] // 00000000A934: D10000DD 008BBB0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000A93C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A944: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A94C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A954: 86A2221E - v_add_lshl_u32 v222, v7, v8, 2 // 00000000A958: D1FE00DE 020A1107 - v_cndmask_b32_e64 v222, v10, v222, s[34:35] // 00000000A960: D10000DE 008BBD0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000A968: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A970: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A978: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A980: 86A2221E - v_add_lshl_u32 v223, v7, v8, 2 // 00000000A984: D1FE00DF 020A1107 - v_cndmask_b32_e64 v223, v10, v223, s[34:35] // 00000000A98C: D10000DF 008BBF0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000A994: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A99C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A9A4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A9AC: 86A2221E - v_add_lshl_u32 v224, v7, v8, 2 // 00000000A9B0: D1FE00E0 020A1107 - v_cndmask_b32_e64 v224, v10, v224, s[34:35] // 00000000A9B8: D10000E0 008BC10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000A9C0: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000A9C8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000A9D0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000A9D8: 86A2221E - v_add_lshl_u32 v225, v7, v8, 2 // 00000000A9DC: D1FE00E1 020A1107 - v_cndmask_b32_e64 v225, v10, v225, s[34:35] // 00000000A9E4: D10000E1 008BC30A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000A9EC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000A9F4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000A9FC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000AA04: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AA0C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AA14: 86A2221E - v_add_lshl_u32 v226, v7, v4, 2 // 00000000AA18: D1FE00E2 020A0907 - v_cndmask_b32_e64 v226, v10, v226, s[34:35] // 00000000AA20: D10000E2 008BC50A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000AA28: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000AA30: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AA38: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AA40: 86A2221E - v_add_lshl_u32 v227, v7, v8, 2 // 00000000AA44: D1FE00E3 020A1107 - v_cndmask_b32_e64 v227, v10, v227, s[34:35] // 00000000AA4C: D10000E3 008BC70A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000AA54: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000AA5C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AA64: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AA6C: 86A2221E - v_add_lshl_u32 v228, v7, v8, 2 // 00000000AA70: D1FE00E4 020A1107 - v_cndmask_b32_e64 v228, v10, v228, s[34:35] // 00000000AA78: D10000E4 008BC90A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000AA80: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000AA88: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AA90: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AA98: 86A2221E - v_add_lshl_u32 v229, v7, v8, 2 // 00000000AA9C: D1FE00E5 020A1107 - v_cndmask_b32_e64 v229, v10, v229, s[34:35] // 00000000AAA4: D10000E5 008BCB0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000AAAC: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000AAB4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AABC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AAC4: 86A2221E - v_add_lshl_u32 v230, v7, v8, 2 // 00000000AAC8: D1FE00E6 020A1107 - v_cndmask_b32_e64 v230, v10, v230, s[34:35] // 00000000AAD0: D10000E6 008BCD0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000AAD8: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000AAE0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AAE8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AAF0: 86A2221E - v_add_lshl_u32 v231, v7, v8, 2 // 00000000AAF4: D1FE00E7 020A1107 - v_cndmask_b32_e64 v231, v10, v231, s[34:35] // 00000000AAFC: D10000E7 008BCF0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000AB04: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000AB0C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AB14: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AB1C: 86A2221E - v_add_lshl_u32 v232, v7, v8, 2 // 00000000AB20: D1FE00E8 020A1107 - v_cndmask_b32_e64 v232, v10, v232, s[34:35] // 00000000AB28: D10000E8 008BD10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000AB30: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000AB38: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AB40: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AB48: 86A2221E - v_add_lshl_u32 v233, v7, v8, 2 // 00000000AB4C: D1FE00E9 020A1107 - v_cndmask_b32_e64 v233, v10, v233, s[34:35] // 00000000AB54: D10000E9 008BD30A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000AB5C: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000AB64: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000AB6C: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000AB74: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AB7C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AB84: 86A2221E - v_add_lshl_u32 v234, v7, v4, 2 // 00000000AB88: D1FE00EA 020A0907 - v_cndmask_b32_e64 v234, v10, v234, s[34:35] // 00000000AB90: D10000EA 008BD50A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000AB98: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000ABA0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000ABA8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000ABB0: 86A2221E - v_add_lshl_u32 v235, v7, v8, 2 // 00000000ABB4: D1FE00EB 020A1107 - v_cndmask_b32_e64 v235, v10, v235, s[34:35] // 00000000ABBC: D10000EB 008BD70A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000ABC4: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000ABCC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000ABD4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000ABDC: 86A2221E - v_add_lshl_u32 v236, v7, v8, 2 // 00000000ABE0: D1FE00EC 020A1107 - v_cndmask_b32_e64 v236, v10, v236, s[34:35] // 00000000ABE8: D10000EC 008BD90A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000ABF0: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000ABF8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AC00: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AC08: 86A2221E - v_add_lshl_u32 v237, v7, v8, 2 // 00000000AC0C: D1FE00ED 020A1107 - v_cndmask_b32_e64 v237, v10, v237, s[34:35] // 00000000AC14: D10000ED 008BDB0A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000AC1C: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000AC24: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AC2C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AC34: 86A2221E - v_add_lshl_u32 v238, v7, v8, 2 // 00000000AC38: D1FE00EE 020A1107 - v_cndmask_b32_e64 v238, v10, v238, s[34:35] // 00000000AC40: D10000EE 008BDD0A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000AC48: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000AC50: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AC58: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AC60: 86A2221E - v_add_lshl_u32 v239, v7, v8, 2 // 00000000AC64: D1FE00EF 020A1107 - v_cndmask_b32_e64 v239, v10, v239, s[34:35] // 00000000AC6C: D10000EF 008BDF0A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000AC74: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000AC7C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AC84: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AC8C: 86A2221E - v_add_lshl_u32 v240, v7, v8, 2 // 00000000AC90: D1FE00F0 020A1107 - v_cndmask_b32_e64 v240, v10, v240, s[34:35] // 00000000AC98: D10000F0 008BE10A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000ACA0: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000ACA8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000ACB0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000ACB8: 86A2221E - v_add_lshl_u32 v241, v7, v8, 2 // 00000000ACBC: D1FE00F1 020A1107 - v_cndmask_b32_e64 v241, v10, v241, s[34:35] // 00000000ACC4: D10000F1 008BE30A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000ACCC: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000ACD4: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000ACDC: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000ACE4: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000ACEC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000ACF4: 86A2221E - v_add_lshl_u32 v242, v7, v4, 2 // 00000000ACF8: D1FE00F2 020A0907 - v_cndmask_b32_e64 v242, v10, v242, s[34:35] // 00000000AD00: D10000F2 008BE50A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000AD08: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000AD10: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AD18: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AD20: 86A2221E - v_add_lshl_u32 v243, v7, v8, 2 // 00000000AD24: D1FE00F3 020A1107 - v_cndmask_b32_e64 v243, v10, v243, s[34:35] // 00000000AD2C: D10000F3 008BE70A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000AD34: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000AD3C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AD44: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AD4C: 86A2221E - v_add_lshl_u32 v244, v7, v8, 2 // 00000000AD50: D1FE00F4 020A1107 - v_cndmask_b32_e64 v244, v10, v244, s[34:35] // 00000000AD58: D10000F4 008BE90A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000AD60: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000AD68: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000AD70: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000AD78: 86A2221E - v_add_lshl_u32 v245, v7, v8, 2 // 00000000AD7C: D1FE00F5 020A1107 - v_cndmask_b32_e64 v245, v10, v245, s[34:35] // 00000000AD84: D10000F5 008BEB0A - v_accvgpr_read_b32 v15, a201 // 00000000AD8C: D3D8400F 180001C9 - v_accvgpr_read_b32 v16, a205 // 00000000AD94: D3D84010 180001CD - v_accvgpr_read_b32 v17, a209 // 00000000AD9C: D3D84011 180001D1 - v_accvgpr_read_b32 v18, a213 // 00000000ADA4: D3D84012 180001D5 - v_accvgpr_read_b32 v19, a217 // 00000000ADAC: D3D84013 180001D9 - v_accvgpr_read_b32 v20, a221 // 00000000ADB4: D3D84014 180001DD - v_accvgpr_read_b32 v21, a225 // 00000000ADBC: D3D84015 180001E1 - v_accvgpr_read_b32 v22, a229 // 00000000ADC4: D3D84016 180001E5 - v_accvgpr_read_b32 v23, a233 // 00000000ADCC: D3D84017 180001E9 - v_accvgpr_read_b32 v24, a237 // 00000000ADD4: D3D84018 180001ED - v_accvgpr_read_b32 v25, a241 // 00000000ADDC: D3D84019 180001F1 - v_accvgpr_read_b32 v26, a245 // 00000000ADE4: D3D8401A 180001F5 - v_accvgpr_read_b32 v27, a249 // 00000000ADEC: D3D8401B 180001F9 - v_accvgpr_read_b32 v28, a253 // 00000000ADF4: D3D8401C 180001FD - v_accvgpr_read_b32 v29, a2 // 00000000ADFC: D3D8401D 18000102 - v_accvgpr_read_b32 v30, a6 // 00000000AE04: D3D8401E 18000106 - v_accvgpr_read_b32 v31, a10 // 00000000AE0C: D3D8401F 1800010A - v_accvgpr_read_b32 v32, a14 // 00000000AE14: D3D84020 1800010E - v_accvgpr_read_b32 v33, a18 // 00000000AE1C: D3D84021 18000112 - v_accvgpr_read_b32 v34, a22 // 00000000AE24: D3D84022 18000116 - v_accvgpr_read_b32 v35, a26 // 00000000AE2C: D3D84023 1800011A - v_accvgpr_read_b32 v36, a30 // 00000000AE34: D3D84024 1800011E - v_accvgpr_read_b32 v37, a34 // 00000000AE3C: D3D84025 18000122 - v_accvgpr_read_b32 v38, a38 // 00000000AE44: D3D84026 18000126 - v_accvgpr_read_b32 v39, a42 // 00000000AE4C: D3D84027 1800012A - v_accvgpr_read_b32 v40, a46 // 00000000AE54: D3D84028 1800012E - v_accvgpr_read_b32 v41, a50 // 00000000AE5C: D3D84029 18000132 - v_accvgpr_read_b32 v42, a54 // 00000000AE64: D3D8402A 18000136 - v_accvgpr_read_b32 v43, a58 // 00000000AE6C: D3D8402B 1800013A - v_accvgpr_read_b32 v44, a62 // 00000000AE74: D3D8402C 1800013E - v_accvgpr_read_b32 v45, a66 // 00000000AE7C: D3D8402D 18000142 - v_accvgpr_read_b32 v46, a70 // 00000000AE84: D3D8402E 18000146 - v_accvgpr_read_b32 v47, a74 // 00000000AE8C: D3D8402F 1800014A - v_accvgpr_read_b32 v48, a78 // 00000000AE94: D3D84030 1800014E - v_accvgpr_read_b32 v49, a82 // 00000000AE9C: D3D84031 18000152 - v_accvgpr_read_b32 v50, a86 // 00000000AEA4: D3D84032 18000156 - v_accvgpr_read_b32 v51, a90 // 00000000AEAC: D3D84033 1800015A - v_accvgpr_read_b32 v52, a94 // 00000000AEB4: D3D84034 1800015E - v_accvgpr_read_b32 v53, a98 // 00000000AEBC: D3D84035 18000162 - v_accvgpr_read_b32 v54, a102 // 00000000AEC4: D3D84036 18000166 - v_accvgpr_read_b32 v55, a106 // 00000000AECC: D3D84037 1800016A - v_accvgpr_read_b32 v56, a110 // 00000000AED4: D3D84038 1800016E - v_accvgpr_read_b32 v57, a114 // 00000000AEDC: D3D84039 18000172 - v_accvgpr_read_b32 v58, a118 // 00000000AEE4: D3D8403A 18000176 - v_accvgpr_read_b32 v59, a122 // 00000000AEEC: D3D8403B 1800017A - v_accvgpr_read_b32 v60, a126 // 00000000AEF4: D3D8403C 1800017E - v_accvgpr_read_b32 v61, a130 // 00000000AEFC: D3D8403D 18000182 - v_accvgpr_read_b32 v62, a134 // 00000000AF04: D3D8403E 18000186 - v_accvgpr_read_b32 v63, a138 // 00000000AF0C: D3D8403F 1800018A - v_accvgpr_read_b32 v64, a142 // 00000000AF14: D3D84040 1800018E - v_accvgpr_read_b32 v65, a146 // 00000000AF1C: D3D84041 18000192 - v_accvgpr_read_b32 v66, a150 // 00000000AF24: D3D84042 18000196 - v_accvgpr_read_b32 v67, a154 // 00000000AF2C: D3D84043 1800019A - v_accvgpr_read_b32 v68, a158 // 00000000AF34: D3D84044 1800019E - v_accvgpr_read_b32 v69, a162 // 00000000AF3C: D3D84045 180001A2 - v_accvgpr_read_b32 v70, a166 // 00000000AF44: D3D84046 180001A6 - v_accvgpr_read_b32 v71, a170 // 00000000AF4C: D3D84047 180001AA - v_accvgpr_read_b32 v72, a174 // 00000000AF54: D3D84048 180001AE - v_accvgpr_read_b32 v73, a178 // 00000000AF5C: D3D84049 180001B2 - v_accvgpr_read_b32 v74, a182 // 00000000AF64: D3D8404A 180001B6 - v_accvgpr_read_b32 v75, a186 // 00000000AF6C: D3D8404B 180001BA - v_accvgpr_read_b32 v76, a190 // 00000000AF74: D3D8404C 180001BE - v_accvgpr_read_b32 v77, a194 // 00000000AF7C: D3D8404D 180001C2 - v_accvgpr_read_b32 v78, a198 // 00000000AF84: D3D8404E 180001C6 - v_accvgpr_read_b32 v79, a202 // 00000000AF8C: D3D8404F 180001CA - v_accvgpr_read_b32 v80, a206 // 00000000AF94: D3D84050 180001CE - v_accvgpr_read_b32 v81, a210 // 00000000AF9C: D3D84051 180001D2 - v_accvgpr_read_b32 v82, a214 // 00000000AFA4: D3D84052 180001D6 - v_accvgpr_read_b32 v83, a218 // 00000000AFAC: D3D84053 180001DA - v_accvgpr_read_b32 v84, a222 // 00000000AFB4: D3D84054 180001DE - v_accvgpr_read_b32 v85, a226 // 00000000AFBC: D3D84055 180001E2 - v_accvgpr_read_b32 v86, a230 // 00000000AFC4: D3D84056 180001E6 - v_accvgpr_read_b32 v87, a234 // 00000000AFCC: D3D84057 180001EA - v_accvgpr_read_b32 v88, a238 // 00000000AFD4: D3D84058 180001EE - v_accvgpr_read_b32 v89, a242 // 00000000AFDC: D3D84059 180001F2 - v_accvgpr_read_b32 v90, a246 // 00000000AFE4: D3D8405A 180001F6 - v_accvgpr_read_b32 v91, a250 // 00000000AFEC: D3D8405B 180001FA - v_accvgpr_read_b32 v92, a254 // 00000000AFF4: D3D8405C 180001FE - v_accvgpr_read_b32 v93, a3 // 00000000AFFC: D3D8405D 18000103 - v_accvgpr_read_b32 v94, a7 // 00000000B004: D3D8405E 18000107 - v_accvgpr_read_b32 v95, a11 // 00000000B00C: D3D8405F 1800010B - v_accvgpr_read_b32 v96, a15 // 00000000B014: D3D84060 1800010F - v_accvgpr_read_b32 v97, a19 // 00000000B01C: D3D84061 18000113 - v_accvgpr_read_b32 v98, a23 // 00000000B024: D3D84062 18000117 - v_accvgpr_read_b32 v99, a27 // 00000000B02C: D3D84063 1800011B - v_accvgpr_read_b32 v100, a31 // 00000000B034: D3D84064 1800011F - v_accvgpr_read_b32 v101, a35 // 00000000B03C: D3D84065 18000123 - v_accvgpr_read_b32 v102, a39 // 00000000B044: D3D84066 18000127 - v_accvgpr_read_b32 v103, a43 // 00000000B04C: D3D84067 1800012B - v_accvgpr_read_b32 v104, a47 // 00000000B054: D3D84068 1800012F - v_accvgpr_read_b32 v105, a51 // 00000000B05C: D3D84069 18000133 - v_accvgpr_read_b32 v106, a55 // 00000000B064: D3D8406A 18000137 - v_accvgpr_read_b32 v107, a59 // 00000000B06C: D3D8406B 1800013B - v_accvgpr_read_b32 v108, a63 // 00000000B074: D3D8406C 1800013F - v_accvgpr_read_b32 v109, a67 // 00000000B07C: D3D8406D 18000143 - v_accvgpr_read_b32 v110, a71 // 00000000B084: D3D8406E 18000147 - v_accvgpr_read_b32 v111, a75 // 00000000B08C: D3D8406F 1800014B - v_accvgpr_read_b32 v112, a79 // 00000000B094: D3D84070 1800014F - v_accvgpr_read_b32 v113, a83 // 00000000B09C: D3D84071 18000153 - v_accvgpr_read_b32 v114, a87 // 00000000B0A4: D3D84072 18000157 - v_accvgpr_read_b32 v115, a91 // 00000000B0AC: D3D84073 1800015B - v_accvgpr_read_b32 v116, a95 // 00000000B0B4: D3D84074 1800015F - v_accvgpr_read_b32 v117, a99 // 00000000B0BC: D3D84075 18000163 - v_accvgpr_read_b32 v118, a103 // 00000000B0C4: D3D84076 18000167 - v_accvgpr_read_b32 v119, a107 // 00000000B0CC: D3D84077 1800016B - v_accvgpr_read_b32 v120, a111 // 00000000B0D4: D3D84078 1800016F - v_accvgpr_read_b32 v121, a115 // 00000000B0DC: D3D84079 18000173 - v_accvgpr_read_b32 v122, a119 // 00000000B0E4: D3D8407A 18000177 - v_accvgpr_read_b32 v123, a123 // 00000000B0EC: D3D8407B 1800017B - v_accvgpr_read_b32 v124, a127 // 00000000B0F4: D3D8407C 1800017F - v_accvgpr_read_b32 v125, a131 // 00000000B0FC: D3D8407D 18000183 - v_accvgpr_read_b32 v126, a135 // 00000000B104: D3D8407E 18000187 - v_accvgpr_read_b32 v127, a139 // 00000000B10C: D3D8407F 1800018B - v_accvgpr_read_b32 v128, a143 // 00000000B114: D3D84080 1800018F - buffer_store_dword v15, v129, s[16:19], 0 offen nt // 00000000B11C: E0721000 80040F81 - buffer_store_dword v16, v130, s[16:19], 0 offen nt // 00000000B124: E0721000 80041082 - buffer_store_dword v17, v131, s[16:19], 0 offen nt // 00000000B12C: E0721000 80041183 - buffer_store_dword v18, v135, s[16:19], 0 offen nt // 00000000B134: E0721000 80041287 - buffer_store_dword v19, v136, s[16:19], 0 offen nt // 00000000B13C: E0721000 80041388 - buffer_store_dword v20, v137, s[16:19], 0 offen nt // 00000000B144: E0721000 80041489 - buffer_store_dword v21, v138, s[16:19], 0 offen nt // 00000000B14C: E0721000 8004158A - buffer_store_dword v22, v139, s[16:19], 0 offen nt // 00000000B154: E0721000 8004168B - buffer_store_dword v23, v140, s[16:19], 0 offen nt // 00000000B15C: E0721000 8004178C - buffer_store_dword v24, v141, s[16:19], 0 offen nt // 00000000B164: E0721000 8004188D - buffer_store_dword v25, v142, s[16:19], 0 offen nt // 00000000B16C: E0721000 8004198E - buffer_store_dword v26, v143, s[16:19], 0 offen nt // 00000000B174: E0721000 80041A8F - buffer_store_dword v27, v144, s[16:19], 0 offen nt // 00000000B17C: E0721000 80041B90 - buffer_store_dword v28, v145, s[16:19], 0 offen nt // 00000000B184: E0721000 80041C91 - buffer_store_dword v29, v146, s[16:19], 0 offen nt // 00000000B18C: E0721000 80041D92 - buffer_store_dword v30, v147, s[16:19], 0 offen nt // 00000000B194: E0721000 80041E93 - buffer_store_dword v31, v148, s[16:19], 0 offen nt // 00000000B19C: E0721000 80041F94 - buffer_store_dword v32, v149, s[16:19], 0 offen nt // 00000000B1A4: E0721000 80042095 - buffer_store_dword v33, v150, s[16:19], 0 offen nt // 00000000B1AC: E0721000 80042196 - buffer_store_dword v34, v151, s[16:19], 0 offen nt // 00000000B1B4: E0721000 80042297 - buffer_store_dword v35, v152, s[16:19], 0 offen nt // 00000000B1BC: E0721000 80042398 - buffer_store_dword v36, v153, s[16:19], 0 offen nt // 00000000B1C4: E0721000 80042499 - buffer_store_dword v37, v154, s[16:19], 0 offen nt // 00000000B1CC: E0721000 8004259A - buffer_store_dword v38, v155, s[16:19], 0 offen nt // 00000000B1D4: E0721000 8004269B - buffer_store_dword v39, v156, s[16:19], 0 offen nt // 00000000B1DC: E0721000 8004279C - buffer_store_dword v40, v157, s[16:19], 0 offen nt // 00000000B1E4: E0721000 8004289D - buffer_store_dword v41, v158, s[16:19], 0 offen nt // 00000000B1EC: E0721000 8004299E - buffer_store_dword v42, v159, s[16:19], 0 offen nt // 00000000B1F4: E0721000 80042A9F - buffer_store_dword v43, v160, s[16:19], 0 offen nt // 00000000B1FC: E0721000 80042BA0 - buffer_store_dword v44, v161, s[16:19], 0 offen nt // 00000000B204: E0721000 80042CA1 - buffer_store_dword v45, v162, s[16:19], 0 offen nt // 00000000B20C: E0721000 80042DA2 - buffer_store_dword v46, v163, s[16:19], 0 offen nt // 00000000B214: E0721000 80042EA3 - buffer_store_dword v47, v164, s[16:19], 0 offen nt // 00000000B21C: E0721000 80042FA4 - buffer_store_dword v48, v165, s[16:19], 0 offen nt // 00000000B224: E0721000 800430A5 - buffer_store_dword v49, v166, s[16:19], 0 offen nt // 00000000B22C: E0721000 800431A6 - buffer_store_dword v50, v167, s[16:19], 0 offen nt // 00000000B234: E0721000 800432A7 - buffer_store_dword v51, v168, s[16:19], 0 offen nt // 00000000B23C: E0721000 800433A8 - buffer_store_dword v52, v169, s[16:19], 0 offen nt // 00000000B244: E0721000 800434A9 - buffer_store_dword v53, v170, s[16:19], 0 offen nt // 00000000B24C: E0721000 800435AA - buffer_store_dword v54, v171, s[16:19], 0 offen nt // 00000000B254: E0721000 800436AB - buffer_store_dword v55, v172, s[16:19], 0 offen nt // 00000000B25C: E0721000 800437AC - buffer_store_dword v56, v173, s[16:19], 0 offen nt // 00000000B264: E0721000 800438AD - buffer_store_dword v57, v174, s[16:19], 0 offen nt // 00000000B26C: E0721000 800439AE - buffer_store_dword v58, v175, s[16:19], 0 offen nt // 00000000B274: E0721000 80043AAF - buffer_store_dword v59, v176, s[16:19], 0 offen nt // 00000000B27C: E0721000 80043BB0 - buffer_store_dword v60, v177, s[16:19], 0 offen nt // 00000000B284: E0721000 80043CB1 - buffer_store_dword v61, v178, s[16:19], 0 offen nt // 00000000B28C: E0721000 80043DB2 - buffer_store_dword v62, v179, s[16:19], 0 offen nt // 00000000B294: E0721000 80043EB3 - buffer_store_dword v63, v180, s[16:19], 0 offen nt // 00000000B29C: E0721000 80043FB4 - buffer_store_dword v64, v181, s[16:19], 0 offen nt // 00000000B2A4: E0721000 800440B5 - buffer_store_dword v65, v182, s[16:19], 0 offen nt // 00000000B2AC: E0721000 800441B6 - buffer_store_dword v66, v183, s[16:19], 0 offen nt // 00000000B2B4: E0721000 800442B7 - buffer_store_dword v67, v184, s[16:19], 0 offen nt // 00000000B2BC: E0721000 800443B8 - buffer_store_dword v68, v185, s[16:19], 0 offen nt // 00000000B2C4: E0721000 800444B9 - buffer_store_dword v69, v186, s[16:19], 0 offen nt // 00000000B2CC: E0721000 800445BA - buffer_store_dword v70, v187, s[16:19], 0 offen nt // 00000000B2D4: E0721000 800446BB - buffer_store_dword v71, v188, s[16:19], 0 offen nt // 00000000B2DC: E0721000 800447BC - buffer_store_dword v72, v189, s[16:19], 0 offen nt // 00000000B2E4: E0721000 800448BD - buffer_store_dword v73, v190, s[16:19], 0 offen nt // 00000000B2EC: E0721000 800449BE - buffer_store_dword v74, v191, s[16:19], 0 offen nt // 00000000B2F4: E0721000 80044ABF - buffer_store_dword v75, v192, s[16:19], 0 offen nt // 00000000B2FC: E0721000 80044BC0 - buffer_store_dword v76, v193, s[16:19], 0 offen nt // 00000000B304: E0721000 80044CC1 - buffer_store_dword v77, v194, s[16:19], 0 offen nt // 00000000B30C: E0721000 80044DC2 - buffer_store_dword v78, v195, s[16:19], 0 offen nt // 00000000B314: E0721000 80044EC3 - buffer_store_dword v79, v196, s[16:19], 0 offen nt // 00000000B31C: E0721000 80044FC4 - buffer_store_dword v80, v197, s[16:19], 0 offen nt // 00000000B324: E0721000 800450C5 - buffer_store_dword v81, v198, s[16:19], 0 offen nt // 00000000B32C: E0721000 800451C6 - buffer_store_dword v82, v199, s[16:19], 0 offen nt // 00000000B334: E0721000 800452C7 - buffer_store_dword v83, v200, s[16:19], 0 offen nt // 00000000B33C: E0721000 800453C8 - buffer_store_dword v84, v201, s[16:19], 0 offen nt // 00000000B344: E0721000 800454C9 - buffer_store_dword v85, v202, s[16:19], 0 offen nt // 00000000B34C: E0721000 800455CA - buffer_store_dword v86, v203, s[16:19], 0 offen nt // 00000000B354: E0721000 800456CB - buffer_store_dword v87, v204, s[16:19], 0 offen nt // 00000000B35C: E0721000 800457CC - buffer_store_dword v88, v205, s[16:19], 0 offen nt // 00000000B364: E0721000 800458CD - buffer_store_dword v89, v206, s[16:19], 0 offen nt // 00000000B36C: E0721000 800459CE - buffer_store_dword v90, v207, s[16:19], 0 offen nt // 00000000B374: E0721000 80045ACF - buffer_store_dword v91, v208, s[16:19], 0 offen nt // 00000000B37C: E0721000 80045BD0 - buffer_store_dword v92, v209, s[16:19], 0 offen nt // 00000000B384: E0721000 80045CD1 - buffer_store_dword v93, v210, s[16:19], 0 offen nt // 00000000B38C: E0721000 80045DD2 - buffer_store_dword v94, v211, s[16:19], 0 offen nt // 00000000B394: E0721000 80045ED3 - buffer_store_dword v95, v212, s[16:19], 0 offen nt // 00000000B39C: E0721000 80045FD4 - buffer_store_dword v96, v213, s[16:19], 0 offen nt // 00000000B3A4: E0721000 800460D5 - buffer_store_dword v97, v214, s[16:19], 0 offen nt // 00000000B3AC: E0721000 800461D6 - buffer_store_dword v98, v215, s[16:19], 0 offen nt // 00000000B3B4: E0721000 800462D7 - buffer_store_dword v99, v216, s[16:19], 0 offen nt // 00000000B3BC: E0721000 800463D8 - buffer_store_dword v100, v217, s[16:19], 0 offen nt // 00000000B3C4: E0721000 800464D9 - buffer_store_dword v101, v218, s[16:19], 0 offen nt // 00000000B3CC: E0721000 800465DA - buffer_store_dword v102, v219, s[16:19], 0 offen nt // 00000000B3D4: E0721000 800466DB - buffer_store_dword v103, v220, s[16:19], 0 offen nt // 00000000B3DC: E0721000 800467DC - buffer_store_dword v104, v221, s[16:19], 0 offen nt // 00000000B3E4: E0721000 800468DD - buffer_store_dword v105, v222, s[16:19], 0 offen nt // 00000000B3EC: E0721000 800469DE - buffer_store_dword v106, v223, s[16:19], 0 offen nt // 00000000B3F4: E0721000 80046ADF - buffer_store_dword v107, v224, s[16:19], 0 offen nt // 00000000B3FC: E0721000 80046BE0 - buffer_store_dword v108, v225, s[16:19], 0 offen nt // 00000000B404: E0721000 80046CE1 - buffer_store_dword v109, v226, s[16:19], 0 offen nt // 00000000B40C: E0721000 80046DE2 - buffer_store_dword v110, v227, s[16:19], 0 offen nt // 00000000B414: E0721000 80046EE3 - buffer_store_dword v111, v228, s[16:19], 0 offen nt // 00000000B41C: E0721000 80046FE4 - buffer_store_dword v112, v229, s[16:19], 0 offen nt // 00000000B424: E0721000 800470E5 - buffer_store_dword v113, v230, s[16:19], 0 offen nt // 00000000B42C: E0721000 800471E6 - buffer_store_dword v114, v231, s[16:19], 0 offen nt // 00000000B434: E0721000 800472E7 - buffer_store_dword v115, v232, s[16:19], 0 offen nt // 00000000B43C: E0721000 800473E8 - buffer_store_dword v116, v233, s[16:19], 0 offen nt // 00000000B444: E0721000 800474E9 - buffer_store_dword v117, v234, s[16:19], 0 offen nt // 00000000B44C: E0721000 800475EA - buffer_store_dword v118, v235, s[16:19], 0 offen nt // 00000000B454: E0721000 800476EB - buffer_store_dword v119, v236, s[16:19], 0 offen nt // 00000000B45C: E0721000 800477EC - buffer_store_dword v120, v237, s[16:19], 0 offen nt // 00000000B464: E0721000 800478ED - buffer_store_dword v121, v238, s[16:19], 0 offen nt // 00000000B46C: E0721000 800479EE - buffer_store_dword v122, v239, s[16:19], 0 offen nt // 00000000B474: E0721000 80047AEF - buffer_store_dword v123, v240, s[16:19], 0 offen nt // 00000000B47C: E0721000 80047BF0 - buffer_store_dword v124, v241, s[16:19], 0 offen nt // 00000000B484: E0721000 80047CF1 - buffer_store_dword v125, v242, s[16:19], 0 offen nt // 00000000B48C: E0721000 80047DF2 - buffer_store_dword v126, v243, s[16:19], 0 offen nt // 00000000B494: E0721000 80047EF3 - buffer_store_dword v127, v244, s[16:19], 0 offen nt // 00000000B49C: E0721000 80047FF4 - buffer_store_dword v128, v245, s[16:19], 0 offen nt // 00000000B4A4: E0721000 800480F5 - s_nop 0 // 00000000B4AC: BF800000 - v_mov_b32_e32 v10, 0x80000000 // 00000000B4B0: 7E1402FF 80000000 - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000B4B8: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B4C0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B4C8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B4D0: 86A2221E - v_add_lshl_u32 v43, v7, v8, 2 // 00000000B4D4: D1FE002B 020A1107 - v_cndmask_b32_e64 v43, v10, v43, s[34:35] // 00000000B4DC: D100002B 008A570A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000B4E4: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B4EC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B4F4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B4FC: 86A2221E - v_add_lshl_u32 v44, v7, v8, 2 // 00000000B500: D1FE002C 020A1107 - v_cndmask_b32_e64 v44, v10, v44, s[34:35] // 00000000B508: D100002C 008A590A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000B510: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B518: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B520: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B528: 86A2221E - v_add_lshl_u32 v45, v7, v8, 2 // 00000000B52C: D1FE002D 020A1107 - v_cndmask_b32_e64 v45, v10, v45, s[34:35] // 00000000B534: D100002D 008A5B0A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000B53C: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B544: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B54C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B554: 86A2221E - v_add_lshl_u32 v46, v7, v8, 2 // 00000000B558: D1FE002E 020A1107 - v_cndmask_b32_e64 v46, v10, v46, s[34:35] // 00000000B560: D100002E 008A5D0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000B568: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000B570: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000B578: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000B580: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B588: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B590: 86A2221E - v_add_lshl_u32 v47, v7, v4, 2 // 00000000B594: D1FE002F 020A0907 - v_cndmask_b32_e64 v47, v10, v47, s[34:35] // 00000000B59C: D100002F 008A5F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000B5A4: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B5AC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B5B4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B5BC: 86A2221E - v_add_lshl_u32 v48, v7, v8, 2 // 00000000B5C0: D1FE0030 020A1107 - v_cndmask_b32_e64 v48, v10, v48, s[34:35] // 00000000B5C8: D1000030 008A610A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000B5D0: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B5D8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B5E0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B5E8: 86A2221E - v_add_lshl_u32 v49, v7, v8, 2 // 00000000B5EC: D1FE0031 020A1107 - v_cndmask_b32_e64 v49, v10, v49, s[34:35] // 00000000B5F4: D1000031 008A630A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000B5FC: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B604: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B60C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B614: 86A2221E - v_add_lshl_u32 v50, v7, v8, 2 // 00000000B618: D1FE0032 020A1107 - v_cndmask_b32_e64 v50, v10, v50, s[34:35] // 00000000B620: D1000032 008A650A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000B628: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B630: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B638: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B640: 86A2221E - v_add_lshl_u32 v51, v7, v8, 2 // 00000000B644: D1FE0033 020A1107 - v_cndmask_b32_e64 v51, v10, v51, s[34:35] // 00000000B64C: D1000033 008A670A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000B654: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B65C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B664: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B66C: 86A2221E - v_add_lshl_u32 v52, v7, v8, 2 // 00000000B670: D1FE0034 020A1107 - v_cndmask_b32_e64 v52, v10, v52, s[34:35] // 00000000B678: D1000034 008A690A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000B680: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B688: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B690: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B698: 86A2221E - v_add_lshl_u32 v53, v7, v8, 2 // 00000000B69C: D1FE0035 020A1107 - v_cndmask_b32_e64 v53, v10, v53, s[34:35] // 00000000B6A4: D1000035 008A6B0A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000B6AC: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B6B4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B6BC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B6C4: 86A2221E - v_add_lshl_u32 v54, v7, v8, 2 // 00000000B6C8: D1FE0036 020A1107 - v_cndmask_b32_e64 v54, v10, v54, s[34:35] // 00000000B6D0: D1000036 008A6D0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000B6D8: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000B6E0: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000B6E8: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000B6F0: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B6F8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B700: 86A2221E - v_add_lshl_u32 v55, v7, v4, 2 // 00000000B704: D1FE0037 020A0907 - v_cndmask_b32_e64 v55, v10, v55, s[34:35] // 00000000B70C: D1000037 008A6F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000B714: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B71C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B724: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B72C: 86A2221E - v_add_lshl_u32 v56, v7, v8, 2 // 00000000B730: D1FE0038 020A1107 - v_cndmask_b32_e64 v56, v10, v56, s[34:35] // 00000000B738: D1000038 008A710A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000B740: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B748: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B750: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B758: 86A2221E - v_add_lshl_u32 v57, v7, v8, 2 // 00000000B75C: D1FE0039 020A1107 - v_cndmask_b32_e64 v57, v10, v57, s[34:35] // 00000000B764: D1000039 008A730A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000B76C: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B774: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B77C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B784: 86A2221E - v_add_lshl_u32 v58, v7, v8, 2 // 00000000B788: D1FE003A 020A1107 - v_cndmask_b32_e64 v58, v10, v58, s[34:35] // 00000000B790: D100003A 008A750A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000B798: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B7A0: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B7A8: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B7B0: 86A2221E - v_add_lshl_u32 v59, v7, v8, 2 // 00000000B7B4: D1FE003B 020A1107 - v_cndmask_b32_e64 v59, v10, v59, s[34:35] // 00000000B7BC: D100003B 008A770A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000B7C4: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B7CC: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B7D4: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B7DC: 86A2221E - v_add_lshl_u32 v60, v7, v8, 2 // 00000000B7E0: D1FE003C 020A1107 - v_cndmask_b32_e64 v60, v10, v60, s[34:35] // 00000000B7E8: D100003C 008A790A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000B7F0: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B7F8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B800: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B808: 86A2221E - v_add_lshl_u32 v61, v7, v8, 2 // 00000000B80C: D1FE003D 020A1107 - v_cndmask_b32_e64 v61, v10, v61, s[34:35] // 00000000B814: D100003D 008A7B0A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000B81C: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B824: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B82C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B834: 86A2221E - v_add_lshl_u32 v62, v7, v8, 2 // 00000000B838: D1FE003E 020A1107 - v_cndmask_b32_e64 v62, v10, v62, s[34:35] // 00000000B840: D100003E 008A7D0A - v_add_co_u32_e64 v5, vcc, v5, 1 // 00000000B848: D1196A05 00010305 - v_add_u32_e64 v6, v6, s38 // 00000000B850: D1340006 00004D06 - v_add_u32_e64 v7, v7, s36 // 00000000B858: D1340007 00004907 - v_cmp_lt_u32_e64 s[30:31], v4, s24 // 00000000B860: D0C9001E 00003104 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B868: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B870: 86A2221E - v_add_lshl_u32 v63, v7, v4, 2 // 00000000B874: D1FE003F 020A0907 - v_cndmask_b32_e64 v63, v10, v63, s[34:35] // 00000000B87C: D100003F 008A7F0A - v_add_co_u32_e64 v8, vcc, v4, 1 // 00000000B884: D1196A08 00010304 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B88C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B894: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B89C: 86A2221E - v_add_lshl_u32 v64, v7, v8, 2 // 00000000B8A0: D1FE0040 020A1107 - v_cndmask_b32_e64 v64, v10, v64, s[34:35] // 00000000B8A8: D1000040 008A810A - v_add_co_u32_e64 v8, vcc, v4, 2 // 00000000B8B0: D1196A08 00010504 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B8B8: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B8C0: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B8C8: 86A2221E - v_add_lshl_u32 v65, v7, v8, 2 // 00000000B8CC: D1FE0041 020A1107 - v_cndmask_b32_e64 v65, v10, v65, s[34:35] // 00000000B8D4: D1000041 008A830A - v_add_co_u32_e64 v8, vcc, v4, 3 // 00000000B8DC: D1196A08 00010704 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B8E4: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B8EC: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B8F4: 86A2221E - v_add_lshl_u32 v66, v7, v8, 2 // 00000000B8F8: D1FE0042 020A1107 - v_cndmask_b32_e64 v66, v10, v66, s[34:35] // 00000000B900: D1000042 008A850A - v_add_co_u32_e64 v8, vcc, v4, 4 // 00000000B908: D1196A08 00010904 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B910: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B918: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B920: 86A2221E - v_add_lshl_u32 v67, v7, v8, 2 // 00000000B924: D1FE0043 020A1107 - v_cndmask_b32_e64 v67, v10, v67, s[34:35] // 00000000B92C: D1000043 008A870A - v_add_co_u32_e64 v8, vcc, v4, 5 // 00000000B934: D1196A08 00010B04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B93C: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B944: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B94C: 86A2221E - v_add_lshl_u32 v68, v7, v8, 2 // 00000000B950: D1FE0044 020A1107 - v_cndmask_b32_e64 v68, v10, v68, s[34:35] // 00000000B958: D1000044 008A890A - v_add_co_u32_e64 v8, vcc, v4, 6 // 00000000B960: D1196A08 00010D04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B968: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B970: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B978: 86A2221E - v_add_lshl_u32 v69, v7, v8, 2 // 00000000B97C: D1FE0045 020A1107 - v_cndmask_b32_e64 v69, v10, v69, s[34:35] // 00000000B984: D1000045 008A8B0A - v_add_co_u32_e64 v8, vcc, v4, 7 // 00000000B98C: D1196A08 00010F04 - v_cmp_lt_u32_e64 s[30:31], v8, s24 // 00000000B994: D0C9001E 00003108 - v_cmp_lt_u32_e64 s[34:35], v5, s25 // 00000000B99C: D0C90022 00003305 - s_and_b64 s[34:35], s[30:31], s[34:35] // 00000000B9A4: 86A2221E - v_add_lshl_u32 v70, v7, v8, 2 // 00000000B9A8: D1FE0046 020A1107 - v_cndmask_b32_e64 v70, v10, v70, s[34:35] // 00000000B9B0: D1000046 008A8D0A - v_accvgpr_read_b32 v15, a147 // 00000000B9B8: D3D8400F 18000193 - v_accvgpr_read_b32 v16, a151 // 00000000B9C0: D3D84010 18000197 - v_accvgpr_read_b32 v17, a155 // 00000000B9C8: D3D84011 1800019B - v_accvgpr_read_b32 v18, a159 // 00000000B9D0: D3D84012 1800019F - v_accvgpr_read_b32 v19, a163 // 00000000B9D8: D3D84013 180001A3 - v_accvgpr_read_b32 v20, a167 // 00000000B9E0: D3D84014 180001A7 - v_accvgpr_read_b32 v21, a171 // 00000000B9E8: D3D84015 180001AB - v_accvgpr_read_b32 v22, a175 // 00000000B9F0: D3D84016 180001AF - v_accvgpr_read_b32 v23, a179 // 00000000B9F8: D3D84017 180001B3 - v_accvgpr_read_b32 v24, a183 // 00000000BA00: D3D84018 180001B7 - v_accvgpr_read_b32 v25, a187 // 00000000BA08: D3D84019 180001BB - v_accvgpr_read_b32 v26, a191 // 00000000BA10: D3D8401A 180001BF - v_accvgpr_read_b32 v27, a195 // 00000000BA18: D3D8401B 180001C3 - v_accvgpr_read_b32 v28, a199 // 00000000BA20: D3D8401C 180001C7 - v_accvgpr_read_b32 v29, a203 // 00000000BA28: D3D8401D 180001CB - v_accvgpr_read_b32 v30, a207 // 00000000BA30: D3D8401E 180001CF - v_accvgpr_read_b32 v31, a211 // 00000000BA38: D3D8401F 180001D3 - v_accvgpr_read_b32 v32, a215 // 00000000BA40: D3D84020 180001D7 - v_accvgpr_read_b32 v33, a219 // 00000000BA48: D3D84021 180001DB - v_accvgpr_read_b32 v34, a223 // 00000000BA50: D3D84022 180001DF - v_accvgpr_read_b32 v35, a227 // 00000000BA58: D3D84023 180001E3 - v_accvgpr_read_b32 v36, a231 // 00000000BA60: D3D84024 180001E7 - v_accvgpr_read_b32 v37, a235 // 00000000BA68: D3D84025 180001EB - v_accvgpr_read_b32 v38, a239 // 00000000BA70: D3D84026 180001EF - v_accvgpr_read_b32 v39, a243 // 00000000BA78: D3D84027 180001F3 - v_accvgpr_read_b32 v40, a247 // 00000000BA80: D3D84028 180001F7 - v_accvgpr_read_b32 v41, a251 // 00000000BA88: D3D84029 180001FB - v_accvgpr_read_b32 v42, a255 // 00000000BA90: D3D8402A 180001FF - buffer_store_dword v15, v43, s[16:19], 0 offen nt // 00000000BA98: E0721000 80040F2B - buffer_store_dword v16, v44, s[16:19], 0 offen nt // 00000000BAA0: E0721000 8004102C - buffer_store_dword v17, v45, s[16:19], 0 offen nt // 00000000BAA8: E0721000 8004112D - buffer_store_dword v18, v46, s[16:19], 0 offen nt // 00000000BAB0: E0721000 8004122E - buffer_store_dword v19, v47, s[16:19], 0 offen nt // 00000000BAB8: E0721000 8004132F - buffer_store_dword v20, v48, s[16:19], 0 offen nt // 00000000BAC0: E0721000 80041430 - buffer_store_dword v21, v49, s[16:19], 0 offen nt // 00000000BAC8: E0721000 80041531 - buffer_store_dword v22, v50, s[16:19], 0 offen nt // 00000000BAD0: E0721000 80041632 - buffer_store_dword v23, v51, s[16:19], 0 offen nt // 00000000BAD8: E0721000 80041733 - buffer_store_dword v24, v52, s[16:19], 0 offen nt // 00000000BAE0: E0721000 80041834 - buffer_store_dword v25, v53, s[16:19], 0 offen nt // 00000000BAE8: E0721000 80041935 - buffer_store_dword v26, v54, s[16:19], 0 offen nt // 00000000BAF0: E0721000 80041A36 - buffer_store_dword v27, v55, s[16:19], 0 offen nt // 00000000BAF8: E0721000 80041B37 - buffer_store_dword v28, v56, s[16:19], 0 offen nt // 00000000BB00: E0721000 80041C38 - buffer_store_dword v29, v57, s[16:19], 0 offen nt // 00000000BB08: E0721000 80041D39 - buffer_store_dword v30, v58, s[16:19], 0 offen nt // 00000000BB10: E0721000 80041E3A - buffer_store_dword v31, v59, s[16:19], 0 offen nt // 00000000BB18: E0721000 80041F3B - buffer_store_dword v32, v60, s[16:19], 0 offen nt // 00000000BB20: E0721000 8004203C - buffer_store_dword v33, v61, s[16:19], 0 offen nt // 00000000BB28: E0721000 8004213D - buffer_store_dword v34, v62, s[16:19], 0 offen nt // 00000000BB30: E0721000 8004223E - buffer_store_dword v35, v63, s[16:19], 0 offen nt // 00000000BB38: E0721000 8004233F - buffer_store_dword v36, v64, s[16:19], 0 offen nt // 00000000BB40: E0721000 80042440 - buffer_store_dword v37, v65, s[16:19], 0 offen nt // 00000000BB48: E0721000 80042541 - buffer_store_dword v38, v66, s[16:19], 0 offen nt // 00000000BB50: E0721000 80042642 - buffer_store_dword v39, v67, s[16:19], 0 offen nt // 00000000BB58: E0721000 80042743 - buffer_store_dword v40, v68, s[16:19], 0 offen nt // 00000000BB60: E0721000 80042844 - buffer_store_dword v41, v69, s[16:19], 0 offen nt // 00000000BB68: E0721000 80042945 - buffer_store_dword v42, v70, s[16:19], 0 offen nt // 00000000BB70: E0721000 80042A46 - s_nop 0 // 00000000BB78: BF800000 - s_branch label_GW_End_1 // 00000000BB7C: BF820000 - -label_GW_End_1: - s_getpc_b64 s[30:31] // 00000000BB80: BE9E1C00 - s_add_i32 s32, 0x13a48, 4 // 00000000BB84: 812084FF 00013A48 - s_add_u32 s30, s30, s32 // 00000000BB8C: 801E201E - s_addc_u32 s31, s31, 0 // 00000000BB90: 821F801F - s_setpc_b64 s[30:31] // 00000000BB94: BE801D1E - -label_GSU_4: - s_and_b32 s30, 0xff, s24 // 00000000BBA0: 861E18FF 000000FF - s_add_u32 s31, -1, s14 // 00000000BBA8: 801F0EC1 - s_cmp_ge_u32 s2, s31 // 00000000BBAC: BF091F02 - s_cselect_b32 s30, s30, 0 // 00000000BBB0: 851E801E - s_and_b32 s30, 0xff, s25 // 00000000BBBC: 861E19FF 000000FF - s_add_u32 s31, -1, s15 // 00000000BBC4: 801F0FC1 - s_cmp_ge_u32 s3, s31 // 00000000BBC8: BF091F03 - s_cselect_b32 s30, s30, 0 // 00000000BBCC: 851E801E - -label_GW_End: end: s_endpgm // 00000001F5D0: BF810000 diff --git a/extra/gemm/asm/test.py b/extra/gemm/asm/test.py index 2f6c639d5f..817d379859 100644 --- a/extra/gemm/asm/test.py +++ b/extra/gemm/asm/test.py @@ -1,7 +1,7 @@ # Run assembly on the AMD runtime and check correctness # VIZ=2 to profile import pathlib -from tinygrad import Tensor, Device, dtypes +from tinygrad import Tensor, Device, dtypes, Context from tinygrad.engine.realize import ExecItem, CompiledRunner from tinygrad.renderer import ProgramSpec from tinygrad.uop.ops import track_rewrites, UOp @@ -55,9 +55,10 @@ def get_asm_prg() -> ProgramSpec: eis.append(ExecItem(ast, [C_asm.uop.buffer, from_torch(B).uop.buffer, from_torch(A).uop.buffer], fixedvars={"SZ":N, "NUM_WG":NUM_WG}, prg=CompiledRunner(get_asm_prg()))) -for ei in eis: - et = ei.run(wait=True) - print(f"{(N*N*N*2 / et)*1e-12:.2f} REAL TFLOPS") +with Context(DEBUG=2): + for ei in eis: + et = ei.run(wait=True) + print(f"{(N*N*N*2 / et)*1e-12:.2f} REAL TFLOPS") # ** correctness