@@ -143,11 +143,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
143143; CHECK-NEXT: v_mov_b32_e32 v47, 0
144144; CHECK-NEXT: s_mov_b32 s49, 0
145145; CHECK-NEXT: s_branch .LBB0_7
146- ; CHECK-NEXT: .LBB0_5: ; %Flow41
146+ ; CHECK-NEXT: .LBB0_5: ; %Flow43
147147; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=1
148148; CHECK-NEXT: s_inst_prefetch 0x2
149149; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s49
150- ; CHECK-NEXT: .LBB0_6: ; %Flow42
150+ ; CHECK-NEXT: .LBB0_6: ; %Flow44
151151; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=1
152152; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s48, v45
153153; CHECK-NEXT: v_cmp_lt_u32_e64 s4, 59, v47
@@ -304,7 +304,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
304304; CHECK-NEXT: ds_write_b32 v0, v58
305305; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55
306306; CHECK-NEXT: s_branch .LBB0_9
307- ; CHECK-NEXT: .LBB0_18: ; %Flow43
307+ ; CHECK-NEXT: .LBB0_18: ; %Flow45
308308; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=1
309309; CHECK-NEXT: v_mov_b32_e32 v57, v0
310310; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52
@@ -357,7 +357,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
357357; CHECK-NEXT: ds_write_b32 v0, v57
358358; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s53
359359; CHECK-NEXT: s_branch .LBB0_21
360- ; CHECK-NEXT: .LBB0_24: ; %Flow47
360+ ; CHECK-NEXT: .LBB0_24: ; %Flow49
361361; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s42
362362; CHECK-NEXT: .LBB0_25:
363363; CHECK-NEXT: v_mov_b32_e32 v31, v40
@@ -382,13 +382,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
382382; CHECK-NEXT: s_cmov_b32 exec_lo, vcc_lo
383383; CHECK-NEXT: s_cbranch_scc0 .LBB0_34
384384; CHECK-NEXT: ; %bb.26:
385- ; CHECK-NEXT: s_add_u32 s42, s44, 8
386- ; CHECK-NEXT: s_addc_u32 s43, s45, 0
387- ; CHECK-NEXT: s_mov_b32 s44, 0
385+ ; CHECK-NEXT: s_mov_b32 s42, 0
388386; CHECK-NEXT: s_branch .LBB0_29
389- ; CHECK-NEXT: .LBB0_27: ; %Flow38
387+ ; CHECK-NEXT: .LBB0_27: ; %Flow40
390388; CHECK-NEXT: ; in Loop: Header=BB0_29 Depth=1
391- ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s45
389+ ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s43
392390; CHECK-NEXT: .LBB0_28: ; in Loop: Header=BB0_29 Depth=1
393391; CHECK-NEXT: v_mov_b32_e32 v31, v40
394392; CHECK-NEXT: v_mov_b32_e32 v0, 0
@@ -405,13 +403,13 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
405403; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
406404; CHECK-NEXT: v_add_co_u32 v41, vcc_lo, v0, v41
407405; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v47, v41
408- ; CHECK-NEXT: s_or_b32 s44 , vcc_lo, s44
409- ; CHECK-NEXT: s_andn2_b32 s4, exec_lo, s44
410- ; CHECK-NEXT: s_cselect_b32 exec_lo, s4, s44
406+ ; CHECK-NEXT: s_or_b32 s42 , vcc_lo, s42
407+ ; CHECK-NEXT: s_andn2_b32 s4, exec_lo, s42
408+ ; CHECK-NEXT: s_cselect_b32 exec_lo, s4, s42
411409; CHECK-NEXT: s_cbranch_scc0 .LBB0_34
412410; CHECK-NEXT: .LBB0_29: ; =>This Inner Loop Header: Depth=1
413411; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v41
414- ; CHECK-NEXT: s_mov_b32 s45 , exec_lo
412+ ; CHECK-NEXT: s_mov_b32 s43 , exec_lo
415413; CHECK-NEXT: ds_read_b32 v0, v0
416414; CHECK-NEXT: s_waitcnt lgkmcnt(0)
417415; CHECK-NEXT: v_lshrrev_b32_e32 v63, 10, v0
@@ -420,15 +418,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
420418; CHECK-NEXT: v_mul_u32_u24_e32 v1, 0x180, v63
421419; CHECK-NEXT: v_lshlrev_b32_e32 v0, 5, v62
422420; CHECK-NEXT: v_lshlrev_b32_e32 v4, 5, v72
423- ; CHECK-NEXT: v_add_co_u32 v2, s4, s42 , v1
424- ; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s43 , 0, s4
421+ ; CHECK-NEXT: v_add_co_u32 v2, s4, s44 , v1
422+ ; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s45 , 0, s4
425423; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
426424; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
427425; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
428426; CHECK-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
429427; CHECK-NEXT: s_clause 0x1
430- ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
431- ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
428+ ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:8
429+ ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:8
432430; CHECK-NEXT: s_waitcnt vmcnt(0)
433431; CHECK-NEXT: v_xor_b32_e32 v46, v9, v5
434432; CHECK-NEXT: v_xor_b32_e32 v45, v8, v4
@@ -442,8 +440,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
442440; CHECK-NEXT: s_cbranch_scc0 .LBB0_28
443441; CHECK-NEXT: ; %bb.30: ; in Loop: Header=BB0_29 Depth=1
444442; CHECK-NEXT: s_clause 0x1
445- ; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:16
446- ; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:16
443+ ; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:24
444+ ; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:24
447445; CHECK-NEXT: v_lshlrev_b32_e32 v0, 4, v45
448446; CHECK-NEXT: v_alignbit_b32 v1, v46, v45, 12
449447; CHECK-NEXT: v_and_b32_e32 v2, 0xf0000, v45
0 commit comments