-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass post-RA-sched %s -o - | FileCheck -check-prefix=GCN %s
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
+
+# WAVE_BARRIER, SI_MASKED_UNREACHABLE, etc. are not really instructions. To
+# fix the hazard (m0 def followed by V_INTERP), the compiler should insert an
+# S_NOP.
-# WAVE_BARRIER and SI_MASKED_UNREACHABLE are not really instructions.
-# To fix the hazard (m0 def followed by V_INTERP), the scheduler
-# should move another instruction into the slot.
---
-# CHECK-LABEL: name: hazard_wave_barrier
-# CHECK-LABEL: bb.0:
-# GCN: $m0 = S_MOV_B32 killed renamable $sgpr0
-# GCN-NEXT: WAVE_BARRIER
-# GCN-NEXT: S_MOV_B32 0
-# GCN-NEXT: V_INTERP_MOV_F32
name: hazard_wave_barrier
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0
+ ; GCN-LABEL: name: hazard_wave_barrier
+ ; GCN: liveins: $sgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $m0 = S_MOV_B32 killed renamable $sgpr0
+ ; GCN-NEXT: WAVE_BARRIER
+ ; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: renamable $vgpr0 = V_INTERP_MOV_F32 2, 0, 0, implicit $mode, implicit $m0, implicit $exec
+ ; GCN-NEXT: renamable $sgpr1 = S_MOV_B32 0
+ ; GCN-NEXT: S_ENDPGM 0
$m0 = S_MOV_B32 killed renamable $sgpr0
WAVE_BARRIER
renamable $vgpr0 = V_INTERP_MOV_F32 2, 0, 0, implicit $mode, implicit $m0, implicit $exec
S_ENDPGM 0
...
-# GCN-LABEL: name: hazard-masked-unreachable
-# CHECK-LABEL: bb.0:
-# GCN: $m0 = S_MOV_B32 killed renamable $sgpr0
-# GCN-NEXT: SI_MASKED_UNREACHABLE
-# GCN-NEXT: S_MOV_B32 0
-# GCN-NEXT: V_INTERP_MOV_F32
+
---
name: hazard-masked-unreachable
tracksRegLiveness: true
body: |
+ ; GCN-LABEL: name: hazard-masked-unreachable
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $sgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $m0 = S_MOV_B32 killed renamable $sgpr0
+ ; GCN-NEXT: SI_MASKED_UNREACHABLE
+ ; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: renamable $vgpr0 = V_INTERP_MOV_F32 2, 0, 0, implicit $mode, implicit $m0, implicit $exec
+ ; GCN-NEXT: renamable $sgpr1 = S_MOV_B32 0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: S_ENDPGM 0
bb.0:
liveins: $sgpr0
bb.1:
S_ENDPGM 0
...
+
+# The input places SCHED_BARRIER 0 between the m0 def and the
+# V_INTERP_MOV_F32 that reads m0. The assertions below expect an S_NOP 0 to be
+# inserted after the SCHED_BARRIER and before the V_INTERP_MOV_F32, with every
+# other instruction left in its input order.
+---
+name: hazard_sched_barrier
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GCN-LABEL: name: hazard_sched_barrier
+ ; GCN: liveins: $sgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $m0 = S_MOV_B32 killed renamable $sgpr0
+ ; GCN-NEXT: SCHED_BARRIER 0
+ ; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: renamable $vgpr0 = V_INTERP_MOV_F32 2, 0, 0, implicit $mode, implicit $m0, implicit $exec
+ ; GCN-NEXT: renamable $sgpr1 = S_MOV_B32 0
+ ; GCN-NEXT: S_ENDPGM 0
+ $m0 = S_MOV_B32 killed renamable $sgpr0
+ SCHED_BARRIER 0
+ renamable $vgpr0 = V_INTERP_MOV_F32 2, 0, 0, implicit $mode, implicit $m0, implicit $exec
+ renamable $sgpr1 = S_MOV_B32 0
+ S_ENDPGM 0
+
+...
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s
+
+# Embedded IR module. Each kernel is a stub (its body is only `ret void`) that
+# supplies the %in and %out arguments referenced as %ir.in / %ir.out by the
+# memory operands in the MIR functions below. Metadata node !0 is the one named
+# by the !alias.scope and !noalias annotations on those memory operands.
+--- |
+ define amdgpu_kernel void @no_sched_barrier(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void }
+ define amdgpu_kernel void @sched_barrier_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void }
+ define amdgpu_kernel void @sched_barrier_1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void }
+
+ !0 = distinct !{!0}
+ !1 = !{!1, !0}
+...
+
+# Baseline without any SCHED_BARRIER. The input is two load/multiply/store
+# sequences separated by S_NOP 0. The assertions expect the scheduler to
+# reorder them: both loads first, then both multiplies, then S_NOP 0, then
+# both stores.
+---
+name: no_sched_barrier
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: no_sched_barrier
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
+ ; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:sreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = IMPLICIT_DEF
+ %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
+ GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ S_NOP 0
+ %5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec
+ GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ S_ENDPGM 0
+...
+
+# Same instruction sequence as no_sched_barrier, with SCHED_BARRIER 0 placed
+# after the S_NOP 0. The assertions expect the output order to be identical to
+# the input order: nothing is moved across the barrier.
+---
+name: sched_barrier_0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: sched_barrier_0
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:sreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = IMPLICIT_DEF
+ %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
+ GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ S_NOP 0
+ SCHED_BARRIER 0
+ %5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec
+ GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ S_ENDPGM 0
+...
+
+# Same instruction sequence as sched_barrier_0, but with SCHED_BARRIER 1. The
+# assertions expect only S_NOP 0 to cross the barrier: it moves from just
+# before the barrier to between the second multiply and the second store. The
+# loads, multiplies and stores stay on their original side of the barrier.
+# NOTE(review): presumably mask value 1 lets non-memory, non-side-effecting
+# instructions be scheduled across the barrier -- confirm against the AMDGPU
+# sched_barrier mask documentation.
+---
+name: sched_barrier_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: sched_barrier_1
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-NEXT: SCHED_BARRIER 1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
+ ; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:sreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = IMPLICIT_DEF
+ %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
+ GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ S_NOP 0
+ SCHED_BARRIER 1
+ %5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec
+ GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ S_ENDPGM 0
+...