AMDGPU: Put uninitialized enumerators together in an enum definition.
authorChangpeng Fang <changpeng.fang@amd.com>
Fri, 20 Jan 2023 22:39:40 +0000 (14:39 -0800)
committerChangpeng Fang <changpeng.fang@amd.com>
Fri, 20 Jan 2023 22:39:40 +0000 (14:39 -0800)
Summary:
  For any enumerator whose definition does not have an initializer,
the associated value is the value of the previous enumerator plus one.
To avoid the possibility of two unrelated enumerators accidentally
having the same value, we should cluster the uninitialized enumerators
together, ahead of any enumerators that have explicit initializers.

Reviewers: arsenm

Differential Revision
  https://reviews.llvm.org/D141643

llvm/lib/Target/AMDGPU/SIDefines.h
llvm/test/CodeGen/AMDGPU/permlane-op-sel.ll [new file with mode: 0644]

index 7a4f7fc..97a5834 100644 (file)
@@ -194,6 +194,12 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_AC_V2INT32,
   OPERAND_REG_INLINE_AC_V2FP32,
 
+  // Operand for source modifiers for VOP instructions
+  OPERAND_INPUT_MODS,
+
+  // Operand for SDWA instructions
+  OPERAND_SDWA_VOPC_DST,
+
   OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
 
@@ -207,13 +213,7 @@ enum OperandType : unsigned {
   OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
 
   OPERAND_KIMM_FIRST = OPERAND_KIMM32,
-  OPERAND_KIMM_LAST = OPERAND_KIMM16,
-
-  // Operand for source modifiers for VOP instructions
-  OPERAND_INPUT_MODS,
-
-  // Operand for SDWA instructions
-  OPERAND_SDWA_VOPC_DST
+  OPERAND_KIMM_LAST = OPERAND_KIMM16
 
 };
 }
diff --git a/llvm/test/CodeGen/AMDGPU/permlane-op-sel.ll b/llvm/test/CodeGen/AMDGPU/permlane-op-sel.ll
new file mode 100644 (file)
index 0000000..20fe075
--- /dev/null
@@ -0,0 +1,15 @@
+; RUN: llc -march=amdgcn -mcpu=gfx1030 -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx1030 -d - | FileCheck -check-prefix=OBJ %s
+; RUN: llc -march=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefix=ASM %s
+
+declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1, i1)
+
+; OBJ-LABEL: <permlane_op_sel>:
+; OBJ: v_permlane16_b32 v0, v0, s7, s0 op_sel:[1,0]
+
+; ASM-LABEL: permlane_op_sel:
+; ASM-CHRCK: v_permlane16_b32 v0, v0, s7, s0 op_sel:[1,0] ; encoding: [0x00,0x00,0x77,0xd7,0x00,0x0f,0x00,0x20]
+define amdgpu_kernel void @permlane_op_sel(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
+  %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 0)
+  store i32 %v, ptr addrspace(1) %out
+  ret void
+}