From 9a24ba2397ea72c5124dbc75b4dd9ee9db676518 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang@intel.com>
Date: Tue, 16 May 2023 02:43:27 -0400
Subject: [PATCH] Correct the sort logic in AsmMatcherEmmitter.cpp

The logic from line 633 to 640 is specific for ARM as the comments said, it will make all the targets will prefer to using instruction with more predicates when compiler do AsmMatching.
And for code from line 642 to 649, X86 want to use the order records written in source file to sort the instructions. So X86 could be affected by this logic. (These code could be arrived only by X86)
After change this, seems AVX instructions have not be affected but it exposed some other errors for instruction push and call.
CALLpcrel16 could not be used in 64 bit mode, we need add Predicate for it. And for push instruction, previously because pushi32 has predicates = [Not64bitmode], so it precede pushi16, which is incorrect here, we should get pushw here and it also align with gcc.

Reviewed By: skan

Differential Revision: https://reviews.llvm.org/D150436
---
 llvm/lib/Target/X86/X86InstrArithmetic.td | 24 +++++++++++------------
 llvm/lib/Target/X86/X86InstrControl.td    |  2 +-
 llvm/test/MC/Disassembler/X86/x86-64.txt  |  4 ++--
 llvm/test/MC/X86/I86-64.s                 |  4 ----
 llvm/test/MC/X86/pr22028.s                |  2 +-
 llvm/test/MC/X86/x86-16.s                 |  3 +++
 llvm/test/MC/X86/x86_64-encoding.s        |  5 -----
 llvm/utils/TableGen/AsmMatcherEmitter.cpp | 18 ++++++++---------
 8 files changed, 28 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 550480b38080..c4e4eb333882 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -578,17 +578,17 @@ def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
 let Defs = [EFLAGS] in {
 let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
 let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
-def INC8r  : INCDECR<MRM0r, "inc", Xi8, X86add_flag_nocf>;
-def INC16r : INCDECR<MRM0r, "inc", Xi16, X86add_flag_nocf>;
-def INC32r : INCDECR<MRM0r, "inc", Xi32, X86add_flag_nocf>;
-def INC64r : INCDECR<MRM0r, "inc", Xi64, X86add_flag_nocf>;
-} // isConvertibleToThreeAddress = 1, CodeSize = 2
-
 // Short forms only valid in 32-bit mode. Selected during MCInst lowering.
 let CodeSize = 1, hasSideEffects = 0 in {
 def INC16r_alt : INCDECR_ALT<0x40, "inc", Xi16>;
 def INC32r_alt : INCDECR_ALT<0x40, "inc", Xi32>;
 } // CodeSize = 1, hasSideEffects = 0
+
+def INC8r  : INCDECR<MRM0r, "inc", Xi8, X86add_flag_nocf>;
+def INC16r : INCDECR<MRM0r, "inc", Xi16, X86add_flag_nocf>;
+def INC32r : INCDECR<MRM0r, "inc", Xi32, X86add_flag_nocf>;
+def INC64r : INCDECR<MRM0r, "inc", Xi64, X86add_flag_nocf>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 2
 } // Constraints = "$src1 = $dst", SchedRW
 
 let CodeSize = 2, SchedRW = [WriteALURMW] in {
@@ -603,18 +603,18 @@ let Predicates = [UseIncDec, In64BitMode] in {
 } // CodeSize = 2, SchedRW
 
 let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
+// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
+let CodeSize = 1, hasSideEffects = 0 in {
+def DEC16r_alt : INCDECR_ALT<0x48, "dec", Xi16>;
+def DEC32r_alt : INCDECR_ALT<0x48, "dec", Xi32>;
+} // CodeSize = 1, hasSideEffects = 0
+
 let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
 def DEC8r  : INCDECR<MRM1r, "dec", Xi8, X86sub_flag_nocf>;
 def DEC16r : INCDECR<MRM1r, "dec", Xi16, X86sub_flag_nocf>;
 def DEC32r : INCDECR<MRM1r, "dec", Xi32, X86sub_flag_nocf>;
 def DEC64r : INCDECR<MRM1r, "dec", Xi64, X86sub_flag_nocf>;
 } // isConvertibleToThreeAddress = 1, CodeSize = 2
-
-// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
-let CodeSize = 1, hasSideEffects = 0 in {
-def DEC16r_alt : INCDECR_ALT<0x48, "dec", Xi16>;
-def DEC32r_alt : INCDECR_ALT<0x48, "dec", Xi32>;
-} // CodeSize = 1, hasSideEffects = 0
 } // Constraints = "$src1 = $dst", SchedRW
 
 let CodeSize = 2, SchedRW = [WriteALURMW] in {
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td
index 2a0801a6519c..fd996603476d 100644
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -212,7 +212,7 @@ let isCall = 1 in
       def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
                              (outs), (ins i16imm_brtarget:$dst),
                              "call{w}\t$dst", []>, OpSize16,
-                        Sched<[WriteJump]>;
+                        Requires<[Not64BitMode]>, Sched<[WriteJump]>;
     def CALL16r     : I<0xFF, MRM2r, (outs), (ins GR16:$dst),
                         "call{w}\t{*}$dst", [(X86call GR16:$dst)]>,
                       OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>;
diff --git a/llvm/test/MC/Disassembler/X86/x86-64.txt b/llvm/test/MC/Disassembler/X86/x86-64.txt
index 159d9efcbf7e..8d6564dd0989 100644
--- a/llvm/test/MC/Disassembler/X86/x86-64.txt
+++ b/llvm/test/MC/Disassembler/X86/x86-64.txt
@@ -326,8 +326,8 @@
 # CHECK: movq %rax, 1515870810
 0x67, 0x48 0xa3 0x5a 0x5a 0x5a 0x5a
 
-# CHECK: callw 32767
-0x66 0xe8 0xff 0x7f
+# CHECK: callq 32767
+0xe8,0xff,0x7f,0x00,0x00
 
 # TODO: Should display data16 prefixes.
 # CHECK-NOT: data16
diff --git a/llvm/test/MC/X86/I86-64.s b/llvm/test/MC/X86/I86-64.s
index 7e9544e22899..31144653b9b5 100644
--- a/llvm/test/MC/X86/I86-64.s
+++ b/llvm/test/MC/X86/I86-64.s
@@ -676,10 +676,6 @@ andw (%rdx), %r14w
 // CHECK: encoding: [0xe8,A,A,A,A]         
 callq 64 
 
-// CHECK: callw 64 
-// CHECK: encoding: [0x66,0xe8,A,A]         
-callw 64 
-
 // CHECK: cbtw 
 // CHECK: encoding: [0x66,0x98]          
 cbtw 
diff --git a/llvm/test/MC/X86/pr22028.s b/llvm/test/MC/X86/pr22028.s
index d82b50f051a1..af42baf6878c 100644
--- a/llvm/test/MC/X86/pr22028.s
+++ b/llvm/test/MC/X86/pr22028.s
@@ -14,7 +14,7 @@ push 65536
 //CHECK16:	pushw	$-1	                # encoding: [0x6a,0xff]
 //CHECK16:	pushw	$30                     # encoding: [0x6a,0x1e]
 //CHECK16:	pushw	$257                    # encoding: [0x68,0x01,0x01]
-//CHECK16:	pushl	$65536                  # encoding: [0x66,0x68,0x00,0x00,0x01,0x00]
+//CHECK16:	pushw	$65536                  # encoding: [0x68,0x00,0x00]
 
 //CHECK:	pushl	$0                      # encoding: [0x6a,0x00]
 //CHECK:	pushl	$-1                     # encoding: [0x6a,0xff]
diff --git a/llvm/test/MC/X86/x86-16.s b/llvm/test/MC/X86/x86-16.s
index 6670e736a6fe..be0563e5b1d6 100644
--- a/llvm/test/MC/X86/x86-16.s
+++ b/llvm/test/MC/X86/x86-16.s
@@ -549,9 +549,12 @@ ljmp	$0x7ace,$0x7ace
 // CHECK: calll a
 // CHECK: calll a
 // CHECK: calll a
+// CHECK: callw 42
+// CHECK: encoding: [0xe8,A,A]
  calll a
 data32 call a
 data32 callw a
+callw 42
 
 // CHECK:      ljmpl $1, $2
 // CHECK-NEXT: ljmpl $1, $2
diff --git a/llvm/test/MC/X86/x86_64-encoding.s b/llvm/test/MC/X86/x86_64-encoding.s
index bfa7304ca950..ff541c2d6568 100644
--- a/llvm/test/MC/X86/x86_64-encoding.s
+++ b/llvm/test/MC/X86/x86_64-encoding.s
@@ -1,10 +1,5 @@
 // RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
 
-// PR7195
-// CHECK: callw 42
-// CHECK: encoding: [0x66,0xe8,A,A]
-       callw 42
-
 // rdar://8127102
 // CHECK: movq	%gs:(%rdi), %rax
 // CHECK: encoding: [0x65,0x48,0x8b,0x07]
diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 8f3c98b4303f..e9e2571693f7 100644
--- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -630,15 +630,6 @@ struct MatchableInfo {
         return false;
     }
 
-    // Give matches that require more features higher precedence. This is useful
-    // because we cannot define AssemblerPredicates with the negation of
-    // processor features. For example, ARM v6 "nop" may be either a HINT or
-    // MOV. With v6, we want to match HINT. The assembler has no way to
-    // predicate MOV under "NoV6", but HINT will always match first because it
-    // requires V6 while MOV does not.
-    if (RequiredFeatures.size() != RHS.RequiredFeatures.size())
-      return RequiredFeatures.size() > RHS.RequiredFeatures.size();
-
     // For X86 AVX/AVX512 instructions, we prefer vex encoding because the
     // vex encoding size is smaller. Since X86InstrSSE.td is included ahead
     // of X86InstrAVX512.td, the AVX instruction ID is less than AVX512 ID.
@@ -648,6 +639,15 @@ struct MatchableInfo {
         TheDef->getValueAsBit("HasPositionOrder"))
       return TheDef->getID() < RHS.TheDef->getID();
 
+    // Give matches that require more features higher precedence. This is useful
+    // because we cannot define AssemblerPredicates with the negation of
+    // processor features. For example, ARM v6 "nop" may be either a HINT or
+    // MOV. With v6, we want to match HINT. The assembler has no way to
+    // predicate MOV under "NoV6", but HINT will always match first because it
+    // requires V6 while MOV does not.
+    if (RequiredFeatures.size() != RHS.RequiredFeatures.size())
+      return RequiredFeatures.size() > RHS.RequiredFeatures.size();
+
     return false;
   }
 
-- 
2.34.1