I was looking at a potential DAGCombiner fix for one of the regressions in D60278, and it caused severe regression test pain because x86 TLI lies about the desirability of 8-bit shift ops.
We've hinted at making all 8-bit ops undesirable for the reason in the code comment:
// TODO: Almost no 8-bit ops are desirable because they have no actual
// size/speed advantages vs. 32-bit ops, but they do have a major
// potential disadvantage by causing partial register stalls.
...but that leads to massive diffs and exposes all kinds of optimization holes itself.
Differential Revision: https://reviews.llvm.org/D60286
llvm-svn: 357912
if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8)
return false;
- // 8-bit multiply is probably not much cheaper than 32-bit multiply, and
- // we have specializations to turn 32-bit multiply into LEA or other ops.
+ // TODO: Almost no 8-bit ops are desirable because they have no actual
+ // size/speed advantages vs. 32-bit ops, but they do have a major
+ // potential disadvantage by causing partial register stalls.
+ //
+ // 8-bit multiply/shl is probably not cheaper than 32-bit multiply/shl, and
+ // we have specializations to turn 32-bit multiply/shl into LEA or other ops.
// Also, see the comment in "IsDesirableToPromoteOp" - where we additionally
// check for a constant operand to the multiply.
- if (Opc == ISD::MUL && VT == MVT::i8)
+ if ((Opc == ISD::MUL || Opc == ISD::SHL) && VT == MVT::i8)
return false;
// i16 instruction encodings are longer and some i16 instructions are slow,
define zeroext i1 @demanded_with_known_zeroes(i32 %bit, i32 %bits) {
; X86-LABEL: demanded_with_known_zeroes:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: shlb $2, %cl
-; X86-NEXT: movzbl %cl, %ecx
-; X86-NEXT: btl %ecx, %eax
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: shlb $2, %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: btl %eax, %ecx
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: demanded_with_known_zeroes:
; X64: # %bb.0: # %entry
-; X64-NEXT: shlb $2, %dil
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: btl %eax, %esi
+; X64-NEXT: shll $2, %edi
+; X64-NEXT: btl %edi, %esi
; X64-NEXT: setb %al
; X64-NEXT: retq
entry:
; X64-LABEL: btr_32_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shlb $2, %sil
+; X64-NEXT: shll $2, %esi
; X64-NEXT: btrl %esi, %eax
; X64-NEXT: retq
;
; X86-LABEL: btr_32_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shlb $2, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: btrl %ecx, %eax
; X86-NEXT: retl
%1 = shl i32 %n, 2
; X64-LABEL: bts_32_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shlb $2, %sil
+; X64-NEXT: shll $2, %esi
; X64-NEXT: btsl %esi, %eax
; X64-NEXT: retq
;
; X86-LABEL: bts_32_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shlb $2, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: btsl %ecx, %eax
; X86-NEXT: retl
%1 = shl i32 %n, 2
; X64-LABEL: btc_32_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shlb $2, %sil
+; X64-NEXT: shll $2, %esi
; X64-NEXT: btcl %esi, %eax
; X64-NEXT: retq
;
; X86-LABEL: btc_32_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shlb $2, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: btcl %ecx, %eax
; X86-NEXT: retl
%1 = shl i32 %n, 2
; X64-LABEL: btr_64_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlb $2, %sil
+; X64-NEXT: shlq $2, %rsi
; X64-NEXT: btrq %rsi, %rax
; X64-NEXT: retq
;
; X86-LABEL: btr_64_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: shlb $2, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shll $2, %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
; X64-LABEL: bts_64_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlb $2, %sil
+; X64-NEXT: shlq $2, %rsi
; X64-NEXT: btsq %rsi, %rax
; X64-NEXT: retq
;
; X86-LABEL: bts_64_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: shlb $2, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shll $2, %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
; X64-LABEL: btc_64_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlb $2, %sil
+; X64-NEXT: shlq $2, %rsi
; X64-NEXT: btcq %rsi, %rax
; X64-NEXT: retq
;
; X86-LABEL: btc_64_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: shlb $2, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shll $2, %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
define i32 @rotate_demanded_bits_3(i32, i32) {
; X86-LABEL: rotate_demanded_bits_3:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: addb %cl, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
;
; ILP-NEXT: pushq %rbx
; ILP-NEXT: movq %rdi, %rax
; ILP-NEXT: xorl %r8d, %r8d
-; ILP-NEXT: addb %sil, %sil
+; ILP-NEXT: addq %rsi, %rsi
; ILP-NEXT: addb $2, %sil
; ILP-NEXT: orb $1, %sil
; ILP-NEXT: movl $1, %r10d
; HYBRID-LABEL: test1:
; HYBRID: # %bb.0:
; HYBRID-NEXT: movq %rdi, %rax
-; HYBRID-NEXT: addb %sil, %sil
+; HYBRID-NEXT: addq %rsi, %rsi
; HYBRID-NEXT: addb $2, %sil
; HYBRID-NEXT: orb $1, %sil
; HYBRID-NEXT: movb $-128, %cl
; BURR-LABEL: test1:
; BURR: # %bb.0:
; BURR-NEXT: movq %rdi, %rax
-; BURR-NEXT: addb %sil, %sil
+; BURR-NEXT: addq %rsi, %rsi
; BURR-NEXT: addb $2, %sil
; BURR-NEXT: orb $1, %sil
; BURR-NEXT: movb $-128, %cl
; SRC: # %bb.0:
; SRC-NEXT: pushq %rbx
; SRC-NEXT: movq %rdi, %rax
-; SRC-NEXT: addb %sil, %sil
+; SRC-NEXT: addq %rsi, %rsi
; SRC-NEXT: addb $2, %sil
; SRC-NEXT: orb $1, %sil
; SRC-NEXT: movb $-128, %cl
; LIN-NEXT: movq %rdi, %rax
; LIN-NEXT: xorl %r9d, %r9d
; LIN-NEXT: movl $1, %r8d
-; LIN-NEXT: addb %sil, %sil
+; LIN-NEXT: addq %rsi, %rsi
; LIN-NEXT: addb $2, %sil
; LIN-NEXT: orb $1, %sil
; LIN-NEXT: movl $1, %edx
define i32 @select_pow2_diff_neg(i1 zeroext %cond) {
; CHECK-LABEL: select_pow2_diff_neg:
; CHECK: # %bb.0:
-; CHECK-NEXT: shlb $4, %dil
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: orl $-25, %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: shll $4, %edi
+; CHECK-NEXT: leal -25(%rdi), %eax
; CHECK-NEXT: retq
%sel = select i1 %cond, i32 -9, i32 -25
ret i32 %sel