[AVR] Expand shifts of all types except int8 and int16

author Patryk Wychowaniec <pwychowaniec@pm.me>

Wed, 19 Jul 2023 03:54:34 +0000 (11:54 +0800)

committer Ben Shi <2283975856@qq.com>

Wed, 19 Jul 2023 03:57:00 +0000 (11:57 +0800)
author Patryk Wychowaniec <pwychowaniec@pm.me>
Wed, 19 Jul 2023 03:54:34 +0000 (11:54 +0800)
committer Ben Shi <2283975856@qq.com>
Wed, 19 Jul 2023 03:57:00 +0000 (11:57 +0800)
diff --git a/llvm/lib/Target/AVR/AVRShiftExpand.cpp b/llvm/lib/Target/AVR/AVRShiftExpand.cpp

index b7dcd86..f549ae6 100644 (file)
--- a/llvm/lib/Target/AVR/AVRShiftExpand.cpp
+++ b/llvm/lib/Target/AVR/AVRShiftExpand.cpp
@@ -7,9 +7,10 @@
  //===----------------------------------------------------------------------===//
  //
  /// \file
-/// Expand 32-bit shift instructions (shl, lshr, ashr) to inline loops, just
-/// like avr-gcc. This must be done in IR because otherwise the type legalizer
-/// will turn 32-bit shifts into (non-existing) library calls such as __ashlsi3.
+/// Expand non-8-bit and non-16-bit shift instructions (shl, lshr, ashr) to
+/// inline loops, just like avr-gcc. This must be done in IR because otherwise
+/// the type legalizer will turn 32-bit shifts into (non-existing) library calls
+/// such as __ashlsi3.
  //
  //===----------------------------------------------------------------------===//
  
@@ -51,8 +52,9 @@ bool AVRShiftExpand::runOnFunction(Function &F) {
      if (!I.isShift())
        // Only expand shift instructions (shl, lshr, ashr).
        continue;
-    if (I.getType() != Type::getInt32Ty(Ctx))
-      // Only expand plain i32 types.
+    if (I.getType() == Type::getInt8Ty(Ctx) || I.getType() == Type::getInt16Ty(Ctx))
+      // Only expand non-8-bit and non-16-bit shifts, since those are expanded
+      // directly during isel.
        continue;
      if (isa<ConstantInt>(I.getOperand(1)))
        // Only expand when the shift amount is not known.
@@ -75,7 +77,7 @@ bool AVRShiftExpand::runOnFunction(Function &F) {
  void AVRShiftExpand::expand(BinaryOperator *BI) {
    auto &Ctx = BI->getContext();
    IRBuilder<> Builder(BI);
-  Type *Int32Ty = Type::getInt32Ty(Ctx);
+  Type *InputTy = cast<Instruction>(BI)->getType();
    Type *Int8Ty = Type::getInt8Ty(Ctx);
    Value *Int8Zero = ConstantInt::get(Int8Ty, 0);
  
@@ -101,7 +103,7 @@ void AVRShiftExpand::expand(BinaryOperator *BI) {
    Builder.SetInsertPoint(LoopBB);
    PHINode *ShiftAmountPHI = Builder.CreatePHI(Int8Ty, 2);
    ShiftAmountPHI->addIncoming(ShiftAmount, BB);
-  PHINode *ValuePHI = Builder.CreatePHI(Int32Ty, 2);
+  PHINode *ValuePHI = Builder.CreatePHI(InputTy, 2);
    ValuePHI->addIncoming(BI->getOperand(0), BB);
  
    // Subtract the shift amount by one, as we're shifting one this loop
@@ -116,13 +118,13 @@ void AVRShiftExpand::expand(BinaryOperator *BI) {
    Value *ValueShifted;
    switch (BI->getOpcode()) {
    case Instruction::Shl:
-    ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(Int32Ty, 1));
+    ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(InputTy, 1));
      break;
    case Instruction::LShr:
-    ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
+    ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(InputTy, 1));
      break;
    case Instruction::AShr:
-    ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
+    ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(InputTy, 1));
      break;
    default:
      llvm_unreachable("asked to expand an instruction that is not a shift");
@@ -137,7 +139,7 @@ void AVRShiftExpand::expand(BinaryOperator *BI) {
    // Collect the resulting value. This is necessary in the IR but won't produce
    // any actual instructions.
    Builder.SetInsertPoint(BI);
-  PHINode *Result = Builder.CreatePHI(Int32Ty, 2);
+  PHINode *Result = Builder.CreatePHI(InputTy, 2);
    Result->addIncoming(BI->getOperand(0), BB);
    Result->addIncoming(ValueShifted, LoopBB);
  
diff --git a/llvm/test/CodeGen/AVR/shift-expand.ll b/llvm/test/CodeGen/AVR/shift-expand.ll

index 7baba06..be075e5 100644 (file)
--- a/llvm/test/CodeGen/AVR/shift-expand.ll
+++ b/llvm/test/CodeGen/AVR/shift-expand.ll
@@ -8,8 +8,17 @@
  target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
  target triple = "avr"
  
-define i32 @shl(i32 %value, i32 %amount) addrspace(1) {
-; CHECK-LABEL: @shl(
+define i16 @shl16(i16 %value, i16 %amount) addrspace(1) {
+; CHECK-LABEL: @shl16(
+; CHECK-NEXT:    [[RESULT:%.*]] = shl i16 [[VALUE:%.*]], [[AMOUNT:%.*]]
+; CHECK-NEXT:    ret i16 [[RESULT]]
+;
+  %result = shl i16 %value, %amount
+  ret i16 %result
+}
+
+define i32 @shl32(i32 %value, i32 %amount) addrspace(1) {
+; CHECK-LABEL: @shl32(
  ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
  ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
  ; CHECK-NEXT:    br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
@@ -28,8 +37,39 @@ define i32 @shl(i32 %value, i32 %amount) addrspace(1) {
    ret i32 %result
  }
  
-define i32 @lshr(i32 %value, i32 %amount) addrspace(1) {
-; CHECK-LABEL: @lshr(
+define i40 @shl40(i40 %value, i40 %amount) addrspace(1) {
+; CHECK-LABEL: @shl40(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i40 [[AMOUNT:%.*]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
+; CHECK:       shift.loop:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i40 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    [[TMP5]] = sub i8 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP6]] = shl i40 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
+; CHECK:       shift.done:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi i40 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    ret i40 [[TMP8]]
+;
+  %result = shl i40 %value, %amount
+  ret i40 %result
+}
+
+; ------------------------------------------------------------------------------
+
+define i16 @lshr16(i16 %value, i16 %amount) addrspace(1) {
+; CHECK-LABEL: @lshr16(
+; CHECK-NEXT:    [[RESULT:%.*]] = lshr i16 [[VALUE:%.*]], [[AMOUNT:%.*]]
+; CHECK-NEXT:    ret i16 [[RESULT]]
+;
+  %result = lshr i16 %value, %amount
+  ret i16 %result
+}
+
+define i32 @lshr32(i32 %value, i32 %amount) addrspace(1) {
+; CHECK-LABEL: @lshr32(
  ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
  ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
  ; CHECK-NEXT:    br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
@@ -48,42 +88,73 @@ define i32 @lshr(i32 %value, i32 %amount) addrspace(1) {
    ret i32 %result
  }
  
-define i32 @ashr(i32 %0, i32 %1) addrspace(1) {
-; CHECK-LABEL: @ashr(
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP1:%.*]] to i8
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i8 [[TMP3]], 0
-; CHECK-NEXT:    br i1 [[TMP4]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
+define i40 @lshr40(i40 %value, i40 %amount) addrspace(1) {
+; CHECK-LABEL: @lshr40(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i40 [[AMOUNT:%.*]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
  ; CHECK:       shift.loop:
-; CHECK-NEXT:    [[TMP5:%.*]] = phi i8 [ [[TMP3]], [[TMP2:%.*]] ], [ [[TMP7:%.*]], [[SHIFT_LOOP]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = phi i32 [ [[TMP0:%.*]], [[TMP2]] ], [ [[TMP8:%.*]], [[SHIFT_LOOP]] ]
-; CHECK-NEXT:    [[TMP7]] = sub i8 [[TMP5]], 1
-; CHECK-NEXT:    [[TMP8]] = ashr i32 [[TMP6]], 1
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i8 [[TMP7]], 0
-; CHECK-NEXT:    br i1 [[TMP9]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i40 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    [[TMP5]] = sub i8 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP6]] = lshr i40 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
  ; CHECK:       shift.done:
-; CHECK-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP0]], [[TMP2]] ], [ [[TMP8]], [[SHIFT_LOOP]] ]
-; CHECK-NEXT:    ret i32 [[TMP10]]
+; CHECK-NEXT:    [[TMP8:%.*]] = phi i40 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    ret i40 [[TMP8]]
  ;
-  %3 = ashr i32 %0, %1
-  ret i32 %3
+  %result = lshr i40 %value, %amount
+  ret i40 %result
  }
  
-; This function is not modified because it is not an i32.
-define i40 @shl40(i40 %value, i40 %amount) addrspace(1) {
-; CHECK-LABEL: @shl40(
-; CHECK-NEXT:    [[RESULT:%.*]] = shl i40 [[VALUE:%.*]], [[AMOUNT:%.*]]
-; CHECK-NEXT:    ret i40 [[RESULT]]
+; ------------------------------------------------------------------------------
+
+define i16 @ashr16(i16 %value, i16 %amount) addrspace(1) {
+; CHECK-LABEL: @ashr16(
+; CHECK-NEXT:    [[RESULT:%.*]] = ashr i16 [[VALUE:%.*]], [[AMOUNT:%.*]]
+; CHECK-NEXT:    ret i16 [[RESULT]]
  ;
-  %result = shl i40 %value, %amount
-  ret i40 %result
+  %result = ashr i16 %value, %amount
+  ret i16 %result
  }
  
-; This function isn't either, although perhaps it should.
-define i24 @shl24(i24 %value, i24 %amount) addrspace(1) {
-; CHECK-LABEL: @shl24(
-; CHECK-NEXT:    [[RESULT:%.*]] = shl i24 [[VALUE:%.*]], [[AMOUNT:%.*]]
-; CHECK-NEXT:    ret i24 [[RESULT]]
+define i32 @ashr32(i32 %value, i32 %amount) addrspace(1) {
+; CHECK-LABEL: @ashr32(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
+; CHECK:       shift.loop:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    [[TMP5]] = sub i8 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP6]] = ashr i32 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
+; CHECK:       shift.done:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    ret i32 [[TMP8]]
+;
+  %result = ashr i32 %value, %amount
+  ret i32 %result
+}
+
+define i40 @ashr40(i40 %value, i40 %amount) addrspace(1) {
+; CHECK-LABEL: @ashr40(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i40 [[AMOUNT:%.*]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
+; CHECK:       shift.loop:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i40 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    [[TMP5]] = sub i8 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP6]] = ashr i40 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
+; CHECK:       shift.done:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi i40 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    ret i40 [[TMP8]]
  ;
-  %result = shl i24 %value, %amount
-  ret i24 %result
+  %result = ashr i40 %value, %amount
+  ret i40 %result
  }
diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll

index 7d9198b..c0abc77 100644 (file)
--- a/llvm/test/CodeGen/AVR/shift.ll
+++ b/llvm/test/CodeGen/AVR/shift.ll
@@ -54,10 +54,36 @@ define i64 @shift_i64_i64(i64 %a, i64 %b) {
  ; CHECK:       ; %bb.0:
  ; CHECK-NEXT:    push r16
  ; CHECK-NEXT:    push r17
-; CHECK-NEXT:    mov r16, r10
-; CHECK-NEXT:    mov r17, r11
-; CHECK-NEXT:    andi r17, 0
-; CHECK-NEXT:    rcall __ashldi3
+; CHECK-NEXT:    mov r30, r10
+; CHECK-NEXT:    mov r31, r11
+; CHECK-NEXT:    cpi r30, 0
+; CHECK-NEXT:    breq .LBB3_3
+; CHECK-NEXT:  ; %bb.1: ; %shift.loop.preheader
+; CHECK-NEXT:    mov r27, r1
+; CHECK-NEXT:    mov r16, r1
+; CHECK-NEXT:    mov r17, r1
+; CHECK-NEXT:  .LBB3_2: ; %shift.loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    mov r31, r21
+; CHECK-NEXT:    lsl r31
+; CHECK-NEXT:    mov r26, r1
+; CHECK-NEXT:    rol r26
+; CHECK-NEXT:    lsl r22
+; CHECK-NEXT:    rol r23
+; CHECK-NEXT:    rol r24
+; CHECK-NEXT:    rol r25
+; CHECK-NEXT:    or r24, r16
+; CHECK-NEXT:    or r25, r17
+; CHECK-NEXT:    or r22, r26
+; CHECK-NEXT:    or r23, r27
+; CHECK-NEXT:    lsl r18
+; CHECK-NEXT:    rol r19
+; CHECK-NEXT:    rol r20
+; CHECK-NEXT:    rol r21
+; CHECK-NEXT:    dec r30
+; CHECK-NEXT:    cpi r30, 0
+; CHECK-NEXT:    brne .LBB3_2
+; CHECK-NEXT:  .LBB3_3: ; %shift.done
  ; CHECK-NEXT:    pop r17
  ; CHECK-NEXT:    pop r16
  ; CHECK-NEXT:    ret
author	Patryk Wychowaniec <pwychowaniec@pm.me>
	Wed, 19 Jul 2023 03:54:34 +0000 (11:54 +0800)
committer	Ben Shi <2283975856@qq.com>
	Wed, 19 Jul 2023 03:57:00 +0000 (11:57 +0800)
llvm/lib/Target/AVR/AVRShiftExpand.cpp		patch \| blob \| history
llvm/test/CodeGen/AVR/shift-expand.ll		patch \| blob \| history
llvm/test/CodeGen/AVR/shift.ll		patch \| blob \| history