Recommit.
Check for legal zext 'sinks' before inserting a trunc.
Differential Revision: https://reviews.llvm.org/D115451
continue;
}
+ // Don't insert a trunc for a zext which can still legally promote.
+ if (auto ZExt = dyn_cast<ZExtInst>(I))
+ if (ZExt->getType()->getScalarSizeInBits() > PromotedWidth)
+ continue;
+
// Now handle the others.
for (unsigned i = 0; i < I->getNumOperands(); ++i) {
Type *Ty = TruncTysMap[I][i];
; CHECK-NEXT: .LBB0_1: // %midblock
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh w10, [x9, #72]
-; CHECK-NEXT: and x0, x0, #0xffffffff00000000
-; CHECK-NEXT: ldr x12, [x9, #8]
; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: lsr w11, w10, #8
+; CHECK-NEXT: ubfx x11, x10, #8, #24
+; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: csel w8, w8, w11, eq
-; CHECK-NEXT: cset w11, ne
-; CHECK-NEXT: ldr x9, [x12, #16]
-; CHECK-NEXT: bfi w10, w8, #8, #24
-; CHECK-NEXT: bfi x0, x11, #16, #1
-; CHECK-NEXT: bfxil x0, x10, #0, #16
-; CHECK-NEXT: cbnz x12, .LBB0_1
+; CHECK-NEXT: ldr x11, [x9, #8]
+; CHECK-NEXT: and x9, x10, #0xff
+; CHECK-NEXT: and x10, x0, #0xffffffff00000000
+; CHECK-NEXT: bfi x9, x8, #8, #32
+; CHECK-NEXT: bfi x10, x12, #16, #1
+; CHECK-NEXT: orr x0, x10, x9
+; CHECK-NEXT: ldr x9, [x11, #16]
+; CHECK-NEXT: cbnz x11, .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
entry:
; CHECK-NEXT: .LBB1_1: // %midblock
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh w10, [x9, #72]
-; CHECK-NEXT: and x0, x0, #0xffffffff00000000
-; CHECK-NEXT: ldr x12, [x9, #8]
; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: lsr w11, w10, #8
+; CHECK-NEXT: ubfx x11, x10, #8, #24
+; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: csel w8, w8, w11, eq
-; CHECK-NEXT: cset w11, ne
-; CHECK-NEXT: ldr x9, [x12, #16]
-; CHECK-NEXT: bfi w10, w8, #8, #24
-; CHECK-NEXT: bfi x0, x11, #16, #1
-; CHECK-NEXT: bfxil x0, x10, #0, #16
-; CHECK-NEXT: cbnz x12, .LBB1_1
+; CHECK-NEXT: ldr x11, [x9, #8]
+; CHECK-NEXT: and x9, x10, #0xff
+; CHECK-NEXT: and x10, x0, #0xffffffff00000000
+; CHECK-NEXT: bfi x9, x8, #8, #32
+; CHECK-NEXT: bfi x10, x12, #16, #1
+; CHECK-NEXT: orr x0, x10, x9
+; CHECK-NEXT: ldr x9, [x11, #16]
+; CHECK-NEXT: cbnz x11, .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
entry:
; CHECK-NEXT: [[SOURCE_0184:%.*]] = phi i8* [ [[SOURCE_6:%.*]], [[CLEANUP]] ], [ [[I]], [[ENTRY]] ]
; CHECK-NEXT: [[I2:%.*]] = load i8, i8* [[SOURCE_0184]], align 1
; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[I2]] to i32
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
-; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i8], [256 x i8]* @_ZN4llvmL20trailingBytesForUTF8E, i64 0, i64 [[IDXPROM]]
; CHECK-NEXT: [[I3:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[I3]] to i64
; CHECK-NEXT: [[SRCPTR_1_I:%.*]] = phi i8* [ [[ADD_PTR_I]], [[IF_END]] ], [ [[INCDEC_PTR4_I]], [[SW_BB3_I]] ]
; CHECK-NEXT: [[INCDEC_PTR13_I:%.*]] = getelementptr inbounds i8, i8* [[SRCPTR_1_I]], i64 -1
; CHECK-NEXT: [[I10:%.*]] = load i8, i8* [[INCDEC_PTR13_I]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[I10]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
-; CHECK-NEXT: [[I11:%.*]] = icmp sgt i8 [[TMP3]], -65
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[I10]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+; CHECK-NEXT: [[I11:%.*]] = icmp sgt i8 [[TMP2]], -65
; CHECK-NEXT: br i1 [[I11]], label [[WHILE_END]], label [[IF_END20_I:%.*]]
; CHECK: if.end20.i:
; CHECK-NEXT: switch i32 [[TMP0]], label [[SW_BB47_I]] [
; CHECK-NEXT: i32 244, label [[SW_BB37_I:%.*]]
; CHECK-NEXT: ]
; CHECK: sw.bb22.i:
-; CHECK-NEXT: [[CMP24_I:%.*]] = icmp ult i32 [[TMP2]], 160
+; CHECK-NEXT: [[CMP24_I:%.*]] = icmp ult i32 [[TMP1]], 160
; CHECK-NEXT: br i1 [[CMP24_I]], label [[WHILE_END]], label [[IF_END5:%.*]]
; CHECK: sw.bb27.i:
-; CHECK-NEXT: [[CMP29_I:%.*]] = icmp ugt i32 [[TMP2]], 159
+; CHECK-NEXT: [[CMP29_I:%.*]] = icmp ugt i32 [[TMP1]], 159
; CHECK-NEXT: br i1 [[CMP29_I]], label [[WHILE_END]], label [[IF_END5]]
; CHECK: sw.bb32.i:
-; CHECK-NEXT: [[CMP34_I:%.*]] = icmp ult i32 [[TMP2]], 144
+; CHECK-NEXT: [[CMP34_I:%.*]] = icmp ult i32 [[TMP1]], 144
; CHECK-NEXT: br i1 [[CMP34_I]], label [[WHILE_END]], label [[IF_END5]]
; CHECK: sw.bb37.i:
-; CHECK-NEXT: [[CMP39_I:%.*]] = icmp ugt i32 [[TMP2]], 143
+; CHECK-NEXT: [[CMP39_I:%.*]] = icmp ugt i32 [[TMP1]], 143
; CHECK-NEXT: br i1 [[CMP39_I]], label [[WHILE_END]], label [[IF_END5]]
; CHECK: sw.bb47.i:
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP0]] to i8
-; CHECK-NEXT: [[I12:%.*]] = icmp slt i8 [[TMP4]], -62
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i8
+; CHECK-NEXT: [[I12:%.*]] = icmp slt i8 [[TMP3]], -62
; CHECK-NEXT: [[CMP56_I:%.*]] = icmp ugt i32 [[TMP0]], 244
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[I12]], [[CMP56_I]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[WHILE_END]], label [[IF_END5]]
; CHECK-NEXT: [[CONV16:%.*]] = zext i8 [[I2]] to i32
; CHECK-NEXT: [[SHL18:%.*]] = shl nuw nsw i32 [[CONV16]], 6
; CHECK-NEXT: [[DOTPRE232:%.*]] = load i8, i8* [[INCDEC_PTR15]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[DOTPRE232]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[DOTPRE232]] to i32
; CHECK-NEXT: br label [[SW_BB19]]
; CHECK: sw.bb19:
-; CHECK-NEXT: [[I13:%.*]] = phi i32 [ [[TMP0]], [[IF_END5]] ], [ [[TMP5]], [[SW_BB14]] ]
+; CHECK-NEXT: [[I13:%.*]] = phi i32 [ [[TMP0]], [[IF_END5]] ], [ [[TMP4]], [[SW_BB14]] ]
; CHECK-NEXT: [[SOURCE_3:%.*]] = phi i8* [ [[SOURCE_0184]], [[IF_END5]] ], [ [[INCDEC_PTR15]], [[SW_BB14]] ]
; CHECK-NEXT: [[CH_2:%.*]] = phi i32 [ 0, [[IF_END5]] ], [ [[SHL18]], [[SW_BB14]] ]
; CHECK-NEXT: [[INCDEC_PTR20:%.*]] = getelementptr inbounds i8, i8* [[SOURCE_3]], i64 1
; CHECK-NEXT: [[ADD22:%.*]] = add nuw nsw i32 [[CH_2]], [[I13]]
; CHECK-NEXT: [[SHL23:%.*]] = shl nsw i32 [[ADD22]], 6
; CHECK-NEXT: [[DOTPRE233:%.*]] = load i8, i8* [[INCDEC_PTR20]], align 1
-; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[DOTPRE233]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[DOTPRE233]] to i32
; CHECK-NEXT: br label [[SW_BB24]]
; CHECK: sw.bb24:
-; CHECK-NEXT: [[I14:%.*]] = phi i32 [ [[TMP0]], [[IF_END5]] ], [ [[TMP6]], [[SW_BB19]] ]
+; CHECK-NEXT: [[I14:%.*]] = phi i32 [ [[TMP0]], [[IF_END5]] ], [ [[TMP5]], [[SW_BB19]] ]
; CHECK-NEXT: [[SOURCE_4:%.*]] = phi i8* [ [[SOURCE_0184]], [[IF_END5]] ], [ [[INCDEC_PTR20]], [[SW_BB19]] ]
; CHECK-NEXT: [[CH_3:%.*]] = phi i32 [ 0, [[IF_END5]] ], [ [[SHL23]], [[SW_BB19]] ]
; CHECK-NEXT: [[INCDEC_PTR25:%.*]] = getelementptr inbounds i8, i8* [[SOURCE_4]], i64 1
; CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[CH_3]], [[I14]]
; CHECK-NEXT: [[SHL28:%.*]] = shl i32 [[ADD27]], 6
; CHECK-NEXT: [[DOTPRE234:%.*]] = load i8, i8* [[INCDEC_PTR25]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[DOTPRE234]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[DOTPRE234]] to i32
; CHECK-NEXT: br label [[SW_BB29]]
; CHECK: sw.bb29:
-; CHECK-NEXT: [[I15:%.*]] = phi i32 [ [[TMP0]], [[IF_END5]] ], [ [[TMP7]], [[SW_BB24]] ]
+; CHECK-NEXT: [[I15:%.*]] = phi i32 [ [[TMP0]], [[IF_END5]] ], [ [[TMP6]], [[SW_BB24]] ]
; CHECK-NEXT: [[SOURCE_5:%.*]] = phi i8* [ [[SOURCE_0184]], [[IF_END5]] ], [ [[INCDEC_PTR25]], [[SW_BB24]] ]
; CHECK-NEXT: [[CH_4:%.*]] = phi i32 [ 0, [[IF_END5]] ], [ [[SHL28]], [[SW_BB24]] ]
; CHECK-NEXT: [[INCDEC_PTR30:%.*]] = getelementptr inbounds i8, i8* [[SOURCE_5]], i64 1
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=aarch64 -type-promotion -verify -S %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+; Checks that type promotion does not place a trunc in front of the zext that
+; feeds the GEP index. In the input below, the i8 loop phi %tag.0.in10 is
+; zero-extended to i64 and used to index %ip. In the expected output the phi is
+; widened to i32 and extended directly with `zext i32 ... to i64`; no
+; `trunc ... to i8` appears in for.body.
+; The only trunc in the checked output is [[TMP2]] in for.end, and nothing in
+; the checked output uses it: the return takes the widened phi directly.
+define dso_local i32 @avoid_trunc_gep(i8* nocapture readonly %ip) {
+; CHECK-LABEL: @avoid_trunc_gep(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TAG_0_IN8:%.*]] = load i8, i8* [[IP:%.*]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[TAG_0_IN8]] to i32
+; CHECK-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP0]], 100
+; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[TAG_0_IN10:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[TAG_0:%.*]] = zext i32 [[TAG_0_IN10]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[IP]], i64 [[TAG_0]]
+; CHECK-NEXT: [[TAG_0_IN:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP1]] = zext i8 [[TAG_0_IN]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], 100
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: [[TAG_0_IN_LCSSA:%.*]] = phi i32 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP1]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TAG_0_IN_LCSSA]] to i8
+; CHECK-NEXT: ret i32 [[TAG_0_IN_LCSSA]]
+;
+entry:
+ %tag.0.in8 = load i8, i8* %ip, align 1
+ %cmp9 = icmp ult i8 %tag.0.in8, 100
+ br i1 %cmp9, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %tag.0.in10 = phi i8 [ %tag.0.in, %for.body ], [ %tag.0.in8, %for.body.preheader ]
+ ; The i64 result here is wider than the i32 that the CHECK lines promote the
+ ; phi to, so the expected output keeps this zext and only changes its source
+ ; type from i8 to i32.
+ %tag.0 = zext i8 %tag.0.in10 to i64
+ %arrayidx = getelementptr inbounds i8, i8* %ip, i64 %tag.0
+ %tag.0.in = load i8, i8* %arrayidx, align 1
+ %cmp = icmp ult i8 %tag.0.in, 100
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %tag.0.in.lcssa = phi i8 [ %tag.0.in8, %entry ], [ %tag.0.in, %for.end.loopexit ]
+ ; This zext to i32 is absent from the expected output: the CHECK lines return
+ ; the i32 phi itself.
+ %conv3 = zext i8 %tag.0.in.lcssa to i32
+ ret i32 %conv3
+}
; CHECK-NEXT: [[VAR24:%.*]] = and i32 [[TMP0]], 255
; CHECK-NEXT: [[VAR25:%.*]] = or i32 [[VAR23]], [[VAR24]]
; CHECK-NEXT: [[VAR26:%.*]] = select i1 [[VAR18]], i64 0, i64 65536
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[VAR25]] to i16
-; CHECK-NEXT: [[VAR27:%.*]] = zext i16 [[TMP2]] to i64
+; CHECK-NEXT: [[VAR27:%.*]] = zext i32 [[VAR25]] to i64
; CHECK-NEXT: [[VAR28:%.*]] = and i64 [[VAR4]], -4294967296
; CHECK-NEXT: [[VAR29:%.*]] = or i64 [[VAR26]], [[VAR28]]
; CHECK-NEXT: [[VAR30]] = or i64 [[VAR29]], [[VAR27]]
; CHECK-NEXT: [[VAR24:%.*]] = and i32 [[TMP0]], 255
; CHECK-NEXT: [[VAR25:%.*]] = or i32 [[VAR23]], [[VAR24]]
; CHECK-NEXT: [[VAR26:%.*]] = select i1 [[VAR18]], i64 0, i64 65536
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[VAR25]] to i16
-; CHECK-NEXT: [[VAR27:%.*]] = zext i16 [[TMP2]] to i64
+; CHECK-NEXT: [[VAR27:%.*]] = zext i32 [[VAR25]] to i64
; CHECK-NEXT: [[VAR28:%.*]] = and i64 [[VAR4]], -4294967296
; CHECK-NEXT: [[VAR29:%.*]] = or i64 [[VAR26]], [[VAR28]]
; CHECK-NEXT: [[VAR30]] = or i64 [[VAR29]], [[VAR27]]