case MachineCombinerPattern::MULADDXI_OP1: {
// MUL I=A,B,0
// ADD R,I,Imm
- // ==> ORR V, ZR, Imm
+ // ==> MOV V, Imm
// ==> MADD R,A,B,V
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
Imm = Imm << Val;
}
uint64_t UImm = SignExtend64(Imm, BitSize);
- uint64_t Encoding;
- if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+ // Bail out unless a single instruction can materialize the immediate.
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+ AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
+ if (Insn.size() != 1)
return;
- MachineInstrBuilder MIB1 =
- BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
- .addReg(ZeroReg)
- .addImm(Encoding);
+ auto MovI = Insn.begin();
+ MachineInstrBuilder MIB1;
+ // MOV is an alias for one of three instructions: movz, movn, and orr.
+ if (MovI->Opcode == OrrOpc)
+ MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+ .addReg(ZeroReg)
+ .addImm(MovI->Op2);
+ else {
+ if (BitSize == 32)
+ assert((MovI->Opcode == AArch64::MOVNWi ||
+ MovI->Opcode == AArch64::MOVZWi) &&
+ "Expected opcode");
+ else
+ assert((MovI->Opcode == AArch64::MOVNXi ||
+ MovI->Opcode == AArch64::MOVZXi) &&
+ "Expected opcode");
+ MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(MovI->Opcode), NewVR)
+ .addImm(MovI->Op1)
+ .addImm(MovI->Op2);
+ }
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
case MachineCombinerPattern::MULSUBXI_OP1: {
// MUL I=A,B,0
// SUB R,I, Imm
- // ==> ORR V, ZR, -Imm
+ // ==> MOV V, -Imm
// ==> MADD R,A,B,V // = -Imm + A*B
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
Imm = Imm << Val;
}
uint64_t UImm = SignExtend64(-Imm, BitSize);
- uint64_t Encoding;
- if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+ // Bail out unless a single instruction can materialize the immediate.
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+ AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
+ if (Insn.size() != 1)
return;
- MachineInstrBuilder MIB1 =
- BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
- .addReg(ZeroReg)
- .addImm(Encoding);
+ auto MovI = Insn.begin();
+ MachineInstrBuilder MIB1;
+ // MOV is an alias for one of three instructions: movz, movn, and orr.
+ if (MovI->Opcode == OrrOpc)
+ MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+ .addReg(ZeroReg)
+ .addImm(MovI->Op2);
+ else {
+ if (BitSize == 32)
+ assert((MovI->Opcode == AArch64::MOVNWi ||
+ MovI->Opcode == AArch64::MOVZWi) &&
+ "Expected opcode");
+ else
+ assert((MovI->Opcode == AArch64::MOVNXi ||
+ MovI->Opcode == AArch64::MOVZXi) &&
+ "Expected opcode");
+ MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(MovI->Opcode), NewVR)
+ .addImm(MovI->Op1)
+ .addImm(MovI->Op2);
+ }
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
; CHECK-LABEL: addimm_mulimm_accept_00:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: add x0, x8, #1147
+; CHECK-NEXT: mov x9, #1147
+; CHECK-NEXT: madd x0, x0, x8, x9
; CHECK-NEXT: ret
%tmp0 = add i64 %a, 31
%tmp1 = mul i64 %tmp0, 37
; CHECK-LABEL: addimm_mulimm_accept_01:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: sub x0, x8, #1147
+; CHECK-NEXT: mov x9, #-1147
+; CHECK-NEXT: madd x0, x0, x8, x9
; CHECK-NEXT: ret
%tmp0 = add i64 %a, -31
%tmp1 = mul i64 %tmp0, 37
; CHECK-LABEL: addimm_mulimm_accept_02:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: add w0, w8, #1147
+; CHECK-NEXT: mov w9, #1147
+; CHECK-NEXT: madd w0, w0, w8, w9
; CHECK-NEXT: ret
%tmp0 = add i32 %a, 31
%tmp1 = mul i32 %tmp0, 37
; CHECK-LABEL: addimm_mulimm_accept_03:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: sub w0, w8, #1147
+; CHECK-NEXT: mov w9, #-1147
+; CHECK-NEXT: madd w0, w0, w8, w9
; CHECK-NEXT: ret
%tmp0 = add i32 %a, -31
%tmp1 = mul i32 %tmp0, 37
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: orr w8, wzr, #0x1
+; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: madd w19, w0, w0, w8
; CHECK-NEXT: mov w0, #4
; CHECK-NEXT: bl __cxa_allocate_exception
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: mov w0, #4
-; CHECK-NEXT: orr w9, wzr, #0x1
+; CHECK-NEXT: mov w9, #1
; CHECK-NEXT: madd w19, w8, w8, w9
; CHECK-NEXT: bl __cxa_allocate_exception
; CHECK-NEXT: bl OUTLINED_FUNCTION_0
define i32 @mul_add_imm(i32 %a, i32 %b) {
; CHECK-LABEL: mul_add_imm:
; CHECK: ; %bb.0:
-; CHECK-NEXT: orr w8, wzr, #0x4
+; CHECK-NEXT: mov w8, #4
; CHECK-NEXT: madd w0, w0, w1, w8
; CHECK-NEXT: ret
%1 = mul i32 %a, %b
; CHECK-FAST-LABEL: mul_add_imm2:
; CHECK-FAST: ; %bb.0: ; %entry
; CHECK-FAST-NEXT: mov x8, #-3
-; CHECK-FAST-NEXT: orr x9, xzr, #0xfffffffffffffffd
+; CHECK-FAST-NEXT: mov x9, #-3
; CHECK-FAST-NEXT: madd x8, x8, x8, x9
; CHECK-FAST-NEXT: mov x9, #45968
; CHECK-FAST-NEXT: movk x9, #48484, lsl #16
ret i64 %sub
}
+; The addend gets its own "mov", which may then be hoisted out of a loop.
+define i32 @mull6_sub(i32 %x) {
+; CHECK-LABEL: mull6_sub:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #6
+; CHECK-NEXT: mov w9, #-1
+; CHECK-NEXT: madd w0, w0, w8, w9
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: mull6_sub:
+; GISEL: // %bb.0:
+; GISEL-NEXT: mov w8, #6
+; GISEL-NEXT: mov w9, #-1
+; GISEL-NEXT: madd w0, w0, w8, w9
+; GISEL-NEXT: ret
+ %mul = mul nsw i32 %x, 6
+ %sub = add nsw i32 %mul, -1
+ ret i32 %sub
+}
+
+define i64 @mull6_sub_orr(i64 %x) {
+; CHECK-LABEL: mull6_sub_orr:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #6
+; CHECK-NEXT: mov x9, #16773120
+; CHECK-NEXT: madd x0, x0, x8, x9
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: mull6_sub_orr:
+; GISEL: // %bb.0:
+; GISEL-NEXT: mov w8, #6
+; GISEL-NEXT: mov x9, #16773120
+; GISEL-NEXT: madd x0, x0, x8, x9
+; GISEL-NEXT: ret
+ %mul = mul nsw i64 %x, 6
+ %sub = add nsw i64 %mul, 16773120
+ ret i64 %sub
+}
+
define i32 @test7(i32 %x) {
; CHECK-LABEL: test7:
; CHECK: // %bb.0:
;
; GISEL-LABEL: muladd_demand_commute:
; GISEL: // %bb.0:
-; GISEL-NEXT: adrp x8, .LCPI42_1
-; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI42_1]
-; GISEL-NEXT: adrp x8, .LCPI42_0
+; GISEL-NEXT: adrp x8, .LCPI44_1
+; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI44_1]
+; GISEL-NEXT: adrp x8, .LCPI44_0
; GISEL-NEXT: mla v1.4s, v0.4s, v2.4s
-; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI42_0]
+; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI44_0]
; GISEL-NEXT: and v0.16b, v1.16b, v0.16b
; GISEL-NEXT: ret
%m = mul <4 x i32> %x, <i32 131008, i32 131008, i32 131008, i32 131008>
; CHECK-LABEL: test_srem_odd_bit30:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
-; CHECK-NEXT: orr w9, wzr, #0x1
+; CHECK-NEXT: mov w9, #1
; CHECK-NEXT: movk w8, #27306, lsl #16
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: cmp w8, #3
; CHECK-LABEL: test_srem_odd_bit31:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #21845
-; CHECK-NEXT: orr w9, wzr, #0x1
+; CHECK-NEXT: mov w9, #1
; CHECK-NEXT: movk w8, #54613, lsl #16
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: cmp w8, #3
; CHECK-LABEL: test_srem_even_bit30:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #20165
-; CHECK-NEXT: orr w9, wzr, #0x8
+; CHECK-NEXT: mov w9, #8
; CHECK-NEXT: movk w8, #64748, lsl #16
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: ror w8, w8, #3
; CHECK-LABEL: test_srem_even_bit31:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1285
-; CHECK-NEXT: orr w9, wzr, #0x2
+; CHECK-NEXT: mov w9, #2
; CHECK-NEXT: movk w8, #50437, lsl #16
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: ror w8, w8, #1
; CHECK-LABEL: t32_6_3:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
-; CHECK-NEXT: mov w9, #43691
+; CHECK-NEXT: mov w9, #-1
; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: madd w8, w0, w8, w9
+; CHECK-NEXT: mov w9, #43691
; CHECK-NEXT: movk w9, #10922, lsl #16
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: sub w8, w8, #1
; CHECK-NEXT: ror w8, w8, #1
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-LABEL: t8_3_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-85
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: sub w8, w8, #86
+; CHECK-NEXT: mov w9, #-86
+; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: and w8, w8, #0xff
; CHECK-NEXT: cmp w8, #85
; CHECK-NEXT: cset w0, lo