"maddld %0,%1,%2,%3"
[(set_attr "type" "mul")])
-(define_expand "<u>maddditi4"
+;; umaddditi4 generally needs maddhdu + maddld + add instructions;
+;; when the last operand is zero extended from DImode it needs only
+;; maddhdu + maddld.  These are faster than mulld + mulhdu + addc + adde
+;; and mulld + mulhdu + addc + addze, respectively.
+;; We don't define maddditi4, as that one needs
+;; maddhd + sradi + maddld + add + sub, and for a last operand sign extended
+;; from DImode nothing is able to optimize it into maddhd + maddld, while
+;; without maddditi4 mulld + mulhd + addc + adde or
+;; mulld + mulhd + sradi + addc + adde is needed.  See PR108787.
+(define_expand "umaddditi4"
[(set (match_operand:TI 0 "gpc_reg_operand")
(plus:TI
- (mult:TI (any_extend:TI (match_operand:DI 1 "gpc_reg_operand"))
- (any_extend:TI (match_operand:DI 2 "gpc_reg_operand")))
- (any_extend:TI (match_operand:DI 3 "gpc_reg_operand"))))]
+ (mult:TI (zero_extend:TI (match_operand:DI 1 "gpc_reg_operand"))
+ (zero_extend:TI (match_operand:DI 2 "gpc_reg_operand")))
+ (match_operand:TI 3 "gpc_reg_operand")))]
"TARGET_MADDLD && TARGET_POWERPC64"
{
rtx op0_lo = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 8 : 0);
rtx op0_hi = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 0 : 8);
+ rtx op3_lo = gen_rtx_SUBREG (DImode, operands[3], BYTES_BIG_ENDIAN ? 8 : 0); /* Low-order DImode half of the TImode addend (operand 3).  */
+ rtx op3_hi = gen_rtx_SUBREG (DImode, operands[3], BYTES_BIG_ENDIAN ? 0 : 8); /* High-order DImode half of the addend.  */
+ rtx hi_temp = gen_reg_rtx (DImode); /* Receives the high-part multiply-add before the addend's high half is added.  */
- emit_insn (gen_maddlddi4 (op0_lo, operands[1], operands[2], operands[3]));
+ emit_insn (gen_maddlddi4 (op0_lo, operands[1], operands[2], op3_lo)); /* Low result half, computed from the addend's low half.  */
if (BYTES_BIG_ENDIAN)
- emit_insn (gen_<u>madddi4_highpart (op0_hi, operands[1], operands[2],
- operands[3]));
+ emit_insn (gen_umadddi4_highpart (hi_temp, operands[1], operands[2],
+ op3_lo)); /* Only the addend's low half is passed to the high-part insn.  */
else
- emit_insn (gen_<u>madddi4_highpart_le (op0_hi, operands[1], operands[2],
- operands[3]));
+ emit_insn (gen_umadddi4_highpart_le (hi_temp, operands[1], operands[2],
+ op3_lo)); /* Little-endian variant; same operands as the big-endian call.  */
+
+ emit_insn (gen_adddi3 (op0_hi, hi_temp, op3_hi)); /* High result half = hi_temp + high half of the addend.  */
+
DONE;
})
--- /dev/null
+/* PR target/108787 */
+/* { dg-do run { target int128 } } */
+/* { dg-options "-O2" } */
+
+__attribute__((noipa)) unsigned __int128 /* Test kernel: returns x*u + y*w + z*v as a 128-bit sum; noipa is GCC's attribute disabling interprocedural optimization of this function.  */
+foo (unsigned long long x, unsigned long long y, unsigned long long z, unsigned long long u, unsigned long long v, unsigned long long w)
+{
+ unsigned __int128 r, d; /* r: running 128-bit sum; d: the next widened product.  */
+ r = ((unsigned __int128) x * u); /* The cast widens x first, so the multiply is done in 128 bits.  */
+ d = ((unsigned __int128) y * w);
+ r += d; /* Accumulate a widened product into the 128-bit sum.  */
+ d = ((unsigned __int128) z * v);
+ r += d;
+ return r;
+}
+
+int
+main () /* Driver: calls foo on fixed inputs and aborts if either 64-bit half of the result differs from the expected constant.  */
+{
+ if (__CHAR_BIT__ != 8 || __SIZEOF_LONG_LONG__ != 8 || __SIZEOF_INT128__ != 16) /* The expected constants assume 8-bit chars, 8-byte long long and 16-byte __int128.  */
+ return 0; /* Otherwise exit successfully without running the check.  */
+ unsigned __int128 x = foo (0x3efe88da491ULL, 0xd105e9b4a44ULL, 0x4efa677b3dbULL, 0x42c052bac7bULL, 0x99638a13199cULL, 0x56b640d064ULL);
+ if ((unsigned long long) (x >> 64) != 0x000000000309ff93ULL /* Upper 64 bits of the sum.  */
+ || (unsigned long long) x != 0xbd5c98fdf2bdbcafULL) /* Lower 64 bits of the sum.  */
+ __builtin_abort ();
+ return 0;
+}