From 119c34e7f9c66dbdb77f69d67bb50507c91dc2ef Mon Sep 17 00:00:00 2001 From: =?utf8?q?Pawe=C5=82=20Bylica?= Date: Sat, 22 Oct 2022 13:53:34 +0000 Subject: [PATCH] [InstCombine][test] Add tests for mul combinations Tests taken from https://reviews.llvm.org/D56214 and ported to InstCombine for https://reviews.llvm.org/D136015. --- llvm/test/Transforms/InstCombine/mul_full_32.ll | 117 ++++ llvm/test/Transforms/InstCombine/mul_full_64.ll | 731 ++++++++++++++++++++++++ 2 files changed, 848 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/mul_full_32.ll create mode 100644 llvm/test/Transforms/InstCombine/mul_full_64.ll diff --git a/llvm/test/Transforms/InstCombine/mul_full_32.ll b/llvm/test/Transforms/InstCombine/mul_full_32.ll new file mode 100644 index 0000000..6a8d735 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/mul_full_32.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +define { i64, i64 } @mul_full_64(i64 %x, i64 %y) { +; CHECK-LABEL: @mul_full_64( +; CHECK-NEXT: [[XL:%.*]] = and i64 [[X:%.*]], 4294967295 +; CHECK-NEXT: [[XH:%.*]] = lshr i64 [[X]], 32 +; CHECK-NEXT: [[YL:%.*]] = and i64 [[Y:%.*]], 4294967295 +; CHECK-NEXT: [[YH:%.*]] = lshr i64 [[Y]], 32 +; CHECK-NEXT: [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]] +; CHECK-NEXT: [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]] +; CHECK-NEXT: [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]] +; CHECK-NEXT: [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]] +; CHECK-NEXT: [[T0L:%.*]] = and i64 [[T0]], 4294967295 +; CHECK-NEXT: [[T0H:%.*]] = lshr i64 [[T0]], 32 +; CHECK-NEXT: [[U0:%.*]] = add i64 [[T0H]], [[T1]] +; CHECK-NEXT: [[U0L:%.*]] = and i64 [[U0]], 4294967295 +; CHECK-NEXT: [[U0H:%.*]] = lshr i64 [[U0]], 32 +; CHECK-NEXT: [[U1:%.*]] = add i64 [[U0L]], [[T2]] +; CHECK-NEXT: [[U1LS:%.*]] = shl i64 [[U1]], 32 +; 
CHECK-NEXT: [[U1H:%.*]] = lshr i64 [[U1]], 32 +; CHECK-NEXT: [[U2:%.*]] = add i64 [[U0H]], [[T3]] +; CHECK-NEXT: [[LO:%.*]] = or i64 [[U1LS]], [[T0L]] +; CHECK-NEXT: [[HI:%.*]] = add i64 [[U2]], [[U1H]] +; CHECK-NEXT: [[RES_LO:%.*]] = insertvalue { i64, i64 } undef, i64 [[LO]], 0 +; CHECK-NEXT: [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1 +; CHECK-NEXT: ret { i64, i64 } [[RES]] +; + %xl = and i64 %x, 4294967295 + %xh = lshr i64 %x, 32 + %yl = and i64 %y, 4294967295 + %yh = lshr i64 %y, 32 + + %t0 = mul nuw i64 %yl, %xl + %t1 = mul nuw i64 %yl, %xh + %t2 = mul nuw i64 %yh, %xl + %t3 = mul nuw i64 %yh, %xh + + %t0l = and i64 %t0, 4294967295 + %t0h = lshr i64 %t0, 32 + + %u0 = add i64 %t0h, %t1 + %u0l = and i64 %u0, 4294967295 + %u0h = lshr i64 %u0, 32 + + %u1 = add i64 %u0l, %t2 + %u1ls = shl i64 %u1, 32 + %u1h = lshr i64 %u1, 32 + + %u2 = add i64 %u0h, %t3 + + %lo = or i64 %u1ls, %t0l + %hi = add i64 %u2, %u1h + + %res_lo = insertvalue { i64, i64 } undef, i64 %lo, 0 + %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1 + ret { i64, i64 } %res +} + +define { i32, i32 } @mul_full_32(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_full_32( +; CHECK-NEXT: [[XL:%.*]] = and i32 [[X:%.*]], 65535 +; CHECK-NEXT: [[XH:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[YL:%.*]] = and i32 [[Y:%.*]], 65535 +; CHECK-NEXT: [[YH:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[T0:%.*]] = mul nuw i32 [[YL]], [[XL]] +; CHECK-NEXT: [[T1:%.*]] = mul nuw i32 [[YL]], [[XH]] +; CHECK-NEXT: [[T2:%.*]] = mul nuw i32 [[YH]], [[XL]] +; CHECK-NEXT: [[T3:%.*]] = mul nuw i32 [[YH]], [[XH]] +; CHECK-NEXT: [[T0L:%.*]] = and i32 [[T0]], 65535 +; CHECK-NEXT: [[T0H:%.*]] = lshr i32 [[T0]], 16 +; CHECK-NEXT: [[U0:%.*]] = add i32 [[T0H]], [[T1]] +; CHECK-NEXT: [[U0L:%.*]] = and i32 [[U0]], 65535 +; CHECK-NEXT: [[U0H:%.*]] = lshr i32 [[U0]], 16 +; CHECK-NEXT: [[U1:%.*]] = add i32 [[U0L]], [[T2]] +; CHECK-NEXT: [[U1LS:%.*]] = shl i32 [[U1]], 16 +; CHECK-NEXT: [[U1H:%.*]] = lshr i32 [[U1]], 16 +; 
CHECK-NEXT: [[U2:%.*]] = add i32 [[U0H]], [[T3]] +; CHECK-NEXT: [[LO:%.*]] = or i32 [[U1LS]], [[T0L]] +; CHECK-NEXT: [[HI:%.*]] = add i32 [[U2]], [[U1H]] +; CHECK-NEXT: [[RES_LO:%.*]] = insertvalue { i32, i32 } undef, i32 [[LO]], 0 +; CHECK-NEXT: [[RES:%.*]] = insertvalue { i32, i32 } [[RES_LO]], i32 [[HI]], 1 +; CHECK-NEXT: ret { i32, i32 } [[RES]] +; + %xl = and i32 %x, 65535 + %xh = lshr i32 %x, 16 + %yl = and i32 %y, 65535 + %yh = lshr i32 %y, 16 + + %t0 = mul nuw i32 %yl, %xl + %t1 = mul nuw i32 %yl, %xh + %t2 = mul nuw i32 %yh, %xl + %t3 = mul nuw i32 %yh, %xh + + %t0l = and i32 %t0, 65535 + %t0h = lshr i32 %t0, 16 + + %u0 = add i32 %t0h, %t1 + %u0l = and i32 %u0, 65535 + %u0h = lshr i32 %u0, 16 + + %u1 = add i32 %u0l, %t2 + %u1ls = shl i32 %u1, 16 + %u1h = lshr i32 %u1, 16 + + %u2 = add i32 %u0h, %t3 + + %lo = or i32 %u1ls, %t0l + %hi = add i32 %u2, %u1h + + %res_lo = insertvalue { i32, i32 } undef, i32 %lo, 0 + %res = insertvalue { i32, i32 } %res_lo, i32 %hi, 1 + ret { i32, i32 } %res +} diff --git a/llvm/test/Transforms/InstCombine/mul_full_64.ll b/llvm/test/Transforms/InstCombine/mul_full_64.ll new file mode 100644 index 0000000..fa58271 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/mul_full_64.ll @@ -0,0 +1,731 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define { i64, i64 } @mul_full_64_variant0(i64 %x, i64 %y) { +; CHECK-LABEL: @mul_full_64_variant0( +; CHECK-NEXT: [[XL:%.*]] = and i64 [[X:%.*]], 4294967295 +; CHECK-NEXT: [[XH:%.*]] = lshr i64 [[X]], 32 +; CHECK-NEXT: [[YL:%.*]] = and i64 [[Y:%.*]], 4294967295 +; CHECK-NEXT: [[YH:%.*]] = lshr i64 [[Y]], 32 +; CHECK-NEXT: [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]] +; CHECK-NEXT: [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]] +; CHECK-NEXT: [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]] +; CHECK-NEXT: [[T3:%.*]] = mul nuw i64 
[[YH]], [[XH]] +; CHECK-NEXT: [[T0L:%.*]] = and i64 [[T0]], 4294967295 +; CHECK-NEXT: [[T0H:%.*]] = lshr i64 [[T0]], 32 +; CHECK-NEXT: [[U0:%.*]] = add i64 [[T0H]], [[T1]] +; CHECK-NEXT: [[U0L:%.*]] = and i64 [[U0]], 4294967295 +; CHECK-NEXT: [[U0H:%.*]] = lshr i64 [[U0]], 32 +; CHECK-NEXT: [[U1:%.*]] = add i64 [[U0L]], [[T2]] +; CHECK-NEXT: [[U1LS:%.*]] = shl i64 [[U1]], 32 +; CHECK-NEXT: [[U1H:%.*]] = lshr i64 [[U1]], 32 +; CHECK-NEXT: [[U2:%.*]] = add i64 [[U0H]], [[T3]] +; CHECK-NEXT: [[LO:%.*]] = or i64 [[U1LS]], [[T0L]] +; CHECK-NEXT: [[HI:%.*]] = add i64 [[U2]], [[U1H]] +; CHECK-NEXT: [[RES_LO:%.*]] = insertvalue { i64, i64 } undef, i64 [[LO]], 0 +; CHECK-NEXT: [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1 +; CHECK-NEXT: ret { i64, i64 } [[RES]] +; + %xl = and i64 %x, 4294967295 + %xh = lshr i64 %x, 32 + %yl = and i64 %y, 4294967295 + %yh = lshr i64 %y, 32 + + %t0 = mul nuw i64 %yl, %xl + %t1 = mul nuw i64 %yl, %xh + %t2 = mul nuw i64 %yh, %xl + %t3 = mul nuw i64 %yh, %xh + + %t0l = and i64 %t0, 4294967295 + %t0h = lshr i64 %t0, 32 + + %u0 = add i64 %t0h, %t1 + %u0l = and i64 %u0, 4294967295 + %u0h = lshr i64 %u0, 32 + + %u1 = add i64 %u0l, %t2 + %u1ls = shl i64 %u1, 32 + %u1h = lshr i64 %u1, 32 + + %u2 = add i64 %u0h, %t3 + + %lo = or i64 %u1ls, %t0l + %hi = add i64 %u2, %u1h + + %res_lo = insertvalue { i64, i64 } undef, i64 %lo, 0 + %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1 + ret { i64, i64 } %res +} + +; The following variants 1 - 3 are generated with this C++ program: +; +; #include <cstdint> +; +; uint64_t mulxu(uint64_t a, uint64_t b, uint64_t *rhi) { +; auto hi = [](uint64_t x) { return x >> 32; }; +; auto lo = [](uint64_t x) { return uint32_t(x); }; +; uint64_t xl = lo(a); +; uint64_t xh = hi(a); +; uint64_t yl = lo(b); +; uint64_t yh = hi(b); +; +; uint64_t rhh = xh * yh; +; uint64_t rhl = xh * yl; +; uint64_t rlh = xl * yh; +; uint64_t rll = xl * yl; +; +; *rhi = rhh + hi(rhl + hi(rll)) + hi((rlh + lo(rhl + hi(rll)))); +; #if ONE 
+; return a*b; +; #elif TWO +; return (uint64_t(lo(rlh + lo(rhl + hi(rll)))) << 32) + lo(rll); +; #elif THREE +; return ((rlh + rhl) << 32) + rll; +; #endif +; } + +define i64 @mul_full_64_variant1(i64 %a, i64 %b, i64* nocapture %rhi) { +; CHECK-LABEL: @mul_full_64_variant1( +; CHECK-NEXT: [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295 +; CHECK-NEXT: [[SHR_I43:%.*]] = lshr i64 [[A]], 32 +; CHECK-NEXT: [[CONV3:%.*]] = and i64 [[B:%.*]], 4294967295 +; CHECK-NEXT: [[SHR_I41:%.*]] = lshr i64 [[B]], 32 +; CHECK-NEXT: [[MUL:%.*]] = mul nuw i64 [[SHR_I41]], [[SHR_I43]] +; CHECK-NEXT: [[MUL5:%.*]] = mul nuw i64 [[CONV3]], [[SHR_I43]] +; CHECK-NEXT: [[MUL6:%.*]] = mul nuw i64 [[SHR_I41]], [[CONV]] +; CHECK-NEXT: [[MUL7:%.*]] = mul nuw i64 [[CONV3]], [[CONV]] +; CHECK-NEXT: [[SHR_I40:%.*]] = lshr i64 [[MUL7]], 32 +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[SHR_I40]], [[MUL5]] +; CHECK-NEXT: [[SHR_I39:%.*]] = lshr i64 [[ADD]], 32 +; CHECK-NEXT: [[ADD10:%.*]] = add i64 [[SHR_I39]], [[MUL]] +; CHECK-NEXT: [[CONV14:%.*]] = and i64 [[ADD]], 4294967295 +; CHECK-NEXT: [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]] +; CHECK-NEXT: [[SHR_I:%.*]] = lshr i64 [[ADD15]], 32 +; CHECK-NEXT: [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I]] +; CHECK-NEXT: store i64 [[ADD17]], i64* [[RHI:%.*]], align 8 +; CHECK-NEXT: [[MULLO:%.*]] = mul i64 [[B]], [[A]] +; CHECK-NEXT: ret i64 [[MULLO]] +; + %conv = and i64 %a, 4294967295 + %shr.i43 = lshr i64 %a, 32 + %conv3 = and i64 %b, 4294967295 + %shr.i41 = lshr i64 %b, 32 + %mul = mul nuw i64 %shr.i41, %shr.i43 + %mul5 = mul nuw i64 %conv3, %shr.i43 + %mul6 = mul nuw i64 %shr.i41, %conv + %mul7 = mul nuw i64 %conv3, %conv + %shr.i40 = lshr i64 %mul7, 32 + %add = add i64 %shr.i40, %mul5 + %shr.i39 = lshr i64 %add, 32 + %add10 = add i64 %shr.i39, %mul + %conv14 = and i64 %add, 4294967295 + %add15 = add i64 %conv14, %mul6 + %shr.i = lshr i64 %add15, 32 + %add17 = add i64 %add10, %shr.i + store i64 %add17, i64* %rhi, align 8 + %mullo = mul i64 %b, %a + ret i64 %mullo +} 
+ +define i64 @mul_full_64_variant2(i64 %a, i64 %b, i64* nocapture %rhi) { +; CHECK-LABEL: @mul_full_64_variant2( +; CHECK-NEXT: [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295 +; CHECK-NEXT: [[SHR_I58:%.*]] = lshr i64 [[A]], 32 +; CHECK-NEXT: [[CONV3:%.*]] = and i64 [[B:%.*]], 4294967295 +; CHECK-NEXT: [[SHR_I56:%.*]] = lshr i64 [[B]], 32 +; CHECK-NEXT: [[MUL:%.*]] = mul nuw i64 [[SHR_I56]], [[SHR_I58]] +; CHECK-NEXT: [[MUL5:%.*]] = mul nuw i64 [[CONV3]], [[SHR_I58]] +; CHECK-NEXT: [[MUL6:%.*]] = mul nuw i64 [[SHR_I56]], [[CONV]] +; CHECK-NEXT: [[MUL7:%.*]] = mul nuw i64 [[CONV3]], [[CONV]] +; CHECK-NEXT: [[SHR_I55:%.*]] = lshr i64 [[MUL7]], 32 +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[SHR_I55]], [[MUL5]] +; CHECK-NEXT: [[SHR_I54:%.*]] = lshr i64 [[ADD]], 32 +; CHECK-NEXT: [[ADD10:%.*]] = add i64 [[SHR_I54]], [[MUL]] +; CHECK-NEXT: [[CONV14:%.*]] = and i64 [[ADD]], 4294967295 +; CHECK-NEXT: [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]] +; CHECK-NEXT: [[SHR_I51:%.*]] = lshr i64 [[ADD15]], 32 +; CHECK-NEXT: [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I51]] +; CHECK-NEXT: store i64 [[ADD17]], i64* [[RHI:%.*]], align 8 +; CHECK-NEXT: [[CONV24:%.*]] = shl i64 [[ADD15]], 32 +; CHECK-NEXT: [[CONV26:%.*]] = and i64 [[MUL7]], 4294967295 +; CHECK-NEXT: [[ADD27:%.*]] = or i64 [[CONV24]], [[CONV26]] +; CHECK-NEXT: ret i64 [[ADD27]] +; + %conv = and i64 %a, 4294967295 + %shr.i58 = lshr i64 %a, 32 + %conv3 = and i64 %b, 4294967295 + %shr.i56 = lshr i64 %b, 32 + %mul = mul nuw i64 %shr.i56, %shr.i58 + %mul5 = mul nuw i64 %conv3, %shr.i58 + %mul6 = mul nuw i64 %shr.i56, %conv + %mul7 = mul nuw i64 %conv3, %conv + %shr.i55 = lshr i64 %mul7, 32 + %add = add i64 %shr.i55, %mul5 + %shr.i54 = lshr i64 %add, 32 + %add10 = add i64 %shr.i54, %mul + %conv14 = and i64 %add, 4294967295 + %add15 = add i64 %conv14, %mul6 + %shr.i51 = lshr i64 %add15, 32 + %add17 = add i64 %add10, %shr.i51 + store i64 %add17, i64* %rhi, align 8 + %conv24 = shl i64 %add15, 32 + %conv26 = and i64 %mul7, 4294967295 + 
%add27 = or i64 %conv24, %conv26 + ret i64 %add27 +} + +define i64 @mul_full_64_variant3(i64 %a, i64 %b, i64* nocapture %rhi) { +; CHECK-LABEL: @mul_full_64_variant3( +; CHECK-NEXT: [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295 +; CHECK-NEXT: [[SHR_I45:%.*]] = lshr i64 [[A]], 32 +; CHECK-NEXT: [[CONV3:%.*]] = and i64 [[B:%.*]], 4294967295 +; CHECK-NEXT: [[SHR_I43:%.*]] = lshr i64 [[B]], 32 +; CHECK-NEXT: [[MUL:%.*]] = mul nuw i64 [[SHR_I43]], [[SHR_I45]] +; CHECK-NEXT: [[MUL5:%.*]] = mul nuw i64 [[CONV3]], [[SHR_I45]] +; CHECK-NEXT: [[MUL6:%.*]] = mul nuw i64 [[SHR_I43]], [[CONV]] +; CHECK-NEXT: [[MUL7:%.*]] = mul nuw i64 [[CONV3]], [[CONV]] +; CHECK-NEXT: [[SHR_I42:%.*]] = lshr i64 [[MUL7]], 32 +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[SHR_I42]], [[MUL5]] +; CHECK-NEXT: [[SHR_I41:%.*]] = lshr i64 [[ADD]], 32 +; CHECK-NEXT: [[ADD10:%.*]] = add i64 [[SHR_I41]], [[MUL]] +; CHECK-NEXT: [[CONV14:%.*]] = and i64 [[ADD]], 4294967295 +; CHECK-NEXT: [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]] +; CHECK-NEXT: [[SHR_I:%.*]] = lshr i64 [[ADD15]], 32 +; CHECK-NEXT: [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I]] +; CHECK-NEXT: store i64 [[ADD17]], i64* [[RHI:%.*]], align 8 +; CHECK-NEXT: [[ADD18:%.*]] = add i64 [[MUL6]], [[MUL5]] +; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[ADD18]], 32 +; CHECK-NEXT: [[ADD19:%.*]] = add i64 [[SHL]], [[MUL7]] +; CHECK-NEXT: ret i64 [[ADD19]] +; + %conv = and i64 %a, 4294967295 + %shr.i45 = lshr i64 %a, 32 + %conv3 = and i64 %b, 4294967295 + %shr.i43 = lshr i64 %b, 32 + %mul = mul nuw i64 %shr.i43, %shr.i45 + %mul5 = mul nuw i64 %conv3, %shr.i45 + %mul6 = mul nuw i64 %shr.i43, %conv + %mul7 = mul nuw i64 %conv3, %conv + %shr.i42 = lshr i64 %mul7, 32 + %add = add i64 %shr.i42, %mul5 + %shr.i41 = lshr i64 %add, 32 + %add10 = add i64 %shr.i41, %mul + %conv14 = and i64 %add, 4294967295 + %add15 = add i64 %conv14, %mul6 + %shr.i = lshr i64 %add15, 32 + %add17 = add i64 %add10, %shr.i + store i64 %add17, i64* %rhi, align 8 + %add18 = add i64 %mul6, %mul5 + %shl = 
shl i64 %add18, 32 + %add19 = add i64 %shl, %mul7 + ret i64 %add19 +} + + +define { i32, i32 } @mul_full_32(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_full_32( +; CHECK-NEXT: [[XL:%.*]] = and i32 [[X:%.*]], 65535 +; CHECK-NEXT: [[XH:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[YL:%.*]] = and i32 [[Y:%.*]], 65535 +; CHECK-NEXT: [[YH:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[T0:%.*]] = mul nuw i32 [[YL]], [[XL]] +; CHECK-NEXT: [[T1:%.*]] = mul nuw i32 [[YL]], [[XH]] +; CHECK-NEXT: [[T2:%.*]] = mul nuw i32 [[YH]], [[XL]] +; CHECK-NEXT: [[T3:%.*]] = mul nuw i32 [[YH]], [[XH]] +; CHECK-NEXT: [[T0L:%.*]] = and i32 [[T0]], 65535 +; CHECK-NEXT: [[T0H:%.*]] = lshr i32 [[T0]], 16 +; CHECK-NEXT: [[U0:%.*]] = add i32 [[T0H]], [[T1]] +; CHECK-NEXT: [[U0L:%.*]] = and i32 [[U0]], 65535 +; CHECK-NEXT: [[U0H:%.*]] = lshr i32 [[U0]], 16 +; CHECK-NEXT: [[U1:%.*]] = add i32 [[U0L]], [[T2]] +; CHECK-NEXT: [[U1LS:%.*]] = shl i32 [[U1]], 16 +; CHECK-NEXT: [[U1H:%.*]] = lshr i32 [[U1]], 16 +; CHECK-NEXT: [[U2:%.*]] = add i32 [[U0H]], [[T3]] +; CHECK-NEXT: [[LO:%.*]] = or i32 [[U1LS]], [[T0L]] +; CHECK-NEXT: [[HI:%.*]] = add i32 [[U2]], [[U1H]] +; CHECK-NEXT: [[RES_LO:%.*]] = insertvalue { i32, i32 } undef, i32 [[LO]], 0 +; CHECK-NEXT: [[RES:%.*]] = insertvalue { i32, i32 } [[RES_LO]], i32 [[HI]], 1 +; CHECK-NEXT: ret { i32, i32 } [[RES]] +; + %xl = and i32 %x, 65535 + %xh = lshr i32 %x, 16 + %yl = and i32 %y, 65535 + %yh = lshr i32 %y, 16 + + %t0 = mul nuw i32 %yl, %xl + %t1 = mul nuw i32 %yl, %xh + %t2 = mul nuw i32 %yh, %xl + %t3 = mul nuw i32 %yh, %xh + + %t0l = and i32 %t0, 65535 + %t0h = lshr i32 %t0, 16 + + %u0 = add i32 %t0h, %t1 + %u0l = and i32 %u0, 65535 + %u0h = lshr i32 %u0, 16 + + %u1 = add i32 %u0l, %t2 + %u1ls = shl i32 %u1, 16 + %u1h = lshr i32 %u1, 16 + + %u2 = add i32 %u0h, %t3 + + %lo = or i32 %u1ls, %t0l + %hi = add i32 %u2, %u1h + + %res_lo = insertvalue { i32, i32 } undef, i32 %lo, 0 + %res = insertvalue { i32, i32 } %res_lo, i32 %hi, 1 + ret { i32, i32 } %res +} + + 
+declare i64 @get_number() + +; In the following test cases %x and %y are instructions, not arguments. +; This tests the placement of mul i128 and zexts. +; Instructions are also shuffled. + +define { i64, i64 } @mul_full_64_variant0_1() { +; CHECK-LABEL: @mul_full_64_variant0_1( +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @get_number() +; CHECK-NEXT: [[YL:%.*]] = and i64 [[TMP1]], 4294967295 +; CHECK-NEXT: [[YH:%.*]] = lshr i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @get_number() +; CHECK-NEXT: [[XH:%.*]] = lshr i64 [[TMP2]], 32 +; CHECK-NEXT: [[XL:%.*]] = and i64 [[TMP2]], 4294967295 +; CHECK-NEXT: [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]] +; CHECK-NEXT: [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]] +; CHECK-NEXT: [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]] +; CHECK-NEXT: [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]] +; CHECK-NEXT: [[T0H:%.*]] = lshr i64 [[T0]], 32 +; CHECK-NEXT: [[U0:%.*]] = add i64 [[T0H]], [[T1]] +; CHECK-NEXT: [[U0L:%.*]] = and i64 [[U0]], 4294967295 +; CHECK-NEXT: [[U1:%.*]] = add i64 [[U0L]], [[T2]] +; CHECK-NEXT: [[U0H:%.*]] = lshr i64 [[U0]], 32 +; CHECK-NEXT: [[U2:%.*]] = add i64 [[U0H]], [[T3]] +; CHECK-NEXT: [[U1H:%.*]] = lshr i64 [[U1]], 32 +; CHECK-NEXT: [[HI:%.*]] = add i64 [[U2]], [[U1H]] +; CHECK-NEXT: [[U1LS:%.*]] = shl i64 [[U1]], 32 +; CHECK-NEXT: [[T0L:%.*]] = and i64 [[T0]], 4294967295 +; CHECK-NEXT: [[LO:%.*]] = or i64 [[U1LS]], [[T0L]] +; CHECK-NEXT: [[RES_LO:%.*]] = insertvalue { i64, i64 } undef, i64 [[LO]], 0 +; CHECK-NEXT: [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1 +; CHECK-NEXT: ret { i64, i64 } [[RES]] +; + %1 = call i64 @get_number() + %yl = and i64 %1, 4294967295 + %yh = lshr i64 %1, 32 + + %2 = call i64 @get_number() + %xh = lshr i64 %2, 32 + %xl = and i64 %2, 4294967295 + + %t1 = mul nuw i64 %yl, %xh + %t3 = mul nuw i64 %yh, %xh + %t2 = mul nuw i64 %yh, %xl + %t0 = mul nuw i64 %yl, %xl + + %t0h = lshr i64 %t0, 32 + %u0 = add i64 %t0h, %t1 + %u0l = and i64 %u0, 4294967295 + %u1 = add i64 %u0l, %t2 + 
%u0h = lshr i64 %u0, 32 + %u2 = add i64 %u0h, %t3 + %u1h = lshr i64 %u1, 32 + %hi = add i64 %u2, %u1h + + %u1ls = shl i64 %u1, 32 + %t0l = and i64 %t0, 4294967295 + %lo = or i64 %u1ls, %t0l + + %res_lo = insertvalue { i64, i64 } undef, i64 %lo, 0 + %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1 + ret { i64, i64 } %res +} + +define { i64, i64 } @mul_full_64_variant0_2() { +; CHECK-LABEL: @mul_full_64_variant0_2( +; CHECK-NEXT: [[X:%.*]] = call i64 @get_number() +; CHECK-NEXT: [[Y:%.*]] = call i64 @get_number() +; CHECK-NEXT: [[YL:%.*]] = and i64 [[Y]], 4294967295 +; CHECK-NEXT: [[YH:%.*]] = lshr i64 [[Y]], 32 +; CHECK-NEXT: [[XH:%.*]] = lshr i64 [[X]], 32 +; CHECK-NEXT: [[XL:%.*]] = and i64 [[X]], 4294967295 +; CHECK-NEXT: [[T3:%.*]] = mul nuw i64 [[XH]], [[YH]] +; CHECK-NEXT: [[T2:%.*]] = mul nuw i64 [[XL]], [[YH]] +; CHECK-NEXT: [[T1:%.*]] = mul nuw i64 [[XH]], [[YL]] +; CHECK-NEXT: [[T0:%.*]] = mul nuw i64 [[XL]], [[YL]] +; CHECK-NEXT: [[T0H:%.*]] = lshr i64 [[T0]], 32 +; CHECK-NEXT: [[U0:%.*]] = add i64 [[T1]], [[T0H]] +; CHECK-NEXT: [[U0L:%.*]] = and i64 [[U0]], 4294967295 +; CHECK-NEXT: [[U1:%.*]] = add i64 [[T2]], [[U0L]] +; CHECK-NEXT: [[U0H:%.*]] = lshr i64 [[U0]], 32 +; CHECK-NEXT: [[U2:%.*]] = add i64 [[U0H]], [[T3]] +; CHECK-NEXT: [[U1H:%.*]] = lshr i64 [[U1]], 32 +; CHECK-NEXT: [[HI:%.*]] = add i64 [[U1H]], [[U2]] +; CHECK-NEXT: [[U1LS:%.*]] = shl i64 [[U1]], 32 +; CHECK-NEXT: [[T0L:%.*]] = and i64 [[T0]], 4294967295 +; CHECK-NEXT: [[LO:%.*]] = or i64 [[T0L]], [[U1LS]] +; CHECK-NEXT: [[RES_LO:%.*]] = insertvalue { i64, i64 } undef, i64 [[LO]], 0 +; CHECK-NEXT: [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1 +; CHECK-NEXT: ret { i64, i64 } [[RES]] +; + %x = call i64 @get_number() + %y = call i64 @get_number() + + %yl = and i64 %y, 4294967295 + %yh = lshr i64 %y, 32 + %xh = lshr i64 %x, 32 + %xl = and i64 %x, 4294967295 + + %t3 = mul nuw i64 %xh, %yh + %t2 = mul nuw i64 %xl, %yh + %t1 = mul nuw i64 %xh, %yl + %t0 = mul nuw i64 %xl, 
%yl + + %t0h = lshr i64 %t0, 32 + %u0 = add i64 %t1, %t0h + %u0l = and i64 %u0, 4294967295 + %u1 = add i64 %t2, %u0l + %u0h = lshr i64 %u0, 32 + %u2 = add i64 %u0h, %t3 + %u1h = lshr i64 %u1, 32 + %hi = add i64 %u1h, %u2 + + %u1ls = shl i64 %u1, 32 + %t0l = and i64 %t0, 4294967295 + %lo = or i64 %t0l, %u1ls + + %res_lo = insertvalue { i64, i64 } undef, i64 %lo, 0 + %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1 + ret { i64, i64 } %res +} + + +define i64 @umulh_64(i64 %x, i64 %y) { +; CHECK-LABEL: @umulh_64( +; CHECK-NEXT: [[XL:%.*]] = and i64 [[X:%.*]], 4294967295 +; CHECK-NEXT: [[XH:%.*]] = lshr i64 [[X]], 32 +; CHECK-NEXT: [[YL:%.*]] = and i64 [[Y:%.*]], 4294967295 +; CHECK-NEXT: [[YH:%.*]] = lshr i64 [[Y]], 32 +; CHECK-NEXT: [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]] +; CHECK-NEXT: [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]] +; CHECK-NEXT: [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]] +; CHECK-NEXT: [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]] +; CHECK-NEXT: [[T0H:%.*]] = lshr i64 [[T0]], 32 +; CHECK-NEXT: [[U0:%.*]] = add i64 [[T0H]], [[T1]] +; CHECK-NEXT: [[U0L:%.*]] = and i64 [[U0]], 4294967295 +; CHECK-NEXT: [[U0H:%.*]] = lshr i64 [[U0]], 32 +; CHECK-NEXT: [[U1:%.*]] = add i64 [[U0L]], [[T2]] +; CHECK-NEXT: [[U1H:%.*]] = lshr i64 [[U1]], 32 +; CHECK-NEXT: [[U2:%.*]] = add i64 [[U0H]], [[T3]] +; CHECK-NEXT: [[HI:%.*]] = add i64 [[U2]], [[U1H]] +; CHECK-NEXT: ret i64 [[HI]] +; + %xl = and i64 %x, 4294967295 + %xh = lshr i64 %x, 32 + %yl = and i64 %y, 4294967295 + %yh = lshr i64 %y, 32 + + %t0 = mul nuw i64 %yl, %xl + %t1 = mul nuw i64 %yl, %xh + %t2 = mul nuw i64 %yh, %xl + %t3 = mul nuw i64 %yh, %xh + + %t0h = lshr i64 %t0, 32 + + %u0 = add i64 %t0h, %t1 + %u0l = and i64 %u0, 4294967295 + %u0h = lshr i64 %u0, 32 + + %u1 = add i64 %u0l, %t2 + %u1h = lshr i64 %u1, 32 + + %u2 = add i64 %u0h, %t3 + + %hi = add i64 %u2, %u1h + ret i64 %hi +} + + +define i64 @mullo(i64 %x, i64 %y) { +; CHECK-LABEL: @mullo( +; CHECK-NEXT: [[XL:%.*]] = and i64 [[X:%.*]], 4294967295 +; CHECK-NEXT: 
[[XH:%.*]] = lshr i64 [[X]], 32 +; CHECK-NEXT: [[YL:%.*]] = and i64 [[Y:%.*]], 4294967295 +; CHECK-NEXT: [[YH:%.*]] = lshr i64 [[Y]], 32 +; CHECK-NEXT: [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]] +; CHECK-NEXT: [[T1:%.*]] = mul i64 [[XH]], [[Y]] +; CHECK-NEXT: [[T2:%.*]] = mul i64 [[YH]], [[X]] +; CHECK-NEXT: [[T0L:%.*]] = and i64 [[T0]], 4294967295 +; CHECK-NEXT: [[T0H:%.*]] = lshr i64 [[T0]], 32 +; CHECK-NEXT: [[U0:%.*]] = add i64 [[T0H]], [[T1]] +; CHECK-NEXT: [[U1:%.*]] = add i64 [[U0]], [[T2]] +; CHECK-NEXT: [[U1LS:%.*]] = shl i64 [[U1]], 32 +; CHECK-NEXT: [[LO:%.*]] = or i64 [[U1LS]], [[T0L]] +; CHECK-NEXT: ret i64 [[LO]] +; + %xl = and i64 %x, 4294967295 + %xh = lshr i64 %x, 32 + %yl = and i64 %y, 4294967295 + %yh = lshr i64 %y, 32 + + %t0 = mul nuw i64 %yl, %xl + %t1 = mul nuw i64 %yl, %xh + %t2 = mul nuw i64 %yh, %xl + + %t0l = and i64 %t0, 4294967295 + %t0h = lshr i64 %t0, 32 + + %u0 = add i64 %t0h, %t1 + %u0l = and i64 %u0, 4294967295 + + %u1 = add i64 %u0l, %t2 + %u1ls = shl i64 %u1, 32 + + %lo = or i64 %u1ls, %t0l + ret i64 %lo +} + + +define i64 @mullo_variant3(i64 %a, i64 %b) { +; CHECK-LABEL: @mullo_variant3( +; CHECK-NEXT: [[AL:%.*]] = and i64 [[A:%.*]], 4294967295 +; CHECK-NEXT: [[AH:%.*]] = lshr i64 [[A]], 32 +; CHECK-NEXT: [[BL:%.*]] = and i64 [[B:%.*]], 4294967295 +; CHECK-NEXT: [[BH:%.*]] = lshr i64 [[B]], 32 +; CHECK-NEXT: [[T0:%.*]] = mul nuw i64 [[BL]], [[AL]] +; CHECK-NEXT: [[T1:%.*]] = mul i64 [[AH]], [[B]] +; CHECK-NEXT: [[T2:%.*]] = mul i64 [[BH]], [[A]] +; CHECK-NEXT: [[U1:%.*]] = add i64 [[T2]], [[T1]] +; CHECK-NEXT: [[U1LS:%.*]] = shl i64 [[U1]], 32 +; CHECK-NEXT: [[LO:%.*]] = add i64 [[U1LS]], [[T0]] +; CHECK-NEXT: ret i64 [[LO]] +; + %al = and i64 %a, 4294967295 + %ah = lshr i64 %a, 32 + %bl = and i64 %b, 4294967295 + %bh = lshr i64 %b, 32 + + %t0 = mul nuw i64 %bl, %al + %t1 = mul nuw i64 %bl, %ah + %t2 = mul nuw i64 %bh, %al + + %u1 = add i64 %t2, %t1 + %u1ls = shl i64 %u1, 32 + + %lo = add i64 %u1ls, %t0 + ret i64 %lo +} + + 
+declare void @eat_i64(i64) +declare void @eat_i128(i128) + +define i64 @mullo_duplicate(i64 %x, i64 %y) { +; CHECK-LABEL: @mullo_duplicate( +; CHECK-NEXT: [[DUPLICATED_MUL:%.*]] = mul i64 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: call void @eat_i64(i64 [[DUPLICATED_MUL]]) +; CHECK-NEXT: [[XL:%.*]] = and i64 [[X]], 4294967295 +; CHECK-NEXT: [[XH:%.*]] = lshr i64 [[X]], 32 +; CHECK-NEXT: [[YL:%.*]] = and i64 [[Y]], 4294967295 +; CHECK-NEXT: [[YH:%.*]] = lshr i64 [[Y]], 32 +; CHECK-NEXT: [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]] +; CHECK-NEXT: [[T1:%.*]] = mul i64 [[XH]], [[Y]] +; CHECK-NEXT: [[T2:%.*]] = mul i64 [[YH]], [[X]] +; CHECK-NEXT: [[T0L:%.*]] = and i64 [[T0]], 4294967295 +; CHECK-NEXT: [[T0H:%.*]] = lshr i64 [[T0]], 32 +; CHECK-NEXT: [[U0:%.*]] = add i64 [[T0H]], [[T1]] +; CHECK-NEXT: [[U1:%.*]] = add i64 [[U0]], [[T2]] +; CHECK-NEXT: [[U1LS:%.*]] = shl i64 [[U1]], 32 +; CHECK-NEXT: [[LO:%.*]] = or i64 [[U1LS]], [[T0L]] +; CHECK-NEXT: ret i64 [[LO]] +; + %duplicated_mul = mul i64 %x, %y + call void @eat_i64(i64 %duplicated_mul) + + %xl = and i64 %x, 4294967295 + %xh = lshr i64 %x, 32 + %yl = and i64 %y, 4294967295 + %yh = lshr i64 %y, 32 + + %t0 = mul nuw i64 %yl, %xl + %t1 = mul nuw i64 %yl, %xh + %t2 = mul nuw i64 %yh, %xl + + %t0l = and i64 %t0, 4294967295 + %t0h = lshr i64 %t0, 32 + + %u0 = add i64 %t0h, %t1 + %u0l = and i64 %u0, 4294967295 + + %u1 = add i64 %u0l, %t2 + %u1ls = shl i64 %u1, 32 + + %lo = or i64 %u1ls, %t0l + ret i64 %lo +} + +define { i64, i64 } @mul_full_64_duplicate(i64 %x, i64 %y) { +; CHECK-LABEL: @mul_full_64_duplicate( +; CHECK-NEXT: [[XX:%.*]] = zext i64 [[X:%.*]] to i128 +; CHECK-NEXT: [[YY:%.*]] = zext i64 [[Y:%.*]] to i128 +; CHECK-NEXT: [[DUPLICATED_MUL:%.*]] = mul nuw i128 [[XX]], [[YY]] +; CHECK-NEXT: call void @eat_i128(i128 [[DUPLICATED_MUL]]) +; CHECK-NEXT: [[XL:%.*]] = and i64 [[X]], 4294967295 +; CHECK-NEXT: [[XH:%.*]] = lshr i64 [[X]], 32 +; CHECK-NEXT: [[YL:%.*]] = and i64 [[Y]], 4294967295 +; CHECK-NEXT: [[YH:%.*]] = lshr 
i64 [[Y]], 32 +; CHECK-NEXT: [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]] +; CHECK-NEXT: [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]] +; CHECK-NEXT: [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]] +; CHECK-NEXT: [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]] +; CHECK-NEXT: [[T0L:%.*]] = and i64 [[T0]], 4294967295 +; CHECK-NEXT: [[T0H:%.*]] = lshr i64 [[T0]], 32 +; CHECK-NEXT: [[U0:%.*]] = add i64 [[T0H]], [[T1]] +; CHECK-NEXT: [[U0L:%.*]] = and i64 [[U0]], 4294967295 +; CHECK-NEXT: [[U0H:%.*]] = lshr i64 [[U0]], 32 +; CHECK-NEXT: [[U1:%.*]] = add i64 [[U0L]], [[T2]] +; CHECK-NEXT: [[U1LS:%.*]] = shl i64 [[U1]], 32 +; CHECK-NEXT: [[U1H:%.*]] = lshr i64 [[U1]], 32 +; CHECK-NEXT: [[U2:%.*]] = add i64 [[U0H]], [[T3]] +; CHECK-NEXT: [[LO:%.*]] = or i64 [[U1LS]], [[T0L]] +; CHECK-NEXT: [[HI:%.*]] = add i64 [[U2]], [[U1H]] +; CHECK-NEXT: [[RES_LO:%.*]] = insertvalue { i64, i64 } undef, i64 [[LO]], 0 +; CHECK-NEXT: [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1 +; CHECK-NEXT: ret { i64, i64 } [[RES]] +; + %xx = zext i64 %x to i128 + %yy = zext i64 %y to i128 + %duplicated_mul = mul i128 %xx, %yy + call void @eat_i128(i128 %duplicated_mul) + + %xl = and i64 %x, 4294967295 + %xh = lshr i64 %x, 32 + %yl = and i64 %y, 4294967295 + %yh = lshr i64 %y, 32 + + %t0 = mul nuw i64 %yl, %xl + %t1 = mul nuw i64 %yl, %xh + %t2 = mul nuw i64 %yh, %xl + %t3 = mul nuw i64 %yh, %xh + + %t0l = and i64 %t0, 4294967295 + %t0h = lshr i64 %t0, 32 + + %u0 = add i64 %t0h, %t1 + %u0l = and i64 %u0, 4294967295 + %u0h = lshr i64 %u0, 32 + + %u1 = add i64 %u0l, %t2 + %u1ls = shl i64 %u1, 32 + %u1h = lshr i64 %u1, 32 + + %u2 = add i64 %u0h, %t3 + + %lo = or i64 %u1ls, %t0l + %hi = add i64 %u2, %u1h + + %res_lo = insertvalue { i64, i64 } undef, i64 %lo, 0 + %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1 + ret { i64, i64 } %res +} + + +define i64 @umulhi_64_v2() { +; CHECK-LABEL: @umulhi_64_v2( +; CHECK-NEXT: [[X:%.*]] = call i64 @get_number() +; CHECK-NEXT: [[Y:%.*]] = call i64 @get_number() +; 
CHECK-NEXT: [[YL:%.*]] = and i64 [[Y]], 4294967295 +; CHECK-NEXT: [[YH:%.*]] = lshr i64 [[Y]], 32 +; CHECK-NEXT: [[XH:%.*]] = lshr i64 [[X]], 32 +; CHECK-NEXT: [[XL:%.*]] = and i64 [[X]], 4294967295 +; CHECK-NEXT: [[T3:%.*]] = mul nuw i64 [[XH]], [[YH]] +; CHECK-NEXT: [[T2:%.*]] = mul nuw i64 [[XL]], [[YH]] +; CHECK-NEXT: [[T1:%.*]] = mul nuw i64 [[XH]], [[YL]] +; CHECK-NEXT: [[T0:%.*]] = mul nuw i64 [[XL]], [[YL]] +; CHECK-NEXT: [[T0H:%.*]] = lshr i64 [[T0]], 32 +; CHECK-NEXT: [[U0:%.*]] = add i64 [[T1]], [[T0H]] +; CHECK-NEXT: [[U0L:%.*]] = and i64 [[U0]], 4294967295 +; CHECK-NEXT: [[U1:%.*]] = add i64 [[T2]], [[U0L]] +; CHECK-NEXT: [[U0H:%.*]] = lshr i64 [[U0]], 32 +; CHECK-NEXT: [[U2:%.*]] = add i64 [[U0H]], [[T3]] +; CHECK-NEXT: [[U1H:%.*]] = lshr i64 [[U1]], 32 +; CHECK-NEXT: [[HI:%.*]] = add i64 [[U1H]], [[U2]] +; CHECK-NEXT: ret i64 [[HI]] +; + %x = call i64 @get_number() + %y = call i64 @get_number() + + %yl = and i64 %y, 4294967295 + %yh = lshr i64 %y, 32 + %xh = lshr i64 %x, 32 + %xl = and i64 %x, 4294967295 + + %t3 = mul nuw i64 %xh, %yh + %t2 = mul nuw i64 %xl, %yh + %t1 = mul nuw i64 %xh, %yl + %t0 = mul nuw i64 %xl, %yl + + %t0h = lshr i64 %t0, 32 + %u0 = add i64 %t1, %t0h + %u0l = and i64 %u0, 4294967295 + %u1 = add i64 %t2, %u0l + %u0h = lshr i64 %u0, 32 + %u2 = add i64 %u0h, %t3 + %u1h = lshr i64 %u1, 32 + %hi = add i64 %u1h, %u2 + + ret i64 %hi +} + + +define i64 @umulhi_64_v3() { +; CHECK-LABEL: @umulhi_64_v3( +; CHECK-NEXT: [[X:%.*]] = call i64 @get_number() +; CHECK-NEXT: [[XH:%.*]] = lshr i64 [[X]], 32 +; CHECK-NEXT: [[XL:%.*]] = and i64 [[X]], 4294967295 +; CHECK-NEXT: [[Y:%.*]] = call i64 @get_number() +; CHECK-NEXT: [[YL:%.*]] = and i64 [[Y]], 4294967295 +; CHECK-NEXT: [[YH:%.*]] = lshr i64 [[Y]], 32 +; CHECK-NEXT: [[T3:%.*]] = mul nuw i64 [[XH]], [[YH]] +; CHECK-NEXT: [[T2:%.*]] = mul nuw i64 [[XL]], [[YH]] +; CHECK-NEXT: [[T1:%.*]] = mul nuw i64 [[XH]], [[YL]] +; CHECK-NEXT: [[T0:%.*]] = mul nuw i64 [[XL]], [[YL]] +; CHECK-NEXT: 
[[T0H:%.*]] = lshr i64 [[T0]], 32 +; CHECK-NEXT: [[U0:%.*]] = add i64 [[T1]], [[T0H]] +; CHECK-NEXT: [[U0L:%.*]] = and i64 [[U0]], 4294967295 +; CHECK-NEXT: [[U1:%.*]] = add i64 [[T2]], [[U0L]] +; CHECK-NEXT: [[U0H:%.*]] = lshr i64 [[U0]], 32 +; CHECK-NEXT: [[U2:%.*]] = add i64 [[U0H]], [[T3]] +; CHECK-NEXT: [[U1H:%.*]] = lshr i64 [[U1]], 32 +; CHECK-NEXT: [[HI:%.*]] = add i64 [[U1H]], [[U2]] +; CHECK-NEXT: ret i64 [[HI]] +; + %x = call i64 @get_number() + %xh = lshr i64 %x, 32 + %xl = and i64 %x, 4294967295 + + %y = call i64 @get_number() + %yl = and i64 %y, 4294967295 + %yh = lshr i64 %y, 32 + + %t3 = mul nuw i64 %xh, %yh + %t2 = mul nuw i64 %xl, %yh + %t1 = mul nuw i64 %xh, %yl + %t0 = mul nuw i64 %xl, %yl + + %t0h = lshr i64 %t0, 32 + %u0 = add i64 %t1, %t0h + %u0l = and i64 %u0, 4294967295 + %u1 = add i64 %t2, %u0l + %u0h = lshr i64 %u0, 32 + %u2 = add i64 %u0h, %t3 + %u1h = lshr i64 %u1, 32 + %hi = add i64 %u1h, %u2 + + ret i64 %hi +} -- 2.7.4