From ec7063ac7712f82c6ff62c75ffc8ed02db6dbf58 Mon Sep 17 00:00:00 2001 From: Silviu Baranga Date: Mon, 15 Feb 2016 15:38:17 +0000 Subject: [PATCH] [LV] Add support for insertelt/extractelt processing during type truncation Summary: While shrinking types according to the required bits, we can encounter insert/extract element instructions. This will cause us to reach an llvm_unreachable statement. This change adds support for truncating insert/extract element operations, and adds a regression test. Reviewers: jmolloy Subscribers: mzolotukhin, llvm-commits Differential Revision: http://reviews.llvm.org/D17078 llvm-svn: 260893 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14 +++++++ .../AArch64/type-shrinkage-insertelt.ll | 47 ++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c5a10ba..a32a0ad 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3187,6 +3187,9 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { if (TruncatedTy == OriginalTy) continue; + if (!isa<Instruction>(I)) + continue; + IRBuilder<> B(cast<Instruction>(I)); auto ShrinkOperand = [&](Value *V) -> Value* { if (auto *ZI = dyn_cast<ZExtInst>(V)) @@ -3242,6 +3245,17 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { } else if (isa<LoadInst>(I)) { // Don't do anything with the operands, just extend the result. 
continue; + } else if (auto *IE = dyn_cast<InsertElementInst>(I)) { + auto Elements = IE->getOperand(0)->getType()->getVectorNumElements(); + auto *O0 = B.CreateZExtOrTrunc( + IE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements)); + auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy); + NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2)); + } else if (auto *EE = dyn_cast<ExtractElementInst>(I)) { + auto Elements = EE->getOperand(0)->getType()->getVectorNumElements(); + auto *O0 = B.CreateZExtOrTrunc( + EE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements)); + NewI = B.CreateExtractElement(O0, EE->getOperand(2)); } else { llvm_unreachable("Unhandled instruction type!"); } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll new file mode 100644 index 0000000..ffe8480 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll @@ -0,0 +1,47 @@ +; RUN: opt -S < %s -loop-vectorize -force-vector-width=4 | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +; CHECK-LABEL: test0 +define void @test0(i16* noalias %M3) { +entry: + br label %if.then1165.us + +if.then1165.us: ; preds = %if.then1165.us, %entry + %indvars.iv1783 = phi i64 [ 0, %entry ], [ %indvars.iv.next1784, %if.then1165.us ] + %conv1177.us = zext i16 undef to i32 + %add1178.us = add nsw i32 %conv1177.us, undef + %conv1179.us = trunc i32 %add1178.us to i16 + %idxprom1181.us = ashr exact i64 undef, 32 + %arrayidx1185.us = getelementptr inbounds i16, i16* %M3, i64 %idxprom1181.us + store i16 %conv1179.us, i16* %arrayidx1185.us, align 2 + %indvars.iv.next1784 = add nuw nsw i64 %indvars.iv1783, 1 + %exitcond1785 = icmp eq i64 %indvars.iv.next1784, 16 + br i1 %exitcond1785, label %for.inc1286.loopexit, label %if.then1165.us + +for.inc1286.loopexit: ; preds = %if.then1165.us + ret void +} + +; CHECK-LABEL: test1 +define 
void @test1(i16* noalias %M3) { +entry: + br label %if.then1165.us + +if.then1165.us: ; preds = %if.then1165.us, %entry + %indvars.iv1783 = phi i64 [ 0, %entry ], [ %indvars.iv.next1784, %if.then1165.us ] + %fptr = load i32, i32* undef, align 4 + %conv1177.us = zext i16 undef to i32 + %add1178.us = add nsw i32 %conv1177.us, %fptr + %conv1179.us = trunc i32 %add1178.us to i16 + %idxprom1181.us = ashr exact i64 undef, 32 + %arrayidx1185.us = getelementptr inbounds i16, i16* %M3, i64 %idxprom1181.us + store i16 %conv1179.us, i16* %arrayidx1185.us, align 2 + %indvars.iv.next1784 = add nuw nsw i64 %indvars.iv1783, 1 + %exitcond1785 = icmp eq i64 %indvars.iv.next1784, 16 + br i1 %exitcond1785, label %for.inc1286.loopexit, label %if.then1165.us + +for.inc1286.loopexit: ; preds = %if.then1165.us + ret void +} -- 2.7.4