[InstCombine][SSE4a] Fix assertion failure in the insertq/insertqi combining logic.

author Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>

Wed, 7 Sep 2016 12:47:53 +0000 (12:47 +0000)

committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>

Wed, 7 Sep 2016 12:47:53 +0000 (12:47 +0000)
author Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Wed, 7 Sep 2016 12:47:53 +0000 (12:47 +0000)
committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Wed, 7 Sep 2016 12:47:53 +0000 (12:47 +0000)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

index eb33444..ebeba7e 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -722,10 +722,10 @@ static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
    Constant *C0 = dyn_cast<Constant>(Op0);
    Constant *C1 = dyn_cast<Constant>(Op1);
    ConstantInt *CI00 =
-      C0 ? dyn_cast<ConstantInt>(C0->getAggregateElement((unsigned)0))
+      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
           : nullptr;
    ConstantInt *CI10 =
-      C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)0))
+      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
           : nullptr;
  
    // Constant Fold - insert bottom Length bits starting at the Index'th bit.
@@ -1919,7 +1919,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      // See if we're dealing with constant values.
      Constant *C1 = dyn_cast<Constant>(Op1);
      ConstantInt *CI11 =
-        C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)1))
+        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
             : nullptr;
  
      // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
diff --git a/llvm/test/Transforms/InstCombine/x86-sse4a.ll b/llvm/test/Transforms/InstCombine/x86-sse4a.ll

index e135c38..d2714e0 100644 (file)
--- a/llvm/test/Transforms/InstCombine/x86-sse4a.ll
+++ b/llvm/test/Transforms/InstCombine/x86-sse4a.ll
@@ -177,6 +177,15 @@ define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
    ret <2 x i64> %1
  }
  
+define <2 x i64> @test_insertq_call_constexpr(<2 x i64> %x) {
+; CHECK-LABEL: @test_insertq_call_constexpr(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>))
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>))
+  ret <2 x i64> %1
+}
+
  ;
  ; INSERTQI
  ;
@@ -213,6 +222,15 @@ define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
    ret <2 x i64> %1
  }
  
+define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) {
+; CHECK-LABEL: @test_insertqi_call_constexpr(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3)
+  ret <2 x i64> %1
+}
+
  ; The result of this insert is the second arg, since the top 64 bits of
  ; the result are undefined, and we copy the bottom 64 bits from the
  ; second arg
author	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
	Wed, 7 Sep 2016 12:47:53 +0000 (12:47 +0000)
committer	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
	Wed, 7 Sep 2016 12:47:53 +0000 (12:47 +0000)
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp		patch | blob | history
llvm/test/Transforms/InstCombine/x86-sse4a.ll		patch | blob | history