From ba39a6e14a528a14ce2e10c49893a5a5e0e7a4a4 Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Sat, 17 Sep 2022 23:44:23 -0700
Subject: [PATCH] [msan] Instrument vtest intrinsics

Instrumentation just ORs the shadows of the inputs. I assume some
result shadow bits could be reset if we modeled the specifics of the
particular checks, but as-is this is still an improvement over the
existing default strict instruction handler, which reports every set
bit of the input shadow as an error.

Reviewed By: kda

Differential Revision: https://reviews.llvm.org/D134123
---
 .../Transforms/Instrumentation/MemorySanitizer.cpp | 34 +++
 .../MemorySanitizer/avx-intrinsics-x86.ll | 240 +++++++----------
 .../MemorySanitizer/sse41-intrinsics-x86.ll | 48 ++---
 3 files changed, 124 insertions(+), 198 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 2dbadb6..9152550 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3592,6 +3592,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
+  void handleVtestIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *Shadow0 = getShadow(&I, 0);
+    Value *Shadow1 = getShadow(&I, 1);
+    Value *Or = IRB.CreateOr(Shadow0, Shadow1);
+    Value *NZ = IRB.CreateICmpNE(Or, Constant::getNullValue(Or->getType()));
+    Value *Scalar = convertShadowToScalar(NZ, IRB);
+    Value *Shadow = IRB.CreateZExt(Scalar, getShadowTy(&I));
+
+    setShadow(&I, Shadow);
+    setOriginForNaryOp(I);
+  }
+
   void handleBinarySdSsIntrinsic(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
     unsigned Width =
@@ -3900,6 +3913,27 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       handleBinarySdSsIntrinsic(I);
       break;
 
+    case Intrinsic::x86_avx_vtestc_pd:
+    case Intrinsic::x86_avx_vtestc_pd_256:
+    case Intrinsic::x86_avx_vtestc_ps:
+    case Intrinsic::x86_avx_vtestc_ps_256:
+    case Intrinsic::x86_avx_vtestnzc_pd:
+    case Intrinsic::x86_avx_vtestnzc_pd_256:
+    case Intrinsic::x86_avx_vtestnzc_ps:
+    case Intrinsic::x86_avx_vtestnzc_ps_256:
+    case Intrinsic::x86_avx_vtestz_pd:
+    case Intrinsic::x86_avx_vtestz_pd_256:
+    case Intrinsic::x86_avx_vtestz_ps:
+    case Intrinsic::x86_avx_vtestz_ps_256:
+    case Intrinsic::x86_avx_ptestc_256:
+    case Intrinsic::x86_avx_ptestnzc_256:
+    case Intrinsic::x86_avx_ptestz_256:
+    case Intrinsic::x86_sse41_ptestc:
+    case Intrinsic::x86_sse41_ptestnzc:
+    case Intrinsic::x86_sse41_ptestz:
+      handleVtestIntrinsic(I);
+      break;
+
     case Intrinsic::fshl:
     case Intrinsic::fshr:
       handleFunnelShift(I);
diff --git a/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll
index 5a3a9cc..ae2ae61 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll
@@ -812,18 +812,12 @@ define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) #0 {
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = 
icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1] @@ -837,18 +831,12 @@ define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1] @@ -862,18 +850,12 @@ define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: 
[[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1] @@ -1108,18 +1090,12 @@ define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 +; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -1133,18 +1109,12 @@ define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* 
@__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; [#uses=1] @@ -1158,18 +1128,12 @@ define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -1183,18 +1147,12 @@ define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; [#uses=1] @@ -1208,18 +1166,12 @@ define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, 
<2 x double> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 +; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -1233,18 +1185,12 @@ define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) #0 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; [#uses=1] @@ -1258,18 +1204,12 @@ define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 ; CHECK-NEXT: 
call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -1283,18 +1223,12 @@ define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; [#uses=1] @@ -1308,18 +1242,12 @@ define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], 
[[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 +; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -1333,18 +1261,12 @@ define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; [#uses=1] @@ -1358,18 +1280,12 @@ define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> 
[[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -1383,18 +1299,12 @@ define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; [#uses=1] diff --git a/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll index 6a625e5..92fe09c 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll @@ -245,18 +245,12 @@ define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer 
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 +; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestc(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] @@ -270,18 +264,12 @@ define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 +; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] @@ -295,18 +283,12 @@ define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 +; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestz(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) -; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* 
@__msan_retval_tls to i32*), align 8 ; CHECK-NEXT: ret i32 [[RES]] ; %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] -- 2.7.4
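
For illustration beyond the patch itself: below is a minimal C++ sketch of
user code that reaches the new handleVtestIntrinsic path, assuming a typical
MemorySanitizer build (e.g. clang++ -fsanitize=memory -msse4.1 -g). The file
name, variable names, and flags are illustrative, not taken from the patch;
_mm_testz_si128 is the SSE4.1 intrinsic that Clang lowers to
llvm.x86.sse41.ptestz, one of the cases routed to handleVtestIntrinsic above.

// vtest_demo.cpp (hypothetical) -- a sketch, not part of this commit.
#include <smmintrin.h> // SSE4.1: _mm_testz_si128 -> llvm.x86.sse41.ptestz

int main() {
  int init[4] = {0, 0, 0, 0};
  int uninit[4]; // deliberately uninitialized: MSan gives it a nonzero shadow
  __m128i a = _mm_loadu_si128(reinterpret_cast<const __m128i *>(init));
  __m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i *>(uninit));
  // Before this patch, the default strict handler reported an error on the
  // ptestz call itself, because b's shadow is nonzero. With this patch, the
  // ORed input shadow propagates into the shadow of the i32 result instead.
  int z = _mm_testz_si128(a, b);
  if (z) // MSan now reports here, where the tainted result is actually used
    return 0;
  return 1;
}

This mirrors what the regenerated FileCheck lines verify: the input shadows
are ORed, compared lane-wise against zero, bitcast to an integer, and
zero-extended into the i32 result shadow stored to __msan_retval_tls, rather
than an unconditional __msan_warning_noreturn call guarding the intrinsic.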