From 9692e30e8b75816b3a966d991ea9eb1e9518292c Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Tue, 21 Apr 2015 21:28:33 +0000 Subject: [PATCH] [MemCpyOpt] Use the raw i8* dest when optimizing memset+memcpy. MemIntrinsic::getDest() looks through pointer casts, and using it directly when building the new GEP+memset results in stuff like: %0 = getelementptr i64* %p, i32 16 %1 = bitcast i64* %0 to i8* call ..memset(i8* %1, ...) instead of the correct: %0 = bitcast i64* %p to i8* %1 = getelementptr i8* %0, i32 16 call ..memset(i8* %1, ...) Instead, use getRawDest, which just gives you the i8* value. While there, use the memcpy's dest, as it's live anyway. In most cases, when the optimization triggers, the memset and memcpy sizes are the same, so the built memset is 0-sized and eliminated. The problem occurs when they're different. Fixes a regression caused by r235232: PR23300. llvm-svn: 235419 --- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 3 ++- .../MemCpyOpt/memset-memcpy-redundant-memset.ll | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 563e49c..8465ffd 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -860,7 +860,8 @@ bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy, if (MemSet->getDest() != MemCpy->getDest()) return false; - Value *Dest = MemSet->getDest(); + // Use the same i8* dest as the memcpy, killing the memset dest if different. + Value *Dest = MemCpy->getRawDest(); Value *DestSize = MemSet->getLength(); Value *SrcSize = MemCpy->getLength(); diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll index 5d3ef6b..0a21e71 100644 --- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll @@ -100,6 +100,22 @@ define void @test_align_memcpy(i8* %src, i8* %dst, i64 %dst_size) { ret void } +; CHECK-LABEL: define void @test_non_i8_dst_type +; CHECK-NEXT: %dst = bitcast i64* %dst_pi64 to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) +; CHECK-DAG: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size +; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size +; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size +; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false) +; CHECK-NEXT: ret void +define void @test_non_i8_dst_type(i8* %src, i64 %src_size, i64* %dst_pi64, i64 %dst_size, i8 %c) { + %dst = bitcast i64* %dst_pi64 to i8* + call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) + ret void +} + ; CHECK-LABEL: define void @test_different_dst ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false) ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false) -- 2.7.4