From: David Chisnall
Date: Sun, 3 Mar 2013 16:02:42 +0000 (+0000)
Subject: Improve C11 atomics support:
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ef78c305faa1eee1e5a402dfd3a4d2ecb3ec7b5e;p=platform%2Fupstream%2Fllvm.git

Improve C11 atomics support:

- Generate atomicrmw operations in most of the cases when it's sensible to
  do so.
- Don't crash in several common cases (and hopefully don't crash in more of
  them).
- Add some better tests.

We now generate significantly better code for things like:

_Atomic(int) x;
...
x++;

On MIPS, this now generates a 4-instruction ll/sc loop, where previously it
generated about 30 instructions in two nested loops.  On x86-64, we generate
a single lock incl, instead of a lock cmpxchgl loop (one instruction instead
of ten).

llvm-svn: 176420
---

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 1bfd414..7df4818 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1444,21 +1444,60 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
                                            bool isInc, bool isPre) {
   QualType type = E->getSubExpr()->getType();
-  llvm::Value *value = EmitLoadOfLValue(LV);
-  llvm::Value *input = value;
   llvm::PHINode *atomicPHI = 0;
+  llvm::Value *value;
+  llvm::Value *input;
   int amount = (isInc ? 1 : -1);
 
   if (const AtomicType *atomicTy = type->getAs<AtomicType>()) {
+    type = atomicTy->getValueType();
+    if (isInc && type->isBooleanType()) {
+      llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type);
+      if (isPre) {
+        Builder.Insert(new llvm::StoreInst(True,
+              LV.getAddress(), LV.isVolatileQualified(),
+              LV.getAlignment().getQuantity(),
+              llvm::SequentiallyConsistent));
+        return Builder.getTrue();
+      }
+      // For atomic bool increment, we just store true and return it for
+      // preincrement, do an atomic swap with true for postincrement
+      return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
+          LV.getAddress(), True, llvm::SequentiallyConsistent);
+    }
+    // Special case for atomic increment / decrement on integers, emit
+    // atomicrmw instructions.  We skip this if we want to be doing overflow
+    // checking, and fall into the slow path with the atomic cmpxchg loop.
+    if (!type->isBooleanType() && type->isIntegerType() &&
+        !(type->isUnsignedIntegerType() &&
+          CGF.SanOpts->UnsignedIntegerOverflow) &&
+        CGF.getLangOpts().getSignedOverflowBehavior() !=
+          LangOptions::SOB_Trapping) {
+      llvm::AtomicRMWInst::BinOp aop = isInc ? llvm::AtomicRMWInst::Add :
+        llvm::AtomicRMWInst::Sub;
+      llvm::Instruction::BinaryOps op = isInc ? llvm::Instruction::Add :
+        llvm::Instruction::Sub;
+      llvm::Value *amt = CGF.EmitToMemory(
+          llvm::ConstantInt::get(ConvertType(type), 1, true), type);
+      llvm::Value *old = Builder.CreateAtomicRMW(aop,
+          LV.getAddress(), amt, llvm::SequentiallyConsistent);
+      return isPre ? Builder.CreateBinOp(op, old, amt) : old;
+    }
+    value = EmitLoadOfLValue(LV);
+    input = value;
+    // For every other atomic operation, we need to emit a load-op-cmpxchg loop
     llvm::BasicBlock *startBB = Builder.GetInsertBlock();
     llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
+    value = CGF.EmitToMemory(value, type);
     Builder.CreateBr(opBB);
     Builder.SetInsertPoint(opBB);
     atomicPHI = Builder.CreatePHI(value->getType(), 2);
     atomicPHI->addIncoming(value, startBB);
-    type = atomicTy->getValueType();
     value = atomicPHI;
+  } else {
+    value = EmitLoadOfLValue(LV);
+    input = value;
   }
 
   // Special case of integer increment that we have to check first: bool++.
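For reference, a minimal C11 sketch of the source that the fast path above
covers (illustrative only, not part of the commit; the function name bump()
is hypothetical, and the expected-IR comments assume unsigned-overflow
sanitizing is off and signed overflow is non-trapping, matching the
conditions checked above):

  /* bump() is a hypothetical example; compile with -std=c11. */
  _Atomic(int) counter;

  int bump(void)
  {
    counter++;        /* now a single atomicrmw add ... seq_cst */
    counter -= 2;     /* compound assignment: see the hunks below */
    return ++counter; /* atomicrmw add, then a non-atomic add to
                         recover the incremented value */
  }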
@@ -1596,7 +1635,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
   llvm::BasicBlock *opBB = Builder.GetInsertBlock();
   llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
   llvm::Value *old = Builder.CreateAtomicCmpXchg(LV.getAddress(), atomicPHI,
-      value, llvm::SequentiallyConsistent);
+      CGF.EmitToMemory(value, type), llvm::SequentiallyConsistent);
   atomicPHI->addIncoming(old, opBB);
   llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
   Builder.CreateCondBr(success, contBB, opBB);
@@ -1872,20 +1911,63 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
   OpInfo.E = E;
   // Load/convert the LHS.
   LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
-  OpInfo.LHS = EmitLoadOfLValue(LHSLV);
 
   llvm::PHINode *atomicPHI = 0;
-  if (LHSTy->isAtomicType()) {
+  if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) {
+    QualType type = atomicTy->getValueType();
+    if (!type->isBooleanType() && type->isIntegerType() &&
+        !(type->isUnsignedIntegerType() &&
+          CGF.SanOpts->UnsignedIntegerOverflow) &&
+        CGF.getLangOpts().getSignedOverflowBehavior() !=
+          LangOptions::SOB_Trapping) {
+      llvm::AtomicRMWInst::BinOp aop = llvm::AtomicRMWInst::BAD_BINOP;
+      switch (OpInfo.Opcode) {
+        // We don't have atomicrmw operands for *, %, /, <<, >>
+        case BO_MulAssign: case BO_DivAssign:
+        case BO_RemAssign:
+        case BO_ShlAssign:
+        case BO_ShrAssign:
+          break;
+        case BO_AddAssign:
+          aop = llvm::AtomicRMWInst::Add;
+          break;
+        case BO_SubAssign:
+          aop = llvm::AtomicRMWInst::Sub;
+          break;
+        case BO_AndAssign:
+          aop = llvm::AtomicRMWInst::And;
+          break;
+        case BO_XorAssign:
+          aop = llvm::AtomicRMWInst::Xor;
+          break;
+        case BO_OrAssign:
+          aop = llvm::AtomicRMWInst::Or;
+          break;
+        default:
+          llvm_unreachable("Invalid compound assignment type");
+      }
+      if (aop != llvm::AtomicRMWInst::BAD_BINOP) {
+        llvm::Value *amt = CGF.EmitToMemory(EmitScalarConversion(OpInfo.RHS,
+              E->getRHS()->getType(), LHSTy), LHSTy);
+        Builder.CreateAtomicRMW(aop, LHSLV.getAddress(), amt,
+            llvm::SequentiallyConsistent);
+        return LHSLV;
+      }
+    }
     // FIXME: For floating point types, we should be saving and restoring the
     // floating point environment in the loop.
     llvm::BasicBlock *startBB = Builder.GetInsertBlock();
     llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
+    OpInfo.LHS = EmitLoadOfLValue(LHSLV);
+    OpInfo.LHS = CGF.EmitToMemory(OpInfo.LHS, type);
     Builder.CreateBr(opBB);
     Builder.SetInsertPoint(opBB);
     atomicPHI = Builder.CreatePHI(OpInfo.LHS->getType(), 2);
     atomicPHI->addIncoming(OpInfo.LHS, startBB);
     OpInfo.LHS = atomicPHI;
   }
+  else
+    OpInfo.LHS = EmitLoadOfLValue(LHSLV);
 
   OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
                                     E->getComputationLHSType());
@@ -1900,7 +1982,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
   llvm::BasicBlock *opBB = Builder.GetInsertBlock();
   llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
   llvm::Value *old = Builder.CreateAtomicCmpXchg(LHSLV.getAddress(), atomicPHI,
-      Result, llvm::SequentiallyConsistent);
+      CGF.EmitToMemory(Result, LHSTy), llvm::SequentiallyConsistent);
   atomicPHI->addIncoming(old, opBB);
   llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
   Builder.CreateCondBr(success, contBB, opBB);
diff --git a/clang/test/CodeGen/atomic_ops.c b/clang/test/CodeGen/atomic_ops.c
index 481d1e0..910e9b9 100644
--- a/clang/test/CodeGen/atomic_ops.c
+++ b/clang/test/CodeGen/atomic_ops.c
@@ -15,9 +15,4 @@ void foo(int x)
   // CHECK: sdiv i32
   // CHECK: cmpxchg i16*
 
-  // These should be emitting atomicrmw instructions, but they aren't yet
-  i += 2; // CHECK: cmpxchg
-  i -= 2; // CHECK: cmpxchg
-  i++; // CHECK: cmpxchg
-  i--; // CHECK: cmpxchg
 }
diff --git a/clang/test/CodeGen/c11atomics.c b/clang/test/CodeGen/c11atomics.c
new file mode 100644
index 0000000..fd5d3de
--- /dev/null
+++ b/clang/test/CodeGen/c11atomics.c
@@ -0,0 +1,139 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-unknown-freebsd -std=c11 | FileCheck %s
+
+// Test that we are generating atomicrmw instructions, rather than
+// compare-exchange loops for common atomic ops.  This makes a big difference
+// on RISC platforms, where the compare-exchange loop becomes an ll/sc pair
+// for the load and then another ll/sc in the loop, expanding to about 30
+// instructions when it should be only 4.  It has a smaller, but still
+// noticeable, impact on platforms like x86 and RISC-V, where there are atomic
+// RMW instructions.
+//
+// We currently emit cmpxchg loops for most operations on _Bools, because
+// they're sufficiently rare that it's not worth making sure that the
+// semantics are correct.
+
+typedef int __attribute__((vector_size(16))) vector;
+
+_Atomic(_Bool) b;
+_Atomic(int) i;
+_Atomic(long long) l;
+_Atomic(short) s;
+_Atomic(char*) p;
+_Atomic(float) f;
+_Atomic(vector) v;
+
+// CHECK-NOT: cmpxchg
+
+// CHECK: testinc
+void testinc(void)
+{
+  // Special case for suffix bool++, sets to true and returns the old value.
+  // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
+  b++;
+  // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
+  i++;
+  // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
+  l++;
+  // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
+  s++;
+  // Prefix increment
+  // Special case for bool: set to true and return true
+  // CHECK: store atomic i8 1, i8* @b seq_cst, align 1
+  ++b;
+  // Currently, we have no variant of atomicrmw that returns the new value, so
+  // we have to generate an atomic add, which returns the old value, and then a
+  // non-atomic add.
+  // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
+  // CHECK: add i32
+  ++i;
+  // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
+  // CHECK: add i64
+  ++l;
+  // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
+  // CHECK: add i16
+  ++s;
+}
+// CHECK: testdec
+void testdec(void)
+{
+  // CHECK: cmpxchg i8* @b
+  b--;
+  // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
+  i--;
+  // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
+  l--;
+  // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
+  s--;
+  // CHECK: cmpxchg i8* @b
+  --b;
+  // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
+  // CHECK: sub i32
+  --i;
+  // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
+  // CHECK: sub i64
+  --l;
+  // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
+  // CHECK: sub i16
+  --s;
+}
+// CHECK: testaddeq
+void testaddeq(void)
+{
+  // CHECK: cmpxchg i8* @b
+  // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
+  // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
+  // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
+  b += 42;
+  i += 42;
+  l += 42;
+  s += 42;
+}
+// CHECK: testsubeq
+void testsubeq(void)
+{
+  // CHECK: cmpxchg i8* @b
+  // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
+  // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
+  // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
+  b -= 42;
+  i -= 42;
+  l -= 42;
+  s -= 42;
+}
+// CHECK: testxoreq
+void testxoreq(void)
+{
+  // CHECK: cmpxchg i8* @b
+  // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
+  // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
+  // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
+  b ^= 42;
+  i ^= 42;
+  l ^= 42;
+  s ^= 42;
+}
+// CHECK: testoreq
+void testoreq(void)
+{
+  // CHECK: cmpxchg i8* @b
+  // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
+  // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
+  // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
+  b |= 42;
+  i |= 42;
+  l |= 42;
+  s |= 42;
+}
+// CHECK: testandeq
+void testandeq(void)
+{
+  // CHECK: cmpxchg i8* @b
+  // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
+  // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
+  // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
+  b &= 42;
+  i &= 42;
+  l &= 42;
+  s &= 42;
+}
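
As a complement to the new tests, here is a sketch (illustrative only, not
part of the commit; the function name slow_paths() is hypothetical) of the
cases that intentionally still lower to a load-op-cmpxchg loop after this
change: most operations on _Bool, compound assignment on floating point
(there is no atomicrmw for floating-point add here, and see the FIXME above
about the floating point environment), and the operators with no atomicrmw
equivalent (*, /, %, <<, >>):

  /* slow_paths() is a hypothetical example; compile with -std=c11. */
  _Atomic(_Bool) flag;
  _Atomic(float) avg;
  _Atomic(int) n;

  void slow_paths(void)
  {
    flag--;      /* _Bool decrement: cmpxchg loop */
    avg += 1.0f; /* floating point: cmpxchg loop */
    n *= 3;      /* no atomicrmw for mul: cmpxchg loop */
    n <<= 1;     /* no atomicrmw for shl: cmpxchg loop */
  }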