From: John McCall
Date: Thu, 7 Mar 2013 21:37:17 +0000 (+0000)
Subject: Promote atomic type sizes up to a power of two, capped by MaxAtomicPromoteWidth.
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a8ec7eb9cfe2adeb1b5d03a94f6cee70972e47b7;p=platform%2Fupstream%2Fllvm.git

Promote atomic type sizes up to a power of two, capped by
MaxAtomicPromoteWidth.  Fix a ton of terrible bugs with _Atomic types
and (non-intrinsic-mediated) loads and stores thereto.

llvm-svn: 176658
---

diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index db1aa1a..4580424 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -1602,18 +1602,21 @@ ASTContext::getTypeInfoImpl(const Type *T) const {
   }
 
   case Type::Atomic: {
+    // Start with the base type information.
     std::pair<uint64_t, unsigned> Info
       = getTypeInfo(cast<AtomicType>(T)->getValueType());
     Width = Info.first;
     Align = Info.second;
-    if (Width != 0 && Width <= Target->getMaxAtomicPromoteWidth() &&
-        llvm::isPowerOf2_64(Width)) {
-      // We can potentially perform lock-free atomic operations for this
-      // type; promote the alignment appropriately.
-      // FIXME: We could potentially promote the width here as well...
-      // is that worthwhile?  (Non-struct atomic types generally have
-      // power-of-two size anyway, but structs might not.  Requires a bit
-      // of implementation work to make sure we zero out the extra bits.)
+
+    // If the size of the type doesn't exceed the platform's max
+    // atomic promotion width, make the size and alignment more
+    // favorable to atomic operations:
+    if (Width != 0 && Width <= Target->getMaxAtomicPromoteWidth()) {
+      // Round the size up to a power of 2.
+      if (!llvm::isPowerOf2_64(Width))
+        Width = llvm::NextPowerOf2(Width);
+
+      // Set the alignment equal to the size.
       Align = static_cast<unsigned>(Width);
     }
   }
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index f17e48d..817d5c4 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -17,10 +17,169 @@
 #include "clang/AST/ASTContext.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
 
 using namespace clang;
 using namespace CodeGen;
 
+// The ABI values for various atomic memory orderings.
+enum AtomicOrderingKind { + AO_ABI_memory_order_relaxed = 0, + AO_ABI_memory_order_consume = 1, + AO_ABI_memory_order_acquire = 2, + AO_ABI_memory_order_release = 3, + AO_ABI_memory_order_acq_rel = 4, + AO_ABI_memory_order_seq_cst = 5 +}; + +namespace { + class AtomicInfo { + CodeGenFunction &CGF; + QualType AtomicTy; + QualType ValueTy; + uint64_t AtomicSizeInBits; + uint64_t ValueSizeInBits; + CharUnits AtomicAlign; + CharUnits ValueAlign; + CharUnits LValueAlign; + TypeEvaluationKind EvaluationKind; + bool UseLibcall; + public: + AtomicInfo(CodeGenFunction &CGF, LValue &lvalue) : CGF(CGF) { + assert(lvalue.isSimple()); + + AtomicTy = lvalue.getType(); + ValueTy = AtomicTy->castAs()->getValueType(); + EvaluationKind = CGF.getEvaluationKind(ValueTy); + + ASTContext &C = CGF.getContext(); + + uint64_t valueAlignInBits; + llvm::tie(ValueSizeInBits, valueAlignInBits) = C.getTypeInfo(ValueTy); + + uint64_t atomicAlignInBits; + llvm::tie(AtomicSizeInBits, atomicAlignInBits) = C.getTypeInfo(AtomicTy); + + assert(ValueSizeInBits <= AtomicSizeInBits); + assert(valueAlignInBits <= atomicAlignInBits); + + AtomicAlign = C.toCharUnitsFromBits(atomicAlignInBits); + ValueAlign = C.toCharUnitsFromBits(valueAlignInBits); + if (lvalue.getAlignment().isZero()) + lvalue.setAlignment(AtomicAlign); + + UseLibcall = + (AtomicSizeInBits > uint64_t(C.toBits(lvalue.getAlignment())) || + AtomicSizeInBits > C.getTargetInfo().getMaxAtomicInlineWidth()); + } + + QualType getAtomicType() const { return AtomicTy; } + QualType getValueType() const { return ValueTy; } + CharUnits getAtomicAlignment() const { return AtomicAlign; } + CharUnits getValueAlignment() const { return ValueAlign; } + uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; } + uint64_t getValueSizeInBits() const { return AtomicSizeInBits; } + TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; } + bool shouldUseLibcall() const { return UseLibcall; } + + /// Is the atomic size larger than the underlying value type? + /// + /// Note that the absence of padding does not mean that atomic + /// objects are completely interchangeable with non-atomic + /// objects: we might have promoted the alignment of a type + /// without making it bigger. + bool hasPadding() const { + return (ValueSizeInBits != AtomicSizeInBits); + } + + void emitMemSetZeroIfNecessary(LValue dest) const; + + llvm::Value *getAtomicSizeValue() const { + CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits); + return CGF.CGM.getSize(size); + } + + /// Cast the given pointer to an integer pointer suitable for + /// atomic operations. + llvm::Value *emitCastToAtomicIntPointer(llvm::Value *addr) const; + + /// Turn an atomic-layout object into an r-value. + RValue convertTempToRValue(llvm::Value *addr, + AggValueSlot resultSlot) const; + + /// Copy an atomic r-value into atomic-layout memory. + void emitCopyIntoMemory(RValue rvalue, LValue lvalue) const; + + /// Project an l-value down to the value field. + LValue projectValue(LValue lvalue) const { + llvm::Value *addr = lvalue.getAddress(); + if (hasPadding()) + addr = CGF.Builder.CreateStructGEP(addr, 0); + + return LValue::MakeAddr(addr, getValueType(), lvalue.getAlignment(), + CGF.getContext(), lvalue.getTBAAInfo()); + } + + /// Materialize an atomic r-value in atomic-layout memory. 
+ llvm::Value *materializeRValue(RValue rvalue) const; + + private: + bool requiresMemSetZero(llvm::Type *type) const; + }; +} + +static RValue emitAtomicLibcall(CodeGenFunction &CGF, + StringRef fnName, + QualType resultType, + CallArgList &args) { + const CGFunctionInfo &fnInfo = + CGF.CGM.getTypes().arrangeFreeFunctionCall(resultType, args, + FunctionType::ExtInfo(), RequiredArgs::All); + llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo); + llvm::Constant *fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName); + return CGF.EmitCall(fnInfo, fn, ReturnValueSlot(), args); +} + +/// Does a store of the given IR type modify the full expected width? +static bool isFullSizeType(CodeGenModule &CGM, llvm::Type *type, + uint64_t expectedSize) { + return (CGM.getDataLayout().getTypeStoreSize(type) * 8 == expectedSize); +} + +/// Does the atomic type require memsetting to zero before initialization? +/// +/// The IR type is provided as a way of making certain queries faster. +bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const { + // If the atomic type has size padding, we definitely need a memset. + if (hasPadding()) return true; + + // Otherwise, do some simple heuristics to try to avoid it: + switch (getEvaluationKind()) { + // For scalars and complexes, check whether the store size of the + // type uses the full size. + case TEK_Scalar: + return !isFullSizeType(CGF.CGM, type, AtomicSizeInBits); + case TEK_Complex: + return !isFullSizeType(CGF.CGM, type->getStructElementType(0), + AtomicSizeInBits / 2); + + // Just be pessimistic about aggregates. + case TEK_Aggregate: + return true; + } + llvm_unreachable("bad evaluation kind"); +} + +void AtomicInfo::emitMemSetZeroIfNecessary(LValue dest) const { + llvm::Value *addr = dest.getAddress(); + if (!requiresMemSetZero(addr->getType()->getPointerElementType())) + return; + + CGF.Builder.CreateMemSet(addr, llvm::ConstantInt::get(CGF.Int8Ty, 0), + AtomicSizeInBits / 8, + dest.getAlignment().getQuantity()); +} + static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest, llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2, @@ -177,24 +336,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { if (E->getOp() == AtomicExpr::AO__c11_atomic_init) { assert(!Dest && "Init does not return a value"); - LValue LV = MakeAddrLValue(Ptr, AtomicTy, alignChars); - switch (getEvaluationKind(E->getVal1()->getType())) { - case TEK_Scalar: - EmitScalarInit(EmitScalarExpr(E->getVal1()), LV); - return RValue::get(0); - case TEK_Complex: - EmitComplexExprIntoLValue(E->getVal1(), LV, /*isInit*/ true); - return RValue::get(0); - case TEK_Aggregate: { - AggValueSlot Slot = AggValueSlot::forLValue(LV, - AggValueSlot::IsNotDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); - EmitAggExpr(E->getVal1(), Slot); - return RValue::get(0); - } - } - llvm_unreachable("bad evaluation kind"); + LValue lvalue = LValue::MakeAddr(Ptr, AtomicTy, alignChars, getContext()); + EmitAtomicInit(E->getVal1(), lvalue); + return RValue::get(0); } Order = EmitScalarExpr(E->getOrder()); @@ -385,30 +529,30 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { if (isa(Order)) { int ord = cast(Order)->getZExtValue(); switch (ord) { - case 0: // memory_order_relaxed + case AO_ABI_memory_order_relaxed: EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, llvm::Monotonic); break; - case 1: // memory_order_consume - case 2: // memory_order_acquire + case 
AO_ABI_memory_order_consume: + case AO_ABI_memory_order_acquire: if (IsStore) break; // Avoid crashing on code with undefined behavior EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, llvm::Acquire); break; - case 3: // memory_order_release + case AO_ABI_memory_order_release: if (IsLoad) break; // Avoid crashing on code with undefined behavior EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, llvm::Release); break; - case 4: // memory_order_acq_rel + case AO_ABI_memory_order_acq_rel: if (IsLoad || IsStore) break; // Avoid crashing on code with undefined behavior EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, llvm::AcquireRelease); break; - case 5: // memory_order_seq_cst + case AO_ABI_memory_order_seq_cst: EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, llvm::SequentiallyConsistent); break; @@ -483,3 +627,316 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { return RValue::get(0); return convertTempToRValue(OrigDest, E->getType()); } + +llvm::Value *AtomicInfo::emitCastToAtomicIntPointer(llvm::Value *addr) const { + unsigned addrspace = + cast(addr->getType())->getAddressSpace(); + llvm::IntegerType *ty = + llvm::IntegerType::get(CGF.getLLVMContext(), AtomicSizeInBits); + return CGF.Builder.CreateBitCast(addr, ty->getPointerTo(addrspace)); +} + +RValue AtomicInfo::convertTempToRValue(llvm::Value *addr, + AggValueSlot resultSlot) const { + if (EvaluationKind == TEK_Aggregate) { + // Nothing to do if the result is ignored. + if (resultSlot.isIgnored()) return resultSlot.asRValue(); + + assert(resultSlot.getAddr() == addr || hasPadding()); + + // In these cases, we should have emitted directly into the result slot. + if (!hasPadding() || resultSlot.isValueOfAtomic()) + return resultSlot.asRValue(); + + // Otherwise, fall into the common path. + } + + // Drill into the padding structure if we have one. + if (hasPadding()) + addr = CGF.Builder.CreateStructGEP(addr, 0); + + // If we're emitting to an aggregate, copy into the result slot. + if (EvaluationKind == TEK_Aggregate) { + CGF.EmitAggregateCopy(resultSlot.getAddr(), addr, getValueType(), + resultSlot.isVolatile()); + return resultSlot.asRValue(); + } + + // Otherwise, just convert the temporary to an r-value using the + // normal conversion routine. + return CGF.convertTempToRValue(addr, getValueType()); +} + +/// Emit a load from an l-value of atomic type. Note that the r-value +/// we produce is an r-value of the atomic *value* type. +RValue CodeGenFunction::EmitAtomicLoad(LValue src, AggValueSlot resultSlot) { + AtomicInfo atomics(*this, src); + + // Check whether we should use a library call. 
+ if (atomics.shouldUseLibcall()) { + llvm::Value *tempAddr; + if (resultSlot.isValueOfAtomic()) { + assert(atomics.getEvaluationKind() == TEK_Aggregate); + tempAddr = resultSlot.getPaddedAtomicAddr(); + } else if (!resultSlot.isIgnored() && !atomics.hasPadding()) { + assert(atomics.getEvaluationKind() == TEK_Aggregate); + tempAddr = resultSlot.getAddr(); + } else { + tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp"); + } + + // void __atomic_load(size_t size, void *mem, void *return, int order); + CallArgList args; + args.add(RValue::get(atomics.getAtomicSizeValue()), + getContext().getSizeType()); + args.add(RValue::get(EmitCastToVoidPtr(src.getAddress())), + getContext().VoidPtrTy); + args.add(RValue::get(EmitCastToVoidPtr(tempAddr)), + getContext().VoidPtrTy); + args.add(RValue::get(llvm::ConstantInt::get(IntTy, + AO_ABI_memory_order_seq_cst)), + getContext().IntTy); + emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args); + + // Produce the r-value. + return atomics.convertTempToRValue(tempAddr, resultSlot); + } + + // Okay, we're doing this natively. + llvm::Value *addr = atomics.emitCastToAtomicIntPointer(src.getAddress()); + llvm::LoadInst *load = Builder.CreateLoad(addr, "atomic-load"); + load->setAtomic(llvm::SequentiallyConsistent); + + // Other decoration. + load->setAlignment(src.getAlignment().getQuantity()); + if (src.isVolatileQualified()) + load->setVolatile(true); + if (src.getTBAAInfo()) + CGM.DecorateInstruction(load, src.getTBAAInfo()); + + // Okay, turn that back into the original value type. + QualType valueType = atomics.getValueType(); + llvm::Value *result = load; + + // If we're ignoring an aggregate return, don't do anything. + if (atomics.getEvaluationKind() == TEK_Aggregate && resultSlot.isIgnored()) + return RValue::getAggregate(0, false); + + // The easiest way to do this this is to go through memory, but we + // try not to in some easy cases. + if (atomics.getEvaluationKind() == TEK_Scalar && !atomics.hasPadding()) { + llvm::Type *resultTy = CGM.getTypes().ConvertTypeForMem(valueType); + if (isa(resultTy)) { + assert(result->getType() == resultTy); + result = EmitFromMemory(result, valueType); + } else if (isa(resultTy)) { + result = Builder.CreateIntToPtr(result, resultTy); + } else { + result = Builder.CreateBitCast(result, resultTy); + } + return RValue::get(result); + } + + // Create a temporary. This needs to be big enough to hold the + // atomic integer. + llvm::Value *temp; + bool tempIsVolatile = false; + CharUnits tempAlignment; + if (atomics.getEvaluationKind() == TEK_Aggregate && + (!atomics.hasPadding() || resultSlot.isValueOfAtomic())) { + assert(!resultSlot.isIgnored()); + if (resultSlot.isValueOfAtomic()) { + temp = resultSlot.getPaddedAtomicAddr(); + tempAlignment = atomics.getAtomicAlignment(); + } else { + temp = resultSlot.getAddr(); + tempAlignment = atomics.getValueAlignment(); + } + tempIsVolatile = resultSlot.isVolatile(); + } else { + temp = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp"); + tempAlignment = atomics.getAtomicAlignment(); + } + + // Slam the integer into the temporary. + llvm::Value *castTemp = atomics.emitCastToAtomicIntPointer(temp); + Builder.CreateAlignedStore(result, castTemp, tempAlignment.getQuantity()) + ->setVolatile(tempIsVolatile); + + return atomics.convertTempToRValue(temp, resultSlot); +} + + + +/// Copy an r-value into memory as part of storing to an atomic type. +/// This needs to create a bit-pattern suitable for atomic operations. 
+void AtomicInfo::emitCopyIntoMemory(RValue rvalue, LValue dest) const { + // If we have an r-value, the rvalue should be of the atomic type, + // which means that the caller is responsible for having zeroed + // any padding. Just do an aggregate copy of that type. + if (rvalue.isAggregate()) { + CGF.EmitAggregateCopy(dest.getAddress(), + rvalue.getAggregateAddr(), + getAtomicType(), + (rvalue.isVolatileQualified() + || dest.isVolatileQualified()), + dest.getAlignment()); + return; + } + + // Okay, otherwise we're copying stuff. + + // Zero out the buffer if necessary. + emitMemSetZeroIfNecessary(dest); + + // Drill past the padding if present. + dest = projectValue(dest); + + // Okay, store the rvalue in. + if (rvalue.isScalar()) { + CGF.EmitStoreOfScalar(rvalue.getScalarVal(), dest, /*init*/ true); + } else { + CGF.EmitStoreOfComplex(rvalue.getComplexVal(), dest, /*init*/ true); + } +} + + +/// Materialize an r-value into memory for the purposes of storing it +/// to an atomic type. +llvm::Value *AtomicInfo::materializeRValue(RValue rvalue) const { + // Aggregate r-values are already in memory, and EmitAtomicStore + // requires them to be values of the atomic type. + if (rvalue.isAggregate()) + return rvalue.getAggregateAddr(); + + // Otherwise, make a temporary and materialize into it. + llvm::Value *temp = CGF.CreateMemTemp(getAtomicType(), "atomic-store-temp"); + LValue tempLV = CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment()); + emitCopyIntoMemory(rvalue, tempLV); + return temp; +} + +/// Emit a store to an l-value of atomic type. +/// +/// Note that the r-value is expected to be an r-value *of the atomic +/// type*; this means that for aggregate r-values, it should include +/// storage for any padding that was necessary. +void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, + bool isInit) { + // If this is an aggregate r-value, it should agree in type except + // maybe for address-space qualification. + assert(!rvalue.isAggregate() || + rvalue.getAggregateAddr()->getType()->getPointerElementType() + == dest.getAddress()->getType()->getPointerElementType()); + + AtomicInfo atomics(*this, dest); + + // If this is an initialization, just put the value there normally. + if (isInit) { + atomics.emitCopyIntoMemory(rvalue, dest); + return; + } + + // Check whether we should use a library call. + if (atomics.shouldUseLibcall()) { + // Produce a source address. + llvm::Value *srcAddr = atomics.materializeRValue(rvalue); + + // void __atomic_store(size_t size, void *mem, void *val, int order) + CallArgList args; + args.add(RValue::get(atomics.getAtomicSizeValue()), + getContext().getSizeType()); + args.add(RValue::get(EmitCastToVoidPtr(dest.getAddress())), + getContext().VoidPtrTy); + args.add(RValue::get(EmitCastToVoidPtr(srcAddr)), + getContext().VoidPtrTy); + args.add(RValue::get(llvm::ConstantInt::get(IntTy, + AO_ABI_memory_order_seq_cst)), + getContext().IntTy); + emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args); + return; + } + + // Okay, we're doing this natively. + llvm::Value *intValue; + + // If we've got a scalar value of the right size, try to avoid going + // through memory. 
+ if (rvalue.isScalar() && !atomics.hasPadding()) { + llvm::Value *value = rvalue.getScalarVal(); + if (isa(value->getType())) { + intValue = value; + } else { + llvm::IntegerType *inputIntTy = + llvm::IntegerType::get(getLLVMContext(), atomics.getValueSizeInBits()); + if (isa(value->getType())) { + intValue = Builder.CreatePtrToInt(value, inputIntTy); + } else { + intValue = Builder.CreateBitCast(value, inputIntTy); + } + } + + // Otherwise, we need to go through memory. + } else { + // Put the r-value in memory. + llvm::Value *addr = atomics.materializeRValue(rvalue); + + // Cast the temporary to the atomic int type and pull a value out. + addr = atomics.emitCastToAtomicIntPointer(addr); + intValue = Builder.CreateAlignedLoad(addr, + atomics.getAtomicAlignment().getQuantity()); + } + + // Do the atomic store. + llvm::Value *addr = atomics.emitCastToAtomicIntPointer(dest.getAddress()); + llvm::StoreInst *store = Builder.CreateStore(intValue, addr); + + // Initializations don't need to be atomic. + if (!isInit) store->setAtomic(llvm::SequentiallyConsistent); + + // Other decoration. + store->setAlignment(dest.getAlignment().getQuantity()); + if (dest.isVolatileQualified()) + store->setVolatile(true); + if (dest.getTBAAInfo()) + CGM.DecorateInstruction(store, dest.getTBAAInfo()); +} + +void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) { + AtomicInfo atomics(*this, dest); + + switch (atomics.getEvaluationKind()) { + case TEK_Scalar: { + llvm::Value *value = EmitScalarExpr(init); + atomics.emitCopyIntoMemory(RValue::get(value), dest); + return; + } + + case TEK_Complex: { + ComplexPairTy value = EmitComplexExpr(init); + atomics.emitCopyIntoMemory(RValue::getComplex(value), dest); + return; + } + + case TEK_Aggregate: { + // Memset the buffer first if there's any possibility of + // uninitialized internal bits. + atomics.emitMemSetZeroIfNecessary(dest); + + // HACK: whether the initializer actually has an atomic type + // doesn't really seem reliable right now. + if (!init->getType()->isAtomicType()) { + dest = atomics.projectValue(dest); + } + + // Evaluate the expression directly into the destination. + AggValueSlot slot = AggValueSlot::forLValue(dest, + AggValueSlot::IsNotDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased); + EmitAggExpr(init, slot); + return; + } + } + llvm_unreachable("bad evaluation kind"); +} diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index bb5d638..0e00130 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -1129,11 +1129,15 @@ void CodeGenFunction::EmitExprAsInit(const Expr *init, return; } case TEK_Aggregate: - // TODO: how can we delay here if D is captured by its initializer? - EmitAggExpr(init, AggValueSlot::forLValue(lvalue, + if (type->isAtomicType()) { + EmitAtomicInit(const_cast(init), lvalue); + } else { + // TODO: how can we delay here if D is captured by its initializer? + EmitAggExpr(init, AggValueSlot::forLValue(lvalue, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased)); + } MaybeEmitStdInitializerListCleanup(lvalue.getAddress(), init); return; } diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index ba816af..a170028 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1144,6 +1144,14 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(llvm::Value *Addr, bool Volatile, return EmitFromMemory(V, Ty); } } + + // Atomic operations have to be done on integral types. 
+ if (Ty->isAtomicType()) { + LValue lvalue = LValue::MakeAddr(Addr, Ty, + CharUnits::fromQuantity(Alignment), + getContext(), TBAAInfo); + return EmitAtomicLoad(lvalue).getScalarVal(); + } llvm::LoadInst *Load = Builder.CreateLoad(Addr); if (Volatile) @@ -1152,9 +1160,6 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(llvm::Value *Addr, bool Volatile, Load->setAlignment(Alignment); if (TBAAInfo) CGM.DecorateInstruction(Load, TBAAInfo); - // If this is an atomic type, all normal reads must be atomic - if (Ty->isAtomicType()) - Load->setAtomic(llvm::SequentiallyConsistent); if ((SanOpts->Bool && hasBooleanRepresentation(Ty)) || (SanOpts->Enum && Ty->getAs())) { @@ -1251,13 +1256,20 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, llvm::Value *Addr, Value = EmitToMemory(Value, Ty); + if (Ty->isAtomicType()) { + EmitAtomicStore(RValue::get(Value), + LValue::MakeAddr(Addr, Ty, + CharUnits::fromQuantity(Alignment), + getContext(), TBAAInfo), + isInit); + return; + } + llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile); if (Alignment) Store->setAlignment(Alignment); if (TBAAInfo) CGM.DecorateInstruction(Store, TBAAInfo); - if (!isInit && Ty->isAtomicType()) - Store->setAtomic(llvm::SequentiallyConsistent); } void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue, diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index f8921db..1ac13c0 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -29,6 +29,14 @@ using namespace CodeGen; // Aggregate Expression Emitter //===----------------------------------------------------------------------===// +llvm::Value *AggValueSlot::getPaddedAtomicAddr() const { + assert(isValueOfAtomic()); + llvm::GEPOperator *op = cast(getAddr()); + assert(op->getNumIndices() == 2); + assert(op->hasAllZeroIndices()); + return op->getPointerOperand(); +} + namespace { class AggExprEmitter : public StmtVisitor { CodeGenFunction &CGF; @@ -190,6 +198,38 @@ public: CGF.EmitAtomicExpr(E, EnsureSlot(E->getType()).getAddr()); } }; + +/// A helper class for emitting expressions into the value sub-object +/// of a padded atomic type. +class ValueDestForAtomic { + AggValueSlot Dest; +public: + ValueDestForAtomic(CodeGenFunction &CGF, AggValueSlot dest, QualType type) + : Dest(dest) { + assert(!Dest.isValueOfAtomic()); + if (!Dest.isIgnored() && CGF.CGM.isPaddedAtomicType(type)) { + llvm::Value *valueAddr = CGF.Builder.CreateStructGEP(Dest.getAddr(), 0); + Dest = AggValueSlot::forAddr(valueAddr, + Dest.getAlignment(), + Dest.getQualifiers(), + Dest.isExternallyDestructed(), + Dest.requiresGCollection(), + Dest.isPotentiallyAliased(), + Dest.isZeroed(), + AggValueSlot::IsValueOfAtomic); + } + } + + const AggValueSlot &getDest() const { return Dest; } + + ~ValueDestForAtomic() { + // Kill the GEP if we made one and it didn't end up used. + if (Dest.isValueOfAtomic()) { + llvm::Instruction *addr = cast(Dest.getAddr()); + if (addr->use_empty()) addr->eraseFromParent(); + } + } +}; } // end anonymous namespace. //===----------------------------------------------------------------------===// @@ -201,6 +241,14 @@ public: /// then loads the result into DestPtr. void AggExprEmitter::EmitAggLoadOfLValue(const Expr *E) { LValue LV = CGF.EmitLValue(E); + + // If the type of the l-value is atomic, then do an atomic load. 
+ if (LV.getType()->isAtomicType()) { + ValueDestForAtomic valueDest(CGF, Dest, LV.getType()); + CGF.EmitAtomicLoad(LV, valueDest.getDest()); + return; + } + EmitFinalDestCopy(E->getType(), LV); } @@ -543,6 +591,20 @@ AggExprEmitter::VisitCompoundLiteralExpr(CompoundLiteralExpr *E) { CGF.EmitAggExpr(E->getInitializer(), Slot); } +/// Attempt to look through various unimportant expressions to find a +/// cast of the given kind. +static Expr *findPeephole(Expr *op, CastKind kind) { + while (true) { + op = op->IgnoreParens(); + if (CastExpr *castE = dyn_cast(op)) { + if (castE->getCastKind() == kind) + return castE->getSubExpr(); + if (castE->getCastKind() == CK_NoOp) + continue; + } + return 0; + } +} void AggExprEmitter::VisitCastExpr(CastExpr *E) { switch (E->getCastKind()) { @@ -582,6 +644,75 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { "should have been unpacked before we got here"); } + case CK_NonAtomicToAtomic: + case CK_AtomicToNonAtomic: { + bool isToAtomic = (E->getCastKind() == CK_NonAtomicToAtomic); + + // Determine the atomic and value types. + QualType atomicType = E->getSubExpr()->getType(); + QualType valueType = E->getType(); + if (isToAtomic) std::swap(atomicType, valueType); + + assert(atomicType->isAtomicType()); + assert(CGF.getContext().hasSameUnqualifiedType(valueType, + atomicType->castAs()->getValueType())); + + // Just recurse normally if we're ignoring the result or the + // atomic type doesn't change representation. + if (Dest.isIgnored() || !CGF.CGM.isPaddedAtomicType(atomicType)) { + return Visit(E->getSubExpr()); + } + + CastKind peepholeTarget = + (isToAtomic ? CK_AtomicToNonAtomic : CK_NonAtomicToAtomic); + + // These two cases are reverses of each other; try to peephole them. + if (Expr *op = findPeephole(E->getSubExpr(), peepholeTarget)) { + assert(CGF.getContext().hasSameUnqualifiedType(op->getType(), + E->getType()) && + "peephole significantly changed types?"); + return Visit(op); + } + + // If we're converting an r-value of non-atomic type to an r-value + // of atomic type, just make an atomic temporary, emit into that, + // and then copy the value out. (FIXME: do we need to + // zero-initialize it first?) + if (isToAtomic) { + ValueDestForAtomic valueDest(CGF, Dest, atomicType); + CGF.EmitAggExpr(E->getSubExpr(), valueDest.getDest()); + return; + } + + // Otherwise, we're converting an atomic type to a non-atomic type. + + // If the dest is a value-of-atomic subobject, drill back out. + if (Dest.isValueOfAtomic()) { + AggValueSlot atomicSlot = + AggValueSlot::forAddr(Dest.getPaddedAtomicAddr(), + Dest.getAlignment(), + Dest.getQualifiers(), + Dest.isExternallyDestructed(), + Dest.requiresGCollection(), + Dest.isPotentiallyAliased(), + Dest.isZeroed(), + AggValueSlot::IsNotValueOfAtomic); + CGF.EmitAggExpr(E->getSubExpr(), atomicSlot); + return; + } + + // Otherwise, make an atomic temporary, emit into that, and then + // copy the value out. + AggValueSlot atomicSlot = + CGF.CreateAggTemp(atomicType, "atomic-to-nonatomic.temp"); + CGF.EmitAggExpr(E->getSubExpr(), atomicSlot); + + llvm::Value *valueAddr = + Builder.CreateStructGEP(atomicSlot.getAddr(), 0); + RValue rvalue = RValue::getAggregate(valueAddr, atomicSlot.isVolatile()); + return EmitFinalDestCopy(valueType, rvalue); + } + case CK_LValueToRValue: // If we're loading from a volatile type, force the destination // into existence. 
@@ -589,11 +720,10 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { EnsureDest(E->getType()); return Visit(E->getSubExpr()); } + // fallthrough case CK_NoOp: - case CK_AtomicToNonAtomic: - case CK_NonAtomicToAtomic: case CK_UserDefinedConversion: case CK_ConstructorConversion: assert(CGF.getContext().hasSameUnqualifiedType(E->getSubExpr()->getType(), @@ -775,6 +905,12 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { // Now emit the LHS and copy into it. LValue LHS = CGF.EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); + // That copy is an atomic copy if the LHS is atomic. + if (LHS.getType()->isAtomicType()) { + CGF.EmitAtomicStore(Dest.asRValue(), LHS, /*isInit*/ false); + return; + } + EmitCopy(E->getLHS()->getType(), AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()), @@ -785,6 +921,15 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { LValue LHS = CGF.EmitLValue(E->getLHS()); + // If we have an atomic type, evaluate into the destination and then + // do an atomic copy. + if (LHS.getType()->isAtomicType()) { + EnsureDest(E->getRHS()->getType()); + Visit(E->getRHS()); + CGF.EmitAtomicStore(Dest.asRValue(), LHS, /*isInit*/ false); + return; + } + // Codegen the RHS so that it stores directly into the LHS. AggValueSlot LHSSlot = AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 840463b..5fc73aa 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -42,7 +42,6 @@ class ComplexExprEmitter : public StmtVisitor { CodeGenFunction &CGF; CGBuilderTy &Builder; - // True is we should ignore the value of a bool IgnoreReal; bool IgnoreImag; public: @@ -286,6 +285,9 @@ public: /// load the real and imaginary pieces, returning them as Real/Imag. ComplexPairTy ComplexExprEmitter::EmitLoadOfLValue(LValue lvalue) { assert(lvalue.isSimple() && "non-simple complex l-value?"); + if (lvalue.getType()->isAtomicType()) + return CGF.EmitAtomicLoad(lvalue).getComplexVal(); + llvm::Value *SrcPtr = lvalue.getAddress(); bool isVolatile = lvalue.isVolatileQualified(); @@ -310,6 +312,9 @@ ComplexPairTy ComplexExprEmitter::EmitLoadOfLValue(LValue lvalue) { void ComplexExprEmitter::EmitStoreOfComplex(ComplexPairTy Val, LValue lvalue, bool isInit) { + if (lvalue.getType()->isAtomicType()) + return CGF.EmitAtomicStore(RValue::getComplex(Val), lvalue, isInit); + llvm::Value *Ptr = lvalue.getAddress(); llvm::Value *RealPtr = Builder.CreateStructGEP(Ptr, 0, "real"); llvm::Value *ImagPtr = Builder.CreateStructGEP(Ptr, 1, "imag"); diff --git a/clang/lib/CodeGen/CGValue.h b/clang/lib/CodeGen/CGValue.h index 0bbd373..6b0c271 100644 --- a/clang/lib/CodeGen/CGValue.h +++ b/clang/lib/CodeGen/CGValue.h @@ -350,11 +350,23 @@ class AggValueSlot { /// evaluating an expression which constructs such an object. bool AliasedFlag : 1; + /// ValueOfAtomicFlag - This is set to true if the slot is the value + /// subobject of an object the size of an _Atomic(T). The specific + /// guarantees this makes are: + /// - the address is guaranteed to be a getelementptr into the + /// padding struct and + /// - it is okay to store something the width of an _Atomic(T) + /// into the address. + /// Tracking this allows us to avoid some obviously unnecessary + /// memcpys. 
+ bool ValueOfAtomicFlag : 1; + public: enum IsAliased_t { IsNotAliased, IsAliased }; enum IsDestructed_t { IsNotDestructed, IsDestructed }; enum IsZeroed_t { IsNotZeroed, IsZeroed }; enum NeedsGCBarriers_t { DoesNotNeedGCBarriers, NeedsGCBarriers }; + enum IsValueOfAtomic_t { IsNotValueOfAtomic, IsValueOfAtomic }; /// ignored - Returns an aggregate value slot indicating that the /// aggregate value is being ignored. @@ -378,7 +390,9 @@ public: IsDestructed_t isDestructed, NeedsGCBarriers_t needsGC, IsAliased_t isAliased, - IsZeroed_t isZeroed = IsNotZeroed) { + IsZeroed_t isZeroed = IsNotZeroed, + IsValueOfAtomic_t isValueOfAtomic + = IsNotValueOfAtomic) { AggValueSlot AV; AV.Addr = addr; AV.Alignment = align.getQuantity(); @@ -387,6 +401,7 @@ public: AV.ObjCGCFlag = needsGC; AV.ZeroedFlag = isZeroed; AV.AliasedFlag = isAliased; + AV.ValueOfAtomicFlag = isValueOfAtomic; return AV; } @@ -394,9 +409,12 @@ public: IsDestructed_t isDestructed, NeedsGCBarriers_t needsGC, IsAliased_t isAliased, - IsZeroed_t isZeroed = IsNotZeroed) { + IsZeroed_t isZeroed = IsNotZeroed, + IsValueOfAtomic_t isValueOfAtomic + = IsNotValueOfAtomic) { return forAddr(LV.getAddress(), LV.getAlignment(), - LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed); + LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed, + isValueOfAtomic); } IsDestructed_t isExternallyDestructed() const { @@ -428,6 +446,12 @@ public: return Addr; } + IsValueOfAtomic_t isValueOfAtomic() const { + return IsValueOfAtomic_t(ValueOfAtomicFlag); + } + + llvm::Value *getPaddedAtomicAddr() const; + bool isIgnored() const { return Addr == 0; } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 4e6c72e..2be8dcc 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2149,6 +2149,13 @@ public: RValue convertTempToRValue(llvm::Value *addr, QualType type); + void EmitAtomicInit(Expr *E, LValue lvalue); + + RValue EmitAtomicLoad(LValue lvalue, + AggValueSlot slot = AggValueSlot::ignored()); + + void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit); + /// EmitToMemory - Change a scalar value from its value /// representation to its in-memory representation. llvm::Value *EmitToMemory(llvm::Value *Value, QualType Ty); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index f5ae9cc..2bddb6f 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -47,6 +47,7 @@ namespace llvm { namespace clang { class TargetCodeGenInfo; class ASTContext; + class AtomicType; class FunctionDecl; class IdentifierInfo; class ObjCMethodDecl; @@ -494,6 +495,9 @@ public: bool isTypeConstant(QualType QTy, bool ExcludeCtorDtor); + bool isPaddedAtomicType(QualType type); + bool isPaddedAtomicType(const AtomicType *type); + static void DecorateInstruction(llvm::Instruction *Inst, llvm::MDNode *TBAAInfo); diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 259d106..8fc78e3 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -582,7 +582,21 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { } case Type::Atomic: { - ResultType = ConvertType(cast(Ty)->getValueType()); + QualType valueType = cast(Ty)->getValueType(); + ResultType = ConvertTypeForMem(valueType); + + // Pad out to the inflated size if necessary. 
+ uint64_t valueSize = Context.getTypeSize(valueType); + uint64_t atomicSize = Context.getTypeSize(Ty); + if (valueSize != atomicSize) { + assert(valueSize < atomicSize); + llvm::Type *elts[] = { + ResultType, + llvm::ArrayType::get(CGM.Int8Ty, (atomicSize - valueSize) / 8) + }; + ResultType = llvm::StructType::get(getLLVMContext(), + llvm::makeArrayRef(elts)); + } break; } } @@ -593,6 +607,14 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { return ResultType; } +bool CodeGenModule::isPaddedAtomicType(QualType type) { + return isPaddedAtomicType(type->castAs()); +} + +bool CodeGenModule::isPaddedAtomicType(const AtomicType *type) { + return Context.getTypeSize(type) != Context.getTypeSize(type->getValueType()); +} + /// ConvertRecordDeclType - Lay out a tagged decl type like struct or union. llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) { // TagDecl's are not necessarily unique, instead use the (clang) diff --git a/clang/test/CodeGen/c11atomics-ios.c b/clang/test/CodeGen/c11atomics-ios.c new file mode 100644 index 0000000..d1c9b14 --- /dev/null +++ b/clang/test/CodeGen/c11atomics-ios.c @@ -0,0 +1,214 @@ +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-apple-ios -std=c11 | FileCheck %s + +// There isn't really anything special about iOS; it just happens to +// only deploy on processors with native atomics support, so it's a good +// way to test those code-paths. + +// This work was done in pursuit of . + +// CHECK: define arm_aapcscc void @testFloat(float* +void testFloat(_Atomic(float) *fp) { +// CHECK: [[FP:%.*]] = alloca float* +// CHECK-NEXT: [[X:%.*]] = alloca float +// CHECK-NEXT: [[F:%.*]] = alloca float +// CHECK-NEXT: store float* {{%.*}}, float** [[FP]] + +// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]] +// CHECK-NEXT: store float 1.000000e+00, float* [[T0]], align 4 + __c11_atomic_init(fp, 1.0f); + +// CHECK-NEXT: store float 2.000000e+00, float* [[X]], align 4 + _Atomic(float) x = 2.0f; + +// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]] +// CHECK-NEXT: [[T1:%.*]] = bitcast float* [[T0]] to i32* +// CHECK-NEXT: [[T2:%.*]] = load atomic i32* [[T1]] seq_cst, align 4 +// CHECK-NEXT: [[T3:%.*]] = bitcast i32 [[T2]] to float +// CHECK-NEXT: store float [[T3]], float* [[F]] + float f = *fp; + +// CHECK-NEXT: [[T0:%.*]] = load float* [[F]], align 4 +// CHECK-NEXT: [[T1:%.*]] = load float** [[FP]], align 4 +// CHECK-NEXT: [[T2:%.*]] = bitcast float [[T0]] to i32 +// CHECK-NEXT: [[T3:%.*]] = bitcast float* [[T1]] to i32* +// CHECK-NEXT: store atomic i32 [[T2]], i32* [[T3]] seq_cst, align 4 + *fp = f; + +// CHECK-NEXT: ret void +} + +// CHECK: define arm_aapcscc void @testComplexFloat([[CF:{ float, float }]]* +void testComplexFloat(_Atomic(_Complex float) *fp) { +// CHECK: [[FP:%.*]] = alloca [[CF]]*, align 4 +// CHECK-NEXT: [[X:%.*]] = alloca [[CF]], align 8 +// CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8 +// CHECK-NEXT: store [[CF]]* + +// CHECK-NEXT: [[P:%.*]] = load [[CF]]** [[FP]] +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 1 +// CHECK-NEXT: store float 1.000000e+00, float* [[T0]] +// CHECK-NEXT: store float 0.000000e+00, float* [[T1]] + __c11_atomic_init(fp, 1.0f); + +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 1 
+// CHECK-NEXT: store float 2.000000e+00, float* [[T0]] +// CHECK-NEXT: store float 0.000000e+00, float* [[T1]] + _Atomic(_Complex float) x = 2.0f; + +// CHECK-NEXT: [[T0:%.*]] = load [[CF]]** [[FP]] +// CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[T0]] to i64* +// CHECK-NEXT: [[T2:%.*]] = load atomic i64* [[T1]] seq_cst, align 8 +// CHECK-NEXT: [[T3:%.*]] = bitcast [[CF]]* [[TMP0]] to i64* +// CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 8 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[R:%.*]] = load float* [[T0]] +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[I:%.*]] = load float* [[T0]] +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1 +// CHECK-NEXT: store float [[R]], float* [[T0]] +// CHECK-NEXT: store float [[I]], float* [[T1]] + _Complex float f = *fp; + +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0 +// CHECK-NEXT: [[R:%.*]] = load float* [[T0]] +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1 +// CHECK-NEXT: [[I:%.*]] = load float* [[T0]] +// CHECK-NEXT: [[DEST:%.*]] = load [[CF]]** [[FP]], align 4 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 1 +// CHECK-NEXT: store float [[R]], float* [[T0]] +// CHECK-NEXT: store float [[I]], float* [[T1]] +// CHECK-NEXT: [[T0:%.*]] = bitcast [[CF]]* [[TMP1]] to i64* +// CHECK-NEXT: [[T1:%.*]] = load i64* [[T0]], align 8 +// CHECK-NEXT: [[T2:%.*]] = bitcast [[CF]]* [[DEST]] to i64* +// CHECK-NEXT: store atomic i64 [[T1]], i64* [[T2]] seq_cst, align 8 + *fp = f; + +// CHECK-NEXT: ret void +} + +typedef struct { short x, y, z, w; } S; +// CHECK: define arm_aapcscc void @testStruct([[S:.*]]* +void testStruct(_Atomic(S) *fp) { +// CHECK: [[FP:%.*]] = alloca [[S]]*, align 4 +// CHECK-NEXT: [[X:%.*]] = alloca [[S]], align 8 +// CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8 +// CHECK-NEXT: store [[S]]* + +// CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]] +// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[P]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0 +// CHECK-NEXT: store i16 1, i16* [[T0]], align 2 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1 +// CHECK-NEXT: store i16 2, i16* [[T0]], align 2 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 2 +// CHECK-NEXT: store i16 3, i16* [[T0]], align 2 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 3 +// CHECK-NEXT: store i16 4, i16* [[T0]], align 2 + __c11_atomic_init(fp, (S){1,2,3,4}); + +// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[X]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0 +// CHECK-NEXT: store i16 1, i16* [[T0]], align 2 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1 +// CHECK-NEXT: store i16 2, i16* [[T0]], align 2 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 2 +// CHECK-NEXT: store i16 3, i16* [[T0]], align 2 +// CHECK-NEXT: [[T0:%.*]] = 
getelementptr inbounds [[S]]* [[X]], i32 0, i32 3 +// CHECK-NEXT: store i16 4, i16* [[T0]], align 2 + _Atomic(S) x = (S){1,2,3,4}; + +// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]] +// CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[T0]] to i64* +// CHECK-NEXT: [[T2:%.*]] = load atomic i64* [[T1]] seq_cst, align 8 +// CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[F]] to i64* +// CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 2 + S f = *fp; + +// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]] +// CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8* +// CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false) +// CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[TMP0]] to i64* +// CHECK-NEXT: [[T4:%.*]] = load i64* [[T3]], align 8 +// CHECK-NEXT: [[T5:%.*]] = bitcast [[S]]* [[T0]] to i64* +// CHECK-NEXT: store atomic i64 [[T4]], i64* [[T5]] seq_cst, align 8 + *fp = f; + +// CHECK-NEXT: ret void +} + +typedef struct { short x, y, z; } PS; +// CHECK: define arm_aapcscc void @testPromotedStruct([[APS:.*]]* +void testPromotedStruct(_Atomic(PS) *fp) { +// CHECK: [[FP:%.*]] = alloca [[APS]]*, align 4 +// CHECK-NEXT: [[X:%.*]] = alloca [[APS]], align 8 +// CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8 +// CHECK-NEXT: store [[APS]]* + +// CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]] +// CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[P]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0 +// CHECK-NEXT: store i16 1, i16* [[T1]], align 2 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1 +// CHECK-NEXT: store i16 2, i16* [[T1]], align 2 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2 +// CHECK-NEXT: store i16 3, i16* [[T1]], align 2 + __c11_atomic_init(fp, (PS){1,2,3}); + +// CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0 +// CHECK-NEXT: store i16 1, i16* [[T1]], align 2 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1 +// CHECK-NEXT: store i16 2, i16* [[T1]], align 2 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2 +// CHECK-NEXT: store i16 3, i16* [[T1]], align 2 + _Atomic(PS) x = (PS){1,2,3}; + +// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]] +// CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i64* +// CHECK-NEXT: [[T2:%.*]] = load atomic i64* [[T1]] seq_cst, align 8 +// CHECK-NEXT: [[T3:%.*]] = bitcast [[APS]]* [[TMP0]] to i64* +// CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 8 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8* +// CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false) + PS f = *fp; + +// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]] +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], 
i32 0, i32 0 +// CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8* +// CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false) +// CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[TMP1]] to i64* +// CHECK-NEXT: [[T5:%.*]] = load i64* [[T4]], align 8 +// CHECK-NEXT: [[T6:%.*]] = bitcast [[APS]]* [[T0]] to i64* +// CHECK-NEXT: store atomic i64 [[T5]], i64* [[T6]] seq_cst, align 8 + *fp = f; + +// CHECK-NEXT: ret void +} + +void testPromotedStructOps(_Atomic(PS) *p) { + PS a = __c11_atomic_load(p, 5); + __c11_atomic_store(p, a, 5); + PS b = __c11_atomic_exchange(p, a, 5); + + _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5); + v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5); +} diff --git a/clang/test/CodeGen/c11atomics.c b/clang/test/CodeGen/c11atomics.c index 726fc51..8d298af 100644 --- a/clang/test/CodeGen/c11atomics.c +++ b/clang/test/CodeGen/c11atomics.c @@ -135,3 +135,210 @@ void testandeq(void) s &= 42; } +// CHECK: define arm_aapcscc void @testFloat(float* +void testFloat(_Atomic(float) *fp) { +// CHECK: [[FP:%.*]] = alloca float* +// CHECK-NEXT: [[X:%.*]] = alloca float +// CHECK-NEXT: [[F:%.*]] = alloca float +// CHECK-NEXT: [[TMP0:%.*]] = alloca float +// CHECK-NEXT: [[TMP1:%.*]] = alloca float +// CHECK-NEXT: store float* {{%.*}}, float** [[FP]] + +// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]] +// CHECK-NEXT: store float 1.000000e+00, float* [[T0]], align 4 + __c11_atomic_init(fp, 1.0f); + +// CHECK-NEXT: store float 2.000000e+00, float* [[X]], align 4 + _Atomic(float) x = 2.0f; + +// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]] +// CHECK-NEXT: [[T1:%.*]] = bitcast float* [[T0]] to i8* +// CHECK-NEXT: [[T2:%.*]] = bitcast float* [[TMP0]] to i8* +// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 4, i8* [[T1]], i8* [[T2]], i32 5) +// CHECK-NEXT: [[T3:%.*]] = load float* [[TMP0]], align 4 +// CHECK-NEXT: store float [[T3]], float* [[F]] + float f = *fp; + +// CHECK-NEXT: [[T0:%.*]] = load float* [[F]], align 4 +// CHECK-NEXT: [[T1:%.*]] = load float** [[FP]], align 4 +// CHECK-NEXT: store float [[T0]], float* [[TMP1]], align 4 +// CHECK-NEXT: [[T2:%.*]] = bitcast float* [[T1]] to i8* +// CHECK-NEXT: [[T3:%.*]] = bitcast float* [[TMP1]] to i8* +// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 4, i8* [[T2]], i8* [[T3]], i32 5) + *fp = f; + +// CHECK-NEXT: ret void +} + +// CHECK: define arm_aapcscc void @testComplexFloat([[CF:{ float, float }]]* +void testComplexFloat(_Atomic(_Complex float) *fp) { +// CHECK: [[FP:%.*]] = alloca [[CF]]*, align 4 +// CHECK-NEXT: [[X:%.*]] = alloca [[CF]], align 8 +// CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8 +// CHECK-NEXT: store [[CF]]* + +// CHECK-NEXT: [[P:%.*]] = load [[CF]]** [[FP]] +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 1 +// CHECK-NEXT: store float 1.000000e+00, float* [[T0]] +// CHECK-NEXT: store float 0.000000e+00, float* [[T1]] + __c11_atomic_init(fp, 1.0f); + +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 1 +// CHECK-NEXT: store float 2.000000e+00, float* [[T0]] +// CHECK-NEXT: store float 0.000000e+00, float* [[T1]] + _Atomic(_Complex float) x = 2.0f; + +// CHECK-NEXT: 
[[T0:%.*]] = load [[CF]]** [[FP]] +// CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[T0]] to i8* +// CHECK-NEXT: [[T2:%.*]] = bitcast [[CF]]* [[TMP0]] to i8* +// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5) +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[R:%.*]] = load float* [[T0]] +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[I:%.*]] = load float* [[T0]] +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1 +// CHECK-NEXT: store float [[R]], float* [[T0]] +// CHECK-NEXT: store float [[I]], float* [[T1]] + _Complex float f = *fp; + +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0 +// CHECK-NEXT: [[R:%.*]] = load float* [[T0]] +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1 +// CHECK-NEXT: [[I:%.*]] = load float* [[T0]] +// CHECK-NEXT: [[DEST:%.*]] = load [[CF]]** [[FP]], align 4 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 1 +// CHECK-NEXT: store float [[R]], float* [[T0]] +// CHECK-NEXT: store float [[I]], float* [[T1]] +// CHECK-NEXT: [[T0:%.*]] = bitcast [[CF]]* [[DEST]] to i8* +// CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[TMP1]] to i8* +// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T0]], i8* [[T1]], i32 5) + *fp = f; + +// CHECK-NEXT: ret void +} + +typedef struct { short x, y, z, w; } S; +// CHECK: define arm_aapcscc void @testStruct([[S:.*]]* +void testStruct(_Atomic(S) *fp) { +// CHECK: [[FP:%.*]] = alloca [[S]]*, align 4 +// CHECK-NEXT: [[X:%.*]] = alloca [[S]], align 8 +// CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8 +// CHECK-NEXT: store [[S]]* + +// CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]] +// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[P]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0 +// CHECK-NEXT: store i16 1, i16* [[T0]], align 2 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1 +// CHECK-NEXT: store i16 2, i16* [[T0]], align 2 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 2 +// CHECK-NEXT: store i16 3, i16* [[T0]], align 2 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 3 +// CHECK-NEXT: store i16 4, i16* [[T0]], align 2 + __c11_atomic_init(fp, (S){1,2,3,4}); + +// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[X]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0 +// CHECK-NEXT: store i16 1, i16* [[T0]], align 2 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1 +// CHECK-NEXT: store i16 2, i16* [[T0]], align 2 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 2 +// CHECK-NEXT: store i16 3, i16* [[T0]], align 2 +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 3 +// CHECK-NEXT: store i16 4, i16* [[T0]], align 2 + _Atomic(S) x = (S){1,2,3,4}; + +// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]] +// CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[T0]] to i8* +// 
CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8* +// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5) + S f = *fp; + +// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]] +// CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8* +// CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false) +// CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[T0]] to i8* +// CHECK-NEXT: [[T4:%.*]] = bitcast [[S]]* [[TMP0]] to i8* +// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T3]], i8* [[T4]], i32 5) + *fp = f; + +// CHECK-NEXT: ret void +} + +typedef struct { short x, y, z; } PS; +// CHECK: define arm_aapcscc void @testPromotedStruct([[APS:.*]]* +void testPromotedStruct(_Atomic(PS) *fp) { +// CHECK: [[FP:%.*]] = alloca [[APS]]*, align 4 +// CHECK-NEXT: [[X:%.*]] = alloca [[APS]], align 8 +// CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8 +// CHECK-NEXT: store [[APS]]* + +// CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]] +// CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[P]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0 +// CHECK-NEXT: store i16 1, i16* [[T1]], align 2 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1 +// CHECK-NEXT: store i16 2, i16* [[T1]], align 2 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2 +// CHECK-NEXT: store i16 3, i16* [[T1]], align 2 + __c11_atomic_init(fp, (PS){1,2,3}); + +// CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0 +// CHECK-NEXT: store i16 1, i16* [[T1]], align 2 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1 +// CHECK-NEXT: store i16 2, i16* [[T1]], align 2 +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2 +// CHECK-NEXT: store i16 3, i16* [[T1]], align 2 + _Atomic(PS) x = (PS){1,2,3}; + +// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]] +// CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8* +// CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP0]] to i8* +// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5) +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8* +// CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false) + PS f = *fp; + +// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]] +// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0 +// CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8* +// CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false) +// CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[T0]] to i8* +// CHECK-NEXT: [[T5:%.*]] = bitcast [[APS]]* [[TMP1]] to i8* 
+// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5) + *fp = f; + +// CHECK-NEXT: ret void +} + +// CHECK: define arm_aapcscc void @testPromotedStructOps([[APS:.*]]* + +// FIXME: none of these look right, but we can leave the "test" here +// to make sure they at least don't crash. +void testPromotedStructOps(_Atomic(PS) *p) { + PS a = __c11_atomic_load(p, 5); + __c11_atomic_store(p, a, 5); + PS b = __c11_atomic_exchange(p, a, 5); + _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5); + v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5); +}
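
As a concrete illustration of what the new code paths produce, here is a
small standalone C program (not part of the patch; the file name, exact
numbers, and IR shapes are illustrative and target-dependent, assuming a
target such as armv7-apple-ios or x86-64 whose MaxAtomicPromoteWidth is at
least 64 bits) that makes the size/alignment promotion visible at the
source level and gives something to feed through -emit-llvm while reading
the tests above:

// promote.c: build with  clang -std=c11 -S -emit-llvm promote.c
#include <stdio.h>

typedef struct { short x, y, z; } PS;   // 6 bytes, align 2, as in the tests

void copyThrough(_Atomic(PS) *p, PS *out) {
  // With this patch, plain loads/stores of an _Atomic l-value go through
  // EmitAtomicLoad/EmitAtomicStore: either a single seq_cst atomic
  // load/store of the padded 64-bit representation (when the target's
  // MaxAtomicInlineWidth covers it) or __atomic_load/__atomic_store
  // libcalls with memory order 5 (seq_cst).
  PS tmp = *p;   // atomic load of the whole padded object
  *p = tmp;      // atomic store of the whole padded object
  *out = tmp;
}

int main(void) {
  // The value type keeps its natural layout...
  printf("sizeof(PS)          = %zu, align %zu\n",
         sizeof(PS), _Alignof(PS));
  // ...while the _Atomic version is rounded up to the next power of two
  // (6 -> 8 bytes here) and its alignment is raised to match, which is
  // what allows a single 64-bit atomic access.  The two extra bytes are
  // the [2 x i8] padding field of the [[APS]] struct in the tests, and
  // the init/store paths zero them so the padded bit-pattern is defined.
  printf("sizeof(_Atomic(PS)) = %zu, align %zu\n",
         sizeof(_Atomic(PS)), _Alignof(_Atomic(PS)));
  return 0;
}

On the iOS-style target, the two accesses in copyThrough lower to
"load atomic i64 ... seq_cst" and "store atomic i64 ... seq_cst"; on the
other test target, where the promoted size exceeds the target's
MaxAtomicInlineWidth, the same accesses become the
__atomic_load/__atomic_store libcalls with order 5, exactly as the two
test files check.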