From 31486a9fc27a12e2c504861a1c4c3465cbb55856 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 21 Mar 2022 16:20:54 +0530 Subject: [PATCH] [mlir][OpenMP] Added translation from `omp.atomic.capture` to LLVM IR This patch adds translation from `omp.atomic.capture` to LLVM IR. Also added tests for the same. Depends on D121546 Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D121554 --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 7 +- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 24 + mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 27 + .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 105 ++++ mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir | 38 ++ mlir/test/Target/LLVMIR/openmp-llvm.mlir | 584 +++++++++++++++++++++ 6 files changed, 784 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index f0f02e8..4626d20 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3591,6 +3591,7 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate( case AtomicRMWInst::Nand: case AtomicRMWInst::Or: case AtomicRMWInst::Xor: + case AtomicRMWInst::Xchg: emitRMWOp = XElemTy; break; case AtomicRMWInst::Sub: @@ -3606,7 +3607,11 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate( Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO); // not needed except in case of postfix captures. Generate anyway for // consistency with the else part. Will be removed with any DCE pass. - Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp); + // AtomicRMWInst::Xchg does not have a corresponding instruction. 
+ if (RMWOp == AtomicRMWInst::Xchg) + Res.second = Res.first; + else + Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp); } else { unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace(); IntegerType *IntCastTy = diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 0cf9918..726152f 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -717,6 +717,11 @@ def AtomicUpdateOp : OpenMP_Op<"atomic.update", }]; let hasVerifier = 1; let hasRegionVerifier = 1; + let extraClassDeclaration = [{ + Operation* getFirstOp() { + return &getRegion().front().getOperations().front(); + } + }]; } def AtomicCaptureOp : OpenMP_Op<"atomic.capture", @@ -764,6 +769,25 @@ def AtomicCaptureOp : OpenMP_Op<"atomic.capture", $region attr-dict }]; let hasRegionVerifier = 1; + let extraClassDeclaration = [{ + /// Returns the first operation in atomic capture region + Operation* getFirstOp(); + + /// Returns the second operation in atomic capture region + Operation* getSecondOp(); + + /// Returns the `atomic.read` operation inside the region, if any. + /// Otherwise, it returns nullptr. + AtomicReadOp getAtomicReadOp(); + + /// Returns the `atomic.write` operation inside the region, if any. + /// Otherwise, it returns nullptr. + AtomicWriteOp getAtomicWriteOp(); + + /// Returns the `atomic.update` operation inside the region, if any. + /// Otherwise, it returns nullptr. 
+ AtomicUpdateOp getAtomicUpdateOp(); + }]; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 774b6b3..602b26c 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1149,6 +1149,33 @@ LogicalResult AtomicUpdateOp::verifyRegions() { // Verifier for AtomicCaptureOp //===----------------------------------------------------------------------===// +Operation *AtomicCaptureOp::getFirstOp() { + return &getRegion().front().getOperations().front(); +} + +Operation *AtomicCaptureOp::getSecondOp() { + auto &ops = getRegion().front().getOperations(); + return ops.getNextNode(ops.front()); +} + +AtomicReadOp AtomicCaptureOp::getAtomicReadOp() { + if (auto op = dyn_cast<AtomicReadOp>(getFirstOp())) + return op; + return dyn_cast<AtomicReadOp>(getSecondOp()); +} + +AtomicWriteOp AtomicCaptureOp::getAtomicWriteOp() { + if (auto op = dyn_cast<AtomicWriteOp>(getFirstOp())) + return op; + return dyn_cast<AtomicWriteOp>(getSecondOp()); +} + +AtomicUpdateOp AtomicCaptureOp::getAtomicUpdateOp() { + if (auto op = dyn_cast<AtomicUpdateOp>(getFirstOp())) + return op; + return dyn_cast<AtomicUpdateOp>(getSecondOp()); +} + LogicalResult AtomicCaptureOp::verifyRegions() { Block::OpListType &ops = region().front().getOperations(); if (ops.size() != 3) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index a835b72..b71f5d9 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1114,6 +1114,108 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, return updateGenStatus; } +static LogicalResult +convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = 
moduleTranslation.getOpenMPBuilder(); + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + mlir::Value mlirExpr; + bool isXBinopExpr = false, isPostfixUpdate = false; + llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP; + + omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp(); + omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp(); + + assert((atomicUpdateOp || atomicWriteOp) && + "internal op must be an atomic.update or atomic.write op"); + + if (atomicWriteOp) { + isPostfixUpdate = true; + mlirExpr = atomicWriteOp.value(); + } else { + isPostfixUpdate = atomicCaptureOp.getSecondOp() == + atomicCaptureOp.getAtomicUpdateOp().getOperation(); + auto &innerOpList = atomicUpdateOp.region().front().getOperations(); + if (innerOpList.size() != 2) + return atomicUpdateOp.emitError( + "exactly two operations are allowed inside an " + "atomic update region while lowering to LLVM IR"); + Operation *innerUpdateOp = atomicUpdateOp.getFirstOp(); + if (innerUpdateOp->getNumOperands() != 2 || + !llvm::is_contained(innerUpdateOp->getOperands(), + atomicUpdateOp.getRegion().getArgument(0))) + return atomicUpdateOp.emitError( + "the update operation inside the region must be a binary operation " + "and that update operation must have the region argument as an " + "operand"); + binop = convertBinOpToAtomic(*innerUpdateOp); + + isXBinopExpr = innerUpdateOp->getOperand(0) == + atomicUpdateOp.getRegion().getArgument(0); + + mlirExpr = (isXBinopExpr ? 
innerUpdateOp->getOperand(1) + : innerUpdateOp->getOperand(0)); + } + + llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr); + llvm::Value *llvmX = + moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().x()); + llvm::Value *llvmV = + moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().v()); + auto mlirXType = atomicCaptureOp.getAtomicReadOp() + .x() + .getType() + .cast<LLVM::LLVMPointerType>(); + llvm::Type *llvmXElementType = + moduleTranslation.convertType(mlirXType.getElementType()); + llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, + /*isSigned=*/false, + /*isVolatile=*/false}; + llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType, + /*isSigned=*/false, + /*isVolatile=*/false}; + + llvm::AtomicOrdering atomicOrdering = + convertAtomicOrdering(atomicCaptureOp.memory_order_val()); + + LogicalResult updateGenStatus = success(); + auto updateFn = [&](llvm::Value *atomicx, + llvm::IRBuilder<> &builder) -> llvm::Value * { + if (atomicWriteOp) + return moduleTranslation.lookupValue(atomicWriteOp.value()); + Block &bb = *atomicUpdateOp.region().begin(); + moduleTranslation.mapValue(*atomicUpdateOp.region().args_begin(), atomicx); + moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); + if (failed(moduleTranslation.convertBlock(bb, true, builder))) { + updateGenStatus = (atomicUpdateOp.emitError() + << "unable to convert update operation to llvm IR"); + return nullptr; + } + omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); + assert(yieldop && yieldop.results().size() == 1 && + "terminator must be omp.yield op and it must have exactly one " + "argument"); + return moduleTranslation.lookupValue(yieldop.results()[0]); + }; + // Handle ambiguous alloca, if any. + auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + llvm::UnreachableInst *unreachableInst; + if (allocaIP.getPoint() == ompLoc.IP.getPoint()) { + // Same point => split basic block and make them unambiguous. 
+ unreachableInst = builder.CreateUnreachable(); + builder.SetInsertPoint(builder.GetInsertBlock()->splitBasicBlock( + unreachableInst, "alloca_split")); + ompLoc.IP = builder.saveIP(); + unreachableInst->removeFromParent(); + } + builder.restoreIP(ompBuilder->createAtomicCapture( + ompLoc, findAllocaInsertPoint(builder, moduleTranslation), llvmAtomicX, + llvmAtomicV, llvmExpr, atomicOrdering, binop, updateFn, atomicUpdateOp, + isPostfixUpdate, isXBinopExpr)); + return updateGenStatus; +} + /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the /// mapping between reduction variables and their private equivalents to have /// been stored on the ModuleTranslation stack. Currently only supports @@ -1247,6 +1349,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( .Case([&](omp::AtomicUpdateOp op) { return convertOmpAtomicUpdate(op, builder, moduleTranslation); }) + .Case([&](omp::AtomicCaptureOp op) { + return convertOmpAtomicCapture(op, builder, moduleTranslation); + }) .Case([&](omp::SectionsOp) { return convertOmpSections(*op, builder, moduleTranslation); }) diff --git a/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir index 232df2e..171db04 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir @@ -29,3 +29,41 @@ llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %expr: i32 } llvm.return } + +// ----- + +// Checking translation when the update operation inside the atomic capture +// region does not use the region argument. 
+llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %v: !llvm.ptr<i32>, %expr: i32) { + // expected-error @+1 {{LLVM Translation failed for operation: omp.atomic.capture}} + omp.atomic.capture memory_order(seq_cst) { + omp.atomic.read %v = %x : !llvm.ptr<i32> + // expected-error @+1 {{the update operation inside the region must be a binary operation and that update operation must have the region argument as an operand}} + omp.atomic.update %x : !llvm.ptr<i32> { + ^bb0(%xval: i32): + %newval = llvm.mul %expr, %expr : i32 + omp.yield(%newval : i32) + } + } + llvm.return +} + +// ----- + +// Checking translation when the update is carried out by using more than one +// operation in the atomic capture region. +llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %v: !llvm.ptr<i32>, %expr: i32) { + // expected-error @+1 {{LLVM Translation failed for operation: omp.atomic.capture}} + omp.atomic.capture memory_order(seq_cst) { + omp.atomic.read %v = %x : !llvm.ptr<i32> + // expected-error @+1 {{exactly two operations are allowed inside an atomic update region while lowering to LLVM IR}} + omp.atomic.update %x : !llvm.ptr<i32> { + ^bb0(%xval: i32): + %t1 = llvm.mul %xval, %expr : i32 + %t2 = llvm.sdiv %t1, %expr : i32 + %newval = llvm.add %xval, %t2 : i32 + omp.yield(%newval : i32) + } + } + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index fe3a34d..26cdaf8 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -1063,6 +1063,590 @@ llvm.func @omp_atomic_update_intrinsic(%x:!llvm.ptr<i32>, %expr: i32) { // ----- + +// CHECK-LABEL: @omp_atomic_capture_prefix_update +// CHECK-SAME: (i32* %[[x:.*]], i32* %[[v:.*]], i32 %[[expr:.*]], float* %[[xf:.*]], float* %[[vf:.*]], float %[[exprf:.*]]) +llvm.func @omp_atomic_capture_prefix_update( + %x: !llvm.ptr<i32>, %v: !llvm.ptr<i32>, %expr: i32, + %xf: !llvm.ptr<f32>, %vf: !llvm.ptr<f32>, %exprf: f32) -> () { + // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 
%[[expr]] monotonic + // CHECK-NEXT: %[[newval:.*]] = add i32 %[[res]], %[[expr]] + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[res:.*]] = atomicrmw sub i32* %[[x]], i32 %[[expr]] monotonic + // CHECK-NEXT: %[[newval:.*]] = sub i32 %[[res]], %[[expr]] + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.sub %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[res:.*]] = atomicrmw and i32* %[[x]], i32 %[[expr]] monotonic + // CHECK-NEXT: %[[newval:.*]] = and i32 %[[res]], %[[expr]] + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.and %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[res:.*]] = atomicrmw or i32* %[[x]], i32 %[[expr]] monotonic + // CHECK-NEXT: %[[newval:.*]] = or i32 %[[res]], %[[expr]] + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.or %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[res:.*]] = atomicrmw xor i32* %[[x]], i32 %[[expr]] monotonic + // CHECK-NEXT: %[[newval:.*]] = xor i32 %[[res]], %[[expr]] + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.xor %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = mul i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + 
// CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.mul %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = sdiv i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.sdiv %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = udiv i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.udiv %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = shl i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.shl %xval, %expr : i32 + omp.yield(%newval 
: i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = lshr i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.lshr %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = ashr i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.ashr %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.smax.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.smax"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.smin.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // 
CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.smin"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.umax.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.umax"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.umin.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.umin"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK: %[[newval:.*]] = fadd float %{{.*}}, %[[exprf]] + // CHECK: store float %[[newval]], float* %{{.*}} + // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32* + // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store float %[[newval]], float* %[[vf]] + omp.atomic.capture 
{ + omp.atomic.update %xf : !llvm.ptr { + ^bb0(%xval: f32): + %newval = llvm.fadd %xval, %exprf : f32 + omp.yield(%newval : f32) + } + omp.atomic.read %vf = %xf : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK: %[[newval:.*]] = fsub float %{{.*}}, %[[exprf]] + // CHECK: store float %[[newval]], float* %{{.*}} + // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32* + // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store float %[[newval]], float* %[[vf]] + omp.atomic.capture { + omp.atomic.update %xf : !llvm.ptr { + ^bb0(%xval: f32): + %newval = llvm.fsub %xval, %exprf : f32 + omp.yield(%newval : f32) + } + omp.atomic.read %vf = %xf : !llvm.ptr + } + + llvm.return +} + +// ----- + +// CHECK-LABEL: @omp_atomic_capture_postfix_update +// CHECK-SAME: (i32* %[[x:.*]], i32* %[[v:.*]], i32 %[[expr:.*]], float* %[[xf:.*]], float* %[[vf:.*]], float %[[exprf:.*]]) +llvm.func @omp_atomic_capture_postfix_update( + %x: !llvm.ptr, %v: !llvm.ptr, %expr: i32, + %xf: !llvm.ptr, %vf: !llvm.ptr, %exprf: f32) -> () { + // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] monotonic + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw sub i32* %[[x]], i32 %[[expr]] monotonic + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.sub %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw and i32* %[[x]], i32 %[[expr]] monotonic + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : 
!llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.and %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw or i32* %[[x]], i32 %[[expr]] monotonic + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.or %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw xor i32* %[[x]], i32 %[[expr]] monotonic + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.xor %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = mul i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.mul %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = sdiv i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.sdiv %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = udiv i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // 
CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.udiv %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = shl i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.shl %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = lshr i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.lshr %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = ashr i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.ashr %xval, 
%expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.smax.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.smax"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.smin.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.smin"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.umax.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.umax"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.umin.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 
%[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.umin"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK: %[[xvalf:.*]] = bitcast i32 %[[xval]] to float + // CHECK: %[[newval:.*]] = fadd float %{{.*}}, %[[exprf]] + // CHECK: store float %[[newval]], float* %{{.*}} + // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32* + // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store float %[[xvalf]], float* %[[vf]] + omp.atomic.capture { + omp.atomic.read %vf = %xf : !llvm.ptr + omp.atomic.update %xf : !llvm.ptr { + ^bb0(%xval: f32): + %newval = llvm.fadd %xval, %exprf : f32 + omp.yield(%newval : f32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK: %[[xvalf:.*]] = bitcast i32 %[[xval]] to float + // CHECK: %[[newval:.*]] = fsub float %{{.*}}, %[[exprf]] + // CHECK: store float %[[newval]], float* %{{.*}} + // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32* + // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store float %[[xvalf]], float* %[[vf]] + omp.atomic.capture { + omp.atomic.read %vf = %xf : !llvm.ptr + omp.atomic.update %xf : !llvm.ptr { + ^bb0(%xval: f32): + %newval = llvm.fsub %xval, %exprf : f32 + omp.yield(%newval : f32) + } + } + + llvm.return +} + +// ----- +// CHECK-LABEL: @omp_atomic_capture_misc +// CHECK-SAME: (i32* %[[x:.*]], i32* %[[v:.*]], i32 %[[expr:.*]], float* %[[xf:.*]], float* %[[vf:.*]], float %[[exprf:.*]]) 
+llvm.func @omp_atomic_capture_misc( + %x: !llvm.ptr, %v: !llvm.ptr, %expr: i32, + %xf: !llvm.ptr, %vf: !llvm.ptr, %exprf: f32) -> () { + // CHECK: %[[xval:.*]] = atomicrmw xchg i32* %[[x]], i32 %[[expr]] monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture{ + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.write %x = %expr : !llvm.ptr, i32 + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK: %[[xvalf:.*]] = bitcast i32 %[[xval]] to float + // CHECK: store float %[[exprf]], float* %{{.*}} + // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32* + // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store float %[[xvalf]], float* %[[vf]] + omp.atomic.capture{ + omp.atomic.read %vf = %xf : !llvm.ptr + omp.atomic.write %xf = %exprf : !llvm.ptr, f32 + } + + // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] seq_cst + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture memory_order(seq_cst) { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] acquire + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture memory_order(acquire) { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] release + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture memory_order(release) { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] monotonic + // 
CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture memory_order(relaxed) { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] acq_rel + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture memory_order(acq_rel) { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + llvm.return +} + +// ----- + // CHECK-LABEL: @omp_sections_empty llvm.func @omp_sections_empty() -> () { omp.sections { -- 2.7.4