From 31486a9fc27a12e2c504861a1c4c3465cbb55856 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 21 Mar 2022 16:20:54 +0530 Subject: [PATCH] [mlir][OpenMP] Added translation from `omp.atomic.capture` to LLVM IR This patch adds translation from `omp.atomic.capture` to LLVM IR. Also added tests for the same. Depends on D121546 Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D121554 --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 7 +- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 24 + mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 27 + .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 105 ++++ mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir | 38 ++ mlir/test/Target/LLVMIR/openmp-llvm.mlir | 584 +++++++++++++++++++++ 6 files changed, 784 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index f0f02e8..4626d20 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3591,6 +3591,7 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate( case AtomicRMWInst::Nand: case AtomicRMWInst::Or: case AtomicRMWInst::Xor: + case AtomicRMWInst::Xchg: emitRMWOp = XElemTy; break; case AtomicRMWInst::Sub: @@ -3606,7 +3607,11 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate( Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO); // not needed except in case of postfix captures. Generate anyway for // consistency with the else part. Will be removed with any DCE pass. - Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp); + // AtomicRMWInst::Xchg does not have a corresponding instruction. 
+ if (RMWOp == AtomicRMWInst::Xchg) + Res.second = Res.first; + else + Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp); } else { unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace(); IntegerType *IntCastTy = diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 0cf9918..726152f 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -717,6 +717,11 @@ def AtomicUpdateOp : OpenMP_Op<"atomic.update", }]; let hasVerifier = 1; let hasRegionVerifier = 1; + let extraClassDeclaration = [{ + Operation* getFirstOp() { + return &getRegion().front().getOperations().front(); + } + }]; } def AtomicCaptureOp : OpenMP_Op<"atomic.capture", @@ -764,6 +769,25 @@ def AtomicCaptureOp : OpenMP_Op<"atomic.capture", $region attr-dict }]; let hasRegionVerifier = 1; + let extraClassDeclaration = [{ + /// Returns the first operation in atomic capture region + Operation* getFirstOp(); + + /// Returns the second operation in atomic capture region + Operation* getSecondOp(); + + /// Returns the `atomic.read` operation inside the region, if any. + /// Otherwise, it returns nullptr. + AtomicReadOp getAtomicReadOp(); + + /// Returns the `atomic.write` operation inside the region, if any. + /// Otherwise, it returns nullptr. + AtomicWriteOp getAtomicWriteOp(); + + /// Returns the `atomic.update` operation inside the region, if any. + /// Otherwise, it returns nullptr. 
+ AtomicUpdateOp getAtomicUpdateOp(); + }]; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 774b6b3..602b26c 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1149,6 +1149,33 @@ LogicalResult AtomicUpdateOp::verifyRegions() { // Verifier for AtomicCaptureOp //===----------------------------------------------------------------------===// +Operation *AtomicCaptureOp::getFirstOp() { + return &getRegion().front().getOperations().front(); +} + +Operation *AtomicCaptureOp::getSecondOp() { + auto &ops = getRegion().front().getOperations(); + return ops.getNextNode(ops.front()); +} + +AtomicReadOp AtomicCaptureOp::getAtomicReadOp() { + if (auto op = dyn_cast<AtomicReadOp>(getFirstOp())) + return op; + return dyn_cast<AtomicReadOp>(getSecondOp()); +} + +AtomicWriteOp AtomicCaptureOp::getAtomicWriteOp() { + if (auto op = dyn_cast<AtomicWriteOp>(getFirstOp())) + return op; + return dyn_cast<AtomicWriteOp>(getSecondOp()); +} + +AtomicUpdateOp AtomicCaptureOp::getAtomicUpdateOp() { + if (auto op = dyn_cast<AtomicUpdateOp>(getFirstOp())) + return op; + return dyn_cast<AtomicUpdateOp>(getSecondOp()); +} + LogicalResult AtomicCaptureOp::verifyRegions() { Block::OpListType &ops = region().front().getOperations(); if (ops.size() != 3) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index a835b72..b71f5d9 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1114,6 +1114,108 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, return updateGenStatus; } +static LogicalResult +convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = 
moduleTranslation.getOpenMPBuilder(); + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + mlir::Value mlirExpr; + bool isXBinopExpr = false, isPostfixUpdate = false; + llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP; + + omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp(); + omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp(); + + assert((atomicUpdateOp || atomicWriteOp) && + "internal op must be an atomic.update or atomic.write op"); + + if (atomicWriteOp) { + isPostfixUpdate = true; + mlirExpr = atomicWriteOp.value(); + } else { + isPostfixUpdate = atomicCaptureOp.getSecondOp() == + atomicCaptureOp.getAtomicUpdateOp().getOperation(); + auto &innerOpList = atomicUpdateOp.region().front().getOperations(); + if (innerOpList.size() != 2) + return atomicUpdateOp.emitError( + "exactly two operations are allowed inside an " + "atomic update region while lowering to LLVM IR"); + Operation *innerUpdateOp = atomicUpdateOp.getFirstOp(); + if (innerUpdateOp->getNumOperands() != 2 || + !llvm::is_contained(innerUpdateOp->getOperands(), + atomicUpdateOp.getRegion().getArgument(0))) + return atomicUpdateOp.emitError( + "the update operation inside the region must be a binary operation " + "and that update operation must have the region argument as an " + "operand"); + binop = convertBinOpToAtomic(*innerUpdateOp); + + isXBinopExpr = innerUpdateOp->getOperand(0) == + atomicUpdateOp.getRegion().getArgument(0); + + mlirExpr = (isXBinopExpr ? 
innerUpdateOp->getOperand(1) + : innerUpdateOp->getOperand(0)); + } + + llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr); + llvm::Value *llvmX = + moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().x()); + llvm::Value *llvmV = + moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().v()); + auto mlirXType = atomicCaptureOp.getAtomicReadOp() + .x() + .getType() + .cast<LLVM::LLVMPointerType>(); + llvm::Type *llvmXElementType = + moduleTranslation.convertType(mlirXType.getElementType()); + llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, + /*isSigned=*/false, + /*isVolatile=*/false}; + llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType, + /*isSigned=*/false, + /*isVolatile=*/false}; + + llvm::AtomicOrdering atomicOrdering = + convertAtomicOrdering(atomicCaptureOp.memory_order_val()); + + LogicalResult updateGenStatus = success(); + auto updateFn = [&](llvm::Value *atomicx, + llvm::IRBuilder<> &builder) -> llvm::Value * { + if (atomicWriteOp) + return moduleTranslation.lookupValue(atomicWriteOp.value()); + Block &bb = *atomicUpdateOp.region().begin(); + moduleTranslation.mapValue(*atomicUpdateOp.region().args_begin(), atomicx); + moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); + if (failed(moduleTranslation.convertBlock(bb, true, builder))) { + updateGenStatus = (atomicUpdateOp.emitError() + << "unable to convert update operation to llvm IR"); + return nullptr; + } + omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); + assert(yieldop && yieldop.results().size() == 1 && + "terminator must be omp.yield op and it must have exactly one " + "argument"); + return moduleTranslation.lookupValue(yieldop.results()[0]); + }; + // Handle ambiguous alloca, if any. + auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + llvm::UnreachableInst *unreachableInst; + if (allocaIP.getPoint() == ompLoc.IP.getPoint()) { + // Same point => split basic block and make them unambiguous. 
+ unreachableInst = builder.CreateUnreachable(); + builder.SetInsertPoint(builder.GetInsertBlock()->splitBasicBlock( + unreachableInst, "alloca_split")); + ompLoc.IP = builder.saveIP(); + unreachableInst->removeFromParent(); + } + builder.restoreIP(ompBuilder->createAtomicCapture( + ompLoc, findAllocaInsertPoint(builder, moduleTranslation), llvmAtomicX, + llvmAtomicV, llvmExpr, atomicOrdering, binop, updateFn, atomicUpdateOp, + isPostfixUpdate, isXBinopExpr)); + return updateGenStatus; +} + /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the /// mapping between reduction variables and their private equivalents to have /// been stored on the ModuleTranslation stack. Currently only supports @@ -1247,6 +1349,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( .Case([&](omp::AtomicUpdateOp op) { return convertOmpAtomicUpdate(op, builder, moduleTranslation); }) + .Case([&](omp::AtomicCaptureOp op) { + return convertOmpAtomicCapture(op, builder, moduleTranslation); + }) .Case([&](omp::SectionsOp) { return convertOmpSections(*op, builder, moduleTranslation); }) diff --git a/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir index 232df2e..171db04 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir @@ -29,3 +29,41 @@ llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %expr: i32 } llvm.return } + +// ----- + +// Checking translation when the update operation inside the atomic capture +// region does not use the region argument. 
+llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %v: !llvm.ptr<i32>, %expr: i32) { + // expected-error @+1 {{LLVM Translation failed for operation: omp.atomic.capture}} + omp.atomic.capture memory_order(seq_cst) { + omp.atomic.read %v = %x : !llvm.ptr<i32> + // expected-error @+1 {{the update operation inside the region must be a binary operation and that update operation must have the region argument as an operand}} + omp.atomic.update %x : !llvm.ptr<i32> { + ^bb0(%xval: i32): + %newval = llvm.mul %expr, %expr : i32 + omp.yield(%newval : i32) + } + } + llvm.return +} + +// ----- + +// Checking translation when the update is carried out by using more than one +// operation in the atomic capture region. +llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %v: !llvm.ptr<i32>, %expr: i32) { + // expected-error @+1 {{LLVM Translation failed for operation: omp.atomic.capture}} + omp.atomic.capture memory_order(seq_cst) { + omp.atomic.read %v = %x : !llvm.ptr<i32> + // expected-error @+1 {{exactly two operations are allowed inside an atomic update region while lowering to LLVM IR}} + omp.atomic.update %x : !llvm.ptr<i32> { + ^bb0(%xval: i32): + %t1 = llvm.mul %xval, %expr : i32 + %t2 = llvm.sdiv %t1, %expr : i32 + %newval = llvm.add %xval, %t2 : i32 + omp.yield(%newval : i32) + } + } + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index fe3a34d..26cdaf8 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -1063,6 +1063,590 @@ llvm.func @omp_atomic_update_intrinsic(%x:!llvm.ptr<i32>, %expr: i32) { // ----- + +// CHECK-LABEL: @omp_atomic_capture_prefix_update +// CHECK-SAME: (i32* %[[x:.*]], i32* %[[v:.*]], i32 %[[expr:.*]], float* %[[xf:.*]], float* %[[vf:.*]], float %[[exprf:.*]]) +llvm.func @omp_atomic_capture_prefix_update( + %x: !llvm.ptr<i32>, %v: !llvm.ptr<i32>, %expr: i32, + %xf: !llvm.ptr<f32>, %vf: !llvm.ptr<f32>, %exprf: f32) -> () { + // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 
%[[expr]] monotonic + // CHECK-NEXT: %[[newval:.*]] = add i32 %[[res]], %[[expr]] + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[res:.*]] = atomicrmw sub i32* %[[x]], i32 %[[expr]] monotonic + // CHECK-NEXT: %[[newval:.*]] = sub i32 %[[res]], %[[expr]] + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.sub %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[res:.*]] = atomicrmw and i32* %[[x]], i32 %[[expr]] monotonic + // CHECK-NEXT: %[[newval:.*]] = and i32 %[[res]], %[[expr]] + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.and %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[res:.*]] = atomicrmw or i32* %[[x]], i32 %[[expr]] monotonic + // CHECK-NEXT: %[[newval:.*]] = or i32 %[[res]], %[[expr]] + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.or %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[res:.*]] = atomicrmw xor i32* %[[x]], i32 %[[expr]] monotonic + // CHECK-NEXT: %[[newval:.*]] = xor i32 %[[res]], %[[expr]] + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.xor %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = mul i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + 
// CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.mul %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = sdiv i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.sdiv %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = udiv i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.udiv %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = shl i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.shl %xval, %expr : i32 + omp.yield(%newval 
: i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = lshr i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.lshr %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = ashr i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.ashr %xval, %expr : i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.smax.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.smax"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.smin.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // 
CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.smin"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.umax.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.umax"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.umin.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[newval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.umin"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + omp.atomic.read %v = %x : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK: %[[newval:.*]] = fadd float %{{.*}}, %[[exprf]] + // CHECK: store float %[[newval]], float* %{{.*}} + // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32* + // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store float %[[newval]], float* %[[vf]] + omp.atomic.capture 
{ + omp.atomic.update %xf : !llvm.ptr { + ^bb0(%xval: f32): + %newval = llvm.fadd %xval, %exprf : f32 + omp.yield(%newval : f32) + } + omp.atomic.read %vf = %xf : !llvm.ptr + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK: %[[newval:.*]] = fsub float %{{.*}}, %[[exprf]] + // CHECK: store float %[[newval]], float* %{{.*}} + // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32* + // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store float %[[newval]], float* %[[vf]] + omp.atomic.capture { + omp.atomic.update %xf : !llvm.ptr { + ^bb0(%xval: f32): + %newval = llvm.fsub %xval, %exprf : f32 + omp.yield(%newval : f32) + } + omp.atomic.read %vf = %xf : !llvm.ptr + } + + llvm.return +} + +// ----- + +// CHECK-LABEL: @omp_atomic_capture_postfix_update +// CHECK-SAME: (i32* %[[x:.*]], i32* %[[v:.*]], i32 %[[expr:.*]], float* %[[xf:.*]], float* %[[vf:.*]], float %[[exprf:.*]]) +llvm.func @omp_atomic_capture_postfix_update( + %x: !llvm.ptr, %v: !llvm.ptr, %expr: i32, + %xf: !llvm.ptr, %vf: !llvm.ptr, %exprf: f32) -> () { + // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] monotonic + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw sub i32* %[[x]], i32 %[[expr]] monotonic + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.sub %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw and i32* %[[x]], i32 %[[expr]] monotonic + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : 
!llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.and %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw or i32* %[[x]], i32 %[[expr]] monotonic + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.or %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw xor i32* %[[x]], i32 %[[expr]] monotonic + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.xor %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = mul i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.mul %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = sdiv i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.sdiv %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = udiv i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // 
CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.udiv %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = shl i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.shl %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = lshr i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.lshr %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = ashr i32 %[[xval]], %[[expr]] + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.ashr %xval, 
%expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.smax.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.smax"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.smin.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.smin"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.umax.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.umax"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.umin.i32(i32 %[[xval]], i32 %[[expr]]) + // CHECK-NEXT: store i32 
%[[newval]], i32* %{{.*}} + // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = "llvm.intr.umin"(%xval, %expr) : (i32, i32) -> i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK: %[[xvalf:.*]] = bitcast i32 %[[xval]] to float + // CHECK: %[[newval:.*]] = fadd float %{{.*}}, %[[exprf]] + // CHECK: store float %[[newval]], float* %{{.*}} + // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32* + // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store float %[[xvalf]], float* %[[vf]] + omp.atomic.capture { + omp.atomic.read %vf = %xf : !llvm.ptr + omp.atomic.update %xf : !llvm.ptr { + ^bb0(%xval: f32): + %newval = llvm.fadd %xval, %exprf : f32 + omp.yield(%newval : f32) + } + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK: %[[xvalf:.*]] = bitcast i32 %[[xval]] to float + // CHECK: %[[newval:.*]] = fsub float %{{.*}}, %[[exprf]] + // CHECK: store float %[[newval]], float* %{{.*}} + // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32* + // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store float %[[xvalf]], float* %[[vf]] + omp.atomic.capture { + omp.atomic.read %vf = %xf : !llvm.ptr + omp.atomic.update %xf : !llvm.ptr { + ^bb0(%xval: f32): + %newval = llvm.fsub %xval, %exprf : f32 + omp.yield(%newval : f32) + } + } + + llvm.return +} + +// ----- +// CHECK-LABEL: @omp_atomic_capture_misc +// CHECK-SAME: (i32* %[[x:.*]], i32* %[[v:.*]], i32 %[[expr:.*]], float* %[[xf:.*]], float* %[[vf:.*]], float %[[exprf:.*]]) 
+llvm.func @omp_atomic_capture_misc( + %x: !llvm.ptr, %v: !llvm.ptr, %expr: i32, + %xf: !llvm.ptr, %vf: !llvm.ptr, %exprf: f32) -> () { + // CHECK: %[[xval:.*]] = atomicrmw xchg i32* %[[x]], i32 %[[expr]] monotonic + // CHECK: store i32 %[[xval]], i32* %[[v]] + omp.atomic.capture{ + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.write %x = %expr : !llvm.ptr, i32 + } + + // CHECK: %[[xval:.*]] = phi i32 + // CHECK: %[[xvalf:.*]] = bitcast i32 %[[xval]] to float + // CHECK: store float %[[exprf]], float* %{{.*}} + // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}} + // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32* + // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic + // CHECK: store float %[[xvalf]], float* %[[vf]] + omp.atomic.capture{ + omp.atomic.read %vf = %xf : !llvm.ptr + omp.atomic.write %xf = %exprf : !llvm.ptr, f32 + } + + // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] seq_cst + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture memory_order(seq_cst) { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] acquire + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture memory_order(acquire) { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] release + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture memory_order(release) { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] monotonic + // 
CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture memory_order(relaxed) { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] acq_rel + // CHECK: store i32 %[[res]], i32* %[[v]] + omp.atomic.capture memory_order(acq_rel) { + omp.atomic.read %v = %x : !llvm.ptr + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + } + + llvm.return +} + +// ----- + // CHECK-LABEL: @omp_sections_empty llvm.func @omp_sections_empty() -> () { omp.sections { -- 2.7.4