From fed7382ef695ae81769620dbeee74a058615e148 Mon Sep 17 00:00:00 2001
From: Heejin Ahn
Date: Mon, 9 Jul 2018 22:30:51 +0000
Subject: [PATCH] [WebAssembly] Support for binary atomic RMW instructions

Summary:
This adds support for the binary atomic read-modify-write instructions:
add, sub, and, or, xor, and xchg. It does not yet support translation of
the LLVM IR atomicrmw operations (nand, max, min, umax, and umin) that
have no direct counterpart in wasm instructions.

Reviewers: dschuff

Subscribers: sbc100, jgravelle-google, sunfish, llvm-commits

Differential Revision: https://reviews.llvm.org/D49088

llvm-svn: 336615
---
 .../MCTargetDesc/WebAssemblyMCTargetDesc.h        |  84 ++
 .../Target/WebAssembly/WebAssemblyInstrAtomics.td | 324 +++++++-
 .../WebAssembly/WebAssemblySetP2AlignOperands.cpp |  42 +
 llvm/test/CodeGen/WebAssembly/atomic-rmw.ll       | 857 +++++++++++++++++++++
 llvm/test/CodeGen/WebAssembly/offset-atomics.ll   | 417 ++++++++++
 5 files changed, 1718 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/WebAssembly/atomic-rmw.ll

diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 8575601..af4ebd5 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -128,6 +128,30 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
   case WebAssembly::ATOMIC_STORE8_I32_S:
   case WebAssembly::ATOMIC_STORE8_I64:
   case WebAssembly::ATOMIC_STORE8_I64_S:
+  case WebAssembly::ATOMIC_RMW8_U_ADD_I32:
+  case WebAssembly::ATOMIC_RMW8_U_ADD_I32_S:
+  case WebAssembly::ATOMIC_RMW8_U_ADD_I64:
+  case WebAssembly::ATOMIC_RMW8_U_ADD_I64_S:
+  case WebAssembly::ATOMIC_RMW8_U_SUB_I32:
+  case WebAssembly::ATOMIC_RMW8_U_SUB_I32_S:
+  case WebAssembly::ATOMIC_RMW8_U_SUB_I64:
+  case WebAssembly::ATOMIC_RMW8_U_SUB_I64_S:
+  case WebAssembly::ATOMIC_RMW8_U_AND_I32:
+  case WebAssembly::ATOMIC_RMW8_U_AND_I32_S:
+  case WebAssembly::ATOMIC_RMW8_U_AND_I64:
+  case WebAssembly::ATOMIC_RMW8_U_AND_I64_S:
+  case WebAssembly::ATOMIC_RMW8_U_OR_I32:
+  case WebAssembly::ATOMIC_RMW8_U_OR_I32_S:
+  case WebAssembly::ATOMIC_RMW8_U_OR_I64:
+  case WebAssembly::ATOMIC_RMW8_U_OR_I64_S:
+  case WebAssembly::ATOMIC_RMW8_U_XOR_I32:
+  case WebAssembly::ATOMIC_RMW8_U_XOR_I32_S:
+  case WebAssembly::ATOMIC_RMW8_U_XOR_I64:
+  case WebAssembly::ATOMIC_RMW8_U_XOR_I64_S:
+  case WebAssembly::ATOMIC_RMW8_U_XCHG_I32:
+  case WebAssembly::ATOMIC_RMW8_U_XCHG_I32_S:
+  case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
+  case WebAssembly::ATOMIC_RMW8_U_XCHG_I64_S:
     return 0;
   case WebAssembly::LOAD16_S_I32:
   case WebAssembly::LOAD16_S_I32_S:
@@ -149,6 +173,30 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
   case WebAssembly::ATOMIC_STORE16_I32:
   case WebAssembly::ATOMIC_STORE16_I32_S:
   case WebAssembly::ATOMIC_STORE16_I64:
   case WebAssembly::ATOMIC_STORE16_I64_S:
+  case WebAssembly::ATOMIC_RMW16_U_ADD_I32:
+  case WebAssembly::ATOMIC_RMW16_U_ADD_I32_S:
+  case WebAssembly::ATOMIC_RMW16_U_ADD_I64:
+  case WebAssembly::ATOMIC_RMW16_U_ADD_I64_S:
+  case WebAssembly::ATOMIC_RMW16_U_SUB_I32:
+  case WebAssembly::ATOMIC_RMW16_U_SUB_I32_S:
+  case WebAssembly::ATOMIC_RMW16_U_SUB_I64:
+  case WebAssembly::ATOMIC_RMW16_U_SUB_I64_S:
+  case WebAssembly::ATOMIC_RMW16_U_AND_I32:
+  case WebAssembly::ATOMIC_RMW16_U_AND_I32_S:
+  case WebAssembly::ATOMIC_RMW16_U_AND_I64:
+  case WebAssembly::ATOMIC_RMW16_U_AND_I64_S:
+  case WebAssembly::ATOMIC_RMW16_U_OR_I32:
+  case WebAssembly::ATOMIC_RMW16_U_OR_I32_S:
+  case
WebAssembly::ATOMIC_RMW16_U_OR_I64: + case WebAssembly::ATOMIC_RMW16_U_OR_I64_S: + case WebAssembly::ATOMIC_RMW16_U_XOR_I32: + case WebAssembly::ATOMIC_RMW16_U_XOR_I32_S: + case WebAssembly::ATOMIC_RMW16_U_XOR_I64: + case WebAssembly::ATOMIC_RMW16_U_XOR_I64_S: + case WebAssembly::ATOMIC_RMW16_U_XCHG_I32: + case WebAssembly::ATOMIC_RMW16_U_XCHG_I32_S: + case WebAssembly::ATOMIC_RMW16_U_XCHG_I64: + case WebAssembly::ATOMIC_RMW16_U_XCHG_I64_S: return 1; case WebAssembly::LOAD_I32: case WebAssembly::LOAD_I32_S: @@ -172,6 +220,30 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) { case WebAssembly::ATOMIC_STORE_I32_S: case WebAssembly::ATOMIC_STORE32_I64: case WebAssembly::ATOMIC_STORE32_I64_S: + case WebAssembly::ATOMIC_RMW_ADD_I32: + case WebAssembly::ATOMIC_RMW_ADD_I32_S: + case WebAssembly::ATOMIC_RMW32_U_ADD_I64: + case WebAssembly::ATOMIC_RMW32_U_ADD_I64_S: + case WebAssembly::ATOMIC_RMW_SUB_I32: + case WebAssembly::ATOMIC_RMW_SUB_I32_S: + case WebAssembly::ATOMIC_RMW32_U_SUB_I64: + case WebAssembly::ATOMIC_RMW32_U_SUB_I64_S: + case WebAssembly::ATOMIC_RMW_AND_I32: + case WebAssembly::ATOMIC_RMW_AND_I32_S: + case WebAssembly::ATOMIC_RMW32_U_AND_I64: + case WebAssembly::ATOMIC_RMW32_U_AND_I64_S: + case WebAssembly::ATOMIC_RMW_OR_I32: + case WebAssembly::ATOMIC_RMW_OR_I32_S: + case WebAssembly::ATOMIC_RMW32_U_OR_I64: + case WebAssembly::ATOMIC_RMW32_U_OR_I64_S: + case WebAssembly::ATOMIC_RMW_XOR_I32: + case WebAssembly::ATOMIC_RMW_XOR_I32_S: + case WebAssembly::ATOMIC_RMW32_U_XOR_I64: + case WebAssembly::ATOMIC_RMW32_U_XOR_I64_S: + case WebAssembly::ATOMIC_RMW_XCHG_I32: + case WebAssembly::ATOMIC_RMW_XCHG_I32_S: + case WebAssembly::ATOMIC_RMW32_U_XCHG_I64: + case WebAssembly::ATOMIC_RMW32_U_XCHG_I64_S: return 2; case WebAssembly::LOAD_I64: case WebAssembly::LOAD_I64_S: @@ -185,6 +257,18 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) { case WebAssembly::ATOMIC_LOAD_I64_S: case WebAssembly::ATOMIC_STORE_I64: case WebAssembly::ATOMIC_STORE_I64_S: + case WebAssembly::ATOMIC_RMW_ADD_I64: + case WebAssembly::ATOMIC_RMW_ADD_I64_S: + case WebAssembly::ATOMIC_RMW_SUB_I64: + case WebAssembly::ATOMIC_RMW_SUB_I64_S: + case WebAssembly::ATOMIC_RMW_AND_I64: + case WebAssembly::ATOMIC_RMW_AND_I64_S: + case WebAssembly::ATOMIC_RMW_OR_I64: + case WebAssembly::ATOMIC_RMW_OR_I64_S: + case WebAssembly::ATOMIC_RMW_XOR_I64: + case WebAssembly::ATOMIC_RMW_XOR_I64_S: + case WebAssembly::ATOMIC_RMW_XCHG_I64: + case WebAssembly::ATOMIC_RMW_XCHG_I64_S: return 3; default: llvm_unreachable("Only loads and stores have p2align values"); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td index cac651d..d879932 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -330,16 +330,328 @@ def : AStorePatExternSymOffOnly; } // Predicates = [HasAtomics] //===----------------------------------------------------------------------===// -// Low-level exclusive operations +// Atomic binary read-modify-writes //===----------------------------------------------------------------------===// -// TODO: add exclusive operations here... 
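// Note (an illustrative sketch, not text from the original commit): each wasm
// binary RMW instruction atomically loads a value from memory, combines it
// with an operand, stores the result back, and returns the old value. The IR
// input that should select to "i32.atomic.rmw.add", for example, is:
//
//   %old = atomicrmw add i32* %p, i32 %v seq_cst
//
// The WebAssemblyBinRMW multiclass below stamps out one register-form and one
// stack-form ("_S") instruction per (register class, mnemonic, opcode) triple;
// the "_S" variants are the ones listed in WebAssemblyMCTargetDesc.h above.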
+let Defs = [ARGUMENTS] in {
+
+multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
+  defm "" : I<(outs rc:$dst),
+              (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
+              (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+              !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}, $val"),
+              !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+}
+
+defm ATOMIC_RMW_ADD_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.add", 0xfe1e>;
+defm ATOMIC_RMW_ADD_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.add", 0xfe1f>;
+defm ATOMIC_RMW8_U_ADD_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.add", 0xfe20>;
+defm ATOMIC_RMW16_U_ADD_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.add", 0xfe21>;
+defm ATOMIC_RMW8_U_ADD_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.add", 0xfe22>;
+defm ATOMIC_RMW16_U_ADD_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.add", 0xfe23>;
+defm ATOMIC_RMW32_U_ADD_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.add", 0xfe24>;
+
+defm ATOMIC_RMW_SUB_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.sub", 0xfe25>;
+defm ATOMIC_RMW_SUB_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.sub", 0xfe26>;
+defm ATOMIC_RMW8_U_SUB_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.sub", 0xfe27>;
+defm ATOMIC_RMW16_U_SUB_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.sub", 0xfe28>;
+defm ATOMIC_RMW8_U_SUB_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.sub", 0xfe29>;
+defm ATOMIC_RMW16_U_SUB_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.sub", 0xfe2a>;
+defm ATOMIC_RMW32_U_SUB_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.sub", 0xfe2b>;
+
+defm ATOMIC_RMW_AND_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.and", 0xfe2c>;
+defm ATOMIC_RMW_AND_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.and", 0xfe2d>;
+defm ATOMIC_RMW8_U_AND_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.and", 0xfe2e>;
+defm ATOMIC_RMW16_U_AND_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.and", 0xfe2f>;
+defm ATOMIC_RMW8_U_AND_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.and", 0xfe30>;
+defm ATOMIC_RMW16_U_AND_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.and", 0xfe31>;
+defm ATOMIC_RMW32_U_AND_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.and", 0xfe32>;
+
+defm ATOMIC_RMW_OR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.or", 0xfe33>;
+defm ATOMIC_RMW_OR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.or", 0xfe34>;
+defm ATOMIC_RMW8_U_OR_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.or", 0xfe35>;
+defm ATOMIC_RMW16_U_OR_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.or", 0xfe36>;
+defm ATOMIC_RMW8_U_OR_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.or", 0xfe37>;
+defm ATOMIC_RMW16_U_OR_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.or", 0xfe38>;
+defm ATOMIC_RMW32_U_OR_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.or", 0xfe39>;
+
+defm ATOMIC_RMW_XOR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.xor", 0xfe3a>;
+defm ATOMIC_RMW_XOR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.xor", 0xfe3b>;
+defm ATOMIC_RMW8_U_XOR_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.xor", 0xfe3c>;
+defm ATOMIC_RMW16_U_XOR_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.xor", 0xfe3d>;
+defm ATOMIC_RMW8_U_XOR_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.xor", 0xfe3e>;
+defm ATOMIC_RMW16_U_XOR_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.xor", 0xfe3f>;
+defm ATOMIC_RMW32_U_XOR_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.xor", 0xfe40>;
+
+defm ATOMIC_RMW_XCHG_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw.xchg", 0xfe41>;
+defm ATOMIC_RMW_XCHG_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw.xchg", 0xfe42>;
+defm ATOMIC_RMW8_U_XCHG_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.xchg", 0xfe43>;
+defm ATOMIC_RMW16_U_XCHG_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.xchg", 0xfe44>;
+defm ATOMIC_RMW8_U_XCHG_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.xchg", 0xfe45>;
+defm ATOMIC_RMW16_U_XCHG_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.xchg", 0xfe46>;
+defm ATOMIC_RMW32_U_XCHG_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.xchg", 0xfe47>;
+}
+
+// Select binary RMWs with no constant offset.
+class BinRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(ty (kind I32:$addr, ty:$val)), (inst 0, 0, I32:$addr, ty:$val)>;
+
+// Select binary RMWs with a constant offset.
+
+// Pattern with address + immediate offset
+class BinRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
+  Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$val)),
+      (inst 0, imm:$off, I32:$addr, ty:$val)>;
+
+class BinRMWPatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
+                ty:$val)),
+      (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>;
+
+class BinRMWPatExternalSym<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(ty (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
+                ty:$val)),
+      (inst 0, texternalsym:$off, I32:$addr, ty:$val)>;
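// As a concrete example of the folding BinRMWPatImmOff enables (a sketch
// mirroring the offset-atomics.ll tests later in this patch): with regPlusImm,
// a known-no-overflow address computation such as
//
//   %r = add nuw i32 %q, 24
//   %s = inttoptr i32 %r to i32*
//   %old = atomicrmw add i32* %s, i32 %v seq_cst
//
// selects to "i32.atomic.rmw.add $push0=, 24($0), $1", with the constant 24
// folded into the offset field. The or_is_add operand covers the case where
// DAG optimization has rewritten such an 'add' into an 'or' whose low bits
// are known to be zero.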
+
+// Select binary RMWs with just a constant offset.
+class BinRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(ty (kind imm:$off, ty:$val)),
+      (inst 0, imm:$off, (CONST_I32 0), ty:$val)>;
+
+class BinRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
+      (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>;
+
+class BinRMWPatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(ty (kind (WebAssemblywrapper texternalsym:$off), ty:$val)),
+      (inst 0, texternalsym:$off, (CONST_I32 0), ty:$val)>;
+
+// Patterns for various addressing modes.
+multiclass BinRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
+                         NI inst_64> {
+  def : BinRMWPatNoOffset<i32, rmw_32, inst_32>;
+  def : BinRMWPatNoOffset<i64, rmw_64, inst_64>;
 
-// Load-exclusives.
+  def : BinRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>;
+  def : BinRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>;
+  def : BinRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
+  def : BinRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;
 
-// Store-exclusives.
+  def : BinRMWPatGlobalAddr<i32, rmw_32, inst_32>;
+  def : BinRMWPatGlobalAddr<i64, rmw_64, inst_64>;
 
-// Store-release-exclusives.
+  def : BinRMWPatExternalSym<i32, rmw_32, inst_32>;
+  def : BinRMWPatExternalSym<i64, rmw_64, inst_64>;
 
-// And clear exclusive.
+  def : BinRMWPatOffsetOnly<i32, rmw_32, inst_32>;
+  def : BinRMWPatOffsetOnly<i64, rmw_64, inst_64>;
+
+  def : BinRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
+  def : BinRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
+
+  def : BinRMWPatExternSymOffOnly<i32, rmw_32, inst_32>;
+  def : BinRMWPatExternSymOffOnly<i64, rmw_64, inst_64>;
+}
+
+let Predicates = [HasAtomics] in {
+defm : BinRMWPattern<atomic_load_add_32, atomic_load_add_64,
+                     ATOMIC_RMW_ADD_I32, ATOMIC_RMW_ADD_I64>;
+defm : BinRMWPattern<atomic_load_sub_32, atomic_load_sub_64,
+                     ATOMIC_RMW_SUB_I32, ATOMIC_RMW_SUB_I64>;
+defm : BinRMWPattern<atomic_load_and_32, atomic_load_and_64,
+                     ATOMIC_RMW_AND_I32, ATOMIC_RMW_AND_I64>;
+defm : BinRMWPattern<atomic_load_or_32, atomic_load_or_64,
+                     ATOMIC_RMW_OR_I32, ATOMIC_RMW_OR_I64>;
+defm : BinRMWPattern<atomic_load_xor_32, atomic_load_xor_64,
+                     ATOMIC_RMW_XOR_I32, ATOMIC_RMW_XOR_I64>;
+defm : BinRMWPattern<atomic_swap_32, atomic_swap_64,
+                     ATOMIC_RMW_XCHG_I32, ATOMIC_RMW_XCHG_I64>;
+} // Predicates = [HasAtomics]
+
+// Truncating & zero-extending binary RMW patterns.
+// These are combined patterns of truncating store patterns and zero-extending
+// load patterns above.
+class zext_bin_rmw_8_32<PatFrag kind> :
+  PatFrag<(ops node:$addr, node:$val),
+          (and (i32 (kind node:$addr, node:$val)), 255)>;
+class zext_bin_rmw_16_32<PatFrag kind> :
+  PatFrag<(ops node:$addr, node:$val),
+          (and (i32 (kind node:$addr, node:$val)), 65535)>;
+class zext_bin_rmw_8_64<PatFrag kind> :
+  PatFrag<(ops node:$addr, node:$val),
+          (and (i64 (anyext (i32 (kind node:$addr,
+                                       (i32 (trunc (i64 node:$val))))))), 255)>;
+class zext_bin_rmw_16_64<PatFrag kind> :
+  PatFrag<(ops node:$addr, node:$val),
+          (and (i64 (anyext (i32 (kind node:$addr,
+                                       (i32 (trunc (i64 node:$val))))))), 65535)>;
+class zext_bin_rmw_32_64<PatFrag kind> :
+  PatFrag<(ops node:$addr, node:$val),
+          (zext (i32 (kind node:$addr, (i32 (trunc (i64 node:$val))))))>;
+
+// Truncating & sign-extending binary RMW patterns.
+// These are combined patterns of truncating store patterns and sign-extending
+// load patterns above. We match subword RMWs (for 32-bit) and anyext RMWs (for
+// 64-bit) and select a zext RMW; the next instruction will be sext_inreg which
+// is selected by itself.
+class sext_bin_rmw_8_32<PatFrag kind> :
+  PatFrag<(ops node:$addr, node:$val), (kind node:$addr, node:$val)>;
+class sext_bin_rmw_16_32<PatFrag kind> : sext_bin_rmw_8_32<kind>;
+class sext_bin_rmw_8_64<PatFrag kind> :
+  PatFrag<(ops node:$addr, node:$val),
+          (anyext (i32 (kind node:$addr, (i32 (trunc (i64 node:$val))))))>;
+class sext_bin_rmw_16_64<PatFrag kind> : sext_bin_rmw_8_64<kind>;
+// 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_s/i32
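// For example (a sketch mirroring the atomic-rmw.ll tests in this patch), a
// sign-extended subword RMW such as
//
//   %old = atomicrmw add i8* %p, i8 %t seq_cst
//   %e = sext i8 %old to i32
//
// is selected as i32.atomic.rmw8_u.add followed by a separate i32.extend8_s,
// which is why the sext PatFrags above only match the RMW node itself.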
+
+// Patterns for various addressing modes for truncating-extending binary RMWs.
+multiclass BinRMWTruncExtPattern<
+  PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64,
+  NI inst8_32, NI inst16_32, NI inst8_64, NI inst16_64, NI inst32_64> {
+  // Truncating-extending binary RMWs with no constant offset
+  def : BinRMWPatNoOffset<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatNoOffset<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatNoOffset<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatNoOffset<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  def : BinRMWPatNoOffset<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+  def : BinRMWPatNoOffset<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatNoOffset<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatNoOffset<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatNoOffset<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+
+  // Truncating-extending binary RMWs with a constant offset
+  def : BinRMWPatImmOff<i32, zext_bin_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+  def : BinRMWPatImmOff<i32, zext_bin_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
+  def : BinRMWPatImmOff<i64, zext_bin_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+  def : BinRMWPatImmOff<i64, zext_bin_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
+  def : BinRMWPatImmOff<i64, zext_bin_rmw_32_64<rmw_32>, regPlusImm, inst32_64>;
+  def : BinRMWPatImmOff<i32, zext_bin_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+  def : BinRMWPatImmOff<i32, zext_bin_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+  def : BinRMWPatImmOff<i64, zext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+  def : BinRMWPatImmOff<i64, zext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+  def : BinRMWPatImmOff<i64, zext_bin_rmw_32_64<rmw_32>, or_is_add, inst32_64>;
+
+  def : BinRMWPatImmOff<i32, sext_bin_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+  def : BinRMWPatImmOff<i32, sext_bin_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
+  def : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+  def : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
+  def : BinRMWPatImmOff<i32, sext_bin_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+  def : BinRMWPatImmOff<i32, sext_bin_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+  def : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+  def : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+
+  def : BinRMWPatGlobalAddr<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatGlobalAddr<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatGlobalAddr<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatGlobalAddr<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  def : BinRMWPatGlobalAddr<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+  def : BinRMWPatGlobalAddr<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatGlobalAddr<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatGlobalAddr<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatGlobalAddr<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+
+  def : BinRMWPatExternalSym<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatExternalSym<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatExternalSym<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatExternalSym<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  def : BinRMWPatExternalSym<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+  def : BinRMWPatExternalSym<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatExternalSym<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatExternalSym<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatExternalSym<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+
+  // Truncating-extending binary RMWs with just a constant offset
+  def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+  def : BinRMWPatOffsetOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatOffsetOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatOffsetOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatOffsetOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+
+  def : BinRMWPatGlobalAddrOffOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatGlobalAddrOffOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+  def : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+
+  def : BinRMWPatExternSymOffOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatExternSymOffOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatExternSymOffOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatExternSymOffOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  def : BinRMWPatExternSymOffOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+  def : BinRMWPatExternSymOffOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatExternSymOffOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatExternSymOffOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatExternSymOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+}
+
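// As an illustration of how these classes compose: instantiating
// zext_bin_rmw_8_32<atomic_load_add_8> produces the fragment
//
//   (and (i32 (atomic_load_add_8 node:$addr, node:$val)), 255)
//
// that is, an 8-bit atomic add whose result is consumed zero-extended. Since
// i32.atomic.rmw8_u.add already zero-extends the loaded byte, the 'and' is
// folded away and the RMW instruction is selected on its own.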
+let Predicates = [HasAtomics] in { +defm : BinRMWTruncExtPattern< + atomic_load_add_8, atomic_load_add_16, atomic_load_add_32, atomic_load_add_64, + ATOMIC_RMW8_U_ADD_I32, ATOMIC_RMW16_U_ADD_I32, + ATOMIC_RMW8_U_ADD_I64, ATOMIC_RMW16_U_ADD_I64, ATOMIC_RMW32_U_ADD_I64>; +defm : BinRMWTruncExtPattern< + atomic_load_sub_8, atomic_load_sub_16, atomic_load_sub_32, atomic_load_sub_64, + ATOMIC_RMW8_U_SUB_I32, ATOMIC_RMW16_U_SUB_I32, + ATOMIC_RMW8_U_SUB_I64, ATOMIC_RMW16_U_SUB_I64, ATOMIC_RMW32_U_SUB_I64>; +defm : BinRMWTruncExtPattern< + atomic_load_and_8, atomic_load_and_16, atomic_load_and_32, atomic_load_and_64, + ATOMIC_RMW8_U_AND_I32, ATOMIC_RMW16_U_AND_I32, + ATOMIC_RMW8_U_AND_I64, ATOMIC_RMW16_U_AND_I64, ATOMIC_RMW32_U_AND_I64>; +defm : BinRMWTruncExtPattern< + atomic_load_or_8, atomic_load_or_16, atomic_load_or_32, atomic_load_or_64, + ATOMIC_RMW8_U_OR_I32, ATOMIC_RMW16_U_OR_I32, + ATOMIC_RMW8_U_OR_I64, ATOMIC_RMW16_U_OR_I64, ATOMIC_RMW32_U_OR_I64>; +defm : BinRMWTruncExtPattern< + atomic_load_xor_8, atomic_load_xor_16, atomic_load_xor_32, atomic_load_xor_64, + ATOMIC_RMW8_U_XOR_I32, ATOMIC_RMW16_U_XOR_I32, + ATOMIC_RMW8_U_XOR_I64, ATOMIC_RMW16_U_XOR_I64, ATOMIC_RMW32_U_XOR_I64>; +defm : BinRMWTruncExtPattern< + atomic_swap_8, atomic_swap_16, atomic_swap_32, atomic_swap_64, + ATOMIC_RMW8_U_XCHG_I32, ATOMIC_RMW16_U_XCHG_I32, + ATOMIC_RMW8_U_XCHG_I64, ATOMIC_RMW16_U_XCHG_I64, ATOMIC_RMW32_U_XCHG_I64>; +} // Predicates = [HasAtomics] diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp index 6f0ae89..1422199 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -107,6 +107,48 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) { case WebAssembly::ATOMIC_LOAD8_U_I64: case WebAssembly::ATOMIC_LOAD16_U_I64: case WebAssembly::ATOMIC_LOAD32_U_I64: + case WebAssembly::ATOMIC_RMW8_U_ADD_I32: + case WebAssembly::ATOMIC_RMW8_U_ADD_I64: + case WebAssembly::ATOMIC_RMW8_U_SUB_I32: + case WebAssembly::ATOMIC_RMW8_U_SUB_I64: + case WebAssembly::ATOMIC_RMW8_U_AND_I32: + case WebAssembly::ATOMIC_RMW8_U_AND_I64: + case WebAssembly::ATOMIC_RMW8_U_OR_I32: + case WebAssembly::ATOMIC_RMW8_U_OR_I64: + case WebAssembly::ATOMIC_RMW8_U_XOR_I32: + case WebAssembly::ATOMIC_RMW8_U_XOR_I64: + case WebAssembly::ATOMIC_RMW8_U_XCHG_I32: + case WebAssembly::ATOMIC_RMW8_U_XCHG_I64: + case WebAssembly::ATOMIC_RMW16_U_ADD_I32: + case WebAssembly::ATOMIC_RMW16_U_ADD_I64: + case WebAssembly::ATOMIC_RMW16_U_SUB_I32: + case WebAssembly::ATOMIC_RMW16_U_SUB_I64: + case WebAssembly::ATOMIC_RMW16_U_AND_I32: + case WebAssembly::ATOMIC_RMW16_U_AND_I64: + case WebAssembly::ATOMIC_RMW16_U_OR_I32: + case WebAssembly::ATOMIC_RMW16_U_OR_I64: + case WebAssembly::ATOMIC_RMW16_U_XOR_I32: + case WebAssembly::ATOMIC_RMW16_U_XOR_I64: + case WebAssembly::ATOMIC_RMW16_U_XCHG_I32: + case WebAssembly::ATOMIC_RMW16_U_XCHG_I64: + case WebAssembly::ATOMIC_RMW_ADD_I32: + case WebAssembly::ATOMIC_RMW32_U_ADD_I64: + case WebAssembly::ATOMIC_RMW_SUB_I32: + case WebAssembly::ATOMIC_RMW32_U_SUB_I64: + case WebAssembly::ATOMIC_RMW_AND_I32: + case WebAssembly::ATOMIC_RMW32_U_AND_I64: + case WebAssembly::ATOMIC_RMW_OR_I32: + case WebAssembly::ATOMIC_RMW32_U_OR_I64: + case WebAssembly::ATOMIC_RMW_XOR_I32: + case WebAssembly::ATOMIC_RMW32_U_XOR_I64: + case WebAssembly::ATOMIC_RMW_XCHG_I32: + case WebAssembly::ATOMIC_RMW32_U_XCHG_I64: + case 
WebAssembly::ATOMIC_RMW_ADD_I64: + case WebAssembly::ATOMIC_RMW_SUB_I64: + case WebAssembly::ATOMIC_RMW_AND_I64: + case WebAssembly::ATOMIC_RMW_OR_I64: + case WebAssembly::ATOMIC_RMW_XOR_I64: + case WebAssembly::ATOMIC_RMW_XCHG_I64: RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo); break; case WebAssembly::STORE_I32: diff --git a/llvm/test/CodeGen/WebAssembly/atomic-rmw.ll b/llvm/test/CodeGen/WebAssembly/atomic-rmw.ll new file mode 100644 index 0000000..1b877c6 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/atomic-rmw.ll @@ -0,0 +1,857 @@ +; RUN: not llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals -mattr=+atomics,+sign-ext | FileCheck %s + +; Test atomic RMW (read-modify-write) instructions are assembled properly. + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +;===---------------------------------------------------------------------------- +; Atomic read-modify-writes: 32-bit +;===---------------------------------------------------------------------------- + +; CHECK-LABEL: add_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw.add $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @add_i32(i32* %p, i32 %v) { + %old = atomicrmw add i32* %p, i32 %v seq_cst + ret i32 %old +} + +; CHECK-LABEL: sub_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw.sub $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @sub_i32(i32* %p, i32 %v) { + %old = atomicrmw sub i32* %p, i32 %v seq_cst + ret i32 %old +} + +; CHECK-LABEL: and_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw.and $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @and_i32(i32* %p, i32 %v) { + %old = atomicrmw and i32* %p, i32 %v seq_cst + ret i32 %old +} + +; CHECK-LABEL: or_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw.or $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @or_i32(i32* %p, i32 %v) { + %old = atomicrmw or i32* %p, i32 %v seq_cst + ret i32 %old +} + +; CHECK-LABEL: xor_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw.xor $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @xor_i32(i32* %p, i32 %v) { + %old = atomicrmw xor i32* %p, i32 %v seq_cst + ret i32 %old +} + +; CHECK-LABEL: xchg_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw.xchg $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @xchg_i32(i32* %p, i32 %v) { + %old = atomicrmw xchg i32* %p, i32 %v seq_cst + ret i32 %old +} + +;===---------------------------------------------------------------------------- +; Atomic read-modify-writes: 64-bit +;===---------------------------------------------------------------------------- + +; CHECK-LABEL: add_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw.add $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @add_i64(i64* %p, i64 %v) { + %old = atomicrmw add i64* %p, i64 %v seq_cst + ret i64 %old +} + +; CHECK-LABEL: sub_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw.sub $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @sub_i64(i64* %p, i64 %v) { + %old = atomicrmw sub i64* %p, i64 %v seq_cst + ret i64 %old +} + +; CHECK-LABEL: and_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw.and $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 
@and_i64(i64* %p, i64 %v) { + %old = atomicrmw and i64* %p, i64 %v seq_cst + ret i64 %old +} + +; CHECK-LABEL: or_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw.or $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @or_i64(i64* %p, i64 %v) { + %old = atomicrmw or i64* %p, i64 %v seq_cst + ret i64 %old +} + +; CHECK-LABEL: xor_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw.xor $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @xor_i64(i64* %p, i64 %v) { + %old = atomicrmw xor i64* %p, i64 %v seq_cst + ret i64 %old +} + +; CHECK-LABEL: xchg_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw.xchg $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @xchg_i64(i64* %p, i64 %v) { + %old = atomicrmw xchg i64* %p, i64 %v seq_cst + ret i64 %old +} + +;===---------------------------------------------------------------------------- +; Atomic truncating & sign-extending RMWs +;===---------------------------------------------------------------------------- + +; add + +; CHECK-LABEL: add_sext_i8_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw8_u.add $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i32.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @add_sext_i8_i32(i8* %p, i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw add i8* %p, i8 %t seq_cst + %e = sext i8 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: add_sext_i16_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw16_u.add $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i32.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @add_sext_i16_i32(i16* %p, i32 %v) { + %t = trunc i32 %v to i16 + %old = atomicrmw add i16* %p, i16 %t seq_cst + %e = sext i16 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: add_sext_i8_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw8_u.add $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i64.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @add_sext_i8_i64(i8* %p, i64 %v) { + %t = trunc i64 %v to i8 + %old = atomicrmw add i8* %p, i8 %t seq_cst + %e = sext i8 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: add_sext_i16_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw16_u.add $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i64.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @add_sext_i16_i64(i16* %p, i64 %v) { + %t = trunc i64 %v to i16 + %old = atomicrmw add i16* %p, i16 %t seq_cst + %e = sext i16 %old to i64 + ret i64 %e +} + +; 32->64 sext rmw gets selected as i32.atomic.rmw.add, i64_extend_s/i32 +; CHECK-LABEL: add_sext_i32_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i32.wrap/i64 $push0=, $1 +; CHECK: i32.atomic.rmw.add $push1=, 0($0), $pop0{{$}} +; CHECK-NEXT: i64.extend_s/i32 $push2=, $pop1{{$}} +; CHECK-NEXT: return $pop2{{$}} +define i64 @add_sext_i32_i64(i32* %p, i64 %v) { + %t = trunc i64 %v to i32 + %old = atomicrmw add i32* %p, i32 %t seq_cst + %e = sext i32 %old to i64 + ret i64 %e +} + +; sub + +; CHECK-LABEL: sub_sext_i8_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw8_u.sub $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i32.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @sub_sext_i8_i32(i8* %p, i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw sub i8* %p, i8 %t seq_cst + %e = sext i8 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: sub_sext_i16_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw16_u.sub $push0=, 
0($0), $1{{$}} +; CHECK-NEXT: i32.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @sub_sext_i16_i32(i16* %p, i32 %v) { + %t = trunc i32 %v to i16 + %old = atomicrmw sub i16* %p, i16 %t seq_cst + %e = sext i16 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: sub_sext_i8_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw8_u.sub $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i64.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @sub_sext_i8_i64(i8* %p, i64 %v) { + %t = trunc i64 %v to i8 + %old = atomicrmw sub i8* %p, i8 %t seq_cst + %e = sext i8 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: sub_sext_i16_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw16_u.sub $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i64.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @sub_sext_i16_i64(i16* %p, i64 %v) { + %t = trunc i64 %v to i16 + %old = atomicrmw sub i16* %p, i16 %t seq_cst + %e = sext i16 %old to i64 + ret i64 %e +} + +; 32->64 sext rmw gets selected as i32.atomic.rmw.sub, i64_extend_s/i32 +; CHECK-LABEL: sub_sext_i32_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i32.wrap/i64 $push0=, $1 +; CHECK: i32.atomic.rmw.sub $push1=, 0($0), $pop0{{$}} +; CHECK-NEXT: i64.extend_s/i32 $push2=, $pop1{{$}} +; CHECK-NEXT: return $pop2{{$}} +define i64 @sub_sext_i32_i64(i32* %p, i64 %v) { + %t = trunc i64 %v to i32 + %old = atomicrmw sub i32* %p, i32 %t seq_cst + %e = sext i32 %old to i64 + ret i64 %e +} + +; and + +; CHECK-LABEL: and_sext_i8_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw8_u.and $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i32.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @and_sext_i8_i32(i8* %p, i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw and i8* %p, i8 %t seq_cst + %e = sext i8 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: and_sext_i16_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw16_u.and $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i32.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @and_sext_i16_i32(i16* %p, i32 %v) { + %t = trunc i32 %v to i16 + %old = atomicrmw and i16* %p, i16 %t seq_cst + %e = sext i16 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: and_sext_i8_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw8_u.and $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i64.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @and_sext_i8_i64(i8* %p, i64 %v) { + %t = trunc i64 %v to i8 + %old = atomicrmw and i8* %p, i8 %t seq_cst + %e = sext i8 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: and_sext_i16_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw16_u.and $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i64.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @and_sext_i16_i64(i16* %p, i64 %v) { + %t = trunc i64 %v to i16 + %old = atomicrmw and i16* %p, i16 %t seq_cst + %e = sext i16 %old to i64 + ret i64 %e +} + +; 32->64 sext rmw gets selected as i32.atomic.rmw.and, i64_extend_s/i32 +; CHECK-LABEL: and_sext_i32_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i32.wrap/i64 $push0=, $1 +; CHECK: i32.atomic.rmw.and $push1=, 0($0), $pop0{{$}} +; CHECK-NEXT: i64.extend_s/i32 $push2=, $pop1{{$}} +; CHECK-NEXT: return $pop2{{$}} +define i64 @and_sext_i32_i64(i32* %p, i64 %v) { + %t = trunc i64 %v to i32 + %old = atomicrmw and i32* %p, i32 %t seq_cst + %e = sext i32 %old to i64 + ret i64 %e +} + +; or + +; CHECK-LABEL: or_sext_i8_i32: +; CHECK-NEXT: 
.param i32, i32{{$}} +; CHECK: i32.atomic.rmw8_u.or $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i32.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @or_sext_i8_i32(i8* %p, i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw or i8* %p, i8 %t seq_cst + %e = sext i8 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: or_sext_i16_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw16_u.or $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i32.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @or_sext_i16_i32(i16* %p, i32 %v) { + %t = trunc i32 %v to i16 + %old = atomicrmw or i16* %p, i16 %t seq_cst + %e = sext i16 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: or_sext_i8_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw8_u.or $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i64.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @or_sext_i8_i64(i8* %p, i64 %v) { + %t = trunc i64 %v to i8 + %old = atomicrmw or i8* %p, i8 %t seq_cst + %e = sext i8 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: or_sext_i16_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw16_u.or $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i64.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @or_sext_i16_i64(i16* %p, i64 %v) { + %t = trunc i64 %v to i16 + %old = atomicrmw or i16* %p, i16 %t seq_cst + %e = sext i16 %old to i64 + ret i64 %e +} + +; 32->64 sext rmw gets selected as i32.atomic.rmw.or, i64_extend_s/i32 +; CHECK-LABEL: or_sext_i32_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i32.wrap/i64 $push0=, $1 +; CHECK: i32.atomic.rmw.or $push1=, 0($0), $pop0{{$}} +; CHECK-NEXT: i64.extend_s/i32 $push2=, $pop1{{$}} +; CHECK-NEXT: return $pop2{{$}} +define i64 @or_sext_i32_i64(i32* %p, i64 %v) { + %t = trunc i64 %v to i32 + %old = atomicrmw or i32* %p, i32 %t seq_cst + %e = sext i32 %old to i64 + ret i64 %e +} + +; xor + +; CHECK-LABEL: xor_sext_i8_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw8_u.xor $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i32.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @xor_sext_i8_i32(i8* %p, i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw xor i8* %p, i8 %t seq_cst + %e = sext i8 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: xor_sext_i16_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw16_u.xor $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i32.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @xor_sext_i16_i32(i16* %p, i32 %v) { + %t = trunc i32 %v to i16 + %old = atomicrmw xor i16* %p, i16 %t seq_cst + %e = sext i16 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: xor_sext_i8_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw8_u.xor $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i64.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @xor_sext_i8_i64(i8* %p, i64 %v) { + %t = trunc i64 %v to i8 + %old = atomicrmw xor i8* %p, i8 %t seq_cst + %e = sext i8 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: xor_sext_i16_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw16_u.xor $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i64.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @xor_sext_i16_i64(i16* %p, i64 %v) { + %t = trunc i64 %v to i16 + %old = atomicrmw xor i16* %p, i16 %t seq_cst + %e = sext i16 %old to i64 + ret i64 %e +} + +; 32->64 sext rmw gets selected as i32.atomic.rmw.xor, i64_extend_s/i32 +; CHECK-LABEL: xor_sext_i32_i64: +; CHECK-NEXT: .param 
i32, i64{{$}} +; CHECK: i32.wrap/i64 $push0=, $1 +; CHECK: i32.atomic.rmw.xor $push1=, 0($0), $pop0{{$}} +; CHECK-NEXT: i64.extend_s/i32 $push2=, $pop1{{$}} +; CHECK-NEXT: return $pop2{{$}} +define i64 @xor_sext_i32_i64(i32* %p, i64 %v) { + %t = trunc i64 %v to i32 + %old = atomicrmw xor i32* %p, i32 %t seq_cst + %e = sext i32 %old to i64 + ret i64 %e +} + +; xchg + +; CHECK-LABEL: xchg_sext_i8_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw8_u.xchg $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i32.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @xchg_sext_i8_i32(i8* %p, i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw xchg i8* %p, i8 %t seq_cst + %e = sext i8 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: xchg_sext_i16_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw16_u.xchg $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i32.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @xchg_sext_i16_i32(i16* %p, i32 %v) { + %t = trunc i32 %v to i16 + %old = atomicrmw xchg i16* %p, i16 %t seq_cst + %e = sext i16 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: xchg_sext_i8_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw8_u.xchg $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i64.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @xchg_sext_i8_i64(i8* %p, i64 %v) { + %t = trunc i64 %v to i8 + %old = atomicrmw xchg i8* %p, i8 %t seq_cst + %e = sext i8 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: xchg_sext_i16_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw16_u.xchg $push0=, 0($0), $1{{$}} +; CHECK-NEXT: i64.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @xchg_sext_i16_i64(i16* %p, i64 %v) { + %t = trunc i64 %v to i16 + %old = atomicrmw xchg i16* %p, i16 %t seq_cst + %e = sext i16 %old to i64 + ret i64 %e +} + +; 32->64 sext rmw gets selected as i32.atomic.rmw.xchg, i64_extend_s/i32 +; CHECK-LABEL: xchg_sext_i32_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i32.wrap/i64 $push0=, $1 +; CHECK: i32.atomic.rmw.xchg $push1=, 0($0), $pop0{{$}} +; CHECK-NEXT: i64.extend_s/i32 $push2=, $pop1{{$}} +; CHECK-NEXT: return $pop2{{$}} +define i64 @xchg_sext_i32_i64(i32* %p, i64 %v) { + %t = trunc i64 %v to i32 + %old = atomicrmw xchg i32* %p, i32 %t seq_cst + %e = sext i32 %old to i64 + ret i64 %e +} + +;===---------------------------------------------------------------------------- +; Atomic truncating & zero-extending RMWs +;===---------------------------------------------------------------------------- + +; add + +; CHECK-LABEL: add_zext_i8_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw8_u.add $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @add_zext_i8_i32(i8* %p, i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw add i8* %p, i8 %t seq_cst + %e = zext i8 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: add_zext_i16_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw16_u.add $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @add_zext_i16_i32(i16* %p, i32 %v) { + %t = trunc i32 %v to i16 + %old = atomicrmw add i16* %p, i16 %t seq_cst + %e = zext i16 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: add_zext_i8_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw8_u.add $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @add_zext_i8_i64(i8* %p, i64 %v) { + %t = trunc i64 %v to i8 + %old = atomicrmw add i8* %p, i8 %t seq_cst + %e = zext i8 %old to i64 + ret 
i64 %e +} + +; CHECK-LABEL: add_zext_i16_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw16_u.add $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @add_zext_i16_i64(i16* %p, i64 %v) { + %t = trunc i64 %v to i16 + %old = atomicrmw add i16* %p, i16 %t seq_cst + %e = zext i16 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: add_zext_i32_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw32_u.add $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @add_zext_i32_i64(i32* %p, i64 %v) { + %t = trunc i64 %v to i32 + %old = atomicrmw add i32* %p, i32 %t seq_cst + %e = zext i32 %old to i64 + ret i64 %e +} + +; sub + +; CHECK-LABEL: sub_zext_i8_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw8_u.sub $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @sub_zext_i8_i32(i8* %p, i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw sub i8* %p, i8 %t seq_cst + %e = zext i8 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: sub_zext_i16_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw16_u.sub $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @sub_zext_i16_i32(i16* %p, i32 %v) { + %t = trunc i32 %v to i16 + %old = atomicrmw sub i16* %p, i16 %t seq_cst + %e = zext i16 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: sub_zext_i8_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw8_u.sub $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @sub_zext_i8_i64(i8* %p, i64 %v) { + %t = trunc i64 %v to i8 + %old = atomicrmw sub i8* %p, i8 %t seq_cst + %e = zext i8 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: sub_zext_i16_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw16_u.sub $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @sub_zext_i16_i64(i16* %p, i64 %v) { + %t = trunc i64 %v to i16 + %old = atomicrmw sub i16* %p, i16 %t seq_cst + %e = zext i16 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: sub_zext_i32_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw32_u.sub $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @sub_zext_i32_i64(i32* %p, i64 %v) { + %t = trunc i64 %v to i32 + %old = atomicrmw sub i32* %p, i32 %t seq_cst + %e = zext i32 %old to i64 + ret i64 %e +} + +; and + +; CHECK-LABEL: and_zext_i8_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw8_u.and $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @and_zext_i8_i32(i8* %p, i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw and i8* %p, i8 %t seq_cst + %e = zext i8 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: and_zext_i16_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw16_u.and $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @and_zext_i16_i32(i16* %p, i32 %v) { + %t = trunc i32 %v to i16 + %old = atomicrmw and i16* %p, i16 %t seq_cst + %e = zext i16 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: and_zext_i8_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw8_u.and $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @and_zext_i8_i64(i8* %p, i64 %v) { + %t = trunc i64 %v to i8 + %old = atomicrmw and i8* %p, i8 %t seq_cst + %e = zext i8 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: and_zext_i16_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw16_u.and $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @and_zext_i16_i64(i16* %p, i64 %v) { + %t = trunc i64 %v to i16 + %old = atomicrmw and i16* 
%p, i16 %t seq_cst + %e = zext i16 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: and_zext_i32_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw32_u.and $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @and_zext_i32_i64(i32* %p, i64 %v) { + %t = trunc i64 %v to i32 + %old = atomicrmw and i32* %p, i32 %t seq_cst + %e = zext i32 %old to i64 + ret i64 %e +} + +; or + +; CHECK-LABEL: or_zext_i8_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw8_u.or $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @or_zext_i8_i32(i8* %p, i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw or i8* %p, i8 %t seq_cst + %e = zext i8 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: or_zext_i16_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw16_u.or $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @or_zext_i16_i32(i16* %p, i32 %v) { + %t = trunc i32 %v to i16 + %old = atomicrmw or i16* %p, i16 %t seq_cst + %e = zext i16 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: or_zext_i8_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw8_u.or $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @or_zext_i8_i64(i8* %p, i64 %v) { + %t = trunc i64 %v to i8 + %old = atomicrmw or i8* %p, i8 %t seq_cst + %e = zext i8 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: or_zext_i16_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw16_u.or $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @or_zext_i16_i64(i16* %p, i64 %v) { + %t = trunc i64 %v to i16 + %old = atomicrmw or i16* %p, i16 %t seq_cst + %e = zext i16 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: or_zext_i32_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw32_u.or $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @or_zext_i32_i64(i32* %p, i64 %v) { + %t = trunc i64 %v to i32 + %old = atomicrmw or i32* %p, i32 %t seq_cst + %e = zext i32 %old to i64 + ret i64 %e +} + +; xor + +; CHECK-LABEL: xor_zext_i8_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw8_u.xor $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @xor_zext_i8_i32(i8* %p, i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw xor i8* %p, i8 %t seq_cst + %e = zext i8 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: xor_zext_i16_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw16_u.xor $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @xor_zext_i16_i32(i16* %p, i32 %v) { + %t = trunc i32 %v to i16 + %old = atomicrmw xor i16* %p, i16 %t seq_cst + %e = zext i16 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: xor_zext_i8_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw8_u.xor $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @xor_zext_i8_i64(i8* %p, i64 %v) { + %t = trunc i64 %v to i8 + %old = atomicrmw xor i8* %p, i8 %t seq_cst + %e = zext i8 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: xor_zext_i16_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw16_u.xor $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @xor_zext_i16_i64(i16* %p, i64 %v) { + %t = trunc i64 %v to i16 + %old = atomicrmw xor i16* %p, i16 %t seq_cst + %e = zext i16 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: xor_zext_i32_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw32_u.xor $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @xor_zext_i32_i64(i32* %p, i64 %v) { + %t = trunc i64 %v to 
i32 + %old = atomicrmw xor i32* %p, i32 %t seq_cst + %e = zext i32 %old to i64 + ret i64 %e +} + +; xchg + +; CHECK-LABEL: xchg_zext_i8_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw8_u.xchg $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @xchg_zext_i8_i32(i8* %p, i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw xchg i8* %p, i8 %t seq_cst + %e = zext i8 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: xchg_zext_i16_i32: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw16_u.xchg $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @xchg_zext_i16_i32(i16* %p, i32 %v) { + %t = trunc i32 %v to i16 + %old = atomicrmw xchg i16* %p, i16 %t seq_cst + %e = zext i16 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: xchg_zext_i8_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw8_u.xchg $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @xchg_zext_i8_i64(i8* %p, i64 %v) { + %t = trunc i64 %v to i8 + %old = atomicrmw xchg i8* %p, i8 %t seq_cst + %e = zext i8 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: xchg_zext_i16_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw16_u.xchg $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @xchg_zext_i16_i64(i16* %p, i64 %v) { + %t = trunc i64 %v to i16 + %old = atomicrmw xchg i16* %p, i16 %t seq_cst + %e = zext i16 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: xchg_zext_i32_i64: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw32_u.xchg $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @xchg_zext_i32_i64(i32* %p, i64 %v) { + %t = trunc i64 %v to i32 + %old = atomicrmw xchg i32* %p, i32 %t seq_cst + %e = zext i32 %old to i64 + ret i64 %e +} diff --git a/llvm/test/CodeGen/WebAssembly/offset-atomics.ll b/llvm/test/CodeGen/WebAssembly/offset-atomics.ll index 75074dc99..5ead954 100644 --- a/llvm/test/CodeGen/WebAssembly/offset-atomics.ll +++ b/llvm/test/CodeGen/WebAssembly/offset-atomics.ll @@ -653,3 +653,420 @@ define void @store_i8_i64_with_folded_or_offset(i32 %x, i64 %v) { store atomic i8 %t, i8* %arrayidx seq_cst, align 1 ret void } + +;===---------------------------------------------------------------------------- +; Atomic binary read-modify-writes: 32-bit +;===---------------------------------------------------------------------------- + +; There are several RMW instructions, but here we only test 'add' as an example. + +; Basic RMW. + +; CHECK-LABEL: rmw_add_i32_no_offset: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK: i32.atomic.rmw.add $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @rmw_add_i32_no_offset(i32* %p, i32 %v) { + %old = atomicrmw add i32* %p, i32 %v seq_cst + ret i32 %old +} + +; With an nuw add, we can fold an offset. + +; CHECK-LABEL: rmw_add_i32_with_folded_offset: +; CHECK: i32.atomic.rmw.add $push0=, 24($0), $1{{$}} +define i32 @rmw_add_i32_with_folded_offset(i32* %p, i32 %v) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %old = atomicrmw add i32* %s, i32 %v seq_cst + ret i32 %old +} + +; With an inbounds gep, we can fold an offset. + +; CHECK-LABEL: rmw_add_i32_with_folded_gep_offset: +; CHECK: i32.atomic.rmw.add $push0=, 24($0), $1{{$}} +define i32 @rmw_add_i32_with_folded_gep_offset(i32* %p, i32 %v) { + %s = getelementptr inbounds i32, i32* %p, i32 6 + %old = atomicrmw add i32* %s, i32 %v seq_cst + ret i32 %old +} + +; We can't fold a negative offset though, even with an inbounds gep. 
+ +; CHECK-LABEL: rmw_add_i32_with_unfolded_gep_negative_offset: +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}} +define i32 @rmw_add_i32_with_unfolded_gep_negative_offset(i32* %p, i32 %v) { + %s = getelementptr inbounds i32, i32* %p, i32 -6 + %old = atomicrmw add i32* %s, i32 %v seq_cst + ret i32 %old +} + +; Without nuw, and even with nsw, we can't fold an offset. + +; CHECK-LABEL: rmw_add_i32_with_unfolded_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}} +define i32 @rmw_add_i32_with_unfolded_offset(i32* %p, i32 %v) { + %q = ptrtoint i32* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %old = atomicrmw add i32* %s, i32 %v seq_cst + ret i32 %old +} + +; Without inbounds, we can't fold a gep offset. + +; CHECK-LABEL: rmw_add_i32_with_unfolded_gep_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}} +define i32 @rmw_add_i32_with_unfolded_gep_offset(i32* %p, i32 %v) { + %s = getelementptr i32, i32* %p, i32 6 + %old = atomicrmw add i32* %s, i32 %v seq_cst + ret i32 %old +} + +; When loading from a fixed address, materialize a zero. + +; CHECK-LABEL: rmw_add_i32_from_numeric_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.rmw.add $push1=, 42($pop0), $0{{$}} +define i32 @rmw_add_i32_from_numeric_address(i32 %v) { + %s = inttoptr i32 42 to i32* + %old = atomicrmw add i32* %s, i32 %v seq_cst + ret i32 %old +} + +; CHECK-LABEL: rmw_add_i32_from_global_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.rmw.add $push1=, gv($pop0), $0{{$}} +define i32 @rmw_add_i32_from_global_address(i32 %v) { + %old = atomicrmw add i32* @gv, i32 %v seq_cst + ret i32 %old +} + +;===---------------------------------------------------------------------------- +; Atomic binary read-modify-writes: 64-bit +;===---------------------------------------------------------------------------- + +; Basic RMW. + +; CHECK-LABEL: rmw_add_i64_no_offset: +; CHECK-NEXT: .param i32, i64{{$}} +; CHECK: i64.atomic.rmw.add $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @rmw_add_i64_no_offset(i64* %p, i64 %v) { + %old = atomicrmw add i64* %p, i64 %v seq_cst + ret i64 %old +} + +; With an nuw add, we can fold an offset. + +; CHECK-LABEL: rmw_add_i64_with_folded_offset: +; CHECK: i64.atomic.rmw.add $push0=, 24($0), $1{{$}} +define i64 @rmw_add_i64_with_folded_offset(i64* %p, i64 %v) { + %q = ptrtoint i64* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i64* + %old = atomicrmw add i64* %s, i64 %v seq_cst + ret i64 %old +} + +; With an inbounds gep, we can fold an offset. + +; CHECK-LABEL: rmw_add_i64_with_folded_gep_offset: +; CHECK: i64.atomic.rmw.add $push0=, 24($0), $1{{$}} +define i64 @rmw_add_i64_with_folded_gep_offset(i64* %p, i64 %v) { + %s = getelementptr inbounds i64, i64* %p, i32 3 + %old = atomicrmw add i64* %s, i64 %v seq_cst + ret i64 %old +} + +; We can't fold a negative offset though, even with an inbounds gep. 
+ +; CHECK-LABEL: rmw_add_i64_with_unfolded_gep_negative_offset: +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}} +define i64 @rmw_add_i64_with_unfolded_gep_negative_offset(i64* %p, i64 %v) { + %s = getelementptr inbounds i64, i64* %p, i32 -3 + %old = atomicrmw add i64* %s, i64 %v seq_cst + ret i64 %old +} + +; Without nuw, and even with nsw, we can't fold an offset. + +; CHECK-LABEL: rmw_add_i64_with_unfolded_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}} +define i64 @rmw_add_i64_with_unfolded_offset(i64* %p, i64 %v) { + %q = ptrtoint i64* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i64* + %old = atomicrmw add i64* %s, i64 %v seq_cst + ret i64 %old +} + +; Without inbounds, we can't fold a gep offset. + +; CHECK-LABEL: rmw_add_i64_with_unfolded_gep_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}} +define i64 @rmw_add_i64_with_unfolded_gep_offset(i64* %p, i64 %v) { + %s = getelementptr i64, i64* %p, i32 3 + %old = atomicrmw add i64* %s, i64 %v seq_cst + ret i64 %old +} + +;===---------------------------------------------------------------------------- +; Atomic truncating & sign-extending binary RMWs +;===---------------------------------------------------------------------------- + +; Fold an offset into a sign-extending rmw. + +; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_offset: +; CHECK: i32.atomic.rmw8_u.add $push0=, 24($0), $1{{$}} +; CHECK-NEXT: i32.extend8_s $push1=, $pop0 +define i32 @rmw_add_i8_i32_s_with_folded_offset(i8* %p, i32 %v) { + %q = ptrtoint i8* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i8* + %t = trunc i32 %v to i8 + %old = atomicrmw add i8* %s, i8 %t seq_cst + %u = sext i8 %old to i32 + ret i32 %u +} + +; 32->64 sext rmw gets selected as i32.atomic.rmw.add, i64_extend_s/i32 +; CHECK-LABEL: rmw_add_i32_i64_s_with_folded_offset: +; CHECK: i32.wrap/i64 $push0=, $1 +; CHECK-NEXT: i32.atomic.rmw.add $push1=, 24($0), $pop0{{$}} +; CHECK-NEXT: i64.extend_s/i32 $push2=, $pop1{{$}} +define i64 @rmw_add_i32_i64_s_with_folded_offset(i32* %p, i64 %v) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = trunc i64 %v to i32 + %old = atomicrmw add i32* %s, i32 %t seq_cst + %u = sext i32 %old to i64 + ret i64 %u +} + +; Fold a gep offset into a sign-extending rmw. 
+ +; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_gep_offset: +; CHECK: i32.atomic.rmw8_u.add $push0=, 24($0), $1{{$}} +; CHECK-NEXT: i32.extend8_s $push1=, $pop0 +define i32 @rmw_add_i8_i32_s_with_folded_gep_offset(i8* %p, i32 %v) { + %s = getelementptr inbounds i8, i8* %p, i32 24 + %t = trunc i32 %v to i8 + %old = atomicrmw add i8* %s, i8 %t seq_cst + %u = sext i8 %old to i32 + ret i32 %u +} + +; CHECK-LABEL: rmw_add_i16_i32_s_with_folded_gep_offset: +; CHECK: i32.atomic.rmw16_u.add $push0=, 48($0), $1{{$}} +; CHECK-NEXT: i32.extend16_s $push1=, $pop0 +define i32 @rmw_add_i16_i32_s_with_folded_gep_offset(i16* %p, i32 %v) { + %s = getelementptr inbounds i16, i16* %p, i32 24 + %t = trunc i32 %v to i16 + %old = atomicrmw add i16* %s, i16 %t seq_cst + %u = sext i16 %old to i32 + ret i32 %u +} + +; CHECK-LABEL: rmw_add_i16_i64_s_with_folded_gep_offset: +; CHECK: i64.atomic.rmw16_u.add $push0=, 48($0), $1{{$}} +; CHECK-NEXT: i64.extend16_s $push1=, $pop0 +define i64 @rmw_add_i16_i64_s_with_folded_gep_offset(i16* %p, i64 %v) { + %s = getelementptr inbounds i16, i16* %p, i32 24 + %t = trunc i64 %v to i16 + %old = atomicrmw add i16* %s, i16 %t seq_cst + %u = sext i16 %old to i64 + ret i64 %u +} + +; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as +; an 'add' if the or'ed bits are known to be zero. + +; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_or_offset: +; CHECK: i32.atomic.rmw8_u.add $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}} +; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} +define i32 @rmw_add_i8_i32_s_with_folded_or_offset(i32 %x, i32 %v) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %t = trunc i32 %v to i8 + %old = atomicrmw add i8* %arrayidx, i8 %t seq_cst + %conv = sext i8 %old to i32 + ret i32 %conv +} + +; CHECK-LABEL: rmw_add_i8_i64_s_with_folded_or_offset: +; CHECK: i64.atomic.rmw8_u.add $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}} +; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} +define i64 @rmw_add_i8_i64_s_with_folded_or_offset(i32 %x, i64 %v) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %t = trunc i64 %v to i8 + %old = atomicrmw add i8* %arrayidx, i8 %t seq_cst + %conv = sext i8 %old to i64 + ret i64 %conv +} + +; When loading from a fixed address, materialize a zero. + +; CHECK-LABEL: rmw_add_i16_i32_s_from_numeric_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.rmw16_u.add $push1=, 42($pop0), $0{{$}} +; CHECK-NEXT: i32.extend16_s $push2=, $pop1 +define i32 @rmw_add_i16_i32_s_from_numeric_address(i32 %v) { + %s = inttoptr i32 42 to i16* + %t = trunc i32 %v to i16 + %old = atomicrmw add i16* %s, i16 %t seq_cst + %u = sext i16 %old to i32 + ret i32 %u +} + +; CHECK-LABEL: rmw_add_i8_i32_s_from_global_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.rmw8_u.add $push1=, gv8($pop0), $0{{$}} +; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}} +define i32 @rmw_add_i8_i32_s_from_global_address(i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw add i8* @gv8, i8 %t seq_cst + %u = sext i8 %old to i32 + ret i32 %u +} + +;===---------------------------------------------------------------------------- +; Atomic truncating & zero-extending binary RMWs +;===---------------------------------------------------------------------------- + +; Fold an offset into a zero-extending rmw. 
+ +; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_offset: +; CHECK: i32.atomic.rmw8_u.add $push0=, 24($0), $1{{$}} +define i32 @rmw_add_i8_i32_z_with_folded_offset(i8* %p, i32 %v) { + %q = ptrtoint i8* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i8* + %t = trunc i32 %v to i8 + %old = atomicrmw add i8* %s, i8 %t seq_cst + %u = zext i8 %old to i32 + ret i32 %u +} + +; CHECK-LABEL: rmw_add_i32_i64_z_with_folded_offset: +; CHECK: i64.atomic.rmw32_u.add $push0=, 24($0), $1{{$}} +define i64 @rmw_add_i32_i64_z_with_folded_offset(i32* %p, i64 %v) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = trunc i64 %v to i32 + %old = atomicrmw add i32* %s, i32 %t seq_cst + %u = zext i32 %old to i64 + ret i64 %u +} + +; Fold a gep offset into a zero-extending rmw. + +; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_gep_offset: +; CHECK: i32.atomic.rmw8_u.add $push0=, 24($0), $1{{$}} +define i32 @rmw_add_i8_i32_z_with_folded_gep_offset(i8* %p, i32 %v) { + %s = getelementptr inbounds i8, i8* %p, i32 24 + %t = trunc i32 %v to i8 + %old = atomicrmw add i8* %s, i8 %t seq_cst + %u = zext i8 %old to i32 + ret i32 %u +} + +; CHECK-LABEL: rmw_add_i16_i32_z_with_folded_gep_offset: +; CHECK: i32.atomic.rmw16_u.add $push0=, 48($0), $1{{$}} +define i32 @rmw_add_i16_i32_z_with_folded_gep_offset(i16* %p, i32 %v) { + %s = getelementptr inbounds i16, i16* %p, i32 24 + %t = trunc i32 %v to i16 + %old = atomicrmw add i16* %s, i16 %t seq_cst + %u = zext i16 %old to i32 + ret i32 %u +} + +; CHECK-LABEL: rmw_add_i16_i64_z_with_folded_gep_offset: +; CHECK: i64.atomic.rmw16_u.add $push0=, 48($0), $1{{$}} +define i64 @rmw_add_i16_i64_z_with_folded_gep_offset(i16* %p, i64 %v) { + %s = getelementptr inbounds i16, i16* %p, i32 24 + %t = trunc i64 %v to i16 + %old = atomicrmw add i16* %s, i16 %t seq_cst + %u = zext i16 %old to i64 + ret i64 %u +} + +; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as +; an 'add' if the or'ed bits are known to be zero. + +; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_or_offset: +; CHECK: i32.atomic.rmw8_u.add $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}} +define i32 @rmw_add_i8_i32_z_with_folded_or_offset(i32 %x, i32 %v) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %t = trunc i32 %v to i8 + %old = atomicrmw add i8* %arrayidx, i8 %t seq_cst + %conv = zext i8 %old to i32 + ret i32 %conv +} + +; CHECK-LABEL: rmw_add_i8_i64_z_with_folded_or_offset: +; CHECK: i64.atomic.rmw8_u.add $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}} +define i64 @rmw_add_i8_i64_z_with_folded_or_offset(i32 %x, i64 %v) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %t = trunc i64 %v to i8 + %old = atomicrmw add i8* %arrayidx, i8 %t seq_cst + %conv = zext i8 %old to i64 + ret i64 %conv +} + +; When loading from a fixed address, materialize a zero. 
+ +; CHECK-LABEL: rmw_add_i16_i32_z_from_numeric_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.rmw16_u.add $push1=, 42($pop0), $0{{$}} +define i32 @rmw_add_i16_i32_z_from_numeric_address(i32 %v) { + %s = inttoptr i32 42 to i16* + %t = trunc i32 %v to i16 + %old = atomicrmw add i16* %s, i16 %t seq_cst + %u = zext i16 %old to i32 + ret i32 %u +} + +; CHECK-LABEL: rmw_add_i8_i32_z_from_global_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.rmw8_u.add $push1=, gv8($pop0), $0{{$}} +define i32 @rmw_add_i8_i32_z_from_global_address(i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw add i8* @gv8, i8 %t seq_cst + %u = zext i8 %old to i32 + ret i32 %u +} + +; i8 return value should test anyext RMWs + +; CHECK-LABEL: rmw_add_i8_i32_retvalue: +; CHECK: i32.atomic.rmw8_u.add $push0=, 0($0), $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i8 @rmw_add_i8_i32_retvalue(i8 *%p, i32 %v) { + %t = trunc i32 %v to i8 + %old = atomicrmw add i8* %p, i8 %t seq_cst + ret i8 %old +} -- 2.7.4
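; For reference (an illustrative sketch, not part of the patch): the atomicrmw
; forms the summary lists as unsupported (nand, max, min, umax, and umin) have
; no single wasm counterpart, so an input like the following is not selected
; by the patterns added here:
;
;   define i32 @nand_i32(i32* %p, i32 %v) {
;     %old = atomicrmw nand i32* %p, i32 %v seq_cst
;     ret i32 %old
;   }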