From 39bf39f35c208109f6d5907708ee53dee2878bed Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Tue, 2 Aug 2016 23:16:09 +0000 Subject: [PATCH] [WebAssembly] Initial SIMD128 support. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Kicks off the implementation of wasm SIMD128 support (spec: https://github.com/stoklund/portable-simd/blob/master/portable-simd.md), adding support for add, sub, mul for i8x16, i16x8, i32x4, and f32x4. The spec is WIP, and might change in the near future. Patch by João Porto Differential Revision: https://reviews.llvm.org/D22686 llvm-svn: 277543 --- .../InstPrinter/WebAssemblyInstPrinter.cpp | 5 + llvm/lib/Target/WebAssembly/WebAssembly.td | 2 +- .../Target/WebAssembly/WebAssemblyArgumentMove.cpp | 7 +- .../Target/WebAssembly/WebAssemblyAsmPrinter.cpp | 13 +- .../lib/Target/WebAssembly/WebAssemblyFastISel.cpp | 80 ++++++++++- .../Target/WebAssembly/WebAssemblyISelLowering.cpp | 10 ++ .../lib/Target/WebAssembly/WebAssemblyInstrCall.td | 34 +++++ .../Target/WebAssembly/WebAssemblyInstrControl.td | 15 ++ .../Target/WebAssembly/WebAssemblyInstrFormats.td | 18 +++ .../lib/Target/WebAssembly/WebAssemblyInstrInfo.td | 11 ++ .../lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 7 +- .../lib/Target/WebAssembly/WebAssemblyPeephole.cpp | 31 +++- .../WebAssemblyPrepareForLiveIntervals.cpp | 6 +- .../Target/WebAssembly/WebAssemblyRegNumbering.cpp | 6 +- .../Target/WebAssembly/WebAssemblyRegStackify.cpp | 8 +- .../Target/WebAssembly/WebAssemblyRegisterInfo.td | 4 + llvm/test/CodeGen/WebAssembly/simd-arith.ll | 158 +++++++++++++++++++++ 17 files changed, 401 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/WebAssembly/simd-arith.ll diff --git a/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp index 267d716d..aadbf33 100644 --- a/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -210,6 +210,11 @@ const char *llvm::WebAssembly::TypeToString(MVT Ty) { return "f32"; case MVT::f64: return "f64"; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v4f32: + return "v128"; default: llvm_unreachable("unsupported type"); } diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td index 551ad93..f647349 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.td +++ b/llvm/lib/Target/WebAssembly/WebAssembly.td @@ -23,7 +23,7 @@ include "llvm/Target/Target.td" // WebAssembly Subtarget features. //===----------------------------------------------------------------------===// -def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "false", +def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "true", "Enable 128-bit SIMD">; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp index 5887f45..0ccc813 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp @@ -26,9 +26,10 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -71,6 +72,10 @@ static bool IsArgument(const MachineInstr &MI) { case WebAssembly::ARGUMENT_I64: case WebAssembly::ARGUMENT_F32: case WebAssembly::ARGUMENT_F64: + case WebAssembly::ARGUMENT_v16i8: + case WebAssembly::ARGUMENT_v8i16: + case WebAssembly::ARGUMENT_v4i32: + case WebAssembly::ARGUMENT_v4f32: return true; default: return false; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index b95d77b..7894575 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -95,7 +95,8 @@ private: MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { const TargetRegisterClass *TRC = MRI->getRegClass(RegNo); - for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) + for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64, MVT::v16i8, MVT::v8i16, + MVT::v4i32, MVT::v4f32}) if (TRC->hasType(T)) return T; DEBUG(errs() << "Unknown type for register number: " << RegNo); @@ -234,13 +235,21 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { case WebAssembly::ARGUMENT_I64: case WebAssembly::ARGUMENT_F32: case WebAssembly::ARGUMENT_F64: + case WebAssembly::ARGUMENT_v16i8: + case WebAssembly::ARGUMENT_v8i16: + case WebAssembly::ARGUMENT_v4i32: + case WebAssembly::ARGUMENT_v4f32: // These represent values which are live into the function entry, so there's // no instruction to emit. break; case WebAssembly::FALLTHROUGH_RETURN_I32: case WebAssembly::FALLTHROUGH_RETURN_I64: case WebAssembly::FALLTHROUGH_RETURN_F32: - case WebAssembly::FALLTHROUGH_RETURN_F64: { + case WebAssembly::FALLTHROUGH_RETURN_F64: + case WebAssembly::FALLTHROUGH_RETURN_v16i8: + case WebAssembly::FALLTHROUGH_RETURN_v8i16: + case WebAssembly::FALLTHROUGH_RETURN_v4i32: + case WebAssembly::FALLTHROUGH_RETURN_v4f32: { // These instructions represent the implicit return at the end of a // function body. The operand is always a pop. assert(MFI->isVRegStackified(MI->getOperand(0).getReg())); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 7bfa407..b4786e5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -113,6 +113,13 @@ private: case MVT::f32: case MVT::f64: return VT; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v4f32: + if (Subtarget->hasSIMD128()) + return VT; + break; default: break; } @@ -575,7 +582,9 @@ bool WebAssemblyFastISel::fastLowerArguments() { return false; Type *ArgTy = Arg.getType(); - if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) + if (ArgTy->isStructTy() || ArgTy->isArrayTy()) + return false; + if (!Subtarget->hasSIMD128() && ArgTy->isVectorTy()) return false; unsigned Opc; @@ -600,6 +609,22 @@ bool WebAssemblyFastISel::fastLowerArguments() { Opc = WebAssembly::ARGUMENT_F64; RC = &WebAssembly::F64RegClass; break; + case MVT::v16i8: + Opc = WebAssembly::ARGUMENT_v16i8; + RC = &WebAssembly::V128RegClass; + break; + case MVT::v8i16: + Opc = WebAssembly::ARGUMENT_v8i16; + RC = &WebAssembly::V128RegClass; + break; + case MVT::v4i32: + Opc = WebAssembly::ARGUMENT_v4i32; + RC = &WebAssembly::V128RegClass; + break; + case MVT::v4f32: + Opc = WebAssembly::ARGUMENT_v4f32; + RC = &WebAssembly::V128RegClass; + break; default: return false; } @@ -639,6 +664,9 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { if (IsVoid) { Opc = IsDirect ? WebAssembly::CALL_VOID : WebAssembly::CALL_INDIRECT_VOID; } else { + if (!Subtarget->hasSIMD128() && Call->getType()->isVectorTy()) + return false; + MVT::SimpleValueType RetTy = getSimpleType(Call->getType()); switch (RetTy) { case MVT::i1: @@ -660,6 +688,26 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { Opc = IsDirect ? WebAssembly::CALL_F64 : WebAssembly::CALL_INDIRECT_F64; ResultReg = createResultReg(&WebAssembly::F64RegClass); break; + case MVT::v16i8: + Opc = + IsDirect ? WebAssembly::CALL_v16i8 : WebAssembly::CALL_INDIRECT_v16i8; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::v8i16: + Opc = + IsDirect ? WebAssembly::CALL_v8i16 : WebAssembly::CALL_INDIRECT_v8i16; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::v4i32: + Opc = + IsDirect ? WebAssembly::CALL_v4i32 : WebAssembly::CALL_INDIRECT_v4i32; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::v4f32: + Opc = + IsDirect ? WebAssembly::CALL_v4f32 : WebAssembly::CALL_INDIRECT_v4f32; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; default: return false; } @@ -972,6 +1020,8 @@ bool WebAssemblyFastISel::selectLoad(const Instruction *I) { const LoadInst *Load = cast(I); if (Load->isAtomic()) return false; + if (!Subtarget->hasSIMD128() && Load->getType()->isVectorTy()) + return false; Address Addr; if (!computeAddress(Load->getPointerOperand(), Addr)) @@ -1027,6 +1077,9 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) { const StoreInst *Store = cast(I); if (Store->isAtomic()) return false; + if (!Subtarget->hasSIMD128() && + Store->getValueOperand()->getType()->isVectorTy()) + return false; Address Addr; if (!computeAddress(Store->getPointerOperand(), Addr)) @@ -1102,7 +1155,7 @@ bool WebAssemblyFastISel::selectBr(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addMBB(TBB) .addReg(CondReg); - + finishCondBranch(Br->getParent(), TBB, FBB); return true; } @@ -1120,6 +1173,9 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) { } Value *RV = Ret->getOperand(0); + if (!Subtarget->hasSIMD128() && RV->getType()->isVectorTy()) + return false; + unsigned Opc; switch (getSimpleType(RV->getType())) { case MVT::i1: case MVT::i8: @@ -1129,8 +1185,24 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) { case MVT::i64: Opc = WebAssembly::RETURN_I64; break; - case MVT::f32: Opc = WebAssembly::RETURN_F32; break; - case MVT::f64: Opc = WebAssembly::RETURN_F64; break; + case MVT::f32: + Opc = WebAssembly::RETURN_F32; + break; + case MVT::f64: + Opc = WebAssembly::RETURN_F64; + break; + case MVT::v16i8: + Opc = WebAssembly::RETURN_v16i8; + break; + case MVT::v8i16: + Opc = WebAssembly::RETURN_v8i16; + break; + case MVT::v4i32: + Opc = WebAssembly::RETURN_v4i32; + break; + case MVT::v4f32: + Opc = WebAssembly::RETURN_v4f32; + break; default: return false; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index cb9ab15..931e2ad 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -54,6 +54,12 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( addRegisterClass(MVT::i64, &WebAssembly::I64RegClass); addRegisterClass(MVT::f32, &WebAssembly::F32RegClass); addRegisterClass(MVT::f64, &WebAssembly::F64RegClass); + if (Subtarget->hasSIMD128()) { + addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass); + addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass); + addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass); + addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass); + } // Compute derived properties from the register classes. computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -190,6 +196,10 @@ WebAssemblyTargetLowering::getRegForInlineAsmConstraint( switch (Constraint[0]) { case 'r': assert(VT != MVT::iPTR && "Pointer MVT not expected here"); + if (Subtarget->hasSIMD128() && VT.isVector()) { + if (VT.getSizeInBits() == 128) + return std::make_pair(0U, &WebAssembly::V128RegClass); + } if (VT.isInteger() && !VT.isVector()) { if (VT.getSizeInBits() <= 32) return std::make_pair(0U, &WebAssembly::I32RegClass); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td index cfa1519..4bbe2d7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -33,11 +33,29 @@ multiclass CALL { [(set vt:$dst, (WebAssemblycall1 I32:$callee))], !strconcat(prefix, "call_indirect\t$dst, $callee")>; } + +multiclass SIMD_CALL { + def CALL_#vt : SIMD_I<(outs V128:$dst), (ins i32imm:$callee, variable_ops), + [(set (vt V128:$dst), + (WebAssemblycall1 (i32 imm:$callee)))], + !strconcat(prefix, "call\t$dst, $callee")>; + def CALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst), + (ins I32:$callee, variable_ops), + [(set (vt V128:$dst), + (WebAssemblycall1 I32:$callee))], + !strconcat(prefix, + "call_indirect\t$dst, $callee")>; +} + let Uses = [SP32, SP64], isCall = 1 in { defm : CALL; defm : CALL; defm : CALL; defm : CALL; + defm : SIMD_CALL; + defm : SIMD_CALL; + defm : SIMD_CALL; + defm : SIMD_CALL; def CALL_VOID : I<(outs), (ins i32imm:$callee, variable_ops), [(WebAssemblycall0 (i32 imm:$callee))], @@ -58,6 +76,14 @@ def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), (CALL_F32 tglobaladdr:$callee)>; def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), (CALL_F64 tglobaladdr:$callee)>; +def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v16i8 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v8i16 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v4i32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v4f32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)), (CALL_VOID tglobaladdr:$callee)>; @@ -70,5 +96,13 @@ def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), (CALL_F32 texternalsym:$callee)>; def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), (CALL_F64 texternalsym:$callee)>; +def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v16i8 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v8i16 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v4i32 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v4f32 texternalsym:$callee)>, Requires<[HasSIMD128]>; def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)), (CALL_VOID texternalsym:$callee)>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 444e275..06cb061 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -77,12 +77,27 @@ multiclass RETURN { def FALLTHROUGH_RETURN_#vt : I<(outs), (ins vt:$val), []>; } +multiclass SIMD_RETURN { + def RETURN_#vt : SIMD_I<(outs), (ins V128:$val), + [(WebAssemblyreturn (vt V128:$val))], + "return \t$val">; + // Equivalent to RETURN_#vt, for use at the end of a function when wasm + // semantics return by falling off the end of the block. + let isCodeGenOnly = 1 in + def FALLTHROUGH_RETURN_#vt : SIMD_I<(outs), (ins V128:$val), []>; +} + let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { let isReturn = 1 in { defm : RETURN; defm : RETURN; defm : RETURN; defm : RETURN; + defm : SIMD_RETURN; + defm : SIMD_RETURN; + defm : SIMD_RETURN; + defm : SIMD_RETURN; + def RETURN_VOID : I<(outs), (ins), [(WebAssemblyreturn)], "return">; // This is to RETURN_VOID what FALLTHROUGH_RETURN_#vt is to RETURN_#vt. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td index 8008dd3..66145b0 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td @@ -28,6 +28,9 @@ class I pattern, string asmstr = ""> let Pattern = pattern; } +class SIMD_I pattern, string asmstr = ""> + : I, Requires<[HasSIMD128]>; + // Unary and binary instructions, for the local types that WebAssembly supports. multiclass UnaryInt { def _I32 : I<(outs I32:$dst), (ins I32:$src), @@ -61,6 +64,21 @@ multiclass BinaryFP { [(set F64:$dst, (node F64:$lhs, F64:$rhs))], !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; } +multiclass SIMDBinary { + def _I8x16 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + [(set (v16i8 V128:$dst), (node V128:$lhs, V128:$rhs))], + !strconcat("i8x16.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + def _I16x8 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + [(set (v8i16 V128:$dst), (node V128:$lhs, V128:$rhs))], + !strconcat("i16x8.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + def _I32x4 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + [(set (v4i32 V128:$dst), (node V128:$lhs, V128:$rhs))], + !strconcat("i32x4.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + def _F32x4 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + [(set (v4f32 V128:$dst), (fnode V128:$lhs, V128:$rhs))], + !strconcat("f32x4.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + +} multiclass ComparisonInt { def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), [(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))], diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 4b31987..922279d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -100,10 +100,20 @@ multiclass ARGUMENT { def ARGUMENT_#vt : I<(outs vt:$res), (ins i32imm:$argno), [(set vt:$res, (WebAssemblyargument timm:$argno))]>; } +multiclass SIMD_ARGUMENT { + let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1 in + def ARGUMENT_#vt : SIMD_I<(outs V128:$res), (ins i32imm:$argno), + [(set (vt V128:$res), + (WebAssemblyargument timm:$argno))]>; +} defm : ARGUMENT; defm : ARGUMENT; defm : ARGUMENT; defm : ARGUMENT; +defm : SIMD_ARGUMENT; +defm : SIMD_ARGUMENT; +defm : SIMD_ARGUMENT; +defm : SIMD_ARGUMENT; let Defs = [ARGUMENTS] in { @@ -131,6 +141,7 @@ defm : LOCAL; defm : LOCAL; defm : LOCAL; defm : LOCAL; +defm : LOCAL, Requires<[HasSIMD128]>; let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in { def CONST_I32 : I<(outs I32:$res), (ins i32imm:$imm), diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 3e29906..e403534 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -12,5 +12,8 @@ /// //===----------------------------------------------------------------------===// -// TODO: Implement SIMD instructions. -// Note: use Requires<[HasSIMD128]>. +let isCommutable = 1 in { +defm ADD : SIMDBinary; +defm MUL: SIMDBinary; +} // isCommutable = 1 +defm SUB: SIMDBinary; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp index 56d44e6..1c3c104 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -108,7 +108,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); WebAssemblyFunctionInfo &MFI = *MF.getInfo(); - const auto &TII = *MF.getSubtarget().getInstrInfo(); + const auto &Subtarget = MF.getSubtarget(); + const auto &TII = *Subtarget.getInstrInfo(); const WebAssemblyTargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); auto &LibInfo = getAnalysis().getTLI(); @@ -186,6 +187,34 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F64, WebAssembly::COPY_LOCAL_F64); break; + case WebAssembly::RETURN_v16i8: + Changed |= + Subtarget.hasSIMD128() && + MaybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII, + WebAssembly::FALLTHROUGH_RETURN_v16i8, + WebAssembly::COPY_LOCAL_V128); + break; + case WebAssembly::RETURN_v8i16: + Changed |= + Subtarget.hasSIMD128() && + MaybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII, + WebAssembly::FALLTHROUGH_RETURN_v8i16, + WebAssembly::COPY_LOCAL_V128); + break; + case WebAssembly::RETURN_v4i32: + Changed |= + Subtarget.hasSIMD128() && + MaybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII, + WebAssembly::FALLTHROUGH_RETURN_v4i32, + WebAssembly::COPY_LOCAL_V128); + break; + case WebAssembly::RETURN_v4f32: + Changed |= + Subtarget.hasSIMD128() && + MaybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII, + WebAssembly::FALLTHROUGH_RETURN_v4f32, + WebAssembly::COPY_LOCAL_V128); + break; case WebAssembly::RETURN_VOID: if (!DisableWebAssemblyFallthroughReturnOpt && &MBB == &MF.back() && &MI == &MBB.back()) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp index 30444ac..7417cde 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -65,6 +65,10 @@ static bool IsArgument(const MachineInstr *MI) { case WebAssembly::ARGUMENT_I64: case WebAssembly::ARGUMENT_F32: case WebAssembly::ARGUMENT_F64: + case WebAssembly::ARGUMENT_v16i8: + case WebAssembly::ARGUMENT_v8i16: + case WebAssembly::ARGUMENT_v4i32: + case WebAssembly::ARGUMENT_v4f32: return true; default: return false; @@ -73,7 +77,7 @@ static bool IsArgument(const MachineInstr *MI) { // Test whether the given register has an ARGUMENT def. static bool HasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) { - for (auto &Def : MRI.def_instructions(Reg)) + for (const auto &Def : MRI.def_instructions(Reg)) if (IsArgument(&Def)) return true; return false; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp index 4a8fd96..5e43804 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -72,7 +72,11 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) { case WebAssembly::ARGUMENT_I32: case WebAssembly::ARGUMENT_I64: case WebAssembly::ARGUMENT_F32: - case WebAssembly::ARGUMENT_F64: { + case WebAssembly::ARGUMENT_F64: + case WebAssembly::ARGUMENT_v16i8: + case WebAssembly::ARGUMENT_v8i16: + case WebAssembly::ARGUMENT_v4i32: + case WebAssembly::ARGUMENT_v4f32: { int64_t Imm = MI.getOperand(1).getImm(); DEBUG(dbgs() << "Arg VReg " << MI.getOperand(0).getReg() << " -> WAReg " << Imm << "\n"); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 0aa3b621..5ff0085 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -418,6 +418,8 @@ static unsigned GetTeeLocalOpcode(const TargetRegisterClass *RC) { return WebAssembly::TEE_LOCAL_F32; if (RC == &WebAssembly::F64RegClass) return WebAssembly::TEE_LOCAL_F64; + if (RC == &WebAssembly::V128RegClass) + return WebAssembly::TEE_LOCAL_V128; llvm_unreachable("Unexpected register class"); } @@ -765,7 +767,11 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 || Def->getOpcode() == WebAssembly::ARGUMENT_I64 || Def->getOpcode() == WebAssembly::ARGUMENT_F32 || - Def->getOpcode() == WebAssembly::ARGUMENT_F64) + Def->getOpcode() == WebAssembly::ARGUMENT_F64 || + Def->getOpcode() == WebAssembly::ARGUMENT_v16i8 || + Def->getOpcode() == WebAssembly::ARGUMENT_v8i16 || + Def->getOpcode() == WebAssembly::ARGUMENT_v4i32 || + Def->getOpcode() == WebAssembly::ARGUMENT_v4f32) continue; // Decide which strategy to take. Prefer to move a single-use value diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td index 80a83fa..52456aa 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td @@ -39,6 +39,8 @@ def SP64 : WebAssemblyReg<"%SP64">; def F32_0 : WebAssemblyReg<"%f32.0">; def F64_0 : WebAssemblyReg<"%f64.0">; +def V128_0: WebAssemblyReg<"%v128">; + // The expression stack "register". This is an opaque entity which serves to // order uses and defs that must remain in LIFO order. def EXPR_STACK : WebAssemblyReg<"STACK">; @@ -56,3 +58,5 @@ def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32)>; def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64)>; def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>; def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>; +def V128 : WebAssemblyRegClass<[v4f32, v4i32, v16i8, v8i16], 128, (add V128_0)>; + diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll new file mode 100644 index 0000000..f0e71f2 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -0,0 +1,158 @@ +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128 +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128 +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -mattr=-simd128 | FileCheck %s --check-prefixes CHECK,NO-SIMD128 +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -mattr=-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128 + +; Test that basic SIMD128 arithmetic operations assemble as expected. + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +declare i32 @llvm.ctlz.i32(i32, i1) +declare i32 @llvm.cttz.i32(i32, i1) +declare i32 @llvm.ctpop.i32(i32) + +; ============================================================================== +; 16 x i8 +; ============================================================================== +; CHECK-LABEL: add_v16i8 +; NO-SIMD128-NOT: i8x16 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i8x16.add $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) { + %a = add <16 x i8> %x, %y + ret <16 x i8> %a +} + +; CHECK-LABEL: sub_v16i8 +; NO-SIMD128-NOT: i8x16 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i8x16.sub $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) { + %a = sub <16 x i8> %x, %y + ret <16 x i8> %a +} + +; CHECK-LABEL: mul_v16i8 +; NO-SIMD128-NOT: i8x16 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i8x16.mul $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) { + %a = mul <16 x i8> %x, %y + ret <16 x i8> %a +} + +; ============================================================================== +; 8 x i16 +; ============================================================================== +; CHECK-LABEL: add_v8i16 +; NO-SIMD128-NOT: i16x8 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i16x8.add $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) { + %a = add <8 x i16> %x, %y + ret <8 x i16> %a +} + +; CHECK-LABEL: sub_v8i16 +; NO-SIMD128-NOT: i16x8 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i16x8.sub $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) { + %a = sub <8 x i16> %x, %y + ret <8 x i16> %a +} + +; CHECK-LABEL: mul_v8i16 +; NO-SIMD128-NOT: i16x8 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i16x8.mul $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) { + %a = mul <8 x i16> %x, %y + ret <8 x i16> %a +} + +; ============================================================================== +; 4 x i32 +; ============================================================================== +; CHECK-LABEL: add_v4i32 +; NO-SIMD128-NOT: i32x4 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i32x4.add $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) { + %a = add <4 x i32> %x, %y + ret <4 x i32> %a +} + +; CHECK-LABEL: sub_v4i32 +; NO-SIMD128-NOT: i32x4 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i32x4.sub $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) { + %a = sub <4 x i32> %x, %y + ret <4 x i32> %a +} + +; CHECK-LABEL: mul_v4i32 +; NO-SIMD128-NOT: i32x4 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i32x4.mul $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) { + %a = mul <4 x i32> %x, %y + ret <4 x i32> %a +} + +; ============================================================================== +; 4 x float +; ============================================================================== +; CHECK-LABEL: add_v4f32 +; NO-SIMD128-NOT: f32x4 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: f32x4.add $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) { + %a = fadd <4 x float> %x, %y + ret <4 x float> %a +} + +; CHECK-LABEL: sub_v4f32 +; NO-SIMD128-NOT: f32x4 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: f32x4.sub $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) { + %a = fsub <4 x float> %x, %y + ret <4 x float> %a +} + +; CHECK-LABEL: mul_v4f32 +; NO-SIMD128-NOT: f32x4 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: f32x4.mul $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) { + %a = fmul <4 x float> %x, %y + ret <4 x float> %a +} + -- 2.7.4