--- /dev/null
+//===- arm_mve_defs.td - definitions and infrastructure for arm_mve.td ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The definitions in this file are designed to work in close conjunction with
+// clang/utils/TableGen/MveEmitter.cpp. Comments in there will probably be
+// useful as well.
+//
+//===----------------------------------------------------------------------===//
+
+// -----------------------------------------------------------------------------
+// Forward declarations.
+
+// Forward-declared so that ComplexType and Immediate below can derive from
+// it; the concrete interpretation of Type records lives in the
+// MveEmitter.cpp Tablegen backend.
+class Type;
+
+// -----------------------------------------------------------------------------
+// Dummy record used as the dag operator for the argument list of an intrinsic.
+//
+// We store arguments as a dag rather than a list<Type> so that we can give
+// each one a name, to be used in codegen. For example, (args Vector:$a,
+// Scalar:$b) defines the names $a and $b which the specification of the code
+// for that intrinsic can refer to.
+
+def args;
+
+// -----------------------------------------------------------------------------
+// Family of nodes for use in the codegen dag for an intrinsic, corresponding
+// roughly to operations in LLVM IR. More precisely, they correspond to calls
+// to methods of llvm::IRBuilder.
+class IRBuilder<string func_> {
+ string func = func_; // the method name
+ list<int> address_params = []; // indices of parameters with type Address
+ list<int> int_constant_params = []; // indices of plain integer parameters
+}
+
+// One def per IRBuilder method the intrinsic specifications need. 'load' and
+// 'store' mark their pointer operand as Address-typed; 'xval'
+// (CreateExtractValue) takes its index operand as a plain integer constant
+// rather than an llvm::Value.
+def add: IRBuilder<"CreateAdd">;
+def or: IRBuilder<"CreateOr">;
+def and: IRBuilder<"CreateAnd">;
+def sub: IRBuilder<"CreateSub">;
+def shl: IRBuilder<"CreateShl">;
+def lshr: IRBuilder<"CreateLShr">;
+def fadd: IRBuilder<"CreateFAdd">;
+def fsub: IRBuilder<"CreateFSub">;
+def load: IRBuilder<"CreateLoad"> { let address_params = [0]; }
+def store: IRBuilder<"CreateStore"> { let address_params = [1]; }
+def xval: IRBuilder<"CreateExtractValue"> { let int_constant_params = [1]; }
+
+// Another node class you can use in the codegen dag. This one corresponds to
+// an IR intrinsic function, which has to be specialized to a particular list
+// of types.
+class IRInt<string name_, list<Type> params_ = [], bit appendKind_ = 0> {
+ string intname = name_; // base name of the intrinsic, minus "arm_mve_"
+ list<Type> params = params_; // list of parameter types
+
+ // If this flag is set, then the IR intrinsic name will get a suffix _s, _u
+ // or _f depending on whether the main parameter type of the ACLE intrinsic
+ // being generated is a signed integer, unsigned integer, or float. Mostly
+ // this is useful for signed vs unsigned integers, because the ACLE
+ // intrinsics and the source-level integer types distinguish them, but at IR
+ // level the distinction has moved from the type system into the operations
+ // and you just have i32 or i16 etc. So when an IR intrinsic has to vary with
+ // signedness, you set this bit, and then you can still put the signed and
+ // unsigned versions in the same subclass of Intrinsic, and the Tablegen
+ // backend will take care of adding _s or _u as appropriate in each instance.
+ bit appendKind = appendKind_;
+}
+
+// The 'seq' node in a codegen dag specifies a set of IR operations to be
+// performed in order. It has the special ability to define extra variable
+// names, on top of the ones that refer to the intrinsic's parameters. For
+// example:
+//
+// (seq (foo this, that):$a,
+// (bar this, $a):$b,
+// (add $a, $b))
+//
+// defines the name $a to refer to the return value of the 'foo' operation;
+// then the 'bar' operation uses $a as one of its arguments, and the return
+// value of that is assigned the name $b; finally, $a and $b are added to give
+// the return value of the seq construction as a whole.
+def seq;
+
+// If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it
+// indicates that the IR generation for that intrinsic is done by handwritten
+// C++ and not autogenerated at all. The effect in the MVE builtin codegen
+// function is to break out of the main switch and fall through to the
+// manual-codegen cases below it, having set the CustomCodeGenType enumerated
+// variable to the value given by the 'type' string here.
+class CustomCodegen<string type_> { string type = type_; } // CustomCodeGenType value
+
+// -----------------------------------------------------------------------------
+// System for building up complex instances of Type from simple ones.
+
+// ComplexType is used to represent any more complicated type: vectors,
+// multivectors, pointers etc. Its dag argument specifies how the type should
+// be constructed from simpler types. The operator of the dag will always be an
+// instance of ComplexTypeOp, defined below.
+class ComplexType<dag spec_>: Type { dag spec = spec_; }
+
+// Operators you can use in the ComplexType spec dag. These are an intermediate
+// layer, interpreted by MveEmitter::getType() in the Tablegen backend, and
+// only used in the definitions below. Actual intrinsic definitions in
+// arm_mve.td will use the defs defined below here.
+class ComplexTypeOp;
+def CTO_Parameter: ComplexTypeOp; // the intrinsic's main parameter type
+def CTO_Vec: ComplexTypeOp; // 128-bit vector of a given scalar
+def CTO_Pred: ComplexTypeOp; // predicate matching a given scalar's lane count
+class CTO_Tuple<int n_>: ComplexTypeOp { int n = n_; } // n-vector multivector
+class CTO_Pointer<bit const_>: ComplexTypeOp { bit const = const_; } // (const?) pointer
+class CTO_Sign<bit signed_>: ComplexTypeOp { bit signed = signed_; } // reinterpret signedness
+
+// -----------------------------------------------------------------------------
+// Instances of Type intended to be used directly in the specification of an
+// intrinsic in arm_mve.td.
+
+// The type Void can be used for the return type of an intrinsic, and as the
+// parameter type for intrinsics that aren't actually parameterised by any kind
+// of _s32 / _f16 / _u8 suffix.
+def Void : Type;
+
+// Primitive types: base class, and an instance for the set of scalar integer
+// and floating types that MVE uses.
+class PrimitiveType<string kind_, int size_>: Type {
+ string kind = kind_; // "s", "u" or "f"
+ int size = size_; // size in bits
+}
+// The type records defined by these foreaches have names like s32, f16, u8.
+foreach size = [8, 16, 32, 64] in
+ foreach kind = ["u", "s"] in
+ def kind # size: PrimitiveType<kind, size>;
+foreach size = [16, 32] in
+ foreach kind = ["f"] in
+ def kind # size: PrimitiveType<kind, size>;
+
+// VecOf<t> expects t to be a scalar, and gives a 128-bit vector of whatever it
+// is.
+class VecOf<Type t>: ComplexType<(CTO_Vec t)>;
+
+// PredOf expects t to be a scalar, and expands to a predicate vector which
+// (logically speaking) has the same number of lanes as VecOf<t> would.
+class PredOf<Type t>: ComplexType<(CTO_Pred t)>;
+
+// Scalar expands to whatever is the main parameter type of the current
+// intrinsic. Vector and Predicate expand to the vector and predicate types
+// corresponding to that.
+def Scalar: ComplexType<(CTO_Parameter)>;
+def Vector: VecOf<Scalar>;
+def Predicate: PredOf<Scalar>;
+
+// MultiVector<n> expands to a type containing n instances of Vector. (There's
+// no need to define this for a general underlying vector type, since it's only
+// used by vld2q and friends, which don't need that generality.)
+class MultiVector<int n>: ComplexType<(CTO_Tuple<n> Vector)>;
+
+// Ptr<t> and CPtr<t> expand to a pointer to t, or a pointer to const t,
+// respectively.
+class Ptr<Type t>: ComplexType<(CTO_Pointer<0> t)>;
+class CPtr<Type t>: ComplexType<(CTO_Pointer<1> t)>;
+
+// Unsigned<t> expects t to be a scalar, and expands to the unsigned integer
+// scalar of the same size. So it returns u16 if you give it s16 or f16 (or
+// u16 itself).
+class Unsigned<Type t>: ComplexType<(CTO_Sign<0> t)>;
+
+// -----------------------------------------------------------------------------
+// Internal definitions for specifying immediate arguments for an intrinsic.
+
+class ImmediateBounds;
+// An argument that must be a compile-time integer constant of the given
+// type, whose validity constraint is described by 'bounds'.
+class Immediate<Type type_, ImmediateBounds bounds_>: Type {
+ Type type = type_;
+ ImmediateBounds bounds = bounds_;
+ string extra; // optional extra-constraint tag, interpreted by the backend
+ string extraarg; // optional argument string accompanying 'extra'
+}
+// Immediate must lie in [lo, hi] inclusive.
+class IB_ConstRange<int lo_, int hi_> : ImmediateBounds {
+ int lo = lo_;
+ int hi = hi_;
+}
+def IB_UEltValue : ImmediateBounds; // any value an unsigned lane can hold
+def IB_LaneIndex : ImmediateBounds; // a valid lane index for the vector type
+// Range [base, base + N - 1], where N is the lane size in bits (see
+// imm_1toN / imm_0toNm1 below).
+class IB_EltBit<int base_> : ImmediateBounds { int base = base_; }
+
+// -----------------------------------------------------------------------------
+// End-user definitions for immediate arguments.
+
+// imm_simd_restrictive and imm_simd_vmvn are used for the immediate operands
+// to intrinsics like vmvnq or vorrq. imm_simd_restrictive has to be an 8-bit
+// value shifted left by a whole number of bytes; imm_simd_vmvn can also be of
+// the form 0xXXFF for some byte value XX.
+def imm_simd_restrictive : Immediate<u32, IB_UEltValue> {
+ let extra = "ShiftedByte";
+}
+def imm_simd_vmvn : Immediate<u32, IB_UEltValue> {
+ let extra = "ShiftedByteOrXXFF";
+}
+
+// imm_1toN can take any value from 1 to N inclusive, where N is the number of
+// bits in the main parameter type. (E.g. an immediate shift count, in an
+// intrinsic that shifts every lane of a vector by the same amount.)
+//
+// imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1
+// inclusive.
+def imm_1toN : Immediate<u32, IB_EltBit<1>>;
+def imm_0toNm1 : Immediate<u32, IB_EltBit<0>>;
+
+// imm_lane has to be the index of a vector lane in the main vector type, i.e.
+// it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)
+def imm_lane : Immediate<u32, IB_LaneIndex>;
+
+// imm_1to32 can be in the range 1 to 32, unconditionally. (e.g. scalar shift
+// intrinsics)
+def imm_1to32 : Immediate<u32, IB_ConstRange<1, 32>>;
+
+// imm_1248 can be 1, 2, 4 or 8. (e.g. vidupq)
+def imm_1248 : Immediate<u32, IB_ConstRange<1, 8>> {
+ let extra = "Power2";
+}
+
+// imm_mem7bit<n> is a valid immediate offset for a load/store intrinsic whose
+// memory access size is n bytes (e.g. 1 for vldrb_[whatever], 2 for vldrh,
+// ...). The set of valid immediates for these is {0*n, 1*n, ..., 127*n}.
+class imm_mem7bit<int membytes>
+ : Immediate<u32, IB_ConstRange<0, !mul(membytes, 127)>> {
+ let extra = !if(!eq(membytes, 1), ?, "Multiple"); // byte access needs no multiple-of-n check
+ let extraarg = !cast<string>(membytes);
+}
+
+// -----------------------------------------------------------------------------
+// Specification of ways that the full name of an intrinsic can be mapped to
+// its shorter polymorphic name.
+
+class PolymorphicNameType<int nt_, string x_> {
+ int NumTypeSuffixesToDiscard = nt_; // how many trailing _type suffixes to drop
+ string ExtraSuffixToDiscard = x_; // an additional suffix (e.g. "n", "wb") to drop, or ?
+}
+
+// PNT_None: the intrinsic is not polymorphic at all, so its short name is the
+// same as its long name. (E.g. scalar shift intrinsics such as uqshl.)
+def PNT_None: PolymorphicNameType<0, ?>;
+
+// PNT_Type: the usual case, in which the polymorphic name is made by dropping
+// the type suffix, so it ends up the same as the Tablegen record name. E.g.
+// vaddq_u16 -> vaddq.
+def PNT_Type: PolymorphicNameType<1, ?>;
+
+// PNT_2Type: the polymorphic name is made by dropping _two_ type suffixes.
+// E.g. vcvtq_f16_u16 -> vcvtq.
+def PNT_2Type: PolymorphicNameType<2, ?>;
+
+// PNT_NType: the polymorphic name is made by dropping an "_n" suffix and a
+// type. E.g. vaddq_n_u16 -> vaddq.
+def PNT_NType: PolymorphicNameType<1, "n">;
+
+// PNT_N: the polymorphic name is made by just dropping an "_n" suffix
+// (even if it isn't at the end of the name). E.g. vidupq_n_u16 -> vidupq_u16.
+def PNT_N: PolymorphicNameType<0, "n">;
+
+// PNT_WBType: the polymorphic name is made by dropping an "_wb" suffix and a
+// type. E.g. vidupq_m_wb_u16 -> vidupq_m.
+def PNT_WBType: PolymorphicNameType<1, "wb">;
+
+// PNT_WB: the polymorphic name is made by just dropping "_wb". E.g.
+// vidupq_wb_u16 -> vidupq_u16.
+def PNT_WB: PolymorphicNameType<0, "wb">;
+
+// -----------------------------------------------------------------------------
+// The main class Intrinsic. Define one of these for each family of ACLE
+// intrinsics which are the same apart from some final type suffix (e.g.
+// vaddq_{s8,u8,f16,...}).
+//
+// The record's name plus that type suffix is taken to be the full unambiguous
+// name of the function. Its shorter polymorphic name is constructed from that
+// in turn, in a way specified by the PolymorphicNameType system above.
+
+class Intrinsic<Type ret_, dag args_, dag codegen_> {
+ // List of parameter types to suffix to this intrinsic's name. A separate
+ // actual ACLE intrinsic will be generated for each of these. Set it to
+ // [Void] if the intrinsic is not polymorphic at all.
+ list<Type> params;
+
+ // Return type and arguments for the intrinsic.
+ Type ret = ret_;
+ dag args = args_;
+
+ // Specification of how to generate its IR.
+ dag codegen = codegen_;
+
+ // Default to PNT_Type, which is by far the most common case.
+ PolymorphicNameType pnt = PNT_Type;
+}
+
+// Sometimes you have to use two separate Intrinsic declarations to
+// declare intrinsics that are logically the same family (e.g. vaddq,
+// because it needs to expand to an Add or FAdd IR node depending on
+// type). For that purpose, you can derive from NameOverride to
+// specify the intrinsic's base name independently of the Tablegen
+// record name.
+
+class NameOverride<string basename_> {
+ string basename = basename_; // base name used in place of the record name
+}
+
+// -----------------------------------------------------------------------------
+// Convenience lists of parameter types. 'T' is just a container record, so you
+// can define a typical intrinsic with 'let Params = T.Usual', or similar,
+// instead of having to repeat a long list every time.
+
+def T {
+ list<Type> Signed = [s8, s16, s32];
+ list<Type> Unsigned = [u8, u16, u32];
+ list<Type> Int = Signed # Unsigned; // '#' concatenates the two lists
+ list<Type> Float = [f16, f32];
+ list<Type> Usual = Int # Float;
+ list<Type> Int8 = [s8, u8];
+ list<Type> Int16 = [s16, u16];
+ list<Type> Int32 = [s32, u32];
+ list<Type> Int64 = [s64, u64];
+ list<Type> All8 = Int8;
+ list<Type> All16 = Int16 # [f16];
+ list<Type> All32 = Int32 # [f32];
+ list<Type> All64 = Int64;
+ list<Type> All = Usual # All64;
+}
--- /dev/null
+//===- MveEmitter.cpp - Generate arm_mve.h for use with clang -*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This set of linked tablegen backends is responsible for emitting the bits
+// and pieces that implement <arm_mve.h>, which is defined by the ACLE standard
+// and provides a set of types and functions for (more or less) direct access
+// to the MVE instruction set, including the scalar shifts as well as the
+// vector instructions.
+//
+// MVE's standard intrinsic functions are unusual in that they have a system of
+// polymorphism. For example, the function vaddq() can behave like vaddq_u16(),
+// vaddq_f32(), vaddq_s8(), etc., depending on the types of the vector
+// arguments you give it.
+//
+// This constrains the implementation strategies. The usual approach to making
+// the user-facing functions polymorphic would be to either use
+// __attribute__((overloadable)) to make a set of vaddq() functions that are
+// all inline wrappers on the underlying clang builtins, or to define a single
+// vaddq() macro which expands to an instance of _Generic.
+//
+// The inline-wrappers approach would work fine for most intrinsics, except for
+// the ones that take an argument required to be a compile-time constant,
+// because if you wrap an inline function around a call to a builtin, the
+// constant nature of the argument is not passed through.
+//
+// The _Generic approach can be made to work with enough effort, but it takes a
+// lot of machinery, because of the design feature of _Generic that even the
+// untaken branches are required to pass all front-end validity checks such as
+// type-correctness. You can work around that by nesting further _Generics all
+// over the place to coerce things to the right type in untaken branches, but
+// what you get out is complicated, hard to guarantee its correctness, and
+// worst of all, gives _completely unreadable_ error messages if the user gets
+// the types wrong for an intrinsic call.
+//
+// Therefore, my strategy is to introduce a new __attribute__ that allows a
+// function to be mapped to a clang builtin even though it doesn't have the
+// same name, and then declare all the user-facing MVE function names with that
+// attribute, mapping each one directly to the clang builtin. And the
+// polymorphic ones have __attribute__((overloadable)) as well. So once the
+// compiler has resolved the overload, it knows the internal builtin ID of the
+// selected function, and can check the immediate arguments against that; and
+// if the user gets the types wrong in a call to a polymorphic intrinsic, they
+// get a completely clear error message showing all the declarations of that
+// function in the header file and explaining why each one doesn't fit their
+// call.
+//
+// The downside of this is that if every clang builtin has to correspond
+// exactly to a user-facing ACLE intrinsic, then you can't save work in the
+// frontend by doing it in the header file: CGBuiltin.cpp has to do the entire
+// job of converting an ACLE intrinsic call into LLVM IR. So the Tablegen
+// description for an MVE intrinsic has to contain a full description of the
+// sequence of IRBuilder calls that clang will need to make.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <list>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+
+class MveEmitter;
+class Result;
+
+// -----------------------------------------------------------------------------
+// A system of classes to represent all the types we'll need to deal with in
+// the prototypes of intrinsics.
+//
+// Query methods include finding out the C name of a type; the "LLVM name" in
+// the sense of a C++ code snippet that can be used in the codegen function;
+// the suffix that represents the type in the ACLE intrinsic naming scheme
+// (e.g. 's32' represents int32_t in intrinsics such as vaddq_s32); whether the
+// type is floating-point related (hence should be under #ifdef in the MVE
+// header so that it isn't included in integer-only MVE mode); and the type's
+// size in bits. Not all subtypes support all these queries.
+
+class Type {
+public:
+ enum class TypeKind {
+ // Void appears as a return type (for store intrinsics, which are pure
+ // side-effect). It's also used as the parameter type in the Tablegen
+ // when an intrinsic doesn't need to come in various suffixed forms like
+ // vfooq_s8,vfooq_u16,vfooq_f32.
+ Void,
+
+ // Scalar is used for ordinary int and float types of all sizes.
+ Scalar,
+
+ // Vector is used for anything that occupies exactly one MVE vector
+ // register, i.e. {uint,int,float}NxM_t.
+ Vector,
+
+ // MultiVector is used for the {uint,int,float}NxMxK_t types used by the
+ // interleaving load/store intrinsics v{ld,st}{2,4}q.
+ MultiVector,
+
+ // Predicate is used by all the predicated intrinsics. Its C
+ // representation is mve_pred16_t (which is just an alias for uint16_t).
+ // But we give more detail here, by indicating that a given predicate
+ // instruction is logically regarded as a vector of i1 containing the
+ // same number of lanes as the input vector type. So our Predicate type
+ // comes with a lane count, which we use to decide which kind of <n x i1>
+ // we'll invoke the pred_i2v IR intrinsic to translate it into.
+ Predicate,
+
+ // Pointer is used for pointer types (obviously), and comes with a flag
+ // indicating whether it's a pointer to a const or mutable instance of
+ // the pointee type.
+ Pointer,
+ };
+
+private:
+ // Discriminator consulted by each subclass's classof(), enabling
+ // llvm::isa/cast/dyn_cast on Type pointers.
+ const TypeKind TKind;
+
+protected:
+ Type(TypeKind K) : TKind(K) {}
+
+public:
+ TypeKind typeKind() const { return TKind; }
+ virtual ~Type() = default;
+ virtual bool requiresFloat() const = 0;
+ virtual unsigned sizeInBits() const = 0;
+ virtual std::string cName() const = 0;
+ // C++ source text denoting this type, for pasting into the generated
+ // codegen function. Subclasses that have no LLVM representation (or need
+ // none, like MultiVector) keep this default, which fails fatally.
+ virtual std::string llvmName() const {
+ PrintFatalError("no LLVM type name available for type " + cName());
+ }
+ // Suffix used in ACLE intrinsic names (e.g. "_s32"); fatal by default.
+ virtual std::string acleSuffix() const {
+ PrintFatalError("no ACLE suffix available for this type");
+ }
+};
+
+enum class ScalarTypeKind { SignedInt, UnsignedInt, Float };
+
+// Map a ScalarTypeKind to the single-letter kind used in ACLE intrinsic
+// suffixes (the "s" in vaddq_s32, etc).
+//
+// The switches below deliberately have no default label: all enumerators are
+// covered, and a default would both trigger -Wcovered-switch-default and
+// suppress the -Wswitch warning if a new enumerator were ever added. The
+// llvm_unreachable after each switch handles the impossible fall-through.
+inline std::string toLetter(ScalarTypeKind kind) {
+  switch (kind) {
+  case ScalarTypeKind::SignedInt:
+    return "s";
+  case ScalarTypeKind::UnsignedInt:
+    return "u";
+  case ScalarTypeKind::Float:
+    return "f";
+  }
+  llvm_unreachable("bad scalar type kind");
+}
+
+// Map a ScalarTypeKind to the prefix of its C type name (the "int" of
+// int32_t, the "float" of float16_t, etc).
+inline std::string toCPrefix(ScalarTypeKind kind) {
+  switch (kind) {
+  case ScalarTypeKind::SignedInt:
+    return "int";
+  case ScalarTypeKind::UnsignedInt:
+    return "uint";
+  case ScalarTypeKind::Float:
+    return "float";
+  }
+  llvm_unreachable("bad scalar type kind");
+}
+
+// The 'void' type: the return type of the pure-side-effect (store-like)
+// intrinsics, and the dummy parameter type of non-polymorphic ones. It has
+// no suffix and no size.
+class VoidType : public Type {
+public:
+  VoidType() : Type(TypeKind::Void) {}
+  static bool classof(const Type *T) { return T->typeKind() == TypeKind::Void; }
+  std::string cName() const override { return "void"; }
+  std::string acleSuffix() const override { return ""; }
+  bool requiresFloat() const override { return false; }
+  unsigned sizeInBits() const override { return 0; }
+};
+
+// A pointer to some pointee type, optionally const-qualified. All MVE
+// pointers are 32-bit.
+class PointerType : public Type {
+  const Type *Pointee;
+  bool Const;
+
+public:
+  PointerType(const Type *Pointee, bool Const)
+      : Type(TypeKind::Pointer), Pointee(Pointee), Const(Const) {}
+  unsigned sizeInBits() const override { return 32; }
+  bool requiresFloat() const override { return Pointee->requiresFloat(); }
+  std::string cName() const override {
+    // C's declarator syntax for a pointer would differ if the pointee were
+    // itself a pointer, but no MVE intrinsic uses a double pointer, so we
+    // don't need to handle that wrinkle.
+    assert(!isa<PointerType>(Pointee) && "Pointer to pointer not supported");
+
+    std::string Base = Pointee->cName();
+    return (Const ? "const " + Base : Base) + " *";
+  }
+  static bool classof(const Type *T) {
+    return T->typeKind() == TypeKind::Pointer;
+  }
+};
+
+// Base class for all the types that have a name of the form
+// [prefix][numbers]_t, like int32_t, uint16x8_t, float32x4x2_t.
+//
+// For this sub-hierarchy we invent a cNameBase() method which returns the
+// whole name except for the trailing "_t", so that Vector and MultiVector can
+// append an extra "x2" or whatever to their element type's cNameBase(). Then
+// the main cName() query method puts "_t" on the end for the final type name.
+
+class CRegularNamedType : public Type {
+ using Type::Type; // inherit the Type(TypeKind) constructor
+ virtual std::string cNameBase() const = 0;
+
+public:
+ std::string cName() const override { return cNameBase() + "_t"; }
+};
+
+class ScalarType : public CRegularNamedType {
+ ScalarTypeKind Kind;
+ unsigned Bits;
+
+public:
+ // Constructed directly from a PrimitiveType record in arm_mve_defs.td,
+ // which carries a one-letter "kind" ("s", "u" or "f") and a bit "size".
+ ScalarType(const Record *Record) : CRegularNamedType(TypeKind::Scalar) {
+ Kind = StringSwitch<ScalarTypeKind>(Record->getValueAsString("kind"))
+ .Case("s", ScalarTypeKind::SignedInt)
+ .Case("u", ScalarTypeKind::UnsignedInt)
+ .Case("f", ScalarTypeKind::Float);
+ // NOTE(review): no .Default() case above, so an unrecognised "kind"
+ // string in the Tablegen input is not diagnosed here — confirm whether a
+ // PrintFatalError fallback is wanted.
+ Bits = Record->getValueAsInt("size");
+ }
+ unsigned sizeInBits() const override { return Bits; }
+ ScalarTypeKind kind() const { return Kind; }
+ // Suffix without the leading underscore, e.g. "s32" (cf. acleSuffix).
+ std::string suffix() const { return toLetter(Kind) + utostr(Bits); }
+ std::string cNameBase() const override {
+ return toCPrefix(Kind) + utostr(Bits);
+ }
+ // Name of the corresponding type member of clang's CodeGenModule /
+ // IRBuilder context, for pasting into generated codegen.
+ std::string llvmName() const override {
+ if (Kind == ScalarTypeKind::Float) {
+ if (Bits == 16)
+ return "HalfTy";
+ if (Bits == 32)
+ return "FloatTy";
+ if (Bits == 64)
+ return "DoubleTy";
+ PrintFatalError("bad size for floating type");
+ }
+ return "Int" + utostr(Bits) + "Ty";
+ }
+ std::string acleSuffix() const override {
+ return "_" + toLetter(Kind) + utostr(Bits);
+ }
+ bool isInteger() const { return Kind != ScalarTypeKind::Float; }
+ bool requiresFloat() const override { return !isInteger(); }
+
+ static bool classof(const Type *T) {
+ return T->typeKind() == TypeKind::Scalar;
+ }
+};
+
+// A single 128-bit MVE vector register, e.g. uint16x8_t. The lane count is
+// derived from the element size, since MVE's vector size is fixed at 128
+// bits.
+class VectorType : public CRegularNamedType {
+  const ScalarType *Element;
+  unsigned Lanes;
+
+public:
+  VectorType(const ScalarType *Element)
+      : CRegularNamedType(TypeKind::Vector), Element(Element),
+        Lanes(128 / Element->sizeInBits()) {}
+  unsigned sizeInBits() const override { return 128; }
+  unsigned lanes() const { return Lanes; }
+  bool requiresFloat() const override { return Element->requiresFloat(); }
+  std::string cNameBase() const override {
+    return Element->cNameBase() + "x" + utostr(Lanes);
+  }
+  std::string llvmName() const override {
+    std::string ElementName = Element->llvmName();
+    return "llvm::VectorType::get(" + ElementName + ", " + utostr(Lanes) + ")";
+  }
+  static bool classof(const Type *T) {
+    return T->typeKind() == TypeKind::Vector;
+  }
+};
+
+// A tuple of several whole vector registers, e.g. uint16x8x2_t, as used by
+// the interleaving loads/stores v{ld,st}{2,4}q.
+class MultiVectorType : public CRegularNamedType {
+ const VectorType *Element;
+ unsigned Registers;
+
+public:
+ MultiVectorType(unsigned Registers, const VectorType *Element)
+ : CRegularNamedType(TypeKind::MultiVector), Element(Element),
+ Registers(Registers) {}
+ unsigned sizeInBits() const override {
+ return Registers * Element->sizeInBits();
+ }
+ unsigned registers() const { return Registers; }
+ bool requiresFloat() const override { return Element->requiresFloat(); }
+ std::string cNameBase() const override {
+ return Element->cNameBase() + "x" + utostr(Registers);
+ }
+
+ // MultiVectorType doesn't override llvmName, because we don't expect to do
+ // automatic code generation for the MVE intrinsics that use it: the {vld2,
+ // vld4, vst2, vst4} family are the only ones that use these types, so it was
+ // easier to hand-write the codegen for dealing with these structs than to
+ // build in lots of extra automatic machinery that would only be used once.
+
+ static bool classof(const Type *T) {
+ return T->typeKind() == TypeKind::MultiVector;
+ }
+};
+
+// The predicate type mve_pred16_t. In C it is always 16 bits, but we track
+// the logical lane count so we know which <n x i1> IR type it stands for.
+class PredicateType : public CRegularNamedType {
+  unsigned Lanes;
+
+public:
+  PredicateType(unsigned Lanes)
+      : CRegularNamedType(TypeKind::Predicate), Lanes(Lanes) {}
+  unsigned sizeInBits() const override { return 16; }
+  std::string cNameBase() const override { return "mve_pred16"; }
+  bool requiresFloat() const override { return false; }
+  std::string llvmName() const override {
+    // A two-lane predicate is represented as <4 x i1> rather than <2 x i1>;
+    // see the comment in llvm/lib/Target/ARM/ARMInstrMVE.td for the full
+    // explanation.
+    unsigned IRLanes = (Lanes == 2 ? 4 : Lanes);
+    return "llvm::VectorType::get(Builder.getInt1Ty(), " + utostr(IRLanes) +
+           ")";
+  }
+  static bool classof(const Type *T) {
+    return T->typeKind() == TypeKind::Predicate;
+  }
+};
+
+// -----------------------------------------------------------------------------
+// Class to facilitate merging together the code generation for many intrinsics
+// by means of varying a few constant or type parameters.
+//
+// Most obviously, the intrinsics in a single parametrised family will have
+// code generation sequences that only differ in a type or two, e.g. vaddq_s8
+// and vaddq_u16 will look the same apart from putting a different vector type
+// in the call to CGM.getIntrinsic(). But also, completely different intrinsics
+// will often code-generate in the same way, with only a different choice of
+// _which_ IR intrinsic they lower to (e.g. vaddq_m_s8 and vmulq_m_s8), but
+// marshalling the arguments and return values of the IR intrinsic in exactly
+// the same way. And others might differ only in some other kind of constant,
+// such as a lane index.
+//
+// So, when we generate the IR-building code for all these intrinsics, we keep
+// track of every value that could possibly be pulled out of the code and
+// stored ahead of time in a local variable. Then we group together intrinsics
+// by textual equivalence of the code that would result if _all_ those
+// parameters were stored in local variables. That gives us maximal sets that
+// can be implemented by a single piece of IR-building code by changing
+// parameter values ahead of time.
+//
+// After we've done that, we do a second pass in which we only allocate _some_
+// of the parameters into local variables, by tracking which ones have the same
+// values as each other (so that a single variable can be reused) and which
+// ones are the same across the whole set (so that no variable is needed at
+// all).
+//
+// Hence the class below. Its allocParam method is invoked during code
+// generation by every method of a Result subclass (see below) that wants to
+// give it the opportunity to pull something out into a switchable parameter.
+// It returns a variable name for the parameter, or (if it's being used in the
+// second pass once we've decided that some parameters don't need to be stored
+// in variables after all) it might just return the input expression unchanged.
+
+struct CodeGenParamAllocator {
+ // Accumulated during code generation: the C++ type and value expression of
+ // each parameter variable allocated so far, indexed by parameter number.
+ std::vector<std::string> *ParamTypes = nullptr;
+ std::vector<std::string> *ParamValues = nullptr;
+
+ // Provided ahead of time in pass 2, to indicate which parameters are being
+ // assigned to what. This vector contains an entry for each call to
+ // allocParam expected during code gen (which we counted up in pass 1), and
+ // indicates the number of the parameter variable that should be returned, or
+ // -1 if this call shouldn't allocate a parameter variable at all.
+ //
+ // We rely on the recursive code generation working identically in passes 1
+ // and 2, so that the same list of calls to allocParam happen in the same
+ // order. That guarantees that the parameter numbers recorded in pass 1 will
+ // match the entries in this vector that store what MveEmitter::EmitBuiltinCG
+ // decided to do about each one in pass 2.
+ std::vector<int> *ParamNumberMap = nullptr;
+
+ // Internally track how many things we've allocated. Doubles as the index
+ // into ParamNumberMap of the next expected allocParam call in pass 2.
+ unsigned nparams = 0;
+
+ // Returns the C++ expression the generated code should use for this value:
+ // either a parameter variable name ("ParamN"), or — in pass 2, when the map
+ // says no variable is needed — the input Value expression unchanged.
+ std::string allocParam(StringRef Type, StringRef Value) {
+ unsigned ParamNumber;
+
+ if (!ParamNumberMap) {
+ // In pass 1, unconditionally assign a new parameter variable to every
+ // value we're asked to process.
+ ParamNumber = nparams++;
+ } else {
+ // In pass 2, consult the map provided by the caller to find out which
+ // variable we should be keeping things in.
+ int MapValue = (*ParamNumberMap)[nparams++];
+ if (MapValue < 0)
+ return Value;
+ ParamNumber = MapValue;
+ }
+
+ // If we've allocated a new parameter variable for the first time, store
+ // its type and value to be retrieved after codegen. (size() == ParamNumber
+ // exactly when this is the first allocParam call to use that number.)
+ if (ParamTypes && ParamTypes->size() == ParamNumber)
+ ParamTypes->push_back(Type);
+ if (ParamValues && ParamValues->size() == ParamNumber)
+ ParamValues->push_back(Value);
+
+ // Unimaginative naming scheme for parameter variables.
+ return "Param" + utostr(ParamNumber);
+ }
+};
+
+// -----------------------------------------------------------------------------
+// System of classes that represent all the intermediate values used during
+// code-generation for an intrinsic.
+//
+// The base class 'Result' can represent a value of the LLVM type 'Value', or
+// sometimes 'Address' (for loads/stores, including an alignment requirement).
+//
+// In the case where the Tablegen provides a value in the codegen dag as a
+// plain integer literal, the Result object we construct here will be one that
+// returns true from hasIntegerConstantValue(). This allows the generated C++
+// code to use the constant directly in contexts which can take a literal
+// integer, such as Builder.CreateExtractValue(thing, 1), without going to the
+// effort of calling llvm::ConstantInt::get() and then pulling the constant
+// back out of the resulting llvm::Value later.
+
+class Result {
+public:
+  // Convenient shorthand for the pointer type we'll be using everywhere.
+  using Ptr = std::shared_ptr<Result>;
+
+private:
+  // Optional Result that must be code-generated before this one even though
+  // its output value is not consumed here (set up while processing 'seq').
+  Ptr Predecessor;
+  // Name of the C++ variable holding this value in the generated code;
+  // assigned late, by the caller, once it is known which names are needed.
+  std::string VarName;
+  // Whether any subsequent statement actually mentioned VarName.
+  bool VarNameUsed = false;
+  // Highest code-generation pass number that has visited this node so far.
+  unsigned Visited = 0;
+
+public:
+  virtual ~Result() = default;
+  // Scope maps Tablegen dag-argument names (e.g. $a) to the Result holding
+  // their value.
+  using Scope = std::map<std::string, Ptr>;
+  // Emit the C++ expression that computes this value into OS.
+  virtual void genCode(raw_ostream &OS, CodeGenParamAllocator &) const = 0;
+  // True if this node is a plain integer literal that generated C++ code can
+  // use directly, without constructing an llvm::ConstantInt.
+  virtual bool hasIntegerConstantValue() const { return false; }
+  // The literal's value; only meaningful when hasIntegerConstantValue().
+  virtual uint32_t integerConstantValue() const { return 0; }
+  // C++ type of the variable that will hold this value in generated code.
+  virtual std::string typeName() const { return "Value *"; }
+
+  // Mostly, when a code-generation operation has a dependency on prior
+  // operations, it's because it uses the output values of those operations as
+  // inputs. But there's one exception, which is the use of 'seq' in Tablegen
+  // to indicate that operations have to be performed in sequence regardless of
+  // whether they use each others' output values.
+  //
+  // So, the actual generation of code is done by depth-first search, using the
+  // prerequisites() method to get a list of all the other Results that have to
+  // be computed before this one. That method divides into the 'predecessor',
+  // set by setPredecessor() while processing a 'seq' dag node, and the list
+  // returned by 'morePrerequisites', which each subclass implements to return
+  // a list of the Results it uses as input to whatever its own computation is
+  // doing.
+
+  virtual void morePrerequisites(std::vector<Ptr> &output) const {}
+  std::vector<Ptr> prerequisites() const {
+    std::vector<Ptr> ToRet;
+    if (Predecessor)
+      ToRet.push_back(Predecessor);
+    morePrerequisites(ToRet);
+    return ToRet;
+  }
+
+  void setPredecessor(Ptr p) {
+    // A node can be sequenced after at most one predecessor.
+    assert(!Predecessor);
+    Predecessor = p;
+  }
+
+  // Each Result will be assigned a variable name in the output code, but not
+  // all those variable names will actually be used (e.g. the return value of
+  // Builder.CreateStore has void type, so nobody will want to refer to it). To
+  // prevent annoying compiler warnings, we track whether each Result's
+  // variable name was ever actually mentioned in subsequent statements, so
+  // that it can be left out of the final generated code.
+  std::string varname() {
+    VarNameUsed = true;
+    return VarName;
+  }
+  void setVarname(const StringRef s) { VarName = s; }
+  bool varnameUsed() const { return VarNameUsed; }
+
+  // Code generation happens in multiple passes. This method tracks whether a
+  // Result has yet been visited in a given pass, without the need for a
+  // tedious loop in between passes that goes through and resets a 'visited'
+  // flag back to false: you just set Pass=1 the first time round, and Pass=2
+  // the second time.
+  bool needsVisiting(unsigned Pass) {
+    bool ToRet = Visited < Pass;
+    Visited = Pass;
+    return ToRet;
+  }
+};
+
+// Result subclass that retrieves one of the arguments to the clang builtin
+// function. In cases where the argument has pointer type, we call
+// EmitPointerWithAlignment and store the result in a variable of type Address,
+// so that load and store IR nodes can know the right alignment. Otherwise, we
+// call EmitScalarExpr.
+//
+// There are aggregate parameters in the MVE intrinsics API, but we don't deal
+// with them in this Tablegen back end: they only arise in the vld2q/vld4q and
+// vst2q/vst4q family, which is few enough that we just write the code by hand
+// for those in CGBuiltin.cpp.
+class BuiltinArgResult : public Result {
+public:
+  unsigned ArgNum;  // index into the builtin call's argument list
+  bool AddressType; // true if the argument should be fetched as an Address
+  BuiltinArgResult(unsigned ArgNum, bool AddressType)
+      : ArgNum(ArgNum), AddressType(AddressType) {}
+  // Emit a call to the CodeGenFunction helper that fetches argument ArgNum:
+  // EmitPointerWithAlignment for pointer arguments (so loads/stores know the
+  // alignment), EmitScalarExpr otherwise.
+  void genCode(raw_ostream &OS, CodeGenParamAllocator &) const override {
+    OS << (AddressType ? "EmitPointerWithAlignment" : "EmitScalarExpr")
+       << "(E->getArg(" << ArgNum << "))";
+  }
+  // Fix: this was declared 'virtual ... const' with no 'override', so the
+  // compiler could not check it really overrides Result::typeName (and it
+  // triggered -Winconsistent-missing-override alongside genCode above).
+  std::string typeName() const override {
+    return AddressType ? "Address" : Result::typeName();
+  }
+};
+
+// Result subclass for an integer literal appearing in Tablegen. This may need
+// to be turned into an llvm::Value by means of llvm::ConstantInt::get(), or
+// it may be used directly as an integer, depending on which IRBuilder method
+// it's being passed to.
+class IntLiteralResult : public Result {
+public:
+  const ScalarType *IntegerType;
+  uint32_t IntegerValue;
+  IntLiteralResult(const ScalarType *IntegerType, uint32_t IntegerValue)
+      : IntegerType(IntegerType), IntegerValue(IntegerValue) {}
+  // Emit llvm::ConstantInt::get(type, value), routing both the LLVM type and
+  // the literal itself through parameter variables.
+  void genCode(raw_ostream &OS,
+               CodeGenParamAllocator &ParamAlloc) const override {
+    std::string TypeParam =
+        ParamAlloc.allocParam("llvm::Type *", IntegerType->llvmName());
+    std::string ValueParam =
+        ParamAlloc.allocParam(IntegerType->cName(), utostr(IntegerValue));
+    OS << "llvm::ConstantInt::get(" << TypeParam << ", " << ValueParam << ")";
+  }
+  // This node can also be consumed directly as a C++ integer literal, e.g.
+  // as the index operand of CreateExtractValue.
+  bool hasIntegerConstantValue() const override { return true; }
+  uint32_t integerConstantValue() const override { return IntegerValue; }
+};
+
+// Result subclass representing a cast between different integer types. We use
+// our own ScalarType abstraction as the representation of the target type,
+// which gives both size and signedness.
+class IntCastResult : public Result {
+public:
+  const ScalarType *IntegerType;
+  Ptr V;
+  IntCastResult(const ScalarType *IntegerType, Ptr V)
+      : IntegerType(IntegerType), V(V) {}
+  // Emit Builder.CreateIntCast(value, type, isSigned), parameterizing the
+  // destination type and its signedness flag.
+  void genCode(raw_ostream &OS,
+               CodeGenParamAllocator &ParamAlloc) const override {
+    bool IsSigned = IntegerType->kind() == ScalarTypeKind::SignedInt;
+    OS << "Builder.CreateIntCast(" << V->varname() << ", "
+       << ParamAlloc.allocParam("llvm::Type *", IntegerType->llvmName())
+       << ", " << ParamAlloc.allocParam("bool", IsSigned ? "true" : "false")
+       << ")";
+  }
+  // The value being cast must be computed before the cast itself.
+  void morePrerequisites(std::vector<Ptr> &output) const override {
+    output.push_back(V);
+  }
+};
+
+// Result subclass representing a call to an IRBuilder method. Each IRBuilder
+// method we want to use will have a Tablegen record giving the method name and
+// describing any important details of how to call it, such as whether a
+// particular argument should be an integer constant instead of an llvm::Value.
+class IRBuilderResult : public Result {
+public:
+  StringRef BuilderMethod;
+  std::vector<Ptr> Args;
+  std::set<unsigned> AddressArgs;     // argument indices of type Address
+  std::set<unsigned> IntConstantArgs; // argument indices passed as literals
+  IRBuilderResult(StringRef BuilderMethod, std::vector<Ptr> Args,
+                  std::set<unsigned> AddressArgs,
+                  std::set<unsigned> IntConstantArgs)
+      : BuilderMethod(BuilderMethod), Args(Args), AddressArgs(AddressArgs),
+        IntConstantArgs(IntConstantArgs) {}
+  // Emit "Builder.<method>(a, b, ...)". Arguments flagged as integer
+  // constants go through a parameter variable holding the literal; all
+  // others are referenced via their value variable.
+  void genCode(raw_ostream &OS,
+               CodeGenParamAllocator &ParamAlloc) const override {
+    OS << "Builder." << BuilderMethod << "(";
+    const char *Sep = "";
+    for (unsigned Idx = 0, End = Args.size(); Idx < End; ++Idx) {
+      OS << Sep;
+      if (IntConstantArgs.count(Idx)) {
+        assert(Args[Idx]->hasIntegerConstantValue());
+        OS << ParamAlloc.allocParam(
+            "unsigned", utostr(Args[Idx]->integerConstantValue()));
+      } else {
+        OS << Args[Idx]->varname();
+      }
+      Sep = ", ";
+    }
+    OS << ")";
+  }
+  // Every argument except the integer-constant ones must be computed first.
+  void morePrerequisites(std::vector<Ptr> &output) const override {
+    for (unsigned Idx = 0, End = Args.size(); Idx < End; ++Idx)
+      if (!IntConstantArgs.count(Idx))
+        output.push_back(Args[Idx]);
+  }
+};
+
+// Result subclass representing a call to an IR intrinsic, which we first have
+// to look up using an Intrinsic::ID constant and an array of types.
+class IRIntrinsicResult : public Result {
+public:
+  std::string IntrinsicID;
+  std::vector<const Type *> ParamTypes;
+  std::vector<Ptr> Args;
+  IRIntrinsicResult(StringRef IntrinsicID,
+                    std::vector<const Type *> ParamTypes, std::vector<Ptr> Args)
+      : IntrinsicID(IntrinsicID), ParamTypes(ParamTypes), Args(Args) {}
+  // Emit "Builder.CreateCall(CGM.getIntrinsic(ID[, {types}]), {args})", with
+  // the Intrinsic::ID and each overloading type routed through parameter
+  // variables so that similar intrinsics can share generated code.
+  void genCode(raw_ostream &OS,
+               CodeGenParamAllocator &ParamAlloc) const override {
+    std::string IntNo = ParamAlloc.allocParam(
+        "Intrinsic::ID", "Intrinsic::arm_mve_" + IntrinsicID);
+    OS << "Builder.CreateCall(CGM.getIntrinsic(" << IntNo;
+    if (!ParamTypes.empty()) {
+      // Overloaded intrinsic: supply the list of type arguments.
+      OS << ", llvm::SmallVector<llvm::Type *, " << ParamTypes.size() << "> {";
+      const char *Comma = "";
+      for (const Type *T : ParamTypes) {
+        OS << Comma << ParamAlloc.allocParam("llvm::Type *", T->llvmName());
+        Comma = ", ";
+      }
+      OS << "}";
+    }
+    OS << "), llvm::SmallVector<Value *, " << Args.size() << "> {";
+    const char *Comma = "";
+    for (const Ptr &Arg : Args) {
+      OS << Comma << Arg->varname();
+      Comma = ", ";
+    }
+    OS << "})";
+  }
+  // Every call argument must be computed before the call itself.
+  void morePrerequisites(std::vector<Ptr> &output) const override {
+    output.insert(output.end(), Args.begin(), Args.end());
+  }
+};
+
+// -----------------------------------------------------------------------------
+// Class that describes a single ACLE intrinsic.
+//
+// A Tablegen record will typically describe more than one ACLE intrinsic, by
+// means of setting the 'list<Type> Params' field to a list of multiple
+// parameter types, so as to define vaddq_{s8,u8,...,f16,f32} all in one go.
+// We'll end up with one instance of ACLEIntrinsic for *each* parameter type,
+// rather than a single one for all of them. Hence, the constructor takes both
+// a Tablegen record and the current value of the parameter type.
+
+class ACLEIntrinsic {
+  // Structure documenting that one of the intrinsic's arguments is required to
+  // be a compile-time constant integer, and what constraints there are on its
+  // value. Used when generating Sema checking code.
+  struct ImmediateArg {
+    enum class BoundsType { ExplicitRange, UInt };
+    BoundsType boundsType;
+    // For ExplicitRange, the permitted range is [i1, i2]. For UInt, i1 is
+    // the number of value bits, i.e. the permitted range is [0, 2^i1 - 1].
+    int64_t i1, i2;
+    // Optional extra Sema check: the suffix of a SemaBuiltinConstantArg*
+    // function name, plus extra arguments to pass it.
+    StringRef ExtraCheckType, ExtraCheckArgs;
+    const Type *ArgType;
+  };
+
+  // For polymorphic intrinsics, FullName is the explicit name that uniquely
+  // identifies this variant of the intrinsic, and ShortName is the name it
+  // shares with at least one other intrinsic.
+  std::string ShortName, FullName;
+
+  const Type *ReturnType;
+  std::vector<const Type *> ArgTypes;
+  // Keyed by argument index.
+  std::map<unsigned, ImmediateArg> ImmediateArgs;
+  // Root of the codegen dag; null when this intrinsic uses custom codegen.
+  Result::Ptr Code;
+
+  // For custom-codegen intrinsics: variable assignments to emit before
+  // breaking out to the handwritten case in CGBuiltin.cpp.
+  std::map<std::string, std::string> CustomCodeGenArgs;
+
+  // Recursive function that does the internals of code generation:
+  // post-order DFS, so every Result is appended to Used only after all of
+  // its prerequisites.
+  void genCodeDfs(Result::Ptr V, std::list<Result::Ptr> &Used,
+                  unsigned Pass) const {
+    if (!V->needsVisiting(Pass))
+      return;
+
+    for (Result::Ptr W : V->prerequisites())
+      genCodeDfs(W, Used, Pass);
+
+    Used.push_back(V);
+  }
+
+public:
+  const std::string &shortName() const { return ShortName; }
+  const std::string &fullName() const { return FullName; }
+  const Type *returnType() const { return ReturnType; }
+  const std::vector<const Type *> &argTypes() const { return ArgTypes; }
+  // True if any type in the signature needs FP support, so the intrinsic
+  // must live in the float-guarded part of the generated header.
+  bool requiresFloat() const {
+    if (ReturnType->requiresFloat())
+      return true;
+    for (const Type *T : ArgTypes)
+      if (T->requiresFloat())
+        return true;
+    return false;
+  }
+  bool polymorphic() const { return ShortName != FullName; }
+
+  // External entry point for code generation, called from MveEmitter.
+  void genCode(raw_ostream &OS, CodeGenParamAllocator &ParamAlloc,
+               unsigned Pass) const {
+    if (!hasCode()) {
+      // Custom codegen: just assign the recorded variables and bail out to
+      // the handwritten case in CGBuiltin.cpp.
+      for (auto kv : CustomCodeGenArgs)
+        OS << " " << kv.first << " = " << kv.second << ";\n";
+      OS << " break; // custom code gen\n";
+      return;
+    }
+    std::list<Result::Ptr> Used;
+    genCodeDfs(Code, Used, Pass);
+
+    // Assign names only to values actually referred to later, to avoid
+    // unused-variable warnings in the generated code.
+    unsigned varindex = 0;
+    for (Result::Ptr V : Used)
+      if (V->varnameUsed())
+        V->setVarname("Val" + utostr(varindex++));
+
+    for (Result::Ptr V : Used) {
+      OS << " ";
+      if (V == Used.back()) {
+        assert(!V->varnameUsed());
+        OS << "return "; // FIXME: what if the top-level thing is void?
+      } else if (V->varnameUsed()) {
+        std::string Type = V->typeName();
+        OS << V->typeName();
+        if (!StringRef(Type).endswith("*"))
+          OS << " ";
+        OS << V->varname() << " = ";
+      }
+      V->genCode(OS, ParamAlloc);
+      OS << ";\n";
+    }
+  }
+  bool hasCode() const { return Code != nullptr; }
+
+  // Generate the Sema checks for this intrinsic's constant arguments;
+  // returns "" if no checks are needed.
+  std::string genSema() const {
+    std::vector<std::string> SemaChecks;
+
+    for (const auto &kv : ImmediateArgs) {
+      const ImmediateArg &IA = kv.second;
+
+      // Compute the semantically permitted range [lo, hi], in 128-bit
+      // arithmetic so any 64-bit bound is representable.
+      llvm::APInt lo(128, 0), hi(128, 0);
+      switch (IA.boundsType) {
+      case ImmediateArg::BoundsType::ExplicitRange:
+        lo = IA.i1;
+        hi = IA.i2;
+        break;
+      case ImmediateArg::BoundsType::UInt:
+        // Any value that fits in i1 unsigned bits. (Fixed: the upper bound
+        // is 2^i1 - 1; the previous code used i1 itself.)
+        lo = 0;
+        hi = llvm::APInt::getMaxValue(IA.i1).zext(128);
+        break;
+      }
+
+      // Compute the range representable by the argument's own integer type:
+      // if it covers the whole semantic range, a plain is-it-a-constant
+      // check suffices. (Fixed: the signedness test was inverted, handing
+      // signed bounds to unsigned types and vice versa; also, unsigned
+      // bounds must be zero-extended — sign-extending the all-ones unsigned
+      // max would turn it into -1.)
+      llvm::APInt typelo, typehi;
+      if (cast<ScalarType>(IA.ArgType)->kind() == ScalarTypeKind::SignedInt) {
+        typelo = llvm::APInt::getSignedMinValue(IA.ArgType->sizeInBits());
+        typehi = llvm::APInt::getSignedMaxValue(IA.ArgType->sizeInBits());
+        typelo = typelo.sext(128);
+        typehi = typehi.sext(128);
+      } else {
+        typelo = llvm::APInt::getMinValue(IA.ArgType->sizeInBits());
+        typehi = llvm::APInt::getMaxValue(IA.ArgType->sizeInBits());
+        typelo = typelo.zext(128);
+        typehi = typehi.zext(128);
+      }
+
+      std::string Index = utostr(kv.first);
+
+      if (lo.sle(typelo) && hi.sge(typehi))
+        SemaChecks.push_back("SemaBuiltinConstantArg(TheCall, " + Index + ")");
+      else
+        SemaChecks.push_back("SemaBuiltinConstantArgRange(TheCall, " + Index +
+                             ", 0x" + lo.toString(16, true) + ", 0x" +
+                             hi.toString(16, true) + ")");
+
+      if (!IA.ExtraCheckType.empty()) {
+        std::string Suffix;
+        if (!IA.ExtraCheckArgs.empty())
+          Suffix = (Twine(", ") + IA.ExtraCheckArgs).str();
+        SemaChecks.push_back((Twine("SemaBuiltinConstantArg") +
+                              IA.ExtraCheckType + "(TheCall, " + Index +
+                              Suffix + ")")
+                                 .str());
+      }
+    }
+    if (SemaChecks.empty())
+      return "";
+    return (Twine(" return ") +
+            join(std::begin(SemaChecks), std::end(SemaChecks),
+                 " ||\n ") +
+            ";\n")
+        .str();
+  }
+
+  ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param);
+};
+
+// -----------------------------------------------------------------------------
+// The top-level class that holds all the state from analyzing the entire
+// Tablegen input.
+
+class MveEmitter {
+  // MveEmitter holds a collection of all the types we've instantiated: one
+  // uniquing map per category of Type, so repeated requests return the same
+  // object.
+  VoidType Void;
+  std::map<std::string, std::unique_ptr<ScalarType>> ScalarTypes;
+  std::map<std::pair<ScalarTypeKind, unsigned>, std::unique_ptr<VectorType>>
+      VectorTypes;
+  std::map<std::pair<std::string, unsigned>, std::unique_ptr<MultiVectorType>>
+      MultiVectorTypes;
+  std::map<unsigned, std::unique_ptr<PredicateType>> PredicateTypes;
+  std::map<std::string, std::unique_ptr<PointerType>> PointerTypes;
+
+  // And all the ACLEIntrinsic instances we've created, keyed by full name.
+  std::map<std::string, std::unique_ptr<ACLEIntrinsic>> ACLEIntrinsics;
+
+public:
+  // Methods to create a Type object, or return the existing one from the
+  // maps stored in this object. Each does a single map lookup: operator[]
+  // default-constructs a null entry, which we then fill in on first use.
+  const VoidType *getVoidType() { return &Void; }
+  const ScalarType *getScalarType(StringRef Name) {
+    return ScalarTypes[std::string(Name)].get();
+  }
+  const ScalarType *getScalarType(Record *R) {
+    return getScalarType(R->getName());
+  }
+  const VectorType *getVectorType(const ScalarType *ST) {
+    std::pair<ScalarTypeKind, unsigned> Key(ST->kind(), ST->sizeInBits());
+    std::unique_ptr<VectorType> &Entry = VectorTypes[Key];
+    if (!Entry)
+      Entry = std::make_unique<VectorType>(ST);
+    return Entry.get();
+  }
+  const MultiVectorType *getMultiVectorType(unsigned Registers,
+                                            const VectorType *VT) {
+    std::pair<std::string, unsigned> Key(VT->cNameBase(), Registers);
+    std::unique_ptr<MultiVectorType> &Entry = MultiVectorTypes[Key];
+    if (!Entry)
+      Entry = std::make_unique<MultiVectorType>(Registers, VT);
+    return Entry.get();
+  }
+  const PredicateType *getPredicateType(unsigned Lanes) {
+    std::unique_ptr<PredicateType> &Entry = PredicateTypes[Lanes];
+    if (!Entry)
+      Entry = std::make_unique<PredicateType>(Lanes);
+    return Entry.get();
+  }
+  const PointerType *getPointerType(const Type *T, bool Const) {
+    PointerType PT(T, Const);
+    std::unique_ptr<PointerType> &Entry = PointerTypes[PT.cName()];
+    if (!Entry)
+      Entry = std::make_unique<PointerType>(PT);
+    return Entry.get();
+  }
+
+  // Methods to construct a type from various pieces of Tablegen. These are
+  // always called in the context of setting up a particular ACLEIntrinsic, so
+  // there's always an ambient parameter type (because we're iterating through
+  // the Params list in the Tablegen record for the intrinsic), which is used
+  // to expand Tablegen classes like 'Vector' which mean something different in
+  // each member of a parametric family.
+  const Type *getType(Record *R, const Type *Param);
+  const Type *getType(DagInit *D, const Type *Param);
+  const Type *getType(Init *I, const Type *Param);
+
+  // Functions that translate the Tablegen representation of an intrinsic's
+  // code generation into a collection of Value objects (which will then be
+  // reprocessed to read out the actual C++ code included by CGBuiltin.cpp).
+  Result::Ptr getCodeForDag(DagInit *D, const Result::Scope &Scope,
+                            const Type *Param);
+  Result::Ptr getCodeForDagArg(DagInit *D, unsigned ArgNum,
+                               const Result::Scope &Scope, const Type *Param);
+  Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType);
+
+  // Constructor and top-level functions.
+
+  MveEmitter(RecordKeeper &Records);
+
+  void EmitHeader(raw_ostream &OS);
+  void EmitBuiltinDef(raw_ostream &OS);
+  void EmitBuiltinSema(raw_ostream &OS);
+  void EmitBuiltinCG(raw_ostream &OS);
+  void EmitBuiltinAliases(raw_ostream &OS);
+};
+
+const Type *MveEmitter::getType(Init *I, const Type *Param) {
+  // Dispatch on the kind of Tablegen value we were handed: a dag is a
+  // compound type expression, a def names a type record directly.
+  if (auto *D = dyn_cast<DagInit>(I))
+    return getType(D, Param);
+  if (auto *DI = dyn_cast<DefInit>(I))
+    return getType(DI->getDef(), Param);
+
+  PrintFatalError("Could not convert this value into a type");
+}
+
+const Type *MveEmitter::getType(Record *R, const Type *Param) {
+  // An Immediate record is a wrapper adding value constraints; the actual
+  // type lives in its 'type' field, so look through it first.
+  if (R->isSubClassOf("Immediate"))
+    R = R->getValueAsDef("type");
+
+  if (R->getName() == "Void")
+    return getVoidType();
+  if (R->isSubClassOf("PrimitiveType"))
+    return getScalarType(R);
+  if (R->isSubClassOf("ComplexType"))
+    return getType(R->getValueAsDag("spec"), Param);
+
+  PrintFatalError(R->getLoc(), "Could not convert this record into a type");
+}
+
+const Type *MveEmitter::getType(DagInit *D, const Type *Param) {
+  // The meat of the getType system: a type expression is a dag whose
+  // operator (a ComplexTypeOp record) selects one of the cases below.
+  Record *Op = cast<DefInit>(D->getOperator())->getDef();
+  if (!Op->isSubClassOf("ComplexTypeOp"))
+    PrintFatalError(
+        "Expected ComplexTypeOp as dag operator in type expression");
+
+  StringRef OpName = Op->getName();
+
+  // The ambient parameter type itself.
+  if (OpName == "CTO_Parameter") {
+    if (isa<VoidType>(Param))
+      PrintFatalError("Parametric type in unparametrised context");
+    return Param;
+  }
+
+  // A vector whose element type is the argument.
+  if (OpName == "CTO_Vec") {
+    const Type *Element = getType(D->getArg(0), Param);
+    return getVectorType(cast<ScalarType>(Element));
+  }
+
+  // The predicate type with one lane per element of a 128-bit vector of the
+  // argument type.
+  if (OpName == "CTO_Pred") {
+    const Type *Element = getType(D->getArg(0), Param);
+    return getPredicateType(128 / Element->sizeInBits());
+  }
+
+  // An n-register tuple of the argument vector type (for VLD2/VLD4 etc).
+  if (Op->isSubClassOf("CTO_Tuple")) {
+    unsigned Registers = Op->getValueAsInt("n");
+    const Type *Element = getType(D->getArg(0), Param);
+    return getMultiVectorType(Registers, cast<VectorType>(Element));
+  }
+
+  // A (possibly const) pointer to the argument type.
+  if (Op->isSubClassOf("CTO_Pointer")) {
+    const Type *Pointee = getType(D->getArg(0), Param);
+    return getPointerType(Pointee, Op->getValueAsBit("const"));
+  }
+
+  // The same-sized integer type with the requested signedness: search the
+  // scalar-type collection for a matching kind and width.
+  if (Op->isSubClassOf("CTO_Sign")) {
+    const ScalarType *ST = cast<ScalarType>(getType(D->getArg(0), Param));
+    ScalarTypeKind NewKind = Op->getValueAsBit("signed")
+                                 ? ScalarTypeKind::SignedInt
+                                 : ScalarTypeKind::UnsignedInt;
+    for (const auto &Entry : ScalarTypes) {
+      const ScalarType *Candidate = Entry.second.get();
+      if (Candidate->kind() == NewKind &&
+          Candidate->sizeInBits() == ST->sizeInBits())
+        return Candidate;
+    }
+    PrintFatalError("Cannot change sign of this type");
+  }
+
+  PrintFatalError("Bad operator in type dag expression");
+}
+
+Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope,
+                                      const Type *Param) {
+  Record *Op = cast<DefInit>(D->getOperator())->getDef();
+
+  if (Op->getName() == "seq") {
+    // 'seq' sequences its sub-dags: each is code-generated after the
+    // previous one (via setPredecessor), and any named element is added to
+    // a scope local to this seq, visible to the later elements.
+    Result::Scope SubScope = Scope;
+    Result::Ptr PrevV = nullptr;
+    for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i) {
+      // We don't use getCodeForDagArg here, because the argument name
+      // has different semantics in a seq
+      Result::Ptr V =
+          getCodeForDag(cast<DagInit>(D->getArg(i)), SubScope, Param);
+      StringRef ArgName = D->getArgNameStr(i);
+      if (!ArgName.empty())
+        SubScope[ArgName] = V;
+      if (PrevV)
+        V->setPredecessor(PrevV);
+      PrevV = V;
+    }
+    // The value of the whole seq is the value of its last element.
+    return PrevV;
+  } else if (Op->isSubClassOf("Type")) {
+    // A Type record used as a dag operator means a cast of its single
+    // argument to that type.
+    if (D->getNumArgs() != 1)
+      PrintFatalError("Type casts should have exactly one argument");
+    const Type *CastType = getType(Op, Param);
+    Result::Ptr Arg = getCodeForDagArg(D, 0, Scope, Param);
+    if (const auto *ST = dyn_cast<ScalarType>(CastType)) {
+      if (!ST->requiresFloat()) {
+        // Casting an integer literal just re-types the literal; any other
+        // value becomes a Builder.CreateIntCast.
+        if (Arg->hasIntegerConstantValue())
+          return std::make_shared<IntLiteralResult>(
+              ST, Arg->integerConstantValue());
+        else
+          return std::make_shared<IntCastResult>(ST, Arg);
+      }
+    }
+    // Only integer scalar casts are supported so far.
+    PrintFatalError("Unsupported type cast");
+  } else {
+    // Anything else: translate the arguments, then build either an
+    // IRBuilder method call or an IR intrinsic call.
+    std::vector<Result::Ptr> Args;
+    for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i)
+      Args.push_back(getCodeForDagArg(D, i, Scope, Param));
+    if (Op->isSubClassOf("IRBuilder")) {
+      std::set<unsigned> AddressArgs;
+      for (unsigned i : Op->getValueAsListOfInts("address_params"))
+        AddressArgs.insert(i);
+      std::set<unsigned> IntConstantArgs;
+      for (unsigned i : Op->getValueAsListOfInts("int_constant_params"))
+        IntConstantArgs.insert(i);
+      return std::make_shared<IRBuilderResult>(
+          Op->getValueAsString("func"), Args, AddressArgs, IntConstantArgs);
+    } else if (Op->isSubClassOf("IRInt")) {
+      std::vector<const Type *> ParamTypes;
+      for (Record *RParam : Op->getValueAsListOfDefs("params"))
+        ParamTypes.push_back(getType(RParam, Param));
+      std::string IntName = Op->getValueAsString("intname");
+      // appendKind adds a suffix derived from the parameter type's kind
+      // (via toLetter, defined elsewhere in this file).
+      if (Op->getValueAsBit("appendKind"))
+        IntName += "_" + toLetter(cast<ScalarType>(Param)->kind());
+      return std::make_shared<IRIntrinsicResult>(IntName, ParamTypes, Args);
+    } else {
+      PrintFatalError("Unsupported dag node " + Op->getName());
+    }
+  }
+}
+
+Result::Ptr MveEmitter::getCodeForDagArg(DagInit *D, unsigned ArgNum,
+                                         const Result::Scope &Scope,
+                                         const Type *Param) {
+  Init *Arg = D->getArg(ArgNum);
+  StringRef Name = D->getArgNameStr(ArgNum);
+
+  // A named argument refers to a variable in the current scope, and must
+  // not also carry a value of its own.
+  if (!Name.empty()) {
+    if (!isa<UnsetInit>(Arg))
+      PrintFatalError(
+          "dag operator argument should not have both a value and a name");
+    auto Found = Scope.find(Name);
+    if (Found == Scope.end())
+      PrintFatalError("unrecognized variable name '" + Name + "'");
+    return Found->second;
+  }
+
+  // An integer literal becomes an IntLiteralResult, defaulting to type u32.
+  if (auto *IntArg = dyn_cast<IntInit>(Arg))
+    return std::make_shared<IntLiteralResult>(getScalarType("u32"),
+                                              IntArg->getValue());
+
+  // A nested dag is translated recursively.
+  if (auto *DagArg = dyn_cast<DagInit>(Arg))
+    return getCodeForDag(DagArg, Scope, Param);
+
+  PrintFatalError("bad dag argument type for code generation");
+}
+
+Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType) {
+  // Start from the raw builtin argument: an Address for pointer-typed
+  // arguments, otherwise a scalar Value.
+  Result::Ptr Arg =
+      std::make_shared<BuiltinArgResult>(ArgNum, isa<PointerType>(ArgType));
+
+  if (const auto *ST = dyn_cast<ScalarType>(ArgType)) {
+    // Integer arguments narrower than 32 bits are normalized to u32.
+    if (ST->isInteger() && ST->sizeInBits() < 32)
+      Arg = std::make_shared<IntCastResult>(getScalarType("u32"), Arg);
+    return Arg;
+  }
+
+  if (const auto *PT = dyn_cast<PredicateType>(ArgType)) {
+    // Predicates arrive as an integer: normalize to u32, then convert to
+    // vector-of-i1 form via the arm_mve_pred_i2v IR intrinsic.
+    Arg = std::make_shared<IntCastResult>(getScalarType("u32"), Arg);
+    return std::make_shared<IRIntrinsicResult>(
+        "pred_i2v", std::vector<const Type *>{PT},
+        std::vector<Result::Ptr>{Arg});
+  }
+
+  return Arg;
+}
+
+ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
+    : ReturnType(ME.getType(R->getValueAsDef("ret"), Param)) {
+  // Derive the intrinsic's full name, by taking the name of the
+  // Tablegen record (or override) and appending the suffix from its
+  // parameter type. (If the intrinsic is unparametrised, its
+  // parameter type will be given as Void, which returns the empty
+  // string for acleSuffix.)
+  StringRef BaseName =
+      (R->isSubClassOf("NameOverride") ? R->getValueAsString("basename")
+                                       : R->getName());
+  FullName = (Twine(BaseName) + Param->acleSuffix()).str();
+
+  // Derive the intrinsic's polymorphic name, by removing components from the
+  // full name as specified by its 'pnt' member ('polymorphic name type'),
+  // which indicates how many type suffixes to remove, and any other piece of
+  // the name that should be removed.
+  Record *PolymorphicNameType = R->getValueAsDef("pnt");
+  SmallVector<StringRef, 8> NameParts;
+  StringRef(FullName).split(NameParts, '_');
+  for (unsigned i = 0, e = PolymorphicNameType->getValueAsInt(
+                          "NumTypeSuffixesToDiscard");
+       i < e; ++i)
+    NameParts.pop_back();
+  if (!PolymorphicNameType->isValueUnset("ExtraSuffixToDiscard")) {
+    StringRef ExtraSuffix =
+        PolymorphicNameType->getValueAsString("ExtraSuffixToDiscard");
+    // Search backwards so that only the last occurrence of the suffix is
+    // removed.
+    auto it = NameParts.end();
+    while (it != NameParts.begin()) {
+      --it;
+      if (*it == ExtraSuffix) {
+        NameParts.erase(it);
+        break;
+      }
+    }
+  }
+  ShortName = join(std::begin(NameParts), std::end(NameParts), "_");
+
+  // Process the intrinsic's argument list.
+  DagInit *ArgsDag = R->getValueAsDag("args");
+  Result::Scope Scope;
+  for (unsigned i = 0, e = ArgsDag->getNumArgs(); i < e; ++i) {
+    Init *TypeInit = ArgsDag->getArg(i);
+
+    // Work out the type of the argument, for use in the function prototype in
+    // the header file.
+    const Type *ArgType = ME.getType(TypeInit, Param);
+    ArgTypes.push_back(ArgType);
+
+    // The argument will usually have a name in the arguments dag, which goes
+    // into the variable-name scope that the code gen will refer to.
+    StringRef ArgName = ArgsDag->getArgNameStr(i);
+    if (!ArgName.empty())
+      Scope[ArgName] = ME.getCodeForArg(i, ArgType);
+
+    // If the argument is a subclass of Immediate, record the details about
+    // what values it can take, for Sema checking.
+    if (auto TypeDI = dyn_cast<DefInit>(TypeInit)) {
+      Record *TypeRec = TypeDI->getDef();
+      if (TypeRec->isSubClassOf("Immediate")) {
+        Record *Bounds = TypeRec->getValueAsDef("bounds");
+        ImmediateArg &IA = ImmediateArgs[i];
+        if (Bounds->isSubClassOf("IB_ConstRange")) {
+          // Explicit [lo, hi] range taken from the Tablegen record.
+          IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
+          IA.i1 = Bounds->getValueAsInt("lo");
+          IA.i2 = Bounds->getValueAsInt("hi");
+        } else if (Bounds->getName() == "IB_UEltValue") {
+          // Any unsigned value that fits in one element of the current
+          // parameter type.
+          IA.boundsType = ImmediateArg::BoundsType::UInt;
+          IA.i1 = Param->sizeInBits();
+        } else if (Bounds->getName() == "IB_LaneIndex") {
+          // A lane index into a 128-bit vector of the parameter type:
+          // valid values are 0 .. lanes-1 inclusive. (Fixed: the upper
+          // bound was 128/size, one past the last valid lane index.)
+          IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
+          IA.i1 = 0;
+          IA.i2 = 128 / Param->sizeInBits() - 1;
+        } else if (Bounds->getName() == "IB_EltBit") {
+          // A bit position within an element, offset by 'base'.
+          IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
+          IA.i1 = Bounds->getValueAsInt("base");
+          IA.i2 = IA.i1 + Param->sizeInBits() - 1;
+        } else {
+          PrintFatalError("unrecognised ImmediateBounds subclass");
+        }
+
+        IA.ArgType = ArgType;
+
+        if (!TypeRec->isValueUnset("extra")) {
+          // Extra Sema check: suffix of a SemaBuiltinConstantArg* function,
+          // plus an optional extra-arguments string.
+          IA.ExtraCheckType = TypeRec->getValueAsString("extra");
+          if (!TypeRec->isValueUnset("extraarg"))
+            IA.ExtraCheckArgs = TypeRec->getValueAsString("extraarg");
+        }
+      }
+    }
+  }
+
+  // Finally, go through the codegen dag and translate it into a Result object
+  // (with an arbitrary DAG of depended-on Results hanging off it).
+  DagInit *CodeDag = R->getValueAsDag("codegen");
+  Record *MainOp = cast<DefInit>(CodeDag->getOperator())->getDef();
+  if (MainOp->isSubClassOf("CustomCodegen")) {
+    // Or, if it's the special case of CustomCodegen, just accumulate
+    // a list of parameters we're going to assign to variables before
+    // breaking from the loop.
+    CustomCodeGenArgs["CustomCodeGenType"] =
+        (Twine("CustomCodeGen::") + MainOp->getValueAsString("type")).str();
+    for (unsigned i = 0, e = CodeDag->getNumArgs(); i < e; ++i) {
+      StringRef Name = CodeDag->getArgNameStr(i);
+      if (Name.empty()) {
+        PrintFatalError("Operands to CustomCodegen should have names");
+      } else if (auto *II = dyn_cast<IntInit>(CodeDag->getArg(i))) {
+        CustomCodeGenArgs[Name] = itostr(II->getValue());
+      } else if (auto *SI = dyn_cast<StringInit>(CodeDag->getArg(i))) {
+        CustomCodeGenArgs[Name] = SI->getValue();
+      } else {
+        // NOTE(review): string operands are accepted just above, so this
+        // message slightly understates the accepted set.
+        PrintFatalError("Operands to CustomCodegen should be integers");
+      }
+    }
+  } else {
+    Code = ME.getCodeForDag(CodeDag, Scope, Param);
+  }
+}
+
+MveEmitter::MveEmitter(RecordKeeper &Records) {
+  // Construct the whole MveEmitter.
+
+  // Gather every PrimitiveType record first. These provide the scalar types
+  // (and hence the vector typedefs for arm_mve.h), and having them all in
+  // one map lets us answer queries like "the unsigned counterpart of this
+  // signed integer type" later on.
+  for (Record *R : Records.getAllDerivedDefinitions("PrimitiveType"))
+    ScalarTypes[std::string(R->getName())] = std::make_unique<ScalarType>(R);
+
+  // Then expand every Intrinsic record into one ACLEIntrinsic per entry in
+  // its list of type parameters, keyed by the resulting full name.
+  for (Record *R : Records.getAllDerivedDefinitions("Intrinsic")) {
+    for (Record *RParam : R->getValueAsListOfDefs("params")) {
+      const Type *Param = getType(RParam, getVoidType());
+      auto Intrinsic = std::make_unique<ACLEIntrinsic>(*this, R, Param);
+      ACLEIntrinsics[Intrinsic->fullName()] = std::move(Intrinsic);
+    }
+  }
+}
+
+/// A wrapper on raw_string_ostream that contains its own buffer rather than
+/// having to point it at one elsewhere. (In other words, it works just like
+/// std::ostringstream; also, this makes it convenient to declare a whole array
+/// of them at once.)
+///
+/// We have to set this up using multiple inheritance, to ensure that the
+/// string member has been constructed before raw_string_ostream's constructor
+/// is given a pointer to it.
+// Holds the string buffer, so that it is fully constructed before the
+// raw_string_ostream base class below receives a pointer to it.
+class string_holder {
+protected:
+  std::string S;
+};
+
+// A raw_string_ostream owning its own buffer, analogous to
+// std::ostringstream; convenient for declaring whole arrays of streams.
+class raw_self_contained_string_ostream : private string_holder,
+                                          public raw_string_ostream {
+public:
+  raw_self_contained_string_ostream() : raw_string_ostream(S) {}
+};
+
+void MveEmitter::EmitHeader(raw_ostream &OS) {
+ // Accumulate pieces of the header file that will be enabled under various
+ // different combinations of #ifdef. The index into parts[] is made up of
+ // the following bit flags.
+ constexpr unsigned Float = 1;
+ constexpr unsigned UseUserNamespace = 2;
+
+ constexpr unsigned NumParts = 4;
+ raw_self_contained_string_ostream parts[NumParts];
+
+ // Write typedefs for all the required vector types, and a few scalar
+ // types that don't already have the name we want them to have.
+
+ parts[0] << "typedef uint16_t mve_pred16_t;\n";
+ parts[Float] << "typedef __fp16 float16_t;\n"
+ "typedef float float32_t;\n";
+ for (const auto &kv : ScalarTypes) {
+ const ScalarType *ST = kv.second.get();
+ raw_ostream &OS = parts[ST->requiresFloat() ? Float : 0];
+ const VectorType *VT = getVectorType(ST);
+
+ OS << "typedef __attribute__((neon_vector_type(" << VT->lanes() << "))) "
+ << ST->cName() << " " << VT->cName() << ";\n";
+
+ // Every vector type also comes with a pair of multi-vector types for
+ // the VLD2 and VLD4 instructions.
+ for (unsigned n = 2; n <= 4; n += 2) {
+ const MultiVectorType *MT = getMultiVectorType(n, VT);
+ OS << "typedef struct { " << VT->cName() << " val[" << n << "]; } "
+ << MT->cName() << ";\n";
+ }
+ }
+ parts[0] << "\n";
+ parts[Float] << "\n";
+
+ // Write declarations for all the intrinsics.
+
+ for (const auto &kv : ACLEIntrinsics) {
+ const ACLEIntrinsic &Int = *kv.second;
+
+ // We generate each intrinsic twice, under its full unambiguous
+ // name and its shorter polymorphic name (if the latter exists).
+ for (bool Polymorphic : {false, true}) {
+ if (Polymorphic && !Int.polymorphic())
+ continue;
+
+ // We also generate each intrinsic under a name like __arm_vfooq
+ // (which is in C language implementation namespace, so it's
+ // safe to define in any conforming user program) and a shorter
+ // one like vfooq (which is in user namespace, so a user might
+ // reasonably have used it for something already). If so, they
+ // can #define __ARM_MVE_PRESERVE_USER_NAMESPACE before
+ // including the header, which will suppress the shorter names
+ // and leave only the implementation-namespace ones. Then they
+ // have to write __arm_vfooq everywhere, of course.
+
+ for (bool UserNamespace : {false, true}) {
+ raw_ostream &OS = parts[(Int.requiresFloat() ? Float : 0) |
+ (UserNamespace ? UseUserNamespace : 0)];
+
+ // Make the name of the function in this declaration.
+
+ std::string FunctionName =
+ Polymorphic ? Int.shortName() : Int.fullName();
+ if (!UserNamespace)
+ FunctionName = "__arm_" + FunctionName;
+
+ // Make strings for the types involved in the function's
+ // prototype.
+
+ std::string RetTypeName = Int.returnType()->cName();
+ if (!StringRef(RetTypeName).endswith("*"))
+ RetTypeName += " ";
+
+ std::vector<std::string> ArgTypeNames;
+ for (const Type *ArgTypePtr : Int.argTypes())
+ ArgTypeNames.push_back(ArgTypePtr->cName());
+ std::string ArgTypesString =
+ join(std::begin(ArgTypeNames), std::end(ArgTypeNames), ", ");
+
+ // Emit the actual declaration. All these functions are
+ // declared 'static inline' without a body, which is fine
+ // provided clang recognizes them as builtins, and has the
+ // effect that this type signature is used in place of the one
+ // that Builtins.def didn't provide. That's how we can get
+ // structure types that weren't defined until this header was
+ // included to be part of the type signature of a builtin that
+ // was known to clang already.
+ //
+ // The declarations use __attribute__(__clang_arm_mve_alias),
+ // so that each function declared will be recognized as the
+ // appropriate MVE builtin in spite of its user-facing name.
+ //
+ // (That's better than making them all wrapper functions,
+ // partly because it avoids any compiler error message citing
+ // the wrapper function definition instead of the user's code,
+ // and mostly because some MVE intrinsics have arguments
+ // required to be compile-time constants, and that property
+ // can't be propagated through a wrapper function. It can be
+ // propagated through a macro, but macros can't be overloaded
+ // on argument types very easily - you have to use _Generic,
+ // which makes error messages very confusing when the user
+ // gets it wrong.)
+ //
+ // Finally, the polymorphic versions of the intrinsics are
+ // also defined with __attribute__(overloadable), so that when
+ // the same name is defined with several type signatures, the
+ // right thing happens. Each one of the overloaded
+ // declarations is given a different builtin id, which
+ // has exactly the effect we want: first clang resolves the
+ // overload to the right function, then it knows which builtin
+ // it's referring to, and then the Sema checking for that
+ // builtin can check further things like the constant
+ // arguments.
+ //
+ // One more subtlety is the newline just before the return
+ // type name. That's a cosmetic tweak to make the error
+ // messages legible if the user gets the types wrong in a call
+ // to a polymorphic function: this way, clang will print just
+ // the _final_ line of each declaration in the header, to show
+ // the type signatures that would have been legal. So all the
+ // confusing machinery with __attribute__ is left out of the
+ // error message, and the user sees something that's more or
+ // less self-documenting: "here's a list of actually readable
+ // type signatures for vfooq(), and here's why each one didn't
+ // match your call".
+
+ OS << "static __inline__ __attribute__(("
+ << (Polymorphic ? "overloadable, " : "")
+ << "__clang_arm_mve_alias(__builtin_arm_mve_" << Int.fullName()
+ << ")))\n"
+ << RetTypeName << FunctionName << "(" << ArgTypesString << ");\n";
+ }
+ }
+ }
+ for (auto &part : parts)
+ part << "\n";
+
+ // Now we've finished accumulating bits and pieces into the parts[] array.
+ // Put it all together to write the final output file.
+
+ OS << "/*===---- arm_mve.h - ARM MVE intrinsics "
+ "-----------------------------------===\n"
+ " *\n"
+ " *\n"
+ " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
+ "Exceptions.\n"
+ " * See https://llvm.org/LICENSE.txt for license information.\n"
+ " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
+ " *\n"
+ " *===-------------------------------------------------------------"
+ "----"
+ "------===\n"
+ " */\n"
+ "\n"
+ "#ifndef __ARM_MVE_H\n"
+ "#define __ARM_MVE_H\n"
+ "\n"
+ "#if !__ARM_FEATURE_MVE\n"
+ "#error \"MVE support not enabled\"\n"
+ "#endif\n"
+ "\n"
+ "#include <stdint.h>\n"
+ "\n";
+
+ for (size_t i = 0; i < NumParts; ++i) {
+ std::vector<std::string> conditions;
+ if (i & Float)
+ conditions.push_back("(__ARM_FEATURE_MVE & 2)");
+ if (i & UseUserNamespace)
+ conditions.push_back("(!defined __ARM_MVE_PRESERVE_USER_NAMESPACE)");
+
+ std::string condition =
+ join(std::begin(conditions), std::end(conditions), " && ");
+ if (!condition.empty())
+ OS << "#if " << condition << "\n\n";
+ OS << parts[i].str();
+ if (!condition.empty())
+ OS << "#endif /* " << condition << " */\n\n";
+ }
+
+ OS << "#endif /* __ARM_MVE_H */\n";
+}
+
+void MveEmitter::EmitBuiltinDef(raw_ostream &OS) {
+  // Emit a TARGET_HEADER_BUILTIN entry for every intrinsic under its full,
+  // unambiguous name. The empty type-signature string means the signature is
+  // taken from the declaration in arm_mve.h instead (see the comment in
+  // EmitHeader about 'static inline' declarations).
+  for (const auto &kv : ACLEIntrinsics) {
+    const ACLEIntrinsic &Int = *kv.second;
+    OS << "TARGET_HEADER_BUILTIN(__builtin_arm_mve_" << Int.fullName()
+       << ", \"\", \"n\", \"arm_mve.h\", ALL_LANGUAGES, \"\")\n";
+  }
+
+  // Polymorphic intrinsics also get a BUILTIN entry under their short name.
+  // Many intrinsics can share one short name, so each name is emitted at
+  // most once: set::insert reports through the .second of its return value
+  // whether the element was newly inserted, which avoids the previous
+  // find()-then-insert() double lookup.
+  std::set<std::string> ShortNamesSeen;
+
+  for (const auto &kv : ACLEIntrinsics) {
+    const ACLEIntrinsic &Int = *kv.second;
+    if (Int.polymorphic()) {
+      StringRef Name = Int.shortName();
+      if (ShortNamesSeen.insert(Name.str()).second)
+        OS << "BUILTIN(__builtin_arm_mve_" << Name << ", \"vi.\", \"nt\")\n";
+    }
+  }
+}
+
+void MveEmitter::EmitBuiltinSema(raw_ostream &OS) {
+  // Group intrinsics by the text of their Sema checking code, so that all
+  // intrinsics sharing an identical check can be emitted as one run of
+  // 'case' labels followed by a single copy of the check.
+  std::map<std::string, std::set<std::string>> Checks;
+
+  for (const auto &Entry : ACLEIntrinsics) {
+    const ACLEIntrinsic &Intrinsic = *Entry.second;
+    std::string SemaCode = Intrinsic.genSema();
+    if (SemaCode.empty())
+      continue;
+    Checks[SemaCode].insert(Intrinsic.fullName());
+  }
+
+  // Emit each distinct check once, preceded by the case labels of every
+  // intrinsic that uses it.
+  for (const auto &CheckAndNames : Checks) {
+    for (StringRef IntName : CheckAndNames.second)
+      OS << "case ARM::BI__builtin_arm_mve_" << IntName << ":\n";
+    OS << CheckAndNames.first;
+  }
+}
+
+// Machinery for the grouping of intrinsics by similar codegen.
+//
+// The general setup is that 'MergeableGroup' stores the things that a set of
+// similarly shaped intrinsics have in common: the text of their code
+// generation, and the number and type of their parameter variables.
+// MergeableGroup is the key in a std::map whose value is a set of
+// OutputIntrinsic, which stores the ways in which a particular intrinsic
+// specializes the MergeableGroup's generic description: the function name and
+// the _values_ of the parameter variables.
+
+struct ComparableStringVector : std::vector<std::string> {
+  // Infrastructure: a vector<string> equipped with an ordering (shorter
+  // vectors first, then elementwise string comparison), so that it can be
+  // used as a key in maps and an element in sets. Nothing relies on the
+  // particular order chosen, only on it being deterministic.
+  bool operator<(const ComparableStringVector &rhs) const {
+    if (size() != rhs.size())
+      return size() < rhs.size();
+    auto L = begin(), R = rhs.begin();
+    for (auto E = end(); L != E; ++L, ++R)
+      if (*L != *R)
+        return *L < *R;
+    return false;
+  }
+};
+
+struct OutputIntrinsic {
+  // The intrinsic this record describes.
+  const ACLEIntrinsic *Int;
+  // Its full user-facing function name.
+  std::string Name;
+  // The values this intrinsic supplies for its group's parameter variables.
+  ComparableStringVector ParamValues;
+  // Deterministic ordering so OutputIntrinsic can live in a std::set:
+  // primary key Name, tie broken by ParamValues.
+  bool operator<(const OutputIntrinsic &rhs) const {
+    if (Name < rhs.Name)
+      return true;
+    if (rhs.Name < Name)
+      return false;
+    return ParamValues < rhs.ParamValues;
+  }
+};
+struct MergeableGroup {
+  // The shared text of the code generation for every intrinsic in the group.
+  std::string Code;
+  // The types of the group's parameter variables.
+  ComparableStringVector ParamTypes;
+  // Deterministic ordering so MergeableGroup can be a std::map key:
+  // primary key Code, tie broken by ParamTypes.
+  bool operator<(const MergeableGroup &rhs) const {
+    if (Code < rhs.Code)
+      return true;
+    if (rhs.Code < Code)
+      return false;
+    return ParamTypes < rhs.ParamTypes;
+  }
+};
+
+void MveEmitter::EmitBuiltinCG(raw_ostream &OS) {
+  // Pass 1: generate code for all the intrinsics as if every type or constant
+  // that can possibly be abstracted out into a parameter variable will be.
+  // This identifies the sets of intrinsics we'll group together into a single
+  // piece of code generation.
+
+  std::map<MergeableGroup, std::set<OutputIntrinsic>> MergeableGroupsPrelim;
+
+  for (const auto &kv : ACLEIntrinsics) {
+    const ACLEIntrinsic &Int = *kv.second;
+
+    MergeableGroup MG;
+    OutputIntrinsic OI;
+
+    OI.Int = &Int;
+    OI.Name = Int.fullName();
+    CodeGenParamAllocator ParamAllocPrelim{&MG.ParamTypes, &OI.ParamValues};
+    // Accumulate this intrinsic's codegen into MG.Code. (Named CodeStream
+    // rather than OS so it doesn't shadow the output-stream parameter.)
+    raw_string_ostream CodeStream(MG.Code);
+    Int.genCode(CodeStream, ParamAllocPrelim, 1);
+    CodeStream.flush();
+
+    MergeableGroupsPrelim[MG].insert(OI);
+  }
+
+  // Pass 2: for each of those groups, optimize the parameter variable set by
+  // eliminating 'parameters' that are the same for all intrinsics in the
+  // group, and merging together pairs of parameter variables that take the
+  // same values as each other for all intrinsics in the group.
+
+  std::map<MergeableGroup, std::set<OutputIntrinsic>> MergeableGroups;
+
+  for (const auto &kv : MergeableGroupsPrelim) {
+    const MergeableGroup &MG = kv.first;
+    std::vector<int> ParamNumbers;
+    std::map<ComparableStringVector, int> ParamNumberMap;
+
+    // An arbitrary representative of the group, against which every other
+    // intrinsic's parameter values are compared. Loop-invariant, so hoisted
+    // out of the per-parameter loop below.
+    const OutputIntrinsic &OI_first = *kv.second.begin();
+
+    // Loop over the parameters for this group.
+    for (size_t i = 0, e = MG.ParamTypes.size(); i < e; ++i) {
+      // Is this parameter the same for all intrinsics in the group?
+      bool Constant = all_of(kv.second, [&](const OutputIntrinsic &OI) {
+        return OI.ParamValues[i] == OI_first.ParamValues[i];
+      });
+
+      // If so, record it as -1, meaning 'no parameter variable needed'. Then
+      // the corresponding call to allocParam in pass 2 will not generate a
+      // variable at all, and just use the value inline.
+      if (Constant) {
+        ParamNumbers.push_back(-1);
+        continue;
+      }
+
+      // Otherwise, make a list of the values this parameter takes for each
+      // intrinsic, and see if that value vector matches anything we already
+      // have. We also record the parameter type, so that we don't accidentally
+      // match up two parameter variables with different types. (Not that
+      // there's much chance of them having textually equivalent values, but in
+      // _principle_ it could happen.)
+      ComparableStringVector key;
+      key.push_back(MG.ParamTypes[i]);
+      for (const auto &OI : kv.second)
+        key.push_back(OI.ParamValues[i]);
+
+      auto Found = ParamNumberMap.find(key);
+      if (Found != ParamNumberMap.end()) {
+        // Yes, an existing parameter variable can be reused for this.
+        ParamNumbers.push_back(Found->second);
+        continue;
+      }
+
+      // No, we need a new parameter variable.
+      int NewIndex = ParamNumberMap.size();
+      ParamNumberMap[key] = NewIndex;
+      ParamNumbers.push_back(NewIndex);
+    }
+
+    // Now we're ready to do the pass 2 code generation, which will emit the
+    // reduced set of parameter variables we've just worked out. OutMG is the
+    // group being built for the final output (distinct from MG, the
+    // preliminary group we're reading from).
+
+    for (const auto &OI_prelim : kv.second) {
+      const ACLEIntrinsic *Int = OI_prelim.Int;
+
+      MergeableGroup OutMG;
+      OutputIntrinsic OI;
+
+      OI.Int = OI_prelim.Int;
+      OI.Name = OI_prelim.Name;
+      CodeGenParamAllocator ParamAlloc{&OutMG.ParamTypes, &OI.ParamValues,
+                                       &ParamNumbers};
+      raw_string_ostream CodeStream(OutMG.Code);
+      Int->genCode(CodeStream, ParamAlloc, 2);
+      CodeStream.flush();
+
+      MergeableGroups[OutMG].insert(OI);
+    }
+  }
+
+  // Output the actual C++ code.
+
+  for (const auto &kv : MergeableGroups) {
+    const MergeableGroup &MG = kv.first;
+
+    // List of case statements in the main switch on BuiltinID, and an open
+    // brace.
+    const char *prefix = "";
+    for (const auto &OI : kv.second) {
+      OS << prefix << "case ARM::BI__builtin_arm_mve_" << OI.Name << ":";
+      prefix = "\n";
+    }
+    OS << " {\n";
+
+    if (!MG.ParamTypes.empty()) {
+      // If we've got some parameter variables, then emit their declarations...
+      for (size_t i = 0, e = MG.ParamTypes.size(); i < e; ++i) {
+        StringRef Type = MG.ParamTypes[i];
+        OS << " " << Type;
+        if (!Type.endswith("*"))
+          OS << " ";
+        OS << " Param" << utostr(i) << ";\n";
+      }
+
+      // ... and an inner switch on BuiltinID that will fill them in with each
+      // individual intrinsic's values.
+      OS << " switch (BuiltinID) {\n";
+      for (const auto &OI : kv.second) {
+        OS << " case ARM::BI__builtin_arm_mve_" << OI.Name << ":\n";
+        for (size_t i = 0, e = MG.ParamTypes.size(); i < e; ++i)
+          OS << " Param" << utostr(i) << " = " << OI.ParamValues[i] << ";\n";
+        OS << " break;\n";
+      }
+      OS << " }\n";
+    }
+
+    // And finally, output the code, and close the outer pair of braces. (The
+    // code will always end with a 'return' statement, so we need not insert a
+    // 'break' here.)
+    OS << MG.Code << "}\n";
+  }
+}
+
+void MveEmitter::EmitBuiltinAliases(raw_ostream &OS) {
+  // For every intrinsic, emit a switch case that reports which user-facing
+  // names alias its builtin: always the full name, plus the short name when
+  // the intrinsic is polymorphic.
+  for (const auto &Entry : ACLEIntrinsics) {
+    const ACLEIntrinsic &Intrinsic = *Entry.second;
+    OS << "case ARM::BI__builtin_arm_mve_" << Intrinsic.fullName() << ":\n"
+       << "  return AliasName == \"" << Intrinsic.fullName() << "\"";
+    if (Intrinsic.polymorphic())
+      OS << " || AliasName == \"" << Intrinsic.shortName() << "\"";
+    OS << ";\n";
+  }
+}
+
+} // namespace
+
+namespace clang {
+
+// TableGen backend entry points: each one builds the full MveEmitter state
+// from the record keeper and then runs a single output mode on it.
+
+void EmitMveHeader(RecordKeeper &Records, raw_ostream &OS) {
+  MveEmitter Emitter(Records);
+  Emitter.EmitHeader(OS);
+}
+
+void EmitMveBuiltinDef(RecordKeeper &Records, raw_ostream &OS) {
+  MveEmitter Emitter(Records);
+  Emitter.EmitBuiltinDef(OS);
+}
+
+void EmitMveBuiltinSema(RecordKeeper &Records, raw_ostream &OS) {
+  MveEmitter Emitter(Records);
+  Emitter.EmitBuiltinSema(OS);
+}
+
+void EmitMveBuiltinCG(RecordKeeper &Records, raw_ostream &OS) {
+  MveEmitter Emitter(Records);
+  Emitter.EmitBuiltinCG(OS);
+}
+
+void EmitMveBuiltinAliases(RecordKeeper &Records, raw_ostream &OS) {
+  MveEmitter Emitter(Records);
+  Emitter.EmitBuiltinAliases(OS);
+}
+
+} // end namespace clang