// Some Common Instruction Class Templates
//===----------------------------------------------------------------------===//
+// Template for instructions which take three int64, int32, or int16 args.
+// The instructions are named "<OpcStr><Width>" (e.g. "add.s64").
multiclass I3<string OpcStr, SDNode OpNode> {
- def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
- [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
- Int64Regs:$b))]>;
- def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
- [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
- def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
- Int32Regs:$b))]>;
- def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
- def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
- Int16Regs:$b))]>;
- def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
+ def i64rr :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
+ def i64ri :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+ def i32rr :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ def i32ri :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i16rr :
+ NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
+ def i16ri :
+ NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
}
+// Template for instructions which take 3 int32 args. The instructions are
+// named "<OpcStr>.s32" (e.g. "addc.cc.s32").
multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
- def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
- Int32Regs:$b),
- !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
- Int32Regs:$b))]>;
- def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
- !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i32rr :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ def i32ri :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
}
+// Template for instructions which take three fp64 or fp32 args. The
+// instructions are named "<OpcStr>.f<Width>" (e.g. "add.f64").
+//
+// Also defines ftz (flush subnormal inputs and results to sign-preserving
+// zero) variants for fp32 functions.
multiclass F3<string OpcStr, SDNode OpNode> {
- def f64rr : NVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, Float64Regs:$b),
- !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
- [(set Float64Regs:$dst,
- (OpNode Float64Regs:$a, Float64Regs:$b))]>,
- Requires<[allowFMA]>;
- def f64ri : NVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, f64imm:$b),
- !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
- [(set Float64Regs:$dst,
- (OpNode Float64Regs:$a, fpimm:$b))]>,
- Requires<[allowFMA]>;
- def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
- !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
- [(set Float32Regs:$dst,
- (OpNode Float32Regs:$a, Float32Regs:$b))]>,
- Requires<[allowFMA, doF32FTZ]>;
- def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
- !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
- [(set Float32Regs:$dst,
- (OpNode Float32Regs:$a, fpimm:$b))]>,
- Requires<[allowFMA, doF32FTZ]>;
- def f32rr : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
- !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
- [(set Float32Regs:$dst,
- (OpNode Float32Regs:$a, Float32Regs:$b))]>,
- Requires<[allowFMA]>;
- def f32ri : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
- !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
- [(set Float32Regs:$dst,
- (OpNode Float32Regs:$a, fpimm:$b))]>,
- Requires<[allowFMA]>;
+ def f64rr :
+ NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>,
+ Requires<[allowFMA]>;
+ def f64ri :
+ NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>,
+ Requires<[allowFMA]>;
+ def f32rr_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[allowFMA, doF32FTZ]>;
+ def f32ri_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[allowFMA, doF32FTZ]>;
+ def f32rr :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[allowFMA]>;
+ def f32ri :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[allowFMA]>;
}
+// Same as F3, but defines ".rn" variants (round to nearest even). These are
+// guarded by the noFMA predicate, whereas F3's are guarded by allowFMA.
multiclass F3_rn<string OpcStr, SDNode OpNode> {
- def f64rr : NVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, Float64Regs:$b),
- !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
- [(set Float64Regs:$dst,
- (OpNode Float64Regs:$a, Float64Regs:$b))]>,
- Requires<[noFMA]>;
- def f64ri : NVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, f64imm:$b),
- !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
- [(set Float64Regs:$dst,
- (OpNode Float64Regs:$a, fpimm:$b))]>,
- Requires<[noFMA]>;
- def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
- !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
- [(set Float32Regs:$dst,
- (OpNode Float32Regs:$a, Float32Regs:$b))]>,
- Requires<[noFMA, doF32FTZ]>;
- def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
- !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
- [(set Float32Regs:$dst,
- (OpNode Float32Regs:$a, fpimm:$b))]>,
- Requires<[noFMA, doF32FTZ]>;
- def f32rr : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
- !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
- [(set Float32Regs:$dst,
- (OpNode Float32Regs:$a, Float32Regs:$b))]>,
- Requires<[noFMA]>;
- def f32ri : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
- !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
- [(set Float32Regs:$dst,
- (OpNode Float32Regs:$a, fpimm:$b))]>,
- Requires<[noFMA]>;
+ def f64rr :
+ NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>,
+ Requires<[noFMA]>;
+ def f64ri :
+ NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>,
+ Requires<[noFMA]>;
+ def f32rr_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[noFMA, doF32FTZ]>;
+ def f32ri_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[noFMA, doF32FTZ]>;
+ def f32rr :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[noFMA]>;
+ def f32ri :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[noFMA]>;
}
+// Template for operations which take two f32 or f64 operands. Provides three
+// instructions: <OpcStr>.f64, <OpcStr>.f32, and <OpcStr>.ftz.f32 (flush
+// subnormal inputs and results to zero).
multiclass F2<string OpcStr, SDNode OpNode> {
- def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
- !strconcat(OpcStr, ".f64 \t$dst, $a;"),
- [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
+ def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
+ !strconcat(OpcStr, ".f64 \t$dst, $a;"),
+ [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
- !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
- [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
- Requires<[doF32FTZ]>;
- def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
- !strconcat(OpcStr, ".f32 \t$dst, $a;"),
- [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
+ !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
+ Requires<[doF32FTZ]>;
+ def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
+ !strconcat(OpcStr, ".f32 \t$dst, $a;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
}
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
//-----------------------------------
-// General Type Conversion
+// Type Conversion
//-----------------------------------
let hasSideEffects = 0 in {
-// Generate a cvt to the given type from all possible types.
-// Each instance takes a CvtMode immediate that defines the conversion mode to
-// use. It can be CvtNONE to omit a conversion mode.
-multiclass CVT_FROM_ALL<string FromName, RegisterClass RC> {
- def _s16 : NVPTXInst<(outs RC:$dst),
- (ins Int16Regs:$src, CvtMode:$mode),
- !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s16\t$dst, $src;"),
- []>;
- def _u16 : NVPTXInst<(outs RC:$dst),
- (ins Int16Regs:$src, CvtMode:$mode),
- !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u16\t$dst, $src;"),
- []>;
- def _f16 : NVPTXInst<(outs RC:$dst),
- (ins Int16Regs:$src, CvtMode:$mode),
- !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".f16\t$dst, $src;"),
- []>;
- def _s32 : NVPTXInst<(outs RC:$dst),
- (ins Int32Regs:$src, CvtMode:$mode),
- !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s32\t$dst, $src;"),
- []>;
- def _u32 : NVPTXInst<(outs RC:$dst),
- (ins Int32Regs:$src, CvtMode:$mode),
- !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u32\t$dst, $src;"),
- []>;
- def _s64 : NVPTXInst<(outs RC:$dst),
- (ins Int64Regs:$src, CvtMode:$mode),
- !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s64\t$dst, $src;"),
- []>;
- def _u64 : NVPTXInst<(outs RC:$dst),
- (ins Int64Regs:$src, CvtMode:$mode),
- !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u64\t$dst, $src;"),
- []>;
- def _f32 : NVPTXInst<(outs RC:$dst),
- (ins Float32Regs:$src, CvtMode:$mode),
- !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".f32\t$dst, $src;"),
- []>;
- def _f64 : NVPTXInst<(outs RC:$dst),
- (ins Float64Regs:$src, CvtMode:$mode),
- !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".f64\t$dst, $src;"),
- []>;
-}
-
-// Generate a cvt to all possible types.
-defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
-defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
-defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
-defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
-defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
-defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
-defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
-defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
-defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
-
-// This set of cvt is different from the above. The type of the source
-// and target are the same.
-//
-def CVT_INREG_s16_s8 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
- "cvt.s16.s8 \t$dst, $src;", []>;
-def CVT_INREG_s32_s8 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
- "cvt.s32.s8 \t$dst, $src;", []>;
-def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
- "cvt.s32.s16 \t$dst, $src;", []>;
-def CVT_INREG_s64_s8 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
- "cvt.s64.s8 \t$dst, $src;", []>;
-def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
- "cvt.s64.s16 \t$dst, $src;", []>;
-def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
- "cvt.s64.s32 \t$dst, $src;", []>;
+ // Generate a cvt to the given type from all possible types. Each instance
+ // takes a CvtMode immediate that defines the conversion mode to use. It can
+ // be CvtNONE to omit a conversion mode.
+ multiclass CVT_FROM_ALL<string FromName, RegisterClass RC> {
+ def _s16 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Int16Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
+ FromName, ".s16\t$dst, $src;"), []>;
+ def _u16 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Int16Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
+ FromName, ".u16\t$dst, $src;"), []>;
+ def _f16 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Int16Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
+ FromName, ".f16\t$dst, $src;"), []>;
+ def _s32 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Int32Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
+ FromName, ".s32\t$dst, $src;"), []>;
+ def _u32 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Int32Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
+ FromName, ".u32\t$dst, $src;"), []>;
+ def _s64 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Int64Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
+ FromName, ".s64\t$dst, $src;"), []>;
+ def _u64 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Int64Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
+ FromName, ".u64\t$dst, $src;"), []>;
+ def _f32 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Float32Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
+ FromName, ".f32\t$dst, $src;"), []>;
+ def _f64 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Float64Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
+ FromName, ".f64\t$dst, $src;"), []>;
+ }
+
+ // Generate cvts from all types to all types.
+ defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
+ defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
+ defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
+ defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
+ defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
+ defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
+ defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
+ defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
+ defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
+
+ // These cvts are different from those above: The source and dest registers
+ // are of the same type.
+ def CVT_INREG_s16_s8 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+ "cvt.s16.s8 \t$dst, $src;", []>;
+ def CVT_INREG_s32_s8 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+ "cvt.s32.s8 \t$dst, $src;", []>;
+ def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+ "cvt.s32.s16 \t$dst, $src;", []>;
+ def CVT_INREG_s64_s8 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+ "cvt.s64.s8 \t$dst, $src;", []>;
+ def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+ "cvt.s64.s16 \t$dst, $src;", []>;
+ def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+ "cvt.s64.s32 \t$dst, $src;", []>;
}
//-----------------------------------
// Integer Arithmetic
//-----------------------------------
+// Template for xor masquerading as int1 arithmetic.
multiclass ADD_SUB_i1<SDNode OpNode> {
def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
- "xor.pred \t$dst, $a, $b;",
- [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
+ "xor.pred \t$dst, $a, $b;",
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
- "xor.pred \t$dst, $a, $b;",
- [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
+ "xor.pred \t$dst, $a, $b;",
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
}
+// int1 addition and subtraction are both just xor.
defm ADD_i1 : ADD_SUB_i1<add>;
defm SUB_i1 : ADD_SUB_i1<sub>;
-
+// int16, int32, and int64 signed addition and subtraction. Since nvptx is 2's
+// complement, we also use these for unsigned arithmetic.
defm ADD : I3<"add.s", add>;
defm SUB : I3<"sub.s", sub>;
+// int32 addition and subtraction with carry-out.
+// FIXME: PTX 4.3 adds a 64-bit add.cc (and maybe also 64-bit addc.cc?).
defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
+// int32 addition and subtraction with carry-in and carry-out.
defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
-//mul.wide PTX instruction
+defm MULT : I3<"mul.lo.s", mul>;
+
+defm MULTHS : I3<"mul.hi.s", mulhs>;
+defm MULTHU : I3<"mul.hi.u", mulhu>;
+
+defm SDIV : I3<"div.s", sdiv>;
+defm UDIV : I3<"div.u", udiv>;
+
+// The ri versions of rem.s and rem.u won't be selected; DAGCombiner::visitSREM
+// and DAGCombiner::visitUREM will lower them.
+defm SREM : I3<"rem.s", srem>;
+defm UREM : I3<"rem.u", urem>;
+
+
+//
+// Wide multiplication
+//
+def MULWIDES64 :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ "mul.wide.s32 \t$dst, $a, $b;", []>;
+def MULWIDES64Imm :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ "mul.wide.s32 \t$dst, $a, $b;", []>;
+def MULWIDES64Imm64 :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
+ "mul.wide.s32 \t$dst, $a, $b;", []>;
+
+def MULWIDEU64 :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ "mul.wide.u32 \t$dst, $a, $b;", []>;
+def MULWIDEU64Imm :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ "mul.wide.u32 \t$dst, $a, $b;", []>;
+def MULWIDEU64Imm64 :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
+ "mul.wide.u32 \t$dst, $a, $b;", []>;
+
+def MULWIDES32 :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ "mul.wide.s16 \t$dst, $a, $b;", []>;
+def MULWIDES32Imm :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ "mul.wide.s16 \t$dst, $a, $b;", []>;
+def MULWIDES32Imm32 :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
+ "mul.wide.s16 \t$dst, $a, $b;", []>;
+
+def MULWIDEU32 :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ "mul.wide.u16 \t$dst, $a, $b;", []>;
+def MULWIDEU32Imm :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ "mul.wide.u16 \t$dst, $a, $b;", []>;
+def MULWIDEU32Imm32 :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
+ "mul.wide.u16 \t$dst, $a, $b;", []>;
+
+def SDTMulWide : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
+def mul_wide_signed : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
+def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;
+
+// Matchers for signed, unsigned mul.wide ISD nodes.
+def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),
+ (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
+ (MULWIDES32Imm Int16Regs:$a, imm:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),
+ (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
+ (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
+ Requires<[doMulWide]>;
+
+def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)),
+ (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
+ (MULWIDES64Imm Int32Regs:$a, imm:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)),
+ (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
+ (MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
+ Requires<[doMulWide]>;
+
+// Predicates used for converting some patterns to mul.wide.
def SInt32Const : PatLeaf<(imm), [{
const APInt &v = N->getAPIntValue();
- if (v.isSignedIntN(32))
- return true;
- return false;
+ return v.isSignedIntN(32);
}]>;
def UInt32Const : PatLeaf<(imm), [{
const APInt &v = N->getAPIntValue();
- if (v.isIntN(32))
- return true;
- return false;
+ return v.isIntN(32);
}]>;
def SInt16Const : PatLeaf<(imm), [{
const APInt &v = N->getAPIntValue();
- if (v.isSignedIntN(16))
- return true;
- return false;
+ return v.isSignedIntN(16);
}]>;
def UInt16Const : PatLeaf<(imm), [{
const APInt &v = N->getAPIntValue();
- if (v.isIntN(16))
- return true;
- return false;
+ return v.isIntN(16);
}]>;
def Int5Const : PatLeaf<(imm), [{
+ // Check if 0 <= v < 32; only then will the result of (x << v) be an int32.
const APInt &v = N->getAPIntValue();
- // Check if 0 <= v < 32
- // Only then the result from (x << v) will be i32
- if (v.sge(0) && v.slt(32))
- return true;
- return false;
+ return v.sge(0) && v.slt(32);
}]>;
def Int4Const : PatLeaf<(imm), [{
+ // Check if 0 <= v < 16; only then will the result of (x << v) be an int16.
const APInt &v = N->getAPIntValue();
- // Check if 0 <= v < 16
- // Only then the result from (x << v) will be i16
- if (v.sge(0) && v.slt(16))
- return true;
- return false;
+ return v.sge(0) && v.slt(16);
}]>;
def SHL2MUL32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i16);
}]>;
-def MULWIDES64
- : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
- "mul.wide.s32 \t$dst, $a, $b;", []>;
-def MULWIDES64Imm
- : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
- "mul.wide.s32 \t$dst, $a, $b;", []>;
-def MULWIDES64Imm64
- : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
- "mul.wide.s32 \t$dst, $a, $b;", []>;
-
-def MULWIDEU64
- : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
- "mul.wide.u32 \t$dst, $a, $b;", []>;
-def MULWIDEU64Imm
- : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
- "mul.wide.u32 \t$dst, $a, $b;", []>;
-def MULWIDEU64Imm64
- : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
- "mul.wide.u32 \t$dst, $a, $b;", []>;
-
-def MULWIDES32
- : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
- "mul.wide.s16 \t$dst, $a, $b;", []>;
-def MULWIDES32Imm
- : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
- "mul.wide.s16 \t$dst, $a, $b;", []>;
-def MULWIDES32Imm32
- : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
- "mul.wide.s16 \t$dst, $a, $b;", []>;
-
-def MULWIDEU32
- : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
- "mul.wide.u16 \t$dst, $a, $b;", []>;
-def MULWIDEU32Imm
- : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
- "mul.wide.u16 \t$dst, $a, $b;", []>;
-def MULWIDEU32Imm32
- : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
- "mul.wide.u16 \t$dst, $a, $b;", []>;
-
+// Convert "sign/zero-extend, then shift left by an immediate" to mul.wide.
def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
(MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
- Requires<[doMulWide]>;
+ Requires<[doMulWide]>;
def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
(MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
- Requires<[doMulWide]>;
+ Requires<[doMulWide]>;
def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
(MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
- Requires<[doMulWide]>;
+ Requires<[doMulWide]>;
def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
(MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
- Requires<[doMulWide]>;
+ Requires<[doMulWide]>;
+// Convert "sign/zero-extend then multiply" to mul.wide.
def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
(MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
- Requires<[doMulWide]>;
+ Requires<[doMulWide]>;
def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
(MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>,
- Requires<[doMulWide]>;
+ Requires<[doMulWide]>;
def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
(MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
(MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>,
- Requires<[doMulWide]>;
+ Requires<[doMulWide]>;
def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
(MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
(MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>,
- Requires<[doMulWide]>;
+ Requires<[doMulWide]>;
def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
(MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
(MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>,
- Requires<[doMulWide]>;
-
-
-def SDTMulWide
- : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
-def mul_wide_signed
- : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
-def mul_wide_unsigned
- : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;
-
-def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),
- (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
Requires<[doMulWide]>;
-def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
- (MULWIDES32Imm Int16Regs:$a, imm:$b)>,
- Requires<[doMulWide]>;
-def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),
- (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
- Requires<[doMulWide]>;
-def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
- (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
- Requires<[doMulWide]>;
-
-
-def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)),
- (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
- Requires<[doMulWide]>;
-def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
- (MULWIDES64Imm Int32Regs:$a, imm:$b)>,
- Requires<[doMulWide]>;
-def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)),
- (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
- Requires<[doMulWide]>;
-def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
- (MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
- Requires<[doMulWide]>;
-
-defm MULT : I3<"mul.lo.s", mul>;
-
-defm MULTHS : I3<"mul.hi.s", mulhs>;
-defm MULTHU : I3<"mul.hi.u", mulhu>;
-
-defm SDIV : I3<"div.s", sdiv>;
-defm UDIV : I3<"div.u", udiv>;
-defm SREM : I3<"rem.s", srem>;
-// The ri version will not be selected as DAGCombiner::visitSREM will lower it.
-defm UREM : I3<"rem.u", urem>;
-// The ri version will not be selected as DAGCombiner::visitUREM will lower it.
-
-def SDTIMAD
- : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>,
- SDTCisInt<2>, SDTCisSameAs<0, 2>,
- SDTCisSameAs<0, 3>]>;
-def imad
- : SDNode<"NVPTXISD::IMAD", SDTIMAD>;
-
-def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
- "mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int16Regs:$dst,
- (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>;
-def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
- "mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int16Regs:$dst,
- (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>;
-def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
- "mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int16Regs:$dst,
- (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>;
-def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
- "mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int16Regs:$dst,
- (imad Int16Regs:$a, imm:$b, imm:$c))]>;
-
-def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
- "mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst,
- (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>;
-def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
- "mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst,
- (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>;
-def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
- "mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst,
- (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>;
-def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
- "mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst,
- (imad Int32Regs:$a, imm:$b, imm:$c))]>;
-
-def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
- (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
- "mad.lo.s64 \t$dst, $a, $b, $c;",
- [(set Int64Regs:$dst,
- (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>;
-def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
- (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
- "mad.lo.s64 \t$dst, $a, $b, $c;",
- [(set Int64Regs:$dst,
- (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>;
-def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
- (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
- "mad.lo.s64 \t$dst, $a, $b, $c;",
- [(set Int64Regs:$dst,
- (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>;
-def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
- (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
- "mad.lo.s64 \t$dst, $a, $b, $c;",
- [(set Int64Regs:$dst,
- (imad Int64Regs:$a, imm:$b, imm:$c))]>;
-
-def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
- "neg.s16 \t$dst, $src;",
- [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
-def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
- "neg.s32 \t$dst, $src;",
- [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
-def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
- "neg.s64 \t$dst, $src;",
- [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
+//
+// Integer multiply-add
+//
+def SDTIMAD :
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>,
+ SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;
+def imad : SDNode<"NVPTXISD::IMAD", SDTIMAD>;
+
+def MAD16rrr :
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>;
+def MAD16rri :
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>;
+def MAD16rir :
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>;
+def MAD16rii :
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (imad Int16Regs:$a, imm:$b, imm:$c))]>;
+
+def MAD32rrr :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>;
+def MAD32rri :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>;
+def MAD32rir :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>;
+def MAD32rii :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (imad Int32Regs:$a, imm:$b, imm:$c))]>;
+
+def MAD64rrr :
+ NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>;
+def MAD64rri :
+ NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>;
+def MAD64rir :
+ NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>;
+def MAD64rii :
+ NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (imad Int64Regs:$a, imm:$b, imm:$c))]>;
+
+def INEG16 :
+ NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+ "neg.s16 \t$dst, $src;",
+ [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
+def INEG32 :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+ "neg.s32 \t$dst, $src;",
+ [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
+def INEG64 :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+ "neg.s64 \t$dst, $src;",
+ [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
//-----------------------------------
// Floating Point Arithmetic
// Constant 1.0f
def FloatConst1 : PatLeaf<(fpimm), [{
- if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
- return false;
- float f = (float)N->getValueAPF().convertToFloat();
- return (f==1.0f);
+ // The semantics test comes first: && short-circuits, so convertToFloat() is
+ // only evaluated for IEEEsingle values.
+ return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEsingle &&
+ N->getValueAPF().convertToFloat() == 1.0f;
}]>;
-// Constand (double)1.0
+// Constant 1.0 (double)
def DoubleConst1 : PatLeaf<(fpimm), [{
- if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
- return false;
- double d = (double)N->getValueAPF().convertToDouble();
- return (d==1.0);
+ // The semantics test comes first: && short-circuits, so convertToDouble() is
+ // only evaluated for IEEEdouble values.
+ return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble &&
+ N->getValueAPF().convertToDouble() == 1.0;
}]>;
defm FADD : F3<"add", fadd>;
defm FSUB_rn : F3_rn<"sub", fsub>;
defm FMUL_rn : F3_rn<"mul", fmul>;
-defm FABS : F2<"abs", fabs>;
-defm FNEG : F2<"neg", fneg>;
+defm FABS : F2<"abs", fabs>;
+defm FNEG : F2<"neg", fneg>;
defm FSQRT : F2<"sqrt.rn", fsqrt>;
//
// F64 division
//
-def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
- (ins f64imm:$a, Float64Regs:$b),
- "rcp.rn.f64 \t$dst, $b;",
- [(set Float64Regs:$dst,
- (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
-def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, Float64Regs:$b),
- "div.rn.f64 \t$dst, $a, $b;",
- [(set Float64Regs:$dst,
- (fdiv Float64Regs:$a, Float64Regs:$b))]>;
-def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, f64imm:$b),
- "div.rn.f64 \t$dst, $a, $b;",
- [(set Float64Regs:$dst,
- (fdiv Float64Regs:$a, fpimm:$b))]>;
+// 1.0 / $b: $a only matches the constant 1.0 (DoubleConst1) and is not
+// printed; the emitted rcp.rn.f64 takes $b alone.
+def FDIV641r :
+ NVPTXInst<(outs Float64Regs:$dst),
+ (ins f64imm:$a, Float64Regs:$b),
+ "rcp.rn.f64 \t$dst, $b;",
+ [(set Float64Regs:$dst, (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
+// General $a / $b with a register divisor.
+def FDIV64rr :
+ NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ "div.rn.f64 \t$dst, $a, $b;",
+ [(set Float64Regs:$dst, (fdiv Float64Regs:$a, Float64Regs:$b))]>;
+// General $a / $b with an immediate divisor.
+def FDIV64ri :
+ NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ "div.rn.f64 \t$dst, $a, $b;",
+ [(set Float64Regs:$dst, (fdiv Float64Regs:$a, fpimm:$b))]>;
//
// F32 Approximate reciprocal
//
-def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
- (ins f32imm:$a, Float32Regs:$b),
- "rcp.approx.ftz.f32 \t$dst, $b;",
- [(set Float32Regs:$dst,
- (fdiv FloatConst1:$a, Float32Regs:$b))]>,
- Requires<[do_DIVF32_APPROX, doF32FTZ]>;
-def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
- (ins f32imm:$a, Float32Regs:$b),
- "rcp.approx.f32 \t$dst, $b;",
- [(set Float32Regs:$dst,
- (fdiv FloatConst1:$a, Float32Regs:$b))]>,
- Requires<[do_DIVF32_APPROX]>;
+def FDIV321r_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.ftz.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX, doF32FTZ]>;
+def FDIV321r :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX]>;
//
// F32 Approximate division
//
-def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
- "div.approx.ftz.f32 \t$dst, $a, $b;",
- [(set Float32Regs:$dst,
- (fdiv Float32Regs:$a, Float32Regs:$b))]>,
- Requires<[do_DIVF32_APPROX, doF32FTZ]>;
-def FDIV32approxri_ftz : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
- "div.approx.ftz.f32 \t$dst, $a, $b;",
- [(set Float32Regs:$dst,
- (fdiv Float32Regs:$a, fpimm:$b))]>,
- Requires<[do_DIVF32_APPROX, doF32FTZ]>;
-def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
- "div.approx.f32 \t$dst, $a, $b;",
- [(set Float32Regs:$dst,
- (fdiv Float32Regs:$a, Float32Regs:$b))]>,
- Requires<[do_DIVF32_APPROX]>;
-def FDIV32approxri : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
- "div.approx.f32 \t$dst, $a, $b;",
- [(set Float32Regs:$dst,
- (fdiv Float32Regs:$a, fpimm:$b))]>,
- Requires<[do_DIVF32_APPROX]>;
+def FDIV32approxrr_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.approx.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX, doF32FTZ]>;
+def FDIV32approxri_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.approx.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[do_DIVF32_APPROX, doF32FTZ]>;
+def FDIV32approxrr :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.approx.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX]>;
+def FDIV32approxri :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.approx.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[do_DIVF32_APPROX]>;
//
// F32 Semi-accurate reciprocal
//
// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
//
-def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
- (ins f32imm:$a, Float32Regs:$b),
- "rcp.approx.ftz.f32 \t$dst, $b;",
- [(set Float32Regs:$dst,
- (fdiv FloatConst1:$a, Float32Regs:$b))]>,
- Requires<[do_DIVF32_FULL, doF32FTZ]>;
-def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
- (ins f32imm:$a, Float32Regs:$b),
- "rcp.approx.f32 \t$dst, $b;",
- [(set Float32Regs:$dst,
- (fdiv FloatConst1:$a, Float32Regs:$b))]>,
- Requires<[do_DIVF32_FULL]>;
+def FDIV321r_approx_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.ftz.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV321r_approx :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL]>;
//
// F32 Semi-accurate division
//
-def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
- "div.full.ftz.f32 \t$dst, $a, $b;",
- [(set Float32Regs:$dst,
- (fdiv Float32Regs:$a, Float32Regs:$b))]>,
- Requires<[do_DIVF32_FULL, doF32FTZ]>;
-def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
- "div.full.ftz.f32 \t$dst, $a, $b;",
- [(set Float32Regs:$dst,
- (fdiv Float32Regs:$a, fpimm:$b))]>,
- Requires<[do_DIVF32_FULL, doF32FTZ]>;
-def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
- "div.full.f32 \t$dst, $a, $b;",
- [(set Float32Regs:$dst,
- (fdiv Float32Regs:$a, Float32Regs:$b))]>,
- Requires<[do_DIVF32_FULL]>;
-def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
- "div.full.f32 \t$dst, $a, $b;",
- [(set Float32Regs:$dst,
- (fdiv Float32Regs:$a, fpimm:$b))]>,
- Requires<[do_DIVF32_FULL]>;
+def FDIV32rr_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.full.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV32ri_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.full.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV32rr :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.full.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL]>;
+def FDIV32ri :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.full.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[do_DIVF32_FULL]>;
//
// F32 Accurate reciprocal
//
-def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
- (ins f32imm:$a, Float32Regs:$b),
- "rcp.rn.ftz.f32 \t$dst, $b;",
- [(set Float32Regs:$dst,
- (fdiv FloatConst1:$a, Float32Regs:$b))]>,
- Requires<[reqPTX20, doF32FTZ]>;
-def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
- (ins f32imm:$a, Float32Regs:$b),
- "rcp.rn.f32 \t$dst, $b;",
- [(set Float32Regs:$dst,
- (fdiv FloatConst1:$a, Float32Regs:$b))]>,
- Requires<[reqPTX20]>;
+def FDIV321r_prec_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.rn.ftz.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[reqPTX20, doF32FTZ]>;
+def FDIV321r_prec :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.rn.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[reqPTX20]>;
//
// F32 Accurate division
//
-def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
- "div.rn.ftz.f32 \t$dst, $a, $b;",
- [(set Float32Regs:$dst,
- (fdiv Float32Regs:$a, Float32Regs:$b))]>,
- Requires<[doF32FTZ, reqPTX20]>;
-def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
- "div.rn.ftz.f32 \t$dst, $a, $b;",
- [(set Float32Regs:$dst,
- (fdiv Float32Regs:$a, fpimm:$b))]>,
- Requires<[doF32FTZ, reqPTX20]>;
-def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
- "div.rn.f32 \t$dst, $a, $b;",
- [(set Float32Regs:$dst,
- (fdiv Float32Regs:$a, Float32Regs:$b))]>,
- Requires<[reqPTX20]>;
-def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
- "div.rn.f32 \t$dst, $a, $b;",
- [(set Float32Regs:$dst,
- (fdiv Float32Regs:$a, fpimm:$b))]>,
- Requires<[reqPTX20]>;
+def FDIV32rr_prec_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.rn.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[doF32FTZ, reqPTX20]>;
+def FDIV32ri_prec_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.rn.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[doF32FTZ, reqPTX20]>;
+def FDIV32rr_prec :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.rn.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[reqPTX20]>;
+def FDIV32ri_prec :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.rn.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[reqPTX20]>;
//
// F32 rsqrt
def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b),
"rsqrt.approx.f32 \t$dst, $b;", []>;
+// Convert 1.0f/sqrt(x) to rsqrt.approx.f32. (There is an rsqrt.approx.f64, but
+// it's emulated in software.)
def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
(RSQRTF32approx1r Float32Regs:$b)>,
Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>;
-multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
- def rrr : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float32Regs:$dst,
- (fma Float32Regs:$a, Float32Regs:$b, Float32Regs:$c))]>,
- Requires<[Pred]>;
- def rri : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float32Regs:$dst,
- (fma Float32Regs:$a, Float32Regs:$b, fpimm:$c))]>,
- Requires<[Pred]>;
- def rir : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float32Regs:$dst,
- (fma Float32Regs:$a, fpimm:$b, Float32Regs:$c))]>,
- Requires<[Pred]>;
- def rii : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float32Regs:$dst,
- (fma Float32Regs:$a, fpimm:$b, fpimm:$c))]>,
- Requires<[Pred]>;
-}
-
-multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
- def rrr : NVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float64Regs:$dst,
- (fma Float64Regs:$a, Float64Regs:$b, Float64Regs:$c))]>,
- Requires<[Pred]>;
- def rri : NVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float64Regs:$dst,
- (fma Float64Regs:$a, Float64Regs:$b, fpimm:$c))]>,
- Requires<[Pred]>;
- def rir : NVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float64Regs:$dst,
- (fma Float64Regs:$a, fpimm:$b, Float64Regs:$c))]>,
- Requires<[Pred]>;
- def rii : NVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float64Regs:$dst,
- (fma Float64Regs:$a, fpimm:$b, fpimm:$c))]>,
- Requires<[Pred]>;
+// Template for fused multiply-add instructions matching the `fma` node.
+//   OpcStr - full opcode string, e.g. "fma.rn.f32".
+//   RC     - register class of $dst and of every register operand.
+//   ImmCls - operand class used when $b and/or $c is an immediate.
+//   Pred   - predicate required by every variant.
+// Defines rrr, rri, rir, and rii variants; $a is always a register.
+multiclass FMA<string OpcStr, RegisterClass RC, Operand ImmCls, Predicate Pred> {
+ def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>,
+ Requires<[Pred]>;
+ def rri : NVPTXInst<(outs RC:$dst),
+ (ins RC:$a, RC:$b, ImmCls:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set RC:$dst, (fma RC:$a, RC:$b, fpimm:$c))]>,
+ Requires<[Pred]>;
+ def rir : NVPTXInst<(outs RC:$dst),
+ (ins RC:$a, ImmCls:$b, RC:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set RC:$dst, (fma RC:$a, fpimm:$b, RC:$c))]>,
+ Requires<[Pred]>;
+ def rii : NVPTXInst<(outs RC:$dst),
+ (ins RC:$a, ImmCls:$b, ImmCls:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set RC:$dst, (fma RC:$a, fpimm:$b, fpimm:$c))]>,
+ Requires<[Pred]>;
}
-defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doF32FTZ>;
-defm FMA32 : FPCONTRACT32<"fma.rn.f32", true>;
-defm FMA64 : FPCONTRACT64<"fma.rn.f64", true>;
+defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>;
+defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, true>;
+defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, true>;
+// sin/cos
def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"sin.approx.f32 \t$dst, $src;",
[(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
"cos.approx.f32 \t$dst, $src;",
[(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
-// Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y))
-// e.g. "poor man's fmod()"
+// Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y)),
+// i.e. "poor man's fmod()"
// frem - f32 FTZ
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
fpimm:$y))>;
//-----------------------------------
-// Logical Arithmetic
+// Bitwise operations
//-----------------------------------
-multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
- def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
- !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
- [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
- def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
- !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
- [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
- def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
- [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
- Int16Regs:$b))]>;
- def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
- !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
- [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
- def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
- Int32Regs:$b))]>;
- def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
- !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
- def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
- !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
- [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
- Int64Regs:$b))]>;
- def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
- !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
- [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+// Template for three-arg bitwise operations. Takes three args and creates
+// .b16, .b32, .b64, and .pred (predicate registers -- i.e., i1) versions of
+// OpcStr.
+multiclass BITWISE<string OpcStr, SDNode OpNode> {
+ def b1rr :
+ NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
+ !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
+ def b1ri :
+ NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
+ !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
+ def b16rr :
+ NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
+ def b16ri :
+ NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
+ def b32rr :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ def b32ri :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def b64rr :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
+ def b64ri :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
}
-defm OR : LOG_FORMAT<"or", or>;
-defm AND : LOG_FORMAT<"and", and>;
-defm XOR : LOG_FORMAT<"xor", xor>;
+defm OR : BITWISE<"or", or>;
+defm AND : BITWISE<"and", and>;
+defm XOR : BITWISE<"xor", xor>;
-def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
+def NOT1 : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
"not.pred \t$dst, $src;",
[(set Int1Regs:$dst, (not Int1Regs:$src))]>;
-def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+def NOT16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
"not.b16 \t$dst, $src;",
[(set Int16Regs:$dst, (not Int16Regs:$src))]>;
-def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+def NOT32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
"not.b32 \t$dst, $src;",
[(set Int32Regs:$dst, (not Int32Regs:$src))]>;
-def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
- "not.b64 \t$dst, $src;",
- [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
-
-// For shifts, the second src operand must be 32-bit value
-multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
- def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
- Int32Regs:$b),
- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
- [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
- Int32Regs:$b))]>;
- def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
- [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
- (i32 imm:$b)))]>;
- def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
- Int32Regs:$b),
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
- Int32Regs:$b))]>;
- def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
- (i32 imm:$b)))]>;
- def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
- (i32 imm:$b)))]>;
- def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
- Int32Regs:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
- Int32Regs:$b))]>;
- def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
- (i32 imm:$b)))]>;
-}
+def NOT64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+ "not.b64 \t$dst, $src;",
+ [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
-defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
-
-// For shifts, the second src operand must be 32-bit value
-// Need to add cvt for the 8-bits.
-multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
- def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
- Int32Regs:$b),
- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
- [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
- Int32Regs:$b))]>;
- def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
- [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
- (i32 imm:$b)))]>;
- def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
- Int32Regs:$b),
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
- Int32Regs:$b))]>;
- def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
- (i32 imm:$b)))]>;
- def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
- (i32 imm:$b)))]>;
- def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
- Int32Regs:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
- Int32Regs:$b))]>;
- def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
- (i32 imm:$b)))]>;
+// Template for left/right shifts. Takes three operands,
+// [dest (reg), src (reg), shift (reg or imm)].
+// dest and src may be int64, int32, or int16, but shift is always int32.
+//
+// This template also defines a 32-bit shift (imm, imm) instruction.
+multiclass SHIFT<string OpcStr, SDNode OpNode> {
+ def i64rr :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int32Regs:$b))]>;
+ def i64ri :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 imm:$b)))]>;
+ def i32rr :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ def i32ri :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, (i32 imm:$b)))]>;
+ def i32ii :
+ NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>;
+ def i16rr :
+ NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int32Regs:$b))]>;
+ def i16ri :
+ NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>;
}
-defm SRA : RSHIFT_FORMAT<"shr.s", sra>;
-defm SRL : RSHIFT_FORMAT<"shr.u", srl>;
+defm SHL : SHIFT<"shl.b", shl>;
+defm SRA : SHIFT<"shr.s", sra>;
+defm SRL : SHIFT<"shr.u", srl>;
//
-// Rotate: use ptx shf instruction if available.
+// Rotate: Use ptx shf instruction if available.
//
// 32 bit r2 = rotl r1, n
// =>
// r2 = shf.l r1, r1, n
-def ROTL32imm_hw : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$src, i32imm:$amt),
- "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
- [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>,
- Requires<[hasHWROT32]> ;
-
-def ROTL32reg_hw : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$src, Int32Regs:$amt),
- "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
- [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
- Requires<[hasHWROT32]>;
+def ROTL32imm_hw :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
+ "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>,
+ Requires<[hasHWROT32]>;
+
+def ROTL32reg_hw :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
+ "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
+ Requires<[hasHWROT32]>;
// 32 bit r2 = rotr r1, n
// =>
// r2 = shf.r r1, r1, n
-def ROTR32imm_hw : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$src, i32imm:$amt),
- "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
- [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>,
- Requires<[hasHWROT32]>;
-
-def ROTR32reg_hw : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$src, Int32Regs:$amt),
- "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
- [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
- Requires<[hasHWROT32]>;
-
-//
-// Rotate: if ptx shf instruction is not available, then use shift+add
-//
-// 32bit
-def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
- !strconcat("{{\n\t",
- !strconcat(".reg .b32 %lhs;\n\t",
- !strconcat(".reg .b32 %rhs;\n\t",
- !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
- !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
- !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
- !strconcat("}}", ""))))))),
- []>;
+def ROTR32imm_hw :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
+ "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>,
+ Requires<[hasHWROT32]>;
+
+def ROTR32reg_hw :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
+ "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
+ Requires<[hasHWROT32]>;
+
+// 32-bit software rotate by immediate. $amt2 should equal 32 - $amt1.
+def ROT32imm_sw :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
+ "{{\n\t"
+ ".reg .b32 %lhs;\n\t"
+ ".reg .b32 %rhs;\n\t"
+ "shl.b32 \t%lhs, $src, $amt1;\n\t"
+ "shr.b32 \t%rhs, $src, $amt2;\n\t"
+ "add.u32 \t$dst, %lhs, %rhs;\n\t"
+ "}}",
+ []>;
+// Maps an immediate N to the i32 target constant (32 - N).
def SUB_FRM_32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(32-N->getZExtValue(), SDLoc(N), MVT::i32);
+ return CurDAG->getTargetConstant(32 - N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
(ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>,
Requires<[noHWROT32]>;
-def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
- Int32Regs:$amt),
- !strconcat("{{\n\t",
- !strconcat(".reg .b32 %lhs;\n\t",
- !strconcat(".reg .b32 %rhs;\n\t",
- !strconcat(".reg .b32 %amt2;\n\t",
- !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
- !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
- !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
- !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
- !strconcat("}}", ""))))))))),
- [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
- Requires<[noHWROT32]>;
-
-def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
- Int32Regs:$amt),
- !strconcat("{{\n\t",
- !strconcat(".reg .b32 %lhs;\n\t",
- !strconcat(".reg .b32 %rhs;\n\t",
- !strconcat(".reg .b32 %amt2;\n\t",
- !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
- !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
- !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
- !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
- !strconcat("}}", ""))))))))),
- [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
- Requires<[noHWROT32]>;
-
-// 64bit
-def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
- i32imm:$amt1, i32imm:$amt2),
- !strconcat("{{\n\t",
- !strconcat(".reg .b64 %lhs;\n\t",
- !strconcat(".reg .b64 %rhs;\n\t",
- !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
- !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
- !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
- !strconcat("}}", ""))))))),
- []>;
+// 32-bit software rotate left by register.
+def ROTL32reg_sw :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
+ "{{\n\t"
+ ".reg .b32 %lhs;\n\t"
+ ".reg .b32 %rhs;\n\t"
+ ".reg .b32 %amt2;\n\t"
+ "shl.b32 \t%lhs, $src, $amt;\n\t"
+ "sub.s32 \t%amt2, 32, $amt;\n\t"
+ "shr.b32 \t%rhs, $src, %amt2;\n\t"
+ "add.u32 \t$dst, %lhs, %rhs;\n\t"
+ "}}",
+ [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
+ Requires<[noHWROT32]>;
+
+// 32-bit software rotate right by register.
+def ROTR32reg_sw :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
+ "{{\n\t"
+ ".reg .b32 %lhs;\n\t"
+ ".reg .b32 %rhs;\n\t"
+ ".reg .b32 %amt2;\n\t"
+ "shr.b32 \t%lhs, $src, $amt;\n\t"
+ "sub.s32 \t%amt2, 32, $amt;\n\t"
+ "shl.b32 \t%rhs, $src, %amt2;\n\t"
+ "add.u32 \t$dst, %lhs, %rhs;\n\t"
+ "}}",
+ [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
+ Requires<[noHWROT32]>;
+
+// 64-bit software rotate by immediate. $amt2 should equal 64 - $amt1.
+def ROT64imm_sw :
+ NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2),
+ "{{\n\t"
+ ".reg .b64 %lhs;\n\t"
+ ".reg .b64 %rhs;\n\t"
+ "shl.b64 \t%lhs, $src, $amt1;\n\t"
+ "shr.b64 \t%rhs, $src, $amt2;\n\t"
+ "add.u64 \t$dst, %lhs, %rhs;\n\t"
+ "}}",
+ []>;
def SUB_FRM_64 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(64-N->getZExtValue(), SDLoc(N), MVT::i32);
def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
(ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
-def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
- Int32Regs:$amt),
- !strconcat("{{\n\t",
- !strconcat(".reg .b64 %lhs;\n\t",
- !strconcat(".reg .b64 %rhs;\n\t",
- !strconcat(".reg .u32 %amt2;\n\t",
- !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
- !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
- !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
- !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
- !strconcat("}}", ""))))))))),
- [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
-
-def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
- Int32Regs:$amt),
- !strconcat("{{\n\t",
- !strconcat(".reg .b64 %lhs;\n\t",
- !strconcat(".reg .b64 %rhs;\n\t",
- !strconcat(".reg .u32 %amt2;\n\t",
- !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
- !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
- !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
- !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
- !strconcat("}}", ""))))))))),
- [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
+// 64-bit software rotate left by register.
+def ROTL64reg_sw :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt),
+ "{{\n\t"
+ ".reg .b64 %lhs;\n\t"
+ ".reg .b64 %rhs;\n\t"
+ ".reg .u32 %amt2;\n\t"
+ "shl.b64 \t%lhs, $src, $amt;\n\t"
+ "sub.u32 \t%amt2, 64, $amt;\n\t"
+ "shr.b64 \t%rhs, $src, %amt2;\n\t"
+ "add.u64 \t$dst, %lhs, %rhs;\n\t"
+ "}}",
+ [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
+
+// 64-bit software rotate right by register.
+def ROTR64reg_sw :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt),
+ "{{\n\t"
+ ".reg .b64 %lhs;\n\t"
+ ".reg .b64 %rhs;\n\t"
+ ".reg .u32 %amt2;\n\t"
+ "shr.b64 \t%lhs, $src, $amt;\n\t"
+ "sub.u32 \t%amt2, 64, $amt;\n\t"
+ "shl.b64 \t%rhs, $src, %amt2;\n\t"
+ "add.u64 \t$dst, %lhs, %rhs;\n\t"
+ "}}",
+ [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
+
+//
+// Funnel shift in clamp mode
+//
+
+// Create SDNodes so they can be used in the DAG code, e.g.
+// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
+def SDTIntShiftDOp :
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisInt<3>]>;
+def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
+def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;
+
+// Selects FUN_SHFL_CLAMP to shf.l.clamp.b32. All operands are 32-bit
+// registers.
+def FUNSHFLCLAMP :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+ "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
+ [(set Int32Regs:$dst,
+ (FUN_SHFL_CLAMP Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt))]>;
+// Selects FUN_SHFR_CLAMP to shf.r.clamp.b32. All operands are 32-bit
+// registers.
+def FUNSHFRCLAMP :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+ "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
+ [(set Int32Regs:$dst,
+ (FUN_SHFR_CLAMP Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt))]>;
+
+//
// BFE - bit-field extract
+//
+// Template for BFE instructions. Takes four args,
+// [dest (reg), src (reg), start (reg or imm), length (reg or imm)].
+// Start may be an imm only if length is also an imm. FIXME: Is this a
+// restriction in PTX?
+//
+// dest and src may be int32 or int64, but start and length are always int32.
multiclass BFE<string TyStr, RegisterClass RC> {
- // BFE supports both 32-bit and 64-bit values, but the start and length
- // operands are always 32-bit
def rrr
: NVPTXInst<(outs RC:$d),
(ins RC:$a, Int32Regs:$b, Int32Regs:$c),
defm BFE_U64 : BFE<"u64", Int64Regs>;
//-----------------------------------
-// General Comparison
+// Comparison instructions (setp, set)
//-----------------------------------
-// General setp instructions
+// FIXME: This doesn't cover versions of set and setp that combine with a
+// boolean predicate, e.g. setp.eq.and.b16.
+
multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
- def rr : NVPTXInst<(outs Int1Regs:$dst),
- (ins RC:$a, RC:$b, CmpMode:$cmp),
- !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
- []>;
- def ri : NVPTXInst<(outs Int1Regs:$dst),
- (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
- !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
- []>;
- def ir : NVPTXInst<(outs Int1Regs:$dst),
- (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
- !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
- []>;
+ def rr :
+ NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp),
+ !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
+ "\t$dst, $a, $b;"), []>;
+ def ri :
+ NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
+ !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
+ "\t$dst, $a, $b;"), []>;
+ def ir :
+ NVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
+ !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
+ "\t$dst, $a, $b;"), []>;
}
defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>;
defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;
-// General set instructions
+// FIXME: This doesn't appear to be correct. The "set" mnemonic has the form
+// "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination
+// reg, either u32, s32, or f32. Anyway these aren't used at the moment.
+
multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr : NVPTXInst<(outs Int32Regs:$dst),
(ins RC:$a, RC:$b, CmpMode:$cmp),
defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
//-----------------------------------
-// General Selection
+// Selection instructions (selp)
//-----------------------------------
-// General selp instructions
+// FIXME: Missing slct
+
+// selp instructions that don't have any pattern matches; we explicitly use
+// them within this file.
multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr : NVPTXInst<(outs RC:$dst),
(ins RC:$a, RC:$b, Int1Regs:$p),
multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls,
SDNode ImmNode> {
- def rr : NVPTXInst<(outs RC:$dst),
- (ins RC:$a, RC:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
- [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>;
- def ri : NVPTXInst<(outs RC:$dst),
- (ins RC:$a, ImmCls:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
- [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
- def ir : NVPTXInst<(outs RC:$dst),
- (ins ImmCls:$a, RC:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
- [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
- def ii : NVPTXInst<(outs RC:$dst),
- (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
- [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
+ def rr :
+ NVPTXInst<(outs RC:$dst),
+ (ins RC:$a, RC:$b, Int1Regs:$p),
+ !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+ [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>;
+ def ri :
+ NVPTXInst<(outs RC:$dst),
+ (ins RC:$a, ImmCls:$b, Int1Regs:$p),
+ !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+ [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
+ def ir :
+ NVPTXInst<(outs RC:$dst),
+ (ins ImmCls:$a, RC:$b, Int1Regs:$p),
+ !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+ [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
+ def ii :
+ NVPTXInst<(outs RC:$dst),
+ (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
+ !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+ [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
}
+// Don't pattern match on selp.{s,u}{16,32,64} -- selp.b{16,32,64} is just as
+// good.
defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>;
defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>;
defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>;
defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;
-//
-// Funnnel shift in clamp mode
-//
-// - SDNodes are created so they can be used in the DAG code,
-// e.g. NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
-//
-def SDTIntShiftDOp: SDTypeProfile<1, 3,
- [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
- SDTCisInt<0>, SDTCisInt<3>]>;
-def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
-def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;
-
-def FUNSHFLCLAMP : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
- "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
- [(set Int32Regs:$dst,
- (FUN_SHFL_CLAMP Int32Regs:$lo,
- Int32Regs:$hi, Int32Regs:$amt))]>;
-
-def FUNSHFRCLAMP : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
- "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
- [(set Int32Regs:$dst,
- (FUN_SHFR_CLAMP Int32Regs:$lo,
- Int32Regs:$hi, Int32Regs:$amt))]>;
-
//-----------------------------------
// Data Movement (Load / Store, Move)
//-----------------------------------
def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
- [SDNPWantRoot]>;
+ [SDNPWantRoot]>;
def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
- [SDNPWantRoot]>;
+ [SDNPWantRoot]>;
def MEMri : Operand<i32> {
let PrintMethod = "printMemOperand";
}
def imem : Operand<iPTR> {
- let PrintMethod = "printOperand";
+ let PrintMethod = "printOperand";
}
def imemAny : Operand<iPTRAny> {
- let PrintMethod = "printOperand";
+ let PrintMethod = "printOperand";
}
def LdStCode : Operand<i32> {
- let PrintMethod = "printLdStCode";
+ let PrintMethod = "printLdStCode";
}
def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
+// Load a memory address into a u32 or u64 register.
def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
- "mov.u32 \t$dst, $a;",
- [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
-
+ "mov.u32 \t$dst, $a;",
+ [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
- "mov.u64 \t$dst, $a;",
- [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
+ "mov.u64 \t$dst, $a;",
+ [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
-// Get pointer to local stack
-def MOV_DEPOT_ADDR
- : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
- "mov.u32 \t$d, __local_depot$num;", []>;
-def MOV_DEPOT_ADDR_64
- : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
- "mov.u64 \t$d, __local_depot$num;", []>;
+// Get pointer to local stack.
+def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
+ "mov.u32 \t$d, __local_depot$num;", []>;
+def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
+ "mov.u64 \t$d, __local_depot$num;", []>;
// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
let IsSimpleMove=1 in {
-def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
- "mov.pred \t$dst, $sss;", []>;
-def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
- "mov.u16 \t$dst, $sss;", []>;
-def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
- "mov.u32 \t$dst, $sss;", []>;
-def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
- "mov.u64 \t$dst, $sss;", []>;
-
-def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
- "mov.f32 \t$dst, $src;", []>;
-def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
- "mov.f64 \t$dst, $src;", []>;
+ def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
+ "mov.pred \t$dst, $sss;", []>;
+ def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
+ "mov.u16 \t$dst, $sss;", []>;
+ def IMOV32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
+ "mov.u32 \t$dst, $sss;", []>;
+ def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
+ "mov.u64 \t$dst, $sss;", []>;
+
+ def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
+ "mov.f32 \t$dst, $src;", []>;
+ def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
+ "mov.f64 \t$dst, $src;", []>;
}
-def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
- "mov.pred \t$dst, $src;",
- [(set Int1Regs:$dst, imm:$src)]>;
-def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
- "mov.u16 \t$dst, $src;",
- [(set Int16Regs:$dst, imm:$src)]>;
-def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
- "mov.u32 \t$dst, $src;",
- [(set Int32Regs:$dst, imm:$src)]>;
-def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
- "mov.u64 \t$dst, $src;",
- [(set Int64Regs:$dst, imm:$src)]>;
-
-def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
- "mov.f32 \t$dst, $src;",
- [(set Float32Regs:$dst, fpimm:$src)]>;
-def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
- "mov.f64 \t$dst, $src;",
- [(set Float64Regs:$dst, fpimm:$src)]>;
+
+def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
+ "mov.pred \t$dst, $src;",
+ [(set Int1Regs:$dst, imm:$src)]>;
+def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
+ "mov.u16 \t$dst, $src;",
+ [(set Int16Regs:$dst, imm:$src)]>;
+def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
+ "mov.u32 \t$dst, $src;",
+ [(set Int32Regs:$dst, imm:$src)]>;
+def IMOV64i : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
+ "mov.u64 \t$dst, $src;",
+ [(set Int64Regs:$dst, imm:$src)]>;
+
+def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
+ "mov.f32 \t$dst, $src;",
+ [(set Float32Regs:$dst, fpimm:$src)]>;
+def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
+ "mov.f64 \t$dst, $src;",
+ [(set Float64Regs:$dst, fpimm:$src)]>;
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
//---- Copy Frame Index ----
-def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
- "add.u32 \t$dst, ${addr:add};",
- [(set Int32Regs:$dst, ADDRri:$addr)]>;
+def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
+ "add.u32 \t$dst, ${addr:add};",
+ [(set Int32Regs:$dst, ADDRri:$addr)]>;
def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
- "add.u64 \t$dst, ${addr:add};",
- [(set Int64Regs:$dst, ADDRri64:$addr)]>;
+ "add.u64 \t$dst, ${addr:add};",
+ [(set Int64Regs:$dst, ADDRri64:$addr)]>;
//-----------------------------------
// Comparison and Selection
SET_s16rr, SET_s16ri, SET_s16ir,
SET_s32rr, SET_s32ri, SET_s32ir,
SET_s64rr, SET_s64ri, SET_s64ir> {
- // TableGen doesn't like empty multiclasses
+ // TableGen doesn't like empty multiclasses.
def : PatLeaf<(i32 0)>;
}
SET_u16rr, SET_u16ri, SET_u16ir,
SET_u32rr, SET_u32ri, SET_u32ir,
SET_u64rr, SET_u64ri, SET_u64ir> {
- // TableGen doesn't like empty multiclasses
+ // TableGen doesn't like empty multiclasses.
def : PatLeaf<(i32 0)>;
}
defm : ISET_FORMAT_SIGNED<setgt, CmpGT>;
-defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;
defm : ISET_FORMAT_SIGNED<setlt, CmpLT>;
-defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;
defm : ISET_FORMAT_SIGNED<setge, CmpGE>;
-defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;
defm : ISET_FORMAT_SIGNED<setle, CmpLE>;
-defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;
defm : ISET_FORMAT_SIGNED<seteq, CmpEQ>;
-defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;
defm : ISET_FORMAT_SIGNED<setne, CmpNE>;
+defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;
+defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;
+defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;
+defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;
+defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;
defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;
// i1 compares
defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
-//def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
-// [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+// FIXME: What is this doing here? Can it be deleted?
+// def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
+// [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
- SDTCisInt<2>]>;
-def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>,
- SDTCisInt<1>, SDTCisInt<2>]>;
+def SDTDeclareParamProfile :
+ SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
+def SDTDeclareScalarParamProfile :
+ SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;
def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
-def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam",
- SDTDeclareScalarParamProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam",
- SDTDeclareParamProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def DeclareRet : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def LoadParam : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
- [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
-def LoadParamV2 : SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
- [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
-def LoadParamV4 : SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
- [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
-def PrintCall : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def StoreParam : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def StoreParamV2 : SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def StoreParamV4 : SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallArg : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def LastCallArg : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallArgEnd : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallVoid : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def Prototype : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallVal : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
- []>;
-def StoreRetval : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
- [SDNPHasChain, SDNPSideEffect]>;
-def StoreRetvalV2 : SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile,
- [SDNPHasChain, SDNPSideEffect]>;
-def StoreRetvalV4 : SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
- [SDNPHasChain, SDNPSideEffect]>;
-def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam",
- SDTPseudoUseParamProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def RETURNNode : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
- [SDNPHasChain, SDNPSideEffect]>;
+def DeclareParam :
+ SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def DeclareScalarParam :
+ SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def DeclareRetParam :
+ SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def DeclareRet :
+ SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def LoadParam :
+ SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
+ [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
+def LoadParamV2 :
+ SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
+ [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
+def LoadParamV4 :
+ SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
+ [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
+def PrintCall :
+ SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def PrintCallUni :
+ SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def StoreParam :
+ SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def StoreParamV2 :
+ SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def StoreParamV4 :
+ SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def StoreParamU32 :
+ SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def StoreParamS32 :
+ SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallArgBegin :
+ SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallArg :
+ SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def LastCallArg :
+ SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallArgEnd :
+ SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallVoid :
+ SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def Prototype :
+ SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallVal :
+ SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def MoveParam :
+ SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
+def StoreRetval :
+ SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
+ [SDNPHasChain, SDNPSideEffect]>;
+def StoreRetvalV2 :
+ SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile,
+ [SDNPHasChain, SDNPSideEffect]>;
+def StoreRetvalV4 :
+ SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PseudoUseParam :
+ SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def RETURNNode :
+ SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
+ [SDNPHasChain, SDNPSideEffect]>;
+// Loads $dst from the retval0 param symbol at offset $b using
+// "ld.param<opstr>". Uses the variadic form of !strconcat, matching the
+// other param load/store classes in this file.
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
- !strconcat(!strconcat("ld.param", opstr),
- "\t$dst, [retval0+$b];"),
+ !strconcat("ld.param", opstr, "\t$dst, [retval0+$b];"),
[]>;
class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
- !strconcat(!strconcat("mov", opstr),
- "\t$dst, retval$b;"),
+ !strconcat("mov", opstr, "\t$dst, retval$b;"),
[(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
- !strconcat(!strconcat("ld.param.v2", opstr),
- "\t{{$dst, $dst2}}, [retval0+$b];"), []>;
+ !strconcat("ld.param.v2", opstr,
+ "\t{{$dst, $dst2}}, [retval0+$b];"), []>;
class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins i32imm:$b),
- !strconcat(!strconcat("ld.param.v4", opstr),
- "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"), []>;
+ !strconcat("ld.param.v4", opstr,
+ "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
+ []>;
class StoreParamInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
- !strconcat(!strconcat("st.param", opstr),
- "\t[param$a+$b], $val;"),
+ !strconcat("st.param", opstr, "\t[param$a+$b], $val;"),
[]>;
class StoreParamV2Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2,
i32imm:$a, i32imm:$b),
- !strconcat(!strconcat("st.param.v2", opstr),
- "\t[param$a+$b], {{$val, $val2}};"),
+ !strconcat("st.param.v2", opstr,
+ "\t[param$a+$b], {{$val, $val2}};"),
[]>;
+// Stores four registers as a v4 vector to [param$a+$b] using
+// "st.param.v4<opstr>". Every $-name referenced by the asm string must be
+// declared in the ins list, so the value operands are named
+// $val, $val2, $val3, $val4 (the same names StoreRetvalV4Inst uses).
class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs), (ins regclass:$val, regclass:$val1, regclass:$val2,
- regclass:$val3, i32imm:$a, i32imm:$b),
- !strconcat(!strconcat("st.param.v4", opstr),
- "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
+ NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3,
+ regclass:$val4, i32imm:$a, i32imm:$b),
+ !strconcat("st.param.v4", opstr,
+ "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
[]>;
class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
- !strconcat(!strconcat("st.param", opstr),
- "\t[func_retval0+$a], $val;"),
+ !strconcat("st.param", opstr, "\t[func_retval0+$a], $val;"),
[]>;
class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
- !strconcat(!strconcat("st.param.v2", opstr),
- "\t[func_retval0+$a], {{$val, $val2}};"),
+ !strconcat("st.param.v2", opstr,
+ "\t[func_retval0+$a], {{$val, $val2}};"),
[]>;
class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs),
(ins regclass:$val, regclass:$val2, regclass:$val3,
regclass:$val4, i32imm:$a),
- !strconcat(!strconcat("st.param.v4", opstr),
- "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
+ !strconcat("st.param.v4", opstr,
+ "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
[]>;
-let isCall = 1 in {
-def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
-"call (retval0), ",
- [(PrintCall (i32 1))]>;
-def PrintCallRetInst2 : NVPTXInst<(outs), (ins),
-"call (retval0, retval1), ",
- [(PrintCall (i32 2))]>;
-def PrintCallRetInst3 : NVPTXInst<(outs), (ins),
-"call (retval0, retval1, retval2), ",
- [(PrintCall (i32 3))]>;
-def PrintCallRetInst4 : NVPTXInst<(outs), (ins),
-"call (retval0, retval1, retval2, retval3), ",
- [(PrintCall (i32 4))]>;
-def PrintCallRetInst5 : NVPTXInst<(outs), (ins),
-"call (retval0, retval1, retval2, retval3, retval4), ",
- [(PrintCall (i32 5))]>;
-def PrintCallRetInst6 : NVPTXInst<(outs), (ins),
-"call (retval0, retval1, retval2, retval3, retval4, retval5), ",
- [(PrintCall (i32 6))]>;
-def PrintCallRetInst7 : NVPTXInst<(outs), (ins),
-"call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
- [(PrintCall (i32 7))]>;
-def PrintCallRetInst8 : NVPTXInst<(outs), (ins),
-!strconcat("call (retval0, retval1, retval2, retval3, retval4",
- ", retval5, retval6, retval7), "),
- [(PrintCall (i32 8))]>;
-
-def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ",
- [(PrintCall (i32 0))]>;
-
-def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins),
-"call.uni (retval0), ",
- [(PrintCallUni (i32 1))]>;
-def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins),
-"call.uni (retval0, retval1), ",
- [(PrintCallUni (i32 2))]>;
-def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins),
-"call.uni (retval0, retval1, retval2), ",
- [(PrintCallUni (i32 3))]>;
-def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins),
-"call.uni (retval0, retval1, retval2, retval3), ",
- [(PrintCallUni (i32 4))]>;
-def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins),
-"call.uni (retval0, retval1, retval2, retval3, retval4), ",
- [(PrintCallUni (i32 5))]>;
-def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins),
-"call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
- [(PrintCallUni (i32 6))]>;
-def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins),
-"call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
- [(PrintCallUni (i32 7))]>;
-def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins),
-!strconcat("call.uni (retval0, retval1, retval2, retval3, retval4",
- ", retval5, retval6, retval7), "),
- [(PrintCallUni (i32 8))]>;
-
-def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ",
- [(PrintCallUni (i32 0))]>;
-} // call instructions
+let isCall=1 in {
+ def PrintCallNoRetInst : NVPTXInst<(outs), (ins),
+ "call ", [(PrintCall (i32 0))]>;
+ def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
+ "call (retval0), ", [(PrintCall (i32 1))]>;
+ def PrintCallRetInst2 : NVPTXInst<(outs), (ins),
+ "call (retval0, retval1), ", [(PrintCall (i32 2))]>;
+ def PrintCallRetInst3 : NVPTXInst<(outs), (ins),
+ "call (retval0, retval1, retval2), ", [(PrintCall (i32 3))]>;
+ def PrintCallRetInst4 : NVPTXInst<(outs), (ins),
+ "call (retval0, retval1, retval2, retval3), ", [(PrintCall (i32 4))]>;
+ def PrintCallRetInst5 : NVPTXInst<(outs), (ins),
+ "call (retval0, retval1, retval2, retval3, retval4), ",
+ [(PrintCall (i32 5))]>;
+ def PrintCallRetInst6 : NVPTXInst<(outs), (ins),
+ "call (retval0, retval1, retval2, retval3, retval4, retval5), ",
+ [(PrintCall (i32 6))]>;
+ def PrintCallRetInst7 : NVPTXInst<(outs), (ins),
+ "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
+ [(PrintCall (i32 7))]>;
+ def PrintCallRetInst8 : NVPTXInst<(outs), (ins),
+ "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6, "
+ "retval7), ",
+ [(PrintCall (i32 8))]>;
+
+ def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins),
+ "call.uni ", [(PrintCallUni (i32 0))]>;
+ def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins),
+ "call.uni (retval0), ", [(PrintCallUni (i32 1))]>;
+ def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins),
+ "call.uni (retval0, retval1), ", [(PrintCallUni (i32 2))]>;
+ def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins),
+ "call.uni (retval0, retval1, retval2), ", [(PrintCallUni (i32 3))]>;
+ def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins),
+ "call.uni (retval0, retval1, retval2, retval3), ", [(PrintCallUni (i32 4))]>;
+ def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins),
+ "call.uni (retval0, retval1, retval2, retval3, retval4), ",
+ [(PrintCallUni (i32 5))]>;
+ def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins),
+ "call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
+ [(PrintCallUni (i32 6))]>;
+ def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins),
+ "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
+ [(PrintCallUni (i32 7))]>;
+ def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins),
+ "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6, "
+ "retval7), ",
+ [(PrintCallUni (i32 8))]>;
+}
def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">;
def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">;
// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4I32 : StoreParamV4Inst<Int32Regs, ".b32">;
-def StoreParamV4I32 : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2,
- Int32Regs:$val3, Int32Regs:$val4,
- i32imm:$a, i32imm:$b),
- "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
- []>;
-
-def StoreParamV4I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
- Int16Regs:$val3, Int16Regs:$val4,
- i32imm:$a, i32imm:$b),
- "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
- []>;
-
-def StoreParamV4I8 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
- Int16Regs:$val3, Int16Regs:$val4,
- i32imm:$a, i32imm:$b),
- "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
- []>;
-
-def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
-def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
+def StoreParamV4I32 :
+ NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2, Int32Regs:$val3,
+ Int32Regs:$val4, i32imm:$a, i32imm:$b),
+ "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
+ []>;
+
+def StoreParamV4I16 :
+ NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, Int16Regs:$val3,
+ Int16Regs:$val4, i32imm:$a, i32imm:$b),
+ "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
+ []>;
+
+def StoreParamV4I8 :
+ NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, Int16Regs:$val3,
+ Int16Regs:$val4, i32imm:$a, i32imm:$b),
+ "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
+ []>;
+
+def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
+def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
def StoreParamV2F32 : StoreParamV2Inst<Float32Regs, ".f32">;
def StoreParamV2F64 : StoreParamV2Inst<Float64Regs, ".f64">;
// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4F32 : StoreParamV4Inst<Float32Regs, ".f32">;
-def StoreParamV4F32 : NVPTXInst<(outs),
- (ins Float32Regs:$val, Float32Regs:$val2,
- Float32Regs:$val3, Float32Regs:$val4,
- i32imm:$a, i32imm:$b),
- "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
- []>;
-
+def StoreParamV4F32 :
+ NVPTXInst<(outs), (ins Float32Regs:$val, Float32Regs:$val2, Float32Regs:$val3,
+ Float32Regs:$val4, i32imm:$a, i32imm:$b),
+ "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
+ []>;
def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">;
def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
class CallArgInst<NVPTXRegClass regclass> :
- NVPTXInst<(outs), (ins regclass:$a), "$a, ",
- [(CallArg (i32 0), regclass:$a)]>;
+ NVPTXInst<(outs), (ins regclass:$a), "$a, ",
+ [(CallArg (i32 0), regclass:$a)]>;
class LastCallArgInst<NVPTXRegClass regclass> :
- NVPTXInst<(outs), (ins regclass:$a), "$a",
- [(LastCallArg (i32 0), regclass:$a)]>;
+ NVPTXInst<(outs), (ins regclass:$a), "$a",
+ [(LastCallArg (i32 0), regclass:$a)]>;
def CallArgI64 : CallArgInst<Int64Regs>;
def CallArgI32 : CallArgInst<Int32Regs>;
def CallArgI16 : CallArgInst<Int16Regs>;
-
def CallArgF64 : CallArgInst<Float64Regs>;
def CallArgF32 : CallArgInst<Float32Regs>;
def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInst<Int32Regs>;
def LastCallArgI16 : LastCallArgInst<Int16Regs>;
-
def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;
def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ",
[(CallArg (i32 0), (i32 imm:$a))]>;
def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a",
- [(LastCallArg (i32 0), (i32 imm:$a))]>;
+ [(LastCallArg (i32 0), (i32 imm:$a))]>;
def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
[(CallArg (i32 1), (i32 imm:$a))]>;
def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
- [(LastCallArg (i32 1), (i32 imm:$a))]>;
-
-def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr),
- "$addr, ",
- [(CallVoid (Wrapper tglobaladdr:$addr))]>;
-def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr),
- "$addr, ",
- [(CallVoid Int32Regs:$addr)]>;
-def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
- "$addr, ",
- [(CallVoid Int64Regs:$addr)]>;
-def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val),
- ", prototype_$val;",
- [(Prototype (i32 imm:$val))]>;
-
-def DeclareRetMemInst : NVPTXInst<(outs),
- (ins i32imm:$align, i32imm:$size, i32imm:$num),
- ".param .align $align .b8 retval$num[$size];",
- [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
-def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
- ".param .b$size retval$num;",
- [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
-def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
- ".reg .b$size retval$num;",
- [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
-
-def DeclareParamInst : NVPTXInst<(outs),
- (ins i32imm:$align, i32imm:$a, i32imm:$size),
- ".param .align $align .b8 param$a[$size];",
- [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
-def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
- ".param .b$size param$a;",
- [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
-def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
- ".reg .b$size param$a;",
- [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
+ [(LastCallArg (i32 1), (i32 imm:$a))]>;
+
+def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ",
+ [(CallVoid (Wrapper tglobaladdr:$addr))]>;
+def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ",
+ [(CallVoid Int32Regs:$addr)]>;
+def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), "$addr, ",
+ [(CallVoid Int64Regs:$addr)]>;
+def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;",
+ [(Prototype (i32 imm:$val))]>;
+
+def DeclareRetMemInst :
+ NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size, i32imm:$num),
+ ".param .align $align .b8 retval$num[$size];",
+ [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
+def DeclareRetScalarInst :
+ NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
+ ".param .b$size retval$num;",
+ [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
+def DeclareRetRegInst :
+ NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
+ ".reg .b$size retval$num;",
+ [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
+
+def DeclareParamInst :
+ NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size),
+ ".param .align $align .b8 param$a[$size];",
+ [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
+def DeclareScalarParamInst :
+ NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
+ ".param .b$size param$a;",
+ [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
+def DeclareScalarRegInst :
+ NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
+ ".reg .b$size param$a;",
+ [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
- NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
- !strconcat(!strconcat("mov", asmstr), "\t$dst, $src;"),
- [(set regclass:$dst, (MoveParam regclass:$src))]>;
+ NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
+ !strconcat("mov", asmstr, "\t$dst, $src;"),
+ [(set regclass:$dst, (MoveParam regclass:$src))]>;
def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
-def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
- "cvt.u16.u32\t$dst, $src;",
- [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
+def MoveParamI16 :
+ NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+ "cvt.u16.u32\t$dst, $src;",
+ [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
class PseudoUseParamInst<NVPTXRegClass regclass> :
- NVPTXInst<(outs), (ins regclass:$src),
- "// Pseudo use of $src",
- [(PseudoUseParam regclass:$src)]>;
+ NVPTXInst<(outs), (ins regclass:$src),
+ "// Pseudo use of $src",
+ [(PseudoUseParam regclass:$src)]>;
def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
// Load / Store Handling
//
multiclass LD<NVPTXRegClass regclass> {
- def _avar : NVPTXInst<(outs regclass:$dst),
+ def _avar : NVPTXInst<
+ (outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, imem:$addr),
-!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t$dst, [$addr];"), []>;
- def _areg : NVPTXInst<(outs regclass:$dst),
+ i32imm:$fromWidth, imem:$addr),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t$dst, [$addr];", []>;
+ def _areg : NVPTXInst<
+ (outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int32Regs:$addr),
-!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t$dst, [$addr];"), []>;
- def _areg_64 : NVPTXInst<(outs regclass:$dst),
+ i32imm:$fromWidth, Int32Regs:$addr),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t$dst, [$addr];", []>;
+ def _areg_64 : NVPTXInst<
+ (outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int64Regs:$addr),
- !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
- " \t$dst, [$addr];"), []>;
- def _ari : NVPTXInst<(outs regclass:$dst),
+ i32imm:$fromWidth, Int64Regs:$addr),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t$dst, [$addr];", []>;
+ def _ari : NVPTXInst<
+ (outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
-!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t$dst, [$addr+$offset];"), []>;
- def _ari_64 : NVPTXInst<(outs regclass:$dst),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
- !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
- " \t$dst, [$addr+$offset];"), []>;
- def _asi : NVPTXInst<(outs regclass:$dst),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, imem:$addr, i32imm:$offset),
-!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t$dst, [$addr+$offset];"), []>;
+ i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t$dst, [$addr+$offset];", []>;
+ def _ari_64 : NVPTXInst<
+ (outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t$dst, [$addr+$offset];", []>;
+ def _asi : NVPTXInst<
+ (outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t$dst, [$addr+$offset];", []>;
}
let mayLoad=1, hasSideEffects=0 in {
-defm LD_i8 : LD<Int16Regs>;
-defm LD_i16 : LD<Int16Regs>;
-defm LD_i32 : LD<Int32Regs>;
-defm LD_i64 : LD<Int64Regs>;
-defm LD_f32 : LD<Float32Regs>;
-defm LD_f64 : LD<Float64Regs>;
+ defm LD_i8 : LD<Int16Regs>;
+ defm LD_i16 : LD<Int16Regs>;
+ defm LD_i32 : LD<Int32Regs>;
+ defm LD_i64 : LD<Int64Regs>;
+ defm LD_f32 : LD<Float32Regs>;
+ defm LD_f64 : LD<Float64Regs>;
}
multiclass ST<NVPTXRegClass regclass> {
- def _avar : NVPTXInst<(outs),
- (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
-!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
- " \t[$addr], $src;"), []>;
- def _areg : NVPTXInst<(outs),
+ def _avar : NVPTXInst<
+ (outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
-!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
- " \t[$addr], $src;"), []>;
- def _areg_64 : NVPTXInst<(outs),
+ LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
+ " \t[$addr], $src;", []>;
+ def _areg : NVPTXInst<
+ (outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
+ " \t[$addr], $src;", []>;
+ def _areg_64 : NVPTXInst<
+ (outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
- "\t[$addr], $src;"), []>;
- def _ari : NVPTXInst<(outs),
+ LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
+ " \t[$addr], $src;", []>;
+ def _ari : NVPTXInst<
+ (outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
-!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
- " \t[$addr+$offset], $src;"), []>;
- def _ari_64 : NVPTXInst<(outs),
+ LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
+ " \t[$addr+$offset], $src;", []>;
+ def _ari_64 : NVPTXInst<
+ (outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
- "\t[$addr+$offset], $src;"), []>;
- def _asi : NVPTXInst<(outs),
+ LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
+ " \t[$addr+$offset], $src;", []>;
+ def _asi : NVPTXInst<
+ (outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
-!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
- " \t[$addr+$offset], $src;"), []>;
+ LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
+ " \t[$addr+$offset], $src;", []>;
}
let mayStore=1, hasSideEffects=0 in {
-defm ST_i8 : ST<Int16Regs>;
-defm ST_i16 : ST<Int16Regs>;
-defm ST_i32 : ST<Int32Regs>;
-defm ST_i64 : ST<Int64Regs>;
-defm ST_f32 : ST<Float32Regs>;
-defm ST_f64 : ST<Float64Regs>;
+ defm ST_i8 : ST<Int16Regs>;
+ defm ST_i16 : ST<Int16Regs>;
+ defm ST_i32 : ST<Int32Regs>;
+ defm ST_i64 : ST<Int64Regs>;
+ defm ST_f32 : ST<Float32Regs>;
+ defm ST_f64 : ST<Float64Regs>;
}
-// The following is used only in and after vector elementizations.
-// Vector elementization happens at the machine instruction level, so the
-// following instruction
-// never appears in the DAG.
+// The following are used only in and after vector elementization. Vector
+// elementization happens at the machine instruction level, so the following
+// instructions never appear in the DAG.
multiclass LD_VEC<NVPTXRegClass regclass> {
- def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ def _v2_avar : NVPTXInst<
+ (outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, imem:$addr),
- !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
- def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ i32imm:$fromWidth, imem:$addr),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t{{$dst1, $dst2}}, [$addr];", []>;
+ def _v2_areg : NVPTXInst<
+ (outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int32Regs:$addr),
- !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
- def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ i32imm:$fromWidth, Int32Regs:$addr),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t{{$dst1, $dst2}}, [$addr];", []>;
+ def _v2_areg_64 : NVPTXInst<
+ (outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int64Regs:$addr),
- !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
- def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ i32imm:$fromWidth, Int64Regs:$addr),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t{{$dst1, $dst2}}, [$addr];", []>;
+ def _v2_ari : NVPTXInst<
+ (outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
- !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
- def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+ def _v2_ari_64 : NVPTXInst<
+ (outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
- !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
- def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+ def _v2_asi : NVPTXInst<
+ (outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, imem:$addr, i32imm:$offset),
- !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
- def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
- regclass:$dst3, regclass:$dst4),
+ i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+ def _v4_avar : NVPTXInst<
+ (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, imem:$addr),
- !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
- def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
- regclass:$dst4),
+ i32imm:$fromWidth, imem:$addr),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
+ def _v4_areg : NVPTXInst<
+ (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int32Regs:$addr),
- !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
- def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
- regclass:$dst3, regclass:$dst4),
+ i32imm:$fromWidth, Int32Regs:$addr),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
+ def _v4_areg_64 : NVPTXInst<
+ (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int64Regs:$addr),
- !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
- def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
- regclass:$dst4),
+ i32imm:$fromWidth, Int64Regs:$addr),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
+ def _v4_ari : NVPTXInst<
+ (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
- !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
- []>;
- def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
- regclass:$dst3, regclass:$dst4),
+ i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
+ def _v4_ari_64 : NVPTXInst<
+ (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
- !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
- []>;
- def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
- regclass:$dst4),
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
+ def _v4_asi : NVPTXInst<
+ (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, imem:$addr, i32imm:$offset),
- !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
- []>;
+ i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
}
let mayLoad=1, hasSideEffects=0 in {
-defm LDV_i8 : LD_VEC<Int16Regs>;
-defm LDV_i16 : LD_VEC<Int16Regs>;
-defm LDV_i32 : LD_VEC<Int32Regs>;
-defm LDV_i64 : LD_VEC<Int64Regs>;
-defm LDV_f32 : LD_VEC<Float32Regs>;
-defm LDV_f64 : LD_VEC<Float64Regs>;
+ defm LDV_i8 : LD_VEC<Int16Regs>;
+ defm LDV_i16 : LD_VEC<Int16Regs>;
+ defm LDV_i32 : LD_VEC<Int32Regs>;
+ defm LDV_i64 : LD_VEC<Int64Regs>;
+ defm LDV_f32 : LD_VEC<Float32Regs>;
+ defm LDV_f64 : LD_VEC<Float64Regs>;
}
multiclass ST_VEC<NVPTXRegClass regclass> {
- def _v2_avar : NVPTXInst<(outs),
+ def _v2_avar : NVPTXInst<
+ (outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
- LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
- def _v2_areg : NVPTXInst<(outs),
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t[$addr], {{$src1, $src2}};", []>;
+ def _v2_areg : NVPTXInst<
+ (outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
- LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
- def _v2_areg_64 : NVPTXInst<(outs),
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t[$addr], {{$src1, $src2}};", []>;
+ def _v2_areg_64 : NVPTXInst<
+ (outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
- LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
- def _v2_ari : NVPTXInst<(outs),
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t[$addr], {{$src1, $src2}};", []>;
+ def _v2_ari : NVPTXInst<
+ (outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
- LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
- i32imm:$offset),
- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
- def _v2_ari_64 : NVPTXInst<(outs),
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
+ i32imm:$offset),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t[$addr+$offset], {{$src1, $src2}};", []>;
+ def _v2_ari_64 : NVPTXInst<
+ (outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
- LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
- i32imm:$offset),
- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
- def _v2_asi : NVPTXInst<(outs),
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
+ i32imm:$offset),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t[$addr+$offset], {{$src1, $src2}};", []>;
+ def _v2_asi : NVPTXInst<
+ (outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
- LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
- i32imm:$offset),
- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
- def _v4_avar : NVPTXInst<(outs),
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
+ i32imm:$offset),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t[$addr+$offset], {{$src1, $src2}};", []>;
+ def _v4_avar : NVPTXInst<
+ (outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, imem:$addr),
- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
- def _v4_areg : NVPTXInst<(outs),
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
+ def _v4_areg : NVPTXInst<
+ (outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int32Regs:$addr),
- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
- def _v4_areg_64 : NVPTXInst<(outs),
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
+ def _v4_areg_64 : NVPTXInst<
+ (outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int64Regs:$addr),
- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
- def _v4_ari : NVPTXInst<(outs),
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
+ def _v4_ari : NVPTXInst<
+ (outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
- []>;
- def _v4_ari_64 : NVPTXInst<(outs),
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
+ def _v4_ari_64 : NVPTXInst<
+ (outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
- []>;
- def _v4_asi : NVPTXInst<(outs),
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
+ def _v4_asi : NVPTXInst<
+ (outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, imem:$addr, i32imm:$offset),
- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
- "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
- []>;
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}"
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
}
+
let mayStore=1, hasSideEffects=0 in {
-defm STV_i8 : ST_VEC<Int16Regs>;
-defm STV_i16 : ST_VEC<Int16Regs>;
-defm STV_i32 : ST_VEC<Int32Regs>;
-defm STV_i64 : ST_VEC<Int64Regs>;
-defm STV_f32 : ST_VEC<Float32Regs>;
-defm STV_f64 : ST_VEC<Float64Regs>;
+ defm STV_i8 : ST_VEC<Int16Regs>;
+ defm STV_i16 : ST_VEC<Int16Regs>;
+ defm STV_i32 : ST_VEC<Int32Regs>;
+ defm STV_i64 : ST_VEC<Int64Regs>;
+ defm STV_f32 : ST_VEC<Float32Regs>;
+ defm STV_f64 : ST_VEC<Float64Regs>;
}
// pack a set of smaller int registers to a larger int register
def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
- (ins Int16Regs:$s1, Int16Regs:$s2,
- Int16Regs:$s3, Int16Regs:$s4),
- "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
- []>;
+ (ins Int16Regs:$s1, Int16Regs:$s2,
+ Int16Regs:$s3, Int16Regs:$s4),
+ "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", []>;
def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
- (ins Int16Regs:$s1, Int16Regs:$s2),
- "mov.b32\t$d, {{$s1, $s2}};",
- []>;
+ (ins Int16Regs:$s1, Int16Regs:$s2),
+ "mov.b32\t$d, {{$s1, $s2}};", []>;
def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
- (ins Int32Regs:$s1, Int32Regs:$s2),
- "mov.b64\t$d, {{$s1, $s2}};",
- []>;
+ (ins Int32Regs:$s1, Int32Regs:$s2),
+ "mov.b64\t$d, {{$s1, $s2}};", []>;
def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
- (ins Float32Regs:$s1, Float32Regs:$s2),
- "mov.b64\t$d, {{$s1, $s2}};",
- []>;
+ (ins Float32Regs:$s1, Float32Regs:$s2),
+ "mov.b64\t$d, {{$s1, $s2}};", []>;
// unpack a larger int register to a set of smaller int registers
def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
Int16Regs:$d3, Int16Regs:$d4),
(ins Int64Regs:$s),
- "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
- []>;
+ "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", []>;
def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
(ins Int32Regs:$s),
- "mov.b32\t{{$d1, $d2}}, $s;",
- []>;
+ "mov.b32\t{{$d1, $d2}}, $s;", []>;
def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
(ins Int64Regs:$s),
- "mov.b64\t{{$d1, $d2}}, $s;",
- []>;
+ "mov.b64\t{{$d1, $d2}}, $s;", []>;
def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
(ins Float64Regs:$s),
- "mov.b64\t{{$d1, $d2}}, $s;",
- []>;
+ "mov.b64\t{{$d1, $d2}}, $s;", []>;
// Count leading zeros
def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
- "clz.b32\t$d, $a;",
- []>;
+ "clz.b32\t$d, $a;", []>;
def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
- "clz.b64\t$d, $a;",
- []>;
+ "clz.b64\t$d, $a;", []>;
// 32-bit has a direct PTX instruction
-def : Pat<(ctlz Int32Regs:$a),
- (CLZr32 Int32Regs:$a)>;
-def : Pat<(ctlz_zero_undef Int32Regs:$a),
- (CLZr32 Int32Regs:$a)>;
+def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>;
+def : Pat<(ctlz_zero_undef Int32Regs:$a), (CLZr32 Int32Regs:$a)>;
// For 64-bit, the result in PTX is actually 32-bit so we zero-extend
// to 64-bit to match the LLVM semantics
-def : Pat<(ctlz Int64Regs:$a),
- (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
+def : Pat<(ctlz Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
def : Pat<(ctlz_zero_undef Int64Regs:$a),
(CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
// Population count
def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
- "popc.b32\t$d, $a;",
- []>;
+ "popc.b32\t$d, $a;", []>;
def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
- "popc.b64\t$d, $a;",
- []>;
+ "popc.b64\t$d, $a;", []>;
// 32-bit has a direct PTX instruction
-def : Pat<(ctpop Int32Regs:$a),
- (POPCr32 Int32Regs:$a)>;
+def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>;
// For 64-bit, the result in PTX is actually 32-bit so we zero-extend
// to 64-bit to match the LLVM semantics
-def : Pat<(ctpop Int64Regs:$a),
- (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
+def : Pat<(ctpop Int64Regs:$a), (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
// For 16-bit, we zero-extend to 32-bit, then trunc the result back
// to 16-bits (ctpop of a 16-bit value is guaranteed to require less
// than 16 bits to store)
def : Pat<(ctpop Int16Regs:$a),
- (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
- CvtNONE)>;
+ (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>;
// fround f64 -> f32
def : Pat<(f32 (fround Float64Regs:$a)),
def : Pat<(f64 (fextend Float32Regs:$a)),
(CVT_f64_f32 Float32Regs:$a, CvtNONE)>;
-def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
//-----------------------------------
// Control-flow
let isBranch=1 in
def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
- "@$a bra \t$target;",
- [(brcond Int1Regs:$a, bb:$target)]>;
+ "@$a bra \t$target;",
+ [(brcond Int1Regs:$a, bb:$target)]>;
let isBranch=1 in
def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
- "@!$a bra \t$target;",
- []>;
+ "@!$a bra \t$target;", []>;
let isBranch=1, isBarrier=1 in
def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
- "bra.uni \t$target;",
- [(br bb:$target)]>;
+ "bra.uni \t$target;", [(br bb:$target)]>;
}
def : Pat<(brcond Int32Regs:$a, bb:$target),
(CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>;
// SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
-// conditional branch if
-// the target block is the next block so that the code can fall through to the
-// target block.
-// The invertion is done by 'xor condition, 1', which will be translated to
-// (setne condition, -1).
-// Since ptx supports '@!pred bra target', we should use it.
+// conditional branch if the target block is the next block so that the code
+// can fall through to the target block. The inversion is done by 'xor
+// condition, 1', which will be translated to (setne condition, -1). Since PTX
+// supports '@!pred bra target', we should use it.
def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
- (CBranchOther Int1Regs:$a, bb:$target)>;
+ (CBranchOther Int1Regs:$a, bb:$target)>;
// Call
// Type profiles for the call-sequence markers: the start profile constrains
// operand 0 to i32; the end profile constrains operands 0 and 1 to i32.
-def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
-def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
- SDTCisVT<1, i32> ]>;
+def SDT_NVPTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
// DAG nodes for ISD::CALLSEQ_START and ISD::CALLSEQ_END. Both are chained,
// flagged as having side effects, and produce an outgoing glue; callseq_end
// additionally accepts an optional incoming glue.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
-def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPSideEffect]>;
+ SDNPSideEffect]>;
// NVPTXISD::CALL has no results and a single i32 operand (SDT_NVPTXCall).
// The node is chained, accepts an optional incoming glue, and produces an
// outgoing glue.
def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
// i32 operand class used for the $dst operand of CALL.
def calltarget : Operand<i32>;
// Call instruction: prints "call $dst, (1);". No pattern is attached to the
// instruction itself ([]); the call Pats that follow select it.
let isCall=1 in {
- def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
- "call \t$dst, (1);", []>;
+ def CALL : NVPTXInst<(outs), (ins calltarget:$dst), "call \t$dst, (1);", []>;
}
// Select CALL for a call node whose target is either a target global address
// or a target external symbol.
-def : Pat<(call tglobaladdr:$dst),
- (CALL tglobaladdr:$dst)>;
-def : Pat<(call texternalsym:$dst),
- (CALL texternalsym:$dst)>;
+def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>;
+def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>;
// Pseudo instructions.
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
// @TODO: We use some tricks here to emit curly braces. Can we clean this up
// a bit without TableGen modifications?
// Call-sequence markers, selected for callseq_start / callseq_end with target
// immediates. Callseq_Start's text opens a brace scope and declares the .b32
// register temp_param_reg; Callseq_End's text closes that scope. Both also
// print a "Callseq Start/End $amt" comment line.
// NOTE(review): the doubled braces look like the escape that yields a single
// literal brace in the printed output -- confirm against the asm-string rules.
-def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt),
- "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
- [(callseq_start timm:$amt)]>;
-def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- "\n\t//{{\n\t}}// Callseq End $amt1",
- [(callseq_end timm:$amt1, timm:$amt2)]>;
+def Callseq_Start :
+ NVPTXInst<(outs), (ins i32imm:$amt),
+ "// Callseq Start $amt\n"
+ "\t{{\n"
+ "\t.reg .b32 temp_param_reg;\n"
+ "\t// <end>}}",
+ [(callseq_start timm:$amt)]>;
+def Callseq_End :
+ NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "\n"
+ "\t//{{\n"
+ "\t}}// Callseq End $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
// trap instruction
-
// Takes no operands and produces no results; prints "trap;" and is selected
// for the trap node.
-def trapinst : NVPTXInst<(outs), (ins),
- "trap;",
- [(trap)]>;
+def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>;
// Call prototype wrapper
// The prototype node has no results and one integer operand. It is chained,
// flagged as having side effects, and both consumes and produces glue.
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def CallPrototype
- : SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallPrototype :
+ SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// i32 operand whose assembly text is produced by the printProtoIdent print
// method.
def ProtoIdent : Operand<i32> {
let PrintMethod = "printProtoIdent";
}
// Prints only its $ident operand (a ProtoIdent); selected for CallPrototype
// applied to an i32 target external symbol.
-def CALL_PROTOTYPE
- : NVPTXInst<(outs), (ins ProtoIdent:$ident),
- "$ident", [(CallPrototype (i32 texternalsym:$ident))]>;
-
+def CALL_PROTOTYPE :
+ NVPTXInst<(outs), (ins ProtoIdent:$ident),
+ "$ident", [(CallPrototype (i32 texternalsym:$ident))]>;
include "NVPTXIntrinsics.td"