[NVPTX] Add lowering of i128 params.

author Michael Kuperstein <mkuper@google.com>
Thu, 6 Jul 2017 22:18:54 +0000 (22:18 +0000)

committer Michael Kuperstein <mkuper@google.com>
Thu, 6 Jul 2017 22:18:54 +0000 (22:18 +0000)

diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

index 0139646..82634cf 100644 (file)
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -400,7 +400,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
    O << " (";
  
    if (isABI) {
-    if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
+    if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
        unsigned size = 0;
        if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
          size = ITy->getBitWidth();
@@ -418,7 +418,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
      } else if (isa<PointerType>(Ty)) {
        O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
          << " func_retval0";
-    } else if (Ty->isAggregateType() || Ty->isVectorTy()) {
+    } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
        unsigned totalsz = DL.getTypeAllocSize(Ty);
        unsigned retAlignment = 0;
        if (!getAlign(*F, 0, retAlignment))
@@ -1425,6 +1425,14 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
    else
      O << " .align " << GVar->getAlignment();
  
+  // Special case for i128: declare the global as a 16-element .b8 array.
+  if (ETy->isIntegerTy(128)) {
+    O << " .b8 ";
+    getSymbol(GVar)->print(O, MAI);
+    O << "[16]";
+    return;
+  }
+
    if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
      O << " .";
      O << getPTXFundamentalTypeStr(ETy);
@@ -1551,7 +1559,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
      }
  
      if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) {
-      if (Ty->isAggregateType() || Ty->isVectorTy()) {
+      if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
          // Just print .param .align <a> .b8 .param[size];
          // <a> = PAL.getparamalignment
          // size = typeallocsize of element type
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

index 54579d0..9b48979 100644 (file)
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -169,6 +169,19 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
    SmallVector<EVT, 16> TempVTs;\r
    SmallVector<uint64_t, 16> TempOffsets;\r
  \r
+  // Special case for i128 - decompose to (i64, i64)\r
+  if (Ty->isIntegerTy(128)) {\r
+    ValueVTs.push_back(EVT(MVT::i64));\r
+    ValueVTs.push_back(EVT(MVT::i64));\r
+\r
+    if (Offsets) {\r
+      Offsets->push_back(StartingOffset + 0);\r
+      Offsets->push_back(StartingOffset + 8);\r
+    }\r
+\r
+    return;\r
+  }\r
+\r
    ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);\r
    for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {\r
      EVT VT = TempVTs[i];\r
@@ -1263,7 +1276,7 @@ std::string NVPTXTargetLowering::getPrototype(
      O << "()";\r
    } else {\r
      O << "(";\r
-    if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {\r
+    if (retTy->isFloatingPointTy() || (retTy->isIntegerTy() && !retTy->isIntegerTy(128))) {\r
        unsigned size = 0;\r
        if (auto *ITy = dyn_cast<IntegerType>(retTy)) {\r
          size = ITy->getBitWidth();\r
@@ -1281,7 +1294,7 @@ std::string NVPTXTargetLowering::getPrototype(
        O << ".param .b" << size << " _";\r
      } else if (isa<PointerType>(retTy)) {\r
        O << ".param .b" << PtrVT.getSizeInBits() << " _";\r
-    } else if (retTy->isAggregateType() || retTy->isVectorTy()) {\r
+    } else if (retTy->isAggregateType() || retTy->isVectorTy() || retTy->isIntegerTy(128)) {\r
        auto &DL = CS->getCalledFunction()->getParent()->getDataLayout();\r
        O << ".param .align " << retAlignment << " .b8 _["\r
          << DL.getTypeAllocSize(retTy) << "]";\r
@@ -1303,7 +1316,7 @@ std::string NVPTXTargetLowering::getPrototype(
      first = false;\r
  \r
      if (!Outs[OIdx].Flags.isByVal()) {\r
-      if (Ty->isAggregateType() || Ty->isVectorTy()) {\r
+      if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {\r
          unsigned align = 0;\r
          const CallInst *CallI = cast<CallInst>(CS->getInstruction());\r
          // +1 because index 0 is reserved for return type alignment\r
@@ -1459,7 +1472,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
        unsigned AllocSize = DL.getTypeAllocSize(Ty);\r
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);\r
        bool NeedAlign; // Does argument declaration specify alignment?\r
-      if (Ty->isAggregateType() || Ty->isVectorTy()) {\r
+      if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {\r
          // declare .param .align <align> .b8 .param<n>[<size>];\r
          SDValue DeclareParamOps[] = {\r
              Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),\r
@@ -1635,8 +1648,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
      // these three types to match the logic in\r
      // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.\r
      // Plus, this behavior is consistent with nvcc's.\r
-    if (RetTy->isFloatingPointTy() || RetTy->isIntegerTy() ||\r
-        RetTy->isPointerTy()) {\r
+    if (RetTy->isFloatingPointTy() || RetTy->isPointerTy() ||\r
+        (RetTy->isIntegerTy() && !RetTy->isIntegerTy(128))) {\r
        // Scalar needs to be at least 32bit wide\r
        if (resultsz < 32)\r
          resultsz = 32;\r
@@ -2367,7 +2380,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
  \r
      if (theArgs[i]->use_empty()) {\r
        // argument is dead\r
-      if (Ty->isAggregateType()) {\r
+      if (Ty->isAggregateType() || Ty->isIntegerTy(128)) {\r
          SmallVector<EVT, 16> vtparts;\r
  \r
          ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);\r
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

index 2b6ba8c..ac21563 100644 (file)
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -81,7 +81,7 @@ static std::string computeDataLayout(bool is64Bit) {
    if (!is64Bit)
      Ret += "-p:32:32";
  
-  Ret += "-i64:64-v16:16-v32:32-n16:32:64";
+  Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";
  
    return Ret;
  }
diff --git a/llvm/test/CodeGen/NVPTX/i128-global.ll b/llvm/test/CodeGen/NVPTX/i128-global.ll

new file mode 100644 (file)

index 0000000..cd96092
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i128-global.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+; CHECK: .visible .global .align 16 .b8 G1[16] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+@G1 = global i128 1
+
+; CHECK: .visible .global .align 16 .b8 G2[16];
+@G2 = global i128 0
+\ No newline at end of file
diff --git a/llvm/test/CodeGen/NVPTX/i128-param.ll b/llvm/test/CodeGen/NVPTX/i128-param.ll

new file mode 100644 (file)

index 0000000..7cb6035
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i128-param.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -O0 -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; CHECK-LABEL: .visible .func callee(
+; CHECK-NEXT: .param .align 16 .b8 callee_param_0[16],
+; CHECK-NEXT: .param .align 16 .b8 callee_param_1[16],
+define void @callee(i128, i128, i128*) {
+  ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [callee_param_0];
+  ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [callee_param_1];
+
+  ; CHECK:      mul.lo.s64 %[[REG4:rd[0-9]+]], %[[REG0]], %[[REG3]];
+       ; CHECK-NEXT: mul.hi.u64 %[[REG5:rd[0-9]+]], %[[REG0]], %[[REG2]];
+       ; CHECK-NEXT: add.s64    %[[REG6:rd[0-9]+]], %[[REG5]], %[[REG4]];
+       ; CHECK-NEXT: mul.lo.s64 %[[REG7:rd[0-9]+]], %[[REG1]], %[[REG2]];
+       ; CHECK-NEXT: add.s64    %[[REG8:rd[0-9]+]], %[[REG6]], %[[REG7]];
+       ; CHECK-NEXT: mul.lo.s64 %[[REG9:rd[0-9]+]], %[[REG0]], %[[REG2]];
+  %a = mul i128 %0, %1
+
+  store i128 %a, i128* %2
+  ret void
+}
+
+; CHECK-LABEL: .visible .entry caller_kernel(
+; CHECK-NEXT: .param .align 16 .b8 caller_kernel_param_0[16],
+; CHECK-NEXT: .param .align 16 .b8 caller_kernel_param_1[16],
+define ptx_kernel void @caller_kernel(i128, i128, i128*) {
+start:
+  ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_kernel_param_0];
+  ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_kernel_param_1];
+
+  ; CHECK:      { // callseq [[CALLSEQ_ID:[0-9]]], 0
+       ; CHECK:      .param .align 16 .b8 param0[16];
+       ; CHECK-NEXT: st.param.v2.b64   [param0+0], {%[[REG0]], %[[REG1]]}
+       ; CHECK:      .param .align 16 .b8 param1[16];
+       ; CHECK-NEXT: st.param.v2.b64   [param1+0], {%[[REG2]], %[[REG3]]}
+       ; CHECK:      } // callseq [[CALLSEQ_ID]]
+  call void @callee(i128 %0, i128 %1, i128* %2)
+
+  ret void
+}
+
+; CHECK-LABEL: .visible .func caller_func(
+; CHECK-NEXT: .param .align 16 .b8 caller_func_param_0[16],
+; CHECK-NEXT: .param .align 16 .b8 caller_func_param_1[16],
+define void @caller_func(i128, i128, i128*) {
+start:
+  ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_func_param_0]
+  ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_func_param_1]
+
+  ; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0
+       ; CHECK: .param .align 16 .b8 param0[16];
+       ; CHECK: st.param.v2.b64        [param0+0], {%[[REG0]], %[[REG1]]}
+       ; CHECK: .param .align 16 .b8 param1[16];
+  ; CHECK: st.param.v2.b64     [param1+0], {%[[REG2]], %[[REG3]]}
+       ; CHECK: } // callseq [[CALLSEQ_ID]]
+  call void @callee(i128 %0, i128 %1, i128* %2)
+
+  ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/i128-retval.ll b/llvm/test/CodeGen/NVPTX/i128-retval.ll

new file mode 100644 (file)

index 0000000..015b019
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i128-retval.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+; CHECK-LABEL: .visible .func (.param .align 16 .b8 func_retval0[16]) callee(
+define i128 @callee(i128) {
+  ; CHECK: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [callee_param_0];
+  ; CHECK: st.param.v2.b64 [func_retval0+0], {%[[REG0]], %[[REG1]]}
+  ret i128 %0
+}
+
+; CHECK-LABEL: .visible .func caller(
+define void @caller(i128, i128*) {
+start:
+  ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_param_0];
+  ; CHECK-DAG: ld.param.u64 %[[OUT:rd[0-9]+]],  [caller_param_1];
+
+  ; CHECK: { // callseq 0, 0
+       ; CHECK: .param .align 16 .b8 retval0[16];
+       ; CHECK: call.uni (retval0),
+  ; CHECK: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [retval0+0];
+       ; CHECK: } // callseq 0
+  %a = call i128 @callee(i128 %0)
+
+       ; CHECK-DAG: st.u64 [%[[OUT]]], %[[REG2]];
+       ; CHECK-DAG: st.u64 [%[[OUT]]+8], %[[REG3]];
+  store i128 %a, i128* %1
+
+  ret void
+}
author	Michael Kuperstein <mkuper@google.com>
	Thu, 6 Jul 2017 22:18:54 +0000 (22:18 +0000)
committer	Michael Kuperstein <mkuper@google.com>
	Thu, 6 Jul 2017 22:18:54 +0000 (22:18 +0000)
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp		patch \| blob \| history
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp		patch \| blob \| history
llvm/test/CodeGen/NVPTX/i128-global.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/NVPTX/i128-param.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/NVPTX/i128-retval.ll	[new file with mode: 0644]	patch \| blob