[AMDGPU] set read_only access qualifier for pointers
author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Fri, 14 Apr 2017 19:11:40 +0000 (19:11 +0000)
committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Fri, 14 Apr 2017 19:11:40 +0000 (19:11 +0000)
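If a kernel's pointer argument is known to be read-only (it only reads
memory and is marked noalias), set its access qualifier to read_only
instead of taking it from the kernel_arg_access_qual metadata. This
allows the runtime to avoid flushing caches before dispatches.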

Differential Revision: https://reviews.llvm.org/D32091

llvm-svn: 300362

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
llvm/test/CodeGen/AMDGPU/code-object-metadata-deduce-ro-arg.ll [new file with mode: 0644]

index 59571a4..29a6ab9 100644 (file)
@@ -478,9 +478,14 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) {
     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
 
   StringRef AccQual;
-  Node = Func->getMetadata("kernel_arg_access_qual");
-  if (Node && ArgNo < Node->getNumOperands())
-    AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
+      Arg.hasNoAliasAttr()) {
+    AccQual = "read_only";
+  } else {
+    Node = Func->getMetadata("kernel_arg_access_qual");
+    if (Node && ArgNo < Node->getNumOperands())
+      AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+  }
 
   StringRef Name;
   Node = Func->getMetadata("kernel_arg_name");
diff --git a/llvm/test/CodeGen/AMDGPU/code-object-metadata-deduce-ro-arg.ll b/llvm/test/CodeGen/AMDGPU/code-object-metadata-deduce-ro-arg.ll
new file mode 100644 (file)
index 0000000..a33c364
--- /dev/null
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck %s
+
+; CHECK:      - Name:            test_ro_arg
+; CHECK:        Args:
+; CHECK-NEXT: - Size:            8
+; CHECK-NEXT:   Align:           8
+; CHECK-NEXT:   ValueKind:       GlobalBuffer
+; CHECK-NEXT:   ValueType:       F32
+; CHECK-NEXT:   AccQual:         ReadOnly
+; CHECK-NEXT:   AddrSpaceQual:   Global
+; CHECK-NEXT:   IsConst:         true
+; CHECK-NEXT:   IsRestrict:      true
+; CHECK-NEXT:   TypeName:        'float*'
+
+; CHECK-NEXT: - Size:            8
+; CHECK-NEXT:   Align:           8
+; CHECK-NEXT:   ValueKind:       GlobalBuffer
+; CHECK-NEXT:   ValueType:       F32
+; CHECK-NEXT:   AccQual:         Default
+; CHECK-NEXT:   AddrSpaceQual:   Global
+; CHECK-NEXT:   TypeName:        'float*'
+
+define amdgpu_kernel void @test_ro_arg(float addrspace(1)* noalias readonly %in, float addrspace(1)* %out)
+    !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2
+    !kernel_arg_base_type !2 !kernel_arg_type_qual !3 {
+  ret void
+}
+
+!0 = !{i32 1, i32 1}
+!1 = !{!"none", !"none"}
+!2 = !{!"float*", !"float*"}
+!3 = !{!"const restrict", !""}
+