MemRefDescriptor memrefDescriptor(memref);
Type llvmI64 = this->typeConverter->convertType(rewriter.getI64Type());
- Type llvm2xI32 = this->typeConverter->convertType(VectorType::get(2, i32));
+ Type llvmI32 = this->typeConverter->convertType(i32);
+ Value c32I64 = rewriter.create<LLVM::ConstantOp>(
+     loc, llvmI64, rewriter.getI64IntegerAttr(32));
Value resource = rewriter.create<LLVM::UndefOp>(loc, llvm4xI32);
Value ptr = memrefDescriptor.alignedPtr(rewriter, loc);
Value ptrAsInt = rewriter.create<LLVM::PtrToIntOp>(loc, llvmI64, ptr);
- Value ptrAsInts =
-     rewriter.create<LLVM::BitcastOp>(loc, llvm2xI32, ptrAsInt);
- for (int64_t i = 0; i < 2; ++i) {
-   Value idxConst = this->createIndexConstant(rewriter, loc, i);
-   Value part =
-       rewriter.create<LLVM::ExtractElementOp>(loc, ptrAsInts, idxConst);
-   resource = rewriter.create<LLVM::InsertElementOp>(
-       loc, llvm4xI32, resource, part, idxConst);
- }
+ Value lowHalf = rewriter.create<LLVM::TruncOp>(loc, llvmI32, ptrAsInt);
+ resource = rewriter.create<LLVM::InsertElementOp>(
+     loc, llvm4xI32, resource, lowHalf,
+     this->createIndexConstant(rewriter, loc, 0));
+
+ // Bits 48-63 are used both for the stride of the buffer and (on gfx10) for
+ // enabling swizzling. Prevent the high bits of pointers from accidentally
+ // setting those flags.
+ Value highHalfShifted = rewriter.create<LLVM::TruncOp>(
+     loc, llvmI32, rewriter.create<LLVM::LShrOp>(loc, ptrAsInt, c32I64));
+ Value highHalfTruncated = rewriter.create<LLVM::AndOp>(
+     loc, llvmI32, highHalfShifted,
+     createI32Constant(rewriter, loc, 0x0000ffff));
+ resource = rewriter.create<LLVM::InsertElementOp>(
+     loc, llvm4xI32, resource, highHalfTruncated,
+     this->createIndexConstant(rewriter, loc, 1));
Value numRecords;
if (memrefType.hasStaticShape()) {
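
The masking above is the substance of the change: word 1 of the buffer resource descriptor carries pointer bits 32-63, but descriptor bits 48-63 double as the buffer stride field (and, on gfx10, the swizzle-enable flag), so the high bits of a sign-extended 48-bit virtual address would otherwise leak into those fields. A minimal standalone sketch of the same word0/word1 arithmetic in plain C++ (splitPointer is a hypothetical helper for illustration, not part of the patch):

#include <cassert>
#include <cstdint>

// Hypothetical helper mirroring the lowering: split a 64-bit pointer into the
// first two 32-bit words of a buffer resource descriptor.
static void splitPointer(uint64_t ptr, uint32_t &word0, uint32_t &word1) {
  word0 = static_cast<uint32_t>(ptr);               // LLVM::TruncOp
  uint32_t high = static_cast<uint32_t>(ptr >> 32); // LLVM::LShrOp + TruncOp
  word1 = high & 0x0000ffffu; // LLVM::AndOp: clear descriptor bits 48-63
}

int main() {
  // A sign-extended 48-bit virtual address: pointer bits 48-63 are all ones.
  uint32_t word0, word1;
  splitPointer(0xffff800000001234u, word0, word1);
  assert(word0 == 0x00001234u);
  // Without the mask, word1 would be 0xffff8000, i.e. a bogus nonzero stride
  // (and, on gfx10, swizzling enabled).
  assert(word1 == 0x00008000u);
  return 0;
}

The corresponding test changes:
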
// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_i32
func.func @gpu_gcn_raw_buffer_load_i32(%buf: memref<64xi32>, %idx: i32) -> i32 {
+ // CHECK: %[[ptr:.*]] = llvm.ptrtoint
+ // CHECK: %[[lowHalf:.*]] = llvm.trunc %[[ptr]] : i64 to i32
+ // CHECK: %[[resource_1:.*]] = llvm.insertelement %[[lowHalf]]
+ // CHECK: %[[highHalfI64:.*]] = llvm.lshr %[[ptr]]
+ // CHECK: %[[highHalfI32:.*]] = llvm.trunc %[[highHalfI64]] : i64 to i32
+ // CHECK: %[[highHalf:.*]] = llvm.and %[[highHalfI32]], %{{.*}} : i32
+ // CHECK: %[[resource_2:.*]] = llvm.insertelement %[[highHalf]], %[[resource_1]]
// CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32)
- // CHECK: llvm.insertelement{{.*}}%[[numRecords]]
+ // CHECK: %[[resource_3:.*]] = llvm.insertelement %[[numRecords]], %[[resource_2]]
// CHECK: %[[word3:.*]] = llvm.mlir.constant(159744 : i32)
// RDNA: %[[word3:.*]] = llvm.mlir.constant(822243328 : i32)
- // CHECK: %[[resource:.*]] = llvm.insertelement{{.*}}%[[word3]]
+ // CHECK: %[[resource:.*]] = llvm.insertelement %[[word3]], %[[resource_3]]
// CHECK: %[[ret:.*]] = rocdl.raw.buffer.load %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
// CHECK: return %[[ret]]
%0 = amdgpu.raw_buffer_load {boundsCheck = true} %buf[%idx] : memref<64xi32>, i32 -> i32
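
For reference, the 256 : i32 constant the test expects for numRecords is just the memref's extent in bytes; a quick sanity check of that arithmetic (a sketch, not part of the test):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t elementCount = 64; // memref<64xi32>
  uint64_t elementBytes = 4;  // i32
  // Matches the llvm.mlir.constant(256 : i32) the CHECK line captures.
  assert(elementCount * elementBytes == 256);
  return 0;
}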