// RUN: mlir-rocm-runner %s --shared-libs=%rocm_wrapper_library_dir/librocm-runtime-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext --entry-point-result=void | FileCheck %s
func @other_func(%arg0 : f32, %arg1 : memref<?xf32>) {
- %cst = constant 1 : index
- %cst2 = dim %arg1, 0 : memref<?xf32>
- gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst)
- threads(%tx, %ty, %tz) in (%block_x = %cst2, %block_y = %cst, %block_z = %cst) {
+ %c0 = constant 0 : index
+ %c1 = constant 1 : index
+ %block_dim = dim %arg1, %c0 : memref<?xf32>
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
+ threads(%tx, %ty, %tz) in (%block_x = %block_dim, %block_y = %c1, %block_z = %c1) {
store %arg0, %arg1[%tx] : memref<?xf32>
gpu.terminator
}
func @main() {
%arg = alloc() : memref<13xi32>
%dst = memref_cast %arg : memref<13xi32> to memref<?xi32>
- %one = constant 1 : index
- %sx = dim %dst, 0 : memref<?xi32>
+ %c0 = constant 0 : index
+ %c1 = constant 1 : index
+ %sx = dim %dst, %c0 : memref<?xi32>
%cast_dst = memref_cast %dst : memref<?xi32> to memref<*xi32>
call @mgpuMemHostRegisterInt32(%cast_dst) : (memref<*xi32>) -> ()
%dst_device = call @mgpuMemGetDeviceMemRef1dInt32(%dst) : (memref<?xi32>) -> (memref<?xi32>)
- gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
- threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
+ threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %c1, %block_z = %c1) {
%t0 = index_cast %tx : index to i32
store %t0, %dst_device[%tx] : memref<?xi32>
gpu.terminator
}
- gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
- threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
+ threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %c1, %block_z = %c1) {
%t0 = index_cast %tx : index to i32
store %t0, %dst_device[%tx] : memref<?xi32>
gpu.terminator
// RUN: mlir-rocm-runner %s --shared-libs=%rocm_wrapper_library_dir/librocm-runtime-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext --entry-point-result=void | FileCheck %s
func @vecadd(%arg0 : memref<?xf32>, %arg1 : memref<?xf32>, %arg2 : memref<?xf32>) {
- %cst = constant 1 : index
- %cst2 = dim %arg0, 0 : memref<?xf32>
- gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst)
- threads(%tx, %ty, %tz) in (%block_x = %cst2, %block_y = %cst, %block_z = %cst) {
+ %c0 = constant 0 : index
+ %c1 = constant 1 : index
+ %block_dim = dim %arg0, %c0 : memref<?xf32>
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
+ threads(%tx, %ty, %tz) in (%block_x = %block_dim, %block_y = %c1, %block_z = %c1) {
%a = load %arg0[%tx] : memref<?xf32>
%b = load %arg1[%tx] : memref<?xf32>
%c = addf %a, %b : f32