[libc] Add more missing GPU utilities

author Joseph Huber <jhuber6@vols.utk.edu>

Thu, 27 Apr 2023 19:35:47 +0000 (14:35 -0500)

committer Joseph Huber <jhuber6@vols.utk.edu>

Thu, 27 Apr 2023 19:37:00 +0000 (14:37 -0500)
author Joseph Huber <jhuber6@vols.utk.edu>
Thu, 27 Apr 2023 19:35:47 +0000 (14:35 -0500)
committer Joseph Huber <jhuber6@vols.utk.edu>
Thu, 27 Apr 2023 19:37:00 +0000 (14:37 -0500)
diff --git a/libc/src/__support/GPU/amdgpu/utils.h b/libc/src/__support/GPU/amdgpu/utils.h

index a4ac7d26f0d911ad72cca36f0d735b0b42f0c7e5..ca9122b6b6a54b6b2b5568539f7090f322bf52c6 100644 (file)
--- a/libc/src/__support/GPU/amdgpu/utils.h
+++ b/libc/src/__support/GPU/amdgpu/utils.h
@@ -34,6 +34,11 @@ LIBC_INLINE uint32_t get_num_blocks_z() {
    return __builtin_amdgcn_grid_size_z() / __builtin_amdgcn_workgroup_size_z();
  }
  
+/// Returns the total number of workgruops in the grid.
+LIBC_INLINE uint64_t get_num_blocks() {
+  return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z();
+}
+
  /// Returns the 'x' dimension of the current AMD workgroup's id.
  LIBC_INLINE uint32_t get_block_id_x() {
    return __builtin_amdgcn_workgroup_id_x();
@@ -70,6 +75,11 @@ LIBC_INLINE uint32_t get_num_threads_z() {
    return __builtin_amdgcn_workgroup_size_z();
  }
  
+/// Returns the total number of workitems in the workgroup.
+LIBC_INLINE uint64_t get_num_threads() {
+  return get_num_threads_x() * get_num_threads_y() * get_num_threads_z();
+}
+
  /// Returns the 'x' dimension id of the workitem in the current AMD workgroup.
  LIBC_INLINE uint32_t get_thread_id_x() {
    return __builtin_amdgcn_workitem_id_x();
@@ -119,7 +129,9 @@ LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }
  }
  
  /// Wait for all threads in the wavefront to converge, this is a noop on AMDGPU.
-[[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t) {}
+[[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t) {
+  __builtin_amdgcn_wave_barrier();
+}
  
  } // namespace gpu
  } // namespace __llvm_libc
diff --git a/libc/src/__support/GPU/generic/utils.h b/libc/src/__support/GPU/generic/utils.h

index 20e1b16ec25bac2eac173adfc69290dc7a1b1891..0decb3fa59d59b84ee7d1b0af65b743512ae306b 100644 (file)
--- a/libc/src/__support/GPU/generic/utils.h
+++ b/libc/src/__support/GPU/generic/utils.h
@@ -20,9 +20,11 @@ constexpr const uint64_t LANE_SIZE = 1;
  
  LIBC_INLINE uint32_t get_num_blocks_x() { return 1; }
  
-LIBC_INLINE uint32_t get_num_blocks_y() { return 0; }
+LIBC_INLINE uint32_t get_num_blocks_y() { return 1; }
  
-LIBC_INLINE uint32_t get_num_blocks_z() { return 0; }
+LIBC_INLINE uint32_t get_num_blocks_z() { return 1; }
+
+LIBC_INLINE uint64_t get_num_blocks() { return 1; }
  
  LIBC_INLINE uint32_t get_block_id_x() { return 0; }
  
@@ -34,9 +36,11 @@ LIBC_INLINE uint64_t get_block_id() { return 0; }
  
  LIBC_INLINE uint32_t get_num_threads_x() { return 1; }
  
-LIBC_INLINE uint32_t get_num_threads_y() { return 0; }
+LIBC_INLINE uint32_t get_num_threads_y() { return 1; }
+
+LIBC_INLINE uint32_t get_num_threads_z() { return 1; }
  
-LIBC_INLINE uint32_t get_num_threads_z() { return 0; }
+LIBC_INLINE uint64_t get_num_threads() { return 1; }
  
  LIBC_INLINE uint32_t get_thread_id_x() { return 0; }
  
diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h

index 88544db85b9f07bcf1a8c052547e82bc73cdaa5b..443b8c72fc85c824fcae752ec4f0bf5d368a3d03 100644 (file)
--- a/libc/src/__support/GPU/nvptx/utils.h
+++ b/libc/src/__support/GPU/nvptx/utils.h
@@ -34,6 +34,11 @@ LIBC_INLINE uint32_t get_num_blocks_z() {
    return __nvvm_read_ptx_sreg_nctaid_z();
  }
  
+/// Returns the total number of CUDA blocks.
+LIBC_INLINE uint64_t get_num_blocks() {
+  return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z();
+}
+
  /// Returns the 'x' dimension of the current CUDA block's id.
  LIBC_INLINE uint32_t get_block_id_x() { return __nvvm_read_ptx_sreg_ctaid_x(); }
  
@@ -64,6 +69,11 @@ LIBC_INLINE uint32_t get_num_threads_z() {
    return __nvvm_read_ptx_sreg_ntid_z();
  }
  
+/// Returns the total number of threads in the block.
+LIBC_INLINE uint64_t get_num_threads() {
+  return get_num_threads_x() * get_num_threads_y() * get_num_threads_z();
+}
+
  /// Returns the 'x' dimension id of the thread in the current CUDA block.
  LIBC_INLINE uint32_t get_thread_id_x() { return __nvvm_read_ptx_sreg_tid_x(); }
author	Joseph Huber <jhuber6@vols.utk.edu>
	Thu, 27 Apr 2023 19:35:47 +0000 (14:35 -0500)
committer	Joseph Huber <jhuber6@vols.utk.edu>
	Thu, 27 Apr 2023 19:37:00 +0000 (14:37 -0500)
libc/src/__support/GPU/amdgpu/utils.h		patch \| blob \| history
libc/src/__support/GPU/generic/utils.h		patch \| blob \| history
libc/src/__support/GPU/nvptx/utils.h		patch \| blob \| history