From 0ad19a833177861be55fefaff725ab89c8695d01 Mon Sep 17 00:00:00 2001
From: JackAKirk
Date: Mon, 24 Jan 2022 12:32:36 -0800
Subject: [PATCH] [CUDA,NVPTX] Corrected fragment size for tf32 LD B matrix.

Signed-off-by: JackAKirk

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D118023
---
 clang/lib/CodeGen/CGBuiltin.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index cd35e7c..a80a55e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17190,7 +17190,7 @@ static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
   case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
     return MMA_LDST(4, m16n16k8_load_a_tf32);
   case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
-    return MMA_LDST(2, m16n16k8_load_b_tf32);
+    return MMA_LDST(4, m16n16k8_load_b_tf32);
   case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
     return MMA_LDST(8, m16n16k8_load_c_f32);
 
--
2.7.4