// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
// Also check we do not crash when running some middle-end passes. Most
// importantly this includes the IR verifier, to ensure we emit valid IR.
typedef unsigned ux6x1_t __attribute__((matrix_type(6, 1)));
void transpose_double_5x5(dx5x5_t *a) {
  // CHECK-LABEL: define{{.*}} void @transpose_double_5x5(
  // CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[TRANS:%.*]] = call <25 x double> @llvm.matrix.transpose.v25f64(<25 x double> [[A]], i32 5, i32 5)
  // CHECK-NEXT: [[AT_ADDR:%.*]] = bitcast [25 x double]* %a_t to <25 x double>*
  // CHECK-NEXT: store <25 x double> [[TRANS]], <25 x double>* [[AT_ADDR]], align 8
  dx5x5_t a_t = __builtin_matrix_transpose(*a);
}
void transpose_float_3x2(fx3x2_t *a) {
  // CHECK-LABEL: define{{.*}} void @transpose_float_3x2(
  // CHECK: [[A:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[TRANS:%.*]] = call <6 x float> @llvm.matrix.transpose.v6f32(<6 x float> [[A]], i32 3, i32 2)
  // CHECK-NEXT: [[AT_ADDR:%.*]] = bitcast [6 x float]* %a_t to <6 x float>*
  // CHECK-NEXT: store <6 x float> [[TRANS]], <6 x float>* [[AT_ADDR]], align 4
  fx2x3_t a_t = __builtin_matrix_transpose(*a);
}
void transpose_int_20x4(ix20x4_t *a) {
  // CHECK-LABEL: define{{.*}} void @transpose_int_20x4(
  // CHECK: [[A:%.*]] = load <80 x i32>, <80 x i32>* {{.*}}, align 4
  // CHECK-NEXT: [[TRANS:%.*]] = call <80 x i32> @llvm.matrix.transpose.v80i32(<80 x i32> [[A]], i32 20, i32 4)
  // CHECK-NEXT: [[AT_ADDR:%.*]] = bitcast [80 x i32]* %a_t to <80 x i32>*
  // CHECK-NEXT: store <80 x i32> [[TRANS]], <80 x i32>* [[AT_ADDR]], align 4
  ix4x20_t a_t = __builtin_matrix_transpose(*a);
}
};
void transpose_struct_member(struct Foo *F) {
  // CHECK-LABEL: define{{.*}} void @transpose_struct_member(
  // CHECK: [[M:%.*]] = load <6 x i32>, <6 x i32>* {{.*}}, align 4
  // CHECK-NEXT: [[M_T:%.*]] = call <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32> [[M]], i32 1, i32 6)
  // CHECK-NEXT: [[F_ADDR:%.*]] = load %struct.Foo*, %struct.Foo** %F.addr, align 8
  // CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr inbounds %struct.Foo, %struct.Foo* [[F_ADDR]], i32 0, i32 1
  // CHECK-NEXT: [[OUT_PTR_C:%.*]] = bitcast [6 x i32]* [[OUT_PTR]] to <6 x i32>*
  // CHECK-NEXT: store <6 x i32> [[M_T]], <6 x i32>* [[OUT_PTR_C]], align 4
  F->out = __builtin_matrix_transpose(F->in);
}
void transpose_transpose_struct_member(struct Foo *F) {
  // CHECK-LABEL: define{{.*}} void @transpose_transpose_struct_member(
  // CHECK: [[M:%.*]] = load <6 x i32>, <6 x i32>* {{.*}}, align 4
  // CHECK-NEXT: [[M_T:%.*]] = call <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32> [[M]], i32 1, i32 6)
  // CHECK-NEXT: [[M_T2:%.*]] = call <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32> [[M_T]], i32 6, i32 1)
  // CHECK-NEXT: [[F_ADDR:%.*]] = load %struct.Foo*, %struct.Foo** %F.addr, align 8
  // CHECK-NEXT: [[IN_PTR:%.*]] = getelementptr inbounds %struct.Foo, %struct.Foo* [[F_ADDR]], i32 0, i32 0
  // CHECK-NEXT: [[IN_PTR_C:%.*]] = bitcast [6 x i32]* [[IN_PTR]] to <6 x i32>*
  // CHECK-NEXT: store <6 x i32> [[M_T2]], <6 x i32>* [[IN_PTR_C]], align 4
  F->in = __builtin_matrix_transpose(__builtin_matrix_transpose(F->in));
}
dx5x5_t get_matrix();
void transpose_rvalue() {
- // COMMON-LABEL: define{{.*}} void @transpose_rvalue()
- // COMMON-NEXT: entry:
- // CHECK32-NEXT: [[M_T_ADDR:%.*]] = alloca [25 x double], align 4
- // CHECK64-NEXT: [[M_T_ADDR:%.*]] = alloca [25 x double], align 8
- // CHECK32-NEXT: [[CALL:%.*]] = call <25 x double> bitcast (<25 x double> (...)* @get_matrix to <25 x double> ()*)()
- // CHECK64-NEXT: [[CALL:%.*]] = call <25 x double> (...) @get_matrix()
- // COMMON-NEXT: [[M_T:%.*]] = call <25 x double> @llvm.matrix.transpose.v25f64(<25 x double> [[CALL]], i32 5, i32 5)
- // COMMON-NEXT: [[M_T_ADDR_C:%.*]] = bitcast [25 x double]* [[M_T_ADDR]] to <25 x double>*
- // CHECK32-NEXT: store <25 x double> [[M_T]], <25 x double>* [[M_T_ADDR_C]], align 4
- // CHECK64-NEXT: store <25 x double> [[M_T]], <25 x double>* [[M_T_ADDR_C]], align 8
+ // CHECK-LABEL: define{{.*}} void @transpose_rvalue()
+ // CHECK-NEXT: entry:
+ // CHECK-NEXT: [[M_T_ADDR:%.*]] = alloca [25 x double], align 8
+ // CHECK-NEXT: [[CALL:%.*]] = call <25 x double> (...) @get_matrix()
+ // CHECK-NEXT: [[M_T:%.*]] = call <25 x double> @llvm.matrix.transpose.v25f64(<25 x double> [[CALL]], i32 5, i32 5)
+ // CHECK-NEXT: [[M_T_ADDR_C:%.*]] = bitcast [25 x double]* [[M_T_ADDR]] to <25 x double>*
+ // CHECK-NEXT: store <25 x double> [[M_T]], <25 x double>* [[M_T_ADDR_C]], align 8
dx5x5_t m_t = __builtin_matrix_transpose(get_matrix());
}
const dx5x5_t global_matrix;
void transpose_global() {
- // COMMON-LABEL: define{{.*}} void @transpose_global()
- // COMMON-NEXT: entry:
- // CHECK32-NEXT: [[M_T_ADDR:%.*]] = alloca [25 x double], align 4
- // CHECK32-NEXT: [[GLOBAL_MATRIX:%.*]] = load <25 x double>, <25 x double>* bitcast ([25 x double]* @global_matrix to <25 x double>*), align 4
- // CHECK64-NEXT: [[M_T_ADDR:%.*]] = alloca [25 x double], align 8
- // CHECK64-NEXT: [[GLOBAL_MATRIX:%.*]] = load <25 x double>, <25 x double>* bitcast ([25 x double]* @global_matrix to <25 x double>*), align 8
- // COMMON-NEXT: [[M_T:%.*]] = call <25 x double> @llvm.matrix.transpose.v25f64(<25 x double> [[GLOBAL_MATRIX]], i32 5, i32 5)
- // COMMON-NEXT: [[M_T_ADDR_C:%.*]] = bitcast [25 x double]* [[M_T_ADDR]] to <25 x double>*
- // CHECK32-NEXT: store <25 x double> [[M_T]], <25 x double>* [[M_T_ADDR_C]], align 4
- // CHECK64-NEXT: store <25 x double> [[M_T]], <25 x double>* [[M_T_ADDR_C]], align 8
+ // CHECK-LABEL: define{{.*}} void @transpose_global()
+ // CHECK-NEXT: entry:
+ // CHECK-NEXT: [[M_T_ADDR:%.*]] = alloca [25 x double], align 8
+ // CHECK-NEXT: [[GLOBAL_MATRIX:%.*]] = load <25 x double>, <25 x double>* bitcast ([25 x double]* @global_matrix to <25 x double>*), align 8
+ // CHECK-NEXT: [[M_T:%.*]] = call <25 x double> @llvm.matrix.transpose.v25f64(<25 x double> [[GLOBAL_MATRIX]], i32 5, i32 5)
+ // CHECK-NEXT: [[M_T_ADDR_C:%.*]] = bitcast [25 x double]* [[M_T_ADDR]] to <25 x double>*
+ // CHECK-NEXT: store <25 x double> [[M_T]], <25 x double>* [[M_T_ADDR_C]], align 8
dx5x5_t m_t = __builtin_matrix_transpose(global_matrix);
}
void column_major_load_with_const_stride_double(double *Ptr) {
- // COMMON-LABEL: define{{.*}} void @column_major_load_with_const_stride_double(double* %Ptr)
- // CHECK32: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 4
- // CHECK32-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i32(double* align 4 [[PTR]], i32 5, i1 false, i32 5, i32 5)
- // CHECK64: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
- // CHECK64-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i64(double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
+ // CHECK-LABEL: define{{.*}} void @column_major_load_with_const_stride_double(double* %Ptr)
+ // CHECK: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
+ // CHECK-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
dx5x5_t m_a1 = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
}
void column_major_load_with_const_stride2_double(double *Ptr) {
- // COMMON-LABEL: define{{.*}} void @column_major_load_with_const_stride2_double(double* %Ptr)
- // CHECK32: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 4
- // CHECK32-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i32(double* align 4 [[PTR]], i32 15, i1 false, i32 5, i32 5)
- // CHECK64: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
- // CHECK64-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i64(double* align 8 [[PTR]], i64 15, i1 false, i32 5, i32 5)
+ // CHECK-LABEL: define{{.*}} void @column_major_load_with_const_stride2_double(double* %Ptr)
+ // CHECK: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
+ // CHECK-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[PTR]], i64 15, i1 false, i32 5, i32 5)
dx5x5_t m_a2 = __builtin_matrix_column_major_load(Ptr, 5, 5, 2 * 3 + 9);
}
void column_major_load_with_variable_stride_ull_float(float *Ptr, unsigned long long S) {
- // COMMON-LABEL: define{{.*}} void @column_major_load_with_variable_stride_ull_float(float* %Ptr, i64 %S)
- // CHECK32: [[S:%.*]] = load i64, i64* %S.addr, align 8
- // CHECK32-NEXT: [[STRIDE_TRUNC:%.*]] = trunc i64 [[S]] to i32
- // CHECK32-NEXT: [[PTR:%.*]] = load float*, float** %Ptr.addr, align 4
- // CHECK32-NEXT: call <6 x float> @llvm.matrix.column.major.load.v6f32.i32(float* align 4 [[PTR]], i32 [[STRIDE_TRUNC]], i1 false, i32 2, i32 3)
-
- // CHECK64: [[S:%.*]] = load i64, i64* %S.addr, align 8
- // CHECK64-NEXT: [[PTR:%.*]] = load float*, float** %Ptr.addr, align 8
- // CHECK64-NEXT: call <6 x float> @llvm.matrix.column.major.load.v6f32.i64(float* align 4 [[PTR]], i64 [[S]], i1 false, i32 2, i32 3)
+ // CHECK-LABEL: define{{.*}} void @column_major_load_with_variable_stride_ull_float(float* %Ptr, i64 %S)
+ // CHECK: [[S:%.*]] = load i64, i64* %S.addr, align 8
+ // CHECK-NEXT: [[PTR:%.*]] = load float*, float** %Ptr.addr, align 8
+ // CHECK-NEXT: call <6 x float> @llvm.matrix.column.major.load.v6f32(float* align 4 [[PTR]], i64 [[S]], i1 false, i32 2, i32 3)
fx2x3_t m_b = __builtin_matrix_column_major_load(Ptr, 2, 3, S);
}
void column_major_load_with_stride_math_int(int *Ptr, int S) {
- // COMMON-LABEL: define{{.*}} void @column_major_load_with_stride_math_int(i32* %Ptr, i32 %S)
- // COMMON: [[S:%.*]] = load i32, i32* %S.addr, align 4
- // COMMON-NEXT: [[STRIDE:%.*]] = add nsw i32 [[S]], 32
- // CHECK32-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 4
- // CHECK32-NEXT: call <80 x i32> @llvm.matrix.column.major.load.v80i32.i32(i32* align 4 [[PTR]], i32 [[STRIDE]], i1 false, i32 4, i32 20)
- //
- // CHECK64-NEXT: [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64
- // CHECK64-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
- // CHECK64-NEXT: call <80 x i32> @llvm.matrix.column.major.load.v80i32.i64(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 4, i32 20)
+ // CHECK-LABEL: define{{.*}} void @column_major_load_with_stride_math_int(i32* %Ptr, i32 %S)
+ // CHECK: [[S:%.*]] = load i32, i32* %S.addr, align 4
+ // CHECK-NEXT: [[STRIDE:%.*]] = add nsw i32 [[S]], 32
+ // CHECK-NEXT: [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64
+ // CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
+ // CHECK-NEXT: call <80 x i32> @llvm.matrix.column.major.load.v80i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 4, i32 20)
ix4x20_t m_c = __builtin_matrix_column_major_load(Ptr, 4, 20, S + 32);
}
void column_major_load_with_stride_math_s_int(int *Ptr, short S) {
- // COMMON-LABEL: define{{.*}} void @column_major_load_with_stride_math_s_int(i32* %Ptr, i16 signext %S)
- // COMMON: [[S:%.*]] = load i16, i16* %S.addr, align 2
- // COMMON-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
- // COMMON-NEXT: [[STRIDE:%.*]] = add nsw i32 [[S_EXT]], 32
- // CHECK32-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 4
- // CHECK32-NEXT: %matrix = call <80 x i32> @llvm.matrix.column.major.load.v80i32.i32(i32* align 4 [[PTR]], i32 [[STRIDE]], i1 false, i32 4, i32 20)
- //
- // CHECK64-NEXT: [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64
- // CHECK64-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
- // CHECK64-NEXT: %matrix = call <80 x i32> @llvm.matrix.column.major.load.v80i32.i64(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 4, i32 20)
+ // CHECK-LABEL: define{{.*}} void @column_major_load_with_stride_math_s_int(i32* %Ptr, i16 signext %S)
+ // CHECK: [[S:%.*]] = load i16, i16* %S.addr, align 2
+ // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
+ // CHECK-NEXT: [[STRIDE:%.*]] = add nsw i32 [[S_EXT]], 32
+ // CHECK-NEXT: [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64
+ // CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
+ // CHECK-NEXT: %matrix = call <80 x i32> @llvm.matrix.column.major.load.v80i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 4, i32 20)
ix4x20_t m_c = __builtin_matrix_column_major_load(Ptr, 4, 20, S + 32);
}
void column_major_load_array1(double Ptr[25]) {
- // COMMON-LABEL: define{{.*}} void @column_major_load_array1(double* %Ptr)
- // CHECK32: [[ADDR:%.*]] = load double*, double** %Ptr.addr, align 4
- // CHECK32-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i32(double* align 4 [[ADDR]], i32 5, i1 false, i32 5, i32 5)
-
- // CHECK64: [[ADDR:%.*]] = load double*, double** %Ptr.addr, align 8
- // CHECK64-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i64(double* align 8 [[ADDR]], i64 5, i1 false, i32 5, i32 5)
+ // CHECK-LABEL: define{{.*}} void @column_major_load_array1(double* %Ptr)
+ // CHECK: [[ADDR:%.*]] = load double*, double** %Ptr.addr, align 8
+ // CHECK-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[ADDR]], i64 5, i1 false, i32 5, i32 5)
dx5x5_t m = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
}
void column_major_load_array2() {
  // CHECK-LABEL: define{{.*}} void @column_major_load_array2() #0 {
  // CHECK-NEXT: entry:
  // CHECK-NEXT: [[PTR:%.*]] = alloca [25 x double], align 16
  // CHECK: [[ARRAY_DEC:%.*]] = getelementptr inbounds [25 x double], [25 x double]* [[PTR]], i64 0, i64 0
  // CHECK-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 16 [[ARRAY_DEC]], i64 5, i1 false, i32 5, i32 5)
  double Ptr[25];
  dx5x5_t m = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
}
void column_major_load_const(const double *Ptr) {
- // COMMON-LABEL: define{{.*}} void @column_major_load_const(double* %Ptr)
- // CHECK32: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 4
- // CHECK32-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i32(double* align 4 [[PTR]], i32 5, i1 false, i32 5, i32 5)
- //
- // CHECK64: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
- // CHECK64-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i64(double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
+ // CHECK-LABEL: define{{.*}} void @column_major_load_const(double* %Ptr)
+ // CHECK: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
+ // CHECK-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
dx5x5_t m_a1 = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
}
void column_major_load_volatile(volatile double *Ptr) {
- // COMMON-LABEL: define{{.*}} void @column_major_load_volatile(double* %Ptr)
- // CHECK32: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 4
- // CHECK32-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i32(double* align 4 [[PTR]], i32 5, i1 true, i32 5, i32 5)
- //
- // CHECK64: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
- // CHECK64-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i64(double* align 8 [[PTR]], i64 5, i1 true, i32 5, i32 5)
+ // CHECK-LABEL: define{{.*}} void @column_major_load_volatile(double* %Ptr)
+ // CHECK: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
+ // CHECK-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[PTR]], i64 5, i1 true, i32 5, i32 5)
dx5x5_t m_a1 = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
}
void column_major_store_with_const_stride_double(double *Ptr) {
- // COMMON-LABEL: define{{.*}} void @column_major_store_with_const_stride_double(double* %Ptr)
- // CHECK32: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 4
- // CHECK32-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 4
- // CHECK32-NEXT: call void @llvm.matrix.column.major.store.v25f64.i32(<25 x double> [[M]], double* align 4 [[PTR]], i32 5, i1 false, i32 5, i32 5)
- //
- // CHECK64: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
- // CHECK64-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
- // CHECK64-NEXT: call void @llvm.matrix.column.major.store.v25f64.i64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
+ // CHECK-LABEL: define{{.*}} void @column_major_store_with_const_stride_double(double* %Ptr)
+ // CHECK: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+ // CHECK-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
+ // CHECK-NEXT: call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
dx5x5_t m;
__builtin_matrix_column_major_store(m, Ptr, 5);
}
void column_major_store_with_const_stride2_double(double *Ptr) {
- // COMMON-LABEL: define{{.*}} void @column_major_store_with_const_stride2_double(double* %Ptr)
- // CHECK32: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 4
- // CHECK32-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 4
- // CHECK32-NEXT: call void @llvm.matrix.column.major.store.v25f64.i32(<25 x double> [[M]], double* align 4 [[PTR]], i32 15, i1 false, i32 5, i32 5)
- //
- // CHECK64: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
- // CHECK64-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
- // CHECK64-NEXT: call void @llvm.matrix.column.major.store.v25f64.i64(<25 x double> [[M]], double* align 8 [[PTR]], i64 15, i1 false, i32 5, i32 5)
+ // CHECK-LABEL: define{{.*}} void @column_major_store_with_const_stride2_double(double* %Ptr)
+ // CHECK: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+ // CHECK-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
+ // CHECK-NEXT: call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 15, i1 false, i32 5, i32 5)
//
dx5x5_t m;
__builtin_matrix_column_major_store(m, Ptr, 2 * 3 + 9);
}
void column_major_store_with_stride_math_int(int *Ptr, int S) {
- // COMMON-LABEL: define{{.*}} void @column_major_store_with_stride_math_int(i32* %Ptr, i32 %S)
- // COMMON: [[M:%.*]] = load <80 x i32>, <80 x i32>* {{.*}}, align 4
- // CHECK32-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 4
- // CHECK64-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
- // COMMON-NEXT: [[S:%.*]] = load i32, i32* %S.addr, align 4
- // COMMON-NEXT: [[ADD:%.*]] = add nsw i32 [[S]], 32
- // CHECK32-NEXT: call void @llvm.matrix.column.major.store.v80i32.i32(<80 x i32> [[M]], i32* align 4 [[PTR]], i32 [[ADD]], i1 false, i32 4, i32 20)
- //
- // CHECK64-NEXT: [[IDX:%.*]] = sext i32 [[ADD]] to i64
- // CHECK64-NEXT: call void @llvm.matrix.column.major.store.v80i32.i64(<80 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX]], i1 false, i32 4, i32 20)
+ // CHECK-LABEL: define{{.*}} void @column_major_store_with_stride_math_int(i32* %Ptr, i32 %S)
+ // CHECK: [[M:%.*]] = load <80 x i32>, <80 x i32>* {{.*}}, align 4
+ // CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
+ // CHECK-NEXT: [[S:%.*]] = load i32, i32* %S.addr, align 4
+ // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[S]], 32
+ // CHECK-NEXT: [[IDX:%.*]] = sext i32 [[ADD]] to i64
+ // CHECK-NEXT: call void @llvm.matrix.column.major.store.v80i32(<80 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX]], i1 false, i32 4, i32 20)
ix4x20_t m;
__builtin_matrix_column_major_store(m, Ptr, S + 32);
}
void column_major_store_with_stride_math_s_int(int *Ptr, short S) {
- // COMMON-LABEL: define{{.*}} void @column_major_store_with_stride_math_s_int(i32* %Ptr, i16 signext %S)
- // COMMON: [[M:%.*]] = load <80 x i32>, <80 x i32>* {{.*}}, align 4
- // CHECK32-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 4
- // CHECK64-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
- // COMMON-NEXT: [[S:%.*]] = load i16, i16* %S.addr, align 2
- // COMMON-NEXT: [[EXT:%.*]] = sext i16 [[S]] to i32
- // COMMON-NEXT: [[ADD:%.*]] = add nsw i32 [[EXT]], 2
- // CHECK32-NEXT: call void @llvm.matrix.column.major.store.v80i32.i32(<80 x i32> [[M]], i32* align 4 [[PTR]], i32 [[ADD]], i1 false, i32 4, i32 20)
- //
- // CHECK64-NEXT: [[IDX:%.*]] = sext i32 [[ADD]] to i64
- // CHECK64-NEXT: call void @llvm.matrix.column.major.store.v80i32.i64(<80 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX]], i1 false, i32 4, i32 20)
+ // CHECK-LABEL: define{{.*}} void @column_major_store_with_stride_math_s_int(i32* %Ptr, i16 signext %S)
+ // CHECK: [[M:%.*]] = load <80 x i32>, <80 x i32>* {{.*}}, align 4
+ // CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
+ // CHECK-NEXT: [[S:%.*]] = load i16, i16* %S.addr, align 2
+ // CHECK-NEXT: [[EXT:%.*]] = sext i16 [[S]] to i32
+ // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[EXT]], 2
+ // CHECK-NEXT: [[IDX:%.*]] = sext i32 [[ADD]] to i64
+ // CHECK-NEXT: call void @llvm.matrix.column.major.store.v80i32(<80 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX]], i1 false, i32 4, i32 20)
ix4x20_t m;
__builtin_matrix_column_major_store(m, Ptr, S + 2);
}
void column_major_store_array1(double Ptr[25]) {
- // COMMON-LABEL: define{{.*}} void @column_major_store_array1(double* %Ptr)
- // CHECK32: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 4
- // CHECK32-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 4
- // CHECK32-NEXT: call void @llvm.matrix.column.major.store.v25f64.i32(<25 x double> [[M]], double* align 4 [[PTR]], i32 5, i1 false, i32 5, i32 5)
- //
- // CHECK64: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
- // CHECK64-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
- // CHECK64-NEXT: call void @llvm.matrix.column.major.store.v25f64.i64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
+ // CHECK-LABEL: define{{.*}} void @column_major_store_array1(double* %Ptr)
+ // CHECK: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+ // CHECK-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
+ // CHECK-NEXT: call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
dx5x5_t m;
__builtin_matrix_column_major_store(m, Ptr, 5);
}
void column_major_store_array2() {
  // CHECK-LABEL: define{{.*}} void @column_major_store_array2()
  // CHECK: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [25 x double], [25 x double]* %Ptr, i64 0, i64 0
  // CHECK-NEXT: call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 16 [[PTR]], i64 5, i1 false, i32 5, i32 5)
  double Ptr[25];
  dx5x5_t m;
}
void column_major_store_volatile(volatile double *Ptr) {
- // COMMON-LABEL: define{{.*}} void @column_major_store_volatile(double* %Ptr) #0 {
- // CHECK32: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 4
- // CHECK32-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 4
- // CHECK32-NEXT: call void @llvm.matrix.column.major.store.v25f64.i32(<25 x double> [[M]], double* align 4 [[PTR]], i32 5, i1 true, i32 5, i32 5)
- //
- // CHECK64: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
- // CHECK64-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
- // CHECK64-NEXT: call void @llvm.matrix.column.major.store.v25f64.i64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 true, i32 5, i32 5)
+ // CHECK-LABEL: define{{.*}} void @column_major_store_volatile(double* %Ptr) #0 {
+ // CHECK: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+ // CHECK-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
+ // CHECK-NEXT: call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 true, i32 5, i32 5)
dx5x5_t m;
__builtin_matrix_column_major_store(m, Ptr, 5);
// CHECK-LABEL: define linkonce_odr <40 x double> @_Z29column_major_load_with_strideIdLj10ELj4ELj15EEu11matrix_typeIXT0_EXT1_ET_EPS0_(double* %Ptr)
// CHECK: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
- // CHECK-NEXT: call <40 x double> @llvm.matrix.column.major.load.v40f64.i64(double* align 8 [[PTR]], i64 15, i1 false, i32 10, i32 4)
+ // CHECK-NEXT: call <40 x double> @llvm.matrix.column.major.load.v40f64(double* align 8 [[PTR]], i64 15, i1 false, i32 10, i32 4)
matrix_t<double, 10, 4> M1 = column_major_load_with_stride<double, 10, 4, 15>(Ptr);
}
// CHECK-LABEL: define linkonce_odr <6 x i32> @_Z29column_major_load_with_strideIiLj3ELj2ELj12EEu11matrix_typeIXT0_EXT1_ET_EPS0_(i32* %Ptr)
// CHECK: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
  // CHECK-NEXT: call <6 x i32> @llvm.matrix.column.major.load.v6i32(i32* align 4 [[PTR]], i64 12, i1 false, i32 3, i32 2)
matrix_t<int, 3, 2> M1 = column_major_load_with_stride<int, 3, 2, 12>(Ptr);
}
// CHECK-NEXT: [[STRIDE:%.*]] = call i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* {{[^,]*}} [[W]])
// CHECK-NEXT: [[STRIDE_EXT:%.*]] = zext i32 [[STRIDE]] to i64
// CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
  // CHECK-NEXT: call <4 x i32> @llvm.matrix.column.major.load.v4i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 2, i32 2)
matrix_t<int, 2, 2> M1 = __builtin_matrix_column_major_load(Ptr, 2, 2, W);
}
void test_column_major_load_constexpr_num_rows(int *Ptr) {
  // CHECK-LABEL: define{{.*}} void @_Z41test_column_major_load_constexpr_num_rowsPi(i32* %Ptr)
  // CHECK: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
  // CHECK-NEXT: call <6 x i32> @llvm.matrix.column.major.load.v6i32(i32* align 4 [[PTR]], i64 3, i1 false, i32 3, i32 2)
  matrix_t<int, 3, 2> M1 = __builtin_matrix_column_major_load(Ptr, constexpr3(), 2, 3);
}
void test_column_major_load_constexpr_num_columns(int *Ptr) {
  // CHECK-LABEL: define{{.*}} void @_Z44test_column_major_load_constexpr_num_columnsPi(i32* %Ptr)
  // CHECK: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
  // CHECK-NEXT: call <2 x i32> @llvm.matrix.column.major.load.v2i32(i32* align 4 [[PTR]], i64 3, i1 false, i32 2, i32 1)
  matrix_t<int, 2, 1> M1 = __builtin_matrix_column_major_load(Ptr, 2, constexpr1(), 3);
}
void test_column_major_load_constexpr_num_columns_temp(int *Ptr) {
  // CHECK-LABEL: define{{.*}} void @_Z49test_column_major_load_constexpr_num_columns_tempPi(i32* %Ptr)
  // CHECK: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
  // CHECK-NEXT: call <10 x i32> @llvm.matrix.column.major.load.v10i32(i32* align 4 [[PTR]], i64 3, i1 false, i32 2, i32 5)
  matrix_t<int, 2, 5> M1 = __builtin_matrix_column_major_load(Ptr, 2, constexpr_plus1<4>(), 3);
}
// CHECK: [[STRIDE:%.*]] = call i32 @_Z10constexpr3v()
// CHECK-NEXT: [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64
// CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
- // CHECK-NEXT: call <4 x i32> @llvm.matrix.column.major.load.v4i32.i64(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 2, i32 2)
+ // CHECK-NEXT: call <4 x i32> @llvm.matrix.column.major.load.v4i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 2, i32 2)
matrix_t<int, 2, 2> M1 = __builtin_matrix_column_major_load(Ptr, 2, 2, constexpr3());
}
// CHECK-LABEL: define linkonce_odr void @_Z30column_major_store_with_strideIdLj10ELj4ELj15EEvRu11matrix_typeIXT0_EXT1_ET_EPS0_([40 x double]* nonnull align 8 dereferenceable(320) %m, double* %Ptr)
// CHECK: [[M:%.*]] = load <40 x double>, <40 x double>* {{.*}}, align 8
// CHECK-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
- // CHECK-NEXT: call void @llvm.matrix.column.major.store.v40f64.i64(<40 x double> [[M]], double* align 8 [[PTR]], i64 15, i1 false, i32 10, i32 4)
+ // CHECK-NEXT: call void @llvm.matrix.column.major.store.v40f64(<40 x double> [[M]], double* align 8 [[PTR]], i64 15, i1 false, i32 10, i32 4)
matrix_t<double, 10, 4> M1;
column_major_store_with_stride<double, 10, 4, 15>(M1, Ptr);
// CHECK-LABEL: define linkonce_odr void @_Z30column_major_store_with_strideIiLj3ELj2ELj3EEvRu11matrix_typeIXT0_EXT1_ET_EPS0_([6 x i32]* nonnull align 4 dereferenceable(24) %m, i32* %Ptr)
// CHECK: [[M:%.*]] = load <6 x i32>, <6 x i32>* {{.*}}, align 4
// CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
- // CHECK-NEXT: call void @llvm.matrix.column.major.store.v6i32.i64(<6 x i32> [[M]], i32* align 4 [[PTR]], i64 3, i1 false, i32 3, i32 2)
+ // CHECK-NEXT: call void @llvm.matrix.column.major.store.v6i32(<6 x i32> [[M]], i32* align 4 [[PTR]], i64 3, i1 false, i32 3, i32 2)
matrix_t<int, 3, 2> M1;
column_major_store_with_stride<int, 3, 2, 3>(M1, Ptr);
// CHECK-NEXT: [[W:%.*]] = load %struct.UnsignedWrapper*, %struct.UnsignedWrapper** %W.addr, align 8
// CHECK-NEXT: [[IDX:%.*]] = call i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* {{[^,]*}} [[W]])
// CHECK-NEXT: [[IDX_EXT:%.*]] = zext i32 [[IDX]] to i64
- // CHECK-NEXT: call void @llvm.matrix.column.major.store.v4i32.i64(<4 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX_EXT]], i1 false, i32 2, i32 2)
+ // CHECK-NEXT: call void @llvm.matrix.column.major.store.v4i32(<4 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX_EXT]], i1 false, i32 2, i32 2)
matrix_t<int, 2, 2> M1;
__builtin_matrix_column_major_store(M1, Ptr, W);
// CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
// CHECK-NEXT: [[IDX:%.*]] = call i32 @_Z10constexpr3v()
// CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[IDX]] to i64
- // CHECK-NEXT: call void @llvm.matrix.column.major.store.v4i32.i64(<4 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX_EXT]], i1 false, i32 2, i32 2)
+ // CHECK-NEXT: call void @llvm.matrix.column.major.store.v4i32(<4 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX_EXT]], i1 false, i32 2, i32 2)
matrix_t<int, 2, 2> M;
__builtin_matrix_column_major_store(M, Ptr, constexpr3());
; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector!
; CHECK-NEXT: immarg operand has non-immediate parameter
; CHECK-NEXT: i32 %arg
-; CHECK-NEXT: %result.3 = call <6 x float> @llvm.matrix.column.major.load.v6f32.i64(float* %n, i64 2, i1 true, i32 3, i32 %arg)
- %result.0 = call <4 x float> @llvm.matrix.column.major.load.v4f32.i64(float* %m, i64 0, i1 false, i32 0, i32 0)
- %result.1 = call <4 x float> @llvm.matrix.column.major.load.v4f32.i64(float* %m, i64 2, i1 false, i32 1, i32 2)
- %result.2 = call <6 x float> @llvm.matrix.column.major.load.v6f32.i64(float* %n, i64 2, i1 true, i32 3, i32 3)
- %result.3 = call <6 x float> @llvm.matrix.column.major.load.v6f32.i64(float* %n, i64 2, i1 true, i32 3, i32 %arg)
+; CHECK-NEXT: %result.3 = call <6 x float> @llvm.matrix.column.major.load.v6f32(float* %n, i64 2, i1 true, i32 3, i32 %arg)
+ %result.0 = call <4 x float> @llvm.matrix.column.major.load.v4f32(float* %m, i64 0, i1 false, i32 0, i32 0)
+ %result.1 = call <4 x float> @llvm.matrix.column.major.load.v4f32(float* %m, i64 2, i1 false, i32 1, i32 2)
+ %result.2 = call <6 x float> @llvm.matrix.column.major.load.v6f32(float* %n, i64 2, i1 true, i32 3, i32 3)
+ %result.3 = call <6 x float> @llvm.matrix.column.major.load.v6f32(float* %n, i64 2, i1 true, i32 3, i32 %arg)
ret <4 x float> %result.1
}
; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector!
; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector!
; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector!
- call void @llvm.matrix.column.major.store.v4f32.i64(<4 x float> zeroinitializer, float* %m, i64 0, i1 false, i32 0, i32 0)
- call void @llvm.matrix.column.major.store.v4f32.i64(<4 x float> zeroinitializer, float* %m, i64 2, i1 false, i32 1, i32 2)
- call void @llvm.matrix.column.major.store.v6f32.i64(<6 x float> zeroinitializer, float* %n, i64 2, i1 false, i32 3, i32 3)
- call void @llvm.matrix.column.major.store.v6f32.i64(<6 x float> zeroinitializer, float* %n, i64 %arg, i1 false, i32 3, i32 3)
+ call void @llvm.matrix.column.major.store.v4f32(<4 x float> zeroinitializer, float* %m, i64 0, i1 false, i32 0, i32 0)
+ call void @llvm.matrix.column.major.store.v4f32(<4 x float> zeroinitializer, float* %m, i64 2, i1 false, i32 1, i32 2)
+ call void @llvm.matrix.column.major.store.v6f32(<6 x float> zeroinitializer, float* %n, i64 2, i1 false, i32 3, i32 3)
+ call void @llvm.matrix.column.major.store.v6f32(<6 x float> zeroinitializer, float* %n, i64 %arg, i1 false, i32 3, i32 3)
ret void
}
; CHECK-NEXT: Intrinsic has incorrect argument type!
; CHECK-NEXT: <4 x float> (i32*, i64, i1, i32, i32)* @llvm.matrix.column.major.load.v4f32.pi32
; CHECK-NEXT: Intrinsic has incorrect argument type!
-; CHECK-NEXT: <4 x i32> (float*, i64, i1, i32, i32)* @llvm.matrix.column.major.load.v4i32.i64
+; CHECK-NEXT: <4 x i32> (float*, i64, i1, i32, i32)* @llvm.matrix.column.major.load.v4i32
;
%result.0 = call <4 x float> @llvm.matrix.column.major.load.v4f32.pi32(i32* %m, i64 2, i1 false, i32 2, i32 2)
- %result.1 = call <4 x i32> @llvm.matrix.column.major.load.v4i32.i64(float* %n, i64 2, i1 false, i32 2, i32 2)
+ %result.1 = call <4 x i32> @llvm.matrix.column.major.load.v4i32(float* %n, i64 2, i1 false, i32 2, i32 2)
ret <4 x float> %result.0
}
define void @column.major_store_mixed_types(float* %m, i32* %n, i64 %arg) {
;
-; CHECK-NEXT: Intrinsic has incorrect argument type!
+; CHECK-NEXT: Intrinsic has incorrect argument type!
; CHECK-NEXT: void (<4 x i32>, float*, i64, i1, i32, i32)* @llvm.matrix.column.major.store.v4i32.vi32
-; CHECK-NEXT: Intrinsic has incorrect argument type!
+; CHECK-NEXT: Intrinsic has incorrect argument type!
; CHECK-NEXT: void (<4 x float>, i32*, i64, i1, i32, i32)* @llvm.matrix.column.major.store.v4f32.pi32
;
call void @llvm.matrix.column.major.store.v4i32.vi32(<4 x i32> zeroinitializer, float* %m, i64 2, i1 false, i32 2, i32 2)
define <4 x float> @column.major_load_stride_too_small(float* %m, i32 %arg) {
;
; CHECK-NEXT: Stride must be greater or equal than the number of rows!
-; CHECK-NEXT: <4 x float> (float*, i64, i1, i32, i32)* @llvm.matrix.column.major.load.v4f32.i64
+; CHECK-NEXT: <4 x float> (float*, i64, i1, i32, i32)* @llvm.matrix.column.major.load.v4f32
;
- %result.1 = call <4 x float> @llvm.matrix.column.major.load.v4f32.i64(float* %m, i64 1, i1 false, i32 2, i32 2)
+ %result.1 = call <4 x float> @llvm.matrix.column.major.load.v4f32(float* %m, i64 1, i1 false, i32 2, i32 2)
ret <4 x float> %result.1
}
define void @column.major_store_stride_too_small(float* %m, i64 %arg) {
;
; CHECK-NEXT: Stride must be greater or equal than the number of rows!
-; CHECK-NEXT: void (<4 x float>, float*, i64, i1, i32, i32)* @llvm.matrix.column.major.store.v4f32.i64
+; CHECK-NEXT: void (<4 x float>, float*, i64, i1, i32, i32)* @llvm.matrix.column.major.store.v4f32
;
- call void @llvm.matrix.column.major.store.v4f32.i64(<4 x float> zeroinitializer, float* %m, i64 1, i1 false, i32 2, i32 2)
+ call void @llvm.matrix.column.major.store.v4f32(<4 x float> zeroinitializer, float* %m, i64 1, i1 false, i32 2, i32 2)
ret void
}
-declare <4 x i32> @llvm.matrix.column.major.load.v4i32.i64(float*, i64, i1, i32, i32)
+declare <4 x i32> @llvm.matrix.column.major.load.v4i32(float*, i64, i1, i32, i32)
declare <4 x float> @llvm.matrix.column.major.load.v4f32.pi32(i32*, i64, i1, i32, i32)
-declare <4 x float> @llvm.matrix.column.major.load.v4f32.i64(float*, i64, i1, i32, i32)
-declare <6 x float> @llvm.matrix.column.major.load.v6f32.i64(float*, i64, i1, i32, i32)
+declare <4 x float> @llvm.matrix.column.major.load.v4f32(float*, i64, i1, i32, i32)
+declare <6 x float> @llvm.matrix.column.major.load.v6f32(float*, i64, i1, i32, i32)
-declare void @llvm.matrix.column.major.store.v4f32.i64(<4 x float>, float*, i64, i1, i32, i32)
-declare void @llvm.matrix.column.major.store.v6f32.i64(<6 x float>, float*, i64, i1, i32, i32)
+declare void @llvm.matrix.column.major.store.v4f32(<4 x float>, float*, i64, i1, i32, i32)
+declare void @llvm.matrix.column.major.store.v6f32(<6 x float>, float*, i64, i1, i32, i32)
declare void @llvm.matrix.column.major.store.v4i32.vi32(<4 x i32>, float*, i64, i1, i32, i32)
declare void @llvm.matrix.column.major.store.v4f32.pi32(<4 x float>, i32*, i64, i1, i32, i32)
declare void @llvm.matrix.column.major.store.v4f32p0.p0v4f32(<4 x float*>, <4 x float>*, i64, i1, i32, i32)