From: Mingming Liu Date: Tue, 20 Dec 2022 21:53:34 +0000 (-0800) Subject: [AArch64] Guard {vmull_p64, vmull_high_p64} with 'aes' target guard. X-Git-Tag: upstream/17.0.6~22931 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c847e22db33e4625b1ad58b631f57d7d1e1944ca;p=platform%2Fupstream%2Fllvm.git [AArch64] Guard {vmull_p64, vmull_high_p64} with 'aes' target guard. The 'aes' target guard currently covers both FEAT_AES and FEAT_PMULL. This way, C/C++ code that uses these intrinsics without enabling the required extension gets a clearer diagnostic. - Before, compilation crashed with an internal LLVM ISel error message (see issue 59599: https://github.com/llvm/llvm-project/issues/59599). - After, clang reports that target feature 'aes' is required in the compile command. Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D140432 --- diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 4288e9e..94dfe80 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -957,8 +957,10 @@ def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "(>Q)(>Q)QQ", "si", OP_QDMLALHi>; def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "(>Q)(>Q)Q1", "si", OP_QDMLALHi_N>; def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "(>Q)(>Q)QQ", "si", OP_QDMLSLHi>; def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "(>Q)(>Q)Q1", "si", OP_QDMLSLHi_N>; -def VMULL_P64 : SInst<"vmull", "(1>)11", "Pl">; -def VMULL_HIGH_P64 : SOpInst<"vmull_high", "(1>)..", "HPl", OP_MULLHi_P64>; +let TargetGuard = "aes" in { + def VMULL_P64 : SInst<"vmull", "(1>)11", "Pl">; + def VMULL_HIGH_P64 : SOpInst<"vmull_high", "(1>)..", "HPl", OP_MULLHi_P64>; +} //////////////////////////////////////////////////////////////////////////////// diff --git a/clang/test/CodeGen/aarch64-poly128.c b/clang/test/CodeGen/aarch64-poly128.c index d73390c..f188632 100644 --- a/clang/test/CodeGen/aarch64-poly128.c +++ b/clang/test/CodeGen/aarch64-poly128.c @@ -28,8 +28,8 @@ void 
test_vstrq_p128(poly128_t * ptr, poly128_t val) { // CHECK-LABEL: define {{[^@]+}}@test_vldrq_p128 // CHECK-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[PTR]], align 16 -// CHECK-NEXT: ret i128 [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr [[PTR]], align 16 +// CHECK-NEXT: ret i128 [[TMP0]] // poly128_t test_vldrq_p128(poly128_t * ptr) { return vldrq_p128(ptr); @@ -39,9 +39,9 @@ poly128_t test_vldrq_p128(poly128_t * ptr) { // CHECK-LABEL: define {{[^@]+}}@test_ld_st_p128 // CHECK-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[PTR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr [[PTR]], align 16 // CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i128, ptr [[PTR]], i64 1 -// CHECK-NEXT: store i128 [[TMP2]], ptr [[ADD_PTR]], align 16 +// CHECK-NEXT: store i128 [[TMP0]], ptr [[ADD_PTR]], align 16 // CHECK-NEXT: ret void // void test_ld_st_p128(poly128_t * ptr) { @@ -50,18 +50,18 @@ void test_ld_st_p128(poly128_t * ptr) { } // CHECK-LABEL: define {{[^@]+}}@test_vmull_p64 -// CHECK-SAME: (i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-SAME: (i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VMULL_P64_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 [[A]], i64 [[B]]) // CHECK-NEXT: [[VMULL_P641_I:%.*]] = bitcast <16 x i8> [[VMULL_P64_I]] to i128 // CHECK-NEXT: ret i128 [[VMULL_P641_I]] // -poly128_t test_vmull_p64(poly64_t a, poly64_t b) { +__attribute__((target("aes"))) poly128_t test_vmull_p64(poly64_t a, poly64_t b) { return vmull_p64(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vmull_high_p64 -// CHECK-SAME: (<2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-SAME: (<2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: 
[[SHUFFLE_I5:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[A]], <1 x i32> <i32 1> // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[SHUFFLE_I5]] to i64 @@ -71,12 +71,12 @@ poly128_t test_vmull_p64(poly64_t a, poly64_t b) { // CHECK-NEXT: [[VMULL_P641_I_I:%.*]] = bitcast <16 x i8> [[VMULL_P64_I_I]] to i128 // CHECK-NEXT: ret i128 [[VMULL_P641_I_I]] // -poly128_t test_vmull_high_p64(poly64x2_t a, poly64x2_t b) { +__attribute__((target("aes"))) poly128_t test_vmull_high_p64(poly64x2_t a, poly64x2_t b) { return vmull_high_p64(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_s8 -// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] @@ -86,7 +86,7 @@ poly128_t test_vreinterpretq_p128_s8(int8x16_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_s16 -// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] @@ -96,7 +96,7 @@ poly128_t test_vreinterpretq_p128_s16(int16x8_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_s32 -// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] @@ -106,7 +106,7 @@ poly128_t test_vreinterpretq_p128_s32(int32x4_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_s64 -// CHECK-SAME: (<2 x i64> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<2 x i64> noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] @@ -116,7 +116,7 @@ poly128_t 
test_vreinterpretq_p128_s64(int64x2_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_u8 -// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] @@ -126,7 +126,7 @@ poly128_t test_vreinterpretq_p128_u8(uint8x16_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_u16 -// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] @@ -136,7 +136,7 @@ poly128_t test_vreinterpretq_p128_u16(uint16x8_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_u32 -// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] @@ -146,7 +146,7 @@ poly128_t test_vreinterpretq_p128_u32(uint32x4_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_u64 -// CHECK-SAME: (<2 x i64> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<2 x i64> noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] @@ -156,7 +156,7 @@ poly128_t test_vreinterpretq_p128_u64(uint64x2_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_f32 -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] @@ -166,7 +166,7 @@ poly128_t test_vreinterpretq_p128_f32(float32x4_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_f64 -// CHECK-SAME: (<2 x 
double> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<2 x double> noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] @@ -176,7 +176,7 @@ poly128_t test_vreinterpretq_p128_f64(float64x2_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_p8 -// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] @@ -186,7 +186,7 @@ poly128_t test_vreinterpretq_p128_p8(poly8x16_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_p16 -// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] @@ -196,7 +196,7 @@ poly128_t test_vreinterpretq_p128_p16(poly16x8_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_p64 -// CHECK-SAME: (<2 x i64> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<2 x i64> noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] @@ -206,7 +206,7 @@ poly128_t test_vreinterpretq_p128_p64(poly64x2_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_s8_p128 -// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <16 x i8> // CHECK-NEXT: ret <16 x i8> [[TMP0]] @@ -216,7 +216,7 @@ int8x16_t test_vreinterpretq_s8_p128(poly128_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_s16_p128 -// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = 
bitcast i128 [[A]] to <8 x i16> // CHECK-NEXT: ret <8 x i16> [[TMP0]] @@ -226,7 +226,7 @@ int16x8_t test_vreinterpretq_s16_p128(poly128_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_s32_p128 -// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -236,7 +236,7 @@ int32x4_t test_vreinterpretq_s32_p128(poly128_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_s64_p128 -// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <2 x i64> // CHECK-NEXT: ret <2 x i64> [[TMP0]] @@ -246,7 +246,7 @@ int64x2_t test_vreinterpretq_s64_p128(poly128_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_u8_p128 -// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <16 x i8> // CHECK-NEXT: ret <16 x i8> [[TMP0]] @@ -256,7 +256,7 @@ uint8x16_t test_vreinterpretq_u8_p128(poly128_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_u16_p128 -// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <8 x i16> // CHECK-NEXT: ret <8 x i16> [[TMP0]] @@ -266,7 +266,7 @@ uint16x8_t test_vreinterpretq_u16_p128(poly128_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_u32_p128 -// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -276,7 +276,7 @@ uint32x4_t test_vreinterpretq_u32_p128(poly128_t a) { } // CHECK-LABEL: define 
{{[^@]+}}@test_vreinterpretq_u64_p128 -// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <2 x i64> // CHECK-NEXT: ret <2 x i64> [[TMP0]] @@ -286,7 +286,7 @@ uint64x2_t test_vreinterpretq_u64_p128(poly128_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_f32_p128 -// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <4 x float> // CHECK-NEXT: ret <4 x float> [[TMP0]] @@ -296,7 +296,7 @@ float32x4_t test_vreinterpretq_f32_p128(poly128_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_f64_p128 -// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <2 x double> // CHECK-NEXT: ret <2 x double> [[TMP0]] @@ -306,7 +306,7 @@ float64x2_t test_vreinterpretq_f64_p128(poly128_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p8_p128 -// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <16 x i8> // CHECK-NEXT: ret <16 x i8> [[TMP0]] @@ -316,7 +316,7 @@ poly8x16_t test_vreinterpretq_p8_p128(poly128_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p16_p128 -// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <8 x i16> // CHECK-NEXT: ret <8 x i16> [[TMP0]] @@ -326,7 +326,7 @@ poly16x8_t test_vreinterpretq_p16_p128(poly128_t a) { } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p64_p128 -// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] { // 
CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <2 x i64> // CHECK-NEXT: ret <2 x i64> [[TMP0]] @@ -334,5 +334,3 @@ poly16x8_t test_vreinterpretq_p16_p128(poly128_t a) { poly64x2_t test_vreinterpretq_p64_p128(poly128_t a) { return vreinterpretq_p64_p128(a); } - - diff --git a/clang/test/Sema/aarch64-neon-target.c b/clang/test/Sema/aarch64-neon-target.c index 416ac23..18e4f98 100644 --- a/clang/test/Sema/aarch64-neon-target.c +++ b/clang/test/Sema/aarch64-neon-target.c @@ -60,7 +60,7 @@ void test_v85(float32x4_t v4f32) { vrnd32xq_f32(v4f32); } -void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t v16i8, uint8x8_t v8i8, float32x2_t v2f32, float32x4_t v4f32, float16x4_t v4f16, float64x2_t v2f64, bfloat16x4_t v4bf16, __bf16 bf16) { +void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t v16i8, uint8x8_t v8i8, float32x2_t v2f32, float32x4_t v4f32, float16x4_t v4f16, float64x2_t v2f64, bfloat16x4_t v4bf16, __bf16 bf16, poly64_t poly64, poly64x2_t poly64x2) { // dotprod vdot_u32(v2i32, v8i8, v8i8); // expected-error {{always_inline function 'vdot_u32' requires target feature 'dotprod'}} vdot_laneq_u32(v2i32, v8i8, v16i8, 1); // expected-error {{always_inline function 'vdot_u32' requires target feature 'dotprod'}} @@ -92,4 +92,8 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t vcmlaq_rot270_laneq_f64(v2f64, v2f64, v2f64, 1); // expected-error {{always_inline function 'vcmlaq_rot270_f64' requires target feature 'v8.3a'}} // 8.5 - frint vrnd32xq_f32(v4f32); // expected-error {{always_inline function 'vrnd32xq_f32' requires target feature 'v8.5a'}} + + vmull_p64(poly64, poly64); // expected-error {{always_inline function 'vmull_p64' requires target feature 'aes'}} + vmull_high_p64(poly64x2, poly64x2); // expected-error {{always_inline function 'vmull_high_p64' requires target feature 'aes'}} + }