From: Pirama Arumuga Nainar Date: Thu, 23 Apr 2015 17:32:25 +0000 (+0000) Subject: [AArch64] Add nvcast patterns for v4f16 and v8f16 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=745615ca0059ac89b4871d8426f57a9e6f283fa7;p=platform%2Fupstream%2Fllvm.git [AArch64] Add nvcast patterns for v4f16 and v8f16 Summary: Constant stores of f16 vectors can create NvCast nodes from various operand types to v4f16 or v8f16 depending on patterns in the stored constants. This patch adds nvcast rules with v4f16 and v8f16 values. AArch64TargetLowering::LowerBUILD_VECTOR (in AArch64ISelLowering.cpp) has the details on which constant patterns generate the nvcast nodes. Reviewers: jmolloy, srhines, ab Subscribers: rengolin, aemerson, llvm-commits Differential Revision: http://reviews.llvm.org/D9201 llvm-svn: 235610 --- diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index f7db50a..92d4460 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5128,22 +5128,26 @@ def : Pat<(trap), (BRK 1)>; // Natural vector casts (64 bit) def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 
FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>; @@ -5158,22 +5162,26 @@ def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; // Natural vector casts (128 bit) def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast 
(v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; diff --git a/llvm/test/CodeGen/AArch64/fp16-vector-nvcast.ll b/llvm/test/CodeGen/AArch64/fp16-vector-nvcast.ll new file mode 100644 index 0000000..83e0df7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fp16-vector-nvcast.ll @@ -0,0 +1,89 @@ +; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s + +; Test pattern (v4f16 (AArch64NvCast (v2i32 FPR64:$src))) +define void @nvcast_v2i32(<4 x half>* %a) #0 { +; CHECK-LABEL: nvcast_v2i32: +; CHECK-NEXT: movi v[[REG:[0-9]+]].2s, #0xab, lsl #16 +; CHECK-NEXT: str d[[REG]], [x0] +; CHECK-NEXT: ret + store volatile <4 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <4 x half>* %a + ret void +} + + +; Test pattern (v4f16 (AArch64NvCast (v4i16 FPR64:$src))) +define void @nvcast_v4i16(<4 x half>* %a) #0 { +; CHECK-LABEL: nvcast_v4i16: +; CHECK-NEXT: movi v[[REG:[0-9]+]].4h, #0xab +; CHECK-NEXT: str d[[REG]], [x0] +; CHECK-NEXT: ret + store volatile <4 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <4 x half>* %a + ret void +} + + +; Test pattern (v4f16 (AArch64NvCast (v8i8 FPR64:$src))) +define void @nvcast_v8i8(<4 x half>* %a) #0 { +; CHECK-LABEL: nvcast_v8i8: +; CHECK-NEXT: movi v[[REG:[0-9]+]].8b, #0xab +; CHECK-NEXT: str d[[REG]], [x0] +; CHECK-NEXT: ret + store volatile <4 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <4 x half>* %a + ret void +} + + +; Test pattern (v4f16 (AArch64NvCast (f64 FPR64:$src))) +define void @nvcast_f64(<4 x half>* %a) #0 { +; CHECK-LABEL: nvcast_f64: +; CHECK-NEXT: movi d[[REG:[0-9]+]], #0000000000000000 +; 
CHECK-NEXT: str d[[REG]], [x0] +; CHECK-NEXT: ret + store volatile <4 x half> zeroinitializer, <4 x half>* %a + ret void +} + +; Test pattern (v8f16 (AArch64NvCast (v4i32 FPR128:$src))) +define void @nvcast_v4i32(<8 x half>* %a) #0 { +; CHECK-LABEL: nvcast_v4i32: +; CHECK-NEXT: movi v[[REG:[0-9]+]].4s, #0xab, lsl #16 +; CHECK-NEXT: str q[[REG]], [x0] +; CHECK-NEXT: ret + store volatile <8 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <8 x half>* %a + ret void +} + + +; Test pattern (v8f16 (AArch64NvCast (v8i16 FPR128:$src))) +define void @nvcast_v8i16(<8 x half>* %a) #0 { +; CHECK-LABEL: nvcast_v8i16: +; CHECK-NEXT: movi v[[REG:[0-9]+]].8h, #0xab +; CHECK-NEXT: str q[[REG]], [x0] +; CHECK-NEXT: ret + store volatile <8 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <8 x half>* %a + ret void +} + + +; Test pattern (v8f16 (AArch64NvCast (v16i8 FPR128:$src))) +define void @nvcast_v16i8(<8 x half>* %a) #0 { +; CHECK-LABEL: nvcast_v16i8: +; CHECK-NEXT: movi v[[REG:[0-9]+]].16b, #0xab +; CHECK-NEXT: str q[[REG]], [x0] +; CHECK-NEXT: ret + store volatile <8 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <8 x half>* %a + ret void +} + + +; Test pattern (v8f16 (AArch64NvCast (v2i64 FPR128:$src))) +define void @nvcast_v2i64(<8 x half>* %a) #0 { +; CHECK-LABEL: nvcast_v2i64: +; CHECK-NEXT: movi v[[REG:[0-9]+]].2d, #0000000000000000 +; CHECK-NEXT: str q[[REG]], [x0] +; CHECK-NEXT: ret + store volatile <8 x half> zeroinitializer, <8 x half>* %a + ret void +} + +attributes #0 = { nounwind }