From e725669483225748882ae385d3de8829964725e3 Mon Sep 17 00:00:00 2001 From: Kit Barton Date: Tue, 1 Mar 2016 20:51:57 +0000 Subject: [PATCH] [Power9] Implement new vector compare, extract, insert instructions This change implements the following vector operations: - Vector Compare Not Equal - vcmpneb(.) vcmpneh(.) vcmpnew(.) - vcmpnezb(.) vcmpnezh(.) vcmpnezw(.) - Vector Extract Unsigned - vextractub vextractuh vextractuw vextractd - vextublx vextubrx vextuhlx vextuhrx vextuwlx vextuwrx - Vector Insert - vinsertb vinserth vinsertw vinsertd 26 instructions. Phabricator: http://reviews.llvm.org/D15916 llvm-svn: 262392 --- llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 65 ++++++++++++++++ llvm/lib/Target/PowerPC/README_P9.txt | 31 ++++++++ .../MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt | 80 ++++++++++++++++++++ llvm/test/MC/PowerPC/ppc64-encoding-vmx.s | 88 ++++++++++++++++++++++ 4 files changed, 264 insertions(+) diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 5367468..989946c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1213,3 +1213,68 @@ def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast", int_ppc_altivec_crypto_vncipherlast, v2i64>; def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>; } // HasP8Crypto + +// The following altivec instructions were introduced in Power ISA 3.0 +def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">; +let Predicates = [HasP9Altivec] in { + +// Vector Compare Not Equal (Zero) +class P9VCMP xo, string asmstr, ValueType Ty> + : VXRForm_1; +class P9VCMPo xo, string asmstr, ValueType Ty> + : VXRForm_1 { + let Defs = [CR6]; + let RC = 1; +} + +// i8 element comparisons. +def VCMPNEB : P9VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>; +def VCMPNEBo : P9VCMPo< 7, "vcmpneb. 
$vD, $vA, $vB" , v16i8>; +def VCMPNEZB : P9VCMP <263, "vcmpnezb $vD, $vA, $vB" , v16i8>; +def VCMPNEZBo : P9VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>; + +// i16 element comparisons. +def VCMPNEH : P9VCMP < 71, "vcmpneh $vD, $vA, $vB" , v8i16>; +def VCMPNEHo : P9VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>; +def VCMPNEZH : P9VCMP <327, "vcmpnezh $vD, $vA, $vB" , v8i16>; +def VCMPNEZHo : P9VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>; + +// i32 element comparisons. +def VCMPNEW : P9VCMP <135, "vcmpnew $vD, $vA, $vB" , v4i32>; +def VCMPNEWo : P9VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>; +def VCMPNEZW : P9VCMP <391, "vcmpnezw $vD, $vA, $vB" , v4i32>; +def VCMPNEZWo : P9VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>; + +// VX-Form: [PO VRT / UIM VRB XO]. +// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent +// "/ UIM" (1 + 4 bit) +class VX1_VT5_UIM5_VB5 xo, string opc, list pattern> + : VXForm_1; + +class VX1_RT5_RA5_VB5 xo, string opc, list pattern> + : VXForm_1; + +// Vector Extract Unsigned +def VEXTRACTUB : VX1_VT5_UIM5_VB5<525, "vextractub", []>; +def VEXTRACTUH : VX1_VT5_UIM5_VB5<589, "vextractuh", []>; +def VEXTRACTUW : VX1_VT5_UIM5_VB5<653, "vextractuw", []>; +def VEXTRACTD : VX1_VT5_UIM5_VB5<717, "vextractd" , []>; + +// Vector Extract Unsigned Byte/Halfword/Word Left/Right-Indexed +def VEXTUBLX : VX1_RT5_RA5_VB5<1549, "vextublx", []>; +def VEXTUBRX : VX1_RT5_RA5_VB5<1805, "vextubrx", []>; +def VEXTUHLX : VX1_RT5_RA5_VB5<1613, "vextuhlx", []>; +def VEXTUHRX : VX1_RT5_RA5_VB5<1869, "vextuhrx", []>; +def VEXTUWLX : VX1_RT5_RA5_VB5<1677, "vextuwlx", []>; +def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "vextuwrx", []>; + +// Vector Insert Element Instructions +def VINSERTB : VX1_VT5_UIM5_VB5<781, "vinsertb", []>; +def VINSERTH : VX1_VT5_UIM5_VB5<845, "vinserth", []>; +def VINSERTW : VX1_VT5_UIM5_VB5<909, "vinsertw", []>; +def VINSERTD : VX1_VT5_UIM5_VB5<973, "vinsertd", []>; +} // end HasP9Altivec diff --git 
a/llvm/lib/Target/PowerPC/README_P9.txt b/llvm/lib/Target/PowerPC/README_P9.txt index 3138dc3..1f9211e 100644 --- a/llvm/lib/Target/PowerPC/README_P9.txt +++ b/llvm/lib/Target/PowerPC/README_P9.txt @@ -3,6 +3,37 @@ TODO: Instructions Need Implement Instrinstics or Map to LLVM IR Altivec: +- Vector Compare Not Equal (Zero): + vcmpneb(.) vcmpneh(.) vcmpnew(.) + vcmpnezb(.) vcmpnezh(.) vcmpnezw(.) + . Same as other VCMP*, use VCMP/VCMPo form (support intrinsic) + +- Vector Extract Unsigned: vextractub vextractuh vextractuw vextractd + . Don't use llvm extractelement because it has different semantics + . Use intrinsics: + (set v2i64:$vD, (int_ppc_altivec_vextractub v16i8:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vextractuh v8i16:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vextractuw v4i32:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vextractd v2i64:$vA, imm:$UIMM)) + +- Vector Extract Unsigned Byte/Halfword/Word Left/Right-Indexed: + vextublx vextubrx vextuhlx vextuhrx vextuwlx vextuwrx + . 
Use intrinsics: + // Left-Indexed + (set i64:$rD, (int_ppc_altivec_vextublx i64:$rA, v16i8:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuhlx i64:$rA, v8i16:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuwlx i64:$rA, v4i32:$vB)) + + // Right-Indexed + (set i64:$rD, (int_ppc_altivec_vextubrx i64:$rA, v16i8:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuhrx i64:$rA, v8i16:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuwrx i64:$rA, v4i32:$vB)) + +- Vector Insert Element Instructions: vinsertb vinsertd vinserth vinsertw + (set v16i8:$vD, (int_ppc_altivec_vinsertb v16i8:$vA, imm:$UIMM)) + (set v8i16:$vD, (int_ppc_altivec_vinserth v8i16:$vA, imm:$UIMM)) + (set v4i32:$vD, (int_ppc_altivec_vinsertw v4i32:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vinsertd v2i64:$vA, imm:$UIMM)) VSX: diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt index 16ff14c..f6011d25 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt @@ -672,3 +672,83 @@ # CHECK: mfvscr 2 0x10 0x40 0x06 0x04 +# Power9 instructions + +# CHECK: vcmpneb 2, 3, 4 +0x10 0x43 0x20 0x07 + +# CHECK: vcmpneb. 2, 3, 4 +0x10 0x43 0x24 0x07 + +# CHECK: vcmpnezb 2, 3, 4 +0x10 0x43 0x21 0x07 + +# CHECK: vcmpnezb. 2, 3, 4 +0x10 0x43 0x25 0x07 + +# CHECK: vcmpneh 2, 3, 4 +0x10 0x43 0x20 0x47 + +# CHECK: vcmpneh. 2, 3, 4 +0x10 0x43 0x24 0x47 + +# CHECK: vcmpnezh 2, 3, 4 +0x10 0x43 0x21 0x47 + +# CHECK: vcmpnezh. 2, 3, 4 +0x10 0x43 0x25 0x47 + +# CHECK: vcmpnew 2, 3, 4 +0x10 0x43 0x20 0x87 + +# CHECK: vcmpnew. 2, 3, 4 +0x10 0x43 0x24 0x87 + +# CHECK: vcmpnezw 2, 3, 4 +0x10 0x43 0x21 0x87 + +# CHECK: vcmpnezw. 
2, 3, 4 +0x10 0x43 0x25 0x87 + +# CHECK: vextractub 2, 3, 15 +0x10 0x4f 0x1a 0x0d + +# CHECK: vextractuh 2, 3, 14 +0x10 0x4e 0x1a 0x4d + +# CHECK: vextractuw 2, 3, 12 +0x10 0x4c 0x1a 0x8d + +# CHECK: vextractd 2, 3, 8 +0x10 0x48 0x1a 0xcd + +# CHECK: vextublx 2, 3, 4 +0x10 0x43 0x26 0x0d + +# CHECK: vextubrx 2, 3, 4 +0x10 0x43 0x27 0x0d + +# CHECK: vextuhlx 2, 3, 4 +0x10 0x43 0x26 0x4d + +# CHECK: vextuhrx 2, 3, 4 +0x10 0x43 0x27 0x4d + +# CHECK: vextuwlx 2, 3, 4 +0x10 0x43 0x26 0x8d + +# CHECK: vextuwrx 2, 3, 4 +0x10 0x43 0x27 0x8d + +# CHECK: vinsertb 2, 3, 15 +0x10 0x4f 0x1b 0x0d + +# CHECK: vinserth 2, 3, 14 +0x10 0x4e 0x1b 0x4d + +# CHECK: vinsertw 2, 3, 12 +0x10 0x4c 0x1b 0x8d + +# CHECK: vinsertd 2, 3, 8 +0x10 0x48 0x1b 0xcd + diff --git a/llvm/test/MC/PowerPC/ppc64-encoding-vmx.s b/llvm/test/MC/PowerPC/ppc64-encoding-vmx.s index d8825bf..1b12f661 100644 --- a/llvm/test/MC/PowerPC/ppc64-encoding-vmx.s +++ b/llvm/test/MC/PowerPC/ppc64-encoding-vmx.s @@ -742,3 +742,91 @@ # CHECK-LE: mfvscr 2 # encoding: [0x04,0x06,0x40,0x10] mfvscr 2 +# Power9 instructions + +# Vector Compare Not Equal (Zero) +# CHECK-BE: vcmpneb 2, 3, 4 # encoding: [0x10,0x43,0x20,0x07] +# CHECK-LE: vcmpneb 2, 3, 4 # encoding: [0x07,0x20,0x43,0x10] + vcmpneb 2, 3, 4 +# CHECK-BE: vcmpneb. 2, 3, 4 # encoding: [0x10,0x43,0x24,0x07] +# CHECK-LE: vcmpneb. 2, 3, 4 # encoding: [0x07,0x24,0x43,0x10] + vcmpneb. 2, 3, 4 +# CHECK-BE: vcmpnezb 2, 3, 4 # encoding: [0x10,0x43,0x21,0x07] +# CHECK-LE: vcmpnezb 2, 3, 4 # encoding: [0x07,0x21,0x43,0x10] + vcmpnezb 2, 3, 4 +# CHECK-BE: vcmpnezb. 2, 3, 4 # encoding: [0x10,0x43,0x25,0x07] +# CHECK-LE: vcmpnezb. 2, 3, 4 # encoding: [0x07,0x25,0x43,0x10] + vcmpnezb. 2, 3, 4 +# CHECK-BE: vcmpneh 2, 3, 4 # encoding: [0x10,0x43,0x20,0x47] +# CHECK-LE: vcmpneh 2, 3, 4 # encoding: [0x47,0x20,0x43,0x10] + vcmpneh 2, 3, 4 +# CHECK-BE: vcmpneh. 2, 3, 4 # encoding: [0x10,0x43,0x24,0x47] +# CHECK-LE: vcmpneh. 2, 3, 4 # encoding: [0x47,0x24,0x43,0x10] + vcmpneh. 
2, 3, 4 +# CHECK-BE: vcmpnezh 2, 3, 4 # encoding: [0x10,0x43,0x21,0x47] +# CHECK-LE: vcmpnezh 2, 3, 4 # encoding: [0x47,0x21,0x43,0x10] + vcmpnezh 2, 3, 4 +# CHECK-BE: vcmpnezh. 2, 3, 4 # encoding: [0x10,0x43,0x25,0x47] +# CHECK-LE: vcmpnezh. 2, 3, 4 # encoding: [0x47,0x25,0x43,0x10] + vcmpnezh. 2, 3, 4 +# CHECK-BE: vcmpnew 2, 3, 4 # encoding: [0x10,0x43,0x20,0x87] +# CHECK-LE: vcmpnew 2, 3, 4 # encoding: [0x87,0x20,0x43,0x10] + vcmpnew 2, 3, 4 +# CHECK-BE: vcmpnew. 2, 3, 4 # encoding: [0x10,0x43,0x24,0x87] +# CHECK-LE: vcmpnew. 2, 3, 4 # encoding: [0x87,0x24,0x43,0x10] + vcmpnew. 2, 3, 4 +# CHECK-BE: vcmpnezw 2, 3, 4 # encoding: [0x10,0x43,0x21,0x87] +# CHECK-LE: vcmpnezw 2, 3, 4 # encoding: [0x87,0x21,0x43,0x10] + vcmpnezw 2, 3, 4 +# CHECK-BE: vcmpnezw. 2, 3, 4 # encoding: [0x10,0x43,0x25,0x87] +# CHECK-LE: vcmpnezw. 2, 3, 4 # encoding: [0x87,0x25,0x43,0x10] + vcmpnezw. 2, 3, 4 + +# Vector Extract Unsigned +# CHECK-BE: vextractub 2, 3, 15 # encoding: [0x10,0x4f,0x1a,0x0d] +# CHECK-LE: vextractub 2, 3, 15 # encoding: [0x0d,0x1a,0x4f,0x10] + vextractub 2, 3, 15 +# CHECK-BE: vextractuh 2, 3, 14 # encoding: [0x10,0x4e,0x1a,0x4d] +# CHECK-LE: vextractuh 2, 3, 14 # encoding: [0x4d,0x1a,0x4e,0x10] + vextractuh 2, 3, 14 +# CHECK-BE: vextractuw 2, 3, 12 # encoding: [0x10,0x4c,0x1a,0x8d] +# CHECK-LE: vextractuw 2, 3, 12 # encoding: [0x8d,0x1a,0x4c,0x10] + vextractuw 2, 3, 12 +# CHECK-BE: vextractd 2, 3, 8 # encoding: [0x10,0x48,0x1a,0xcd] +# CHECK-LE: vextractd 2, 3, 8 # encoding: [0xcd,0x1a,0x48,0x10] + vextractd 2, 3, 8 + +# Vector Extract Unsigned Left/Right-Indexed +# CHECK-BE: vextublx 2, 3, 4 # encoding: [0x10,0x43,0x26,0x0d] +# CHECK-LE: vextublx 2, 3, 4 # encoding: [0x0d,0x26,0x43,0x10] + vextublx 2, 3, 4 +# CHECK-BE: vextubrx 2, 3, 4 # encoding: [0x10,0x43,0x27,0x0d] +# CHECK-LE: vextubrx 2, 3, 4 # encoding: [0x0d,0x27,0x43,0x10] + vextubrx 2, 3, 4 +# CHECK-BE: vextuhlx 2, 3, 4 # encoding: [0x10,0x43,0x26,0x4d] +# CHECK-LE: vextuhlx 2, 3, 4 # encoding: 
[0x4d,0x26,0x43,0x10] + vextuhlx 2, 3, 4 +# CHECK-BE: vextuhrx 2, 3, 4 # encoding: [0x10,0x43,0x27,0x4d] +# CHECK-LE: vextuhrx 2, 3, 4 # encoding: [0x4d,0x27,0x43,0x10] + vextuhrx 2, 3, 4 +# CHECK-BE: vextuwlx 2, 3, 4 # encoding: [0x10,0x43,0x26,0x8d] +# CHECK-LE: vextuwlx 2, 3, 4 # encoding: [0x8d,0x26,0x43,0x10] + vextuwlx 2, 3, 4 +# CHECK-BE: vextuwrx 2, 3, 4 # encoding: [0x10,0x43,0x27,0x8d] +# CHECK-LE: vextuwrx 2, 3, 4 # encoding: [0x8d,0x27,0x43,0x10] + vextuwrx 2, 3, 4 + +# Vector Insert Element +# CHECK-BE: vinsertb 2, 3, 15 # encoding: [0x10,0x4f,0x1b,0x0d] +# CHECK-LE: vinsertb 2, 3, 15 # encoding: [0x0d,0x1b,0x4f,0x10] + vinsertb 2, 3, 15 +# CHECK-BE: vinserth 2, 3, 14 # encoding: [0x10,0x4e,0x1b,0x4d] +# CHECK-LE: vinserth 2, 3, 14 # encoding: [0x4d,0x1b,0x4e,0x10] + vinserth 2, 3, 14 +# CHECK-BE: vinsertw 2, 3, 12 # encoding: [0x10,0x4c,0x1b,0x8d] +# CHECK-LE: vinsertw 2, 3, 12 # encoding: [0x8d,0x1b,0x4c,0x10] + vinsertw 2, 3, 12 +# CHECK-BE: vinsertd 2, 3, 8 # encoding: [0x10,0x48,0x1b,0xcd] +# CHECK-LE: vinsertd 2, 3, 8 # encoding: [0xcd,0x1b,0x48,0x10] + vinsertd 2, 3, 8 + -- 2.7.4