From: Eric Christopher Date: Wed, 14 Nov 2012 22:09:20 +0000 (+0000) Subject: Remove the CellSPU port. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=950d8703b1e79df72b8acda09421d1e0c9f262b8;p=platform%2Fupstream%2Fllvm.git Remove the CellSPU port. Approved by Chris Lattner. llvm-svn: 167984 --- diff --git a/llvm/autoconf/configure.ac b/llvm/autoconf/configure.ac index f1842a6..0a7c6e2 100644 --- a/llvm/autoconf/configure.ac +++ b/llvm/autoconf/configure.ac @@ -697,14 +697,14 @@ dnl Allow specific targets to be specified for building (or not) TARGETS_TO_BUILD="" AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets], [Build specific host targets: all or target1,target2,... Valid targets are: - host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon, + host, x86, x86_64, sparc, powerpc, arm, mips, hexagon, xcore, msp430, nvptx, and cpp (default=all)]),, enableval=all) if test "$enableval" = host-only ; then enableval=host fi case "$enableval" in - all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;; + all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;; *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do case "$a_target" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; @@ -716,7 +716,6 @@ case "$enableval" in mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; mips64) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; mips64el) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; - spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;; @@ -731,7 +730,6 @@ case "$enableval" in ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;; - CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;; diff --git a/llvm/configure b/llvm/configure index d4a42f7..de0dd48 100755 --- a/llvm/configure +++ b/llvm/configure @@ -1426,8 +1426,8 @@ Optional Features: YES) --enable-targets Build specific host targets: all or target1,target2,... Valid targets are: host, x86, - x86_64, sparc, powerpc, arm, mips, spu, hexagon, - xcore, msp430, nvptx, and cpp (default=all) + x86_64, sparc, powerpc, arm, mips, hexagon, xcore, + msp430, nvptx, and cpp (default=all) --enable-experimental-targets Build experimental host targets: disable or target1,target2,... (default=disable) @@ -5418,7 +5418,7 @@ if test "$enableval" = host-only ; then enableval=host fi case "$enableval" in - all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;; + all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;; *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do case "$a_target" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; @@ -5430,7 +5430,6 @@ case "$enableval" in mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; mips64) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; mips64el) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; - spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;; @@ -5445,7 +5444,6 @@ case "$enableval" in ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;; - CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;; @@ -10315,7 +10313,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <` :raw-html:`Feature` :raw-html:`ARM` -:raw-html:`CellSPU` :raw-html:`Hexagon` :raw-html:`MBlaze` :raw-html:`MSP430` @@ -1777,7 +1776,6 @@ Here is the table: :raw-html:`` :raw-html:`is generally reliable` :raw-html:` ` -:raw-html:` ` :raw-html:` ` :raw-html:` ` :raw-html:` ` @@ -1792,7 +1790,6 @@ Here is the table: :raw-html:`` :raw-html:`assembly parser` :raw-html:` ` -:raw-html:` ` :raw-html:` ` :raw-html:` ` :raw-html:` ` @@ -1807,7 +1804,6 @@ Here is the table: :raw-html:`` :raw-html:`disassembler` :raw-html:` ` -:raw-html:` ` :raw-html:` ` :raw-html:` ` :raw-html:` ` @@ -1822,7 +1818,6 @@ Here is the table: :raw-html:`` :raw-html:`inline asm` :raw-html:` ` -:raw-html:` ` :raw-html:` ` :raw-html:` ` :raw-html:` ` @@ -1837,7 +1832,6 @@ Here is the table: :raw-html:`` :raw-html:`jit` :raw-html:`* ` -:raw-html:` ` :raw-html:` ` :raw-html:` ` :raw-html:` ` @@ -1852,7 +1846,6 @@ Here is the table: :raw-html:`` :raw-html:`.o file writing` :raw-html:` ` -:raw-html:` ` :raw-html:` ` :raw-html:` ` :raw-html:` ` @@ -1867,7 +1860,6 @@ Here is the table: :raw-html:`` :raw-html:`tail calls` :raw-html:` ` -:raw-html:` ` :raw-html:` ` :raw-html:` ` :raw-html:` ` @@ -1882,7 +1874,6 @@ Here is the table: :raw-html:`` :raw-html:`segmented stacks` :raw-html:` ` -:raw-html:` ` :raw-html:` ` :raw-html:` ` :raw-html:` ` diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index 2ace829..c3a2b19 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -44,7 +44,6 @@ public: UnknownArch, arm, // ARM; arm, armv.*, xscale - cellspu, // CellSPU: spu, cellspu hexagon, // Hexagon: hexagon mips, // MIPS: mips, mipsallegrex mipsel, // MIPSEL: mipsel, mipsallegrexel diff --git a/llvm/include/llvm/Intrinsics.td b/llvm/include/llvm/Intrinsics.td index 2e1597f..26a05e1 100644 --- a/llvm/include/llvm/Intrinsics.td +++ b/llvm/include/llvm/Intrinsics.td @@ -464,7 +464,6 @@ def int_convertuu : Intrinsic<[llvm_anyint_ty], include "llvm/IntrinsicsPowerPC.td" include "llvm/IntrinsicsX86.td" include "llvm/IntrinsicsARM.td" -include "llvm/IntrinsicsCellSPU.td" include "llvm/IntrinsicsXCore.td" include "llvm/IntrinsicsHexagon.td" include "llvm/IntrinsicsNVVM.td" diff --git a/llvm/include/llvm/IntrinsicsCellSPU.td b/llvm/include/llvm/IntrinsicsCellSPU.td deleted file mode 100644 index 1e311bb..0000000 --- a/llvm/include/llvm/IntrinsicsCellSPU.td +++ /dev/null @@ -1,242 +0,0 @@ -//==- IntrinsicsCellSPU.td - Cell SDK intrinsics -*- tablegen -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// Department at The Aerospace Corporation and is distributed under the -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Cell SPU Instructions: -//===----------------------------------------------------------------------===// -// TODO Items (not urgent today, but would be nice, low priority) -// -// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by -// concatenating the byte argument b as "bbbb". Could recognize this bit pattern -// in 16-bit and 32-bit constants and reduce instruction count. -//===----------------------------------------------------------------------===// - -// 7-bit integer type, used as an immediate: -def cell_i7_ty: LLVMType; -def cell_i8_ty: LLVMType; - -// Keep this here until it's actually supported: -def llvm_i128_ty : LLVMType; - -class v16i8_u7imm : - GCCBuiltin, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, cell_i7_ty], - [IntrNoMem]>; - -class v16i8_u8imm : - GCCBuiltin, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - -class v16i8_s10imm : - GCCBuiltin, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; - -class v16i8_u16imm : - GCCBuiltin, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; - -class v16i8_rr : - GCCBuiltin, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], - [IntrNoMem]>; - -class v8i16_s10imm : - GCCBuiltin, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i16_ty], - [IntrNoMem]>; - -class v8i16_u16imm : - GCCBuiltin, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i16_ty], - [IntrNoMem]>; - -class v8i16_rr : - GCCBuiltin, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - -class v4i32_rr : - GCCBuiltin, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; - -class v4i32_u7imm : - GCCBuiltin, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, cell_i7_ty], - [IntrNoMem]>; - -class v4i32_s10imm : - GCCBuiltin, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i16_ty], - [IntrNoMem]>; - -class v4i32_u16imm : - GCCBuiltin, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i16_ty], - [IntrNoMem]>; - -class v4f32_rr : - GCCBuiltin, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], - [IntrNoMem]>; - -class v4f32_rrr : - GCCBuiltin, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], - [IntrNoMem]>; - -class v2f64_rr : - GCCBuiltin, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], - [IntrNoMem]>; - -// All Cell SPU intrinsics start with "llvm.spu.". -let TargetPrefix = "spu" in { - def int_spu_si_fsmbi : v8i16_u16imm<"fsmbi">; - def int_spu_si_ah : v8i16_rr<"ah">; - def int_spu_si_ahi : v8i16_s10imm<"ahi">; - def int_spu_si_a : v4i32_rr<"a">; - def int_spu_si_ai : v4i32_s10imm<"ai">; - def int_spu_si_sfh : v8i16_rr<"sfh">; - def int_spu_si_sfhi : v8i16_s10imm<"sfhi">; - def int_spu_si_sf : v4i32_rr<"sf">; - def int_spu_si_sfi : v4i32_s10imm<"sfi">; - def int_spu_si_addx : v4i32_rr<"addx">; - def int_spu_si_cg : v4i32_rr<"cg">; - def int_spu_si_cgx : v4i32_rr<"cgx">; - def int_spu_si_sfx : v4i32_rr<"sfx">; - def int_spu_si_bg : v4i32_rr<"bg">; - def int_spu_si_bgx : v4i32_rr<"bgx">; - def int_spu_si_mpy : // This is special: - GCCBuiltin<"__builtin_si_mpy">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyu : // This is special: - GCCBuiltin<"__builtin_si_mpyu">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyi : // This is special: - GCCBuiltin<"__builtin_si_mpyi">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyui : // This is special: - GCCBuiltin<"__builtin_si_mpyui">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_spu_si_mpya : // This is special: - GCCBuiltin<"__builtin_si_mpya">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyh : // This is special: - GCCBuiltin<"__builtin_si_mpyh">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpys : // This is special: - GCCBuiltin<"__builtin_si_mpys">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyhh : // This is special: - GCCBuiltin<"__builtin_si_mpyhh">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyhha : // This is special: - GCCBuiltin<"__builtin_si_mpyhha">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyhhu : // This is special: - GCCBuiltin<"__builtin_si_mpyhhu">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyhhau : // This is special: - GCCBuiltin<"__builtin_si_mpyhhau">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - - def int_spu_si_shli: v4i32_u7imm<"shli">; - - def int_spu_si_shlqbi: - GCCBuiltin, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], - [IntrNoMem]>; - - def int_spu_si_shlqbii: v16i8_u7imm<"shlqbii">; - def int_spu_si_shlqby: - GCCBuiltin, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_spu_si_shlqbyi: v16i8_u7imm<"shlqbyi">; - - def int_spu_si_ceq: v4i32_rr<"ceq">; - def int_spu_si_ceqi: v4i32_s10imm<"ceqi">; - def int_spu_si_ceqb: v16i8_rr<"ceqb">; - def int_spu_si_ceqbi: v16i8_u8imm<"ceqbi">; - def int_spu_si_ceqh: v8i16_rr<"ceqh">; - def int_spu_si_ceqhi: v8i16_s10imm<"ceqhi">; - def int_spu_si_cgt: v4i32_rr<"cgt">; - def int_spu_si_cgti: v4i32_s10imm<"cgti">; - def int_spu_si_cgtb: v16i8_rr<"cgtb">; - def int_spu_si_cgtbi: v16i8_u8imm<"cgtbi">; - def int_spu_si_cgth: v8i16_rr<"cgth">; - def int_spu_si_cgthi: v8i16_s10imm<"cgthi">; - def int_spu_si_clgtb: v16i8_rr<"clgtb">; - def int_spu_si_clgtbi: v16i8_u8imm<"clgtbi">; - def int_spu_si_clgth: v8i16_rr<"clgth">; - def int_spu_si_clgthi: v8i16_s10imm<"clgthi">; - def int_spu_si_clgt: v4i32_rr<"clgt">; - def int_spu_si_clgti: v4i32_s10imm<"clgti">; - - def int_spu_si_and: v4i32_rr<"and">; - def int_spu_si_andbi: v16i8_u8imm<"andbi">; - def int_spu_si_andc: v4i32_rr<"andc">; - def int_spu_si_andhi: v8i16_s10imm<"andhi">; - def int_spu_si_andi: v4i32_s10imm<"andi">; - - def int_spu_si_or: v4i32_rr<"or">; - def int_spu_si_orbi: v16i8_u8imm<"orbi">; - def int_spu_si_orc: v4i32_rr<"orc">; - def int_spu_si_orhi: v8i16_s10imm<"orhi">; - def int_spu_si_ori: v4i32_s10imm<"ori">; - - def int_spu_si_xor: v4i32_rr<"xor">; - def int_spu_si_xorbi: v16i8_u8imm<"xorbi">; - def int_spu_si_xorhi: v8i16_s10imm<"xorhi">; - def int_spu_si_xori: v4i32_s10imm<"xori">; - - def int_spu_si_nor: v4i32_rr<"nor">; - def int_spu_si_nand: v4i32_rr<"nand">; - - def int_spu_si_fa: v4f32_rr<"fa">; - def int_spu_si_fs: v4f32_rr<"fs">; - def int_spu_si_fm: v4f32_rr<"fm">; - - def int_spu_si_fceq: v4f32_rr<"fceq">; - def int_spu_si_fcmeq: v4f32_rr<"fcmeq">; - def int_spu_si_fcgt: v4f32_rr<"fcgt">; - def int_spu_si_fcmgt: v4f32_rr<"fcmgt">; - - def int_spu_si_fma: v4f32_rrr<"fma">; - def int_spu_si_fnms: v4f32_rrr<"fnms">; - def int_spu_si_fms: v4f32_rrr<"fms">; - - def int_spu_si_dfa: v2f64_rr<"dfa">; - def int_spu_si_dfs: v2f64_rr<"dfs">; - def int_spu_si_dfm: v2f64_rr<"dfm">; - -//def int_spu_si_dfceq: v2f64_rr<"dfceq">; -//def int_spu_si_dfcmeq: v2f64_rr<"dfcmeq">; -//def int_spu_si_dfcgt: v2f64_rr<"dfcgt">; -//def int_spu_si_dfcmgt: v2f64_rr<"dfcmgt">; - - def int_spu_si_dfnma: v2f64_rr<"dfnma">; - def int_spu_si_dfma: v2f64_rr<"dfma">; - def int_spu_si_dfnms: v2f64_rr<"dfnms">; - def int_spu_si_dfms: v2f64_rr<"dfms">; -} diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp index c59ec19..7bf65f7 100644 --- a/llvm/lib/Support/Triple.cpp +++ b/llvm/lib/Support/Triple.cpp @@ -20,7 +20,6 @@ const char *Triple::getArchTypeName(ArchType Kind) { case UnknownArch: return "unknown"; case arm: return "arm"; - case cellspu: return "cellspu"; case hexagon: return "hexagon"; case mips: return "mips"; case mipsel: return "mipsel"; @@ -56,8 +55,6 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case arm: case thumb: return "arm"; - case cellspu: return "spu"; - case ppc64: case ppc: return "ppc"; @@ -153,7 +150,6 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) { Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return StringSwitch(Name) .Case("arm", arm) - .Case("cellspu", cellspu) .Case("mips", mips) .Case("mipsel", mipsel) .Case("mips64", mips64) @@ -220,7 +216,6 @@ static Triple::ArchType parseArch(StringRef ArchName) { .StartsWith("armv", Triple::arm) .Case("thumb", Triple::thumb) .StartsWith("thumbv", Triple::thumb) - .Cases("spu", "cellspu", Triple::cellspu) .Case("msp430", Triple::msp430) .Cases("mips", "mipseb", "mipsallegrex", Triple::mips) .Cases("mipsel", "mipsallegrexel", Triple::mipsel) @@ -659,7 +654,6 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::amdil: case llvm::Triple::arm: - case llvm::Triple::cellspu: case llvm::Triple::hexagon: case llvm::Triple::le32: case llvm::Triple::mblaze: @@ -709,7 +703,6 @@ Triple Triple::get32BitArchVariant() const { case Triple::amdil: case Triple::spir: case Triple::arm: - case Triple::cellspu: case Triple::hexagon: case Triple::le32: case Triple::mblaze: @@ -742,7 +735,6 @@ Triple Triple::get64BitArchVariant() const { case Triple::UnknownArch: case Triple::amdil: case Triple::arm: - case Triple::cellspu: case Triple::hexagon: case Triple::le32: case Triple::mblaze: diff --git a/llvm/lib/Target/CellSPU/CMakeLists.txt b/llvm/lib/Target/CellSPU/CMakeLists.txt deleted file mode 100644 index 1f8ca86..0000000 --- a/llvm/lib/Target/CellSPU/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS SPU.td) - -tablegen(LLVM SPUGenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM SPUGenCodeEmitter.inc -gen-emitter) -tablegen(LLVM SPUGenRegisterInfo.inc -gen-register-info) -tablegen(LLVM SPUGenInstrInfo.inc -gen-instr-info) -tablegen(LLVM SPUGenDAGISel.inc -gen-dag-isel) -tablegen(LLVM SPUGenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM SPUGenCallingConv.inc -gen-callingconv) -add_public_tablegen_target(CellSPUCommonTableGen) - -add_llvm_target(CellSPUCodeGen - SPUAsmPrinter.cpp - SPUHazardRecognizers.cpp - SPUInstrInfo.cpp - SPUISelDAGToDAG.cpp - SPUISelLowering.cpp - SPUFrameLowering.cpp - SPUMachineFunction.cpp - SPURegisterInfo.cpp - SPUSubtarget.cpp - SPUTargetMachine.cpp - SPUSelectionDAGInfo.cpp - SPUNopFiller.cpp - ) - -add_dependencies(LLVMCellSPUCodeGen intrinsics_gen) - -add_subdirectory(TargetInfo) -add_subdirectory(MCTargetDesc) diff --git a/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td b/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td deleted file mode 100644 index cdb4099..0000000 --- a/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td +++ /dev/null @@ -1,449 +0,0 @@ -//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -///--==-- Arithmetic ops intrinsics --==-- -def CellSDKah: - RR_Int_v8i16<0b00010011000, "ah", IntegerOp, int_spu_si_ah>; -def CellSDKahi: - RI10_Int_v8i16<0b00010011000, "ahi", IntegerOp, int_spu_si_ahi>; -def CellSDKa: - RR_Int_v4i32<0b00000011000, "a", IntegerOp, int_spu_si_a>; -def CellSDKai: - RI10_Int_v4i32<0b00111000, "ai", IntegerOp, int_spu_si_ai>; -def CellSDKsfh: - RR_Int_v8i16<0b00010010000, "sfh", IntegerOp, int_spu_si_sfh>; -def CellSDKsfhi: - RI10_Int_v8i16<0b10110000, "sfhi", IntegerOp, int_spu_si_sfhi>; -def CellSDKsf: - RR_Int_v4i32<0b00000010000, "sf", IntegerOp, int_spu_si_sf>; -def CellSDKsfi: - RI10_Int_v4i32<0b00110000, "sfi", IntegerOp, int_spu_si_sfi>; -def CellSDKaddx: - RR_Int_v4i32<0b00000010110, "addx", IntegerOp, int_spu_si_addx>; -def CellSDKcg: - RR_Int_v4i32<0b0100001100, "cg", IntegerOp, int_spu_si_cg>; -def CellSDKcgx: - RR_Int_v4i32<0b01000010110, "cgx", IntegerOp, int_spu_si_cgx>; -def CellSDKsfx: - RR_Int_v4i32<0b10000010110, "sfx", IntegerOp, int_spu_si_sfx>; -def CellSDKbg: - RR_Int_v4i32<0b01000010000, "bg", IntegerOp, int_spu_si_bg>; -def CellSDKbgx: - RR_Int_v4i32<0b11000010110, "bgx", IntegerOp, int_spu_si_bgx>; - -def CellSDKmpy: - RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpy $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpy (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -def CellSDKmpyu: - RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpyu $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyu (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))] >; - -def CellSDKmpyi: - RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "mpyi $rT, $rA, $val", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyi (v8i16 VECREG:$rA), - i16ImmSExt10:$val))]>; - -def CellSDKmpyui: - RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "mpyui $rT, $rA, $val", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyui (v8i16 VECREG:$rA), - i16ImmSExt10:$val))]>; - -def CellSDKmpya: - RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "mpya $rT, $rA, $rB, $rC", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpya (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB), - (v8i16 VECREG:$rC)))]>; - -def CellSDKmpyh: - RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpyh $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyh (v4i32 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -def CellSDKmpys: - RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpys $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpys (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -def CellSDKmpyhh: - RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpyhh $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhh (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -def CellSDKmpyhha: - RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpyhha $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhha (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -// Not sure how to match a (set $rT, (add $rT (mpyhh $rA, $rB)))... so leave -// as an intrinsic for the time being -def CellSDKmpyhhu: - RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpyhhu $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhu (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -def CellSDKmpyhhau: - RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpyhhau $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhau (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -def CellSDKand: - RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "and\t $rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), - (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKandc: - RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "andc\t $rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), - (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKandbi: - RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), - "andbi\t $rT, $rA, $val", BranchResolv, - [(set (v16i8 VECREG:$rT), - (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>; - -def CellSDKandhi: - RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "andhi\t $rT, $rA, $val", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_andhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; - -def CellSDKandi: - RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "andi\t $rT, $rA, $val", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_andi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; - -def CellSDKor: - RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "or\t $rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), - (int_spu_si_or (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKorc: - RRForm<0b10010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "addc\t $rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), - (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKorbi: - RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), - "orbi\t $rT, $rA, $val", BranchResolv, - [(set (v16i8 VECREG:$rT), - (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>; - -def CellSDKorhi: - RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "orhi\t $rT, $rA, $val", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_orhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; - -def CellSDKori: - RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "ori\t $rT, $rA, $val", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_ori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; - -def CellSDKxor: - RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "xor\t $rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), - (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKxorbi: - RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), - "xorbi\t $rT, $rA, $val", BranchResolv, - [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>; - -def CellSDKxorhi: - RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "xorhi\t $rT, $rA, $val", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_xorhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; - -def CellSDKxori: - RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "xori\t $rT, $rA, $val", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_xori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; - -def CellSDKnor: - RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "nor\t $rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), - (int_spu_si_nor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKnand: - RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "nand\t $rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), - (int_spu_si_nand (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -//===----------------------------------------------------------------------===// -// Shift/rotate intrinsics: -//===----------------------------------------------------------------------===// - -def CellSDKshli: - Pat<(int_spu_si_shli (v4i32 VECREG:$rA), uimm7:$val), - (SHLIv4i32 VECREG:$rA, (TO_IMM32 imm:$val))>; - -def CellSDKshlqbi: - Pat<(int_spu_si_shlqbi VECREG:$rA, R32C:$rB), - (SHLQBIv16i8 VECREG:$rA, R32C:$rB)>; - -def CellSDKshlqii: - Pat<(int_spu_si_shlqbii VECREG:$rA, uimm7:$val), - (SHLQBIIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>; - -def CellSDKshlqby: - Pat<(int_spu_si_shlqby VECREG:$rA, R32C:$rB), - (SHLQBYv16i8 VECREG:$rA, R32C:$rB)>; - -def CellSDKshlqbyi: - Pat<(int_spu_si_shlqbyi VECREG:$rA, uimm7:$val), - (SHLQBYIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>; - - -//===----------------------------------------------------------------------===// -// Branch/compare intrinsics: -//===----------------------------------------------------------------------===// - -def CellSDKceq: - RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "ceq\t $rT, $rA, $rB", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_ceq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKceqi: - RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "ceqi\t $rT, $rA, $val", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_ceqi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; - -def CellSDKceqb: - RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "ceqb\t $rT, $rA, $rB", BranchResolv, - [(set (v16i8 VECREG:$rT), - (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; - -def CellSDKceqbi: - RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), - "ceqbi\t $rT, $rA, $val", BranchResolv, - [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>; - -def CellSDKceqh: - RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "ceqh\t $rT, $rA, $rB", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_ceqh (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; - -def CellSDKceqhi: - RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "ceqhi\t $rT, $rA, $val", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_ceqhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; -def CellSDKcgth: - RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "cgth\t $rT, $rA, $rB", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_cgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; - -def CellSDKcgthi: - RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "cgthi\t $rT, $rA, $val", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_cgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; - -def CellSDKcgt: - RRForm<0b00000010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "cgt\t $rT, $rA, $rB", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_cgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKcgti: - RI10Form<0b00110010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "cgti\t $rT, $rA, $val", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_cgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; - -def CellSDKcgtb: - RRForm<0b00001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "cgtb\t $rT, $rA, $rB", BranchResolv, - [(set (v16i8 VECREG:$rT), - (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; - -def CellSDKcgtbi: - RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), - "cgtbi\t $rT, $rA, $val", BranchResolv, - [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>; - -def CellSDKclgth: - RRForm<0b00010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "clgth\t $rT, $rA, $rB", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_clgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; - -def CellSDKclgthi: - RI10Form<0b10111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "clgthi\t $rT, $rA, $val", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_clgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; - -def CellSDKclgt: - RRForm<0b00000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "clgt\t $rT, $rA, $rB", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_clgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKclgti: - RI10Form<0b00111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "clgti\t $rT, $rA, $val", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_clgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; - -def CellSDKclgtb: - RRForm<0b00001011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "clgtb\t $rT, $rA, $rB", BranchResolv, - [(set (v16i8 VECREG:$rT), - (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; - -def CellSDKclgtbi: - RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), - "clgtbi\t $rT, $rA, $val", BranchResolv, - [(set (v16i8 VECREG:$rT), - (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>; - -//===----------------------------------------------------------------------===// -// Floating-point intrinsics: -//===----------------------------------------------------------------------===// - -def CellSDKfa: - RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fa\t $rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fa (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def CellSDKfs: - RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fs\t $rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fs (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def CellSDKfm: - RRForm<0b01100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fm\t $rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fm (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def CellSDKfceq: - RRForm<0b01000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fceq\t $rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fceq (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def CellSDKfcgt: - RRForm<0b01000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fcgt\t $rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fcgt (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def CellSDKfcmeq: - RRForm<0b01010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fcmeq\t $rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fcmeq (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def CellSDKfcmgt: - RRForm<0b01010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fcmgt\t $rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fcmgt (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def CellSDKfma: - RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fma\t $rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fma (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB), - (v4f32 VECREG:$rC)))]>; - -def CellSDKfnms: - RRRForm<0b1011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fnms\t $rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fnms (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB), - (v4f32 VECREG:$rC)))]>; - -def CellSDKfms: - RRRForm<0b1111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fms\t $rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fms (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB), - (v4f32 VECREG:$rC)))]>; - -//===----------------------------------------------------------------------===// -// Double precision floating-point intrinsics: -//===----------------------------------------------------------------------===// - -def CellSDKdfa: - RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfa\t $rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (int_spu_si_dfa (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))]>; - -def CellSDKdfs: - RRForm<0b10110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfs\t $rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (int_spu_si_dfs (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))]>; - -def CellSDKdfm: - RRForm<0b01110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfm\t $rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (int_spu_si_dfm (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))]>; - -def CellSDKdfma: - RRForm<0b00111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfma\t $rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (int_spu_si_dfma (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))]>; - -def CellSDKdfnma: - RRForm<0b11111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfnma\t $rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (int_spu_si_dfnma (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))]>; - -def CellSDKdfnms: - RRForm<0b01111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfnms\t $rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (int_spu_si_dfnms (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))]>; - -def CellSDKdfms: - RRForm<0b10111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfms\t $rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (int_spu_si_dfms (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))]>; diff --git a/llvm/lib/Target/CellSPU/LLVMBuild.txt b/llvm/lib/Target/CellSPU/LLVMBuild.txt deleted file mode 100644 index 277620b..0000000 --- a/llvm/lib/Target/CellSPU/LLVMBuild.txt +++ /dev/null @@ -1,32 +0,0 @@ -;===- ./lib/Target/CellSPU/LLVMBuild.txt -----------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[common] -subdirectories = MCTargetDesc TargetInfo - -[component_0] -type = TargetGroup -name = CellSPU -parent = Target -has_asmprinter = 1 - -[component_1] -type = Library -name = CellSPUCodeGen -parent = CellSPU -required_libraries = AsmPrinter CellSPUDesc CellSPUInfo CodeGen Core MC SelectionDAG Support Target -add_to_library_groups = CellSPU diff --git a/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 0027bdb..0000000 --- a/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_llvm_library(LLVMCellSPUDesc - SPUMCTargetDesc.cpp - SPUMCAsmInfo.cpp - ) - -add_dependencies(LLVMCellSPUDesc CellSPUCommonTableGen) diff --git a/llvm/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/llvm/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt deleted file mode 100644 index 71e5bbc..0000000 --- a/llvm/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = CellSPUDesc -parent = CellSPU -required_libraries = CellSPUInfo MC -add_to_library_groups = CellSPU diff --git a/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile b/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile deleted file mode 100644 index 10d9a42..0000000 --- a/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/CellSPU/TargetDesc/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMCellSPUDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp b/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp deleted file mode 100644 index 4bad37e..0000000 --- a/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp +++ /dev/null @@ -1,43 +0,0 @@ -//===-- SPUMCAsmInfo.cpp - Cell SPU asm properties ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declarations of the SPUMCAsmInfo properties. -// -//===----------------------------------------------------------------------===// - -#include "SPUMCAsmInfo.h" -using namespace llvm; - -void SPULinuxMCAsmInfo::anchor() { } - -SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) { - IsLittleEndian = false; - - ZeroDirective = "\t.space\t"; - Data64bitsDirective = "\t.quad\t"; - AlignmentIsInBytes = false; - - PCSymbol = "."; - CommentString = "#"; - GlobalPrefix = ""; - PrivateGlobalPrefix = ".L"; - - // Has leb128 - HasLEB128 = true; - - SupportsDebugInformation = true; - - // Exception handling is not supported on CellSPU (think about it: you only - // have 256K for code+data. Would you support exception handling?) - ExceptionsType = ExceptionHandling::None; - - // SPU assembly requires ".section" before ".bss" - UsesELFSectionDirectiveForBSS = true; -} - diff --git a/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h b/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h deleted file mode 100644 index f786147..0000000 --- a/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h +++ /dev/null @@ -1,30 +0,0 @@ -//===-- SPUMCAsmInfo.h - Cell SPU asm properties ---------------*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the SPUMCAsmInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef SPUTARGETASMINFO_H -#define SPUTARGETASMINFO_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCAsmInfo.h" - -namespace llvm { - class Target; - - class SPULinuxMCAsmInfo : public MCAsmInfo { - virtual void anchor(); - public: - explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT); - }; -} // namespace llvm - -#endif /* SPUTARGETASMINFO_H */ diff --git a/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp deleted file mode 100644 index 8450e2c..0000000 --- a/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp +++ /dev/null @@ -1,94 +0,0 @@ -//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides Cell SPU specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#include "SPUMCTargetDesc.h" -#include "SPUMCAsmInfo.h" -#include "llvm/MC/MachineLocation.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_MC_DESC -#include "SPUGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "SPUGenSubtargetInfo.inc" - -#define GET_REGINFO_MC_DESC -#include "SPUGenRegisterInfo.inc" - -using namespace llvm; - -static MCInstrInfo *createSPUMCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitSPUMCInstrInfo(X); - return X; -} - -static MCRegisterInfo *createCellSPUMCRegisterInfo(StringRef TT) { - MCRegisterInfo *X = new MCRegisterInfo(); - InitSPUMCRegisterInfo(X, SPU::R0); - return X; -} - -static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitSPUMCSubtargetInfo(X, TT, CPU, FS); - return X; -} - -static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) { - MCAsmInfo *MAI = new SPULinuxMCAsmInfo(T, TT); - - // Initial state of the frame pointer is R1. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(SPU::R1, 0); - MAI->addInitialFrameState(0, Dst, Src); - - return MAI; -} - -static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - // For the time being, use static relocations, since there's really no - // support for PIC yet. - X->InitMCCodeGenInfo(Reloc::Static, CM, OL); - return X; -} - -// Force static initialization. -extern "C" void LLVMInitializeCellSPUTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn X(TheCellSPUTarget, createSPUMCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheCellSPUTarget, - createSPUMCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheCellSPUTarget, - createCellSPUMCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget, - createSPUMCSubtargetInfo); -} diff --git a/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h deleted file mode 100644 index d26449e..0000000 --- a/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h +++ /dev/null @@ -1,38 +0,0 @@ -//===-- SPUMCTargetDesc.h - CellSPU Target Descriptions ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides CellSPU specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#ifndef SPUMCTARGETDESC_H -#define SPUMCTARGETDESC_H - -namespace llvm { -class Target; - -extern Target TheCellSPUTarget; - -} // End llvm namespace - -// Define symbolic names for Cell registers. This defines a mapping from -// register name to register number. -// -#define GET_REGINFO_ENUM -#include "SPUGenRegisterInfo.inc" - -// Defines symbolic names for the SPU instructions. -// -#define GET_INSTRINFO_ENUM -#include "SPUGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_ENUM -#include "SPUGenSubtargetInfo.inc" - -#endif diff --git a/llvm/lib/Target/CellSPU/Makefile b/llvm/lib/Target/CellSPU/Makefile deleted file mode 100644 index d7a8247..0000000 --- a/llvm/lib/Target/CellSPU/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -##===- lib/Target/CellSPU/Makefile -------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMCellSPUCodeGen -TARGET = SPU -BUILT_SOURCES = SPUGenInstrInfo.inc SPUGenRegisterInfo.inc \ - SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \ - SPUGenDAGISel.inc \ - SPUGenSubtargetInfo.inc SPUGenCallingConv.inc - -DIRS = TargetInfo MCTargetDesc - -include $(LEVEL)/Makefile.common diff --git a/llvm/lib/Target/CellSPU/README.txt b/llvm/lib/Target/CellSPU/README.txt deleted file mode 100644 index 3bce960..0000000 --- a/llvm/lib/Target/CellSPU/README.txt +++ /dev/null @@ -1,106 +0,0 @@ -//===- README.txt - Notes for improving CellSPU-specific code gen ---------===// - -This code was contributed by a team from the Computer Systems Research -Department in The Aerospace Corporation: - -- Scott Michel (head bottle washer and much of the non-floating point - instructions) -- Mark Thomas (floating point instructions) -- Michael AuYeung (intrinsics) -- Chandler Carruth (LLVM expertise) -- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise) - -Some minor fixes added by Kalle Raiskila. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR -OTHERWISE. IN NO EVENT SHALL THE AEROSPACE CORPORATION BE LIABLE FOR DAMAGES -OF ANY KIND OR NATURE WHETHER BASED IN CONTRACT, TORT, OR OTHERWISE ARISING -OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE INCLUDING, WITHOUT -LIMITATION, DAMAGES RESULTING FROM LOST OR CONTAMINATED DATA, LOST PROFITS OR -REVENUE, COMPUTER MALFUNCTION, OR FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, -OR PUNITIVE DAMAGES, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES OR -SUCH DAMAGES ARE FORESEEABLE. - ---------------------------------------------------------------------------- ---WARNING--: ---WARNING--: The CellSPU work is work-in-progress and "alpha" quality code. ---WARNING--: - -If you are brave enough to try this code or help to hack on it, be sure -to add 'spu' to configure's --enable-targets option, e.g.: - - ./configure \ - --enable-targets=x86,x86_64,powerpc,spu - ---------------------------------------------------------------------------- - -TODO: -* In commit r142152 vector legalization was set to element promotion per - default. This breaks half vectors (e.g. v2i32) badly as they get element - promoted to much slower types (v2i64). - -* Many CellSPU specific codegen tests only grep & count the number of - instructions, not checking their place with FileCheck. There have also - been some commits that change the CellSPU checks, some of which might - have not been thoroughly scrutinized w.r.t. to the changes they cause in SPU - assembly. (especially since about the time of r142152) - -* Some of the i64 math have huge tablegen rules, which sometime cause - tablegen to run out of memory. See e.g. bug 8850. i64 arithmetics - should probably be done with libraries. - -* Create a machine pass for performing dual-pipeline scheduling specifically - for CellSPU, and insert branch prediction instructions as needed. - -* i32 instructions: - - * i32 division (work-in-progress) - -* i64 support (see i64operations.c test harness): - - * shifts and comparison operators: done - * sign and zero extension: done - * addition: done - * subtraction: needed - * multiplication: done - -* i128 support: - - * zero extension, any extension: done - * sign extension: done - * arithmetic operators (add, sub, mul, div): needed - * logical operations (and, or, shl, srl, sra, xor, nor, nand): needed - - * or: done - -* f64 support - - * Comparison operators: - SETOEQ unimplemented - SETOGT unimplemented - SETOGE unimplemented - SETOLT unimplemented - SETOLE unimplemented - SETONE unimplemented - SETO done (lowered) - SETUO done (lowered) - SETUEQ unimplemented - SETUGT unimplemented - SETUGE unimplemented - SETULT unimplemented - SETULE unimplemented - SETUNE unimplemented - -* LLVM vector suport - - * VSETCC needs to be implemented. It's pretty straightforward to code, but - needs implementation. - -* Intrinsics - - * spu.h instrinsics added but not tested. Need to have an operational - llvm-spu-gcc in order to write a unit test harness. - -===-------------------------------------------------------------------------=== diff --git a/llvm/lib/Target/CellSPU/SPU.h b/llvm/lib/Target/CellSPU/SPU.h deleted file mode 100644 index c660131..0000000 --- a/llvm/lib/Target/CellSPU/SPU.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- SPU.h - Top-level interface for Cell SPU Target ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in the LLVM -// Cell SPU back-end. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_IBMCELLSPU_H -#define LLVM_TARGET_IBMCELLSPU_H - -#include "MCTargetDesc/SPUMCTargetDesc.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - class SPUTargetMachine; - class FunctionPass; - class formatted_raw_ostream; - - FunctionPass *createSPUISelDag(SPUTargetMachine &TM); - FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm); - -} - -#endif /* LLVM_TARGET_IBMCELLSPU_H */ diff --git a/llvm/lib/Target/CellSPU/SPU.td b/llvm/lib/Target/CellSPU/SPU.td deleted file mode 100644 index e835b9c..0000000 --- a/llvm/lib/Target/CellSPU/SPU.td +++ /dev/null @@ -1,66 +0,0 @@ -//===-- SPU.td - Describe the STI Cell SPU Target Machine --*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the top level entry point for the STI Cell SPU target machine. -// -//===----------------------------------------------------------------------===// - -// Get the target-independent interfaces which we are implementing. -// -include "llvm/Target/Target.td" - -// Holder of code fragments (you'd think this'd already be in -// a td file somewhere... :-) - -class CodeFrag { - dag Fragment = frag; -} - -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "SPURegisterInfo.td" - -//===----------------------------------------------------------------------===// -// Instruction formats, instructions -//===----------------------------------------------------------------------===// - -include "SPUNodes.td" -include "SPUOperands.td" -include "SPUSchedule.td" -include "SPUInstrFormats.td" -include "SPUInstrInfo.td" - -//===----------------------------------------------------------------------===// -// Subtarget features: -//===----------------------------------------------------------------------===// - -def DefaultProc: SubtargetFeature<"", "ProcDirective", "SPU::DEFAULT_PROC", "">; -def LargeMemFeature: - SubtargetFeature<"large_mem","UseLargeMem", "true", - "Use large (>256) LSA memory addressing [default = false]">; - -def SPURev0 : Processor<"v0", SPUItineraries, [DefaultProc]>; - -//===----------------------------------------------------------------------===// -// Calling convention: -//===----------------------------------------------------------------------===// - -include "SPUCallingConv.td" - -// Target: - -def SPUInstrInfo : InstrInfo { - let isLittleEndianEncoding = 1; -} - -def SPU : Target { - let InstructionSet = SPUInstrInfo; -} diff --git a/llvm/lib/Target/CellSPU/SPU128InstrInfo.td b/llvm/lib/Target/CellSPU/SPU128InstrInfo.td deleted file mode 100644 index e051e04..0000000 --- a/llvm/lib/Target/CellSPU/SPU128InstrInfo.td +++ /dev/null @@ -1,41 +0,0 @@ -//===-- SPU128InstrInfo.td - Cell SPU 128-bit operations --*- tablegen -*--===// -// -// Cell SPU 128-bit operations -// -//===----------------------------------------------------------------------===// - -// zext 32->128: Zero extend 32-bit to 128-bit -def : Pat<(i128 (zext R32C:$rSrc)), - (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>; - -// zext 64->128: Zero extend 64-bit to 128-bit -def : Pat<(i128 (zext R64C:$rSrc)), - (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>; - -// zext 16->128: Zero extend 16-bit to 128-bit -def : Pat<(i128 (zext R16C:$rSrc)), - (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>; - -// zext 8->128: Zero extend 8-bit to 128-bit -def : Pat<(i128 (zext R8C:$rSrc)), - (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>; - -// anyext 32->128: Zero extend 32-bit to 128-bit -def : Pat<(i128 (anyext R32C:$rSrc)), - (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>; - -// anyext 64->128: Zero extend 64-bit to 128-bit -def : Pat<(i128 (anyext R64C:$rSrc)), - (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>; - -// anyext 16->128: Zero extend 16-bit to 128-bit -def : Pat<(i128 (anyext R16C:$rSrc)), - (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>; - -// anyext 8->128: Zero extend 8-bit to 128-bit -def : Pat<(i128 (anyext R8C:$rSrc)), - (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>; - -// Shift left -def : Pat<(shl GPRC:$rA, R32C:$rB), - (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>; diff --git a/llvm/lib/Target/CellSPU/SPU64InstrInfo.td b/llvm/lib/Target/CellSPU/SPU64InstrInfo.td deleted file mode 100644 index bea33b5..0000000 --- a/llvm/lib/Target/CellSPU/SPU64InstrInfo.td +++ /dev/null @@ -1,408 +0,0 @@ -//====-- SPU64InstrInfo.td - Cell SPU 64-bit operations ---*- tablegen -*--===// -// -// Cell SPU 64-bit operations -// -//===----------------------------------------------------------------------===// - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// 64-bit comparisons: -// -// 1. The instruction sequences for vector vice scalar differ by a -// constant. In the scalar case, we're only interested in the -// top two 32-bit slots, whereas we're interested in an exact -// all-four-slot match in the vector case. -// -// 2. There are no "immediate" forms, since loading 64-bit constants -// could be a constant pool load. -// -// 3. i64 setcc results are i32, which are subsequently converted to a FSM -// mask when used in a select pattern. -// -// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO) -// [Note: this may be moot, since gb produces v4i32 or r32.] -// -// 5. The code sequences for r64 and v2i64 are probably overly conservative, -// compared to the code that gcc produces. -// -// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!) -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -// selb instruction definition for i64. Note that the selection mask is -// a vector, produced by various forms of FSM: -def SELBr64_cond: - SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC), - [/* no pattern */]>; - -// The generic i64 select pattern, which assumes that the comparison result -// is in a 32-bit register that contains a select mask pattern (i.e., gather -// bits result): - -def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue), - (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>; - -// select the negative condition: -class I64SELECTNegCond: - Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse), - (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>; - -// setcc the negative condition: -class I64SETCCNegCond: - Pat<(cond R64C:$rA, R64C:$rB), - (XORIr32 compare.Fragment, -1)>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// The i64 seteq fragment that does the scalar->vector conversion and -// comparison: -def CEQr64compare: - CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>; - -// The i64 seteq fragment that does the vector comparison -def CEQv2i64compare: - CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>; - -// i64 seteq (equality): the setcc result is i32, which is converted to a -// vector FSM mask when used in a select pattern. -// -// v2i64 seteq (equality): the setcc result is v4i32 -multiclass CompareEqual64 { - // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>; - def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>; - - // SELB mask from FSM: - def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CEQr64compare.Fragment), R32C))>; - def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CEQv2i64compare.Fragment), R32C))>; -} - -defm I64EQ: CompareEqual64; - -def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>; -def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>; - -// i64 setne: -def : I64SETCCNegCond; -def : I64SELECTNegCond; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// i64 setugt/setule: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def CLGTr64ugt: - CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG))>; - -def CLGTr64eq: - CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG))>; - -def CLGTr64compare: - CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment, - (XSWDv2i64 CLGTr64ugt.Fragment), - CLGTr64eq.Fragment)>; - -def CLGTv2i64ugt: - CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>; - -def CLGTv2i64eq: - CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>; - -def CLGTv2i64compare: - CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment, - (XSWDv2i64 CLGTr64ugt.Fragment), - CLGTv2i64eq.Fragment)>; - -multiclass CompareLogicalGreaterThan64 { - // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>; - def v2i64: CodeFrag; - - // SELB mask from FSM: - def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CLGTr64compare.Fragment), R32C))>; - def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>; -} - -defm I64LGT: CompareLogicalGreaterThan64; - -def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>; -//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), -// I64LGTv2i64.Fragment>; - -// i64 setult: -def : I64SETCCNegCond; -def : I64SELECTNegCond; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// i64 setuge/setult: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def CLGEr64compare: - CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment, - CLGTr64eq.Fragment)), 0xb)>; - -def CLGEv2i64compare: - CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment, - CLGTv2i64eq.Fragment)), 0xf)>; - -multiclass CompareLogicalGreaterEqual64 { - // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>; - def v2i64: CodeFrag; - - // SELB mask from FSM: - def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CLGEr64compare.Fragment), R32C))>; - def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>; -} - -defm I64LGE: CompareLogicalGreaterEqual64; - -def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>; -def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))), - I64LGEv2i64.Fragment>; - - -// i64 setult: -def : I64SETCCNegCond; -def : I64SELECTNegCond; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// i64 setgt/setle: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def CGTr64sgt: - CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG))>; - -def CGTr64eq: - CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG))>; - -def CGTr64compare: - CodeFrag<(SELBv2i64 CGTr64sgt.Fragment, - (XSWDv2i64 CGTr64sgt.Fragment), - CGTr64eq.Fragment)>; - -def CGTv2i64sgt: - CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>; - -def CGTv2i64eq: - CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>; - -def CGTv2i64compare: - CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment, - (XSWDv2i64 CGTr64sgt.Fragment), - CGTv2i64eq.Fragment)>; - -multiclass CompareGreaterThan64 { - // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>; - def v2i64: CodeFrag; - - // SELB mask from FSM: - def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CGTr64compare.Fragment), R32C))>; - def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CGTv2i64compare.Fragment), R32C))>; -} - -defm I64GT: CompareLogicalGreaterThan64; - -def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>; -//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), -// I64GTv2i64.Fragment>; - -// i64 setult: -def : I64SETCCNegCond; -def : I64SELECTNegCond; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// i64 setge/setlt: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def CGEr64compare: - CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment, - CGTr64eq.Fragment)), 0xb)>; - -def CGEv2i64compare: - CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment, - CGTv2i64eq.Fragment)), 0xf)>; - -multiclass CompareGreaterEqual64 { - // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>; - def v2i64: CodeFrag; - - // SELB mask from FSM: - def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>; - def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>; -} - -defm I64GE: CompareGreaterEqual64; - -def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>; -def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))), - I64GEv2i64.Fragment>; - -// i64 setult: -def : I64SETCCNegCond; -def : I64SELECTNegCond; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// v2i64, i64 add -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class v2i64_add_cg: - CodeFrag<(CGv4i32 lhs, rhs)>; - -class v2i64_add_1: - CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>; - -class v2i64_add: - v2i64_add_1.Fragment, cg_mask>; - -def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), - (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG), - (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; - -def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), - (v4i32 VECREG:$rCGmask)), - v2i64_add<(v2i64 VECREG:$rA), - (v2i64 VECREG:$rB), - (v4i32 VECREG:$rCGmask)>.Fragment>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// v2i64, i64 subtraction -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class v2i64_sub_bg: CodeFrag<(BGv4i32 lhs, rhs)>; - -class v2i64_sub: - CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>; - -def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), - (COPY_TO_REGCLASS - v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG), - v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment, - (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; - -def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), - (v4i32 VECREG:$rCGmask)), - v2i64_sub<(v2i64 VECREG:$rA), - (v2i64 VECREG:$rB), - v2i64_sub_bg<(v2i64 VECREG:$rA), - (v2i64 VECREG:$rB)>.Fragment, - (v4i32 VECREG:$rCGmask)>.Fragment>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// v2i64, i64 multiply -// -// Note: i64 multiply is simply the vector->scalar conversion of the -// full-on v2i64 multiply, since the entire vector has to be manipulated -// anyway. -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class v2i64_mul_ahi64 : - CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>; - -class v2i64_mul_bhi64 : - CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>; - -class v2i64_mul_alo64 : - CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>; - -class v2i64_mul_blo64 : - CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>; - -class v2i64_mul_ashlq2: - CodeFrag<(SHLQBYIv4i32 rA, 0x2)>; - -class v2i64_mul_ashlq4: - CodeFrag<(SHLQBYIv4i32 rA, 0x4)>; - -class v2i64_mul_bshlq2 : - CodeFrag<(SHLQBYIv4i32 rB, 0x2)>; - -class v2i64_mul_bshlq4 : - CodeFrag<(SHLQBYIv4i32 rB, 0x4)>; - -class v2i64_highprod: - CodeFrag<(Av4i32 - (Av4i32 - (MPYUv4i32 v2i64_mul_bshlq4.Fragment, // a1 x b3 - v2i64_mul_ahi64.Fragment), - (MPYHv4i32 v2i64_mul_ahi64.Fragment, // a0 x b3 - v2i64_mul_bshlq4.Fragment)), - (Av4i32 - (MPYHv4i32 v2i64_mul_bhi64.Fragment, - v2i64_mul_ashlq4.Fragment), - (Av4i32 - (MPYHv4i32 v2i64_mul_ashlq4.Fragment, - v2i64_mul_bhi64.Fragment), - (Av4i32 - (MPYUv4i32 v2i64_mul_ashlq4.Fragment, - v2i64_mul_bhi64.Fragment), - (Av4i32 - (MPYHv4i32 v2i64_mul_ashlq2.Fragment, - v2i64_mul_bshlq2.Fragment), - (MPYUv4i32 v2i64_mul_ashlq2.Fragment, - v2i64_mul_bshlq2.Fragment))))))>; - -class v2i64_mul_a3_b3: - CodeFrag<(MPYUv4i32 v2i64_mul_alo64.Fragment, - v2i64_mul_blo64.Fragment)>; - -class v2i64_mul_a2_b3: - CodeFrag<(SELBv4i32 (SHLQBYIv4i32 - (MPYHHUv4i32 v2i64_mul_alo64.Fragment, - v2i64_mul_bshlq2.Fragment), 0x2), - (ILv4i32 0), - (FSMBIv4i32 0xc3c3))>; - -class v2i64_mul_a3_b2: - CodeFrag<(SELBv4i32 (SHLQBYIv4i32 - (MPYHHUv4i32 v2i64_mul_blo64.Fragment, - v2i64_mul_ashlq2.Fragment), 0x2), - (ILv4i32 0), - (FSMBIv4i32 0xc3c3))>; - -class v2i64_lowsum: - v2i64_add.Fragment, - v2i64_mul_a2_b3.Fragment, rCGmask>.Fragment, - v2i64_mul_a3_b2.Fragment, rCGmask>; - -class v2i64_mul: - v2i64_add.Fragment, - (SELBv4i32 v2i64_highprod.Fragment, - (ILv4i32 0), - (FSMBIv4i32 0x0f0f)), - rCGmask>; - -def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), - (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG), - (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; - -def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), - (v4i32 VECREG:$rCGmask)), - v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB), - (v4i32 VECREG:$rCGmask)>.Fragment>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// f64 comparisons -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -// selb instruction definition for i64. Note that the selection mask is -// a vector, produced by various forms of FSM: -def SELBf64_cond: - SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC), - [(set R64FP:$rT, - (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>; diff --git a/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp b/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp deleted file mode 100644 index 3396e8b..0000000 --- a/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp +++ /dev/null @@ -1,333 +0,0 @@ -//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to Cell SPU assembly language. This printer -// is the output mechanism used by `llc'. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asmprinter" -#include "SPU.h" -#include "SPUTargetMachine.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - class SPUAsmPrinter : public AsmPrinter { - public: - explicit SPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : - AsmPrinter(TM, Streamer) {} - - virtual const char *getPassName() const { - return "STI CBEA SPU Assembly Printer"; - } - - /// printInstruction - This method is automatically generated by tablegen - /// from the instruction set description. - void printInstruction(const MachineInstr *MI, raw_ostream &OS); - static const char *getRegisterName(unsigned RegNo); - - - void EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); - } - void printOp(const MachineOperand &MO, raw_ostream &OS); - - void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNo); - if (MO.isReg()) { - O << getRegisterName(MO.getReg()); - } else if (MO.isImm()) { - O << MO.getImm(); - } else { - printOp(MO, O); - } - } - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - - - void - printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - unsigned int value = MI->getOperand(OpNo).getImm(); - assert(value < (1 << 8) && "Invalid u7 argument"); - O << value; - } - - void - printShufAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - char value = MI->getOperand(OpNo).getImm(); - O << (int) value; - O << "("; - printOperand(MI, OpNo+1, O); - O << ")"; - } - - void - printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - O << (short) MI->getOperand(OpNo).getImm(); - } - - void - printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - O << (unsigned short)MI->getOperand(OpNo).getImm(); - } - - void - printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - // When used as the base register, r0 reads constant zero rather than - // the value contained in the register. For this reason, the darwin - // assembler requires that we print r0 as 0 (no r) when used as the base. - const MachineOperand &MO = MI->getOperand(OpNo); - O << getRegisterName(MO.getReg()) << ", "; - printOperand(MI, OpNo+1, O); - } - - void - printU18ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - unsigned int value = MI->getOperand(OpNo).getImm(); - assert(value <= (1 << 19) - 1 && "Invalid u18 argument"); - O << value; - } - - void - printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - short value = MI->getOperand(OpNo).getImm(); - assert((value >= -(1 << 9) && value <= (1 << 9) - 1) - && "Invalid s10 argument"); - O << value; - } - - void - printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - short value = MI->getOperand(OpNo).getImm(); - assert((value <= (1 << 10) - 1) && "Invalid u10 argument"); - O << value; - } - - void - printDFormAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - assert(MI->getOperand(OpNo).isImm() && - "printDFormAddr first operand is not immediate"); - int64_t value = int64_t(MI->getOperand(OpNo).getImm()); - int16_t value16 = int16_t(value); - assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1) - && "Invalid dform s10 offset argument"); - O << (value16 & ~0xf) << "("; - printOperand(MI, OpNo+1, O); - O << ")"; - } - - void - printAddr256K(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - /* Note: operand 1 is an offset or symbol name. */ - if (MI->getOperand(OpNo).isImm()) { - printS16ImmOperand(MI, OpNo, O); - } else { - printOp(MI->getOperand(OpNo), O); - if (MI->getOperand(OpNo+1).isImm()) { - int displ = int(MI->getOperand(OpNo+1).getImm()); - if (displ > 0) - O << "+" << displ; - else if (displ < 0) - O << displ; - } - } - } - - void printCallOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - printOp(MI->getOperand(OpNo), O); - } - - void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - printOp(MI->getOperand(OpNo), O); - } - - void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - // Used to generate a ".-", but it turns out that the assembler - // really wants the target. - // - // N.B.: This operand is used for call targets. Branch hints are another - // animal entirely. - printOp(MI->getOperand(OpNo), O); - } - - void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) { - printS16ImmOperand(MI, OpNo, O); - } else { - printOp(MI->getOperand(OpNo), O); - O << "@h"; - } - } - - void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) { - printS16ImmOperand(MI, OpNo, O); - } else { - printOp(MI->getOperand(OpNo), O); - O << "@l"; - } - } - - /// Print local store address - void printSymbolLSA(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - printOp(MI->getOperand(OpNo), O); - } - - void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) { - int value = (int) MI->getOperand(OpNo).getImm(); - assert((value >= 0 && value < 16) - && "Invalid negated immediate rotate 7-bit argument"); - O << -value; - } else { - llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm"); - } - } - - void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O){ - assert(MI->getOperand(OpNo).isImm() && - "Invalid/non-immediate rotate amount in printRotateNeg7Imm"); - int value = (int) MI->getOperand(OpNo).getImm(); - assert((value >= 0 && value <= 32) - && "Invalid negated immediate rotate 7-bit argument"); - O << -value; - } - }; -} // end of anonymous namespace - -// Include the auto-generated portion of the assembly writer -#include "SPUGenAsmWriter.inc" - -void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { - switch (MO.getType()) { - case MachineOperand::MO_Immediate: - report_fatal_error("printOp() does not handle immediate values"); - - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - return; - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() - << '_' << MO.getIndex(); - return; - case MachineOperand::MO_ExternalSymbol: - // Computing the address of an external symbol, not calling it. - if (TM.getRelocationModel() != Reloc::Static) { - O << "L" << MAI->getGlobalPrefix() << MO.getSymbolName() - << "$non_lazy_ptr"; - return; - } - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - return; - case MachineOperand::MO_GlobalAddress: - // External or weakly linked global variables need non-lazily-resolved - // stubs - if (TM.getRelocationModel() != Reloc::Static) { - const GlobalValue *GV = MO.getGlobal(); - if (((GV->isDeclaration() || GV->hasWeakLinkage() || - GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) { - O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - return; - } - } - O << *Mang->getSymbol(MO.getGlobal()); - return; - case MachineOperand::MO_MCSymbol: - O << *(MO.getMCSymbol()); - return; - default: - O << ""; - return; - } -} - -/// PrintAsmOperand - Print out an operand for an inline asm expression. -/// -bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O) { - // Does this asm operand have a single letter operand modifier? - if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) return true; // Unknown modifier. - - switch (ExtraCode[0]) { - default: - // See if this is a generic print operand - return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); - case 'L': // Write second word of DImode reference. - // Verify that this operand has two consecutive registers. - if (!MI->getOperand(OpNo).isReg() || - OpNo+1 == MI->getNumOperands() || - !MI->getOperand(OpNo+1).isReg()) - return true; - ++OpNo; // Return the high-part. - break; - } - } - - printOperand(MI, OpNo, O); - return false; -} - -bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. - printMemRegReg(MI, OpNo, O); - return false; -} - -// Force static initialization. -extern "C" void LLVMInitializeCellSPUAsmPrinter() { - RegisterAsmPrinter X(TheCellSPUTarget); -} diff --git a/llvm/lib/Target/CellSPU/SPUCallingConv.td b/llvm/lib/Target/CellSPU/SPUCallingConv.td deleted file mode 100644 index 9bc6be7..0000000 --- a/llvm/lib/Target/CellSPU/SPUCallingConv.td +++ /dev/null @@ -1,53 +0,0 @@ -//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This describes the calling conventions for the STI Cell SPU architecture. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Return Value Calling Convention -//===----------------------------------------------------------------------===// - -// Return-value convention for Cell SPU: return value to be passed in reg 3-74 -def RetCC_SPU : CallingConv<[ - CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64], - CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74]>> -]>; - - -//===----------------------------------------------------------------------===// -// CellSPU Argument Calling Conventions -//===----------------------------------------------------------------------===// -def CCC_SPU : CallingConv<[ - CCIfType<[i8, i16, i32, i64, i128, f32, f64, - v16i8, v8i16, v4i32, v4f32, v2i64, v2f64], - CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74]>>, - // Integer/FP values get stored in stack slots that are 8 bytes in size and - // 8-byte aligned if there are no more registers to hold them. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, - - // Vectors get 16-byte stack slots that are 16-byte aligned. - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCAssignToStack<16, 16>> -]>; diff --git a/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp b/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp deleted file mode 100644 index f011995..0000000 --- a/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp +++ /dev/null @@ -1,256 +0,0 @@ -//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Top-level implementation for the Cell SPU target. -// -//===----------------------------------------------------------------------===// - -#include "SPUFrameLowering.h" -#include "SPU.h" -#include "SPUInstrBuilder.h" -#include "SPUInstrInfo.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CommandLine.h" -using namespace llvm; - -//===----------------------------------------------------------------------===// -// SPUFrameLowering: -//===----------------------------------------------------------------------===// - -SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0), - Subtarget(sti) { - LR[0].first = SPU::R0; - LR[0].second = 16; -} - - -//-------------------------------------------------------------------------- -// hasFP - Return true if the specified function actually has a dedicated frame -// pointer register. This is true if the function needs a frame pointer and has -// a non-zero stack size. -bool SPUFrameLowering::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - - return MFI->getStackSize() && - (MF.getTarget().Options.DisableFramePointerElim(MF) || - MFI->hasVarSizedObjects()); -} - - -/// determineFrameLayout - Determine the size of the frame and maximum call -/// frame size. -void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Get the number of bytes to allocate from the FrameInfo - unsigned FrameSize = MFI->getStackSize(); - - // Get the alignments provided by the target, and the maximum alignment - // (if any) of the fixed frame objects. - unsigned TargetAlign = getStackAlignment(); - unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment()); - assert(isPowerOf2_32(Align) && "Alignment is not power of 2"); - unsigned AlignMask = Align - 1; - - // Get the maximum call frame size of all the calls. - unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); - - // If we have dynamic alloca then maxCallFrameSize needs to be aligned so - // that allocations will be aligned. - if (MFI->hasVarSizedObjects()) - maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; - - // Update maximum call frame size. - MFI->setMaxCallFrameSize(maxCallFrameSize); - - // Include call frame size in total. - FrameSize += maxCallFrameSize; - - // Make sure the frame is aligned. - FrameSize = (FrameSize + AlignMask) & ~AlignMask; - - // Update frame info. - MFI->setStackSize(FrameSize); -} - -void SPUFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const SPUInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - MachineModuleInfo &MMI = MF.getMMI(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - - // Prepare for debug frame info. - bool hasDebugInfo = MMI.hasDebugInfo(); - MCSymbol *FrameLabel = 0; - - // Move MBBI back to the beginning of the function. - MBBI = MBB.begin(); - - // Work out frame sizes. - determineFrameLayout(MF); - int FrameSize = MFI->getStackSize(); - - assert((FrameSize & 0xf) == 0 - && "SPURegisterInfo::emitPrologue: FrameSize not aligned"); - - // the "empty" frame size is 16 - just the register scavenger spill slot - if (FrameSize > 16 || MFI->adjustsStack()) { - FrameSize = -(FrameSize + SPUFrameLowering::minStackSize()); - if (hasDebugInfo) { - // Mark effective beginning of when frame pointer becomes valid. - FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel); - } - - // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp) - // for the ABI - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16) - .addReg(SPU::R1); - if (isInt<10>(FrameSize)) { - // Spill $sp to adjusted $sp - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize) - .addReg(SPU::R1); - // Adjust $sp by required amout - BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1) - .addImm(FrameSize); - } else if (isInt<16>(FrameSize)) { - // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use - // $r2 to adjust $sp: - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2) - .addImm(-16) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2) - .addImm(FrameSize); - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1) - .addReg(SPU::R2) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1) - .addReg(SPU::R1) - .addReg(SPU::R2); - BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2) - .addReg(SPU::R2) - .addImm(16); - BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2) - .addReg(SPU::R2) - .addReg(SPU::R1); - } else { - report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); - } - - if (hasDebugInfo) { - std::vector &Moves = MMI.getFrameMoves(); - - // Show update of SP. - MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize); - Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); - - // Add callee saved registers to move list. - const std::vector &CSI = MFI->getCalleeSavedInfo(); - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); - unsigned Reg = CSI[I].getReg(); - if (Reg == SPU::R0) continue; - MachineLocation CSDst(MachineLocation::VirtualFP, Offset); - MachineLocation CSSrc(Reg); - Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc)); - } - - // Mark effective beginning of when frame pointer is ready. - MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel); - - MachineLocation FPDst(SPU::R1); - MachineLocation FPSrc(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc)); - } - } -} - -void SPUFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - const SPUInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - int FrameSize = MFI->getStackSize(); - int LinkSlotOffset = SPUFrameLowering::stackSlotSize(); - DebugLoc dl = MBBI->getDebugLoc(); - - assert(MBBI->getOpcode() == SPU::RET && - "Can only insert epilog into returning blocks"); - assert((FrameSize & 0xf) == 0 && "FrameSize not aligned"); - - // the "empty" frame size is 16 - just the register scavenger spill slot - if (FrameSize > 16 || MFI->adjustsStack()) { - FrameSize = FrameSize + SPUFrameLowering::minStackSize(); - if (isInt<10>(FrameSize + LinkSlotOffset)) { - // Reload $lr, adjust $sp by required amount - // Note: We do this to slightly improve dual issue -- not by much, but it - // is an opportunity for dual issue. - BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0) - .addImm(FrameSize + LinkSlotOffset) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1) - .addReg(SPU::R1) - .addImm(FrameSize); - } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) { - // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use - // $r2 to adjust $sp: - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2) - .addImm(16) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2) - .addImm(FrameSize); - BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1) - .addReg(SPU::R1) - .addReg(SPU::R2); - BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0) - .addImm(16) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2). - addReg(SPU::R2) - .addImm(16); - BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2) - .addReg(SPU::R2) - .addReg(SPU::R1); - } else { - report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); - } - } -} - -void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const{ - // Mark LR and SP unused, since the prolog spills them to stack and - // we don't want anyone else to spill them for us. - // - // Also, unless R2 is really used someday, don't spill it automatically. - MF.getRegInfo().setPhysRegUnused(SPU::R0); - MF.getRegInfo().setPhysRegUnused(SPU::R1); - MF.getRegInfo().setPhysRegUnused(SPU::R2); - - MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetRegisterClass *RC = &SPU::R32CRegClass; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); -} diff --git a/llvm/lib/Target/CellSPU/SPUFrameLowering.h b/llvm/lib/Target/CellSPU/SPUFrameLowering.h deleted file mode 100644 index 11c5281..0000000 --- a/llvm/lib/Target/CellSPU/SPUFrameLowering.h +++ /dev/null @@ -1,80 +0,0 @@ -//===-- SPUFrameLowering.h - SPU Frame Lowering stuff ----------*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains CellSPU frame information that doesn't fit anywhere else -// cleanly... -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_FRAMEINFO_H -#define SPU_FRAMEINFO_H - -#include "SPURegisterInfo.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - class SPUSubtarget; - - class SPUFrameLowering: public TargetFrameLowering { - const SPUSubtarget &Subtarget; - std::pair LR[1]; - - public: - SPUFrameLowering(const SPUSubtarget &sti); - - //! Determine the frame's layour - void determineFrameLayout(MachineFunction &MF) const; - - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - - //! Prediate: Target has dedicated frame pointer - bool hasFP(const MachineFunction &MF) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const; - - //! Return a function's saved spill slots - /*! - For CellSPU, a function's saved spill slots is just the link register. - */ - const std::pair * - getCalleeSaveSpillSlots(unsigned &NumEntries) const; - - //! Stack slot size (16 bytes) - static int stackSlotSize() { - return 16; - } - //! Maximum frame offset representable by a signed 10-bit integer - /*! - This is the maximum frame offset that can be expressed as a 10-bit - integer, used in D-form addresses. - */ - static int maxFrameOffset() { - return ((1 << 9) - 1) * stackSlotSize(); - } - //! Minimum frame offset representable by a signed 10-bit integer - static int minFrameOffset() { - return -(1 << 9) * stackSlotSize(); - } - //! Minimum frame size (enough to spill LR + SP) - static int minStackSize() { - return (2 * stackSlotSize()); - } - //! Convert frame index to stack offset - static int FItoStackOffset(int frame_index) { - return frame_index * stackSlotSize(); - } - }; -} - -#endif diff --git a/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp deleted file mode 100644 index 67a83f1..0000000 --- a/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp +++ /dev/null @@ -1,135 +0,0 @@ -//===-- SPUHazardRecognizers.cpp - Cell Hazard Recognizer Impls -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements hazard recognizers for scheduling on Cell SPU -// processors. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "sched" - -#include "SPUHazardRecognizers.h" -#include "SPU.h" -#include "SPUInstrInfo.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Cell SPU hazard recognizer -// -// This is the pipeline hazard recognizer for the Cell SPU processor. It does -// very little right now. -//===----------------------------------------------------------------------===// - -/// Return the pipeline hazard type encountered or generated by this -/// instruction. Currently returns NoHazard. -/// -/// \return NoHazard -ScheduleHazardRecognizer::HazardType -SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls) -{ - // Initial thoughts on how to do this, but this code cannot work unless the - // function's prolog and epilog code are also being scheduled so that we can - // accurately determine which pipeline is being scheduled. -#if 0 - assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead"); - - const SDNode *Node = SU->getNode()->getFlaggedMachineNode(); - ScheduleHazardRecognizer::HazardType retval = NoHazard; - bool mustBeOdd = false; - - switch (Node->getOpcode()) { - case SPU::LQDv16i8: - case SPU::LQDv8i16: - case SPU::LQDv4i32: - case SPU::LQDv4f32: - case SPU::LQDv2f64: - case SPU::LQDr128: - case SPU::LQDr64: - case SPU::LQDr32: - case SPU::LQDr16: - case SPU::LQAv16i8: - case SPU::LQAv8i16: - case SPU::LQAv4i32: - case SPU::LQAv4f32: - case SPU::LQAv2f64: - case SPU::LQAr128: - case SPU::LQAr64: - case SPU::LQAr32: - case SPU::LQXv4i32: - case SPU::LQXr128: - case SPU::LQXr64: - case SPU::LQXr32: - case SPU::LQXr16: - case SPU::STQDv16i8: - case SPU::STQDv8i16: - case SPU::STQDv4i32: - case SPU::STQDv4f32: - case SPU::STQDv2f64: - case SPU::STQDr128: - case SPU::STQDr64: - case SPU::STQDr32: - case SPU::STQDr16: - case SPU::STQDr8: - case SPU::STQAv16i8: - case SPU::STQAv8i16: - case SPU::STQAv4i32: - case SPU::STQAv4f32: - case SPU::STQAv2f64: - case SPU::STQAr128: - case SPU::STQAr64: - case SPU::STQAr32: - case SPU::STQAr16: - case SPU::STQAr8: - case SPU::STQXv16i8: - case SPU::STQXv8i16: - case SPU::STQXv4i32: - case SPU::STQXv4f32: - case SPU::STQXv2f64: - case SPU::STQXr128: - case SPU::STQXr64: - case SPU::STQXr32: - case SPU::STQXr16: - case SPU::STQXr8: - case SPU::RET: - mustBeOdd = true; - break; - default: - // Assume that this instruction can be on the even pipe - break; - } - - if (mustBeOdd && !EvenOdd) - retval = Hazard; - - DEBUG(errs() << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard " - << retval << "\n"); - EvenOdd ^= 1; - return retval; -#else - return NoHazard; -#endif -} - -void SPUHazardRecognizer::EmitInstruction(SUnit *SU) -{ -} - -void SPUHazardRecognizer::AdvanceCycle() -{ - DEBUG(errs() << "SPUHazardRecognizer::AdvanceCycle\n"); -} - -void SPUHazardRecognizer::EmitNoop() -{ - AdvanceCycle(); -} diff --git a/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h b/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h deleted file mode 100644 index 30acaea..0000000 --- a/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h +++ /dev/null @@ -1,37 +0,0 @@ -//===-- SPUHazardRecognizers.h - Cell SPU Hazard Recognizer -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines hazard recognizers for scheduling on the Cell SPU -// processor. -// -//===----------------------------------------------------------------------===// - -#ifndef SPUHAZRECS_H -#define SPUHAZRECS_H - -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" - -namespace llvm { - -class TargetInstrInfo; - -/// SPUHazardRecognizer -class SPUHazardRecognizer : public ScheduleHazardRecognizer -{ -public: - SPUHazardRecognizer(const TargetInstrInfo &/*TII*/) {} - virtual HazardType getHazardType(SUnit *SU, int Stalls); - virtual void EmitInstruction(SUnit *SU); - virtual void AdvanceCycle(); - virtual void EmitNoop(); -}; - -} // end namespace llvm - -#endif diff --git a/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp deleted file mode 100644 index 5d506105..0000000 --- a/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ /dev/null @@ -1,1192 +0,0 @@ -//===-- SPUISelDAGToDAG.cpp - CellSPU pattern matching inst selector ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a pattern matching instruction selector for the Cell SPU, -// converting from a legalized dag to a SPU-target dag. -// -//===----------------------------------------------------------------------===// - -#include "SPU.h" -#include "SPUTargetMachine.h" -#include "SPUHazardRecognizers.h" -#include "SPUFrameLowering.h" -#include "SPUTargetMachine.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Constants.h" -#include "llvm/GlobalValue.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates - bool - isI32IntS10Immediate(ConstantSDNode *CN) - { - return isInt<10>(CN->getSExtValue()); - } - - //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values - bool - isI32IntU10Immediate(ConstantSDNode *CN) - { - return isUInt<10>(CN->getSExtValue()); - } - - //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values - bool - isI16IntS10Immediate(ConstantSDNode *CN) - { - return isInt<10>(CN->getSExtValue()); - } - - //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values - bool - isI16IntU10Immediate(ConstantSDNode *CN) - { - return isUInt<10>((short) CN->getZExtValue()); - } - - //! ConstantSDNode predicate for signed 16-bit values - /*! - \param CN The constant SelectionDAG node holding the value - \param Imm The returned 16-bit value, if returning true - - This predicate tests the value in \a CN to see whether it can be - represented as a 16-bit, sign-extended quantity. Returns true if - this is the case. - */ - bool - isIntS16Immediate(ConstantSDNode *CN, short &Imm) - { - EVT vt = CN->getValueType(0); - Imm = (short) CN->getZExtValue(); - if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) { - return true; - } else if (vt == MVT::i32) { - int32_t i_val = (int32_t) CN->getZExtValue(); - return i_val == SignExtend32<16>(i_val); - } else { - int64_t i_val = (int64_t) CN->getZExtValue(); - return i_val == SignExtend64<16>(i_val); - } - } - - //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext. - static bool - isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm) - { - EVT vt = FPN->getValueType(0); - if (vt == MVT::f32) { - int val = FloatToBits(FPN->getValueAPF().convertToFloat()); - if (val == SignExtend32<16>(val)) { - Imm = (short) val; - return true; - } - } - - return false; - } - - //! Generate the carry-generate shuffle mask. - SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { - SmallVector ShufBytes; - - // Create the shuffle mask for "rotating" the borrow up one register slot - // once the borrow is generated. - ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - - return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &ShufBytes[0], ShufBytes.size()); - } - - //! Generate the borrow-generate shuffle mask - SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { - SmallVector ShufBytes; - - // Create the shuffle mask for "rotating" the borrow up one register slot - // once the borrow is generated. - ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - - return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &ShufBytes[0], ShufBytes.size()); - } - - //===------------------------------------------------------------------===// - /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine - /// instructions for SelectionDAG operations. - /// - class SPUDAGToDAGISel : - public SelectionDAGISel - { - const SPUTargetMachine &TM; - const SPUTargetLowering &SPUtli; - unsigned GlobalBaseReg; - - public: - explicit SPUDAGToDAGISel(SPUTargetMachine &tm) : - SelectionDAGISel(tm), - TM(tm), - SPUtli(*tm.getTargetLowering()) - { } - - virtual bool runOnMachineFunction(MachineFunction &MF) { - // Make sure we re-emit a set of the global base reg if necessary - GlobalBaseReg = 0; - SelectionDAGISel::runOnMachineFunction(MF); - return true; - } - - /// getI32Imm - Return a target constant with the specified value, of type - /// i32. - inline SDValue getI32Imm(uint32_t Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i32); - } - - /// getSmallIPtrImm - Return a target constant of pointer type. - inline SDValue getSmallIPtrImm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); - } - - SDNode *emitBuildVector(SDNode *bvNode) { - EVT vecVT = bvNode->getValueType(0); - DebugLoc dl = bvNode->getDebugLoc(); - - // Check to see if this vector can be represented as a CellSPU immediate - // constant by invoking all of the instruction selection predicates: - if (((vecVT == MVT::v8i16) && - (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) || - ((vecVT == MVT::v4i32) && - ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || - (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || - (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || - (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) || - ((vecVT == MVT::v2i64) && - ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || - (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || - (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) { - HandleSDNode Dummy(SDValue(bvNode, 0)); - if (SDNode *N = Select(bvNode)) - return N; - return Dummy.getValue().getNode(); - } - - // No, need to emit a constant pool spill: - std::vector CV; - - for (size_t i = 0; i < bvNode->getNumOperands(); ++i) { - ConstantSDNode *V = cast (bvNode->getOperand(i)); - CV.push_back(const_cast(V->getConstantIntValue())); - } - - const Constant *CP = ConstantVector::get(CV); - SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy()); - unsigned Alignment = cast(CPIdx)->getAlignment(); - SDValue CGPoolOffset = - SPU::LowerConstantPool(CPIdx, *CurDAG, TM); - - HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl, - CurDAG->getEntryNode(), CGPoolOffset, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment)); - CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue()); - if (SDNode *N = SelectCode(Dummy.getValue().getNode())) - return N; - return Dummy.getValue().getNode(); - } - - /// Select - Convert the specified operand from a target-independent to a - /// target-specific node if it hasn't already been changed. - SDNode *Select(SDNode *N); - - //! Emit the instruction sequence for i64 shl - SDNode *SelectSHLi64(SDNode *N, EVT OpVT); - - //! Emit the instruction sequence for i64 srl - SDNode *SelectSRLi64(SDNode *N, EVT OpVT); - - //! Emit the instruction sequence for i64 sra - SDNode *SelectSRAi64(SDNode *N, EVT OpVT); - - //! Emit the necessary sequence for loading i64 constants: - SDNode *SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl); - - //! Alternate instruction emit sequence for loading i64 constants - SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl); - - //! Returns true if the address N is an A-form (local store) address - bool SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index); - - //! D-form address predicate - bool SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index); - - /// Alternate D-form address using i7 offset predicate - bool SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp, - SDValue &Base); - - /// D-form address selection workhorse - bool DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Disp, - SDValue &Base, int minOffset, int maxOffset); - - //! Address predicate if N can be expressed as an indexed [r+r] operation. - bool SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index); - - /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for - /// inline asm expressions. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector &OutOps) { - SDValue Op0, Op1; - switch (ConstraintCode) { - default: return true; - case 'm': // memory - if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1) - && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) - SelectXFormAddr(Op.getNode(), Op, Op0, Op1); - break; - case 'o': // offsetable - if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1) - && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) { - Op0 = Op; - Op1 = getSmallIPtrImm(0); - } - break; - case 'v': // not offsetable -#if 1 - llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled."); -#else - SelectAddrIdxOnly(Op, Op, Op0, Op1); - break; -#endif - } - - OutOps.push_back(Op0); - OutOps.push_back(Op1); - return false; - } - - virtual const char *getPassName() const { - return "Cell SPU DAG->DAG Pattern Instruction Selection"; - } - - private: - SDValue getRC( MVT ); - - // Include the pieces autogenerated from the target description. -#include "SPUGenDAGISel.inc" - }; -} - -/*! - \param Op The ISD instruction operand - \param N The address to be tested - \param Base The base address - \param Index The base address index - */ -bool -SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index) { - // These match the addr256k operand type: - EVT OffsVT = MVT::i16; - SDValue Zero = CurDAG->getTargetConstant(0, OffsVT); - int64_t val; - - switch (N.getOpcode()) { - case ISD::Constant: - val = dyn_cast(N.getNode())->getSExtValue(); - Base = CurDAG->getTargetConstant( val , MVT::i32); - Index = Zero; - return true; - case ISD::ConstantPool: - case ISD::GlobalAddress: - report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered."); - /*NOTREACHED*/ - - case ISD::TargetConstant: - case ISD::TargetGlobalAddress: - case ISD::TargetJumpTable: - report_fatal_error("SPUSelectAFormAddr: Target Constant/Pool/Global " - "not wrapped as A-form address."); - /*NOTREACHED*/ - - case SPUISD::AFormAddr: - // Just load from memory if there's only a single use of the location, - // otherwise, this will get handled below with D-form offset addresses - if (N.hasOneUse()) { - SDValue Op0 = N.getOperand(0); - switch (Op0.getOpcode()) { - case ISD::TargetConstantPool: - case ISD::TargetJumpTable: - Base = Op0; - Index = Zero; - return true; - - case ISD::TargetGlobalAddress: { - GlobalAddressSDNode *GSDN = cast(Op0); - const GlobalValue *GV = GSDN->getGlobal(); - if (GV->getAlignment() == 16) { - Base = Op0; - Index = Zero; - return true; - } - break; - } - } - } - break; - } - return false; -} - -bool -SPUDAGToDAGISel::SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp, - SDValue &Base) { - const int minDForm2Offset = -(1 << 7); - const int maxDForm2Offset = (1 << 7) - 1; - return DFormAddressPredicate(Op, N, Disp, Base, minDForm2Offset, - maxDForm2Offset); -} - -/*! - \param Op The ISD instruction (ignored) - \param N The address to be tested - \param Base Base address register/pointer - \param Index Base address index - - Examine the input address by a base register plus a signed 10-bit - displacement, [r+I10] (D-form address). - - \return true if \a N is a D-form address with \a Base and \a Index set - to non-empty SDValue instances. -*/ -bool -SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index) { - return DFormAddressPredicate(Op, N, Base, Index, - SPUFrameLowering::minFrameOffset(), - SPUFrameLowering::maxFrameOffset()); -} - -bool -SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index, int minOffset, - int maxOffset) { - unsigned Opc = N.getOpcode(); - EVT PtrTy = SPUtli.getPointerTy(); - - if (Opc == ISD::FrameIndex) { - // Stack frame index must be less than 512 (divided by 16): - FrameIndexSDNode *FIN = cast(N); - int FI = int(FIN->getIndex()); - DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = " - << FI << "\n"); - if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { - Base = CurDAG->getTargetConstant(0, PtrTy); - Index = CurDAG->getTargetFrameIndex(FI, PtrTy); - return true; - } - } else if (Opc == ISD::ADD) { - // Generated by getelementptr - const SDValue Op0 = N.getOperand(0); - const SDValue Op1 = N.getOperand(1); - - if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo) - || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) { - Base = CurDAG->getTargetConstant(0, PtrTy); - Index = N; - return true; - } else if (Op1.getOpcode() == ISD::Constant - || Op1.getOpcode() == ISD::TargetConstant) { - ConstantSDNode *CN = cast(Op1); - int32_t offset = int32_t(CN->getSExtValue()); - - if (Op0.getOpcode() == ISD::FrameIndex) { - FrameIndexSDNode *FIN = cast(Op0); - int FI = int(FIN->getIndex()); - DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset - << " frame index = " << FI << "\n"); - - if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { - Base = CurDAG->getTargetConstant(offset, PtrTy); - Index = CurDAG->getTargetFrameIndex(FI, PtrTy); - return true; - } - } else if (offset > minOffset && offset < maxOffset) { - Base = CurDAG->getTargetConstant(offset, PtrTy); - Index = Op0; - return true; - } - } else if (Op0.getOpcode() == ISD::Constant - || Op0.getOpcode() == ISD::TargetConstant) { - ConstantSDNode *CN = cast(Op0); - int32_t offset = int32_t(CN->getSExtValue()); - - if (Op1.getOpcode() == ISD::FrameIndex) { - FrameIndexSDNode *FIN = cast(Op1); - int FI = int(FIN->getIndex()); - DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset - << " frame index = " << FI << "\n"); - - if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { - Base = CurDAG->getTargetConstant(offset, PtrTy); - Index = CurDAG->getTargetFrameIndex(FI, PtrTy); - return true; - } - } else if (offset > minOffset && offset < maxOffset) { - Base = CurDAG->getTargetConstant(offset, PtrTy); - Index = Op1; - return true; - } - } - } else if (Opc == SPUISD::IndirectAddr) { - // Indirect with constant offset -> D-Form address - const SDValue Op0 = N.getOperand(0); - const SDValue Op1 = N.getOperand(1); - - if (Op0.getOpcode() == SPUISD::Hi - && Op1.getOpcode() == SPUISD::Lo) { - // (SPUindirect (SPUhi , 0), (SPUlo , 0)) - Base = CurDAG->getTargetConstant(0, PtrTy); - Index = N; - return true; - } else if (isa(Op0) || isa(Op1)) { - int32_t offset = 0; - SDValue idxOp; - - if (isa(Op1)) { - ConstantSDNode *CN = cast(Op1); - offset = int32_t(CN->getSExtValue()); - idxOp = Op0; - } else if (isa(Op0)) { - ConstantSDNode *CN = cast(Op0); - offset = int32_t(CN->getSExtValue()); - idxOp = Op1; - } - - if (offset >= minOffset && offset <= maxOffset) { - Base = CurDAG->getTargetConstant(offset, PtrTy); - Index = idxOp; - return true; - } - } - } else if (Opc == SPUISD::AFormAddr) { - Base = CurDAG->getTargetConstant(0, N.getValueType()); - Index = N; - return true; - } else if (Opc == SPUISD::LDRESULT) { - Base = CurDAG->getTargetConstant(0, N.getValueType()); - Index = N; - return true; - } else if (Opc == ISD::Register - ||Opc == ISD::CopyFromReg - ||Opc == ISD::UNDEF - ||Opc == ISD::Constant) { - unsigned OpOpc = Op->getOpcode(); - - if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) { - // Direct load/store without getelementptr - SDValue Offs; - - Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2)); - - if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) { - if (Offs.getOpcode() == ISD::UNDEF) - Offs = CurDAG->getTargetConstant(0, Offs.getValueType()); - - Base = Offs; - Index = N; - return true; - } - } else { - /* If otherwise unadorned, default to D-form address with 0 offset: */ - if (Opc == ISD::CopyFromReg) { - Index = N.getOperand(1); - } else { - Index = N; - } - - Base = CurDAG->getTargetConstant(0, Index.getValueType()); - return true; - } - } - - return false; -} - -/*! - \param Op The ISD instruction operand - \param N The address operand - \param Base The base pointer operand - \param Index The offset/index operand - - If the address \a N can be expressed as an A-form or D-form address, returns - false. Otherwise, creates two operands, Base and Index that will become the - (r)(r) X-form address. -*/ -bool -SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index) { - if (!SelectAFormAddr(Op, N, Base, Index) - && !SelectDFormAddr(Op, N, Base, Index)) { - // If the address is neither A-form or D-form, punt and use an X-form - // address: - Base = N.getOperand(1); - Index = N.getOperand(0); - return true; - } - - return false; -} - -/*! - Utility function to use with COPY_TO_REGCLASS instructions. Returns a SDValue - to be used as the last parameter of a -CurDAG->getMachineNode(COPY_TO_REGCLASS,..., ) function call - \param VT the value type for which we want a register class -*/ -SDValue SPUDAGToDAGISel::getRC( MVT VT ) { - switch( VT.SimpleTy ) { - case MVT::i8: - return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32); - case MVT::i16: - return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32); - case MVT::i32: - return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32); - case MVT::f32: - return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32); - case MVT::i64: - return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32); - case MVT::i128: - return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32); - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v4f32: - case MVT::v2i64: - case MVT::v2f64: - return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32); - default: - assert( false && "add a new case here" ); - return SDValue(); - } -} - -//! Convert the operand from a target-independent to a target-specific node -/*! - */ -SDNode * -SPUDAGToDAGISel::Select(SDNode *N) { - unsigned Opc = N->getOpcode(); - int n_ops = -1; - unsigned NewOpc = 0; - EVT OpVT = N->getValueType(0); - SDValue Ops[8]; - DebugLoc dl = N->getDebugLoc(); - - if (N->isMachineOpcode()) - return NULL; // Already selected. - - if (Opc == ISD::FrameIndex) { - int FI = cast(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); - SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0)); - - if (FI < 128) { - NewOpc = SPU::AIr32; - Ops[0] = TFI; - Ops[1] = Imm0; - n_ops = 2; - } else { - NewOpc = SPU::Ar32; - Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0)); - Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl, - N->getValueType(0), TFI), - 0); - n_ops = 2; - } - } else if (Opc == ISD::Constant && OpVT == MVT::i64) { - // Catch the i64 constants that end up here. Note: The backend doesn't - // attempt to legalize the constant (it's useless because DAGCombiner - // will insert 64-bit constants and we can't stop it). - return SelectI64Constant(N, OpVT, N->getDebugLoc()); - } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) - && OpVT == MVT::i64) { - SDValue Op0 = N->getOperand(0); - EVT Op0VT = Op0.getValueType(); - EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(), - Op0VT, (128 / Op0VT.getSizeInBits())); - EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), - OpVT, (128 / OpVT.getSizeInBits())); - SDValue shufMask; - - switch (Op0VT.getSimpleVT().SimpleTy) { - default: - report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT"); - /*NOTREACHED*/ - case MVT::i32: - shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x00010203, MVT::i32), - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x08090a0b, MVT::i32)); - break; - - case MVT::i16: - shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x80800203, MVT::i32), - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x80800a0b, MVT::i32)); - break; - - case MVT::i8: - shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x80808003, MVT::i32), - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x8080800b, MVT::i32)); - break; - } - - SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode()); - - HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, - Op0VecVT, Op0)); - - SDValue PromScalar; - if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode())) - PromScalar = SDValue(N, 0); - else - PromScalar = PromoteScalar.getValue(); - - SDValue zextShuffle = - CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, - PromScalar, PromScalar, - SDValue(shufMaskLoad, 0)); - - HandleSDNode Dummy2(zextShuffle); - if (SDNode *N = SelectCode(Dummy2.getValue().getNode())) - zextShuffle = SDValue(N, 0); - else - zextShuffle = Dummy2.getValue(); - HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT, - zextShuffle)); - - CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); - SelectCode(Dummy.getValue().getNode()); - return Dummy.getValue().getNode(); - } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { - SDNode *CGLoad = - emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); - - HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0))); - - CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); - if (SDNode *N = SelectCode(Dummy.getValue().getNode())) - return N; - return Dummy.getValue().getNode(); - } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { - SDNode *CGLoad = - emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode()); - - HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0))); - - CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); - if (SDNode *N = SelectCode(Dummy.getValue().getNode())) - return N; - return Dummy.getValue().getNode(); - } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { - SDNode *CGLoad = - emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); - - HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0))); - CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); - if (SDNode *N = SelectCode(Dummy.getValue().getNode())) - return N; - return Dummy.getValue().getNode(); - } else if (Opc == ISD::TRUNCATE) { - SDValue Op0 = N->getOperand(0); - if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL) - && OpVT == MVT::i32 - && Op0.getValueType() == MVT::i64) { - // Catch (truncate:i32 ([sra|srl]:i64 arg, c), where c >= 32 - // - // Take advantage of the fact that the upper 32 bits are in the - // i32 preferred slot and avoid shuffle gymnastics: - ConstantSDNode *CN = dyn_cast(Op0.getOperand(1)); - if (CN != 0) { - unsigned shift_amt = unsigned(CN->getZExtValue()); - - if (shift_amt >= 32) { - SDNode *hi32 = - CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, - Op0.getOperand(0), getRC(MVT::i32)); - - shift_amt -= 32; - if (shift_amt > 0) { - // Take care of the additional shift, if present: - SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32); - unsigned Opc = SPU::ROTMAIr32_i32; - - if (Op0.getOpcode() == ISD::SRL) - Opc = SPU::ROTMr32; - - hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0), - shift); - } - - return hi32; - } - } - } - } else if (Opc == ISD::SHL) { - if (OpVT == MVT::i64) - return SelectSHLi64(N, OpVT); - } else if (Opc == ISD::SRL) { - if (OpVT == MVT::i64) - return SelectSRLi64(N, OpVT); - } else if (Opc == ISD::SRA) { - if (OpVT == MVT::i64) - return SelectSRAi64(N, OpVT); - } else if (Opc == ISD::FNEG - && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) { - DebugLoc dl = N->getDebugLoc(); - // Check if the pattern is a special form of DFNMS: - // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC)) - SDValue Op0 = N->getOperand(0); - if (Op0.getOpcode() == ISD::FSUB) { - SDValue Op00 = Op0.getOperand(0); - if (Op00.getOpcode() == ISD::FMUL) { - unsigned Opc = SPU::DFNMSf64; - if (OpVT == MVT::v2f64) - Opc = SPU::DFNMSv2f64; - - return CurDAG->getMachineNode(Opc, dl, OpVT, - Op00.getOperand(0), - Op00.getOperand(1), - Op0.getOperand(1)); - } - } - - SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64); - SDNode *signMask = 0; - unsigned Opc = SPU::XORfneg64; - - if (OpVT == MVT::f64) { - signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl); - } else if (OpVT == MVT::v2f64) { - Opc = SPU::XORfnegvec; - signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl, - MVT::v2i64, - negConst, negConst).getNode()); - } - - return CurDAG->getMachineNode(Opc, dl, OpVT, - N->getOperand(0), SDValue(signMask, 0)); - } else if (Opc == ISD::FABS) { - if (OpVT == MVT::f64) { - SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl); - return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT, - N->getOperand(0), SDValue(signMask, 0)); - } else if (OpVT == MVT::v2f64) { - SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64); - SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, - absConst, absConst); - SDNode *signMask = emitBuildVector(absVec.getNode()); - return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT, - N->getOperand(0), SDValue(signMask, 0)); - } - } else if (Opc == SPUISD::LDRESULT) { - // Custom select instructions for LDRESULT - EVT VT = N->getValueType(0); - SDValue Arg = N->getOperand(0); - SDValue Chain = N->getOperand(1); - SDNode *Result; - - Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT, - MVT::Other, Arg, - getRC( VT.getSimpleVT()), Chain); - return Result; - - } else if (Opc == SPUISD::IndirectAddr) { - // Look at the operands: SelectCode() will catch the cases that aren't - // specifically handled here. - // - // SPUInstrInfo catches the following patterns: - // (SPUindirect (SPUhi ...), (SPUlo ...)) - // (SPUindirect $sp, imm) - EVT VT = N->getValueType(0); - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - RegisterSDNode *RN; - - if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo) - || (Op0.getOpcode() == ISD::Register - && ((RN = dyn_cast(Op0.getNode())) != 0 - && RN->getReg() != SPU::R1))) { - NewOpc = SPU::Ar32; - Ops[1] = Op1; - if (Op1.getOpcode() == ISD::Constant) { - ConstantSDNode *CN = cast(Op1); - Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT); - if (isInt<10>(CN->getSExtValue())) { - NewOpc = SPU::AIr32; - Ops[1] = Op1; - } else { - Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl, - N->getValueType(0), - Op1), - 0); - } - } - Ops[0] = Op0; - n_ops = 2; - } - } - - if (n_ops > 0) { - if (N->hasOneUse()) - return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops); - else - return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops); - } else - return SelectCode(N); -} - -/*! - * Emit the instruction sequence for i64 left shifts. The basic algorithm - * is to fill the bottom two word slots with zeros so that zeros are shifted - * in as the entire quadword is shifted left. - * - * \note This code could also be used to implement v2i64 shl. - * - * @param Op The shl operand - * @param OpVT Op's machine value value type (doesn't need to be passed, but - * makes life easier.) - * @return The SDNode with the entire instruction sequence - */ -SDNode * -SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) { - SDValue Op0 = N->getOperand(0); - EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), - OpVT, (128 / OpVT.getSizeInBits())); - SDValue ShiftAmt = N->getOperand(1); - EVT ShiftAmtVT = ShiftAmt.getValueType(); - SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0; - SDValue SelMaskVal; - DebugLoc dl = N->getDebugLoc(); - - VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT, - Op0, getRC(MVT::v2i64) ); - SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16); - SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal); - ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT, - CurDAG->getTargetConstant(0, OpVT)); - VecOp0 = CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT, - SDValue(ZeroFill, 0), - SDValue(VecOp0, 0), - SDValue(SelMask, 0)); - - if (ConstantSDNode *CN = dyn_cast(ShiftAmt)) { - unsigned bytes = unsigned(CN->getZExtValue()) >> 3; - unsigned bits = unsigned(CN->getZExtValue()) & 7; - - if (bytes > 0) { - Shift = - CurDAG->getMachineNode(SPU::SHLQBYIv2i64, dl, VecVT, - SDValue(VecOp0, 0), - CurDAG->getTargetConstant(bytes, ShiftAmtVT)); - } - - if (bits > 0) { - Shift = - CurDAG->getMachineNode(SPU::SHLQBIIv2i64, dl, VecVT, - SDValue((Shift != 0 ? Shift : VecOp0), 0), - CurDAG->getTargetConstant(bits, ShiftAmtVT)); - } - } else { - SDNode *Bytes = - CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT, - ShiftAmt, - CurDAG->getTargetConstant(3, ShiftAmtVT)); - SDNode *Bits = - CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT, - ShiftAmt, - CurDAG->getTargetConstant(7, ShiftAmtVT)); - Shift = - CurDAG->getMachineNode(SPU::SHLQBYv2i64, dl, VecVT, - SDValue(VecOp0, 0), SDValue(Bytes, 0)); - Shift = - CurDAG->getMachineNode(SPU::SHLQBIv2i64, dl, VecVT, - SDValue(Shift, 0), SDValue(Bits, 0)); - } - - return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, - OpVT, SDValue(Shift, 0), getRC(MVT::i64)); -} - -/*! - * Emit the instruction sequence for i64 logical right shifts. - * - * @param Op The shl operand - * @param OpVT Op's machine value value type (doesn't need to be passed, but - * makes life easier.) - * @return The SDNode with the entire instruction sequence - */ -SDNode * -SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) { - SDValue Op0 = N->getOperand(0); - EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), - OpVT, (128 / OpVT.getSizeInBits())); - SDValue ShiftAmt = N->getOperand(1); - EVT ShiftAmtVT = ShiftAmt.getValueType(); - SDNode *VecOp0, *Shift = 0; - DebugLoc dl = N->getDebugLoc(); - - VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT, - Op0, getRC(MVT::v2i64) ); - - if (ConstantSDNode *CN = dyn_cast(ShiftAmt)) { - unsigned bytes = unsigned(CN->getZExtValue()) >> 3; - unsigned bits = unsigned(CN->getZExtValue()) & 7; - - if (bytes > 0) { - Shift = - CurDAG->getMachineNode(SPU::ROTQMBYIv2i64, dl, VecVT, - SDValue(VecOp0, 0), - CurDAG->getTargetConstant(bytes, ShiftAmtVT)); - } - - if (bits > 0) { - Shift = - CurDAG->getMachineNode(SPU::ROTQMBIIv2i64, dl, VecVT, - SDValue((Shift != 0 ? Shift : VecOp0), 0), - CurDAG->getTargetConstant(bits, ShiftAmtVT)); - } - } else { - SDNode *Bytes = - CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT, - ShiftAmt, - CurDAG->getTargetConstant(3, ShiftAmtVT)); - SDNode *Bits = - CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT, - ShiftAmt, - CurDAG->getTargetConstant(7, ShiftAmtVT)); - - // Ensure that the shift amounts are negated! - Bytes = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT, - SDValue(Bytes, 0), - CurDAG->getTargetConstant(0, ShiftAmtVT)); - - Bits = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT, - SDValue(Bits, 0), - CurDAG->getTargetConstant(0, ShiftAmtVT)); - - Shift = - CurDAG->getMachineNode(SPU::ROTQMBYv2i64, dl, VecVT, - SDValue(VecOp0, 0), SDValue(Bytes, 0)); - Shift = - CurDAG->getMachineNode(SPU::ROTQMBIv2i64, dl, VecVT, - SDValue(Shift, 0), SDValue(Bits, 0)); - } - - return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, - OpVT, SDValue(Shift, 0), getRC(MVT::i64)); -} - -/*! - * Emit the instruction sequence for i64 arithmetic right shifts. - * - * @param Op The shl operand - * @param OpVT Op's machine value value type (doesn't need to be passed, but - * makes life easier.) - * @return The SDNode with the entire instruction sequence - */ -SDNode * -SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) { - // Promote Op0 to vector - EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), - OpVT, (128 / OpVT.getSizeInBits())); - SDValue ShiftAmt = N->getOperand(1); - EVT ShiftAmtVT = ShiftAmt.getValueType(); - DebugLoc dl = N->getDebugLoc(); - - SDNode *VecOp0 = - CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, - VecVT, N->getOperand(0), getRC(MVT::v2i64)); - - SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT); - SDNode *SignRot = - CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64, - SDValue(VecOp0, 0), SignRotAmt); - SDNode *UpperHalfSign = - CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, - MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32)); - - SDNode *UpperHalfSignMask = - CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0)); - SDNode *UpperLowerMask = - CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, - CurDAG->getTargetConstant(0xff00ULL, MVT::i16)); - SDNode *UpperLowerSelect = - CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT, - SDValue(UpperHalfSignMask, 0), - SDValue(VecOp0, 0), - SDValue(UpperLowerMask, 0)); - - SDNode *Shift = 0; - - if (ConstantSDNode *CN = dyn_cast(ShiftAmt)) { - unsigned bytes = unsigned(CN->getZExtValue()) >> 3; - unsigned bits = unsigned(CN->getZExtValue()) & 7; - - if (bytes > 0) { - bytes = 31 - bytes; - Shift = - CurDAG->getMachineNode(SPU::ROTQBYIv2i64, dl, VecVT, - SDValue(UpperLowerSelect, 0), - CurDAG->getTargetConstant(bytes, ShiftAmtVT)); - } - - if (bits > 0) { - bits = 8 - bits; - Shift = - CurDAG->getMachineNode(SPU::ROTQBIIv2i64, dl, VecVT, - SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0), - CurDAG->getTargetConstant(bits, ShiftAmtVT)); - } - } else { - SDNode *NegShift = - CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT, - ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT)); - - Shift = - CurDAG->getMachineNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT, - SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0)); - Shift = - CurDAG->getMachineNode(SPU::ROTQBIv2i64, dl, VecVT, - SDValue(Shift, 0), SDValue(NegShift, 0)); - } - - return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, - OpVT, SDValue(Shift, 0), getRC(MVT::i64)); -} - -/*! - Do the necessary magic necessary to load a i64 constant - */ -SDNode *SPUDAGToDAGISel::SelectI64Constant(SDNode *N, EVT OpVT, - DebugLoc dl) { - ConstantSDNode *CN = cast(N); - return SelectI64Constant(CN->getZExtValue(), OpVT, dl); -} - -SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, - DebugLoc dl) { - EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, 2); - SDValue i64vec = - SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl); - - // Here's where it gets interesting, because we have to parse out the - // subtree handed back in i64vec: - - if (i64vec.getOpcode() == ISD::BITCAST) { - // The degenerate case where the upper and lower bits in the splat are - // identical: - SDValue Op0 = i64vec.getOperand(0); - - ReplaceUses(i64vec, Op0); - return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, - SDValue(emitBuildVector(Op0.getNode()), 0), - getRC(MVT::i64)); - } else if (i64vec.getOpcode() == SPUISD::SHUFB) { - SDValue lhs = i64vec.getOperand(0); - SDValue rhs = i64vec.getOperand(1); - SDValue shufmask = i64vec.getOperand(2); - - if (lhs.getOpcode() == ISD::BITCAST) { - ReplaceUses(lhs, lhs.getOperand(0)); - lhs = lhs.getOperand(0); - } - - SDNode *lhsNode = (lhs.getNode()->isMachineOpcode() - ? lhs.getNode() - : emitBuildVector(lhs.getNode())); - - if (rhs.getOpcode() == ISD::BITCAST) { - ReplaceUses(rhs, rhs.getOperand(0)); - rhs = rhs.getOperand(0); - } - - SDNode *rhsNode = (rhs.getNode()->isMachineOpcode() - ? rhs.getNode() - : emitBuildVector(rhs.getNode())); - - if (shufmask.getOpcode() == ISD::BITCAST) { - ReplaceUses(shufmask, shufmask.getOperand(0)); - shufmask = shufmask.getOperand(0); - } - - SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode() - ? shufmask.getNode() - : emitBuildVector(shufmask.getNode())); - - SDValue shufNode = - CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, - SDValue(lhsNode, 0), SDValue(rhsNode, 0), - SDValue(shufMaskNode, 0)); - HandleSDNode Dummy(shufNode); - SDNode *SN = SelectCode(Dummy.getValue().getNode()); - if (SN == 0) SN = Dummy.getValue().getNode(); - - return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, - OpVT, SDValue(SN, 0), getRC(MVT::i64)); - } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) { - return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, - SDValue(emitBuildVector(i64vec.getNode()), 0), - getRC(MVT::i64)); - } else { - report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec" - "condition"); - } -} - -/// createSPUISelDag - This pass converts a legalized DAG into a -/// SPU-specific DAG, ready for instruction scheduling. -/// -FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) { - return new SPUDAGToDAGISel(TM); -} diff --git a/llvm/lib/Target/CellSPU/SPUISelLowering.cpp b/llvm/lib/Target/CellSPU/SPUISelLowering.cpp deleted file mode 100644 index 31b8733..0000000 --- a/llvm/lib/Target/CellSPU/SPUISelLowering.cpp +++ /dev/null @@ -1,3267 +0,0 @@ -//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the SPUTargetLowering class. -// -//===----------------------------------------------------------------------===// - -#include "SPUISelLowering.h" -#include "SPUTargetMachine.h" -#include "SPUFrameLowering.h" -#include "SPUMachineFunction.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/CallingConv.h" -#include "llvm/Type.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - // Byte offset of the preferred slot (counted from the MSB) - int prefslotOffset(EVT VT) { - int retval=0; - if (VT==MVT::i1) retval=3; - if (VT==MVT::i8) retval=3; - if (VT==MVT::i16) retval=2; - - return retval; - } - - //! Expand a library call into an actual call DAG node - /*! - \note - This code is taken from SelectionDAGLegalize, since it is not exposed as - part of the LLVM SelectionDAG API. - */ - - SDValue - ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG, - bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) { - // The input chain to this libcall is the entry node of the function. - // Legalizing the call will automatically add the previous call to the - // dependence. - SDValue InChain = DAG.getEntryNode(); - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { - EVT ArgVT = Op.getOperand(i).getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Node = Op.getOperand(i); - Entry.Ty = ArgTy; - Entry.isSExt = isSigned; - Entry.isZExt = !isSigned; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); - - // Splice the libcall in wherever FindInputOutputChains tells us to. - Type *RetTy = - Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext()); - TargetLowering::CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, - false, false, - 0, TLI.getLibcallCallingConv(LC), - /*isTailCall=*/false, - /*doesNotRet=*/false, - /*isReturnValueUsed=*/true, - Callee, Args, DAG, Op.getDebugLoc()); - std::pair CallInfo = TLI.LowerCallTo(CLI); - - return CallInfo.first; - } -} - -SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) - : TargetLowering(TM, new TargetLoweringObjectFileELF()), - SPUTM(TM) { - - // Use _setjmp/_longjmp instead of setjmp/longjmp. - setUseUnderscoreSetJmp(true); - setUseUnderscoreLongJmp(true); - - // Set RTLIB libcall names as used by SPU: - setLibcallName(RTLIB::DIV_F64, "__fast_divdf3"); - - // Set up the SPU's register classes: - addRegisterClass(MVT::i8, &SPU::R8CRegClass); - addRegisterClass(MVT::i16, &SPU::R16CRegClass); - addRegisterClass(MVT::i32, &SPU::R32CRegClass); - addRegisterClass(MVT::i64, &SPU::R64CRegClass); - addRegisterClass(MVT::f32, &SPU::R32FPRegClass); - addRegisterClass(MVT::f64, &SPU::R64FPRegClass); - addRegisterClass(MVT::i128, &SPU::GPRCRegClass); - - // SPU has no sign or zero extended loads for i1, i8, i16: - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); - - setTruncStoreAction(MVT::i128, MVT::i64, Expand); - setTruncStoreAction(MVT::i128, MVT::i32, Expand); - setTruncStoreAction(MVT::i128, MVT::i16, Expand); - setTruncStoreAction(MVT::i128, MVT::i8, Expand); - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - - // SPU constant load actions are custom lowered: - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Custom); - - // SPU's loads and stores have to be custom lowered: - for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128; - ++sctype) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; - - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::STORE, VT, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, Custom); - setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); - setLoadExtAction(ISD::SEXTLOAD, VT, Custom); - - for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { - MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; - setTruncStoreAction(VT, StoreVT, Expand); - } - } - - for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64; - ++sctype) { - MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype; - - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::STORE, VT, Custom); - - for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) { - MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; - setTruncStoreAction(VT, StoreVT, Expand); - } - } - - // Expand the jumptable branches - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::Other, Expand); - - // Custom lower SELECT_CC for most cases, but expand by default - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i16, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); - - // SPU has no intrinsics for these particular operations: - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); - - // SPU has no division/remainder instructions - setOperationAction(ISD::SREM, MVT::i8, Expand); - setOperationAction(ISD::UREM, MVT::i8, Expand); - setOperationAction(ISD::SDIV, MVT::i8, Expand); - setOperationAction(ISD::UDIV, MVT::i8, Expand); - setOperationAction(ISD::SDIVREM, MVT::i8, Expand); - setOperationAction(ISD::UDIVREM, MVT::i8, Expand); - setOperationAction(ISD::SREM, MVT::i16, Expand); - setOperationAction(ISD::UREM, MVT::i16, Expand); - setOperationAction(ISD::SDIV, MVT::i16, Expand); - setOperationAction(ISD::UDIV, MVT::i16, Expand); - setOperationAction(ISD::SDIVREM, MVT::i16, Expand); - setOperationAction(ISD::UDIVREM, MVT::i16, Expand); - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SDIV, MVT::i32, Expand); - setOperationAction(ISD::UDIV, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); - setOperationAction(ISD::SDIV, MVT::i64, Expand); - setOperationAction(ISD::UDIV, MVT::i64, Expand); - setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - setOperationAction(ISD::UDIVREM, MVT::i64, Expand); - setOperationAction(ISD::SREM, MVT::i128, Expand); - setOperationAction(ISD::UREM, MVT::i128, Expand); - setOperationAction(ISD::SDIV, MVT::i128, Expand); - setOperationAction(ISD::UDIV, MVT::i128, Expand); - setOperationAction(ISD::SDIVREM, MVT::i128, Expand); - setOperationAction(ISD::UDIVREM, MVT::i128, Expand); - - // We don't support sin/cos/sqrt/fmod - setOperationAction(ISD::FSIN , MVT::f64, Expand); - setOperationAction(ISD::FCOS , MVT::f64, Expand); - setOperationAction(ISD::FREM , MVT::f64, Expand); - setOperationAction(ISD::FSIN , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f32, Expand); - setOperationAction(ISD::FREM , MVT::f32, Expand); - - // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt - // for f32!) - setOperationAction(ISD::FSQRT, MVT::f64, Expand); - setOperationAction(ISD::FSQRT, MVT::f32, Expand); - - setOperationAction(ISD::FMA, MVT::f64, Expand); - setOperationAction(ISD::FMA, MVT::f32, Expand); - - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - - // SPU can do rotate right and left, so legalize it... but customize for i8 - // because instructions don't exist. - - // FIXME: Change from "expand" to appropriate type once ROTR is supported in - // .td files. - setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/); - setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/); - setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/); - - setOperationAction(ISD::ROTL, MVT::i32, Legal); - setOperationAction(ISD::ROTL, MVT::i16, Legal); - setOperationAction(ISD::ROTL, MVT::i8, Custom); - - // SPU has no native version of shift left/right for i8 - setOperationAction(ISD::SHL, MVT::i8, Custom); - setOperationAction(ISD::SRL, MVT::i8, Custom); - setOperationAction(ISD::SRA, MVT::i8, Custom); - - // Make these operations legal and handle them during instruction selection: - setOperationAction(ISD::SHL, MVT::i64, Legal); - setOperationAction(ISD::SRL, MVT::i64, Legal); - setOperationAction(ISD::SRA, MVT::i64, Legal); - - // Custom lower i8, i32 and i64 multiplications - setOperationAction(ISD::MUL, MVT::i8, Custom); - setOperationAction(ISD::MUL, MVT::i32, Legal); - setOperationAction(ISD::MUL, MVT::i64, Legal); - - // Expand double-width multiplication - // FIXME: It would probably be reasonable to support some of these operations - setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); - setOperationAction(ISD::MULHU, MVT::i8, Expand); - setOperationAction(ISD::MULHS, MVT::i8, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); - setOperationAction(ISD::MULHU, MVT::i16, Expand); - setOperationAction(ISD::MULHS, MVT::i16, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::MULHU, MVT::i32, Expand); - setOperationAction(ISD::MULHS, MVT::i32, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::MULHU, MVT::i64, Expand); - setOperationAction(ISD::MULHS, MVT::i64, Expand); - - // Need to custom handle (some) common i8, i64 math ops - setOperationAction(ISD::ADD, MVT::i8, Custom); - setOperationAction(ISD::ADD, MVT::i64, Legal); - setOperationAction(ISD::SUB, MVT::i8, Custom); - setOperationAction(ISD::SUB, MVT::i64, Legal); - - // SPU does not have BSWAP. It does have i32 support CTLZ. - // CTPOP has to be custom lowered. - setOperationAction(ISD::BSWAP, MVT::i32, Expand); - setOperationAction(ISD::BSWAP, MVT::i64, Expand); - - setOperationAction(ISD::CTPOP, MVT::i8, Custom); - setOperationAction(ISD::CTPOP, MVT::i16, Custom); - setOperationAction(ISD::CTPOP, MVT::i32, Custom); - setOperationAction(ISD::CTPOP, MVT::i64, Custom); - setOperationAction(ISD::CTPOP, MVT::i128, Expand); - - setOperationAction(ISD::CTTZ , MVT::i8, Expand); - setOperationAction(ISD::CTTZ , MVT::i16, Expand); - setOperationAction(ISD::CTTZ , MVT::i32, Expand); - setOperationAction(ISD::CTTZ , MVT::i64, Expand); - setOperationAction(ISD::CTTZ , MVT::i128, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand); - - setOperationAction(ISD::CTLZ , MVT::i8, Promote); - setOperationAction(ISD::CTLZ , MVT::i16, Promote); - setOperationAction(ISD::CTLZ , MVT::i32, Legal); - setOperationAction(ISD::CTLZ , MVT::i64, Expand); - setOperationAction(ISD::CTLZ , MVT::i128, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand); - - // SPU has a version of select that implements (a&~c)|(b&c), just like - // select ought to work: - setOperationAction(ISD::SELECT, MVT::i8, Legal); - setOperationAction(ISD::SELECT, MVT::i16, Legal); - setOperationAction(ISD::SELECT, MVT::i32, Legal); - setOperationAction(ISD::SELECT, MVT::i64, Legal); - - setOperationAction(ISD::SETCC, MVT::i8, Legal); - setOperationAction(ISD::SETCC, MVT::i16, Legal); - setOperationAction(ISD::SETCC, MVT::i32, Legal); - setOperationAction(ISD::SETCC, MVT::i64, Legal); - setOperationAction(ISD::SETCC, MVT::f64, Custom); - - // Custom lower i128 -> i64 truncates - setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); - - // Custom lower i32/i64 -> i128 sign extend - setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom); - - setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); - setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); - setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); - setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); - // SPU has a legal FP -> signed INT instruction for f32, but for f64, need - // to expand to a libcall, hence the custom lowering: - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); - setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand); - - // FDIV on SPU requires custom lowering - setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall - - // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64: - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); - - setOperationAction(ISD::BITCAST, MVT::i32, Legal); - setOperationAction(ISD::BITCAST, MVT::f32, Legal); - setOperationAction(ISD::BITCAST, MVT::i64, Legal); - setOperationAction(ISD::BITCAST, MVT::f64, Legal); - - // We cannot sextinreg(i1). Expand to shifts. - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - - // We want to legalize GlobalAddress and ConstantPool nodes into the - // appropriate instructions to materialize the address. - for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; - ++sctype) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; - - setOperationAction(ISD::GlobalAddress, VT, Custom); - setOperationAction(ISD::ConstantPool, VT, Custom); - setOperationAction(ISD::JumpTable, VT, Custom); - } - - // VASTART needs to be custom lowered to use the VarArgsFrameIndex - setOperationAction(ISD::VASTART , MVT::Other, Custom); - - // Use the default implementation. - setOperationAction(ISD::VAARG , MVT::Other, Expand); - setOperationAction(ISD::VACOPY , MVT::Other, Expand); - setOperationAction(ISD::VAEND , MVT::Other, Expand); - setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); - setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand); - - // Cell SPU has instructions for converting between i64 and fp. - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - - // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); - - // BUILD_PAIR can't be handled natively, and should be expanded to shl/or - setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); - - // First set operation action for all vector types to expand. Then we - // will selectively turn on ones that can be effectively codegen'd. - addRegisterClass(MVT::v16i8, &SPU::VECREGRegClass); - addRegisterClass(MVT::v8i16, &SPU::VECREGRegClass); - addRegisterClass(MVT::v4i32, &SPU::VECREGRegClass); - addRegisterClass(MVT::v2i64, &SPU::VECREGRegClass); - addRegisterClass(MVT::v4f32, &SPU::VECREGRegClass); - addRegisterClass(MVT::v2f64, &SPU::VECREGRegClass); - - for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)i; - - // Set operation actions to legal types only. - if (!isTypeLegal(VT)) continue; - - // add/sub are legal for all supported vector VT's. - setOperationAction(ISD::ADD, VT, Legal); - setOperationAction(ISD::SUB, VT, Legal); - // mul has to be custom lowered. - setOperationAction(ISD::MUL, VT, Legal); - - setOperationAction(ISD::AND, VT, Legal); - setOperationAction(ISD::OR, VT, Legal); - setOperationAction(ISD::XOR, VT, Legal); - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::SELECT, VT, Legal); - setOperationAction(ISD::STORE, VT, Custom); - - // These operations need to be expanded: - setOperationAction(ISD::SDIV, VT, Expand); - setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::UDIV, VT, Expand); - setOperationAction(ISD::UREM, VT, Expand); - setOperationAction(ISD::FFLOOR, VT, Expand); - - // Expand all trunc stores - for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) { - MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j; - setTruncStoreAction(VT, TargetVT, Expand); - } - - // Custom lower build_vector, constant pool spills, insert and - // extract vector elements: - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::ConstantPool, VT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - } - - setOperationAction(ISD::SHL, MVT::v2i64, Expand); - - setOperationAction(ISD::AND, MVT::v16i8, Custom); - setOperationAction(ISD::OR, MVT::v16i8, Custom); - setOperationAction(ISD::XOR, MVT::v16i8, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); - - setOperationAction(ISD::FDIV, MVT::v4f32, Legal); - - setBooleanContents(ZeroOrNegativeOneBooleanContent); - setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct? - - setStackPointerRegisterToSaveRestore(SPU::R1); - - // We have target-specific dag combine patterns for the following nodes: - setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::ANY_EXTEND); - - setMinFunctionAlignment(3); - - computeRegisterProperties(); - - // Set pre-RA register scheduler default to BURR, which produces slightly - // better code than the default (could also be TDRR, but TargetLowering.h - // needs a mod to support that model): - setSchedulingPreference(Sched::RegPressure); -} - -const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: return 0; - case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG"; - case SPUISD::Hi: return "SPUISD::Hi"; - case SPUISD::Lo: return "SPUISD::Lo"; - case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr"; - case SPUISD::AFormAddr: return "SPUISD::AFormAddr"; - case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr"; - case SPUISD::LDRESULT: return "SPUISD::LDRESULT"; - case SPUISD::CALL: return "SPUISD::CALL"; - case SPUISD::SHUFB: return "SPUISD::SHUFB"; - case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK"; - case SPUISD::CNTB: return "SPUISD::CNTB"; - case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC"; - case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT"; - case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS"; - case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES"; - case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL"; - case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR"; - case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT"; - case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS"; - case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK"; - case SPUISD::SELB: return "SPUISD::SELB"; - case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER"; - case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER"; - case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER"; - } -} - -//===----------------------------------------------------------------------===// -// Return the Cell SPU's SETCC result type -//===----------------------------------------------------------------------===// - -EVT SPUTargetLowering::getSetCCResultType(EVT VT) const { - // i8, i16 and i32 are valid SETCC result types - MVT::SimpleValueType retval; - - switch(VT.getSimpleVT().SimpleTy){ - case MVT::i1: - case MVT::i8: - retval = MVT::i8; break; - case MVT::i16: - retval = MVT::i16; break; - case MVT::i32: - default: - retval = MVT::i32; - } - return retval; -} - -//===----------------------------------------------------------------------===// -// Calling convention code: -//===----------------------------------------------------------------------===// - -#include "SPUGenCallingConv.inc" - -//===----------------------------------------------------------------------===// -// LowerOperation implementation -//===----------------------------------------------------------------------===// - -/// Custom lower loads for CellSPU -/*! - All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements - within a 16-byte block, we have to rotate to extract the requested element. - - For extending loads, we also want to ensure that the following sequence is - emitted, e.g. for MVT::f32 extending load to MVT::f64: - -\verbatim -%1 v16i8,ch = load -%2 v16i8,ch = rotate %1 -%3 v4f8, ch = bitconvert %2 -%4 f32 = vec2perfslot %3 -%5 f64 = fp_extend %4 -\endverbatim -*/ -static SDValue -LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - LoadSDNode *LN = cast(Op); - SDValue the_chain = LN->getChain(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - EVT InVT = LN->getMemoryVT(); - EVT OutVT = Op.getValueType(); - ISD::LoadExtType ExtType = LN->getExtensionType(); - unsigned alignment = LN->getAlignment(); - int pso = prefslotOffset(InVT); - DebugLoc dl = Op.getDebugLoc(); - EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT, - (128 / InVT.getSizeInBits())); - - // two sanity checks - assert( LN->getAddressingMode() == ISD::UNINDEXED - && "we should get only UNINDEXED adresses"); - // clean aligned loads can be selected as-is - if (InVT.getSizeInBits() == 128 && (alignment%16) == 0) - return SDValue(); - - // Get pointerinfos to the memory chunk(s) that contain the data to load - uint64_t mpi_offset = LN->getPointerInfo().Offset; - mpi_offset -= mpi_offset%16; - MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset); - MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16); - - SDValue result; - SDValue basePtr = LN->getBasePtr(); - SDValue rotate; - - if ((alignment%16) == 0) { - ConstantSDNode *CN; - - // Special cases for a known aligned load to simplify the base pointer - // and the rotation amount: - if (basePtr.getOpcode() == ISD::ADD - && (CN = dyn_cast (basePtr.getOperand(1))) != 0) { - // Known offset into basePtr - int64_t offset = CN->getSExtValue(); - int64_t rotamt = int64_t((offset & 0xf) - pso); - - if (rotamt < 0) - rotamt += 16; - - rotate = DAG.getConstant(rotamt, MVT::i16); - - // Simplify the base pointer for this case: - basePtr = basePtr.getOperand(0); - if ((offset & ~0xf) > 0) { - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant((offset & ~0xf), PtrVT)); - } - } else if ((basePtr.getOpcode() == SPUISD::AFormAddr) - || (basePtr.getOpcode() == SPUISD::IndirectAddr - && basePtr.getOperand(0).getOpcode() == SPUISD::Hi - && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) { - // Plain aligned a-form address: rotate into preferred slot - // Same for (SPUindirect (SPUhi ...), (SPUlo ...)) - int64_t rotamt = -pso; - if (rotamt < 0) - rotamt += 16; - rotate = DAG.getConstant(rotamt, MVT::i16); - } else { - // Offset the rotate amount by the basePtr and the preferred slot - // byte offset - int64_t rotamt = -pso; - if (rotamt < 0) - rotamt += 16; - rotate = DAG.getNode(ISD::ADD, dl, PtrVT, - basePtr, - DAG.getConstant(rotamt, PtrVT)); - } - } else { - // Unaligned load: must be more pessimistic about addressing modes: - if (basePtr.getOpcode() == ISD::ADD) { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - SDValue Flag; - - SDValue Op0 = basePtr.getOperand(0); - SDValue Op1 = basePtr.getOperand(1); - - if (isa(Op1)) { - // Convert the (add , ) to an indirect address contained - // in a register. Note that this is done because we need to avoid - // creating a 0(reg) d-form address due to the SPU's block loads. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); - basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); - } else { - // Convert the (add , ) to an indirect address, which - // will likely be lowered as a reg(reg) x-form address. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - } - } else { - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - } - - // Offset the rotate amount by the basePtr and the preferred slot - // byte offset - rotate = DAG.getNode(ISD::ADD, dl, PtrVT, - basePtr, - DAG.getConstant(-pso, PtrVT)); - } - - // Do the load as a i128 to allow possible shifting - SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr, - lowMemPtr, - LN->isVolatile(), LN->isNonTemporal(), false, 16); - - // When the size is not greater than alignment we get all data with just - // one load - if (alignment >= InVT.getSizeInBits()/8) { - // Update the chain - the_chain = low.getValue(1); - - // Rotate into the preferred slot: - result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128, - low.getValue(0), rotate); - - // Convert the loaded v16i8 vector to the appropriate vector type - // specified by the operand: - EVT vecVT = EVT::getVectorVT(*DAG.getContext(), - InVT, (128 / InVT.getSizeInBits())); - result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, - DAG.getNode(ISD::BITCAST, dl, vecVT, result)); - } - // When alignment is less than the size, we might need (known only at - // run-time) two loads - // TODO: if the memory address is composed only from constants, we have - // extra kowledge, and might avoid the second load - else { - // storage position offset from lower 16 byte aligned memory chunk - SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32, - basePtr, DAG.getConstant( 0xf, MVT::i32 ) ); - // get a registerfull of ones. (this implementation is a workaround: LLVM - // cannot handle 128 bit signed int constants) - SDValue ones = DAG.getConstant(-1, MVT::v4i32 ); - ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones); - - SDValue high = DAG.getLoad(MVT::i128, dl, the_chain, - DAG.getNode(ISD::ADD, dl, PtrVT, - basePtr, - DAG.getConstant(16, PtrVT)), - highMemPtr, - LN->isVolatile(), LN->isNonTemporal(), false, - 16); - - the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1), - high.getValue(1)); - - // Shift the (possible) high part right to compensate the misalignemnt. - // if there is no highpart (i.e. value is i64 and offset is 4), this - // will zero out the high value. - high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high, - DAG.getNode(ISD::SUB, dl, MVT::i32, - DAG.getConstant( 16, MVT::i32), - offset - )); - - // Shift the low similarly - // TODO: add SPUISD::SHL_BYTES - low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset ); - - // Merge the two parts - result = DAG.getNode(ISD::BITCAST, dl, vecVT, - DAG.getNode(ISD::OR, dl, MVT::i128, low, high)); - - if (!InVT.isVector()) { - result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result ); - } - - } - // Handle extending loads by extending the scalar result: - if (ExtType == ISD::SEXTLOAD) { - result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result); - } else if (ExtType == ISD::ZEXTLOAD) { - result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result); - } else if (ExtType == ISD::EXTLOAD) { - unsigned NewOpc = ISD::ANY_EXTEND; - - if (OutVT.isFloatingPoint()) - NewOpc = ISD::FP_EXTEND; - - result = DAG.getNode(NewOpc, dl, OutVT, result); - } - - SDVTList retvts = DAG.getVTList(OutVT, MVT::Other); - SDValue retops[2] = { - result, - the_chain - }; - - result = DAG.getNode(SPUISD::LDRESULT, dl, retvts, - retops, sizeof(retops) / sizeof(retops[0])); - return result; -} - -/// Custom lower stores for CellSPU -/*! - All CellSPU stores are aligned to 16-byte boundaries, so for elements - within a 16-byte block, we have to generate a shuffle to insert the - requested element into its place, then store the resulting block. - */ -static SDValue -LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - StoreSDNode *SN = cast(Op); - SDValue Value = SN->getValue(); - EVT VT = Value.getValueType(); - EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT()); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); - unsigned alignment = SN->getAlignment(); - SDValue result; - EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT, - (128 / StVT.getSizeInBits())); - // Get pointerinfos to the memory chunk(s) that contain the data to load - uint64_t mpi_offset = SN->getPointerInfo().Offset; - mpi_offset -= mpi_offset%16; - MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset); - MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16); - - - // two sanity checks - assert( SN->getAddressingMode() == ISD::UNINDEXED - && "we should get only UNINDEXED adresses"); - // clean aligned loads can be selected as-is - if (StVT.getSizeInBits() == 128 && (alignment%16) == 0) - return SDValue(); - - SDValue alignLoadVec; - SDValue basePtr = SN->getBasePtr(); - SDValue the_chain = SN->getChain(); - SDValue insertEltOffs; - - if ((alignment%16) == 0) { - ConstantSDNode *CN; - // Special cases for a known aligned load to simplify the base pointer - // and insertion byte: - if (basePtr.getOpcode() == ISD::ADD - && (CN = dyn_cast(basePtr.getOperand(1))) != 0) { - // Known offset into basePtr - int64_t offset = CN->getSExtValue(); - - // Simplify the base pointer for this case: - basePtr = basePtr.getOperand(0); - insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant((offset & 0xf), PtrVT)); - - if ((offset & ~0xf) > 0) { - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant((offset & ~0xf), PtrVT)); - } - } else { - // Otherwise, assume it's at byte 0 of basePtr - insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - } - } else { - // Unaligned load: must be more pessimistic about addressing modes: - if (basePtr.getOpcode() == ISD::ADD) { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - SDValue Flag; - - SDValue Op0 = basePtr.getOperand(0); - SDValue Op1 = basePtr.getOperand(1); - - if (isa(Op1)) { - // Convert the (add , ) to an indirect address contained - // in a register. Note that this is done because we need to avoid - // creating a 0(reg) d-form address due to the SPU's block loads. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); - basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); - } else { - // Convert the (add , ) to an indirect address, which - // will likely be lowered as a reg(reg) x-form address. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - } - } else { - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - } - - // Insertion point is solely determined by basePtr's contents - insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - } - - // Load the lower part of the memory to which to store. - SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr, - lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), - false, 16); - - // if we don't need to store over the 16 byte boundary, one store suffices - if (alignment >= StVT.getSizeInBits()/8) { - // Update the chain - the_chain = low.getValue(1); - - LoadSDNode *LN = cast(low); - SDValue theValue = SN->getValue(); - - if (StVT != VT - && (theValue.getOpcode() == ISD::AssertZext - || theValue.getOpcode() == ISD::AssertSext)) { - // Drill down and get the value for zero- and sign-extended - // quantities - theValue = theValue.getOperand(0); - } - - // If the base pointer is already a D-form address, then just create - // a new D-form address with a slot offset and the orignal base pointer. - // Otherwise generate a D-form address with the slot offset relative - // to the stack pointer, which is always aligned. -#if !defined(NDEBUG) - if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - errs() << "CellSPU LowerSTORE: basePtr = "; - basePtr.getNode()->dump(&DAG); - errs() << "\n"; - } -#endif - - SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, - insertEltOffs); - SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, - theValue); - - result = DAG.getNode(SPUISD::SHUFB, dl, vecVT, - vectorizeOp, low, - DAG.getNode(ISD::BITCAST, dl, - MVT::v4i32, insertEltOp)); - - result = DAG.getStore(the_chain, dl, result, basePtr, - lowMemPtr, - LN->isVolatile(), LN->isNonTemporal(), - 16); - - } - // do the store when it might cross the 16 byte memory access boundary. - else { - // TODO issue a warning if SN->isVolatile()== true? This is likely not - // what the user wanted. - - // address offset from nearest lower 16byte alinged address - SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32, - SN->getBasePtr(), - DAG.getConstant(0xf, MVT::i32)); - // 16 - offset - SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32, - DAG.getConstant( 16, MVT::i32), - offset); - // 16 - sizeof(Value) - SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32, - DAG.getConstant( 16, MVT::i32), - DAG.getConstant( VT.getSizeInBits()/8, - MVT::i32)); - // get a registerfull of ones - SDValue ones = DAG.getConstant(-1, MVT::v4i32); - ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones); - - // Create the 128 bit masks that have ones where the data to store is - // located. - SDValue lowmask, himask; - // if the value to store don't fill up the an entire 128 bits, zero - // out the last bits of the mask so that only the value we want to store - // is masked. - // this is e.g. in the case of store i32, align 2 - if (!VT.isVector()){ - Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value); - lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus); - lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask, - surplus); - Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value); - Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask); - - } - else { - lowmask = ones; - Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value); - } - // this will zero, if there are no data that goes to the high quad - himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask, - offset_compl); - lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask, - offset); - - // Load in the old data and zero out the parts that will be overwritten with - // the new data to store. - SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain, - DAG.getNode(ISD::ADD, dl, PtrVT, basePtr, - DAG.getConstant( 16, PtrVT)), - highMemPtr, - SN->isVolatile(), SN->isNonTemporal(), - false, 16); - the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1), - hi.getValue(1)); - - low = DAG.getNode(ISD::AND, dl, MVT::i128, - DAG.getNode( ISD::BITCAST, dl, MVT::i128, low), - DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones)); - hi = DAG.getNode(ISD::AND, dl, MVT::i128, - DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi), - DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones)); - - // Shift the Value to store into place. rlow contains the parts that go to - // the lower memory chunk, rhi has the parts that go to the upper one. - SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset); - rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask); - SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value, - offset_compl); - - // Merge the old data and the new data and store the results - // Need to convert vectors here to integer as 'OR'ing floats assert - rlow = DAG.getNode(ISD::OR, dl, MVT::i128, - DAG.getNode(ISD::BITCAST, dl, MVT::i128, low), - DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow)); - rhi = DAG.getNode(ISD::OR, dl, MVT::i128, - DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi), - DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi)); - - low = DAG.getStore(the_chain, dl, rlow, basePtr, - lowMemPtr, - SN->isVolatile(), SN->isNonTemporal(), 16); - hi = DAG.getStore(the_chain, dl, rhi, - DAG.getNode(ISD::ADD, dl, PtrVT, basePtr, - DAG.getConstant( 16, PtrVT)), - highMemPtr, - SN->isVolatile(), SN->isNonTemporal(), 16); - result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0), - hi.getValue(0)); - } - - return result; -} - -//! Generate the address of a constant pool entry. -static SDValue -LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - EVT PtrVT = Op.getValueType(); - ConstantPoolSDNode *CP = cast(Op); - const Constant *C = CP->getConstVal(); - SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); - SDValue Zero = DAG.getConstant(0, PtrVT); - const TargetMachine &TM = DAG.getTarget(); - // FIXME there is no actual debug info here - DebugLoc dl = Op.getDebugLoc(); - - if (TM.getRelocationModel() == Reloc::Static) { - if (!ST->usingLargeMem()) { - // Just return the SDValue with the constant pool address in it. - return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero); - } else { - SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero); - SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero); - return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); - } - } - - llvm_unreachable("LowerConstantPool: Relocation model other than static" - " not supported."); -} - -//! Alternate entry point for generating the address of a constant pool entry -SDValue -SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) { - return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl()); -} - -static SDValue -LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - EVT PtrVT = Op.getValueType(); - JumpTableSDNode *JT = cast(Op); - SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - SDValue Zero = DAG.getConstant(0, PtrVT); - const TargetMachine &TM = DAG.getTarget(); - // FIXME there is no actual debug info here - DebugLoc dl = Op.getDebugLoc(); - - if (TM.getRelocationModel() == Reloc::Static) { - if (!ST->usingLargeMem()) { - return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero); - } else { - SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero); - SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero); - return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); - } - } - - llvm_unreachable("LowerJumpTable: Relocation model other than static" - " not supported."); -} - -static SDValue -LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - EVT PtrVT = Op.getValueType(); - GlobalAddressSDNode *GSDN = cast(Op); - const GlobalValue *GV = GSDN->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), - PtrVT, GSDN->getOffset()); - const TargetMachine &TM = DAG.getTarget(); - SDValue Zero = DAG.getConstant(0, PtrVT); - // FIXME there is no actual debug info here - DebugLoc dl = Op.getDebugLoc(); - - if (TM.getRelocationModel() == Reloc::Static) { - if (!ST->usingLargeMem()) { - return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero); - } else { - SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero); - SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero); - return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); - } - } else { - report_fatal_error("LowerGlobalAddress: Relocation model other than static" - "not supported."); - /*NOTREACHED*/ - } -} - -//! Custom lower double precision floating point constants -static SDValue -LowerConstantFP(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - // FIXME there is no actual debug info here - DebugLoc dl = Op.getDebugLoc(); - - if (VT == MVT::f64) { - ConstantFPSDNode *FP = cast(Op.getNode()); - - assert((FP != 0) && - "LowerConstantFP: Node is not ConstantFPSDNode"); - - uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble()); - SDValue T = DAG.getConstant(dbits, MVT::i64); - SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T); - return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, - DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec)); - } - - return SDValue(); -} - -SDValue -SPUTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl - &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) - const { - - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - SPUFunctionInfo *FuncInfo = MF.getInfo(); - - unsigned ArgOffset = SPUFrameLowering::minStackSize(); - unsigned ArgRegIdx = 0; - unsigned StackSlotSize = SPUFrameLowering::stackSlotSize(); - - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - - SmallVector ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - // FIXME: allow for other calling conventions - CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU); - - // Add DAG nodes to load the arguments or copy them out of registers. - for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { - EVT ObjectVT = Ins[ArgNo].VT; - unsigned ObjSize = ObjectVT.getSizeInBits()/8; - SDValue ArgVal; - CCValAssign &VA = ArgLocs[ArgNo]; - - if (VA.isRegLoc()) { - const TargetRegisterClass *ArgRegClass; - - switch (ObjectVT.getSimpleVT().SimpleTy) { - default: - report_fatal_error("LowerFormalArguments Unhandled argument type: " + - Twine(ObjectVT.getEVTString())); - case MVT::i8: - ArgRegClass = &SPU::R8CRegClass; - break; - case MVT::i16: - ArgRegClass = &SPU::R16CRegClass; - break; - case MVT::i32: - ArgRegClass = &SPU::R32CRegClass; - break; - case MVT::i64: - ArgRegClass = &SPU::R64CRegClass; - break; - case MVT::i128: - ArgRegClass = &SPU::GPRCRegClass; - break; - case MVT::f32: - ArgRegClass = &SPU::R32FPRegClass; - break; - case MVT::f64: - ArgRegClass = &SPU::R64FPRegClass; - break; - case MVT::v2f64: - case MVT::v4f32: - case MVT::v2i64: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v16i8: - ArgRegClass = &SPU::VECREGRegClass; - break; - } - - unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); - RegInfo.addLiveIn(VA.getLocReg(), VReg); - ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); - ++ArgRegIdx; - } else { - // We need to load the argument to a virtual register if we determined - // above that we ran out of physical registers of the appropriate type - // or we're forced to do vararg - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true); - SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), - false, false, false, 0); - ArgOffset += StackSlotSize; - } - - InVals.push_back(ArgVal); - // Update the chain - Chain = ArgVal.getOperand(0); - } - - // vararg handling: - if (isVarArg) { - // FIXME: we should be able to query the argument registers from - // tablegen generated code. - static const uint16_t ArgRegs[] = { - SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, - SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, - SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, - SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30, - SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37, - SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44, - SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51, - SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58, - SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65, - SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72, - SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 - }; - // size of ArgRegs array - const unsigned NumArgRegs = 77; - - // We will spill (79-3)+1 registers to the stack - SmallVector MemOps; - - // Create the frame slot - for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { - FuncInfo->setVarArgsFrameIndex( - MFI->CreateFixedObject(StackSlotSize, ArgOffset, true)); - SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); - unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass); - SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); - SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(), - false, false, 0); - Chain = Store.getOperand(0); - MemOps.push_back(Store); - - // Increment address by stack slot size for the next stored argument - ArgOffset += StackSlotSize; - } - if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); - } - - return Chain; -} - -/// isLSAAddress - Return the immediate to use if the specified -/// value is representable as a LSA address. -static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { - ConstantSDNode *C = dyn_cast(Op); - if (!C) return 0; - - int Addr = C->getZExtValue(); - if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. - (Addr << 14 >> 14) != Addr) - return 0; // Top 14 bits have to be sext of immediate. - - return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode(); -} - -SDValue -SPUTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const { - SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; - SDValue Chain = CLI.Chain; - SDValue Callee = CLI.Callee; - bool &isTailCall = CLI.IsTailCall; - CallingConv::ID CallConv = CLI.CallConv; - bool isVarArg = CLI.IsVarArg; - - // CellSPU target does not yet support tail call optimization. - isTailCall = false; - - const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); - unsigned NumOps = Outs.size(); - unsigned StackSlotSize = SPUFrameLowering::stackSlotSize(); - - SmallVector ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - // FIXME: allow for other calling conventions - CCInfo.AnalyzeCallOperands(Outs, CCC_SPU); - - const unsigned NumArgRegs = ArgLocs.size(); - - - // Handy pointer type - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - - // Set up a copy of the stack pointer for use loading and storing any - // arguments that may not fit in the registers available for argument - // passing. - SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32); - - // Figure out which arguments are going to go in registers, and which in - // memory. - unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR] - unsigned ArgRegIdx = 0; - - // Keep track of registers passing arguments - std::vector > RegsToPass; - // And the arguments passed on the stack - SmallVector MemOpChains; - - for (; ArgRegIdx != NumOps; ++ArgRegIdx) { - SDValue Arg = OutVals[ArgRegIdx]; - CCValAssign &VA = ArgLocs[ArgRegIdx]; - - // PtrOff will be used to store the current argument to the stack if a - // register cannot be found for it. - SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); - PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - - switch (Arg.getValueType().getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected ValueType for argument!"); - case MVT::i8: - case MVT::i16: - case MVT::i32: - case MVT::i64: - case MVT::i128: - case MVT::f32: - case MVT::f64: - case MVT::v2i64: - case MVT::v2f64: - case MVT::v4f32: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v16i8: - if (ArgRegIdx != NumArgRegs) { - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo(), - false, false, 0)); - ArgOffset += StackSlotSize; - } - break; - } - } - - // Accumulate how many bytes are to be pushed on the stack, including the - // linkage area, and parameter passing area. According to the SPU ABI, - // we minimally need space for [LR] and [SP]. - unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize(); - - // Insert a call sequence start - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes, - true)); - - if (!MemOpChains.empty()) { - // Adjust the stack pointer for the stack arguments. - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); - } - - // Build a sequence of copy-to-reg nodes chained together with token chain - // and flag operands which copy the outgoing args into the appropriate regs. - SDValue InFlag; - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - SmallVector Ops; - unsigned CallOpc = SPUISD::CALL; - - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every - // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol - // node so that legalize doesn't hack it. - if (GlobalAddressSDNode *G = dyn_cast(Callee)) { - const GlobalValue *GV = G->getGlobal(); - EVT CalleeVT = Callee.getValueType(); - SDValue Zero = DAG.getConstant(0, PtrVT); - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT); - - if (!ST->usingLargeMem()) { - // Turn calls to targets that are defined (i.e., have bodies) into BRSL - // style calls, otherwise, external symbols are BRASL calls. This assumes - // that declared/defined symbols are in the same compilation unit and can - // be reached through PC-relative jumps. - // - // NOTE: - // This may be an unsafe assumption for JIT and really large compilation - // units. - if (GV->isDeclaration()) { - Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero); - } else { - Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero); - } - } else { - // "Large memory" mode: Turn all calls into indirect calls with a X-form - // address pairs: - Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero); - } - } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { - EVT CalleeVT = Callee.getValueType(); - SDValue Zero = DAG.getConstant(0, PtrVT); - SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(), - Callee.getValueType()); - - if (!ST->usingLargeMem()) { - Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero); - } else { - Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero); - } - } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { - // If this is an absolute destination address that appears to be a legal - // local store address, use the munged value. - Callee = SDValue(Dest, 0); - } - - Ops.push_back(Chain); - Ops.push_back(Callee); - - // Add argument registers to the end of the list so that they are known live - // into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - if (InFlag.getNode()) - Ops.push_back(InFlag); - // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue), - &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true), - DAG.getIntPtrConstant(0, true), InFlag); - if (!Ins.empty()) - InFlag = Chain.getValue(1); - - // If the function returns void, just return the chain. - if (Ins.empty()) - return Chain; - - // Now handle the return value(s) - SmallVector RVLocs; - CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU); - - - // If the call has results, copy the values out of the ret val registers. - for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign VA = RVLocs[i]; - - SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), - InFlag); - Chain = Val.getValue(1); - InFlag = Val.getValue(2); - InVals.push_back(Val); - } - - return Chain; -} - -SDValue -SPUTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { - - SmallVector RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeReturn(Outs, RetCC_SPU); - - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - - SDValue Flag; - - // Copy the result values into the output registers. - for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign &VA = RVLocs[i]; - assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - OutVals[i], Flag); - Flag = Chain.getValue(1); - } - - if (Flag.getNode()) - return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - else - return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain); -} - - -//===----------------------------------------------------------------------===// -// Vector related lowering: -//===----------------------------------------------------------------------===// - -static ConstantSDNode * -getVecImm(SDNode *N) { - SDValue OpVal(0, 0); - - // Check to see if this buildvec has a single non-undef value in its elements. - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; - if (OpVal.getNode() == 0) - OpVal = N->getOperand(i); - else if (OpVal != N->getOperand(i)) - return 0; - } - - if (OpVal.getNode() != 0) { - if (ConstantSDNode *CN = dyn_cast(OpVal)) { - return CN; - } - } - - return 0; -} - -/// get_vec_i18imm - Test if this vector is a vector filled with the same value -/// and the value fits into an unsigned 18-bit constant, and if so, return the -/// constant -SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType) { - if (ConstantSDNode *CN = getVecImm(N)) { - uint64_t Value = CN->getZExtValue(); - if (ValueType == MVT::i64) { - uint64_t UValue = CN->getZExtValue(); - uint32_t upper = uint32_t(UValue >> 32); - uint32_t lower = uint32_t(UValue); - if (upper != lower) - return SDValue(); - Value = Value >> 32; - } - if (Value <= 0x3ffff) - return DAG.getTargetConstant(Value, ValueType); - } - - return SDValue(); -} - -/// get_vec_i16imm - Test if this vector is a vector filled with the same value -/// and the value fits into a signed 16-bit constant, and if so, return the -/// constant -SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType) { - if (ConstantSDNode *CN = getVecImm(N)) { - int64_t Value = CN->getSExtValue(); - if (ValueType == MVT::i64) { - uint64_t UValue = CN->getZExtValue(); - uint32_t upper = uint32_t(UValue >> 32); - uint32_t lower = uint32_t(UValue); - if (upper != lower) - return SDValue(); - Value = Value >> 32; - } - if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) { - return DAG.getTargetConstant(Value, ValueType); - } - } - - return SDValue(); -} - -/// get_vec_i10imm - Test if this vector is a vector filled with the same value -/// and the value fits into a signed 10-bit constant, and if so, return the -/// constant -SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType) { - if (ConstantSDNode *CN = getVecImm(N)) { - int64_t Value = CN->getSExtValue(); - if (ValueType == MVT::i64) { - uint64_t UValue = CN->getZExtValue(); - uint32_t upper = uint32_t(UValue >> 32); - uint32_t lower = uint32_t(UValue); - if (upper != lower) - return SDValue(); - Value = Value >> 32; - } - if (isInt<10>(Value)) - return DAG.getTargetConstant(Value, ValueType); - } - - return SDValue(); -} - -/// get_vec_i8imm - Test if this vector is a vector filled with the same value -/// and the value fits into a signed 8-bit constant, and if so, return the -/// constant. -/// -/// @note: The incoming vector is v16i8 because that's the only way we can load -/// constant vectors. Thus, we test to see if the upper and lower bytes are the -/// same value. -SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType) { - if (ConstantSDNode *CN = getVecImm(N)) { - int Value = (int) CN->getZExtValue(); - if (ValueType == MVT::i16 - && Value <= 0xffff /* truncated from uint64_t */ - && ((short) Value >> 8) == ((short) Value & 0xff)) - return DAG.getTargetConstant(Value & 0xff, ValueType); - else if (ValueType == MVT::i8 - && (Value & 0xff) == Value) - return DAG.getTargetConstant(Value, ValueType); - } - - return SDValue(); -} - -/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value -/// and the value fits into a signed 16-bit constant, and if so, return the -/// constant -SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType) { - if (ConstantSDNode *CN = getVecImm(N)) { - uint64_t Value = CN->getZExtValue(); - if ((ValueType == MVT::i32 - && ((unsigned) Value & 0xffff0000) == (unsigned) Value) - || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value)) - return DAG.getTargetConstant(Value >> 16, ValueType); - } - - return SDValue(); -} - -/// get_v4i32_imm - Catch-all for general 32-bit constant vectors -SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) { - if (ConstantSDNode *CN = getVecImm(N)) { - return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32); - } - - return SDValue(); -} - -/// get_v4i32_imm - Catch-all for general 64-bit constant vectors -SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { - if (ConstantSDNode *CN = getVecImm(N)) { - return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64); - } - - return SDValue(); -} - -//! Lower a BUILD_VECTOR instruction creatively: -static SDValue -LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - EVT EltVT = VT.getVectorElementType(); - DebugLoc dl = Op.getDebugLoc(); - BuildVectorSDNode *BCN = dyn_cast(Op.getNode()); - assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); - unsigned minSplatBits = EltVT.getSizeInBits(); - - if (minSplatBits < 16) - minSplatBits = 16; - - APInt APSplatBits, APSplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - - if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs, minSplatBits) - || minSplatBits < SplatBitSize) - return SDValue(); // Wasn't a constant vector or splat exceeded min - - uint64_t SplatBits = APSplatBits.getZExtValue(); - - switch (VT.getSimpleVT().SimpleTy) { - default: - report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + - Twine(VT.getEVTString())); - /*NOTREACHED*/ - case MVT::v4f32: { - uint32_t Value32 = uint32_t(SplatBits); - assert(SplatBitSize == 32 - && "LowerBUILD_VECTOR: Unexpected floating point vector element."); - // NOTE: pretend the constant is an integer. LLVM won't load FP constants - SDValue T = DAG.getConstant(Value32, MVT::i32); - return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T)); - } - case MVT::v2f64: { - uint64_t f64val = uint64_t(SplatBits); - assert(SplatBitSize == 64 - && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); - // NOTE: pretend the constant is an integer. LLVM won't load FP constants - SDValue T = DAG.getConstant(f64val, MVT::i64); - return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T)); - } - case MVT::v16i8: { - // 8-bit constants have to be expanded to 16-bits - unsigned short Value16 = SplatBits /* | (SplatBits << 8) */; - SmallVector Ops; - - Ops.assign(8, DAG.getConstant(Value16, MVT::i16)); - return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size())); - } - case MVT::v8i16: { - unsigned short Value16 = SplatBits; - SDValue T = DAG.getConstant(Value16, EltVT); - SmallVector Ops; - - Ops.assign(8, T); - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); - } - case MVT::v4i32: { - SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); - } - case MVT::v2i64: { - return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); - } - } -} - -/*! - */ -SDValue -SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, - DebugLoc dl) { - uint32_t upper = uint32_t(SplatVal >> 32); - uint32_t lower = uint32_t(SplatVal); - - if (upper == lower) { - // Magic constant that can be matched by IL, ILA, et. al. - SDValue Val = DAG.getTargetConstant(upper, MVT::i32); - return DAG.getNode(ISD::BITCAST, dl, OpVT, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - Val, Val, Val, Val)); - } else { - bool upper_special, lower_special; - - // NOTE: This code creates common-case shuffle masks that can be easily - // detected as common expressions. It is not attempting to create highly - // specialized masks to replace any and all 0's, 0xff's and 0x80's. - - // Detect if the upper or lower half is a special shuffle mask pattern: - upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); - lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); - - // Both upper and lower are special, lower to a constant pool load: - if (lower_special && upper_special) { - SDValue UpperVal = DAG.getConstant(upper, MVT::i32); - SDValue LowerVal = DAG.getConstant(lower, MVT::i32); - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - UpperVal, LowerVal, UpperVal, LowerVal); - return DAG.getNode(ISD::BITCAST, dl, OpVT, BV); - } - - SDValue LO32; - SDValue HI32; - SmallVector ShufBytes; - SDValue Result; - - // Create lower vector if not a special pattern - if (!lower_special) { - SDValue LO32C = DAG.getConstant(lower, MVT::i32); - LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - LO32C, LO32C, LO32C, LO32C)); - } - - // Create upper vector if not a special pattern - if (!upper_special) { - SDValue HI32C = DAG.getConstant(upper, MVT::i32); - HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - HI32C, HI32C, HI32C, HI32C)); - } - - // If either upper or lower are special, then the two input operands are - // the same (basically, one of them is a "don't care") - if (lower_special) - LO32 = HI32; - if (upper_special) - HI32 = LO32; - - for (int i = 0; i < 4; ++i) { - uint64_t val = 0; - for (int j = 0; j < 4; ++j) { - SDValue V; - bool process_upper, process_lower; - val <<= 8; - process_upper = (upper_special && (i & 1) == 0); - process_lower = (lower_special && (i & 1) == 1); - - if (process_upper || process_lower) { - if ((process_upper && upper == 0) - || (process_lower && lower == 0)) - val |= 0x80; - else if ((process_upper && upper == 0xffffffff) - || (process_lower && lower == 0xffffffff)) - val |= 0xc0; - else if ((process_upper && upper == 0x80000000) - || (process_lower && lower == 0x80000000)) - val |= (j == 0 ? 0xe0 : 0x80); - } else - val |= i * 4 + j + ((i & 1) * 16); - } - - ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); - } - - return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &ShufBytes[0], ShufBytes.size())); - } -} - -/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on -/// which the Cell can operate. The code inspects V3 to ascertain whether the -/// permutation vector, V3, is monotonically increasing with one "exception" -/// element, e.g., (0, 1, _, 3). If this is the case, then generate a -/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool. -/// In either case, the net result is going to eventually invoke SHUFB to -/// permute/shuffle the bytes from V1 and V2. -/// \note -/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate -/// control word for byte/halfword/word insertion. This takes care of a single -/// element move from V2 into V1. -/// \note -/// SPUISD::SHUFB is eventually selected as Cell's shufb instructions. -static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { - const ShuffleVectorSDNode *SVN = cast(Op); - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - - if (V2.getOpcode() == ISD::UNDEF) V2 = V1; - - // If we have a single element being moved from V1 to V2, this can be handled - // using the C*[DX] compute mask instructions, but the vector elements have - // to be monotonically increasing with one exception element, and the source - // slot of the element to move must be the same as the destination. - EVT VecVT = V1.getValueType(); - EVT EltVT = VecVT.getVectorElementType(); - unsigned EltsFromV2 = 0; - unsigned V2EltOffset = 0; - unsigned V2EltIdx0 = 0; - unsigned CurrElt = 0; - unsigned MaxElts = VecVT.getVectorNumElements(); - unsigned PrevElt = 0; - bool monotonic = true; - bool rotate = true; - int rotamt=0; - EVT maskVT; // which of the c?d instructions to use - - if (EltVT == MVT::i8) { - V2EltIdx0 = 16; - maskVT = MVT::v16i8; - } else if (EltVT == MVT::i16) { - V2EltIdx0 = 8; - maskVT = MVT::v8i16; - } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { - V2EltIdx0 = 4; - maskVT = MVT::v4i32; - } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { - V2EltIdx0 = 2; - maskVT = MVT::v2i64; - } else - llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); - - for (unsigned i = 0; i != MaxElts; ++i) { - if (SVN->getMaskElt(i) < 0) - continue; - - unsigned SrcElt = SVN->getMaskElt(i); - - if (monotonic) { - if (SrcElt >= V2EltIdx0) { - // TODO: optimize for the monotonic case when several consecutive - // elements are taken form V2. Do we ever get such a case? - if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0)) - V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8); - else - monotonic = false; - ++EltsFromV2; - } else if (CurrElt != SrcElt) { - monotonic = false; - } - - ++CurrElt; - } - - if (rotate) { - if (PrevElt > 0 && SrcElt < MaxElts) { - if ((PrevElt == SrcElt - 1) - || (PrevElt == MaxElts - 1 && SrcElt == 0)) { - PrevElt = SrcElt; - } else { - rotate = false; - } - } else if (i == 0 || (PrevElt==0 && SrcElt==1)) { - // First time or after a "wrap around" - rotamt = SrcElt-i; - PrevElt = SrcElt; - } else { - // This isn't a rotation, takes elements from vector 2 - rotate = false; - } - } - } - - if (EltsFromV2 == 1 && monotonic) { - // Compute mask and shuffle - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - - // As SHUFFLE_MASK becomes a c?d instruction, feed it an address - // R1 ($sp) is used here only as it is guaranteed to have last bits zero - SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - DAG.getRegister(SPU::R1, PtrVT), - DAG.getConstant(V2EltOffset, MVT::i32)); - SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, - maskVT, Pointer); - - // Use shuffle mask in SHUFB synthetic instruction: - return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, - ShufMaskOp); - } else if (rotate) { - if (rotamt < 0) - rotamt +=MaxElts; - rotamt *= EltVT.getSizeInBits()/8; - return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(), - V1, DAG.getConstant(rotamt, MVT::i16)); - } else { - // Convert the SHUFFLE_VECTOR mask's input element units to the - // actual bytes. - unsigned BytesPerElement = EltVT.getSizeInBits()/8; - - SmallVector ResultMask; - for (unsigned i = 0, e = MaxElts; i != e; ++i) { - unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i); - - for (unsigned j = 0; j < BytesPerElement; ++j) - ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); - } - SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, - &ResultMask[0], ResultMask.size()); - return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask); - } -} - -static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { - SDValue Op0 = Op.getOperand(0); // Op0 = the scalar - DebugLoc dl = Op.getDebugLoc(); - - if (Op0.getNode()->getOpcode() == ISD::Constant) { - // For a constant, build the appropriate constant vector, which will - // eventually simplify to a vector register load. - - ConstantSDNode *CN = cast(Op0.getNode()); - SmallVector ConstVecValues; - EVT VT; - size_t n_copies; - - // Create a constant vector: - switch (Op.getValueType().getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected constant value type in " - "LowerSCALAR_TO_VECTOR"); - case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; - case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; - case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; - case MVT::v4f32: n_copies = 4; VT = MVT::f32; break; - case MVT::v2i64: n_copies = 2; VT = MVT::i64; break; - case MVT::v2f64: n_copies = 2; VT = MVT::f64; break; - } - - SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT); - for (size_t j = 0; j < n_copies; ++j) - ConstVecValues.push_back(CValue); - - return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(), - &ConstVecValues[0], ConstVecValues.size()); - } else { - // Otherwise, copy the value from one register to another: - switch (Op0.getValueType().getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR"); - case MVT::i8: - case MVT::i16: - case MVT::i32: - case MVT::i64: - case MVT::f32: - case MVT::f64: - return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0); - } - } -} - -static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - SDValue N = Op.getOperand(0); - SDValue Elt = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - SDValue retval; - - if (ConstantSDNode *C = dyn_cast(Elt)) { - // Constant argument: - int EltNo = (int) C->getZExtValue(); - - // sanity checks: - if (VT == MVT::i8 && EltNo >= 16) - llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); - else if (VT == MVT::i16 && EltNo >= 8) - llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); - else if (VT == MVT::i32 && EltNo >= 4) - llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); - else if (VT == MVT::i64 && EltNo >= 2) - llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); - - if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { - // i32 and i64: Element 0 is the preferred slot - return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N); - } - - // Need to generate shuffle mask and extract: - int prefslot_begin = -1, prefslot_end = -1; - int elt_byte = EltNo * VT.getSizeInBits() / 8; - - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Invalid value type!"); - case MVT::i8: { - prefslot_begin = prefslot_end = 3; - break; - } - case MVT::i16: { - prefslot_begin = 2; prefslot_end = 3; - break; - } - case MVT::i32: - case MVT::f32: { - prefslot_begin = 0; prefslot_end = 3; - break; - } - case MVT::i64: - case MVT::f64: { - prefslot_begin = 0; prefslot_end = 7; - break; - } - } - - assert(prefslot_begin != -1 && prefslot_end != -1 && - "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized"); - - unsigned int ShufBytes[16] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; - for (int i = 0; i < 16; ++i) { - // zero fill uppper part of preferred slot, don't care about the - // other slots: - unsigned int mask_val; - if (i <= prefslot_end) { - mask_val = - ((i < prefslot_begin) - ? 0x80 - : elt_byte + (i - prefslot_begin)); - - ShufBytes[i] = mask_val; - } else - ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)]; - } - - SDValue ShufMask[4]; - for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) { - unsigned bidx = i * 4; - unsigned int bits = ((ShufBytes[bidx] << 24) | - (ShufBytes[bidx+1] << 16) | - (ShufBytes[bidx+2] << 8) | - ShufBytes[bidx+3]); - ShufMask[i] = DAG.getConstant(bits, MVT::i32); - } - - SDValue ShufMaskVec = - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0])); - - retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, - DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(), - N, N, ShufMaskVec)); - } else { - // Variable index: Rotate the requested element into slot 0, then replicate - // slot 0 across the vector - EVT VecVT = N.getValueType(); - if (!VecVT.isSimple() || !VecVT.isVector()) { - report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" - "vector type!"); - } - - // Make life easier by making sure the index is zero-extended to i32 - if (Elt.getValueType() != MVT::i32) - Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt); - - // Scale the index to a bit/byte shift quantity - APInt scaleFactor = - APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false); - unsigned scaleShift = scaleFactor.logBase2(); - SDValue vecShift; - - if (scaleShift > 0) { - // Scale the shift factor: - Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt, - DAG.getConstant(scaleShift, MVT::i32)); - } - - vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt); - - // Replicate the bytes starting at byte 0 across the entire vector (for - // consistency with the notion of a unified register set) - SDValue replicate; - - switch (VT.getSimpleVT().SimpleTy) { - default: - report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" - "type"); - /*NOTREACHED*/ - case MVT::i8: { - SDValue factor = DAG.getConstant(0x00000000, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - factor, factor, factor, factor); - break; - } - case MVT::i16: { - SDValue factor = DAG.getConstant(0x00010001, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - factor, factor, factor, factor); - break; - } - case MVT::i32: - case MVT::f32: { - SDValue factor = DAG.getConstant(0x00010203, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - factor, factor, factor, factor); - break; - } - case MVT::i64: - case MVT::f64: { - SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32); - SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - loFactor, hiFactor, loFactor, hiFactor); - break; - } - } - - retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, - DAG.getNode(SPUISD::SHUFB, dl, VecVT, - vecShift, vecShift, replicate)); - } - - return retval; -} - -static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { - SDValue VecOp = Op.getOperand(0); - SDValue ValOp = Op.getOperand(1); - SDValue IdxOp = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - EVT eltVT = ValOp.getValueType(); - - // use 0 when the lane to insert to is 'undef' - int64_t Offset=0; - if (IdxOp.getOpcode() != ISD::UNDEF) { - ConstantSDNode *CN = cast(IdxOp); - assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); - Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8; - } - - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - // Use $sp ($1) because it's always 16-byte aligned and it's available: - SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - DAG.getRegister(SPU::R1, PtrVT), - DAG.getConstant(Offset, PtrVT)); - // widen the mask when dealing with half vectors - EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(), - 128/ VT.getVectorElementType().getSizeInBits()); - SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); - - SDValue result = - DAG.getNode(SPUISD::SHUFB, dl, VT, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp), - VecOp, - DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask)); - - return result; -} - -static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, - const TargetLowering &TLI) -{ - SDValue N0 = Op.getOperand(0); // Everything has at least one operand - DebugLoc dl = Op.getDebugLoc(); - EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType()); - - assert(Op.getValueType() == MVT::i8); - switch (Opc) { - default: - llvm_unreachable("Unhandled i8 math operator"); - case ISD::ADD: { - // 8-bit addition: Promote the arguments up to 16-bits and truncate - // the result: - SDValue N1 = Op.getOperand(1); - N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); - N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, - DAG.getNode(Opc, dl, MVT::i16, N0, N1)); - - } - - case ISD::SUB: { - // 8-bit subtraction: Promote the arguments up to 16-bits and truncate - // the result: - SDValue N1 = Op.getOperand(1); - N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); - N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, - DAG.getNode(Opc, dl, MVT::i16, N0, N1)); - } - case ISD::ROTR: - case ISD::ROTL: { - SDValue N1 = Op.getOperand(1); - EVT N1VT = N1.getValueType(); - - N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); - if (!N1VT.bitsEq(ShiftVT)) { - unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT) - ? ISD::ZERO_EXTEND - : ISD::TRUNCATE; - N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); - } - - // Replicate lower 8-bits into upper 8: - SDValue ExpandArg = - DAG.getNode(ISD::OR, dl, MVT::i16, N0, - DAG.getNode(ISD::SHL, dl, MVT::i16, - N0, DAG.getConstant(8, MVT::i32))); - - // Truncate back down to i8 - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, - DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1)); - } - case ISD::SRL: - case ISD::SHL: { - SDValue N1 = Op.getOperand(1); - EVT N1VT = N1.getValueType(); - - N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); - if (!N1VT.bitsEq(ShiftVT)) { - unsigned N1Opc = ISD::ZERO_EXTEND; - - if (N1.getValueType().bitsGT(ShiftVT)) - N1Opc = ISD::TRUNCATE; - - N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); - } - - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, - DAG.getNode(Opc, dl, MVT::i16, N0, N1)); - } - case ISD::SRA: { - SDValue N1 = Op.getOperand(1); - EVT N1VT = N1.getValueType(); - - N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); - if (!N1VT.bitsEq(ShiftVT)) { - unsigned N1Opc = ISD::SIGN_EXTEND; - - if (N1VT.bitsGT(ShiftVT)) - N1Opc = ISD::TRUNCATE; - N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); - } - - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, - DAG.getNode(Opc, dl, MVT::i16, N0, N1)); - } - case ISD::MUL: { - SDValue N1 = Op.getOperand(1); - - N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); - N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, - DAG.getNode(Opc, dl, MVT::i16, N0, N1)); - } - } -} - -//! Lower byte immediate operations for v16i8 vectors: -static SDValue -LowerByteImmed(SDValue Op, SelectionDAG &DAG) { - SDValue ConstVec; - SDValue Arg; - EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); - - ConstVec = Op.getOperand(0); - Arg = Op.getOperand(1); - if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) { - if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) { - ConstVec = ConstVec.getOperand(0); - } else { - ConstVec = Op.getOperand(1); - Arg = Op.getOperand(0); - if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) { - ConstVec = ConstVec.getOperand(0); - } - } - } - - if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) { - BuildVectorSDNode *BCN = dyn_cast(ConstVec.getNode()); - assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed"); - - APInt APSplatBits, APSplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - unsigned minSplatBits = VT.getVectorElementType().getSizeInBits(); - - if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs, minSplatBits) - && minSplatBits <= SplatBitSize) { - uint64_t SplatBits = APSplatBits.getZExtValue(); - SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); - - SmallVector tcVec; - tcVec.assign(16, tc); - return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size())); - } - } - - // These operations (AND, OR, XOR) are legal, they just couldn't be custom - // lowered. Return the operation, rather than a null SDValue. - return Op; -} - -//! Custom lowering for CTPOP (count population) -/*! - Custom lowering code that counts the number ones in the input - operand. SPU has such an instruction, but it counts the number of - ones per byte, which then have to be accumulated. -*/ -static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - EVT vecVT = EVT::getVectorVT(*DAG.getContext(), - VT, (128 / VT.getSizeInBits())); - DebugLoc dl = Op.getDebugLoc(); - - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Invalid value type!"); - case MVT::i8: { - SDValue N = Op.getOperand(0); - SDValue Elt0 = DAG.getConstant(0, MVT::i32); - - SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); - SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); - - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0); - } - - case MVT::i16: { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - - unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass); - - SDValue N = Op.getOperand(0); - SDValue Elt0 = DAG.getConstant(0, MVT::i16); - SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16); - SDValue Shift1 = DAG.getConstant(8, MVT::i32); - - SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); - SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); - - // CNTB_result becomes the chain to which all of the virtual registers - // CNTB_reg, SUM1_reg become associated: - SDValue CNTB_result = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0); - - SDValue CNTB_rescopy = - DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); - - SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16); - - return DAG.getNode(ISD::AND, dl, MVT::i16, - DAG.getNode(ISD::ADD, dl, MVT::i16, - DAG.getNode(ISD::SRL, dl, MVT::i16, - Tmp1, Shift1), - Tmp1), - Mask0); - } - - case MVT::i32: { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - - unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - - SDValue N = Op.getOperand(0); - SDValue Elt0 = DAG.getConstant(0, MVT::i32); - SDValue Mask0 = DAG.getConstant(0xff, MVT::i32); - SDValue Shift1 = DAG.getConstant(16, MVT::i32); - SDValue Shift2 = DAG.getConstant(8, MVT::i32); - - SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); - SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); - - // CNTB_result becomes the chain to which all of the virtual registers - // CNTB_reg, SUM1_reg become associated: - SDValue CNTB_result = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0); - - SDValue CNTB_rescopy = - DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); - - SDValue Comp1 = - DAG.getNode(ISD::SRL, dl, MVT::i32, - DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32), - Shift1); - - SDValue Sum1 = - DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1, - DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32)); - - SDValue Sum1_rescopy = - DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1); - - SDValue Comp2 = - DAG.getNode(ISD::SRL, dl, MVT::i32, - DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32), - Shift2); - SDValue Sum2 = - DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2, - DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32)); - - return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0); - } - - case MVT::i64: - break; - } - - return SDValue(); -} - -//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32 -/*! - f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall. - All conversions to i64 are expanded to a libcall. - */ -static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - const SPUTargetLowering &TLI) { - EVT OpVT = Op.getValueType(); - SDValue Op0 = Op.getOperand(0); - EVT Op0VT = Op0.getValueType(); - - if ((OpVT == MVT::i32 && Op0VT == MVT::f64) - || OpVT == MVT::i64) { - // Convert f32 / f64 to i32 / i64 via libcall. - RTLIB::Libcall LC = - (Op.getOpcode() == ISD::FP_TO_SINT) - ? RTLIB::getFPTOSINT(Op0VT, OpVT) - : RTLIB::getFPTOUINT(Op0VT, OpVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!"); - SDValue Dummy; - return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); - } - - return Op; -} - -//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32 -/*! - i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall. - All conversions from i64 are expanded to a libcall. - */ -static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, - const SPUTargetLowering &TLI) { - EVT OpVT = Op.getValueType(); - SDValue Op0 = Op.getOperand(0); - EVT Op0VT = Op0.getValueType(); - - if ((OpVT == MVT::f64 && Op0VT == MVT::i32) - || Op0VT == MVT::i64) { - // Convert i32, i64 to f64 via libcall: - RTLIB::Libcall LC = - (Op.getOpcode() == ISD::SINT_TO_FP) - ? RTLIB::getSINTTOFP(Op0VT, OpVT) - : RTLIB::getUINTTOFP(Op0VT, OpVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!"); - SDValue Dummy; - return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); - } - - return Op; -} - -//! Lower ISD::SETCC -/*! - This handles MVT::f64 (double floating point) condition lowering - */ -static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { - CondCodeSDNode *CC = dyn_cast(Op.getOperand(2)); - DebugLoc dl = Op.getDebugLoc(); - assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n"); - - SDValue lhs = Op.getOperand(0); - SDValue rhs = Op.getOperand(1); - EVT lhsVT = lhs.getValueType(); - assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n"); - - EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType()); - APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); - EVT IntVT(MVT::i64); - - // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently - // selected to a NOP: - SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs); - SDValue lhsHi32 = - DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, - DAG.getNode(ISD::SRL, dl, IntVT, - i64lhs, DAG.getConstant(32, MVT::i32))); - SDValue lhsHi32abs = - DAG.getNode(ISD::AND, dl, MVT::i32, - lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32)); - SDValue lhsLo32 = - DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs); - - // SETO and SETUO only use the lhs operand: - if (CC->get() == ISD::SETO) { - // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of - // SETUO - APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); - return DAG.getNode(ISD::XOR, dl, ccResultVT, - DAG.getSetCC(dl, ccResultVT, - lhs, DAG.getConstantFP(0.0, lhsVT), - ISD::SETUO), - DAG.getConstant(ccResultAllOnes, ccResultVT)); - } else if (CC->get() == ISD::SETUO) { - // Evaluates to true if Op0 is [SQ]NaN - return DAG.getNode(ISD::AND, dl, ccResultVT, - DAG.getSetCC(dl, ccResultVT, - lhsHi32abs, - DAG.getConstant(0x7ff00000, MVT::i32), - ISD::SETGE), - DAG.getSetCC(dl, ccResultVT, - lhsLo32, - DAG.getConstant(0, MVT::i32), - ISD::SETGT)); - } - - SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs); - SDValue rhsHi32 = - DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, - DAG.getNode(ISD::SRL, dl, IntVT, - i64rhs, DAG.getConstant(32, MVT::i32))); - - // If a value is negative, subtract from the sign magnitude constant: - SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT); - - // Convert the sign-magnitude representation into 2's complement: - SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, - lhsHi32, DAG.getConstant(31, MVT::i32)); - SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs); - SDValue lhsSelect = - DAG.getNode(ISD::SELECT, dl, IntVT, - lhsSelectMask, lhsSignMag2TC, i64lhs); - - SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, - rhsHi32, DAG.getConstant(31, MVT::i32)); - SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs); - SDValue rhsSelect = - DAG.getNode(ISD::SELECT, dl, IntVT, - rhsSelectMask, rhsSignMag2TC, i64rhs); - - unsigned compareOp; - - switch (CC->get()) { - case ISD::SETOEQ: - case ISD::SETUEQ: - compareOp = ISD::SETEQ; break; - case ISD::SETOGT: - case ISD::SETUGT: - compareOp = ISD::SETGT; break; - case ISD::SETOGE: - case ISD::SETUGE: - compareOp = ISD::SETGE; break; - case ISD::SETOLT: - case ISD::SETULT: - compareOp = ISD::SETLT; break; - case ISD::SETOLE: - case ISD::SETULE: - compareOp = ISD::SETLE; break; - case ISD::SETUNE: - case ISD::SETONE: - compareOp = ISD::SETNE; break; - default: - report_fatal_error("CellSPU ISel Select: unimplemented f64 condition"); - } - - SDValue result = - DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect, - (ISD::CondCode) compareOp); - - if ((CC->get() & 0x8) == 0) { - // Ordered comparison: - SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT, - lhs, DAG.getConstantFP(0.0, MVT::f64), - ISD::SETO); - SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT, - rhs, DAG.getConstantFP(0.0, MVT::f64), - ISD::SETO); - SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN); - - result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result); - } - - return result; -} - -//! Lower ISD::SELECT_CC -/*! - ISD::SELECT_CC can (generally) be implemented directly on the SPU using the - SELB instruction. - - \note Need to revisit this in the future: if the code path through the true - and false value computations is longer than the latency of a branch (6 - cycles), then it would be more advantageous to branch and insert a new basic - block and branch on the condition. However, this code does not make that - assumption, given the simplisitc uses so far. - */ - -static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { - EVT VT = Op.getValueType(); - SDValue lhs = Op.getOperand(0); - SDValue rhs = Op.getOperand(1); - SDValue trueval = Op.getOperand(2); - SDValue falseval = Op.getOperand(3); - SDValue condition = Op.getOperand(4); - DebugLoc dl = Op.getDebugLoc(); - - // NOTE: SELB's arguments: $rA, $rB, $mask - // - // SELB selects bits from $rA where bits in $mask are 0, bits from $rB - // where bits in $mask are 1. CCond will be inverted, having 1s where the - // condition was true and 0s where the condition was false. Hence, the - // arguments to SELB get reversed. - - // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's - // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up - // with another "cannot select select_cc" assert: - - SDValue compare = DAG.getNode(ISD::SETCC, dl, - TLI.getSetCCResultType(Op.getValueType()), - lhs, rhs, condition); - return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare); -} - -//! Custom lower ISD::TRUNCATE -static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) -{ - // Type to truncate to - EVT VT = Op.getValueType(); - MVT simpleVT = VT.getSimpleVT(); - EVT VecVT = EVT::getVectorVT(*DAG.getContext(), - VT, (128 / VT.getSizeInBits())); - DebugLoc dl = Op.getDebugLoc(); - - // Type to truncate from - SDValue Op0 = Op.getOperand(0); - EVT Op0VT = Op0.getValueType(); - - if (Op0VT == MVT::i128 && simpleVT == MVT::i64) { - // Create shuffle mask, least significant doubleword of quadword - unsigned maskHigh = 0x08090a0b; - unsigned maskLow = 0x0c0d0e0f; - // Use a shuffle to perform the truncation - SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - DAG.getConstant(maskHigh, MVT::i32), - DAG.getConstant(maskLow, MVT::i32), - DAG.getConstant(maskHigh, MVT::i32), - DAG.getConstant(maskLow, MVT::i32)); - - SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT, - Op0, Op0, shufMask); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle); - } - - return SDValue(); // Leave the truncate unmolested -} - -/*! - * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic - * algorithm is to duplicate the sign bit using rotmai to generate at - * least one byte full of sign bits. Then propagate the "sign-byte" into - * the leftmost words and the i64/i32 into the rightmost words using shufb. - * - * @param Op The sext operand - * @param DAG The current DAG - * @return The SDValue with the entire instruction sequence - */ -static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) -{ - DebugLoc dl = Op.getDebugLoc(); - - // Type to extend to - MVT OpVT = Op.getValueType().getSimpleVT(); - - // Type to extend from - SDValue Op0 = Op.getOperand(0); - MVT Op0VT = Op0.getValueType().getSimpleVT(); - - // extend i8 & i16 via i32 - if (Op0VT == MVT::i8 || Op0VT == MVT::i16) { - Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0); - Op0VT = MVT::i32; - } - - // The type to extend to needs to be a i128 and - // the type to extend from needs to be i64 or i32. - assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) && - "LowerSIGN_EXTEND: input and/or output operand have wrong size"); - (void)OpVT; - - // Create shuffle mask - unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7 - unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11 - unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15 - SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - DAG.getConstant(mask1, MVT::i32), - DAG.getConstant(mask1, MVT::i32), - DAG.getConstant(mask2, MVT::i32), - DAG.getConstant(mask3, MVT::i32)); - - // Word wise arithmetic right shift to generate at least one byte - // that contains sign bits. - MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32; - SDValue sraVal = DAG.getNode(ISD::SRA, - dl, - mvt, - DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0), - DAG.getConstant(31, MVT::i32)); - - // reinterpret as a i128 (SHUFB requires it). This gets lowered away. - SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, - dl, Op0VT, Op0, - DAG.getTargetConstant( - SPU::GPRCRegClass.getID(), - MVT::i32)), 0); - // Shuffle bytes - Copy the sign bits into the upper 64 bits - // and the input value into the lower 64 bits. - SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt, - extended, sraVal, shufMask); - return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle); -} - -//! Custom (target-specific) lowering entry point -/*! - This is where LLVM's DAG selection process calls to do target-specific - lowering of nodes. - */ -SDValue -SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const -{ - unsigned Opc = (unsigned) Op.getOpcode(); - EVT VT = Op.getValueType(); - - switch (Opc) { - default: { -#ifndef NDEBUG - errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; - errs() << "Op.getOpcode() = " << Opc << "\n"; - errs() << "*Op.getNode():\n"; - Op.getNode()->dump(); -#endif - llvm_unreachable(0); - } - case ISD::LOAD: - case ISD::EXTLOAD: - case ISD::SEXTLOAD: - case ISD::ZEXTLOAD: - return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::STORE: - return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::ConstantPool: - return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::GlobalAddress: - return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::JumpTable: - return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::ConstantFP: - return LowerConstantFP(Op, DAG); - - // i8, i64 math ops: - case ISD::ADD: - case ISD::SUB: - case ISD::ROTR: - case ISD::ROTL: - case ISD::SRL: - case ISD::SHL: - case ISD::SRA: { - if (VT == MVT::i8) - return LowerI8Math(Op, DAG, Opc, *this); - break; - } - - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - return LowerFP_TO_INT(Op, DAG, *this); - - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - return LowerINT_TO_FP(Op, DAG, *this); - - // Vector-related lowering. - case ISD::BUILD_VECTOR: - return LowerBUILD_VECTOR(Op, DAG); - case ISD::SCALAR_TO_VECTOR: - return LowerSCALAR_TO_VECTOR(Op, DAG); - case ISD::VECTOR_SHUFFLE: - return LowerVECTOR_SHUFFLE(Op, DAG); - case ISD::EXTRACT_VECTOR_ELT: - return LowerEXTRACT_VECTOR_ELT(Op, DAG); - case ISD::INSERT_VECTOR_ELT: - return LowerINSERT_VECTOR_ELT(Op, DAG); - - // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - return LowerByteImmed(Op, DAG); - - // Vector and i8 multiply: - case ISD::MUL: - if (VT == MVT::i8) - return LowerI8Math(Op, DAG, Opc, *this); - - case ISD::CTPOP: - return LowerCTPOP(Op, DAG); - - case ISD::SELECT_CC: - return LowerSELECT_CC(Op, DAG, *this); - - case ISD::SETCC: - return LowerSETCC(Op, DAG, *this); - - case ISD::TRUNCATE: - return LowerTRUNCATE(Op, DAG); - - case ISD::SIGN_EXTEND: - return LowerSIGN_EXTEND(Op, DAG); - } - - return SDValue(); -} - -void SPUTargetLowering::ReplaceNodeResults(SDNode *N, - SmallVectorImpl&Results, - SelectionDAG &DAG) const -{ -#if 0 - unsigned Opc = (unsigned) N->getOpcode(); - EVT OpVT = N->getValueType(0); - - switch (Opc) { - default: { - errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n"; - errs() << "Op.getOpcode() = " << Opc << "\n"; - errs() << "*Op.getNode():\n"; - N->dump(); - abort(); - /*NOTREACHED*/ - } - } -#endif - - /* Otherwise, return unchanged */ -} - -//===----------------------------------------------------------------------===// -// Target Optimization Hooks -//===----------------------------------------------------------------------===// - -SDValue -SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const -{ -#if 0 - TargetMachine &TM = getTargetMachine(); -#endif - const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); - SelectionDAG &DAG = DCI.DAG; - SDValue Op0 = N->getOperand(0); // everything has at least one operand - EVT NodeVT = N->getValueType(0); // The node's value type - EVT Op0VT = Op0.getValueType(); // The first operand's result - SDValue Result; // Initially, empty result - DebugLoc dl = N->getDebugLoc(); - - switch (N->getOpcode()) { - default: break; - case ISD::ADD: { - SDValue Op1 = N->getOperand(1); - - if (Op0.getOpcode() == SPUISD::IndirectAddr - || Op1.getOpcode() == SPUISD::IndirectAddr) { - // Normalize the operands to reduce repeated code - SDValue IndirectArg = Op0, AddArg = Op1; - - if (Op1.getOpcode() == SPUISD::IndirectAddr) { - IndirectArg = Op1; - AddArg = Op0; - } - - if (isa(AddArg)) { - ConstantSDNode *CN0 = cast (AddArg); - SDValue IndOp1 = IndirectArg.getOperand(1); - - if (CN0->isNullValue()) { - // (add (SPUindirect , ), 0) -> - // (SPUindirect , ) - -#if !defined(NDEBUG) - if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - errs() << "\n" - << "Replace: (add (SPUindirect , ), 0)\n" - << "With: (SPUindirect , )\n"; - } -#endif - - return IndirectArg; - } else if (isa(IndOp1)) { - // (add (SPUindirect , ), ) -> - // (SPUindirect , ) - ConstantSDNode *CN1 = cast (IndOp1); - int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue(); - SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT); - -#if !defined(NDEBUG) - if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - errs() << "\n" - << "Replace: (add (SPUindirect , " << CN1->getSExtValue() - << "), " << CN0->getSExtValue() << ")\n" - << "With: (SPUindirect , " - << combinedConst << ")\n"; - } -#endif - - return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, - IndirectArg, combinedValue); - } - } - } - break; - } - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: { - if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) { - // (any_extend (SPUextract_elt0 )) -> - // (SPUextract_elt0 ) - // Types must match, however... -#if !defined(NDEBUG) - if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - errs() << "\nReplace: "; - N->dump(&DAG); - errs() << "\nWith: "; - Op0.getNode()->dump(&DAG); - errs() << "\n"; - } -#endif - - return Op0; - } - break; - } - case SPUISD::IndirectAddr: { - if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) { - ConstantSDNode *CN = dyn_cast(N->getOperand(1)); - if (CN != 0 && CN->isNullValue()) { - // (SPUindirect (SPUaform , 0), 0) -> - // (SPUaform , 0) - - DEBUG(errs() << "Replace: "); - DEBUG(N->dump(&DAG)); - DEBUG(errs() << "\nWith: "); - DEBUG(Op0.getNode()->dump(&DAG)); - DEBUG(errs() << "\n"); - - return Op0; - } - } else if (Op0.getOpcode() == ISD::ADD) { - SDValue Op1 = N->getOperand(1); - if (ConstantSDNode *CN1 = dyn_cast(Op1)) { - // (SPUindirect (add , ), 0) -> - // (SPUindirect , ) - if (CN1->isNullValue()) { - -#if !defined(NDEBUG) - if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - errs() << "\n" - << "Replace: (SPUindirect (add , ), 0)\n" - << "With: (SPUindirect , )\n"; - } -#endif - - return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, - Op0.getOperand(0), Op0.getOperand(1)); - } - } - } - break; - } - case SPUISD::SHL_BITS: - case SPUISD::SHL_BYTES: - case SPUISD::ROTBYTES_LEFT: { - SDValue Op1 = N->getOperand(1); - - // Kill degenerate vector shifts: - if (ConstantSDNode *CN = dyn_cast(Op1)) { - if (CN->isNullValue()) { - Result = Op0; - } - } - break; - } - case SPUISD::PREFSLOT2VEC: { - switch (Op0.getOpcode()) { - default: - break; - case ISD::ANY_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: { - // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot ))) -> - // - // but only if the SPUprefslot2vec and types match. - SDValue Op00 = Op0.getOperand(0); - if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) { - SDValue Op000 = Op00.getOperand(0); - if (Op000.getValueType() == NodeVT) { - Result = Op000; - } - } - break; - } - case SPUISD::VEC2PREFSLOT: { - // (SPUprefslot2vec (SPUvec2prefslot )) -> - // - Result = Op0.getOperand(0); - break; - } - } - break; - } - } - - // Otherwise, return unchanged. -#ifndef NDEBUG - if (Result.getNode()) { - DEBUG(errs() << "\nReplace.SPU: "); - DEBUG(N->dump(&DAG)); - DEBUG(errs() << "\nWith: "); - DEBUG(Result.getNode()->dump(&DAG)); - DEBUG(errs() << "\n"); - } -#endif - - return Result; -} - -//===----------------------------------------------------------------------===// -// Inline Assembly Support -//===----------------------------------------------------------------------===// - -/// getConstraintType - Given a constraint letter, return the type of -/// constraint it is for this target. -SPUTargetLowering::ConstraintType -SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const { - if (ConstraintLetter.size() == 1) { - switch (ConstraintLetter[0]) { - default: break; - case 'b': - case 'r': - case 'f': - case 'v': - case 'y': - return C_RegisterClass; - } - } - return TargetLowering::getConstraintType(ConstraintLetter); -} - -/// Examine constraint type and operand type and determine a weight value. -/// This object must already have been set up with the operand type -/// and the current alternative constraint selected. -TargetLowering::ConstraintWeight -SPUTargetLowering::getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const { - ConstraintWeight weight = CW_Invalid; - Value *CallOperandVal = info.CallOperandVal; - // If we don't have a value, we can't do a match, - // but allow it at the lowest weight. - if (CallOperandVal == NULL) - return CW_Default; - // Look at the constraint type. - switch (*constraint) { - default: - weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); - break; - //FIXME: Seems like the supported constraint letters were just copied - // from PPC, as the following doesn't correspond to the GCC docs. - // I'm leaving it so until someone adds the corresponding lowering support. - case 'b': - case 'r': - case 'f': - case 'd': - case 'v': - case 'y': - weight = CW_Register; - break; - } - return weight; -} - -std::pair -SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const -{ - if (Constraint.size() == 1) { - // GCC RS6000 Constraint Letters - switch (Constraint[0]) { - case 'b': // R1-R31 - case 'r': // R0-R31 - if (VT == MVT::i64) - return std::make_pair(0U, &SPU::R64CRegClass); - return std::make_pair(0U, &SPU::R32CRegClass); - case 'f': - if (VT == MVT::f32) - return std::make_pair(0U, &SPU::R32FPRegClass); - if (VT == MVT::f64) - return std::make_pair(0U, &SPU::R64FPRegClass); - break; - case 'v': - return std::make_pair(0U, &SPU::GPRCRegClass); - } - } - - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); -} - -//! Compute used/known bits for a SPU operand -void -SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth ) const { -#if 0 - const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT; - - switch (Op.getOpcode()) { - default: - // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); - break; - case CALL: - case SHUFB: - case SHUFFLE_MASK: - case CNTB: - case SPUISD::PREFSLOT2VEC: - case SPUISD::LDRESULT: - case SPUISD::VEC2PREFSLOT: - case SPUISD::SHLQUAD_L_BITS: - case SPUISD::SHLQUAD_L_BYTES: - case SPUISD::VEC_ROTL: - case SPUISD::VEC_ROTR: - case SPUISD::ROTBYTES_LEFT: - case SPUISD::SELECT_MASK: - case SPUISD::SELB: - } -#endif -} - -unsigned -SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, - unsigned Depth) const { - switch (Op.getOpcode()) { - default: - return 1; - - case ISD::SETCC: { - EVT VT = Op.getValueType(); - - if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) { - VT = MVT::i32; - } - return VT.getSizeInBits(); - } - } -} - -// LowerAsmOperandForConstraint -void -SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector &Ops, - SelectionDAG &DAG) const { - // Default, for the time being, to the base class handler - TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); -} - -/// isLegalAddressImmediate - Return true if the integer value can be used -/// as the offset of the target addressing mode. -bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, - Type *Ty) const { - // SPU's addresses are 256K: - return (V > -(1 << 18) && V < (1 << 18) - 1); -} - -bool SPUTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const { - return false; -} - -bool -SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { - // The SPU target isn't yet aware of offsets. - return false; -} - -// can we compare to Imm without writing it into a register? -bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const { - //ceqi, cgti, etc. all take s10 operand - return isInt<10>(Imm); -} - -bool -SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type * ) const{ - - // A-form: 18bit absolute address. - if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0) - return true; - - // D-form: reg + 14bit offset - if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs)) - return true; - - // X-form: reg+reg - if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0) - return true; - - return false; -} diff --git a/llvm/lib/Target/CellSPU/SPUISelLowering.h b/llvm/lib/Target/CellSPU/SPUISelLowering.h deleted file mode 100644 index 9f1599f..0000000 --- a/llvm/lib/Target/CellSPU/SPUISelLowering.h +++ /dev/null @@ -1,178 +0,0 @@ -//===-- SPUISelLowering.h - Cell SPU DAG Lowering Interface -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that Cell SPU uses to lower LLVM code into -// a selection DAG. -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_ISELLOWERING_H -#define SPU_ISELLOWERING_H - -#include "SPU.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/CodeGen/SelectionDAG.h" - -namespace llvm { - namespace SPUISD { - enum NodeType { - // Start the numbering where the builting ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - // Pseudo instructions: - RET_FLAG, ///< Return with flag, matched by bi instruction - - Hi, ///< High address component (upper 16) - Lo, ///< Low address component (lower 16) - PCRelAddr, ///< Program counter relative address - AFormAddr, ///< A-form address (local store) - IndirectAddr, ///< D-Form "imm($r)" and X-form "$r($r)" - - LDRESULT, ///< Load result (value, chain) - CALL, ///< CALL instruction - SHUFB, ///< Vector shuffle (permute) - SHUFFLE_MASK, ///< Shuffle mask - CNTB, ///< Count leading ones in bytes - PREFSLOT2VEC, ///< Promote scalar->vector - VEC2PREFSLOT, ///< Extract element 0 - SHL_BITS, ///< Shift quad left, by bits - SHL_BYTES, ///< Shift quad left, by bytes - SRL_BYTES, ///< Shift quad right, by bytes. Insert zeros. - VEC_ROTL, ///< Vector rotate left - VEC_ROTR, ///< Vector rotate right - ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI) - ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count - SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI) - SELB, ///< Select bits -> (b & mask) | (a & ~mask) - // Markers: These aren't used to generate target-dependent nodes, but - // are used during instruction selection. - ADD64_MARKER, ///< i64 addition marker - SUB64_MARKER, ///< i64 subtraction marker - MUL64_MARKER, ///< i64 multiply marker - LAST_SPUISD ///< Last user-defined instruction - }; - } - - //! Utility functions specific to CellSPU: - namespace SPU { - SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType); - SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType); - SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType); - SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType); - SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType); - SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG); - SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG); - - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, - const SPUTargetMachine &TM); - //! Simplify a EVT::v2i64 constant splat to CellSPU-ready form - SDValue LowerV2I64Splat(EVT OpVT, SelectionDAG &DAG, uint64_t splat, - DebugLoc dl); - } - - class SPUTargetMachine; // forward dec'l. - - class SPUTargetLowering : - public TargetLowering - { - SPUTargetMachine &SPUTM; - - public: - //! The venerable constructor - /*! - This is where the CellSPU backend sets operation handling (i.e., legal, - custom, expand or promote.) - */ - SPUTargetLowering(SPUTargetMachine &TM); - - //! Get the target machine - SPUTargetMachine &getSPUTargetMachine() { - return SPUTM; - } - - /// getTargetNodeName() - This method returns the name of a target specific - /// DAG node. - virtual const char *getTargetNodeName(unsigned Opcode) const; - - /// getSetCCResultType - Return the ValueType for ISD::SETCC - virtual EVT getSetCCResultType(EVT VT) const; - - virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } - - //! Custom lowering hooks - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - - //! Custom lowering hook for nodes with illegal result types. - virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, - SelectionDAG &DAG) const; - - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - - virtual void computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth = 0) const; - - virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, - unsigned Depth = 0) const; - - ConstraintType getConstraintType(const std::string &ConstraintLetter) const; - - /// Examine constraint string and operand type and determine a weight value. - /// The operand object must already have been set up with the operand type. - ConstraintWeight getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const; - - std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; - - void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, - std::vector &Ops, - SelectionDAG &DAG) const; - - /// isLegalAddressImmediate - Return true if the integer value can be used - /// as the offset of the target addressing mode. - virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const; - virtual bool isLegalAddressImmediate(GlobalValue *) const; - - virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; - - virtual SDValue - LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - - virtual SDValue - LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const; - - virtual SDValue - LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const; - - virtual bool isLegalICmpImmediate(int64_t Imm) const; - - virtual bool isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const; - }; -} - -#endif diff --git a/llvm/lib/Target/CellSPU/SPUInstrBuilder.h b/llvm/lib/Target/CellSPU/SPUInstrBuilder.h deleted file mode 100644 index b495537..0000000 --- a/llvm/lib/Target/CellSPU/SPUInstrBuilder.h +++ /dev/null @@ -1,43 +0,0 @@ -//===-- SPUInstrBuilder.h - Aides for building Cell SPU insts ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file exposes functions that may be used with BuildMI from the -// MachineInstrBuilder.h file to simplify generating frame and constant pool -// references. -// -// For reference, the order of operands for memory references is: -// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate -// Displacement. -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_INSTRBUILDER_H -#define SPU_INSTRBUILDER_H - -#include "llvm/CodeGen/MachineInstrBuilder.h" - -namespace llvm { - -/// addFrameReference - This function is used to add a reference to the base of -/// an abstract object on the stack frame of the current function. This -/// reference has base register as the FrameIndex offset until it is resolved. -/// This allows a constant offset to be specified as well... -/// -inline const MachineInstrBuilder& -addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0, - bool mem = true) { - if (mem) - return MIB.addImm(Offset).addFrameIndex(FI); - else - return MIB.addFrameIndex(FI).addImm(Offset); -} - -} // End llvm namespace - -#endif diff --git a/llvm/lib/Target/CellSPU/SPUInstrFormats.td b/llvm/lib/Target/CellSPU/SPUInstrFormats.td deleted file mode 100644 index cd3f422..0000000 --- a/llvm/lib/Target/CellSPU/SPUInstrFormats.td +++ /dev/null @@ -1,320 +0,0 @@ -//===-- SPUInstrFormats.td - Cell SPU Instruction Formats --*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// -// Cell SPU instruction formats. Note that these are notationally similar to -// PowerPC, like "A-Form". But the sizes of operands and fields differ. - -// This was kiped from the PPC instruction formats (seemed like a good idea...) - -class SPUInstr - : Instruction { - field bits<32> Inst; - - let Namespace = "SPU"; - let OutOperandList = OOL; - let InOperandList = IOL; - let AsmString = asmstr; - let Itinerary = itin; -} - -// RR Format -class RRForm opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : SPUInstr { - bits<7> RA; - bits<7> RB; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-10} = opcode; - let Inst{11-17} = RB; - let Inst{18-24} = RA; - let Inst{25-31} = RT; -} - -let RB = 0 in { - // RR Format, where RB is zeroed (dont care): - class RRForm_1 opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : RRForm - { } - - let RA = 0 in { - // RR Format, where RA and RB are zeroed (dont care): - // Used for reads from status control registers (see FPSCRRr32) - class RRForm_2 opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : RRForm - { } - } -} - -let RT = 0 in { - // RR Format, where RT is zeroed (don't care), or as the instruction handbook - // says, "RT is a false target." Used in "Halt if" instructions - class RRForm_3 opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : RRForm - { } -} - -// RRR Format -class RRRForm opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : SPUInstr -{ - bits<7> RA; - bits<7> RB; - bits<7> RC; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-3} = opcode; - let Inst{4-10} = RT; - let Inst{11-17} = RB; - let Inst{18-24} = RA; - let Inst{25-31} = RC; -} - -// RI7 Format -class RI7Form opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : SPUInstr -{ - bits<7> i7; - bits<7> RA; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-10} = opcode; - let Inst{11-17} = i7; - let Inst{18-24} = RA; - let Inst{25-31} = RT; -} - -// CVTIntFp Format -class CVTIntFPForm opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : SPUInstr -{ - bits<7> RA; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-9} = opcode; - let Inst{10-17} = 0; - let Inst{18-24} = RA; - let Inst{25-31} = RT; -} - -let RA = 0 in { - class BICondForm opcode, dag OOL, dag IOL, string asmstr, list pattern> - : RRForm - { } - - let RT = 0 in { - // Branch instruction format (without D/E flag settings) - class BRForm opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : RRForm - { } - - class BIForm opcode, string asmstr, list pattern> - : RRForm - { } - - let RB = 0 in { - // Return instruction (bi, branch indirect), RA is zero (LR): - class RETForm pattern> - : BRForm<0b00010101100, (outs), (ins), asmstr, BranchResolv, - pattern> - { } - } - } -} - -// Branch indirect external data forms: -class BISLEDForm DE_flag, string asmstr, list pattern> - : SPUInstr<(outs), (ins indcalltarget:$func), asmstr, BranchResolv> -{ - bits<7> Rcalldest; - - let Pattern = pattern; - - let Inst{0-10} = 0b11010101100; - let Inst{11} = 0; - let Inst{12-13} = DE_flag; - let Inst{14-17} = 0b0000; - let Inst{18-24} = Rcalldest; - let Inst{25-31} = 0b0000000; -} - -// RI10 Format -class RI10Form opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : SPUInstr -{ - bits<10> i10; - bits<7> RA; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-7} = opcode; - let Inst{8-17} = i10; - let Inst{18-24} = RA; - let Inst{25-31} = RT; -} - -// RI10 Format, where the constant is zero (or effectively ignored by the -// SPU) -let i10 = 0 in { - class RI10Form_1 opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : RI10Form - { } -} - -// RI10 Format, where RT is ignored. -// This format is used primarily by the Halt If ... Immediate set of -// instructions -let RT = 0 in { - class RI10Form_2 opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : RI10Form - { } -} - -// RI16 Format -class RI16Form opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : SPUInstr -{ - bits<16> i16; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-8} = opcode; - let Inst{9-24} = i16; - let Inst{25-31} = RT; -} - -// Specialized version of the RI16 Format for unconditional branch relative and -// branch absolute, branch and set link. Note that for branch and set link, the -// link register doesn't have to be $lr, but this is actually hard coded into -// the instruction pattern. - -let RT = 0 in { - class UncondBranch opcode, dag OOL, dag IOL, string asmstr, - list pattern> - : RI16Form - { } - - class BranchSetLink opcode, dag OOL, dag IOL, string asmstr, - list pattern> - : RI16Form - { } -} - -//===----------------------------------------------------------------------===// -// Specialized versions of RI16: -//===----------------------------------------------------------------------===// - -// RI18 Format -class RI18Form opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : SPUInstr -{ - bits<18> i18; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-6} = opcode; - let Inst{7-24} = i18; - let Inst{25-31} = RT; -} - -//===----------------------------------------------------------------------===// -// Instruction formats for intrinsics: -//===----------------------------------------------------------------------===// - -// RI10 Format for v8i16 intrinsics -class RI10_Int_v8i16 opcode, string opc, InstrItinClass itin, - Intrinsic IntID> : - RI10Form; - -class RI10_Int_v4i32 opcode, string opc, InstrItinClass itin, - Intrinsic IntID> : - RI10Form; - -// RR Format for v8i16 intrinsics -class RR_Int_v8i16 opcode, string opc, InstrItinClass itin, - Intrinsic IntID> : - RRForm; - -// RR Format for v4i32 intrinsics -class RR_Int_v4i32 opcode, string opc, InstrItinClass itin, - Intrinsic IntID> : - RRForm; - -//===----------------------------------------------------------------------===// -// Pseudo instructions, like call frames: -//===----------------------------------------------------------------------===// - -class Pseudo pattern> - : SPUInstr { - let OutOperandList = OOL; - let InOperandList = IOL; - let AsmString = asmstr; - let Pattern = pattern; - let Inst{31-0} = 0; -} - -//===----------------------------------------------------------------------===// -// Branch hint formats -//===----------------------------------------------------------------------===// -// For hbrr and hbra -class HBI16Form opcode, dag IOL, string asmstr> - : Instruction { - field bits<32> Inst; - bits<16>i16; - bits<9>RO; - - let Namespace = "SPU"; - let InOperandList = IOL; - let OutOperandList = (outs); //no output - let AsmString = asmstr; - let Itinerary = BranchHints; - - let Inst{0-6} = opcode; - let Inst{7-8} = RO{8-7}; - let Inst{9-24} = i16; - let Inst{25-31} = RO{6-0}; -} diff --git a/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp b/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp deleted file mode 100644 index b25a639..0000000 --- a/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp +++ /dev/null @@ -1,449 +0,0 @@ -//===-- SPUInstrInfo.cpp - Cell SPU Instruction Information ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Cell SPU implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#include "SPUInstrInfo.h" -#include "SPUInstrBuilder.h" -#include "SPUTargetMachine.h" -#include "SPUHazardRecognizers.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" - -#define GET_INSTRINFO_CTOR -#include "SPUGenInstrInfo.inc" - -using namespace llvm; - -namespace { - //! Predicate for an unconditional branch instruction - inline bool isUncondBranch(const MachineInstr *I) { - unsigned opc = I->getOpcode(); - - return (opc == SPU::BR - || opc == SPU::BRA - || opc == SPU::BI); - } - - //! Predicate for a conditional branch instruction - inline bool isCondBranch(const MachineInstr *I) { - unsigned opc = I->getOpcode(); - - return (opc == SPU::BRNZr32 - || opc == SPU::BRNZv4i32 - || opc == SPU::BRZr32 - || opc == SPU::BRZv4i32 - || opc == SPU::BRHNZr16 - || opc == SPU::BRHNZv8i16 - || opc == SPU::BRHZr16 - || opc == SPU::BRHZv8i16); - } -} - -SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm) - : SPUGenInstrInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP), - TM(tm), - RI(*TM.getSubtargetImpl(), *this) -{ /* NOP */ } - -/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for -/// this target when scheduling the DAG. -ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer( - const TargetMachine *TM, - const ScheduleDAG *DAG) const { - const TargetInstrInfo *TII = TM->getInstrInfo(); - assert(TII && "No InstrInfo?"); - return new SPUHazardRecognizer(*TII); -} - -unsigned -SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case SPU::LQDv16i8: - case SPU::LQDv8i16: - case SPU::LQDv4i32: - case SPU::LQDv4f32: - case SPU::LQDv2f64: - case SPU::LQDr128: - case SPU::LQDr64: - case SPU::LQDr32: - case SPU::LQDr16: { - const MachineOperand MOp1 = MI->getOperand(1); - const MachineOperand MOp2 = MI->getOperand(2); - if (MOp1.isImm() && MOp2.isFI()) { - FrameIndex = MOp2.getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - } - return 0; -} - -unsigned -SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case SPU::STQDv16i8: - case SPU::STQDv8i16: - case SPU::STQDv4i32: - case SPU::STQDv4f32: - case SPU::STQDv2f64: - case SPU::STQDr128: - case SPU::STQDr64: - case SPU::STQDr32: - case SPU::STQDr16: - case SPU::STQDr8: { - const MachineOperand MOp1 = MI->getOperand(1); - const MachineOperand MOp2 = MI->getOperand(2); - if (MOp1.isImm() && MOp2.isFI()) { - FrameIndex = MOp2.getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - } - return 0; -} - -void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const -{ - // We support cross register class moves for our aliases, such as R3 in any - // reg class to any other reg class containing R3. This is required because - // we instruction select bitconvert i64 -> f64 as a noop for example, so our - // types have no specific meaning. - - BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); -} - -void -SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - unsigned opc; - bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset()); - if (RC == &SPU::GPRCRegClass) - opc = isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128; - else if (RC == &SPU::R64CRegClass) - opc = isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64; - else if (RC == &SPU::R64FPRegClass) - opc = isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64; - else if (RC == &SPU::R32CRegClass) - opc = isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32; - else if (RC == &SPU::R32FPRegClass) - opc = isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32; - else if (RC == &SPU::R16CRegClass) - opc = isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16; - else if (RC == &SPU::R8CRegClass) - opc = isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8; - else if (RC == &SPU::VECREGRegClass) - opc = isValidFrameIdx ? SPU::STQDv16i8 : SPU::STQXv16i8; - else - llvm_unreachable("Unknown regclass!"); - - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - addFrameReference(BuildMI(MBB, MI, DL, get(opc)) - .addReg(SrcReg, getKillRegState(isKill)), FrameIdx); -} - -void -SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - unsigned opc; - bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset()); - if (RC == &SPU::GPRCRegClass) - opc = isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128; - else if (RC == &SPU::R64CRegClass) - opc = isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64; - else if (RC == &SPU::R64FPRegClass) - opc = isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64; - else if (RC == &SPU::R32CRegClass) - opc = isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32; - else if (RC == &SPU::R32FPRegClass) - opc = isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32; - else if (RC == &SPU::R16CRegClass) - opc = isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16; - else if (RC == &SPU::R8CRegClass) - opc = isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8; - else if (RC == &SPU::VECREGRegClass) - opc = isValidFrameIdx ? SPU::LQDv16i8 : SPU::LQXv16i8; - else - llvm_unreachable("Unknown regclass in loadRegFromStackSlot!"); - - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx); -} - -//! Branch analysis -/*! - \note This code was kiped from PPC. There may be more branch analysis for - CellSPU than what's currently done here. - */ -bool -SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) - return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - if (!isUnpredicatedTerminator(I)) - return false; - - // Get the last instruction in the block. - MachineInstr *LastInst = I; - - // If there is only one terminator instruction, process it. - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (isUncondBranch(LastInst)) { - // Check for jump tables - if (!LastInst->getOperand(0).isMBB()) - return true; - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else if (isCondBranch(LastInst)) { - // Block ends with fall-through condbranch. - TBB = LastInst->getOperand(1).getMBB(); - DEBUG(errs() << "Pushing LastInst: "); - DEBUG(LastInst->dump()); - Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); - Cond.push_back(LastInst->getOperand(0)); - return false; - } - // Otherwise, don't know what this is. - return true; - } - - // Get the instruction before it if it's a terminator. - MachineInstr *SecondLastInst = I; - - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && - isUnpredicatedTerminator(--I)) - return true; - - // If the block ends with a conditional and unconditional branch, handle it. - if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) { - TBB = SecondLastInst->getOperand(1).getMBB(); - DEBUG(errs() << "Pushing SecondLastInst: "); - DEBUG(SecondLastInst->dump()); - Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); - Cond.push_back(SecondLastInst->getOperand(0)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } - - // If the block ends with two unconditional branches, handle it. The second - // one is not executed, so remove it. - if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } - - // Otherwise, can't handle this. - return true; -} - -// search MBB for branch hint labels and branch hit ops -static void removeHBR( MachineBasicBlock &MBB) { - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I){ - if (I->getOpcode() == SPU::HBRA || - I->getOpcode() == SPU::HBR_LABEL){ - I=MBB.erase(I); - if (I == MBB.end()) - break; - } - } -} - -unsigned -SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - removeHBR(MBB); - if (I == MBB.begin()) - return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } - if (!isCondBranch(I) && !isUncondBranch(I)) - return 0; - - // Remove the first branch. - DEBUG(errs() << "Removing branch: "); - DEBUG(I->dump()); - I->eraseFromParent(); - I = MBB.end(); - if (I == MBB.begin()) - return 1; - - --I; - if (!(isCondBranch(I) || isUncondBranch(I))) - return 1; - - // Remove the second branch. - DEBUG(errs() << "Removing second branch: "); - DEBUG(I->dump()); - I->eraseFromParent(); - return 2; -} - -/** Find the optimal position for a hint branch instruction in a basic block. - * This should take into account: - * -the branch hint delays - * -congestion of the memory bus - * -dual-issue scheduling (i.e. avoid insertion of nops) - * Current implementation is rather simplistic. - */ -static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB) -{ - MachineBasicBlock::iterator J = MBB.end(); - for( int i=0; i<8; i++) { - if( J == MBB.begin() ) return J; - J--; - } - return J; -} - -unsigned -SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const { - // Shouldn't be a fall through. - assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - assert((Cond.size() == 2 || Cond.size() == 0) && - "SPU branch conditions have two components!"); - - MachineInstrBuilder MIB; - //TODO: make a more accurate algorithm. - bool haveHBR = MBB.size()>8; - - removeHBR(MBB); - MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol(); - // Add a label just before the branch - if (haveHBR) - MIB = BuildMI(&MBB, DL, get(SPU::HBR_LABEL)).addSym(branchLabel); - - // One-way branch. - if (FBB == 0) { - if (Cond.empty()) { - // Unconditional branch - MIB = BuildMI(&MBB, DL, get(SPU::BR)); - MIB.addMBB(TBB); - - DEBUG(errs() << "Inserted one-way uncond branch: "); - DEBUG((*MIB).dump()); - - // basic blocks have just one branch so it is safe to add the hint a its - if (haveHBR) { - MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); - MIB.addSym(branchLabel); - MIB.addMBB(TBB); - } - } else { - // Conditional branch - MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); - MIB.addReg(Cond[1].getReg()).addMBB(TBB); - - if (haveHBR) { - MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); - MIB.addSym(branchLabel); - MIB.addMBB(TBB); - } - - DEBUG(errs() << "Inserted one-way cond branch: "); - DEBUG((*MIB).dump()); - } - return 1; - } else { - MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); - MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR)); - - // Two-way Conditional Branch. - MIB.addReg(Cond[1].getReg()).addMBB(TBB); - MIB2.addMBB(FBB); - - if (haveHBR) { - MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); - MIB.addSym(branchLabel); - MIB.addMBB(FBB); - } - - DEBUG(errs() << "Inserted conditional branch: "); - DEBUG((*MIB).dump()); - DEBUG(errs() << "part 2: "); - DEBUG((*MIB2).dump()); - return 2; - } -} - -//! Reverses a branch's condition, returning false on success. -bool -SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl &Cond) - const { - // Pretty brainless way of inverting the condition, but it works, considering - // there are only two conditions... - static struct { - unsigned Opc; //! The incoming opcode - unsigned RevCondOpc; //! The reversed condition opcode - } revconds[] = { - { SPU::BRNZr32, SPU::BRZr32 }, - { SPU::BRNZv4i32, SPU::BRZv4i32 }, - { SPU::BRZr32, SPU::BRNZr32 }, - { SPU::BRZv4i32, SPU::BRNZv4i32 }, - { SPU::BRHNZr16, SPU::BRHZr16 }, - { SPU::BRHNZv8i16, SPU::BRHZv8i16 }, - { SPU::BRHZr16, SPU::BRHNZr16 }, - { SPU::BRHZv8i16, SPU::BRHNZv8i16 } - }; - - unsigned Opc = unsigned(Cond[0].getImm()); - // Pretty dull mapping between the two conditions that SPU can generate: - for (int i = sizeof(revconds)/sizeof(revconds[0]) - 1; i >= 0; --i) { - if (revconds[i].Opc == Opc) { - Cond[0].setImm(revconds[i].RevCondOpc); - return false; - } - } - - return true; -} diff --git a/llvm/lib/Target/CellSPU/SPUInstrInfo.h b/llvm/lib/Target/CellSPU/SPUInstrInfo.h deleted file mode 100644 index 85e5821..0000000 --- a/llvm/lib/Target/CellSPU/SPUInstrInfo.h +++ /dev/null @@ -1,84 +0,0 @@ -//===-- SPUInstrInfo.h - Cell SPU Instruction Information -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the CellSPU implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_INSTRUCTIONINFO_H -#define SPU_INSTRUCTIONINFO_H - -#include "SPU.h" -#include "SPURegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - -#define GET_INSTRINFO_HEADER -#include "SPUGenInstrInfo.inc" - -namespace llvm { - //! Cell SPU instruction information class - class SPUInstrInfo : public SPUGenInstrInfo { - SPUTargetMachine &TM; - const SPURegisterInfo RI; - public: - explicit SPUInstrInfo(SPUTargetMachine &tm); - - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). - /// - virtual const SPURegisterInfo &getRegisterInfo() const { return RI; } - - ScheduleHazardRecognizer * - CreateTargetHazardRecognizer(const TargetMachine *TM, - const ScheduleDAG *DAG) const; - - unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - - //! Store a register to a stack slot, based on its register class. - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - //! Load a register from a stack slot, based on its register class. - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - //! Reverses a branch's condition, returning false on success. - virtual - bool ReverseBranchCondition(SmallVectorImpl &Cond) const; - - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const; - - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const; - }; -} - -#endif diff --git a/llvm/lib/Target/CellSPU/SPUInstrInfo.td b/llvm/lib/Target/CellSPU/SPUInstrInfo.td deleted file mode 100644 index 117acd7..0000000 --- a/llvm/lib/Target/CellSPU/SPUInstrInfo.td +++ /dev/null @@ -1,4484 +0,0 @@ -//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Cell SPU Instructions: -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// TODO Items (not urgent today, but would be nice, low priority) -// -// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by -// concatenating the byte argument b as "bbbb". Could recognize this bit pattern -// in 16-bit and 32-bit constants and reduce instruction count. -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Pseudo instructions: -//===----------------------------------------------------------------------===// - -let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in { - def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm_i32:$amt), - "${:comment} ADJCALLSTACKDOWN", - [(callseq_start timm:$amt)]>; - def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt), - "${:comment} ADJCALLSTACKUP", - [(callseq_end timm:$amt)]>; - def HBR_LABEL : Pseudo<(outs), (ins hbrtarget:$targ), - "$targ:\t${:comment}branch hint target",[ ]>; -} - -//===----------------------------------------------------------------------===// -// Loads: -// NB: The ordering is actually important, since the instruction selection -// will try each of the instructions in sequence, i.e., the D-form first with -// the 10-bit displacement, then the A-form with the 16 bit displacement, and -// finally the X-form with the register-register. -//===----------------------------------------------------------------------===// - -let canFoldAsLoad = 1 in { - class LoadDFormVec - : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src), - "lqd\t$rT, $src", - LoadStore, - [(set (vectype VECREG:$rT), (load dform_addr:$src))]> - { } - - class LoadDForm - : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src), - "lqd\t$rT, $src", - LoadStore, - [(set rclass:$rT, (load dform_addr:$src))]> - { } - - multiclass LoadDForms - { - def v16i8: LoadDFormVec; - def v8i16: LoadDFormVec; - def v4i32: LoadDFormVec; - def v2i64: LoadDFormVec; - def v4f32: LoadDFormVec; - def v2f64: LoadDFormVec; - - def r128: LoadDForm; - def r64: LoadDForm; - def r32: LoadDForm; - def f32: LoadDForm; - def f64: LoadDForm; - def r16: LoadDForm; - def r8: LoadDForm; - } - - class LoadAFormVec - : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", - LoadStore, - [(set (vectype VECREG:$rT), (load aform_addr:$src))]> - { } - - class LoadAForm - : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", - LoadStore, - [(set rclass:$rT, (load aform_addr:$src))]> - { } - - multiclass LoadAForms - { - def v16i8: LoadAFormVec; - def v8i16: LoadAFormVec; - def v4i32: LoadAFormVec; - def v2i64: LoadAFormVec; - def v4f32: LoadAFormVec; - def v2f64: LoadAFormVec; - - def r128: LoadAForm; - def r64: LoadAForm; - def r32: LoadAForm; - def f32: LoadAForm; - def f64: LoadAForm; - def r16: LoadAForm; - def r8: LoadAForm; - } - - class LoadXFormVec - : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), - "lqx\t$rT, $src", - LoadStore, - [(set (vectype VECREG:$rT), (load xform_addr:$src))]> - { } - - class LoadXForm - : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src), - "lqx\t$rT, $src", - LoadStore, - [(set rclass:$rT, (load xform_addr:$src))]> - { } - - multiclass LoadXForms - { - def v16i8: LoadXFormVec; - def v8i16: LoadXFormVec; - def v4i32: LoadXFormVec; - def v2i64: LoadXFormVec; - def v4f32: LoadXFormVec; - def v2f64: LoadXFormVec; - - def r128: LoadXForm; - def r64: LoadXForm; - def r32: LoadXForm; - def f32: LoadXForm; - def f64: LoadXForm; - def r16: LoadXForm; - def r8: LoadXForm; - } - - defm LQA : LoadAForms; - defm LQD : LoadDForms; - defm LQX : LoadXForms; - -/* Load quadword, PC relative: Not much use at this point in time. - Might be of use later for relocatable code. It's effectively the - same as LQA, but uses PC-relative addressing. - def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp), - "lqr\t$rT, $disp", LoadStore, - [(set VECREG:$rT, (load iaddr:$disp))]>; - */ -} - -//===----------------------------------------------------------------------===// -// Stores: -//===----------------------------------------------------------------------===// -class StoreDFormVec - : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src), - "stqd\t$rT, $src", - LoadStore, - [(store (vectype VECREG:$rT), dform_addr:$src)]> -{ } - -class StoreDForm - : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src), - "stqd\t$rT, $src", - LoadStore, - [(store rclass:$rT, dform_addr:$src)]> -{ } - -multiclass StoreDForms -{ - def v16i8: StoreDFormVec; - def v8i16: StoreDFormVec; - def v4i32: StoreDFormVec; - def v2i64: StoreDFormVec; - def v4f32: StoreDFormVec; - def v2f64: StoreDFormVec; - - def r128: StoreDForm; - def r64: StoreDForm; - def r32: StoreDForm; - def f32: StoreDForm; - def f64: StoreDForm; - def r16: StoreDForm; - def r8: StoreDForm; -} - -class StoreAFormVec - : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src), - "stqa\t$rT, $src", - LoadStore, - [(store (vectype VECREG:$rT), aform_addr:$src)]>; - -class StoreAForm - : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src), - "stqa\t$rT, $src", - LoadStore, - [(store rclass:$rT, aform_addr:$src)]>; - -multiclass StoreAForms -{ - def v16i8: StoreAFormVec; - def v8i16: StoreAFormVec; - def v4i32: StoreAFormVec; - def v2i64: StoreAFormVec; - def v4f32: StoreAFormVec; - def v2f64: StoreAFormVec; - - def r128: StoreAForm; - def r64: StoreAForm; - def r32: StoreAForm; - def f32: StoreAForm; - def f64: StoreAForm; - def r16: StoreAForm; - def r8: StoreAForm; -} - -class StoreXFormVec - : RRForm<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), - "stqx\t$rT, $src", - LoadStore, - [(store (vectype VECREG:$rT), xform_addr:$src)]> -{ } - -class StoreXForm - : RRForm<0b00100100, (outs), (ins rclass:$rT, memrr:$src), - "stqx\t$rT, $src", - LoadStore, - [(store rclass:$rT, xform_addr:$src)]> -{ } - -multiclass StoreXForms -{ - def v16i8: StoreXFormVec; - def v8i16: StoreXFormVec; - def v4i32: StoreXFormVec; - def v2i64: StoreXFormVec; - def v4f32: StoreXFormVec; - def v2f64: StoreXFormVec; - - def r128: StoreXForm; - def r64: StoreXForm; - def r32: StoreXForm; - def f32: StoreXForm; - def f64: StoreXForm; - def r16: StoreXForm; - def r8: StoreXForm; -} - -defm STQD : StoreDForms; -defm STQA : StoreAForms; -defm STQX : StoreXForms; - -/* Store quadword, PC relative: Not much use at this point in time. Might - be useful for relocatable code. -def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp), - "stqr\t$rT, $disp", LoadStore, - [(store VECREG:$rT, iaddr:$disp)]>; -*/ - -//===----------------------------------------------------------------------===// -// Generate Controls for Insertion: -//===----------------------------------------------------------------------===// - -def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src), - "cbd\t$rT, $src", ShuffleOp, - [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; - -def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src), - "cbx\t$rT, $src", ShuffleOp, - [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; - -def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src), - "chd\t$rT, $src", ShuffleOp, - [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; - -def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src), - "chx\t$rT, $src", ShuffleOp, - [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; - -def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src), - "cwd\t$rT, $src", ShuffleOp, - [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; - -def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src), - "cwx\t$rT, $src", ShuffleOp, - [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; - -def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src), - "cwd\t$rT, $src", ShuffleOp, - [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; - -def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src), - "cwx\t$rT, $src", ShuffleOp, - [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; - -def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src), - "cdd\t$rT, $src", ShuffleOp, - [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; - -def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src), - "cdx\t$rT, $src", ShuffleOp, - [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; - -def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src), - "cdd\t$rT, $src", ShuffleOp, - [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; - -def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src), - "cdx\t$rT, $src", ShuffleOp, - [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; - -//===----------------------------------------------------------------------===// -// Constant formation: -//===----------------------------------------------------------------------===// - -def ILHv8i16: - RI16Form<0b110000010, (outs VECREG:$rT), (ins s16imm:$val), - "ilh\t$rT, $val", ImmLoad, - [(set (v8i16 VECREG:$rT), (v8i16 v8i16SExt16Imm:$val))]>; - -def ILHr16: - RI16Form<0b110000010, (outs R16C:$rT), (ins s16imm:$val), - "ilh\t$rT, $val", ImmLoad, - [(set R16C:$rT, immSExt16:$val)]>; - -// Cell SPU doesn't have a native 8-bit immediate load, but ILH works ("with -// the right constant") -def ILHr8: - RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val), - "ilh\t$rT, $val", ImmLoad, - [(set R8C:$rT, immSExt8:$val)]>; - -// IL does sign extension! - -class ILInst pattern>: - RI16Form<0b100000010, OOL, IOL, "il\t$rT, $val", - ImmLoad, pattern>; - -class ILVecInst: - ILInst<(outs VECREG:$rT), (ins immtype:$val), - [(set (vectype VECREG:$rT), (vectype xform:$val))]>; - -class ILRegInst: - ILInst<(outs rclass:$rT), (ins immtype:$val), - [(set rclass:$rT, xform:$val)]>; - -multiclass ImmediateLoad -{ - def v2i64: ILVecInst; - def v4i32: ILVecInst; - - // TODO: Need v2f64, v4f32 - - def r64: ILRegInst; - def r32: ILRegInst; - def f32: ILRegInst; - def f64: ILRegInst; -} - -defm IL : ImmediateLoad; - -class ILHUInst pattern>: - RI16Form<0b010000010, OOL, IOL, "ilhu\t$rT, $val", - ImmLoad, pattern>; - -class ILHUVecInst: - ILHUInst<(outs VECREG:$rT), (ins immtype:$val), - [(set (vectype VECREG:$rT), (vectype xform:$val))]>; - -class ILHURegInst: - ILHUInst<(outs rclass:$rT), (ins immtype:$val), - [(set rclass:$rT, xform:$val)]>; - -multiclass ImmLoadHalfwordUpper -{ - def v2i64: ILHUVecInst; - def v4i32: ILHUVecInst; - - def r64: ILHURegInst; - def r32: ILHURegInst; - - // Loads the high portion of an address - def hi: ILHURegInst; - - // Used in custom lowering constant SFP loads: - def f32: ILHURegInst; -} - -defm ILHU : ImmLoadHalfwordUpper; - -// Immediate load address (can also be used to load 18-bit unsigned constants, -// see the zext 16->32 pattern) - -class ILAInst pattern>: - RI18Form<0b1000010, OOL, IOL, "ila\t$rT, $val", - LoadNOP, pattern>; - -class ILAVecInst: - ILAInst<(outs VECREG:$rT), (ins immtype:$val), - [(set (vectype VECREG:$rT), (vectype xform:$val))]>; - -class ILARegInst: - ILAInst<(outs rclass:$rT), (ins immtype:$val), - [(set rclass:$rT, xform:$val)]>; - -multiclass ImmLoadAddress -{ - def v2i64: ILAVecInst; - def v4i32: ILAVecInst; - - def r64: ILARegInst; - def r32: ILARegInst; - def f32: ILARegInst; - def f64: ILARegInst; - - def hi: ILARegInst; - def lo: ILARegInst; - - def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val), - [(set R32C:$rT, imm18:$val)]>; -} - -defm ILA : ImmLoadAddress; - -// Immediate OR, Halfword Lower: The "other" part of loading large constants -// into 32-bit registers. See the anonymous pattern Pat<(i32 imm:$imm), ...> -// Note that these are really two operand instructions, but they're encoded -// as three operands with the first two arguments tied-to each other. - -class IOHLInst pattern>: - RI16Form<0b100000110, OOL, IOL, "iohl\t$rT, $val", - ImmLoad, pattern>, - RegConstraint<"$rS = $rT">, - NoEncode<"$rS">; - -class IOHLVecInst: - IOHLInst<(outs VECREG:$rT), (ins VECREG:$rS, immtype:$val), - [/* no pattern */]>; - -class IOHLRegInst: - IOHLInst<(outs rclass:$rT), (ins rclass:$rS, immtype:$val), - [/* no pattern */]>; - -multiclass ImmOrHalfwordLower -{ - def v2i64: IOHLVecInst; - def v4i32: IOHLVecInst; - - def r32: IOHLRegInst; - def f32: IOHLRegInst; - - def lo: IOHLRegInst; -} - -defm IOHL: ImmOrHalfwordLower; - -// Form select mask for bytes using immediate, used in conjunction with the -// SELB instruction: - -class FSMBIVec: - RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val), - "fsmbi\t$rT, $val", - SelectOp, - [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>; - -multiclass FormSelectMaskBytesImm -{ - def v16i8: FSMBIVec; - def v8i16: FSMBIVec; - def v4i32: FSMBIVec; - def v2i64: FSMBIVec; -} - -defm FSMBI : FormSelectMaskBytesImm; - -// fsmb: Form select mask for bytes. N.B. Input operand, $rA, is 16-bits -class FSMBInst pattern>: - RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp, - pattern>; - -class FSMBRegInst: - FSMBInst<(outs VECREG:$rT), (ins rclass:$rA), - [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; - -class FSMBVecInst: - FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA), - [(set (vectype VECREG:$rT), - (SPUselmask (vectype VECREG:$rA)))]>; - -multiclass FormSelectMaskBits { - def v16i8_r16: FSMBRegInst; - def v16i8: FSMBVecInst; -} - -defm FSMB: FormSelectMaskBits; - -// fsmh: Form select mask for halfwords. N.B., Input operand, $rA, is -// only 8-bits wide (even though it's input as 16-bits here) - -class FSMHInst pattern>: - RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp, - pattern>; - -class FSMHRegInst: - FSMHInst<(outs VECREG:$rT), (ins rclass:$rA), - [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; - -class FSMHVecInst: - FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA), - [(set (vectype VECREG:$rT), - (SPUselmask (vectype VECREG:$rA)))]>; - -multiclass FormSelectMaskHalfword { - def v8i16_r16: FSMHRegInst; - def v8i16: FSMHVecInst; -} - -defm FSMH: FormSelectMaskHalfword; - -// fsm: Form select mask for words. Like the other fsm* instructions, -// only the lower 4 bits of $rA are significant. - -class FSMInst pattern>: - RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp, - pattern>; - -class FSMRegInst: - FSMInst<(outs VECREG:$rT), (ins rclass:$rA), - [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; - -class FSMVecInst: - FSMInst<(outs VECREG:$rT), (ins VECREG:$rA), - [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>; - -multiclass FormSelectMaskWord { - def v4i32: FSMVecInst; - - def r32 : FSMRegInst; - def r16 : FSMRegInst; -} - -defm FSM : FormSelectMaskWord; - -// Special case when used for i64 math operations -multiclass FormSelectMaskWord64 { - def r32 : FSMRegInst; - def r16 : FSMRegInst; -} - -defm FSM64 : FormSelectMaskWord64; - -//===----------------------------------------------------------------------===// -// Integer and Logical Operations: -//===----------------------------------------------------------------------===// - -def AHv8i16: - RRForm<0b00010011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "ah\t$rT, $rA, $rB", IntegerOp, - [(set (v8i16 VECREG:$rT), (int_spu_si_ah VECREG:$rA, VECREG:$rB))]>; - -def : Pat<(add (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), - (AHv8i16 VECREG:$rA, VECREG:$rB)>; - -def AHr16: - RRForm<0b00010011000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - "ah\t$rT, $rA, $rB", IntegerOp, - [(set R16C:$rT, (add R16C:$rA, R16C:$rB))]>; - -def AHIvec: - RI10Form<0b10111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "ahi\t$rT, $rA, $val", IntegerOp, - [(set (v8i16 VECREG:$rT), (add (v8i16 VECREG:$rA), - v8i16SExt10Imm:$val))]>; - -def AHIr16: - RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - "ahi\t$rT, $rA, $val", IntegerOp, - [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>; - -// v4i32, i32 add instruction: - -class AInst pattern>: - RRForm<0b00000011000, OOL, IOL, - "a\t$rT, $rA, $rB", IntegerOp, - pattern>; - -class AVecInst: - AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), - (vectype VECREG:$rB)))]>; - -class ARegInst: - AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>; - -multiclass AddInstruction { - def v4i32: AVecInst; - def v16i8: AVecInst; - def r32: ARegInst; -} - -defm A : AddInstruction; - -class AIInst pattern>: - RI10Form<0b00111000, OOL, IOL, - "ai\t$rT, $rA, $val", IntegerOp, - pattern>; - -class AIVecInst: - AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>; - -class AIFPVecInst: - AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [/* no pattern */]>; - -class AIRegInst: - AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val), - [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>; - -// This is used to add epsilons to floating point numbers in the f32 fdiv code: -class AIFPInst: - AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val), - [/* no pattern */]>; - -multiclass AddImmediate { - def v4i32: AIVecInst; - - def r32: AIRegInst; - - def v4f32: AIFPVecInst; - def f32: AIFPInst; -} - -defm AI : AddImmediate; - -def SFHvec: - RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "sfh\t$rT, $rA, $rB", IntegerOp, - [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -def SFHr16: - RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - "sfh\t$rT, $rA, $rB", IntegerOp, - [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>; - -def SFHIvec: - RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "sfhi\t$rT, $rA, $val", IntegerOp, - [(set (v8i16 VECREG:$rT), (sub v8i16SExt10Imm:$val, - (v8i16 VECREG:$rA)))]>; - -def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - "sfhi\t$rT, $rA, $val", IntegerOp, - [(set R16C:$rT, (sub i16ImmSExt10:$val, R16C:$rA))]>; - -def SFvec : RRForm<0b00000010000, (outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB), - "sf\t$rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>; - - -def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - "sf\t$rT, $rA, $rB", IntegerOp, - [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>; - -def SFIvec: - RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "sfi\t$rT, $rA, $val", IntegerOp, - [(set (v4i32 VECREG:$rT), (sub v4i32SExt10Imm:$val, - (v4i32 VECREG:$rA)))]>; - -def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT), - (ins R32C:$rA, s10imm_i32:$val), - "sfi\t$rT, $rA, $val", IntegerOp, - [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>; - -// ADDX: only available in vector form, doesn't match a pattern. -class ADDXInst pattern>: - RRForm<0b00000010110, OOL, IOL, - "addx\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class ADDXVecInst: - ADDXInst<(outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry), - [/* no pattern */]>, - RegConstraint<"$rCarry = $rT">, - NoEncode<"$rCarry">; - -class ADDXRegInst: - ADDXInst<(outs rclass:$rT), - (ins rclass:$rA, rclass:$rB, rclass:$rCarry), - [/* no pattern */]>, - RegConstraint<"$rCarry = $rT">, - NoEncode<"$rCarry">; - -multiclass AddExtended { - def v2i64 : ADDXVecInst; - def v4i32 : ADDXVecInst; - def r64 : ADDXRegInst; - def r32 : ADDXRegInst; -} - -defm ADDX : AddExtended; - -// CG: Generate carry for add -class CGInst pattern>: - RRForm<0b01000011000, OOL, IOL, - "cg\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class CGVecInst: - CGInst<(outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB), - [/* no pattern */]>; - -class CGRegInst: - CGInst<(outs rclass:$rT), - (ins rclass:$rA, rclass:$rB), - [/* no pattern */]>; - -multiclass CarryGenerate { - def v2i64 : CGVecInst; - def v4i32 : CGVecInst; - def r64 : CGRegInst; - def r32 : CGRegInst; -} - -defm CG : CarryGenerate; - -// SFX: Subract from, extended. This is used in conjunction with BG to subtract -// with carry (borrow, in this case) -class SFXInst pattern>: - RRForm<0b10000010110, OOL, IOL, - "sfx\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class SFXVecInst: - SFXInst<(outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry), - [/* no pattern */]>, - RegConstraint<"$rCarry = $rT">, - NoEncode<"$rCarry">; - -class SFXRegInst: - SFXInst<(outs rclass:$rT), - (ins rclass:$rA, rclass:$rB, rclass:$rCarry), - [/* no pattern */]>, - RegConstraint<"$rCarry = $rT">, - NoEncode<"$rCarry">; - -multiclass SubtractExtended { - def v2i64 : SFXVecInst; - def v4i32 : SFXVecInst; - def r64 : SFXRegInst; - def r32 : SFXRegInst; -} - -defm SFX : SubtractExtended; - -// BG: only available in vector form, doesn't match a pattern. -class BGInst pattern>: - RRForm<0b01000010000, OOL, IOL, - "bg\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class BGVecInst: - BGInst<(outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB), - [/* no pattern */]>; - -class BGRegInst: - BGInst<(outs rclass:$rT), - (ins rclass:$rA, rclass:$rB), - [/* no pattern */]>; - -multiclass BorrowGenerate { - def v4i32 : BGVecInst; - def v2i64 : BGVecInst; - def r64 : BGRegInst; - def r32 : BGRegInst; -} - -defm BG : BorrowGenerate; - -// BGX: Borrow generate, extended. -def BGXvec: - RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, - VECREG:$rCarry), - "bgx\t$rT, $rA, $rB", IntegerOp, - []>, - RegConstraint<"$rCarry = $rT">, - NoEncode<"$rCarry">; - -// Halfword multiply variants: -// N.B: These can be used to build up larger quantities (16x16 -> 32) - -def MPYv8i16: - RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpy\t$rT, $rA, $rB", IntegerMulDiv, - [/* no pattern */]>; - -def MPYr16: - RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - "mpy\t$rT, $rA, $rB", IntegerMulDiv, - [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>; - -// Unsigned 16-bit multiply: - -class MPYUInst pattern>: - RRForm<0b00110011110, OOL, IOL, - "mpyu\t$rT, $rA, $rB", IntegerMulDiv, - pattern>; - -def MPYUv4i32: - MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [/* no pattern */]>; - -def MPYUr16: - MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB), - [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>; - -def MPYUr32: - MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - [/* no pattern */]>; - -// mpyi: multiply 16 x s10imm -> 32 result. - -class MPYIInst pattern>: - RI10Form<0b00101110, OOL, IOL, - "mpyi\t$rT, $rA, $val", IntegerMulDiv, - pattern>; - -def MPYIvec: - MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v8i16 VECREG:$rT), - (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>; - -def MPYIr16: - MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>; - -// mpyui: same issues as other multiplies, plus, this doesn't match a -// pattern... but may be used during target DAG selection or lowering - -class MPYUIInst pattern>: - RI10Form<0b10101110, OOL, IOL, - "mpyui\t$rT, $rA, $val", IntegerMulDiv, - pattern>; - -def MPYUIvec: - MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - []>; - -def MPYUIr16: - MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - []>; - -// mpya: 16 x 16 + 16 -> 32 bit result -class MPYAInst pattern>: - RRRForm<0b0011, OOL, IOL, - "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv, - pattern>; - -def MPYAv4i32: - MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - [(set (v4i32 VECREG:$rT), - (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))), - (v4i32 VECREG:$rC)))]>; - -def MPYAr32: - MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC), - [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)), - R32C:$rC))]>; - -def MPYAr32_sext: - MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC), - [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)), - R32C:$rC))]>; - -def MPYAr32_sextinreg: - MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC), - [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16), - (sext_inreg R32C:$rB, i16)), - R32C:$rC))]>; - -// mpyh: multiply high, used to synthesize 32-bit multiplies -class MPYHInst pattern>: - RRForm<0b10100011110, OOL, IOL, - "mpyh\t$rT, $rA, $rB", IntegerMulDiv, - pattern>; - -def MPYHv4i32: - MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [/* no pattern */]>; - -def MPYHr32: - MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - [/* no pattern */]>; - -// mpys: multiply high and shift right (returns the top half of -// a 16-bit multiply, sign extended to 32 bits.) - -class MPYSInst: - RRForm<0b11100011110, OOL, IOL, - "mpys\t$rT, $rA, $rB", IntegerMulDiv, - [/* no pattern */]>; - -def MPYSv4i32: - MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; - -def MPYSr16: - MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>; - -// mpyhh: multiply high-high (returns the 32-bit result from multiplying -// the top 16 bits of the $rA, $rB) - -class MPYHHInst: - RRForm<0b01100011110, OOL, IOL, - "mpyhh\t$rT, $rA, $rB", IntegerMulDiv, - [/* no pattern */]>; - -def MPYHHv8i16: - MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; - -def MPYHHr32: - MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; - -// mpyhha: Multiply high-high, add to $rT: - -class MPYHHAInst: - RRForm<0b01100010110, OOL, IOL, - "mpyhha\t$rT, $rA, $rB", IntegerMulDiv, - [/* no pattern */]>; - -def MPYHHAvec: - MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; - -def MPYHHAr32: - MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; - -// mpyhhu: Multiply high-high, unsigned, e.g.: -// -// +-------+-------+ +-------+-------+ +---------+ -// | a0 . a1 | x | b0 . b1 | = | a0 x b0 | -// +-------+-------+ +-------+-------+ +---------+ -// -// where a0, b0 are the upper 16 bits of the 32-bit word - -class MPYHHUInst: - RRForm<0b01110011110, OOL, IOL, - "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv, - [/* no pattern */]>; - -def MPYHHUv4i32: - MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; - -def MPYHHUr32: - MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; - -// mpyhhau: Multiply high-high, unsigned - -class MPYHHAUInst: - RRForm<0b01110010110, OOL, IOL, - "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv, - [/* no pattern */]>; - -def MPYHHAUvec: - MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; - -def MPYHHAUr32: - MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// clz: Count leading zeroes -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -class CLZInst pattern>: - RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA", - IntegerOp, pattern>; - -class CLZRegInst: - CLZInst<(outs rclass:$rT), (ins rclass:$rA), - [(set rclass:$rT, (ctlz rclass:$rA))]>; - -class CLZVecInst: - CLZInst<(outs VECREG:$rT), (ins VECREG:$rA), - [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>; - -multiclass CountLeadingZeroes { - def v4i32 : CLZVecInst; - def r32 : CLZRegInst; -} - -defm CLZ : CountLeadingZeroes; - -// cntb: Count ones in bytes (aka "population count") -// -// NOTE: This instruction is really a vector instruction, but the custom -// lowering code uses it in unorthodox ways to support CTPOP for other -// data types! - -def CNTBv16i8: - RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA), - "cntb\t$rT, $rA", IntegerOp, - [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>; - -def CNTBv8i16 : - RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA), - "cntb\t$rT, $rA", IntegerOp, - [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>; - -def CNTBv4i32 : - RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA), - "cntb\t$rT, $rA", IntegerOp, - [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>; - -// gbb: Gather the low order bits from each byte in $rA into a single 16-bit -// quantity stored into $rT's slot 0, upper 16 bits are zeroed, as are -// slots 1-3. -// -// Note: This instruction "pairs" with the fsmb instruction for all of the -// various types defined here. -// -// Note 2: The "VecInst" and "RegInst" forms refer to the result being either -// a vector or register. - -class GBBInst pattern>: - RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>; - -class GBBRegInst: - GBBInst<(outs rclass:$rT), (ins VECREG:$rA), - [/* no pattern */]>; - -class GBBVecInst: - GBBInst<(outs VECREG:$rT), (ins VECREG:$rA), - [/* no pattern */]>; - -multiclass GatherBitsFromBytes { - def v16i8_r32: GBBRegInst; - def v16i8_r16: GBBRegInst; - def v16i8: GBBVecInst; -} - -defm GBB: GatherBitsFromBytes; - -// gbh: Gather all low order bits from each halfword in $rA into a single -// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0 -// and slots 1-3 also set to 0. -// -// See notes for GBBInst, above. - -class GBHInst pattern>: - RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp, - pattern>; - -class GBHRegInst: - GBHInst<(outs rclass:$rT), (ins VECREG:$rA), - [/* no pattern */]>; - -class GBHVecInst: - GBHInst<(outs VECREG:$rT), (ins VECREG:$rA), - [/* no pattern */]>; - -multiclass GatherBitsHalfword { - def v8i16_r32: GBHRegInst; - def v8i16_r16: GBHRegInst; - def v8i16: GBHVecInst; -} - -defm GBH: GatherBitsHalfword; - -// gb: Gather all low order bits from each word in $rA into a single -// 4-bit quantity stored in $rT's slot 0, upper bits in $rT set to 0, -// as well as slots 1-3. -// -// See notes for gbb, above. - -class GBInst pattern>: - RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp, - pattern>; - -class GBRegInst: - GBInst<(outs rclass:$rT), (ins VECREG:$rA), - [/* no pattern */]>; - -class GBVecInst: - GBInst<(outs VECREG:$rT), (ins VECREG:$rA), - [/* no pattern */]>; - -multiclass GatherBitsWord { - def v4i32_r32: GBRegInst; - def v4i32_r16: GBRegInst; - def v4i32: GBVecInst; -} - -defm GB: GatherBitsWord; - -// avgb: average bytes -def AVGB: - RRForm<0b11001011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "avgb\t$rT, $rA, $rB", ByteOp, - []>; - -// absdb: absolute difference of bytes -def ABSDB: - RRForm<0b11001010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "absdb\t$rT, $rA, $rB", ByteOp, - []>; - -// sumb: sum bytes into halfwords -def SUMB: - RRForm<0b11001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "sumb\t$rT, $rA, $rB", ByteOp, - []>; - -// Sign extension operations: -class XSBHInst pattern>: - RRForm_1<0b01101101010, OOL, IOL, - "xsbh\t$rDst, $rSrc", - IntegerOp, pattern>; - -class XSBHInRegInst pattern>: - XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc), - pattern>; - -multiclass ExtendByteHalfword { - def v16i8: XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), - [ - /*(set (v8i16 VECREG:$rDst), (sext (v8i16 VECREG:$rSrc)))*/]>; - def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc), - [(set R16C:$rDst, (sext R8C:$rSrc))]>; - def r16: XSBHInRegInst; - - // 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit - // quantities to 32-bit quantities via a 32-bit register (see the sext 8->32 - // pattern below). Intentionally doesn't match a pattern because we want the - // sext 8->32 pattern to do the work for us, namely because we need the extra - // XSHWr32. - def r32: XSBHInRegInst; - - // Same as the 32-bit version, but for i64 - def r64: XSBHInRegInst; -} - -defm XSBH : ExtendByteHalfword; - -// Sign extend halfwords to words: - -class XSHWInst pattern>: - RRForm_1<0b01101101010, OOL, IOL, "xshw\t$rDest, $rSrc", - IntegerOp, pattern>; - -class XSHWVecInst: - XSHWInst<(outs VECREG:$rDest), (ins VECREG:$rSrc), - [(set (out_vectype VECREG:$rDest), - (sext (in_vectype VECREG:$rSrc)))]>; - -class XSHWInRegInst pattern>: - XSHWInst<(outs rclass:$rDest), (ins rclass:$rSrc), - pattern>; - -class XSHWRegInst: - XSHWInst<(outs rclass:$rDest), (ins R16C:$rSrc), - [(set rclass:$rDest, (sext R16C:$rSrc))]>; - -multiclass ExtendHalfwordWord { - def v4i32: XSHWVecInst; - - def r16: XSHWRegInst; - - def r32: XSHWInRegInst; - def r64: XSHWInRegInst; -} - -defm XSHW : ExtendHalfwordWord; - -// Sign-extend words to doublewords (32->64 bits) - -class XSWDInst pattern>: - RRForm_1<0b01100101010, OOL, IOL, "xswd\t$rDst, $rSrc", - IntegerOp, pattern>; - -class XSWDVecInst: - XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), - [/*(set (out_vectype VECREG:$rDst), - (sext (out_vectype VECREG:$rSrc)))*/]>; - -class XSWDRegInst: - XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc), - [(set out_rclass:$rDst, (sext in_rclass:$rSrc))]>; - -multiclass ExtendWordToDoubleWord { - def v2i64: XSWDVecInst; - def r64: XSWDRegInst; - - def r64_inreg: XSWDInst<(outs R64C:$rDst), (ins R64C:$rSrc), - [(set R64C:$rDst, (sext_inreg R64C:$rSrc, i32))]>; -} - -defm XSWD : ExtendWordToDoubleWord; - -// AND operations - -class ANDInst pattern> : - RRForm<0b10000011000, OOL, IOL, "and\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class ANDVecInst: - ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA), - (vectype VECREG:$rB)))]>; - -class ANDRegInst: - ANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (and rclass:$rA, rclass:$rB))]>; - -multiclass BitwiseAnd -{ - def v16i8: ANDVecInst; - def v8i16: ANDVecInst; - def v4i32: ANDVecInst; - def v2i64: ANDVecInst; - - def r128: ANDRegInst; - def r64: ANDRegInst; - def r32: ANDRegInst; - def r16: ANDRegInst; - def r8: ANDRegInst; - - //===--------------------------------------------- - // Special instructions to perform the fabs instruction - def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB), - [/* Intentionally does not match a pattern */]>; - - def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB), - [/* Intentionally does not match a pattern */]>; - - def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [/* Intentionally does not match a pattern */]>; - - //===--------------------------------------------- - - // Hacked form of AND to zero-extend 16-bit quantities to 32-bit - // quantities -- see 16->32 zext pattern. - // - // This pattern is somewhat artificial, since it might match some - // compiler generated pattern but it is unlikely to do so. - - def i16i32: ANDInst<(outs R32C:$rT), (ins R16C:$rA, R32C:$rB), - [(set R32C:$rT, (and (zext R16C:$rA), R32C:$rB))]>; -} - -defm AND : BitwiseAnd; - - -def vnot_cell_conv : PatFrag<(ops node:$in), - (xor node:$in, (bitconvert (v4i32 immAllOnesV)))>; - -// N.B.: vnot_cell_conv is one of those special target selection pattern -// fragments, -// in which we expect there to be a bit_convert on the constant. Bear in mind -// that llvm translates "not " to "xor , -1" (or in this case, a -// constant -1 vector.) - -class ANDCInst pattern>: - RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class ANDCVecInst: - ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (and (vectype VECREG:$rA), - (vnot_frag (vectype VECREG:$rB))))]>; - -class ANDCRegInst: - ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (and rclass:$rA, (not rclass:$rB)))]>; - -multiclass AndComplement -{ - def v16i8: ANDCVecInst; - def v8i16: ANDCVecInst; - def v4i32: ANDCVecInst; - def v2i64: ANDCVecInst; - - def r128: ANDCRegInst; - def r64: ANDCRegInst; - def r32: ANDCRegInst; - def r16: ANDCRegInst; - def r8: ANDCRegInst; - - // Sometimes, the xor pattern has a bitcast constant: - def v16i8_conv: ANDCVecInst; -} - -defm ANDC : AndComplement; - -class ANDBIInst pattern>: - RI10Form<0b01101000, OOL, IOL, "andbi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass AndByteImm -{ - def v16i8: ANDBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), - [(set (v16i8 VECREG:$rT), - (and (v16i8 VECREG:$rA), - (v16i8 v16i8U8Imm:$val)))]>; - - def r8: ANDBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), - [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>; -} - -defm ANDBI : AndByteImm; - -class ANDHIInst pattern> : - RI10Form<0b10101000, OOL, IOL, "andhi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass AndHalfwordImm -{ - def v8i16: ANDHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v8i16 VECREG:$rT), - (and (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>; - - def r16: ANDHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val), - [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>; - - // Zero-extend i8 to i16: - def i8i16: ANDHIInst<(outs R16C:$rT), (ins R8C:$rA, u10imm:$val), - [(set R16C:$rT, (and (zext R8C:$rA), i16ImmUns10:$val))]>; -} - -defm ANDHI : AndHalfwordImm; - -class ANDIInst pattern> : - RI10Form<0b00101000, OOL, IOL, "andi\t$rT, $rA, $val", - IntegerOp, pattern>; - -multiclass AndWordImm -{ - def v4i32: ANDIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v4i32 VECREG:$rT), - (and (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>; - - def r32: ANDIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), - [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>; - - // Hacked form of ANDI to zero-extend i8 quantities to i32. See the zext 8->32 - // pattern below. - def i8i32: ANDIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val), - [(set R32C:$rT, - (and (zext R8C:$rA), i32ImmSExt10:$val))]>; - - // Hacked form of ANDI to zero-extend i16 quantities to i32. See the - // zext 16->32 pattern below. - // - // Note that this pattern is somewhat artificial, since it might match - // something the compiler generates but is unlikely to occur in practice. - def i16i32: ANDIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val), - [(set R32C:$rT, - (and (zext R16C:$rA), i32ImmSExt10:$val))]>; -} - -defm ANDI : AndWordImm; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Bitwise OR group: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -// Bitwise "or" (N.B.: These are also register-register copy instructions...) -class ORInst pattern>: - RRForm<0b10000010000, OOL, IOL, "or\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class ORVecInst: - ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), - (vectype VECREG:$rB)))]>; - -class ORRegInst: - ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>; - - -multiclass BitwiseOr -{ - def v16i8: ORVecInst; - def v8i16: ORVecInst; - def v4i32: ORVecInst; - def v2i64: ORVecInst; - - def v4f32: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v4f32 VECREG:$rT), - (v4f32 (bitconvert (or (v4i32 VECREG:$rA), - (v4i32 VECREG:$rB)))))]>; - - def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v2f64 VECREG:$rT), - (v2f64 (bitconvert (or (v2i64 VECREG:$rA), - (v2i64 VECREG:$rB)))))]>; - - def r128: ORRegInst; - def r64: ORRegInst; - def r32: ORRegInst; - def r16: ORRegInst; - def r8: ORRegInst; - - // OR instructions used to copy f32 and f64 registers. - def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), - [/* no pattern */]>; - - def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), - [/* no pattern */]>; -} - -defm OR : BitwiseOr; - -//===----------------------------------------------------------------------===// -// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers -//===----------------------------------------------------------------------===// -def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)), - (COPY_TO_REGCLASS R8C:$rA, VECREG)>; - -def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)), - (COPY_TO_REGCLASS R16C:$rA, VECREG)>; - -def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)), - (COPY_TO_REGCLASS R32C:$rA, VECREG)>; - -def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)), - (COPY_TO_REGCLASS R64C:$rA, VECREG)>; - -def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)), - (COPY_TO_REGCLASS R32FP:$rA, VECREG)>; - -def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)), - (COPY_TO_REGCLASS R64FP:$rA, VECREG)>; - -def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))), - (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>; - -def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))), - (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>; - -def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))), - (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>; - -def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))), - (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>; - -def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))), - (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>; - -def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))), - (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>; - -// Load Register: This is an assembler alias for a bitwise OR of a register -// against itself. It's here because it brings some clarity to assembly -// language output. - -let hasCtrlDep = 1 in { - class LRInst - : SPUInstr { - bits<7> RA; - bits<7> RT; - - let Pattern = [/*no pattern*/]; - - let Inst{0-10} = 0b10000010000; /* It's an OR operation */ - let Inst{11-17} = RA; - let Inst{18-24} = RA; - let Inst{25-31} = RT; - } - - class LRVecInst: - LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>; - - class LRRegInst: - LRInst<(outs rclass:$rT), (ins rclass:$rA)>; - - multiclass LoadRegister { - def v2i64: LRVecInst; - def v2f64: LRVecInst; - def v4i32: LRVecInst; - def v4f32: LRVecInst; - def v8i16: LRVecInst; - def v16i8: LRVecInst; - - def r128: LRRegInst; - def r64: LRRegInst; - def f64: LRRegInst; - def r32: LRRegInst; - def f32: LRRegInst; - def r16: LRRegInst; - def r8: LRRegInst; - } - - defm LR: LoadRegister; -} - -// ORC: Bitwise "or" with complement (c = a | ~b) - -class ORCInst pattern>: - RRForm<0b10010010000, OOL, IOL, "orc\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class ORCVecInst: - ORCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), - (vnot (vectype VECREG:$rB))))]>; - -class ORCRegInst: - ORCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (or rclass:$rA, (not rclass:$rB)))]>; - -multiclass BitwiseOrComplement -{ - def v16i8: ORCVecInst; - def v8i16: ORCVecInst; - def v4i32: ORCVecInst; - def v2i64: ORCVecInst; - - def r128: ORCRegInst; - def r64: ORCRegInst; - def r32: ORCRegInst; - def r16: ORCRegInst; - def r8: ORCRegInst; -} - -defm ORC : BitwiseOrComplement; - -// OR byte immediate -class ORBIInst pattern>: - RI10Form<0b01100000, OOL, IOL, "orbi\t$rT, $rA, $val", - IntegerOp, pattern>; - -class ORBIVecInst: - ORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), - [(set (v16i8 VECREG:$rT), (or (vectype VECREG:$rA), - (vectype immpred:$val)))]>; - -multiclass BitwiseOrByteImm -{ - def v16i8: ORBIVecInst; - - def r8: ORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), - [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>; -} - -defm ORBI : BitwiseOrByteImm; - -// OR halfword immediate -class ORHIInst pattern>: - RI10Form<0b10100000, OOL, IOL, "orhi\t$rT, $rA, $val", - IntegerOp, pattern>; - -class ORHIVecInst: - ORHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), - [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), - immpred:$val))]>; - -multiclass BitwiseOrHalfwordImm -{ - def v8i16: ORHIVecInst; - - def r16: ORHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val), - [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>; - - // Specialized ORHI form used to promote 8-bit registers to 16-bit - def i8i16: ORHIInst<(outs R16C:$rT), (ins R8C:$rA, s10imm:$val), - [(set R16C:$rT, (or (anyext R8C:$rA), - i16ImmSExt10:$val))]>; -} - -defm ORHI : BitwiseOrHalfwordImm; - -class ORIInst pattern>: - RI10Form<0b00100000, OOL, IOL, "ori\t$rT, $rA, $val", - IntegerOp, pattern>; - -class ORIVecInst: - ORIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), - [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), - immpred:$val))]>; - -// Bitwise "or" with immediate -multiclass BitwiseOrImm -{ - def v4i32: ORIVecInst; - - def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), - [(set R32C:$rT, (or R32C:$rA, i32ImmSExt10:$val))]>; - - // i16i32: hacked version of the ori instruction to extend 16-bit quantities - // to 32-bit quantities. used exclusively to match "anyext" conversions (vide - // infra "anyext 16->32" pattern.) - def i16i32: ORIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val), - [(set R32C:$rT, (or (anyext R16C:$rA), - i32ImmSExt10:$val))]>; - - // i8i32: Hacked version of the ORI instruction to extend 16-bit quantities - // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide - // infra "anyext 16->32" pattern.) - def i8i32: ORIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val), - [(set R32C:$rT, (or (anyext R8C:$rA), - i32ImmSExt10:$val))]>; -} - -defm ORI : BitwiseOrImm; - -// ORX: "or" across the vector: or's $rA's word slots leaving the result in -// $rT[0], slots 1-3 are zeroed. -// -// FIXME: Needs to match an intrinsic pattern. -def ORXv4i32: - RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "orx\t$rT, $rA, $rB", IntegerOp, - []>; - -// XOR: - -class XORInst pattern> : - RRForm<0b10010010000, OOL, IOL, "xor\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class XORVecInst: - XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (xor (vectype VECREG:$rA), - (vectype VECREG:$rB)))]>; - -class XORRegInst: - XORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (xor rclass:$rA, rclass:$rB))]>; - -multiclass BitwiseExclusiveOr -{ - def v16i8: XORVecInst; - def v8i16: XORVecInst; - def v4i32: XORVecInst; - def v2i64: XORVecInst; - - def r128: XORRegInst; - def r64: XORRegInst; - def r32: XORRegInst; - def r16: XORRegInst; - def r8: XORRegInst; - - // XOR instructions used to negate f32 and f64 quantities. - - def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB), - [/* no pattern */]>; - - def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB), - [/* no pattern */]>; - - def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [/* no pattern, see fneg{32,64} */]>; -} - -defm XOR : BitwiseExclusiveOr; - -//==---------------------------------------------------------- - -class XORBIInst pattern>: - RI10Form<0b01100000, OOL, IOL, "xorbi\t$rT, $rA, $val", - IntegerOp, pattern>; - -multiclass XorByteImm -{ - def v16i8: - XORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), - [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>; - - def r8: - XORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), - [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>; -} - -defm XORBI : XorByteImm; - -def XORHIv8i16: - RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), - "xorhi\t$rT, $rA, $val", IntegerOp, - [(set (v8i16 VECREG:$rT), (xor (v8i16 VECREG:$rA), - v8i16SExt10Imm:$val))]>; - -def XORHIr16: - RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - "xorhi\t$rT, $rA, $val", IntegerOp, - [(set R16C:$rT, (xor R16C:$rA, i16ImmSExt10:$val))]>; - -def XORIv4i32: - RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm_i32:$val), - "xori\t$rT, $rA, $val", IntegerOp, - [(set (v4i32 VECREG:$rT), (xor (v4i32 VECREG:$rA), - v4i32SExt10Imm:$val))]>; - -def XORIr32: - RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), - "xori\t$rT, $rA, $val", IntegerOp, - [(set R32C:$rT, (xor R32C:$rA, i32ImmSExt10:$val))]>; - -// NAND: - -class NANDInst pattern>: - RRForm<0b10010011000, OOL, IOL, "nand\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class NANDVecInst: - NANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (vnot (and (vectype VECREG:$rA), - (vectype VECREG:$rB))))]>; -class NANDRegInst: - NANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (not (and rclass:$rA, rclass:$rB)))]>; - -multiclass BitwiseNand -{ - def v16i8: NANDVecInst; - def v8i16: NANDVecInst; - def v4i32: NANDVecInst; - def v2i64: NANDVecInst; - - def r128: NANDRegInst; - def r64: NANDRegInst; - def r32: NANDRegInst; - def r16: NANDRegInst; - def r8: NANDRegInst; -} - -defm NAND : BitwiseNand; - -// NOR: - -class NORInst pattern>: - RRForm<0b10010010000, OOL, IOL, "nor\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class NORVecInst: - NORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (vnot (or (vectype VECREG:$rA), - (vectype VECREG:$rB))))]>; -class NORRegInst: - NORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (not (or rclass:$rA, rclass:$rB)))]>; - -multiclass BitwiseNor -{ - def v16i8: NORVecInst; - def v8i16: NORVecInst; - def v4i32: NORVecInst; - def v2i64: NORVecInst; - - def r128: NORRegInst; - def r64: NORRegInst; - def r32: NORRegInst; - def r16: NORRegInst; - def r8: NORRegInst; -} - -defm NOR : BitwiseNor; - -// Select bits: -class SELBInst pattern>: - RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC", - IntegerOp, pattern>; - -class SELBVecInst: - SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - [(set (vectype VECREG:$rT), - (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)), - (and (vnot_frag (vectype VECREG:$rC)), - (vectype VECREG:$rA))))]>; - -class SELBVecVCondInst: - SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - [(set (vectype VECREG:$rT), - (select (vectype VECREG:$rC), - (vectype VECREG:$rB), - (vectype VECREG:$rA)))]>; - -class SELBVecCondInst: - SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC), - [(set (vectype VECREG:$rT), - (select R32C:$rC, - (vectype VECREG:$rB), - (vectype VECREG:$rA)))]>; - -class SELBRegInst: - SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC), - [(set rclass:$rT, - (or (and rclass:$rB, rclass:$rC), - (and rclass:$rA, (not rclass:$rC))))]>; - -class SELBRegCondInst: - SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC), - [(set rclass:$rT, - (select rcond:$rC, rclass:$rB, rclass:$rA))]>; - -multiclass SelectBits -{ - def v16i8: SELBVecInst; - def v8i16: SELBVecInst; - def v4i32: SELBVecInst; - def v2i64: SELBVecInst; - - def r128: SELBRegInst; - def r64: SELBRegInst; - def r32: SELBRegInst; - def r16: SELBRegInst; - def r8: SELBRegInst; - - def v16i8_cond: SELBVecCondInst; - def v8i16_cond: SELBVecCondInst; - def v4i32_cond: SELBVecCondInst; - def v2i64_cond: SELBVecCondInst; - - def v16i8_vcond: SELBVecCondInst; - def v8i16_vcond: SELBVecCondInst; - def v4i32_vcond: SELBVecCondInst; - def v2i64_vcond: SELBVecCondInst; - - def v4f32_cond: - SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - [(set (v4f32 VECREG:$rT), - (select (v4i32 VECREG:$rC), - (v4f32 VECREG:$rB), - (v4f32 VECREG:$rA)))]>; - - // SELBr64_cond is defined in SPU64InstrInfo.td - def r32_cond: SELBRegCondInst; - def f32_cond: SELBRegCondInst; - def r16_cond: SELBRegCondInst; - def r8_cond: SELBRegCondInst; -} - -defm SELB : SelectBits; - -class SPUselbPatVec: - Pat<(SPUselb (vectype VECREG:$rA), (vectype VECREG:$rB), (vectype VECREG:$rC)), - (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>; - -def : SPUselbPatVec; -def : SPUselbPatVec; -def : SPUselbPatVec; -def : SPUselbPatVec; - -class SPUselbPatReg: - Pat<(SPUselb rclass:$rA, rclass:$rB, rclass:$rC), - (inst rclass:$rA, rclass:$rB, rclass:$rC)>; - -def : SPUselbPatReg; -def : SPUselbPatReg; -def : SPUselbPatReg; -def : SPUselbPatReg; - -// EQV: Equivalence (1 for each same bit, otherwise 0) -// -// Note: There are a lot of ways to match this bit operator and these patterns -// attempt to be as exhaustive as possible. - -class EQVInst pattern>: - RRForm<0b10010010000, OOL, IOL, "eqv\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class EQVVecInst: - EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)), - (and (vnot (vectype VECREG:$rA)), - (vnot (vectype VECREG:$rB)))))]>; - -class EQVRegInst: - EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (or (and rclass:$rA, rclass:$rB), - (and (not rclass:$rA), (not rclass:$rB))))]>; - -class EQVVecPattern1: - EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (xor (vectype VECREG:$rA), (vnot (vectype VECREG:$rB))))]>; - -class EQVRegPattern1: - EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (xor rclass:$rA, (not rclass:$rB)))]>; - -class EQVVecPattern2: - EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)), - (vnot (or (vectype VECREG:$rA), (vectype VECREG:$rB)))))]>; - -class EQVRegPattern2: - EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, - (or (and rclass:$rA, rclass:$rB), - (not (or rclass:$rA, rclass:$rB))))]>; - -class EQVVecPattern3: - EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (not (xor (vectype VECREG:$rA), (vectype VECREG:$rB))))]>; - -class EQVRegPattern3: - EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (not (xor rclass:$rA, rclass:$rB)))]>; - -multiclass BitEquivalence -{ - def v16i8: EQVVecInst; - def v8i16: EQVVecInst; - def v4i32: EQVVecInst; - def v2i64: EQVVecInst; - - def v16i8_1: EQVVecPattern1; - def v8i16_1: EQVVecPattern1; - def v4i32_1: EQVVecPattern1; - def v2i64_1: EQVVecPattern1; - - def v16i8_2: EQVVecPattern2; - def v8i16_2: EQVVecPattern2; - def v4i32_2: EQVVecPattern2; - def v2i64_2: EQVVecPattern2; - - def v16i8_3: EQVVecPattern3; - def v8i16_3: EQVVecPattern3; - def v4i32_3: EQVVecPattern3; - def v2i64_3: EQVVecPattern3; - - def r128: EQVRegInst; - def r64: EQVRegInst; - def r32: EQVRegInst; - def r16: EQVRegInst; - def r8: EQVRegInst; - - def r128_1: EQVRegPattern1; - def r64_1: EQVRegPattern1; - def r32_1: EQVRegPattern1; - def r16_1: EQVRegPattern1; - def r8_1: EQVRegPattern1; - - def r128_2: EQVRegPattern2; - def r64_2: EQVRegPattern2; - def r32_2: EQVRegPattern2; - def r16_2: EQVRegPattern2; - def r8_2: EQVRegPattern2; - - def r128_3: EQVRegPattern3; - def r64_3: EQVRegPattern3; - def r32_3: EQVRegPattern3; - def r16_3: EQVRegPattern3; - def r8_3: EQVRegPattern3; -} - -defm EQV: BitEquivalence; - -//===----------------------------------------------------------------------===// -// Vector shuffle... -//===----------------------------------------------------------------------===// -// SPUshuffle is generated in LowerVECTOR_SHUFFLE and gets replaced with SHUFB. -// See the SPUshuffle SDNode operand above, which sets up the DAG pattern -// matcher to emit something when the LowerVECTOR_SHUFFLE generates a node with -// the SPUISD::SHUFB opcode. -//===----------------------------------------------------------------------===// - -class SHUFBInst pattern>: - RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC", - ShuffleOp, pattern>; - -class SHUFBVecInst: - SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - [(set (resultvec VECREG:$rT), - (SPUshuffle (resultvec VECREG:$rA), - (resultvec VECREG:$rB), - (maskvec VECREG:$rC)))]>; - -class SHUFBGPRCInst: - SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC), - [/* no pattern */]>; - -multiclass ShuffleBytes -{ - def v16i8 : SHUFBVecInst; - def v16i8_m32 : SHUFBVecInst; - def v8i16 : SHUFBVecInst; - def v8i16_m32 : SHUFBVecInst; - def v4i32 : SHUFBVecInst; - def v4i32_m32 : SHUFBVecInst; - def v2i64 : SHUFBVecInst; - def v2i64_m32 : SHUFBVecInst; - - def v4f32 : SHUFBVecInst; - def v4f32_m32 : SHUFBVecInst; - - def v2f64 : SHUFBVecInst; - def v2f64_m32 : SHUFBVecInst; - - def gprc : SHUFBGPRCInst; -} - -defm SHUFB : ShuffleBytes; - -//===----------------------------------------------------------------------===// -// Shift and rotate group: -//===----------------------------------------------------------------------===// - -class SHLHInst pattern>: - RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB", - RotShiftVec, pattern>; - -class SHLHVecInst: - SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; - -multiclass ShiftLeftHalfword -{ - def v8i16: SHLHVecInst; - def r16: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - [(set R16C:$rT, (shl R16C:$rA, R16C:$rB))]>; - def r16_r32: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB), - [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>; -} - -defm SHLH : ShiftLeftHalfword; - -//===----------------------------------------------------------------------===// - -class SHLHIInst pattern>: - RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val", - RotShiftVec, pattern>; - -class SHLHIVecInst: - SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val), - [(set (vectype VECREG:$rT), - (SPUvec_shl (vectype VECREG:$rA), (i16 uimm7:$val)))]>; - -multiclass ShiftLeftHalfwordImm -{ - def v8i16: SHLHIVecInst; - def r16: SHLHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val), - [(set R16C:$rT, (shl R16C:$rA, (i16 uimm7:$val)))]>; -} - -defm SHLHI : ShiftLeftHalfwordImm; - -def : Pat<(SPUvec_shl (v8i16 VECREG:$rA), (i32 uimm7:$val)), - (SHLHIv8i16 VECREG:$rA, (TO_IMM16 uimm7:$val))>; - -def : Pat<(shl R16C:$rA, (i32 uimm7:$val)), - (SHLHIr16 R16C:$rA, (TO_IMM16 uimm7:$val))>; - -//===----------------------------------------------------------------------===// - -class SHLInst pattern>: - RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB", - RotShiftVec, pattern>; - -multiclass ShiftLeftWord -{ - def v4i32: - SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v4i32 VECREG:$rT), - (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - def r32: - SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>; -} - -defm SHL: ShiftLeftWord; - -//===----------------------------------------------------------------------===// - -class SHLIInst pattern>: - RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val", - RotShiftVec, pattern>; - -multiclass ShiftLeftWordImm -{ - def v4i32: - SHLIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val), - [(set (v4i32 VECREG:$rT), - (SPUvec_shl (v4i32 VECREG:$rA), (i32 uimm7:$val)))]>; - - def r32: - SHLIInst<(outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val), - [(set R32C:$rT, (shl R32C:$rA, (i32 uimm7:$val)))]>; -} - -defm SHLI : ShiftLeftWordImm; - -//===----------------------------------------------------------------------===// -// SHLQBI vec form: Note that this will shift the entire vector (the 128-bit -// register) to the left. Vector form is here to ensure type correctness. -// -// The shift count is in the lowest 3 bits (29-31) of $rB, so only a bit shift -// of 7 bits is actually possible. -// -// Note also that SHLQBI/SHLQBII are used in conjunction with SHLQBY/SHLQBYI -// to shift i64 and i128. SHLQBI is the residual left over after shifting by -// bytes with SHLQBY. - -class SHLQBIInst pattern>: - RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class SHLQBIVecInst: - SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [(set (vectype VECREG:$rT), - (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>; - -class SHLQBIRegInst: - SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), - [/* no pattern */]>; - -multiclass ShiftLeftQuadByBits -{ - def v16i8: SHLQBIVecInst; - def v8i16: SHLQBIVecInst; - def v4i32: SHLQBIVecInst; - def v4f32: SHLQBIVecInst; - def v2i64: SHLQBIVecInst; - def v2f64: SHLQBIVecInst; - - def r128: SHLQBIRegInst; -} - -defm SHLQBI : ShiftLeftQuadByBits; - -// See note above on SHLQBI. In this case, the predicate actually does then -// enforcement, whereas with SHLQBI, we have to "take it on faith." -class SHLQBIIInst pattern>: - RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val", - RotShiftQuad, pattern>; - -class SHLQBIIVecInst: - SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val), - [(set (vectype VECREG:$rT), - (SPUshlquad_l_bits (vectype VECREG:$rA), (i32 bitshift:$val)))]>; - -multiclass ShiftLeftQuadByBitsImm -{ - def v16i8 : SHLQBIIVecInst; - def v8i16 : SHLQBIIVecInst; - def v4i32 : SHLQBIIVecInst; - def v4f32 : SHLQBIIVecInst; - def v2i64 : SHLQBIIVecInst; - def v2f64 : SHLQBIIVecInst; -} - -defm SHLQBII : ShiftLeftQuadByBitsImm; - -// SHLQBY, SHLQBYI vector forms: Shift the entire vector to the left by bytes, -// not by bits. See notes above on SHLQBI. - -class SHLQBYInst pattern>: - RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class SHLQBYVecInst: - SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [(set (vectype VECREG:$rT), - (SPUshlquad_l_bytes (vectype VECREG:$rA), R32C:$rB))]>; - -multiclass ShiftLeftQuadBytes -{ - def v16i8: SHLQBYVecInst; - def v8i16: SHLQBYVecInst; - def v4i32: SHLQBYVecInst; - def v4f32: SHLQBYVecInst; - def v2i64: SHLQBYVecInst; - def v2f64: SHLQBYVecInst; - def r128: SHLQBYInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB), - [(set GPRC:$rT, (SPUshlquad_l_bytes GPRC:$rA, R32C:$rB))]>; -} - -defm SHLQBY: ShiftLeftQuadBytes; - -class SHLQBYIInst pattern>: - RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val", - RotShiftQuad, pattern>; - -class SHLQBYIVecInst: - SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val), - [(set (vectype VECREG:$rT), - (SPUshlquad_l_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>; - -multiclass ShiftLeftQuadBytesImm -{ - def v16i8: SHLQBYIVecInst; - def v8i16: SHLQBYIVecInst; - def v4i32: SHLQBYIVecInst; - def v4f32: SHLQBYIVecInst; - def v2i64: SHLQBYIVecInst; - def v2f64: SHLQBYIVecInst; - def r128: SHLQBYIInst<(outs GPRC:$rT), (ins GPRC:$rA, u7imm_i32:$val), - [(set GPRC:$rT, - (SPUshlquad_l_bytes GPRC:$rA, (i32 uimm7:$val)))]>; -} - -defm SHLQBYI : ShiftLeftQuadBytesImm; - -class SHLQBYBIInst pattern>: - RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class SHLQBYBIVecInst: - SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [/* no pattern */]>; - -class SHLQBYBIRegInst: - SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), - [/* no pattern */]>; - -multiclass ShiftLeftQuadBytesBitCount -{ - def v16i8: SHLQBYBIVecInst; - def v8i16: SHLQBYBIVecInst; - def v4i32: SHLQBYBIVecInst; - def v4f32: SHLQBYBIVecInst; - def v2i64: SHLQBYBIVecInst; - def v2f64: SHLQBYBIVecInst; - - def r128: SHLQBYBIRegInst; -} - -defm SHLQBYBI : ShiftLeftQuadBytesBitCount; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate halfword: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -class ROTHInst pattern>: - RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB", - RotShiftVec, pattern>; - -class ROTHVecInst: - ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (SPUvec_rotl VECREG:$rA, (v8i16 VECREG:$rB)))]>; - -class ROTHRegInst: - ROTHInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (rotl rclass:$rA, rclass:$rB))]>; - -multiclass RotateLeftHalfword -{ - def v8i16: ROTHVecInst; - def r16: ROTHRegInst; -} - -defm ROTH: RotateLeftHalfword; - -def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB), - [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate halfword, immediate: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -class ROTHIInst pattern>: - RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val", - RotShiftVec, pattern>; - -class ROTHIVecInst: - ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val), - [(set (vectype VECREG:$rT), - (SPUvec_rotl VECREG:$rA, (i16 uimm7:$val)))]>; - -multiclass RotateLeftHalfwordImm -{ - def v8i16: ROTHIVecInst; - def r16: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val), - [(set R16C:$rT, (rotl R16C:$rA, (i16 uimm7:$val)))]>; - def r16_r32: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val), - [(set R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>; -} - -defm ROTHI: RotateLeftHalfwordImm; - -def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)), - (ROTHIv8i16 VECREG:$rA, (TO_IMM16 imm:$val))>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate word: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTInst pattern>: - RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB", - RotShiftVec, pattern>; - -class ROTVecInst: - ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [(set (vectype VECREG:$rT), - (SPUvec_rotl (vectype VECREG:$rA), R32C:$rB))]>; - -class ROTRegInst: - ROTInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), - [(set rclass:$rT, - (rotl rclass:$rA, R32C:$rB))]>; - -multiclass RotateLeftWord -{ - def v4i32: ROTVecInst; - def r32: ROTRegInst; -} - -defm ROT: RotateLeftWord; - -// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit or -// 32-bit register -def ROTr32_r16_anyext: - ROTInst<(outs R32C:$rT), (ins R32C:$rA, R16C:$rB), - [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>; - -def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))), - (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>; - -def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))), - (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>; - -def ROTr32_r8_anyext: - ROTInst<(outs R32C:$rT), (ins R32C:$rA, R8C:$rB), - [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>; - -def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))), - (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>; - -def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))), - (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate word, immediate -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTIInst pattern>: - RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val", - RotShiftVec, pattern>; - -class ROTIVecInst: - ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val), - [(set (vectype VECREG:$rT), - (SPUvec_rotl (vectype VECREG:$rA), (inttype pred:$val)))]>; - -class ROTIRegInst: - ROTIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), - [(set rclass:$rT, (rotl rclass:$rA, (inttype pred:$val)))]>; - -multiclass RotateLeftWordImm -{ - def v4i32: ROTIVecInst; - def v4i32_i16: ROTIVecInst; - def v4i32_i8: ROTIVecInst; - - def r32: ROTIRegInst; - def r32_i16: ROTIRegInst; - def r32_i8: ROTIRegInst; -} - -defm ROTI : RotateLeftWordImm; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate quad by byte (count) -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTQBYInst pattern>: - RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class ROTQBYGenInst: - ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB), - [(set (type rc:$rT), - (SPUrotbytes_left (type rc:$rA), R32C:$rB))]>; - -class ROTQBYVecInst: - ROTQBYGenInst; - -multiclass RotateQuadLeftByBytes -{ - def v16i8: ROTQBYVecInst; - def v8i16: ROTQBYVecInst; - def v4i32: ROTQBYVecInst; - def v4f32: ROTQBYVecInst; - def v2i64: ROTQBYVecInst; - def v2f64: ROTQBYVecInst; - def i128: ROTQBYGenInst; -} - -defm ROTQBY: RotateQuadLeftByBytes; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate quad by byte (count), immediate -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTQBYIInst pattern>: - RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val", - RotShiftQuad, pattern>; - -class ROTQBYIGenInst: - ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val), - [(set (type rclass:$rT), - (SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>; - -class ROTQBYIVecInst: - ROTQBYIGenInst; - -multiclass RotateQuadByBytesImm -{ - def v16i8: ROTQBYIVecInst; - def v8i16: ROTQBYIVecInst; - def v4i32: ROTQBYIVecInst; - def v4f32: ROTQBYIVecInst; - def v2i64: ROTQBYIVecInst; - def vfi64: ROTQBYIVecInst; - def i128: ROTQBYIGenInst; -} - -defm ROTQBYI: RotateQuadByBytesImm; - -// See ROTQBY note above. -class ROTQBYBIInst pattern>: - RI7Form<0b00110011100, OOL, IOL, - "rotqbybi\t$rT, $rA, $shift", - RotShiftQuad, pattern>; - -class ROTQBYBIVecInst: - ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift), - [(set (vectype VECREG:$rT), - (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>; - -multiclass RotateQuadByBytesByBitshift { - def v16i8_r32: ROTQBYBIVecInst; - def v8i16_r32: ROTQBYBIVecInst; - def v4i32_r32: ROTQBYBIVecInst; - def v2i64_r32: ROTQBYBIVecInst; -} - -defm ROTQBYBI : RotateQuadByBytesByBitshift; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// See ROTQBY note above. -// -// Assume that the user of this instruction knows to shift the rotate count -// into bit 29 -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTQBIInst pattern>: - RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class ROTQBIVecInst: - ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [/* no pattern yet */]>; - -class ROTQBIRegInst: - ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), - [/* no pattern yet */]>; - -multiclass RotateQuadByBitCount -{ - def v16i8: ROTQBIVecInst; - def v8i16: ROTQBIVecInst; - def v4i32: ROTQBIVecInst; - def v2i64: ROTQBIVecInst; - - def r128: ROTQBIRegInst; - def r64: ROTQBIRegInst; -} - -defm ROTQBI: RotateQuadByBitCount; - -class ROTQBIIInst pattern>: - RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val", - RotShiftQuad, pattern>; - -class ROTQBIIVecInst: - ROTQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val), - [/* no pattern yet */]>; - -class ROTQBIIRegInst: - ROTQBIIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), - [/* no pattern yet */]>; - -multiclass RotateQuadByBitCountImm -{ - def v16i8: ROTQBIIVecInst; - def v8i16: ROTQBIIVecInst; - def v4i32: ROTQBIIVecInst; - def v2i64: ROTQBIIVecInst; - - def r128: ROTQBIIRegInst; - def r64: ROTQBIIRegInst; -} - -defm ROTQBII : RotateQuadByBitCountImm; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// ROTHM v8i16 form: -// NOTE(1): No vector rotate is generated by the C/C++ frontend (today), -// so this only matches a synthetically generated/lowered code -// fragment. -// NOTE(2): $rB must be negated before the right rotate! -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTHMInst pattern>: - RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB", - RotShiftVec, pattern>; - -def ROTHMv8i16: - ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [/* see patterns below - $rB must be negated */]>; - -def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), - (ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>; - -// ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left -// Note: This instruction doesn't match a pattern because rB must be negated -// for the instruction to work. Thus, the pattern below the instruction! - -def ROTHMr16: - ROTHMInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB), - [/* see patterns below - $rB must be negated! */]>; - -def : Pat<(srl R16C:$rA, R32C:$rB), - (ROTHMr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(srl R16C:$rA, R16C:$rB), - (ROTHMr16 R16C:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(srl R16C:$rA, R8C:$rB), - (ROTHMr16 R16C:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>; - -// ROTHMI v8i16 form: See the comment for ROTHM v8i16. The difference here is -// that the immediate can be complemented, so that the user doesn't have to -// worry about it. - -class ROTHMIInst pattern>: - RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val", - RotShiftVec, pattern>; - -def ROTHMIv8i16: - ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val), - [/* no pattern */]>; - -def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)), - (ROTHMIv8i16 VECREG:$rA, imm:$val)>; - -def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)), - (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>; - -def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)), - (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>; - -def ROTHMIr16: - ROTHMIInst<(outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val), - [/* no pattern */]>; - -def: Pat<(srl R16C:$rA, (i32 uimm7:$val)), - (ROTHMIr16 R16C:$rA, uimm7:$val)>; - -def: Pat<(srl R16C:$rA, (i16 uimm7:$val)), - (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; - -def: Pat<(srl R16C:$rA, (i8 uimm7:$val)), - (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; - -// ROTM v4i32 form: See the ROTHM v8i16 comments. -class ROTMInst pattern>: - RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB", - RotShiftVec, pattern>; - -def ROTMv4i32: - ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [/* see patterns below - $rB must be negated */]>; - -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), - (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>; - -def ROTMr32: - ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - [/* see patterns below - $rB must be negated */]>; - -def : Pat<(srl R32C:$rA, R32C:$rB), - (ROTMr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(srl R32C:$rA, R16C:$rB), - (ROTMr32 R32C:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(srl R32C:$rA, R8C:$rB), - (ROTMr32 R32C:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; - -// ROTMI v4i32 form: See the comment for ROTHM v8i16. -def ROTMIv4i32: - RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), - "rotmi\t$rT, $rA, $val", RotShiftVec, - [(set (v4i32 VECREG:$rT), - (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>; - -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i16 uimm7:$val)), - (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>; - -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)), - (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>; - -// ROTMI r32 form: know how to complement the immediate value. -def ROTMIr32: - RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val), - "rotmi\t$rT, $rA, $val", RotShiftVec, - [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>; - -def : Pat<(srl R32C:$rA, (i16 imm:$val)), - (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>; - -def : Pat<(srl R32C:$rA, (i8 imm:$val)), - (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// ROTQMBY: This is a vector form merely so that when used in an -// instruction pattern, type checking will succeed. This instruction assumes -// that the user knew to negate $rB. -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTQMBYInst pattern>: - RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class ROTQMBYVecInst: - ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [/* no pattern, $rB must be negated */]>; - -class ROTQMBYRegInst: - ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), - [/* no pattern */]>; - -multiclass RotateQuadBytes -{ - def v16i8: ROTQMBYVecInst; - def v8i16: ROTQMBYVecInst; - def v4i32: ROTQMBYVecInst; - def v2i64: ROTQMBYVecInst; - - def r128: ROTQMBYRegInst; - def r64: ROTQMBYRegInst; -} - -defm ROTQMBY : RotateQuadBytes; - -def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB), - (ROTQMBYr128 GPRC:$rA, - (SFIr32 R32C:$rB, 0))>; - -class ROTQMBYIInst pattern>: - RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val", - RotShiftQuad, pattern>; - -class ROTQMBYIVecInst: - ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), - [/* no pattern */]>; - -class ROTQMBYIRegInst: - ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), - [/* no pattern */]>; - -// 128-bit zero extension form: -class ROTQMBYIZExtInst: - ROTQMBYIInst<(outs GPRC:$rT), (ins rclass:$rA, optype:$val), - [/* no pattern */]>; - -multiclass RotateQuadBytesImm -{ - def v16i8: ROTQMBYIVecInst; - def v8i16: ROTQMBYIVecInst; - def v4i32: ROTQMBYIVecInst; - def v2i64: ROTQMBYIVecInst; - - def r128: ROTQMBYIRegInst; - def r64: ROTQMBYIRegInst; - - def r128_zext_r8: ROTQMBYIZExtInst; - def r128_zext_r16: ROTQMBYIZExtInst; - def r128_zext_r32: ROTQMBYIZExtInst; - def r128_zext_r64: ROTQMBYIZExtInst; -} - -defm ROTQMBYI : RotateQuadBytesImm; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate right and mask by bit count -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTQMBYBIInst pattern>: - RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class ROTQMBYBIVecInst: - ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [/* no pattern, */]>; - -multiclass RotateMaskQuadByBitCount -{ - def v16i8: ROTQMBYBIVecInst; - def v8i16: ROTQMBYBIVecInst; - def v4i32: ROTQMBYBIVecInst; - def v2i64: ROTQMBYBIVecInst; - def r128: ROTQMBYBIInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB), - [/*no pattern*/]>; -} - -defm ROTQMBYBI: RotateMaskQuadByBitCount; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate quad and mask by bits -// Note that the rotate amount has to be negated -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTQMBIInst pattern>: - RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class ROTQMBIVecInst: - ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [/* no pattern */]>; - -class ROTQMBIRegInst: - ROTQMBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), - [/* no pattern */]>; - -multiclass RotateMaskQuadByBits -{ - def v16i8: ROTQMBIVecInst; - def v8i16: ROTQMBIVecInst; - def v4i32: ROTQMBIVecInst; - def v2i64: ROTQMBIVecInst; - - def r128: ROTQMBIRegInst; - def r64: ROTQMBIRegInst; -} - -defm ROTQMBI: RotateMaskQuadByBits; - -def : Pat<(srl GPRC:$rA, R32C:$rB), - (ROTQMBYBIr128 (ROTQMBIr128 GPRC:$rA, - (SFIr32 R32C:$rB, 0)), - (SFIr32 R32C:$rB, 0))>; - - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate quad and mask by bits, immediate -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTQMBIIInst pattern>: - RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val", - RotShiftQuad, pattern>; - -class ROTQMBIIVecInst: - ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), - [/* no pattern */]>; - -class ROTQMBIIRegInst: - ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val), - [/* no pattern */]>; - -multiclass RotateMaskQuadByBitsImm -{ - def v16i8: ROTQMBIIVecInst; - def v8i16: ROTQMBIIVecInst; - def v4i32: ROTQMBIIVecInst; - def v2i64: ROTQMBIIVecInst; - - def r128: ROTQMBIIRegInst; - def r64: ROTQMBIIRegInst; -} - -defm ROTQMBII: RotateMaskQuadByBitsImm; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def ROTMAHv8i16: - RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "rotmah\t$rT, $rA, $rB", RotShiftVec, - [/* see patterns below - $rB must be negated */]>; - -def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), - (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>; - -def ROTMAHr16: - RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB), - "rotmah\t$rT, $rA, $rB", RotShiftVec, - [/* see patterns below - $rB must be negated */]>; - -def : Pat<(sra R16C:$rA, R32C:$rB), - (ROTMAHr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(sra R16C:$rA, R16C:$rB), - (ROTMAHr16 R16C:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(sra R16C:$rA, R8C:$rB), - (ROTMAHr16 R16C:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; - -def ROTMAHIv8i16: - RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val), - "rotmahi\t$rT, $rA, $val", RotShiftVec, - [(set (v8i16 VECREG:$rT), - (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>; - -def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i16 uimm7:$val)), - (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>; - -def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)), - (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>; - -def ROTMAHIr16: - RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val), - "rotmahi\t$rT, $rA, $val", RotShiftVec, - [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>; - -def : Pat<(sra R16C:$rA, (i32 imm:$val)), - (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; - -def : Pat<(sra R16C:$rA, (i8 imm:$val)), - (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; - -def ROTMAv4i32: - RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "rotma\t$rT, $rA, $rB", RotShiftVec, - [/* see patterns below - $rB must be negated */]>; - -def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), - (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>; - -def ROTMAr32: - RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - "rotma\t$rT, $rA, $rB", RotShiftVec, - [/* see patterns below - $rB must be negated */]>; - -def : Pat<(sra R32C:$rA, R32C:$rB), - (ROTMAr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(sra R32C:$rA, R16C:$rB), - (ROTMAr32 R32C:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(sra R32C:$rA, R8C:$rB), - (ROTMAr32 R32C:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; - -class ROTMAIInst pattern>: - RRForm<0b01011110000, OOL, IOL, - "rotmai\t$rT, $rA, $val", - RotShiftVec, pattern>; - -class ROTMAIVecInst: - ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val), - [(set (vectype VECREG:$rT), - (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>; - -class ROTMAIRegInst: - ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val), - [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>; - -multiclass RotateMaskAlgebraicImm { - def v2i64_i32 : ROTMAIVecInst; - def v4i32_i32 : ROTMAIVecInst; - def r64_i32 : ROTMAIRegInst; - def r32_i32 : ROTMAIRegInst; -} - -defm ROTMAI : RotateMaskAlgebraicImm; - -//===----------------------------------------------------------------------===// -// Branch and conditionals: -//===----------------------------------------------------------------------===// - -let isTerminator = 1, isBarrier = 1 in { - // Halt If Equal (r32 preferred slot only, no vector form) - def HEQr32: - RRForm_3<0b00011011110, (outs), (ins R32C:$rA, R32C:$rB), - "heq\t$rA, $rB", BranchResolv, - [/* no pattern to match */]>; - - def HEQIr32 : - RI10Form_2<0b11111110, (outs), (ins R32C:$rA, s10imm:$val), - "heqi\t$rA, $val", BranchResolv, - [/* no pattern to match */]>; - - // HGT/HGTI: These instructions use signed arithmetic for the comparison, - // contrasting with HLGT/HLGTI, which use unsigned comparison: - def HGTr32: - RRForm_3<0b00011010010, (outs), (ins R32C:$rA, R32C:$rB), - "hgt\t$rA, $rB", BranchResolv, - [/* no pattern to match */]>; - - def HGTIr32: - RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val), - "hgti\t$rA, $val", BranchResolv, - [/* no pattern to match */]>; - - def HLGTr32: - RRForm_3<0b00011011010, (outs), (ins R32C:$rA, R32C:$rB), - "hlgt\t$rA, $rB", BranchResolv, - [/* no pattern to match */]>; - - def HLGTIr32: - RI10Form_2<0b11111010, (outs), (ins R32C:$rA, s10imm:$val), - "hlgti\t$rA, $val", BranchResolv, - [/* no pattern to match */]>; -} - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Comparison operators for i8, i16 and i32: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class CEQBInst pattern> : - RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpEqualByte -{ - def v16i8 : - CEQBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v16i8 VECREG:$rT), (seteq (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - - def r8 : - CEQBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), - [(set R8C:$rT, (seteq R8C:$rA, R8C:$rB))]>; -} - -class CEQBIInst pattern> : - RI10Form<0b01111110, OOL, IOL, "ceqbi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpEqualByteImm -{ - def v16i8 : - CEQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val), - [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA), - v16i8SExt8Imm:$val))]>; - def r8: - CEQBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val), - [(set R8C:$rT, (seteq R8C:$rA, immSExt8:$val))]>; -} - -class CEQHInst pattern> : - RRForm<0b00010011110, OOL, IOL, "ceqh\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpEqualHalfword -{ - def v8i16 : CEQHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v8i16 VECREG:$rT), (seteq (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - - def r16 : CEQHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - [(set R16C:$rT, (seteq R16C:$rA, R16C:$rB))]>; -} - -class CEQHIInst pattern> : - RI10Form<0b10111110, OOL, IOL, "ceqhi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpEqualHalfwordImm -{ - def v8i16 : CEQHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v8i16 VECREG:$rT), - (seteq (v8i16 VECREG:$rA), - (v8i16 v8i16SExt10Imm:$val)))]>; - def r16 : CEQHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - [(set R16C:$rT, (seteq R16C:$rA, i16ImmSExt10:$val))]>; -} - -class CEQInst pattern> : - RRForm<0b00000011110, OOL, IOL, "ceq\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpEqualWord -{ - def v4i32 : CEQInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v4i32 VECREG:$rT), - (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - - def r32 : CEQInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>; -} - -class CEQIInst pattern> : - RI10Form<0b00111110, OOL, IOL, "ceqi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpEqualWordImm -{ - def v4i32 : CEQIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v4i32 VECREG:$rT), - (seteq (v4i32 VECREG:$rA), - (v4i32 v4i32SExt16Imm:$val)))]>; - - def r32: CEQIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), - [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>; -} - -class CGTBInst pattern> : - RRForm<0b00001010010, OOL, IOL, "cgtb\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpGtrByte -{ - def v16i8 : - CGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v16i8 VECREG:$rT), (setgt (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - - def r8 : - CGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), - [(set R8C:$rT, (setgt R8C:$rA, R8C:$rB))]>; -} - -class CGTBIInst pattern> : - RI10Form<0b01110010, OOL, IOL, "cgtbi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpGtrByteImm -{ - def v16i8 : - CGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val), - [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA), - v16i8SExt8Imm:$val))]>; - def r8: - CGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val), - [(set R8C:$rT, (setgt R8C:$rA, immSExt8:$val))]>; -} - -class CGTHInst pattern> : - RRForm<0b00010010010, OOL, IOL, "cgth\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpGtrHalfword -{ - def v8i16 : CGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v8i16 VECREG:$rT), (setgt (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - - def r16 : CGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - [(set R16C:$rT, (setgt R16C:$rA, R16C:$rB))]>; -} - -class CGTHIInst pattern> : - RI10Form<0b10110010, OOL, IOL, "cgthi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpGtrHalfwordImm -{ - def v8i16 : CGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v8i16 VECREG:$rT), - (setgt (v8i16 VECREG:$rA), - (v8i16 v8i16SExt10Imm:$val)))]>; - def r16 : CGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - [(set R16C:$rT, (setgt R16C:$rA, i16ImmSExt10:$val))]>; -} - -class CGTInst pattern> : - RRForm<0b00000010010, OOL, IOL, "cgt\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpGtrWord -{ - def v4i32 : CGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v4i32 VECREG:$rT), - (setgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - - def r32 : CGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - [(set R32C:$rT, (setgt R32C:$rA, R32C:$rB))]>; -} - -class CGTIInst pattern> : - RI10Form<0b00110010, OOL, IOL, "cgti\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpGtrWordImm -{ - def v4i32 : CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v4i32 VECREG:$rT), - (setgt (v4i32 VECREG:$rA), - (v4i32 v4i32SExt16Imm:$val)))]>; - - def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), - [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>; - - // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence: - def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v4i32 VECREG:$rT), - (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))), - (v4i32 v4i32SExt16Imm:$val)))]>; - - def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val), - [/* no pattern */]>; -} - -class CLGTBInst pattern> : - RRForm<0b00001011010, OOL, IOL, "clgtb\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpLGtrByte -{ - def v16i8 : - CLGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v16i8 VECREG:$rT), (setugt (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - - def r8 : - CLGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), - [(set R8C:$rT, (setugt R8C:$rA, R8C:$rB))]>; -} - -class CLGTBIInst pattern> : - RI10Form<0b01111010, OOL, IOL, "clgtbi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpLGtrByteImm -{ - def v16i8 : - CLGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val), - [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA), - v16i8SExt8Imm:$val))]>; - def r8: - CLGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val), - [(set R8C:$rT, (setugt R8C:$rA, immSExt8:$val))]>; -} - -class CLGTHInst pattern> : - RRForm<0b00010011010, OOL, IOL, "clgth\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpLGtrHalfword -{ - def v8i16 : CLGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v8i16 VECREG:$rT), (setugt (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - - def r16 : CLGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - [(set R16C:$rT, (setugt R16C:$rA, R16C:$rB))]>; -} - -class CLGTHIInst pattern> : - RI10Form<0b10111010, OOL, IOL, "clgthi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpLGtrHalfwordImm -{ - def v8i16 : CLGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v8i16 VECREG:$rT), - (setugt (v8i16 VECREG:$rA), - (v8i16 v8i16SExt10Imm:$val)))]>; - def r16 : CLGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - [(set R16C:$rT, (setugt R16C:$rA, i16ImmSExt10:$val))]>; -} - -class CLGTInst pattern> : - RRForm<0b00000011010, OOL, IOL, "clgt\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpLGtrWord -{ - def v4i32 : CLGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v4i32 VECREG:$rT), - (setugt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - - def r32 : CLGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - [(set R32C:$rT, (setugt R32C:$rA, R32C:$rB))]>; -} - -class CLGTIInst pattern> : - RI10Form<0b00111010, OOL, IOL, "clgti\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpLGtrWordImm -{ - def v4i32 : CLGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v4i32 VECREG:$rT), - (setugt (v4i32 VECREG:$rA), - (v4i32 v4i32SExt16Imm:$val)))]>; - - def r32: CLGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), - [(set R32C:$rT, (setugt R32C:$rA, i32ImmSExt10:$val))]>; -} - -defm CEQB : CmpEqualByte; -defm CEQBI : CmpEqualByteImm; -defm CEQH : CmpEqualHalfword; -defm CEQHI : CmpEqualHalfwordImm; -defm CEQ : CmpEqualWord; -defm CEQI : CmpEqualWordImm; -defm CGTB : CmpGtrByte; -defm CGTBI : CmpGtrByteImm; -defm CGTH : CmpGtrHalfword; -defm CGTHI : CmpGtrHalfwordImm; -defm CGT : CmpGtrWord; -defm CGTI : CmpGtrWordImm; -defm CLGTB : CmpLGtrByte; -defm CLGTBI : CmpLGtrByteImm; -defm CLGTH : CmpLGtrHalfword; -defm CLGTHI : CmpLGtrHalfwordImm; -defm CLGT : CmpLGtrWord; -defm CLGTI : CmpLGtrWordImm; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// For SETCC primitives not supported above (setlt, setle, setge, etc.) -// define a pattern to generate the right code, as a binary operator -// (in a manner of speaking.) -// -// Notes: -// 1. This only matches the setcc set of conditionals. Special pattern -// matching is used for select conditionals. -// -// 2. The "DAG" versions of these classes is almost exclusively used for -// i64 comparisons. See the tblgen fundamentals documentation for what -// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern -// class for where ResultInstrs originates. -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class SETCCNegCondReg: - Pat<(cond rclass:$rA, rclass:$rB), - (xorinst (cmpare rclass:$rA, rclass:$rB), (inttype -1))>; - -class SETCCNegCondImm: - Pat<(cond rclass:$rA, (inttype immpred:$imm)), - (xorinst (cmpare rclass:$rA, (inttype immpred:$imm)), (inttype -1))>; - -def : SETCCNegCondReg; -def : SETCCNegCondImm; - -def : SETCCNegCondReg; -def : SETCCNegCondImm; - -def : SETCCNegCondReg; -def : SETCCNegCondImm; - -class SETCCBinOpReg: - Pat<(cond rclass:$rA, rclass:$rB), - (binop (cmpOp1 rclass:$rA, rclass:$rB), - (cmpOp2 rclass:$rA, rclass:$rB))>; - -class SETCCBinOpImm: - Pat<(cond rclass:$rA, (immtype immpred:$imm)), - (binop (cmpOp1 rclass:$rA, (immtype immpred:$imm)), - (cmpOp2 rclass:$rA, (immtype immpred:$imm)))>; - -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : Pat<(setle R8C:$rA, R8C:$rB), - (XORBIr8 (CGTBr8 R8C:$rA, R8C:$rB), 0xff)>; -def : Pat<(setle R8C:$rA, immU8:$imm), - (XORBIr8 (CGTBIr8 R8C:$rA, immU8:$imm), 0xff)>; - -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : Pat<(setle R16C:$rA, R16C:$rB), - (XORHIr16 (CGTHr16 R16C:$rA, R16C:$rB), 0xffff)>; -def : Pat<(setle R16C:$rA, i16ImmSExt10:$imm), - (XORHIr16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>; - -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : Pat<(setle R32C:$rA, R32C:$rB), - (XORIr32 (CGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>; -def : Pat<(setle R32C:$rA, i32ImmSExt10:$imm), - (XORIr32 (CGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>; - -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : Pat<(setule R8C:$rA, R8C:$rB), - (XORBIr8 (CLGTBr8 R8C:$rA, R8C:$rB), 0xff)>; -def : Pat<(setule R8C:$rA, immU8:$imm), - (XORBIr8 (CLGTBIr8 R8C:$rA, immU8:$imm), 0xff)>; - -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : Pat<(setule R16C:$rA, R16C:$rB), - (XORHIr16 (CLGTHr16 R16C:$rA, R16C:$rB), 0xffff)>; -def : Pat<(setule R16C:$rA, i16ImmSExt10:$imm), - (XORHIr16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>; - -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : Pat<(setule R32C:$rA, R32C:$rB), - (XORIr32 (CLGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>; -def : Pat<(setule R32C:$rA, i32ImmSExt10:$imm), - (XORIr32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// select conditional patterns: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class SELECTNegCondReg: - Pat<(select (inttype (cond rclass:$rA, rclass:$rB)), - rclass:$rTrue, rclass:$rFalse), - (selinstr rclass:$rTrue, rclass:$rFalse, - (cmpare rclass:$rA, rclass:$rB))>; - -class SELECTNegCondImm: - Pat<(select (inttype (cond rclass:$rA, immpred:$imm)), - rclass:$rTrue, rclass:$rFalse), - (selinstr rclass:$rTrue, rclass:$rFalse, - (cmpare rclass:$rA, immpred:$imm))>; - -def : SELECTNegCondReg; -def : SELECTNegCondImm; -def : SELECTNegCondReg; -def : SELECTNegCondImm; -def : SELECTNegCondReg; -def : SELECTNegCondImm; - -def : SELECTNegCondReg; -def : SELECTNegCondImm; -def : SELECTNegCondReg; -def : SELECTNegCondImm; -def : SELECTNegCondReg; -def : SELECTNegCondImm; - -def : SELECTNegCondReg; -def : SELECTNegCondImm; -def : SELECTNegCondReg; -def : SELECTNegCondImm; -def : SELECTNegCondReg; -def : SELECTNegCondImm; - -class SELECTBinOpReg: - Pat<(select (inttype (cond rclass:$rA, rclass:$rB)), - rclass:$rTrue, rclass:$rFalse), - (selinstr rclass:$rFalse, rclass:$rTrue, - (binop (cmpOp1 rclass:$rA, rclass:$rB), - (cmpOp2 rclass:$rA, rclass:$rB)))>; - -class SELECTBinOpImm: - Pat<(select (inttype (cond rclass:$rA, (inttype immpred:$imm))), - rclass:$rTrue, rclass:$rFalse), - (selinstr rclass:$rFalse, rclass:$rTrue, - (binop (cmpOp1 rclass:$rA, (inttype immpred:$imm)), - (cmpOp2 rclass:$rA, (inttype immpred:$imm))))>; - -def : SELECTBinOpReg; -def : SELECTBinOpImm; - -def : SELECTBinOpReg; -def : SELECTBinOpImm; - -def : SELECTBinOpReg; -def : SELECTBinOpImm; - -def : SELECTBinOpReg; -def : SELECTBinOpImm; - -def : SELECTBinOpReg; -def : SELECTBinOpImm; - -def : SELECTBinOpReg; -def : SELECTBinOpImm; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -let isCall = 1, - // All calls clobber the non-callee-saved registers: - Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, - R10,R11,R12,R13,R14,R15,R16,R17,R18,R19, - R20,R21,R22,R23,R24,R25,R26,R27,R28,R29, - R30,R31,R32,R33,R34,R35,R36,R37,R38,R39, - R40,R41,R42,R43,R44,R45,R46,R47,R48,R49, - R50,R51,R52,R53,R54,R55,R56,R57,R58,R59, - R60,R61,R62,R63,R64,R65,R66,R67,R68,R69, - R70,R71,R72,R73,R74,R75,R76,R77,R78,R79], - // All of these instructions use $lr (aka $0) - Uses = [R0] in { - // Branch relative and set link: Used if we actually know that the target - // is within [-32768, 32767] bytes of the target - def BRSL: - BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func), - "brsl\t$$lr, $func", - [(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>; - - // Branch absolute and set link: Used if we actually know that the target - // is an absolute address - def BRASL: - BranchSetLink<0b011001100, (outs), (ins calltarget:$func), - "brasl\t$$lr, $func", - [(SPUcall (SPUaform tglobaladdr:$func, 0))]>; - - // Branch indirect and set link if external data. These instructions are not - // actually generated, matched by an intrinsic: - def BISLED_00: BISLEDForm<0b11, "bisled\t$$lr, $func", [/* empty pattern */]>; - def BISLED_E0: BISLEDForm<0b10, "bisled\t$$lr, $func", [/* empty pattern */]>; - def BISLED_0D: BISLEDForm<0b01, "bisled\t$$lr, $func", [/* empty pattern */]>; - def BISLED_ED: BISLEDForm<0b00, "bisled\t$$lr, $func", [/* empty pattern */]>; - - // Branch indirect and set link. This is the "X-form" address version of a - // function call - def BISL: - BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>; -} - -// Support calls to external symbols: -def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)), - (BRSL texternalsym:$func)>; - -def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)), - (BRASL texternalsym:$func)>; - -// Unconditional branches: -let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { - let isBarrier = 1 in { - def BR : - UncondBranch<0b001001100, (outs), (ins brtarget:$dest), - "br\t$dest", - [(br bb:$dest)]>; - - // Unconditional, absolute address branch - def BRA: - UncondBranch<0b001100000, (outs), (ins brtarget:$dest), - "bra\t$dest", - [/* no pattern */]>; - - // Indirect branch - let isIndirectBranch = 1 in { - def BI: - BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>; - } - } - - // Conditional branches: - class BRNZInst pattern>: - RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest", - BranchResolv, pattern>; - - class BRNZRegInst: - BRNZInst<(ins rclass:$rCond, brtarget:$dest), - [(brcond rclass:$rCond, bb:$dest)]>; - - class BRNZVecInst: - BRNZInst<(ins VECREG:$rCond, brtarget:$dest), - [(brcond (vectype VECREG:$rCond), bb:$dest)]>; - - multiclass BranchNotZero { - def v4i32 : BRNZVecInst; - def r32 : BRNZRegInst; - } - - defm BRNZ : BranchNotZero; - - class BRZInst pattern>: - RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest", - BranchResolv, pattern>; - - class BRZRegInst: - BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>; - - class BRZVecInst: - BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>; - - multiclass BranchZero { - def v4i32: BRZVecInst; - def r32: BRZRegInst; - } - - defm BRZ: BranchZero; - - // Note: LLVM doesn't do branch conditional, indirect. Otherwise these would - // be useful: - /* - class BINZInst pattern>: - BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>; - - class BINZRegInst: - BINZInst<(ins rclass:$rA, brtarget:$dest), - [(brcond rclass:$rA, R32C:$dest)]>; - - class BINZVecInst: - BINZInst<(ins VECREG:$rA, R32C:$dest), - [(brcond (vectype VECREG:$rA), R32C:$dest)]>; - - multiclass BranchNotZeroIndirect { - def v4i32: BINZVecInst; - def r32: BINZRegInst; - } - - defm BINZ: BranchNotZeroIndirect; - - class BIZInst pattern>: - BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>; - - class BIZRegInst: - BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>; - - class BIZVecInst: - BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>; - - multiclass BranchZeroIndirect { - def v4i32: BIZVecInst; - def r32: BIZRegInst; - } - - defm BIZ: BranchZeroIndirect; - */ - - class BRHNZInst pattern>: - RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv, - pattern>; - - class BRHNZRegInst: - BRHNZInst<(ins rclass:$rCond, brtarget:$dest), - [(brcond rclass:$rCond, bb:$dest)]>; - - class BRHNZVecInst: - BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>; - - multiclass BranchNotZeroHalfword { - def v8i16: BRHNZVecInst; - def r16: BRHNZRegInst; - } - - defm BRHNZ: BranchNotZeroHalfword; - - class BRHZInst pattern>: - RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv, - pattern>; - - class BRHZRegInst: - BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>; - - class BRHZVecInst: - BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>; - - multiclass BranchZeroHalfword { - def v8i16: BRHZVecInst; - def r16: BRHZRegInst; - } - - defm BRHZ: BranchZeroHalfword; -} - -//===----------------------------------------------------------------------===// -// setcc and brcond patterns: -//===----------------------------------------------------------------------===// - -def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest), - (BRHZr16 R16C:$rA, bb:$dest)>; -def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest), - (BRHNZr16 R16C:$rA, bb:$dest)>; - -def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest), - (BRZr32 R32C:$rA, bb:$dest)>; -def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest), - (BRNZr32 R32C:$rA, bb:$dest)>; - -multiclass BranchCondEQ -{ - def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), - (brinst16 (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>; - - def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), - (brinst16 (CEQHr16 R16C:$rA, R16:$rB), bb:$dest)>; - - def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), - (brinst32 (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>; - - def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), - (brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>; -} - -defm BRCONDeq : BranchCondEQ; -defm BRCONDne : BranchCondEQ; - -multiclass BranchCondLGT -{ - def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), - (brinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>; - - def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), - (brinst16 (CLGTHr16 R16C:$rA, R16:$rB), bb:$dest)>; - - def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), - (brinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>; - - def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), - (brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>; -} - -defm BRCONDugt : BranchCondLGT; -defm BRCONDule : BranchCondLGT; - -multiclass BranchCondLGTEQ -{ - def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), - (brinst16 (orinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), - (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)), - bb:$dest)>; - - def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), - (brinst16 (orinst16 (CLGTHr16 R16C:$rA, R16:$rB), - (CEQHr16 R16C:$rA, R16:$rB)), - bb:$dest)>; - - def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), - (brinst32 (orinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), - (CEQIr32 R32C:$rA, i32ImmSExt10:$val)), - bb:$dest)>; - - def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), - (brinst32 (orinst32 (CLGTr32 R32C:$rA, R32C:$rB), - (CEQr32 R32C:$rA, R32C:$rB)), - bb:$dest)>; -} - -defm BRCONDuge : BranchCondLGTEQ; -defm BRCONDult : BranchCondLGTEQ; - -multiclass BranchCondGT -{ - def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), - (brinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>; - - def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), - (brinst16 (CGTHr16 R16C:$rA, R16:$rB), bb:$dest)>; - - def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), - (brinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>; - - def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), - (brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>; -} - -defm BRCONDgt : BranchCondGT; -defm BRCONDle : BranchCondGT; - -multiclass BranchCondGTEQ -{ - def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), - (brinst16 (orinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), - (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)), - bb:$dest)>; - - def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), - (brinst16 (orinst16 (CGTHr16 R16C:$rA, R16:$rB), - (CEQHr16 R16C:$rA, R16:$rB)), - bb:$dest)>; - - def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), - (brinst32 (orinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), - (CEQIr32 R32C:$rA, i32ImmSExt10:$val)), - bb:$dest)>; - - def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), - (brinst32 (orinst32 (CGTr32 R32C:$rA, R32C:$rB), - (CEQr32 R32C:$rA, R32C:$rB)), - bb:$dest)>; -} - -defm BRCONDge : BranchCondGTEQ; -defm BRCONDlt : BranchCondGTEQ; - -let isTerminator = 1, isBarrier = 1 in { - let isReturn = 1 in { - def RET: - RETForm<"bi\t$$lr", [(retflag)]>; - } -} - -//===----------------------------------------------------------------------===// -// Single precision floating point instructions -//===----------------------------------------------------------------------===// - -class FAInst pattern>: - RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB", - SPrecFP, pattern>; - -class FAVecInst: - FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; - -multiclass SFPAdd -{ - def v4f32: FAVecInst; - def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), - [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>; -} - -defm FA : SFPAdd; - -class FSInst pattern>: - RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB", - SPrecFP, pattern>; - -class FSVecInst: - FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; - -multiclass SFPSub -{ - def v4f32: FSVecInst; - def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), - [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>; -} - -defm FS : SFPSub; - -class FMInst pattern>: - RRForm<0b01100011010, OOL, IOL, - "fm\t$rT, $rA, $rB", SPrecFP, - pattern>; - -class FMVecInst: - FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (type VECREG:$rT), - (fmul (type VECREG:$rA), (type VECREG:$rB)))]>; - -multiclass SFPMul -{ - def v4f32: FMVecInst; - def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), - [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>; -} - -defm FM : SFPMul; - -// Floating point multiply and add -// e.g. d = c + (a * b) -def FMAv4f32: - RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fma\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fadd (v4f32 VECREG:$rC), - (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>; - -def FMAf32: - RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fma\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; - -// FP multiply and subtract -// Subtracts value in rC from product -// res = a * b - c -def FMSv4f32 : - RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), - (v4f32 VECREG:$rC)))]>; - -def FMSf32 : - RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, - (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>; - -// Floating Negative Mulitply and Subtract -// Subtracts product from value in rC -// res = fneg(fms a b c) -// = - (a * b - c) -// = c - a * b -// NOTE: subtraction order -// fsub a b = a - b -// fs a b = b - a? -def FNMSf32 : - RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fnms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; - -def FNMSv4f32 : - RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fnms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fsub (v4f32 VECREG:$rC), - (fmul (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB))))]>; - - - - -// Floating point reciprocal estimate - -class FRESTInst: - RRForm_1<0b00110111000, OOL, IOL, - "frest\t$rT, $rA", SPrecFP, - [/* no pattern */]>; - -def FRESTv4f32 : - FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>; - -def FRESTf32 : - FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>; - -// Floating point interpolate (used in conjunction with reciprocal estimate) -def FIv4f32 : - RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fi\t$rT, $rA, $rB", SPrecFP, - [/* no pattern */]>; - -def FIf32 : - RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), - "fi\t$rT, $rA, $rB", SPrecFP, - [/* no pattern */]>; - -//-------------------------------------------------------------------------- -// Basic single precision floating point comparisons: -// -// Note: There is no support on SPU for single precision NaN. Consequently, -// ordered and unordered comparisons are the same. -//-------------------------------------------------------------------------- - -def FCEQf32 : - RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), - "fceq\t$rT, $rA, $rB", SPrecFP, - [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>; - -def : Pat<(setoeq R32FP:$rA, R32FP:$rB), - (FCEQf32 R32FP:$rA, R32FP:$rB)>; - -def FCMEQf32 : - RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), - "fcmeq\t$rT, $rA, $rB", SPrecFP, - [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>; - -def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)), - (FCMEQf32 R32FP:$rA, R32FP:$rB)>; - -def FCGTf32 : - RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), - "fcgt\t$rT, $rA, $rB", SPrecFP, - [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>; - -def : Pat<(setogt R32FP:$rA, R32FP:$rB), - (FCGTf32 R32FP:$rA, R32FP:$rB)>; - -def FCMGTf32 : - RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), - "fcmgt\t$rT, $rA, $rB", SPrecFP, - [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>; - -def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)), - (FCMGTf32 R32FP:$rA, R32FP:$rB)>; - -//-------------------------------------------------------------------------- -// Single precision floating point comparisons and SETCC equivalents: -//-------------------------------------------------------------------------- - -def : SETCCNegCondReg; -def : SETCCNegCondReg; - -def : SETCCBinOpReg; -def : SETCCBinOpReg; - -def : SETCCBinOpReg; -def : SETCCBinOpReg; - -def : Pat<(setule R32FP:$rA, R32FP:$rB), - (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>; -def : Pat<(setole R32FP:$rA, R32FP:$rB), - (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>; - -// FP Status and Control Register Write -// Why isn't rT a don't care in the ISA? -// Should we create a special RRForm_3 for this guy and zero out the rT? -def FSCRWf32 : - RRForm_1<0b01011101110, (outs R32FP:$rT), (ins R32FP:$rA), - "fscrwr\t$rA", SPrecFP, - [/* This instruction requires an intrinsic. Note: rT is unused. */]>; - -// FP Status and Control Register Read -def FSCRRf32 : - RRForm_2<0b01011101110, (outs R32FP:$rT), (ins), - "fscrrd\t$rT", SPrecFP, - [/* This instruction requires an intrinsic */]>; - -// llvm instruction space -// How do these map onto cell instructions? -// fdiv rA rB -// frest rC rB # c = 1/b (both lines) -// fi rC rB rC -// fm rD rA rC # d = a * 1/b -// fnms rB rD rB rA # b = - (d * b - a) --should == 0 in a perfect world -// fma rB rB rC rD # b = b * c + d -// = -(d *b -a) * c + d -// = a * c - c ( a *b *c - a) - -// fcopysign (???) - -// Library calls: -// These llvm instructions will actually map to library calls. -// All that's needed, then, is to check that the appropriate library is -// imported and do a brsl to the proper function name. -// frem # fmod(x, y): x - (x/y) * y -// (Note: fmod(double, double), fmodf(float,float) -// fsqrt? -// fsin? -// fcos? -// Unimplemented SPU instruction space -// floating reciprocal absolute square root estimate (frsqest) - -// The following are probably just intrinsics -// status and control register write -// status and control register read - -//-------------------------------------- -// Floating Point Conversions -// Signed conversions: -def CSiFv4f32: - CVTIntFPForm<0b0101101110, (outs VECREG:$rT), (ins VECREG:$rA), - "csflt\t$rT, $rA, 0", SPrecFP, - [(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>; - -// Convert signed integer to floating point -def CSiFf32 : - CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA), - "csflt\t$rT, $rA, 0", SPrecFP, - [(set R32FP:$rT, (sint_to_fp R32C:$rA))]>; - -// Convert unsigned into to float -def CUiFv4f32 : - CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA), - "cuflt\t$rT, $rA, 0", SPrecFP, - [(set (v4f32 VECREG:$rT), (uint_to_fp (v4i32 VECREG:$rA)))]>; - -def CUiFf32 : - CVTIntFPForm<0b1101101110, (outs R32FP:$rT), (ins R32C:$rA), - "cuflt\t$rT, $rA, 0", SPrecFP, - [(set R32FP:$rT, (uint_to_fp R32C:$rA))]>; - -// Convert float to unsigned int -// Assume that scale = 0 - -def CFUiv4f32 : - CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA), - "cfltu\t$rT, $rA, 0", SPrecFP, - [(set (v4i32 VECREG:$rT), (fp_to_uint (v4f32 VECREG:$rA)))]>; - -def CFUif32 : - CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA), - "cfltu\t$rT, $rA, 0", SPrecFP, - [(set R32C:$rT, (fp_to_uint R32FP:$rA))]>; - -// Convert float to signed int -// Assume that scale = 0 - -def CFSiv4f32 : - CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA), - "cflts\t$rT, $rA, 0", SPrecFP, - [(set (v4i32 VECREG:$rT), (fp_to_sint (v4f32 VECREG:$rA)))]>; - -def CFSif32 : - CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA), - "cflts\t$rT, $rA, 0", SPrecFP, - [(set R32C:$rT, (fp_to_sint R32FP:$rA))]>; - -//===----------------------------------------------------------------------==// -// Single<->Double precision conversions -//===----------------------------------------------------------------------==// - -// NOTE: We use "vec" name suffix here to avoid confusion (e.g. input is a -// v4f32, output is v2f64--which goes in the name?) - -// Floating point extend single to double -// NOTE: Not sure if passing in v4f32 to FESDvec is correct since it -// operates on two double-word slots (i.e. 1st and 3rd fp numbers -// are ignored). -def FESDvec : - RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA), - "fesd\t$rT, $rA", SPrecFP, - [/*(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))*/]>; - -def FESDf32 : - RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA), - "fesd\t$rT, $rA", SPrecFP, - [(set R64FP:$rT, (fextend R32FP:$rA))]>; - -// Floating point round double to single -//def FRDSvec : -// RRForm_1<0b10011101110, (outs VECREG:$rT), (ins VECREG:$rA), -// "frds\t$rT, $rA,", SPrecFP, -// [(set (v4f32 R32FP:$rT), (fround (v2f64 R64FP:$rA)))]>; - -def FRDSf64 : - RRForm_1<0b10011101110, (outs R32FP:$rT), (ins R64FP:$rA), - "frds\t$rT, $rA", SPrecFP, - [(set R32FP:$rT, (fround R64FP:$rA))]>; - -//ToDo include anyextend? - -//===----------------------------------------------------------------------==// -// Double precision floating point instructions -//===----------------------------------------------------------------------==// -def FAf64 : - RRForm<0b00110011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), - "dfa\t$rT, $rA, $rB", DPrecFP, - [(set R64FP:$rT, (fadd R64FP:$rA, R64FP:$rB))]>; - -def FAv2f64 : - RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfa\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (fadd (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>; - -def FSf64 : - RRForm<0b10100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), - "dfs\t$rT, $rA, $rB", DPrecFP, - [(set R64FP:$rT, (fsub R64FP:$rA, R64FP:$rB))]>; - -def FSv2f64 : - RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfs\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), - (fsub (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>; - -def FMf64 : - RRForm<0b01100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), - "dfm\t$rT, $rA, $rB", DPrecFP, - [(set R64FP:$rT, (fmul R64FP:$rA, R64FP:$rB))]>; - -def FMv2f64: - RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfm\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), - (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>; - -def FMAf64: - RRForm<0b00111010110, (outs R64FP:$rT), - (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), - "dfma\t$rT, $rA, $rB", DPrecFP, - [(set R64FP:$rT, (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>, - RegConstraint<"$rC = $rT">, - NoEncode<"$rC">; - -def FMAv2f64: - RRForm<0b00111010110, (outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "dfma\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), - (fadd (v2f64 VECREG:$rC), - (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB))))]>, - RegConstraint<"$rC = $rT">, - NoEncode<"$rC">; - -def FMSf64 : - RRForm<0b10111010110, (outs R64FP:$rT), - (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), - "dfms\t$rT, $rA, $rB", DPrecFP, - [(set R64FP:$rT, (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))]>, - RegConstraint<"$rC = $rT">, - NoEncode<"$rC">; - -def FMSv2f64 : - RRForm<0b10111010110, (outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "dfms\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), - (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)), - (v2f64 VECREG:$rC)))]>; - -// DFNMS: - (a * b - c) -// - (a * b) + c => c - (a * b) - -class DFNMSInst pattern>: - RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB", - DPrecFP, pattern>, - RegConstraint<"$rC = $rT">, - NoEncode<"$rC">; - -class DFNMSVecInst pattern>: - DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - pattern>; - -class DFNMSRegInst pattern>: - DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), - pattern>; - -multiclass DFMultiplySubtract -{ - def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT), - (fsub (v2f64 VECREG:$rC), - (fmul (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB))))]>; - - def f64 : DFNMSRegInst<[(set R64FP:$rT, - (fsub R64FP:$rC, - (fmul R64FP:$rA, R64FP:$rB)))]>; -} - -defm DFNMS : DFMultiplySubtract; - -// - (a * b + c) -// - (a * b) - c -def FNMAf64 : - RRForm<0b11111010110, (outs R64FP:$rT), - (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), - "dfnma\t$rT, $rA, $rB", DPrecFP, - [(set R64FP:$rT, (fneg (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB))))]>, - RegConstraint<"$rC = $rT">, - NoEncode<"$rC">; - -def FNMAv2f64 : - RRForm<0b11111010110, (outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "dfnma\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), - (fneg (fadd (v2f64 VECREG:$rC), - (fmul (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))))]>, - RegConstraint<"$rC = $rT">, - NoEncode<"$rC">; - -//===----------------------------------------------------------------------==// -// Floating point negation and absolute value -//===----------------------------------------------------------------------==// - -def : Pat<(fneg (v4f32 VECREG:$rA)), - (XORfnegvec (v4f32 VECREG:$rA), - (v4f32 (ILHUv4i32 0x8000)))>; - -def : Pat<(fneg R32FP:$rA), - (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>; - -// Floating point absolute value -// Note: f64 fabs is custom-selected. - -def : Pat<(fabs R32FP:$rA), - (ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>; - -def : Pat<(fabs (v4f32 VECREG:$rA)), - (ANDfabsvec (v4f32 VECREG:$rA), - (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>; - -//===----------------------------------------------------------------------===// -// Hint for branch instructions: -//===----------------------------------------------------------------------===// -def HBRA : - HBI16Form<0b0001001,(ins hbrtarget:$brinst, brtarget:$btarg), "hbra\t$brinst, $btarg">; - -//===----------------------------------------------------------------------===// -// Execution, Load NOP (execute NOPs belong in even pipeline, load NOPs belong -// in the odd pipeline) -//===----------------------------------------------------------------------===// - -def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> { - let Pattern = []; - - let Inst{0-10} = 0b10000000010; - let Inst{11-17} = 0; - let Inst{18-24} = 0; - let Inst{25-31} = 0; -} - -def LNOP : SPUInstr<(outs), (ins), "lnop", LoadNOP> { - let Pattern = []; - - let Inst{0-10} = 0b10000000000; - let Inst{11-17} = 0; - let Inst{18-24} = 0; - let Inst{25-31} = 0; -} - -//===----------------------------------------------------------------------===// -// Bit conversions (type conversions between vector/packed types) -// NOTE: Promotions are handled using the XS* instructions. -//===----------------------------------------------------------------------===// -def : Pat<(v16i8 (bitconvert (v8i16 VECREG:$src))), (v16i8 VECREG:$src)>; -def : Pat<(v16i8 (bitconvert (v4i32 VECREG:$src))), (v16i8 VECREG:$src)>; -def : Pat<(v16i8 (bitconvert (v2i64 VECREG:$src))), (v16i8 VECREG:$src)>; -def : Pat<(v16i8 (bitconvert (v4f32 VECREG:$src))), (v16i8 VECREG:$src)>; -def : Pat<(v16i8 (bitconvert (v2f64 VECREG:$src))), (v16i8 VECREG:$src)>; - -def : Pat<(v8i16 (bitconvert (v16i8 VECREG:$src))), (v8i16 VECREG:$src)>; -def : Pat<(v8i16 (bitconvert (v4i32 VECREG:$src))), (v8i16 VECREG:$src)>; -def : Pat<(v8i16 (bitconvert (v2i64 VECREG:$src))), (v8i16 VECREG:$src)>; -def : Pat<(v8i16 (bitconvert (v4f32 VECREG:$src))), (v8i16 VECREG:$src)>; -def : Pat<(v8i16 (bitconvert (v2f64 VECREG:$src))), (v8i16 VECREG:$src)>; - -def : Pat<(v4i32 (bitconvert (v16i8 VECREG:$src))), (v4i32 VECREG:$src)>; -def : Pat<(v4i32 (bitconvert (v8i16 VECREG:$src))), (v4i32 VECREG:$src)>; -def : Pat<(v4i32 (bitconvert (v2i64 VECREG:$src))), (v4i32 VECREG:$src)>; -def : Pat<(v4i32 (bitconvert (v4f32 VECREG:$src))), (v4i32 VECREG:$src)>; -def : Pat<(v4i32 (bitconvert (v2f64 VECREG:$src))), (v4i32 VECREG:$src)>; - -def : Pat<(v2i64 (bitconvert (v16i8 VECREG:$src))), (v2i64 VECREG:$src)>; -def : Pat<(v2i64 (bitconvert (v8i16 VECREG:$src))), (v2i64 VECREG:$src)>; -def : Pat<(v2i64 (bitconvert (v4i32 VECREG:$src))), (v2i64 VECREG:$src)>; -def : Pat<(v2i64 (bitconvert (v4f32 VECREG:$src))), (v2i64 VECREG:$src)>; -def : Pat<(v2i64 (bitconvert (v2f64 VECREG:$src))), (v2i64 VECREG:$src)>; - -def : Pat<(v4f32 (bitconvert (v16i8 VECREG:$src))), (v4f32 VECREG:$src)>; -def : Pat<(v4f32 (bitconvert (v8i16 VECREG:$src))), (v4f32 VECREG:$src)>; -def : Pat<(v4f32 (bitconvert (v2i64 VECREG:$src))), (v4f32 VECREG:$src)>; -def : Pat<(v4f32 (bitconvert (v4i32 VECREG:$src))), (v4f32 VECREG:$src)>; -def : Pat<(v4f32 (bitconvert (v2f64 VECREG:$src))), (v4f32 VECREG:$src)>; - -def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>; -def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>; -def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>; -def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>; -def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>; - -def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))), - (COPY_TO_REGCLASS VECREG:$src, GPRC)>; -def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))), - (COPY_TO_REGCLASS VECREG:$src, GPRC)>; -def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))), - (COPY_TO_REGCLASS VECREG:$src, GPRC)>; -def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))), - (COPY_TO_REGCLASS VECREG:$src, GPRC)>; -def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))), - (COPY_TO_REGCLASS VECREG:$src, GPRC)>; -def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))), - (COPY_TO_REGCLASS VECREG:$src, GPRC)>; - -def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))), - (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; -def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))), - (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; -def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))), - (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; -def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))), - (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; -def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))), - (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; -def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))), - (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; - -def : Pat<(i32 (bitconvert R32FP:$rA)), - (COPY_TO_REGCLASS R32FP:$rA, R32C)>; - -def : Pat<(f32 (bitconvert R32C:$rA)), - (COPY_TO_REGCLASS R32C:$rA, R32FP)>; - -def : Pat<(i64 (bitconvert R64FP:$rA)), - (COPY_TO_REGCLASS R64FP:$rA, R64C)>; - -def : Pat<(f64 (bitconvert R64C:$rA)), - (COPY_TO_REGCLASS R64C:$rA, R64FP)>; - - -//===----------------------------------------------------------------------===// -// Instruction patterns: -//===----------------------------------------------------------------------===// - -// General 32-bit constants: -def : Pat<(i32 imm:$imm), - (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm))>; - -// Single precision float constants: -def : Pat<(f32 fpimm:$imm), - (IOHLf32 (ILHUf32 (HI16_f32 fpimm:$imm)), (LO16_f32 fpimm:$imm))>; - -// General constant 32-bit vectors -def : Pat<(v4i32 v4i32Imm:$imm), - (IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))), - (LO16_vec v4i32Imm:$imm))>; - -// 8-bit constants -def : Pat<(i8 imm:$imm), - (ILHr8 imm:$imm)>; - -//===----------------------------------------------------------------------===// -// Zero/Any/Sign extensions -//===----------------------------------------------------------------------===// - -// sext 8->32: Sign extend bytes to words -def : Pat<(sext_inreg R32C:$rSrc, i8), - (XSHWr32 (XSBHr32 R32C:$rSrc))>; - -def : Pat<(i32 (sext R8C:$rSrc)), - (XSHWr16 (XSBHr8 R8C:$rSrc))>; - -// sext 8->64: Sign extend bytes to double word -def : Pat<(sext_inreg R64C:$rSrc, i8), - (XSWDr64_inreg (XSHWr64 (XSBHr64 R64C:$rSrc)))>; - -def : Pat<(i64 (sext R8C:$rSrc)), - (XSWDr64 (XSHWr16 (XSBHr8 R8C:$rSrc)))>; - -// zext 8->16: Zero extend bytes to halfwords -def : Pat<(i16 (zext R8C:$rSrc)), - (ANDHIi8i16 R8C:$rSrc, 0xff)>; - -// zext 8->32: Zero extend bytes to words -def : Pat<(i32 (zext R8C:$rSrc)), - (ANDIi8i32 R8C:$rSrc, 0xff)>; - -// zext 8->64: Zero extend bytes to double words -def : Pat<(i64 (zext R8C:$rSrc)), - (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32 - (COPY_TO_REGCLASS - (ANDIi8i32 R8C:$rSrc,0xff), VECREG), - 0x4), - (ILv4i32 0x0), - (FSMBIv4i32 0x0f0f)), R64C)>; - -// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits -def : Pat<(i16 (anyext R8C:$rSrc)), - (ORHIi8i16 R8C:$rSrc, 0)>; - -// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits -def : Pat<(i32 (anyext R8C:$rSrc)), - (COPY_TO_REGCLASS R8C:$rSrc, R32C)>; - -// sext 16->64: Sign extend halfword to double word -def : Pat<(sext_inreg R64C:$rSrc, i16), - (XSWDr64_inreg (XSHWr64 R64C:$rSrc))>; - -def : Pat<(sext R16C:$rSrc), - (XSWDr64 (XSHWr16 R16C:$rSrc))>; - -// zext 16->32: Zero extend halfwords to words -def : Pat<(i32 (zext R16C:$rSrc)), - (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff))>; - -def : Pat<(i32 (zext (and R16C:$rSrc, 0xf))), - (ANDIi16i32 R16C:$rSrc, 0xf)>; - -def : Pat<(i32 (zext (and R16C:$rSrc, 0xff))), - (ANDIi16i32 R16C:$rSrc, 0xff)>; - -def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))), - (ANDIi16i32 R16C:$rSrc, 0xfff)>; - -// anyext 16->32: Extend 16->32 bits, irrespective of sign -def : Pat<(i32 (anyext R16C:$rSrc)), - (COPY_TO_REGCLASS R16C:$rSrc, R32C)>; - -//===----------------------------------------------------------------------===// -// Truncates: -// These truncates are for the SPU's supported types (i8, i16, i32). i64 and -// above are custom lowered. -//===----------------------------------------------------------------------===// - -def : Pat<(i8 (trunc GPRC:$src)), - (COPY_TO_REGCLASS - (SHUFBgprc GPRC:$src, GPRC:$src, - (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>; - -def : Pat<(i8 (trunc R64C:$src)), - (COPY_TO_REGCLASS - (SHUFBv2i64_m32 - (COPY_TO_REGCLASS R64C:$src, VECREG), - (COPY_TO_REGCLASS R64C:$src, VECREG), - (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>; - -def : Pat<(i8 (trunc R32C:$src)), - (COPY_TO_REGCLASS - (SHUFBv4i32_m32 - (COPY_TO_REGCLASS R32C:$src, VECREG), - (COPY_TO_REGCLASS R32C:$src, VECREG), - (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>; - -def : Pat<(i8 (trunc R16C:$src)), - (COPY_TO_REGCLASS - (SHUFBv4i32_m32 - (COPY_TO_REGCLASS R16C:$src, VECREG), - (COPY_TO_REGCLASS R16C:$src, VECREG), - (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>; - -def : Pat<(i16 (trunc GPRC:$src)), - (COPY_TO_REGCLASS - (SHUFBgprc GPRC:$src, GPRC:$src, - (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>; - -def : Pat<(i16 (trunc R64C:$src)), - (COPY_TO_REGCLASS - (SHUFBv2i64_m32 - (COPY_TO_REGCLASS R64C:$src, VECREG), - (COPY_TO_REGCLASS R64C:$src, VECREG), - (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>; - -def : Pat<(i16 (trunc R32C:$src)), - (COPY_TO_REGCLASS - (SHUFBv4i32_m32 - (COPY_TO_REGCLASS R32C:$src, VECREG), - (COPY_TO_REGCLASS R32C:$src, VECREG), - (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>; - -def : Pat<(i32 (trunc GPRC:$src)), - (COPY_TO_REGCLASS - (SHUFBgprc GPRC:$src, GPRC:$src, - (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>; - -def : Pat<(i32 (trunc R64C:$src)), - (COPY_TO_REGCLASS - (SHUFBv2i64_m32 - (COPY_TO_REGCLASS R64C:$src, VECREG), - (COPY_TO_REGCLASS R64C:$src, VECREG), - (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>; - -//===----------------------------------------------------------------------===// -// Address generation: SPU, like PPC, has to split addresses into high and -// low parts in order to load them into a register. -//===----------------------------------------------------------------------===// - -def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>; -def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>; -def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>; -def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>; - -def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0), - (SPUlo tglobaladdr:$in, 0)), - (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; - -def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0), - (SPUlo texternalsym:$in, 0)), - (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>; - -def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0), - (SPUlo tjumptable:$in, 0)), - (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; - -def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0), - (SPUlo tconstpool:$in, 0)), - (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; - -def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)), - (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; - -def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)), - (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>; - -def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)), - (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; - -def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)), - (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; - -// Intrinsics: -include "CellSDKIntrinsics.td" -// Various math operator instruction sequences -include "SPUMathInstr.td" -// 64-bit "instructions"/support -include "SPU64InstrInfo.td" -// 128-bit "instructions"/support -include "SPU128InstrInfo.td" diff --git a/llvm/lib/Target/CellSPU/SPUMachineFunction.cpp b/llvm/lib/Target/CellSPU/SPUMachineFunction.cpp deleted file mode 100644 index 3e948d0..0000000 --- a/llvm/lib/Target/CellSPU/SPUMachineFunction.cpp +++ /dev/null @@ -1,14 +0,0 @@ -//==-- SPUMachineFunctionInfo.cpp - Private data used for CellSPU ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "SPUMachineFunction.h" - -using namespace llvm; - -void SPUFunctionInfo::anchor() { } diff --git a/llvm/lib/Target/CellSPU/SPUMachineFunction.h b/llvm/lib/Target/CellSPU/SPUMachineFunction.h deleted file mode 100644 index 399684b..0000000 --- a/llvm/lib/Target/CellSPU/SPUMachineFunction.h +++ /dev/null @@ -1,50 +0,0 @@ -//===-- SPUMachineFunctionInfo.h - Private data used for CellSPU --*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the IBM Cell SPU specific subclass of MachineFunctionInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_MACHINE_FUNCTION_INFO_H -#define SPU_MACHINE_FUNCTION_INFO_H - -#include "llvm/CodeGen/MachineFunction.h" - -namespace llvm { - -/// SPUFunctionInfo - Cell SPU target-specific information for each -/// MachineFunction -class SPUFunctionInfo : public MachineFunctionInfo { - virtual void anchor(); - - /// UsesLR - Indicates whether LR is used in the current function. - /// - bool UsesLR; - - // VarArgsFrameIndex - FrameIndex for start of varargs area. - int VarArgsFrameIndex; - -public: - SPUFunctionInfo(MachineFunction& MF) - : UsesLR(false), - VarArgsFrameIndex(0) - {} - - void setUsesLR(bool U) { UsesLR = U; } - bool usesLR() { return UsesLR; } - - int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } - void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } -}; - -} // end of namespace llvm - - -#endif - diff --git a/llvm/lib/Target/CellSPU/SPUMathInstr.td b/llvm/lib/Target/CellSPU/SPUMathInstr.td deleted file mode 100644 index 9a5c397..0000000 --- a/llvm/lib/Target/CellSPU/SPUMathInstr.td +++ /dev/null @@ -1,97 +0,0 @@ -//===-- SPUMathInst.td - Cell SPU math operations ---------*- tablegen -*--===// -// -// Cell SPU math operations -// -// This target description file contains instruction sequences for various -// math operations, such as vector multiplies, i32 multiply, etc., for the -// SPU's i32, i16 i8 and corresponding vector types. -// -// Any resemblance to libsimdmath or the Cell SDK simdmath library is -// purely and completely coincidental. -//===----------------------------------------------------------------------===// - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// v16i8 multiply instruction sequence: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)), - (ORv4i32 - (ANDv4i32 - (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB), - (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8), - (ROTMAHIv8i16 VECREG:$rB, 8)), 8), - (FSMBIv8i16 0x2222)), - (ILAv4i32 0x0000ffff)), - (SHLIv4i32 - (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16), - (ROTMAIv4i32_i32 VECREG:$rB, 16)), - (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8), - (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8), - (FSMBIv8i16 0x2222)), 16))>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// v8i16 multiply instruction sequence: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), - (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB), - (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16), - (FSMBIv8i16 0xcccc))>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// v4i32, i32 multiply instruction sequence: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def MPYv4i32: - Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), - (Av4i32 - (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)), - (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))), - (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>; - -def MPYi32: - Pat<(mul R32C:$rA, R32C:$rB), - (Ar32 - (Ar32 (MPYHr32 R32C:$rA, R32C:$rB), - (MPYHr32 R32C:$rB, R32C:$rA)), - (MPYUr32 R32C:$rA, R32C:$rB))>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// f32, v4f32 divide instruction sequence: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -// Reciprocal estimate and interpolation -def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>; -// Division estimate -def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>; -// Newton-Raphson iteration -def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA), - Interpf32.Fragment, - DivEstf32.Fragment)>; -// Epsilon addition -def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>; - -def : Pat<(fdiv R32FP:$rA, R32FP:$rB), - (SELBf32_cond NRaphf32.Fragment, - Epsilonf32.Fragment, - (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>; - -// Reciprocal estimate and interpolation -def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>; -// Division estimate -def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>; -// Newton-Raphson iteration -def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment, - (v4f32 VECREG:$rB), - (v4f32 VECREG:$rA)), - Interpv4f32.Fragment, - DivEstv4f32.Fragment)>; -// Epsilon addition -def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>; - -def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), - (SELBv4f32_cond NRaphv4f32.Fragment, - Epsilonv4f32.Fragment, - (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB), - Epsilonv4f32.Fragment, - (v4f32 VECREG:$rA)), -1))>; diff --git a/llvm/lib/Target/CellSPU/SPUNodes.td b/llvm/lib/Target/CellSPU/SPUNodes.td deleted file mode 100644 index a47e9ef..0000000 --- a/llvm/lib/Target/CellSPU/SPUNodes.td +++ /dev/null @@ -1,159 +0,0 @@ -//=== SPUNodes.td - Specialized SelectionDAG nodes by CellSPU -*- tablegen -*-// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Type profiles and SelectionDAG nodes used by CellSPU -// -//===----------------------------------------------------------------------===// - -// Type profile for a call sequence -def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>; - -// SPU_GenControl: Type profile for generating control words for insertions -def SPU_GenControl : SDTypeProfile<1, 1, []>; -def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>; - -def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq, - [SDNPHasChain, SDNPOutGlue]>; -def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; -//===----------------------------------------------------------------------===// -// Operand constraints: -//===----------------------------------------------------------------------===// - -def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; -def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; - -// Operand type constraints for vector shuffle/permute operations -def SDT_SPUshuffle : SDTypeProfile<1, 3, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> -]>; - -// Vector binary operator type constraints (needs a further constraint to -// ensure that operand 0 is a vector...): - -def SPUVecBinop: SDTypeProfile<1, 2, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> -]>; - -// Trinary operators, e.g., addx, carry generate -def SPUIntTrinaryOp : SDTypeProfile<1, 3, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0> -]>; - -// SELECT_MASK type constraints: There are several variations for the various -// vector types (this avoids having to bit_convert all over the place.) -def SPUselmask_type: SDTypeProfile<1, 1, [ - SDTCisInt<1> -]>; - -// SELB type constraints: -def SPUselb_type: SDTypeProfile<1, 3, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<0, 3> ]>; - -// SPU Vector shift pseudo-instruction type constraints -def SPUvecshift_type: SDTypeProfile<1, 2, [ - SDTCisSameAs<0, 1>, SDTCisInt<2>]>; - -// "marker" type for i64 operators that need a shuffle mask -// (i.e., uses cg or bg or another instruction that needs to -// use shufb to get things in the right place.) -// Op0: The result -// Op1, 2: LHS, RHS -// Op3: Carry-generate shuffle mask - -def SPUmarker_type : SDTypeProfile<1, 3, [ - SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>; - -//===----------------------------------------------------------------------===// -// Synthetic/pseudo-instructions -//===----------------------------------------------------------------------===// - -// SPU CNTB: -def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>; - -// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see -// SPUISelLowering.h): -def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>; - -// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only): -def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>; -def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>; -def SPUvec_sra: SDNode<"ISD::SRA", SPUvecshift_type, []>; - -def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>; -def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>; - -// Vector rotate left, bits shifted out of the left are rotated in on the right -def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT", - SPUvecshift_type, []>; - -// Vector rotate left by bytes, but the count is given in bits and the SPU -// internally converts it to bytes (saves an instruction to mask off lower -// three bits) -def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS", - SPUvecshift_type>; - -// Shift entire quad left by bytes/bits. Zeros are shifted in on the right -// SHL_BITS the same as SHL for i128, but ISD::SHL is not implemented for i128 -def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>; -def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>; -def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>; - -// SPU form select mask for bytes, immediate -def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>; - -// SPU select bits instruction -def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>; - -def SDTprefslot2vec: SDTypeProfile<1, 1, []>; -def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>; - -def SPU_vec_demote : SDTypeProfile<1, 1, []>; -def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>; - -// Address high and low components, used for [r+r] type addressing -def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>; -def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>; - -// PC-relative address -def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>; - -// A-Form local store addresses -def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>; - -// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses -def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>; - -// i64 markers: supplies extra operands used to generate the i64 operator -// instruction sequences -def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>; -def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>; -def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>; - -//===----------------------------------------------------------------------===// -// Constraints: (taken from PPCInstrInfo.td) -//===----------------------------------------------------------------------===// - -class RegConstraint { - string Constraints = C; -} - -class NoEncode { - string DisableEncoding = E; -} - -//===----------------------------------------------------------------------===// -// Return (flag isn't quite what it means: the operations are flagged so that -// instruction scheduling doesn't disassociate them.) -//===----------------------------------------------------------------------===// - -def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; diff --git a/llvm/lib/Target/CellSPU/SPUNopFiller.cpp b/llvm/lib/Target/CellSPU/SPUNopFiller.cpp deleted file mode 100644 index 7c58041..0000000 --- a/llvm/lib/Target/CellSPU/SPUNopFiller.cpp +++ /dev/null @@ -1,153 +0,0 @@ -//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The final pass just before assembly printing. This pass is the last -// checkpoint where nops and lnops are added to the instruction stream to -// satisfy the dual issue requirements. The actual dual issue scheduling is -// done (TODO: nowhere, currently) -// -//===----------------------------------------------------------------------===// - -#include "SPU.h" -#include "SPUTargetMachine.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - struct SPUNopFiller : public MachineFunctionPass { - - TargetMachine &TM; - const TargetInstrInfo *TII; - const InstrItineraryData *IID; - bool isEvenPlace; // the instruction slot (mem address) at hand is even/odd - - static char ID; - SPUNopFiller(TargetMachine &tm) - : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()), - IID(tm.getInstrItineraryData()) - { - DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; ); - } - - virtual const char *getPassName() const { - return "SPU nop/lnop Filler"; - } - - void runOnMachineBasicBlock(MachineBasicBlock &MBB); - - bool runOnMachineFunction(MachineFunction &F) { - isEvenPlace = true; //all functions get an .align 3 directive at start - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) - runOnMachineBasicBlock(*FI); - return true; //never-ever do any more modifications, just print it! - } - - typedef enum { none = 0, // no more instructions in this function / BB - pseudo = 1, // this does not get executed - even = 2, - odd = 3 } SPUOpPlace; - SPUOpPlace getOpPlacement( MachineInstr &instr ); - - }; - char SPUNopFiller::ID = 0; - -} - -// Fill a BasicBlock to alignment. -// In the assebly we align the functions to 'even' adresses, but -// basic blocks have an implicit alignmnet. We hereby define -// basic blocks to have the same, even, alignment. -void SPUNopFiller:: -runOnMachineBasicBlock(MachineBasicBlock &MBB) -{ - assert( isEvenPlace && "basic block start from odd address"); - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) - { - SPUOpPlace this_optype, next_optype; - MachineBasicBlock::iterator J = I; - J++; - - this_optype = getOpPlacement( *I ); - next_optype = none; - while (J!=MBB.end()){ - next_optype = getOpPlacement( *J ); - ++J; - if (next_optype != pseudo ) - break; - } - - // padd: odd(wrong), even(wrong), ... - // to: nop(corr), odd(corr), even(corr)... - if( isEvenPlace && this_optype == odd && next_optype == even ) { - DEBUG( dbgs() <<"Adding NOP before: "; ); - DEBUG( I->dump(); ); - BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP)); - isEvenPlace=false; - } - - // padd: even(wrong), odd(wrong), ... - // to: lnop(corr), even(corr), odd(corr)... - else if ( !isEvenPlace && this_optype == even && next_optype == odd){ - DEBUG( dbgs() <<"Adding LNOP before: "; ); - DEBUG( I->dump(); ); - BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP)); - isEvenPlace=true; - } - - // now go to next mem slot - if( this_optype != pseudo ) - isEvenPlace = !isEvenPlace; - - } - - // padd basicblock end - if( !isEvenPlace ){ - MachineBasicBlock::iterator J = MBB.end(); - J--; - if (getOpPlacement( *J ) == odd) { - DEBUG( dbgs() <<"Padding basic block with NOP\n"; ); - BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP)); - } - else { - J++; - DEBUG( dbgs() <<"Padding basic block with LNOP\n"; ); - BuildMI(MBB, J, DebugLoc(), TII->get(SPU::LNOP)); - } - isEvenPlace=true; - } -} - -FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) { - return new SPUNopFiller(tm); -} - -// Figure out if 'instr' is executed in the even or odd pipeline -SPUNopFiller::SPUOpPlace -SPUNopFiller::getOpPlacement( MachineInstr &instr ) { - int sc = instr.getDesc().getSchedClass(); - const InstrStage *stage = IID->beginStage(sc); - unsigned FUs = stage->getUnits(); - SPUOpPlace retval; - - switch( FUs ) { - case 0: retval = pseudo; break; - case 1: retval = odd; break; - case 2: retval = even; break; - default: retval= pseudo; - assert( false && "got unknown FuncUnit\n"); - break; - }; - return retval; -} diff --git a/llvm/lib/Target/CellSPU/SPUOperands.td b/llvm/lib/Target/CellSPU/SPUOperands.td deleted file mode 100644 index 6f8deef..0000000 --- a/llvm/lib/Target/CellSPU/SPUOperands.td +++ /dev/null @@ -1,664 +0,0 @@ -//===-- SPUOperands.td - Cell SPU Instruction Operands -----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Cell SPU Instruction Operands: -//===----------------------------------------------------------------------===// - -// TO_IMM32 - Convert an i8/i16 to i32. -def TO_IMM32 : SDNodeXFormgetZExtValue()); -}]>; - -// TO_IMM16 - Convert an i8/i32 to i16. -def TO_IMM16 : SDNodeXFormgetTargetConstant(N->getZExtValue(), MVT::i16); -}]>; - - -def LO16 : SDNodeXFormgetZExtValue(); - // Transformation function: get the low 16 bits. - return getI32Imm(val & 0xffff); -}]>; - -def LO16_vec : SDNodeXFormgetOpcode() == ISD::BUILD_VECTOR - && "LO16_vec got something other than a BUILD_VECTOR"); - - // Get first constant operand... - for (unsigned i = 0, e = N->getNumOperands(); - OpVal.getNode() == 0 && i != e; ++i) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; - if (OpVal.getNode() == 0) - OpVal = N->getOperand(i); - } - - assert(OpVal.getNode() != 0 && "LO16_vec did not locate a node"); - ConstantSDNode *CN = cast(OpVal); - return getI32Imm((unsigned)CN->getZExtValue() & 0xffff); -}]>; - -// Transform an immediate, returning the high 16 bits shifted down: -def HI16 : SDNodeXFormgetZExtValue() >> 16); -}]>; - -// Transformation function: shift the high 16 bit immediate from a build_vector -// node into the low 16 bits, and return a 16-bit constant. -def HI16_vec : SDNodeXFormgetOpcode() == ISD::BUILD_VECTOR - && "HI16_vec got something other than a BUILD_VECTOR"); - - // Get first constant operand... - for (unsigned i = 0, e = N->getNumOperands(); - OpVal.getNode() == 0 && i != e; ++i) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; - if (OpVal.getNode() == 0) - OpVal = N->getOperand(i); - } - - assert(OpVal.getNode() != 0 && "HI16_vec did not locate a node"); - ConstantSDNode *CN = cast(OpVal); - return getI32Imm((unsigned)CN->getZExtValue() >> 16); -}]>; - -// simm7 predicate - True if the immediate fits in an 7-bit signed -// field. -def simm7: PatLeaf<(imm), [{ - int sextVal = int(N->getSExtValue()); - return (sextVal >= -64 && sextVal <= 63); -}]>; - -// uimm7 predicate - True if the immediate fits in an 7-bit unsigned -// field. -def uimm7: PatLeaf<(imm), [{ - return (N->getZExtValue() <= 0x7f); -}]>; - -// immSExt8 predicate - True if the immediate fits in an 8-bit sign extended -// field. -def immSExt8 : PatLeaf<(imm), [{ - int Value = int(N->getSExtValue()); - return (Value >= -(1 << 8) && Value <= (1 << 8) - 1); -}]>; - -// immU8: immediate, unsigned 8-bit quantity -def immU8 : PatLeaf<(imm), [{ - return (N->getZExtValue() <= 0xff); -}]>; - -// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign -// extended field. Used by RI10Form instructions like 'ldq'. -def i32ImmSExt10 : PatLeaf<(imm), [{ - return isI32IntS10Immediate(N); -}]>; - -// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned -// field. Used by RI10Form instructions like 'ldq'. -def i32ImmUns10 : PatLeaf<(imm), [{ - return isI32IntU10Immediate(N); -}]>; - -// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign -// extended field. Used by RI10Form instructions like 'ldq'. -def i16ImmSExt10 : PatLeaf<(imm), [{ - return isI16IntS10Immediate(N); -}]>; - -// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned -// value. Used by RI10Form instructions. -def i16ImmUns10 : PatLeaf<(imm), [{ - return isI16IntU10Immediate(N); -}]>; - -def immSExt16 : PatLeaf<(imm), [{ - // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended - // field. - short Ignored; - return isIntS16Immediate(N, Ignored); -}]>; - -def immZExt16 : PatLeaf<(imm), [{ - // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended - // field. - return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue(); -}], LO16>; - -def immU16 : PatLeaf<(imm), [{ - // immU16 predicate- True if the immediate fits into a 16-bit unsigned field. - return (uint64_t)N->getZExtValue() == (N->getZExtValue() & 0xffff); -}]>; - -def imm18 : PatLeaf<(imm), [{ - // imm18 predicate: True if the immediate fits into an 18-bit unsigned field. - int Value = (int) N->getZExtValue(); - return isUInt<18>(Value); -}]>; - -def lo16 : PatLeaf<(imm), [{ - // lo16 predicate - returns true if the immediate has all zeros in the - // low order bits and is a 32-bit constant: - if (N->getValueType(0) == MVT::i32) { - uint32_t val = N->getZExtValue(); - return ((val & 0x0000ffff) == val); - } - - return false; -}], LO16>; - -def hi16 : PatLeaf<(imm), [{ - // hi16 predicate - returns true if the immediate has all zeros in the - // low order bits and is a 32-bit constant: - if (N->getValueType(0) == MVT::i32) { - uint32_t val = uint32_t(N->getZExtValue()); - return ((val & 0xffff0000) == val); - } else if (N->getValueType(0) == MVT::i64) { - uint64_t val = N->getZExtValue(); - return ((val & 0xffff0000ULL) == val); - } - - return false; -}], HI16>; - -def bitshift : PatLeaf<(imm), [{ - // bitshift predicate - returns true if 0 < imm <= 7 for SHLQBII - // (shift left quadword by bits immediate) - int64_t Val = N->getZExtValue(); - return (Val > 0 && Val <= 7); -}]>; - -//===----------------------------------------------------------------------===// -// Floating point operands: -//===----------------------------------------------------------------------===// - -// Transform a float, returning the high 16 bits shifted down, as if -// the float was really an unsigned integer: -def HI16_f32 : SDNodeXFormgetValueAPF().convertToFloat(); - return getI32Imm(FloatToBits(fval) >> 16); -}]>; - -// Transformation function on floats: get the low 16 bits as if the float was -// an unsigned integer. -def LO16_f32 : SDNodeXFormgetValueAPF().convertToFloat(); - return getI32Imm(FloatToBits(fval) & 0xffff); -}]>; - -def FPimm_sext16 : SDNodeXFormgetValueAPF().convertToFloat(); - return getI32Imm((int) ((FloatToBits(fval) << 16) >> 16)); -}]>; - -def FPimm_u18 : SDNodeXFormgetValueAPF().convertToFloat(); - return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1)); -}]>; - -def fpimmSExt16 : PatLeaf<(fpimm), [{ - short Ignored; - return isFPS16Immediate(N, Ignored); -}], FPimm_sext16>; - -// Does the SFP constant only have upp 16 bits set? -def hi16_f32 : PatLeaf<(fpimm), [{ - if (N->getValueType(0) == MVT::f32) { - uint32_t val = FloatToBits(N->getValueAPF().convertToFloat()); - return ((val & 0xffff0000) == val); - } - - return false; -}], HI16_f32>; - -// Does the SFP constant fit into 18 bits? -def fpimm18 : PatLeaf<(fpimm), [{ - if (N->getValueType(0) == MVT::f32) { - uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat()); - return isUInt<18>(Value); - } - - return false; -}], FPimm_u18>; - -//===----------------------------------------------------------------------===// -// 64-bit operands (TODO): -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// build_vector operands: -//===----------------------------------------------------------------------===// - -// v16i8SExt8Imm_xform function: convert build_vector to 8-bit sign extended -// immediate constant load for v16i8 vectors. N.B.: The incoming constant has -// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a). -def v16i8SExt8Imm_xform: SDNodeXForm; - -// v16i8SExt8Imm: Predicate test for 8-bit sign extended immediate constant -// load, works in conjunction with its transform function. N.B.: This relies the -// incoming constant being a 16-bit quantity, where the upper and lower bytes -// are EXACTLY the same (e.g., 0x2a2a) -def v16i8SExt8Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0; -}], v16i8SExt8Imm_xform>; - -// v16i8U8Imm_xform function: convert build_vector to unsigned 8-bit -// immediate constant load for v16i8 vectors. N.B.: The incoming constant has -// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a). -def v16i8U8Imm_xform: SDNodeXForm; - -// v16i8U8Imm: Predicate test for unsigned 8-bit immediate constant -// load, works in conjunction with its transform function. N.B.: This relies the -// incoming constant being a 16-bit quantity, where the upper and lower bytes -// are EXACTLY the same (e.g., 0x2a2a) -def v16i8U8Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0; -}], v16i8U8Imm_xform>; - -// v8i16SExt8Imm_xform function: convert build_vector to 8-bit sign extended -// immediate constant load for v8i16 vectors. -def v8i16SExt8Imm_xform: SDNodeXForm; - -// v8i16SExt8Imm: Predicate test for 8-bit sign extended immediate constant -// load, works in conjunction with its transform function. -def v8i16SExt8Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16).getNode() != 0; -}], v8i16SExt8Imm_xform>; - -// v8i16SExt10Imm_xform function: convert build_vector to 16-bit sign extended -// immediate constant load for v8i16 vectors. -def v8i16SExt10Imm_xform: SDNodeXForm; - -// v8i16SExt10Imm: Predicate test for 16-bit sign extended immediate constant -// load, works in conjunction with its transform function. -def v8i16SExt10Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0; -}], v8i16SExt10Imm_xform>; - -// v8i16Uns10Imm_xform function: convert build_vector to 16-bit unsigned -// immediate constant load for v8i16 vectors. -def v8i16Uns10Imm_xform: SDNodeXForm; - -// v8i16Uns10Imm: Predicate test for 16-bit unsigned immediate constant -// load, works in conjunction with its transform function. -def v8i16Uns10Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0; -}], v8i16Uns10Imm_xform>; - -// v8i16SExt16Imm_xform function: convert build_vector to 16-bit sign extended -// immediate constant load for v8i16 vectors. -def v8i16Uns16Imm_xform: SDNodeXForm; - -// v8i16SExt16Imm: Predicate test for 16-bit sign extended immediate constant -// load, works in conjunction with its transform function. -def v8i16SExt16Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).getNode() != 0; -}], v8i16Uns16Imm_xform>; - -// v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended -// immediate constant load for v4i32 vectors. -def v4i32SExt10Imm_xform: SDNodeXForm; - -// v4i32SExt10Imm: Predicate test for 10-bit sign extended immediate constant -// load, works in conjunction with its transform function. -def v4i32SExt10Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0; -}], v4i32SExt10Imm_xform>; - -// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned -// immediate constant load for v4i32 vectors. -def v4i32Uns10Imm_xform: SDNodeXForm; - -// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant -// load, works in conjunction with its transform function. -def v4i32Uns10Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0; -}], v4i32Uns10Imm_xform>; - -// v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended -// immediate constant load for v4i32 vectors. -def v4i32SExt16Imm_xform: SDNodeXForm; - -// v4i32SExt16Imm: Predicate test for 16-bit sign extended immediate constant -// load, works in conjunction with its transform function. -def v4i32SExt16Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32).getNode() != 0; -}], v4i32SExt16Imm_xform>; - -// v4i32Uns18Imm_xform function: convert build_vector to 18-bit unsigned -// immediate constant load for v4i32 vectors. -def v4i32Uns18Imm_xform: SDNodeXForm; - -// v4i32Uns18Imm: Predicate test for 18-bit unsigned immediate constant load, -// works in conjunction with its transform function. -def v4i32Uns18Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32).getNode() != 0; -}], v4i32Uns18Imm_xform>; - -// ILHUvec_get_imm xform function: convert build_vector to ILHUvec imm constant -// load. -def ILHUvec_get_imm: SDNodeXForm; - -/// immILHUvec: Predicate test for a ILHU constant vector. -def immILHUvec: PatLeaf<(build_vector), [{ - return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32).getNode() != 0; -}], ILHUvec_get_imm>; - -// Catch-all for any other i32 vector constants -def v4i32_get_imm: SDNodeXForm; - -def v4i32Imm: PatLeaf<(build_vector), [{ - return SPU::get_v4i32_imm(N, *CurDAG).getNode() != 0; -}], v4i32_get_imm>; - -// v2i64SExt10Imm_xform function: convert build_vector to 10-bit sign extended -// immediate constant load for v2i64 vectors. -def v2i64SExt10Imm_xform: SDNodeXForm; - -// v2i64SExt10Imm: Predicate test for 10-bit sign extended immediate constant -// load, works in conjunction with its transform function. -def v2i64SExt10Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64).getNode() != 0; -}], v2i64SExt10Imm_xform>; - -// v2i64SExt16Imm_xform function: convert build_vector to 16-bit sign extended -// immediate constant load for v2i64 vectors. -def v2i64SExt16Imm_xform: SDNodeXForm; - -// v2i64SExt16Imm: Predicate test for 16-bit sign extended immediate constant -// load, works in conjunction with its transform function. -def v2i64SExt16Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64).getNode() != 0; -}], v2i64SExt16Imm_xform>; - -// v2i64Uns18Imm_xform function: convert build_vector to 18-bit unsigned -// immediate constant load for v2i64 vectors. -def v2i64Uns18Imm_xform: SDNodeXForm; - -// v2i64Uns18Imm: Predicate test for 18-bit unsigned immediate constant load, -// works in conjunction with its transform function. -def v2i64Uns18Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64).getNode() != 0; -}], v2i64Uns18Imm_xform>; - -/// immILHUvec: Predicate test for a ILHU constant vector. -def immILHUvec_i64: PatLeaf<(build_vector), [{ - return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i64).getNode() != 0; -}], ILHUvec_get_imm>; - -// Catch-all for any other i32 vector constants -def v2i64_get_imm: SDNodeXForm; - -def v2i64Imm: PatLeaf<(build_vector), [{ - return SPU::get_v2i64_imm(N, *CurDAG).getNode() != 0; -}], v2i64_get_imm>; - -//===----------------------------------------------------------------------===// -// Operand Definitions. - -def s7imm: Operand { - let PrintMethod = "printS7ImmOperand"; -} - -def s7imm_i8: Operand { - let PrintMethod = "printS7ImmOperand"; -} - -def u7imm: Operand { - let PrintMethod = "printU7ImmOperand"; -} - -def u7imm_i8: Operand { - let PrintMethod = "printU7ImmOperand"; -} - -def u7imm_i32: Operand { - let PrintMethod = "printU7ImmOperand"; -} - -// Halfword, signed 10-bit constant -def s10imm : Operand { - let PrintMethod = "printS10ImmOperand"; -} - -def s10imm_i8: Operand { - let PrintMethod = "printS10ImmOperand"; -} - -def s10imm_i32: Operand { - let PrintMethod = "printS10ImmOperand"; -} - -def s10imm_i64: Operand { - let PrintMethod = "printS10ImmOperand"; -} - -// Unsigned 10-bit integers: -def u10imm: Operand { - let PrintMethod = "printU10ImmOperand"; -} - -def u10imm_i8: Operand { - let PrintMethod = "printU10ImmOperand"; -} - -def u10imm_i32: Operand { - let PrintMethod = "printU10ImmOperand"; -} - -def s16imm : Operand { - let PrintMethod = "printS16ImmOperand"; -} - -def s16imm_i8: Operand { - let PrintMethod = "printS16ImmOperand"; -} - -def s16imm_i32: Operand { - let PrintMethod = "printS16ImmOperand"; -} - -def s16imm_i64: Operand { - let PrintMethod = "printS16ImmOperand"; -} - -def s16imm_f32: Operand { - let PrintMethod = "printS16ImmOperand"; -} - -def s16imm_f64: Operand { - let PrintMethod = "printS16ImmOperand"; -} - -def u16imm_i64 : Operand { - let PrintMethod = "printU16ImmOperand"; -} - -def u16imm_i32 : Operand { - let PrintMethod = "printU16ImmOperand"; -} - -def u16imm : Operand { - let PrintMethod = "printU16ImmOperand"; -} - -def f16imm : Operand { - let PrintMethod = "printU16ImmOperand"; -} - -def s18imm : Operand { - let PrintMethod = "printS18ImmOperand"; -} - -def u18imm : Operand { - let PrintMethod = "printU18ImmOperand"; -} - -def u18imm_i64 : Operand { - let PrintMethod = "printU18ImmOperand"; -} - -def f18imm : Operand { - let PrintMethod = "printU18ImmOperand"; -} - -def f18imm_f64 : Operand { - let PrintMethod = "printU18ImmOperand"; -} - -// Negated 7-bit halfword rotate immediate operands -def rothNeg7imm : Operand { - let PrintMethod = "printROTHNeg7Imm"; -} - -def rothNeg7imm_i16 : Operand { - let PrintMethod = "printROTHNeg7Imm"; -} - -// Negated 7-bit word rotate immediate operands -def rotNeg7imm : Operand { - let PrintMethod = "printROTNeg7Imm"; -} - -def rotNeg7imm_i16 : Operand { - let PrintMethod = "printROTNeg7Imm"; -} - -def rotNeg7imm_i8 : Operand { - let PrintMethod = "printROTNeg7Imm"; -} - -def target : Operand { - let PrintMethod = "printBranchOperand"; -} - -// Absolute address call target -def calltarget : Operand { - let PrintMethod = "printCallOperand"; - let MIOperandInfo = (ops u18imm:$calldest); -} - -// PC relative call target -def relcalltarget : Operand { - let PrintMethod = "printPCRelativeOperand"; - let MIOperandInfo = (ops s16imm:$calldest); -} - -// Branch targets: -def brtarget : Operand { - let PrintMethod = "printPCRelativeOperand"; -} - -// Hint for branch target -def hbrtarget : Operand { - let PrintMethod = "printHBROperand"; -} - -// Indirect call target -def indcalltarget : Operand { - let PrintMethod = "printCallOperand"; - let MIOperandInfo = (ops ptr_rc:$calldest); -} - -def symbolHi: Operand { - let PrintMethod = "printSymbolHi"; -} - -def symbolLo: Operand { - let PrintMethod = "printSymbolLo"; -} - -def symbolLSA: Operand { - let PrintMethod = "printSymbolLSA"; -} - -// Shuffle address memory operaand [s7imm(reg) d-format] -def shufaddr : Operand { - let PrintMethod = "printShufAddr"; - let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg); -} - -// memory s10imm(reg) operand -def dformaddr : Operand { - let PrintMethod = "printDFormAddr"; - let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg); -} - -// 256K local store address -// N.B.: The tblgen code generator expects to have two operands, an offset -// and a pointer. Of these, only the immediate is actually used. -def addr256k : Operand { - let PrintMethod = "printAddr256K"; - let MIOperandInfo = (ops s16imm:$imm, ptr_rc:$reg); -} - -// memory s18imm(reg) operand -def memri18 : Operand { - let PrintMethod = "printMemRegImmS18"; - let MIOperandInfo = (ops s18imm:$imm, ptr_rc:$reg); -} - -// memory register + register operand -def memrr : Operand { - let PrintMethod = "printMemRegReg"; - let MIOperandInfo = (ops ptr_rc:$reg_a, ptr_rc:$reg_b); -} - -// Define SPU-specific addressing modes: These come in three basic -// flavors: -// -// D-form : [r+I10] (10-bit signed offset + reg) -// X-form : [r+r] (reg+reg) -// A-form : abs (256K LSA offset) -// D-form(2): [r+I7] (7-bit signed offset + reg) - -def dform_addr : ComplexPattern; -def xform_addr : ComplexPattern; -def aform_addr : ComplexPattern; -def dform2_addr : ComplexPattern; diff --git a/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp b/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp deleted file mode 100644 index e6c872d..0000000 --- a/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp +++ /dev/null @@ -1,357 +0,0 @@ -//===-- SPURegisterInfo.cpp - Cell SPU Register Information ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Cell implementation of the TargetRegisterInfo class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "reginfo" -#include "SPURegisterInfo.h" -#include "SPU.h" -#include "SPUInstrBuilder.h" -#include "SPUSubtarget.h" -#include "SPUMachineFunction.h" -#include "SPUFrameLowering.h" -#include "llvm/Constants.h" -#include "llvm/Type.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" -#include - -#define GET_REGINFO_TARGET_DESC -#include "SPUGenRegisterInfo.inc" - -using namespace llvm; - -/// getRegisterNumbering - Given the enum value for some register, e.g. -/// PPC::F14, return the number that it corresponds to (e.g. 14). -unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) { - using namespace SPU; - switch (RegEnum) { - case SPU::R0: return 0; - case SPU::R1: return 1; - case SPU::R2: return 2; - case SPU::R3: return 3; - case SPU::R4: return 4; - case SPU::R5: return 5; - case SPU::R6: return 6; - case SPU::R7: return 7; - case SPU::R8: return 8; - case SPU::R9: return 9; - case SPU::R10: return 10; - case SPU::R11: return 11; - case SPU::R12: return 12; - case SPU::R13: return 13; - case SPU::R14: return 14; - case SPU::R15: return 15; - case SPU::R16: return 16; - case SPU::R17: return 17; - case SPU::R18: return 18; - case SPU::R19: return 19; - case SPU::R20: return 20; - case SPU::R21: return 21; - case SPU::R22: return 22; - case SPU::R23: return 23; - case SPU::R24: return 24; - case SPU::R25: return 25; - case SPU::R26: return 26; - case SPU::R27: return 27; - case SPU::R28: return 28; - case SPU::R29: return 29; - case SPU::R30: return 30; - case SPU::R31: return 31; - case SPU::R32: return 32; - case SPU::R33: return 33; - case SPU::R34: return 34; - case SPU::R35: return 35; - case SPU::R36: return 36; - case SPU::R37: return 37; - case SPU::R38: return 38; - case SPU::R39: return 39; - case SPU::R40: return 40; - case SPU::R41: return 41; - case SPU::R42: return 42; - case SPU::R43: return 43; - case SPU::R44: return 44; - case SPU::R45: return 45; - case SPU::R46: return 46; - case SPU::R47: return 47; - case SPU::R48: return 48; - case SPU::R49: return 49; - case SPU::R50: return 50; - case SPU::R51: return 51; - case SPU::R52: return 52; - case SPU::R53: return 53; - case SPU::R54: return 54; - case SPU::R55: return 55; - case SPU::R56: return 56; - case SPU::R57: return 57; - case SPU::R58: return 58; - case SPU::R59: return 59; - case SPU::R60: return 60; - case SPU::R61: return 61; - case SPU::R62: return 62; - case SPU::R63: return 63; - case SPU::R64: return 64; - case SPU::R65: return 65; - case SPU::R66: return 66; - case SPU::R67: return 67; - case SPU::R68: return 68; - case SPU::R69: return 69; - case SPU::R70: return 70; - case SPU::R71: return 71; - case SPU::R72: return 72; - case SPU::R73: return 73; - case SPU::R74: return 74; - case SPU::R75: return 75; - case SPU::R76: return 76; - case SPU::R77: return 77; - case SPU::R78: return 78; - case SPU::R79: return 79; - case SPU::R80: return 80; - case SPU::R81: return 81; - case SPU::R82: return 82; - case SPU::R83: return 83; - case SPU::R84: return 84; - case SPU::R85: return 85; - case SPU::R86: return 86; - case SPU::R87: return 87; - case SPU::R88: return 88; - case SPU::R89: return 89; - case SPU::R90: return 90; - case SPU::R91: return 91; - case SPU::R92: return 92; - case SPU::R93: return 93; - case SPU::R94: return 94; - case SPU::R95: return 95; - case SPU::R96: return 96; - case SPU::R97: return 97; - case SPU::R98: return 98; - case SPU::R99: return 99; - case SPU::R100: return 100; - case SPU::R101: return 101; - case SPU::R102: return 102; - case SPU::R103: return 103; - case SPU::R104: return 104; - case SPU::R105: return 105; - case SPU::R106: return 106; - case SPU::R107: return 107; - case SPU::R108: return 108; - case SPU::R109: return 109; - case SPU::R110: return 110; - case SPU::R111: return 111; - case SPU::R112: return 112; - case SPU::R113: return 113; - case SPU::R114: return 114; - case SPU::R115: return 115; - case SPU::R116: return 116; - case SPU::R117: return 117; - case SPU::R118: return 118; - case SPU::R119: return 119; - case SPU::R120: return 120; - case SPU::R121: return 121; - case SPU::R122: return 122; - case SPU::R123: return 123; - case SPU::R124: return 124; - case SPU::R125: return 125; - case SPU::R126: return 126; - case SPU::R127: return 127; - default: - report_fatal_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering"); - } -} - -SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget, - const TargetInstrInfo &tii) : - SPUGenRegisterInfo(SPU::R0), Subtarget(subtarget), TII(tii) -{ -} - -/// getPointerRegClass - Return the register class to use to hold pointers. -/// This is used for addressing modes. -const TargetRegisterClass * -SPURegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) - const { - return &SPU::R32CRegClass; -} - -const uint16_t * -SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const -{ - // Cell ABI calling convention - static const uint16_t SPU_CalleeSaveRegs[] = { - SPU::R80, SPU::R81, SPU::R82, SPU::R83, - SPU::R84, SPU::R85, SPU::R86, SPU::R87, - SPU::R88, SPU::R89, SPU::R90, SPU::R91, - SPU::R92, SPU::R93, SPU::R94, SPU::R95, - SPU::R96, SPU::R97, SPU::R98, SPU::R99, - SPU::R100, SPU::R101, SPU::R102, SPU::R103, - SPU::R104, SPU::R105, SPU::R106, SPU::R107, - SPU::R108, SPU::R109, SPU::R110, SPU::R111, - SPU::R112, SPU::R113, SPU::R114, SPU::R115, - SPU::R116, SPU::R117, SPU::R118, SPU::R119, - SPU::R120, SPU::R121, SPU::R122, SPU::R123, - SPU::R124, SPU::R125, SPU::R126, SPU::R127, - SPU::R2, /* environment pointer */ - SPU::R1, /* stack pointer */ - SPU::R0, /* link register */ - 0 /* end */ - }; - - return SPU_CalleeSaveRegs; -} - -/*! - R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is - generally unused) are the Cell's reserved registers - */ -BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const { - BitVector Reserved(getNumRegs()); - Reserved.set(SPU::R0); // LR - Reserved.set(SPU::R1); // SP - Reserved.set(SPU::R2); // environment pointer - return Reserved; -} - -//===----------------------------------------------------------------------===// -// Stack Frame Processing methods -//===----------------------------------------------------------------------===// - -//-------------------------------------------------------------------------- -void -SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) - const -{ - // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. - MBB.erase(I); -} - -void -SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS) const -{ - unsigned i = 0; - MachineInstr &MI = *II; - MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - DebugLoc dl = II->getDebugLoc(); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - MachineOperand &SPOp = MI.getOperand(i); - int FrameIndex = SPOp.getIndex(); - - // Now add the frame object offset to the offset from r1. - int Offset = MFI->getObjectOffset(FrameIndex); - - // Most instructions, except for generated FrameIndex additions using AIr32 - // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the - // immediate in operand 2. - unsigned OpNo = 1; - if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32) - OpNo = 2; - - MachineOperand &MO = MI.getOperand(OpNo); - - // Offset is biased by $lr's slot at the bottom. - Offset += MO.getImm() + MFI->getStackSize() + SPUFrameLowering::minStackSize(); - assert((Offset & 0xf) == 0 - && "16-byte alignment violated in eliminateFrameIndex"); - - // Replace the FrameIndex with base register with $sp (aka $r1) - SPOp.ChangeToRegister(SPU::R1, false); - - // if 'Offset' doesn't fit to the D-form instruction's - // immediate, convert the instruction to X-form - // if the instruction is not an AI (which takes a s10 immediate), assume - // it is a load/store that can take a s14 immediate - if ((MI.getOpcode() == SPU::AIr32 && !isInt<10>(Offset)) - || !isInt<14>(Offset)) { - int newOpcode = convertDFormToXForm(MI.getOpcode()); - unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj); - BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg ) - .addImm(Offset); - BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg()) - .addReg(tmpReg, RegState::Kill) - .addReg(SPU::R1); - // remove the replaced D-form instruction - MBB.erase(II); - } else { - MO.ChangeToImmediate(Offset); - } -} - -unsigned -SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const -{ - return SPU::R1; -} - -int -SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const -{ - switch(dFormOpcode) - { - case SPU::AIr32: return SPU::Ar32; - case SPU::LQDr32: return SPU::LQXr32; - case SPU::LQDr128: return SPU::LQXr128; - case SPU::LQDv16i8: return SPU::LQXv16i8; - case SPU::LQDv4i32: return SPU::LQXv4i32; - case SPU::LQDv4f32: return SPU::LQXv4f32; - case SPU::STQDr32: return SPU::STQXr32; - case SPU::STQDr128: return SPU::STQXr128; - case SPU::STQDv16i8: return SPU::STQXv16i8; - case SPU::STQDv4i32: return SPU::STQXv4i32; - case SPU::STQDv4f32: return SPU::STQXv4f32; - - default: assert( false && "Unhandled D to X-form conversion"); - } - // default will assert, but need to return something to keep the - // compiler happy. - return dFormOpcode; -} - -// TODO this is already copied from PPC. Could this convenience function -// be moved to the RegScavenger class? -unsigned -SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II, - RegScavenger *RS, - const TargetRegisterClass *RC, - int SPAdj) const -{ - assert(RS && "Register scavenging must be on"); - unsigned Reg = RS->FindUnusedReg(RC); - if (Reg == 0) - Reg = RS->scavengeRegister(RC, II, SPAdj); - assert( Reg && "Register scavenger failed"); - return Reg; -} diff --git a/llvm/lib/Target/CellSPU/SPURegisterInfo.h b/llvm/lib/Target/CellSPU/SPURegisterInfo.h deleted file mode 100644 index e9f9aba..0000000 --- a/llvm/lib/Target/CellSPU/SPURegisterInfo.h +++ /dev/null @@ -1,106 +0,0 @@ -//===-- SPURegisterInfo.h - Cell SPU Register Information Impl --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Cell SPU implementation of the TargetRegisterInfo -// class. -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_REGISTERINFO_H -#define SPU_REGISTERINFO_H - -#include "SPU.h" - -#define GET_REGINFO_HEADER -#include "SPUGenRegisterInfo.inc" - -namespace llvm { - class SPUSubtarget; - class TargetInstrInfo; - class Type; - - class SPURegisterInfo : public SPUGenRegisterInfo { - private: - const SPUSubtarget &Subtarget; - const TargetInstrInfo &TII; - - //! Predicate: Does the machine function use the link register? - bool usesLR(MachineFunction &MF) const; - - public: - SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii); - - //! Translate a register's enum value to a register number - /*! - This method translates a register's enum value to it's regiser number, - e.g. SPU::R14 -> 14. - */ - static unsigned getRegisterNumbering(unsigned RegEnum); - - /// getPointerRegClass - Return the register class to use to hold pointers. - /// This is used for addressing modes. - virtual const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const; - - /// After allocating this many registers, the allocator should feel - /// register pressure. The value is a somewhat random guess, based on the - /// number of non callee saved registers in the C calling convention. - virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC, - MachineFunction &MF) const{ - return 50; - } - - //! Return the array of callee-saved registers - virtual const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const; - - //! Allow for scavenging, so we can get scratch registers when needed. - virtual bool requiresRegisterScavenging(const MachineFunction &MF) const - { return true; } - - //! Enable tracking of liveness after register allocation, since register - // scavenging is enabled. - virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const - { return true; } - - //! Return the reserved registers - BitVector getReservedRegs(const MachineFunction &MF) const; - - //! Eliminate the call frame setup pseudo-instructions - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - //! Convert frame indicies into machine operands - void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS = NULL) const; - - //! Get the stack frame register (SP, aka R1) - unsigned getFrameRegister(const MachineFunction &MF) const; - - //------------------------------------------------------------------------ - // New methods added: - //------------------------------------------------------------------------ - - //! Convert D-form load/store to X-form load/store - /*! - Converts a regiser displacement load/store into a register-indexed - load/store for large stack frames, when the stack frame exceeds the - range of a s10 displacement. - */ - int convertDFormToXForm(int dFormOpcode) const; - - //! Acquire an unused register in an emergency. - unsigned findScratchRegister(MachineBasicBlock::iterator II, - RegScavenger *RS, - const TargetRegisterClass *RC, - int SPAdj) const; - - }; -} // end namespace llvm - -#endif diff --git a/llvm/lib/Target/CellSPU/SPURegisterInfo.td b/llvm/lib/Target/CellSPU/SPURegisterInfo.td deleted file mode 100644 index f27b042..0000000 --- a/llvm/lib/Target/CellSPU/SPURegisterInfo.td +++ /dev/null @@ -1,183 +0,0 @@ -//===-- SPURegisterInfo.td - The Cell SPU Register File ----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -class SPUReg : Register { - let Namespace = "SPU"; -} - -// The SPU's register are all 128-bits wide, which makes specifying the -// registers relatively easy, if relatively mundane: - -class SPUVecReg num, string n> : SPUReg { - field bits<7> Num = num; -} - -def R0 : SPUVecReg<0, "$lr">, DwarfRegNum<[0]>; -def R1 : SPUVecReg<1, "$sp">, DwarfRegNum<[1]>; -def R2 : SPUVecReg<2, "$2">, DwarfRegNum<[2]>; -def R3 : SPUVecReg<3, "$3">, DwarfRegNum<[3]>; -def R4 : SPUVecReg<4, "$4">, DwarfRegNum<[4]>; -def R5 : SPUVecReg<5, "$5">, DwarfRegNum<[5]>; -def R6 : SPUVecReg<6, "$6">, DwarfRegNum<[6]>; -def R7 : SPUVecReg<7, "$7">, DwarfRegNum<[7]>; -def R8 : SPUVecReg<8, "$8">, DwarfRegNum<[8]>; -def R9 : SPUVecReg<9, "$9">, DwarfRegNum<[9]>; -def R10 : SPUVecReg<10, "$10">, DwarfRegNum<[10]>; -def R11 : SPUVecReg<11, "$11">, DwarfRegNum<[11]>; -def R12 : SPUVecReg<12, "$12">, DwarfRegNum<[12]>; -def R13 : SPUVecReg<13, "$13">, DwarfRegNum<[13]>; -def R14 : SPUVecReg<14, "$14">, DwarfRegNum<[14]>; -def R15 : SPUVecReg<15, "$15">, DwarfRegNum<[15]>; -def R16 : SPUVecReg<16, "$16">, DwarfRegNum<[16]>; -def R17 : SPUVecReg<17, "$17">, DwarfRegNum<[17]>; -def R18 : SPUVecReg<18, "$18">, DwarfRegNum<[18]>; -def R19 : SPUVecReg<19, "$19">, DwarfRegNum<[19]>; -def R20 : SPUVecReg<20, "$20">, DwarfRegNum<[20]>; -def R21 : SPUVecReg<21, "$21">, DwarfRegNum<[21]>; -def R22 : SPUVecReg<22, "$22">, DwarfRegNum<[22]>; -def R23 : SPUVecReg<23, "$23">, DwarfRegNum<[23]>; -def R24 : SPUVecReg<24, "$24">, DwarfRegNum<[24]>; -def R25 : SPUVecReg<25, "$25">, DwarfRegNum<[25]>; -def R26 : SPUVecReg<26, "$26">, DwarfRegNum<[26]>; -def R27 : SPUVecReg<27, "$27">, DwarfRegNum<[27]>; -def R28 : SPUVecReg<28, "$28">, DwarfRegNum<[28]>; -def R29 : SPUVecReg<29, "$29">, DwarfRegNum<[29]>; -def R30 : SPUVecReg<30, "$30">, DwarfRegNum<[30]>; -def R31 : SPUVecReg<31, "$31">, DwarfRegNum<[31]>; -def R32 : SPUVecReg<32, "$32">, DwarfRegNum<[32]>; -def R33 : SPUVecReg<33, "$33">, DwarfRegNum<[33]>; -def R34 : SPUVecReg<34, "$34">, DwarfRegNum<[34]>; -def R35 : SPUVecReg<35, "$35">, DwarfRegNum<[35]>; -def R36 : SPUVecReg<36, "$36">, DwarfRegNum<[36]>; -def R37 : SPUVecReg<37, "$37">, DwarfRegNum<[37]>; -def R38 : SPUVecReg<38, "$38">, DwarfRegNum<[38]>; -def R39 : SPUVecReg<39, "$39">, DwarfRegNum<[39]>; -def R40 : SPUVecReg<40, "$40">, DwarfRegNum<[40]>; -def R41 : SPUVecReg<41, "$41">, DwarfRegNum<[41]>; -def R42 : SPUVecReg<42, "$42">, DwarfRegNum<[42]>; -def R43 : SPUVecReg<43, "$43">, DwarfRegNum<[43]>; -def R44 : SPUVecReg<44, "$44">, DwarfRegNum<[44]>; -def R45 : SPUVecReg<45, "$45">, DwarfRegNum<[45]>; -def R46 : SPUVecReg<46, "$46">, DwarfRegNum<[46]>; -def R47 : SPUVecReg<47, "$47">, DwarfRegNum<[47]>; -def R48 : SPUVecReg<48, "$48">, DwarfRegNum<[48]>; -def R49 : SPUVecReg<49, "$49">, DwarfRegNum<[49]>; -def R50 : SPUVecReg<50, "$50">, DwarfRegNum<[50]>; -def R51 : SPUVecReg<51, "$51">, DwarfRegNum<[51]>; -def R52 : SPUVecReg<52, "$52">, DwarfRegNum<[52]>; -def R53 : SPUVecReg<53, "$53">, DwarfRegNum<[53]>; -def R54 : SPUVecReg<54, "$54">, DwarfRegNum<[54]>; -def R55 : SPUVecReg<55, "$55">, DwarfRegNum<[55]>; -def R56 : SPUVecReg<56, "$56">, DwarfRegNum<[56]>; -def R57 : SPUVecReg<57, "$57">, DwarfRegNum<[57]>; -def R58 : SPUVecReg<58, "$58">, DwarfRegNum<[58]>; -def R59 : SPUVecReg<59, "$59">, DwarfRegNum<[59]>; -def R60 : SPUVecReg<60, "$60">, DwarfRegNum<[60]>; -def R61 : SPUVecReg<61, "$61">, DwarfRegNum<[61]>; -def R62 : SPUVecReg<62, "$62">, DwarfRegNum<[62]>; -def R63 : SPUVecReg<63, "$63">, DwarfRegNum<[63]>; -def R64 : SPUVecReg<64, "$64">, DwarfRegNum<[64]>; -def R65 : SPUVecReg<65, "$65">, DwarfRegNum<[65]>; -def R66 : SPUVecReg<66, "$66">, DwarfRegNum<[66]>; -def R67 : SPUVecReg<67, "$67">, DwarfRegNum<[67]>; -def R68 : SPUVecReg<68, "$68">, DwarfRegNum<[68]>; -def R69 : SPUVecReg<69, "$69">, DwarfRegNum<[69]>; -def R70 : SPUVecReg<70, "$70">, DwarfRegNum<[70]>; -def R71 : SPUVecReg<71, "$71">, DwarfRegNum<[71]>; -def R72 : SPUVecReg<72, "$72">, DwarfRegNum<[72]>; -def R73 : SPUVecReg<73, "$73">, DwarfRegNum<[73]>; -def R74 : SPUVecReg<74, "$74">, DwarfRegNum<[74]>; -def R75 : SPUVecReg<75, "$75">, DwarfRegNum<[75]>; -def R76 : SPUVecReg<76, "$76">, DwarfRegNum<[76]>; -def R77 : SPUVecReg<77, "$77">, DwarfRegNum<[77]>; -def R78 : SPUVecReg<78, "$78">, DwarfRegNum<[78]>; -def R79 : SPUVecReg<79, "$79">, DwarfRegNum<[79]>; -def R80 : SPUVecReg<80, "$80">, DwarfRegNum<[80]>; -def R81 : SPUVecReg<81, "$81">, DwarfRegNum<[81]>; -def R82 : SPUVecReg<82, "$82">, DwarfRegNum<[82]>; -def R83 : SPUVecReg<83, "$83">, DwarfRegNum<[83]>; -def R84 : SPUVecReg<84, "$84">, DwarfRegNum<[84]>; -def R85 : SPUVecReg<85, "$85">, DwarfRegNum<[85]>; -def R86 : SPUVecReg<86, "$86">, DwarfRegNum<[86]>; -def R87 : SPUVecReg<87, "$87">, DwarfRegNum<[87]>; -def R88 : SPUVecReg<88, "$88">, DwarfRegNum<[88]>; -def R89 : SPUVecReg<89, "$89">, DwarfRegNum<[89]>; -def R90 : SPUVecReg<90, "$90">, DwarfRegNum<[90]>; -def R91 : SPUVecReg<91, "$91">, DwarfRegNum<[91]>; -def R92 : SPUVecReg<92, "$92">, DwarfRegNum<[92]>; -def R93 : SPUVecReg<93, "$93">, DwarfRegNum<[93]>; -def R94 : SPUVecReg<94, "$94">, DwarfRegNum<[94]>; -def R95 : SPUVecReg<95, "$95">, DwarfRegNum<[95]>; -def R96 : SPUVecReg<96, "$96">, DwarfRegNum<[96]>; -def R97 : SPUVecReg<97, "$97">, DwarfRegNum<[97]>; -def R98 : SPUVecReg<98, "$98">, DwarfRegNum<[98]>; -def R99 : SPUVecReg<99, "$99">, DwarfRegNum<[99]>; -def R100 : SPUVecReg<100, "$100">, DwarfRegNum<[100]>; -def R101 : SPUVecReg<101, "$101">, DwarfRegNum<[101]>; -def R102 : SPUVecReg<102, "$102">, DwarfRegNum<[102]>; -def R103 : SPUVecReg<103, "$103">, DwarfRegNum<[103]>; -def R104 : SPUVecReg<104, "$104">, DwarfRegNum<[104]>; -def R105 : SPUVecReg<105, "$105">, DwarfRegNum<[105]>; -def R106 : SPUVecReg<106, "$106">, DwarfRegNum<[106]>; -def R107 : SPUVecReg<107, "$107">, DwarfRegNum<[107]>; -def R108 : SPUVecReg<108, "$108">, DwarfRegNum<[108]>; -def R109 : SPUVecReg<109, "$109">, DwarfRegNum<[109]>; -def R110 : SPUVecReg<110, "$110">, DwarfRegNum<[110]>; -def R111 : SPUVecReg<111, "$111">, DwarfRegNum<[111]>; -def R112 : SPUVecReg<112, "$112">, DwarfRegNum<[112]>; -def R113 : SPUVecReg<113, "$113">, DwarfRegNum<[113]>; -def R114 : SPUVecReg<114, "$114">, DwarfRegNum<[114]>; -def R115 : SPUVecReg<115, "$115">, DwarfRegNum<[115]>; -def R116 : SPUVecReg<116, "$116">, DwarfRegNum<[116]>; -def R117 : SPUVecReg<117, "$117">, DwarfRegNum<[117]>; -def R118 : SPUVecReg<118, "$118">, DwarfRegNum<[118]>; -def R119 : SPUVecReg<119, "$119">, DwarfRegNum<[119]>; -def R120 : SPUVecReg<120, "$120">, DwarfRegNum<[120]>; -def R121 : SPUVecReg<121, "$121">, DwarfRegNum<[121]>; -def R122 : SPUVecReg<122, "$122">, DwarfRegNum<[122]>; -def R123 : SPUVecReg<123, "$123">, DwarfRegNum<[123]>; -def R124 : SPUVecReg<124, "$124">, DwarfRegNum<[124]>; -def R125 : SPUVecReg<125, "$125">, DwarfRegNum<[125]>; -def R126 : SPUVecReg<126, "$126">, DwarfRegNum<[126]>; -def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>; - -/* Need floating point status register here: */ -/* def FPCSR : ... */ - -// The SPU's registers as 128-bit wide entities, and can function as general -// purpose registers, where the operands are in the "preferred slot": -// The non-volatile registers are allocated in reverse order, like PPC does it. -def GPRC : RegisterClass<"SPU", [i128], 128, - (add (sequence "R%u", 0, 79), - (sequence "R%u", 127, 80))>; - -// The SPU's registers as 64-bit wide (double word integer) "preferred slot": -def R64C : RegisterClass<"SPU", [i64], 128, (add GPRC)>; - -// The SPU's registers as 64-bit wide (double word) FP "preferred slot": -def R64FP : RegisterClass<"SPU", [f64], 128, (add GPRC)>; - -// The SPU's registers as 32-bit wide (word) "preferred slot": -def R32C : RegisterClass<"SPU", [i32], 128, (add GPRC)>; - -// The SPU's registers as single precision floating point "preferred slot": -def R32FP : RegisterClass<"SPU", [f32], 128, (add GPRC)>; - -// The SPU's registers as 16-bit wide (halfword) "preferred slot": -def R16C : RegisterClass<"SPU", [i16], 128, (add GPRC)>; - -// The SPU's registers as 8-bit wide (byte) "preferred slot": -def R8C : RegisterClass<"SPU", [i8], 128, (add GPRC)>; - -// The SPU's registers as vector registers: -def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128, - (add GPRC)>; diff --git a/llvm/lib/Target/CellSPU/SPURegisterNames.h b/llvm/lib/Target/CellSPU/SPURegisterNames.h deleted file mode 100644 index e557ed3..0000000 --- a/llvm/lib/Target/CellSPU/SPURegisterNames.h +++ /dev/null @@ -1,19 +0,0 @@ -//===- SPURegisterNames.h - Wrapper header for SPU register names -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_REGISTER_NAMES_H -#define SPU_REGISTER_NAMES_H - -// Define symbolic names for Cell registers. This defines a mapping from -// register name to register number. -// -#define GET_REGINFO_ENUM -#include "SPUGenRegisterInfo.inc" - -#endif diff --git a/llvm/lib/Target/CellSPU/SPUSchedule.td b/llvm/lib/Target/CellSPU/SPUSchedule.td deleted file mode 100644 index 9ccd084..0000000 --- a/llvm/lib/Target/CellSPU/SPUSchedule.td +++ /dev/null @@ -1,59 +0,0 @@ -//===-- SPUSchedule.td - Cell Scheduling Definitions -------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Even pipeline: - -def EVEN_UNIT : FuncUnit; // Even execution unit: (PC & 0x7 == 000) -def ODD_UNIT : FuncUnit; // Odd execution unit: (PC & 0x7 == 100) - -//===----------------------------------------------------------------------===// -// Instruction Itinerary classes used for Cell SPU -//===----------------------------------------------------------------------===// - -def LoadStore : InstrItinClass; // ODD_UNIT -def BranchHints : InstrItinClass; // ODD_UNIT -def BranchResolv : InstrItinClass; // ODD_UNIT -def ChanOpSPR : InstrItinClass; // ODD_UNIT -def ShuffleOp : InstrItinClass; // ODD_UNIT -def SelectOp : InstrItinClass; // ODD_UNIT -def GatherOp : InstrItinClass; // ODD_UNIT -def LoadNOP : InstrItinClass; // ODD_UNIT -def ExecNOP : InstrItinClass; // EVEN_UNIT -def SPrecFP : InstrItinClass; // EVEN_UNIT -def DPrecFP : InstrItinClass; // EVEN_UNIT -def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer) -def ByteOp : InstrItinClass; // EVEN_UNIT -def IntegerOp : InstrItinClass; // EVEN_UNIT -def IntegerMulDiv: InstrItinClass; // EVEN_UNIT -def RotShiftVec : InstrItinClass; // EVEN_UNIT Inter vector -def RotShiftQuad : InstrItinClass; // ODD_UNIT Entire quad -def ImmLoad : InstrItinClass; // EVEN_UNIT - -/* Note: The itinerary for the Cell SPU is somewhat contrived... */ -def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> - ]>; diff --git a/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp deleted file mode 100644 index 5732fd4..0000000 --- a/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===-- SPUSelectionDAGInfo.cpp - CellSPU SelectionDAG Info ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the SPUSelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "cellspu-selectiondag-info" -#include "SPUTargetMachine.h" -using namespace llvm; - -SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM) - : TargetSelectionDAGInfo(TM) { -} - -SPUSelectionDAGInfo::~SPUSelectionDAGInfo() { -} diff --git a/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h deleted file mode 100644 index 39257d9..0000000 --- a/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- SPUSelectionDAGInfo.h - CellSPU SelectionDAG Info -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the CellSPU subclass for TargetSelectionDAGInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef CELLSPUSELECTIONDAGINFO_H -#define CELLSPUSELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class SPUTargetMachine; - -class SPUSelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM); - ~SPUSelectionDAGInfo(); -}; - -} - -#endif diff --git a/llvm/lib/Target/CellSPU/SPUSubtarget.cpp b/llvm/lib/Target/CellSPU/SPUSubtarget.cpp deleted file mode 100644 index eec2d25..0000000 --- a/llvm/lib/Target/CellSPU/SPUSubtarget.cpp +++ /dev/null @@ -1,65 +0,0 @@ -//===-- SPUSubtarget.cpp - STI Cell SPU Subtarget Information -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the CellSPU-specific subclass of TargetSubtargetInfo. -// -//===----------------------------------------------------------------------===// - -#include "SPUSubtarget.h" -#include "SPU.h" -#include "SPURegisterInfo.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_SUBTARGETINFO_TARGET_DESC -#define GET_SUBTARGETINFO_CTOR -#include "SPUGenSubtargetInfo.inc" - -using namespace llvm; - -SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS) : - SPUGenSubtargetInfo(TT, CPU, FS), - StackAlignment(16), - ProcDirective(SPU::DEFAULT_PROC), - UseLargeMem(false) -{ - // Should be the target SPU processor type. For now, since there's only - // one, simply default to the current "v0" default: - std::string default_cpu("v0"); - - // Parse features string. - ParseSubtargetFeatures(default_cpu, FS); - - // Initialize scheduling itinerary for the specified CPU. - InstrItins = getInstrItineraryForCPU(default_cpu); -} - -/// SetJITMode - This is called to inform the subtarget info that we are -/// producing code for the JIT. -void SPUSubtarget::SetJITMode() { -} - -/// Enable PostRA scheduling for optimization levels -O2 and -O3. -bool SPUSubtarget::enablePostRAScheduler( - CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; - // CriticalPathsRCs seems to be the set of - // RegisterClasses that antidep breakings are performed for. - // Do it for all register classes - CriticalPathRCs.clear(); - CriticalPathRCs.push_back(&SPU::R8CRegClass); - CriticalPathRCs.push_back(&SPU::R16CRegClass); - CriticalPathRCs.push_back(&SPU::R32CRegClass); - CriticalPathRCs.push_back(&SPU::R32FPRegClass); - CriticalPathRCs.push_back(&SPU::R64CRegClass); - CriticalPathRCs.push_back(&SPU::VECREGRegClass); - return OptLevel >= CodeGenOpt::Default; -} diff --git a/llvm/lib/Target/CellSPU/SPUSubtarget.h b/llvm/lib/Target/CellSPU/SPUSubtarget.h deleted file mode 100644 index 27d28b2..0000000 --- a/llvm/lib/Target/CellSPU/SPUSubtarget.h +++ /dev/null @@ -1,97 +0,0 @@ -//===-- SPUSubtarget.h - Define Subtarget for the Cell SPU ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the Cell SPU-specific subclass of TargetSubtargetInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef CELLSUBTARGET_H -#define CELLSUBTARGET_H - -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/MC/MCInstrItineraries.h" -#include - -#define GET_SUBTARGETINFO_HEADER -#include "SPUGenSubtargetInfo.inc" - -namespace llvm { - class GlobalValue; - class StringRef; - - namespace SPU { - enum { - PROC_NONE, - DEFAULT_PROC - }; - } - - class SPUSubtarget : public SPUGenSubtargetInfo { - protected: - /// stackAlignment - The minimum alignment known to hold of the stack frame - /// on entry to the function and which must be maintained by every function. - unsigned StackAlignment; - - /// Selected instruction itineraries (one entry per itinerary class.) - InstrItineraryData InstrItins; - - /// Which SPU processor (this isn't really used, but it's there to keep - /// the C compiler happy) - unsigned ProcDirective; - - /// Use (assume) large memory -- effectively disables the LQA/STQA - /// instructions that assume 259K local store. - bool UseLargeMem; - - public: - /// This constructor initializes the data members to match that - /// of the specified triple. - /// - SPUSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); - - /// ParseSubtargetFeatures - Parses features string setting specified - /// subtarget options. Definition of function is auto generated by tblgen. - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - - /// SetJITMode - This is called to inform the subtarget info that we are - /// producing code for the JIT. - void SetJITMode(); - - /// getStackAlignment - Returns the minimum alignment known to hold of the - /// stack frame on entry to the function and which must be maintained by - /// every function for this subtarget. - unsigned getStackAlignment() const { return StackAlignment; } - - /// getInstrItins - Return the instruction itineraies based on subtarget - /// selection. - const InstrItineraryData &getInstrItineraryData() const { - return InstrItins; - } - - /// Use large memory addressing predicate - bool usingLargeMem() const { - return UseLargeMem; - } - - /// getDataLayoutString - Return the pointer size and type alignment - /// properties of this subtarget. - const char *getDataLayoutString() const { - return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128" - "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128" - "-s:128:128-n32:64"; - } - - bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const; - }; -} // End llvm namespace - -#endif diff --git a/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp b/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp deleted file mode 100644 index 9183165..0000000 --- a/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp +++ /dev/null @@ -1,94 +0,0 @@ -//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Top-level implementation for the Cell SPU target. -// -//===----------------------------------------------------------------------===// - -#include "SPUTargetMachine.h" -#include "SPU.h" -#include "llvm/PassManager.h" -#include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/Support/DynamicLibrary.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - -extern "C" void LLVMInitializeCellSPUTarget() { - // Register the target. - RegisterTargetMachine X(TheCellSPUTarget); -} - -const std::pair * -SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const { - NumEntries = 1; - return &LR[0]; -} - -SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS), - DL(Subtarget.getDataLayoutString()), - InstrInfo(*this), - FrameLowering(Subtarget), - TLInfo(*this), - TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()), - STTI(&TLInfo), VTTI(&TLInfo) { -} - -//===----------------------------------------------------------------------===// -// Pass Pipeline Configuration -//===----------------------------------------------------------------------===// - -namespace { -/// SPU Code Generator Pass Configuration Options. -class SPUPassConfig : public TargetPassConfig { -public: - SPUPassConfig(SPUTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} - - SPUTargetMachine &getSPUTargetMachine() const { - return getTM(); - } - - virtual bool addInstSelector(); - virtual bool addPreEmitPass(); -}; -} // namespace - -TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) { - return new SPUPassConfig(this, PM); -} - -bool SPUPassConfig::addInstSelector() { - // Install an instruction selector. - addPass(createSPUISelDag(getSPUTargetMachine())); - return false; -} - -// passes to run just before printing the assembly -bool SPUPassConfig::addPreEmitPass() { - // load the TCE instruction scheduler, if available via - // loaded plugins - typedef llvm::FunctionPass* (*BuilderFunc)(const char*); - BuilderFunc schedulerCreator = - (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol( - "createTCESchedulerPass"); - if (schedulerCreator != NULL) - addPass(schedulerCreator("cellspu")); - - //align instructions with nops/lnops for dual issue - addPass(createSPUNopFillerPass(getSPUTargetMachine())); - return true; -} diff --git a/llvm/lib/Target/CellSPU/SPUTargetMachine.h b/llvm/lib/Target/CellSPU/SPUTargetMachine.h deleted file mode 100644 index 7f53ea6f..0000000 --- a/llvm/lib/Target/CellSPU/SPUTargetMachine.h +++ /dev/null @@ -1,96 +0,0 @@ -//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the CellSPU-specific subclass of TargetMachine. -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_TARGETMACHINE_H -#define SPU_TARGETMACHINE_H - -#include "SPUSubtarget.h" -#include "SPUInstrInfo.h" -#include "SPUISelLowering.h" -#include "SPUSelectionDAGInfo.h" -#include "SPUFrameLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" -#include "llvm/DataLayout.h" - -namespace llvm { - -/// SPUTargetMachine -/// -class SPUTargetMachine : public LLVMTargetMachine { - SPUSubtarget Subtarget; - const DataLayout DL; - SPUInstrInfo InstrInfo; - SPUFrameLowering FrameLowering; - SPUTargetLowering TLInfo; - SPUSelectionDAGInfo TSInfo; - InstrItineraryData InstrItins; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; -public: - SPUTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); - - /// Return the subtarget implementation object - virtual const SPUSubtarget *getSubtargetImpl() const { - return &Subtarget; - } - virtual const SPUInstrInfo *getInstrInfo() const { - return &InstrInfo; - } - virtual const SPUFrameLowering *getFrameLowering() const { - return &FrameLowering; - } - /*! - \note Cell SPU does not support JIT today. It could support JIT at some - point. - */ - virtual TargetJITInfo *getJITInfo() { - return NULL; - } - - virtual const SPUTargetLowering *getTargetLowering() const { - return &TLInfo; - } - - virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const { - return &TSInfo; - } - - virtual const SPURegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); - } - - virtual const DataLayout *getDataLayout() const { - return &DL; - } - - virtual const InstrItineraryData *getInstrItineraryData() const { - return &InstrItins; - } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } - - // Pass Pipeline Configuration - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); -}; - -} // end namespace llvm - -#endif diff --git a/llvm/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/llvm/lib/Target/CellSPU/TargetInfo/CMakeLists.txt deleted file mode 100644 index 6a98f95..0000000 --- a/llvm/lib/Target/CellSPU/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMCellSPUInfo - CellSPUTargetInfo.cpp - ) - -add_dependencies(LLVMCellSPUInfo CellSPUCommonTableGen) diff --git a/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp deleted file mode 100644 index 84aadfa..0000000 --- a/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp +++ /dev/null @@ -1,20 +0,0 @@ -//===-- CellSPUTargetInfo.cpp - CellSPU Target Implementation -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "SPU.h" -#include "llvm/Module.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -Target llvm::TheCellSPUTarget; - -extern "C" void LLVMInitializeCellSPUTargetInfo() { - RegisterTarget - X(TheCellSPUTarget, "cellspu", "STI CBEA Cell SPU [experimental]"); -} diff --git a/llvm/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/llvm/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt deleted file mode 100644 index 6937e70..0000000 --- a/llvm/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/CellSPU/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = CellSPUInfo -parent = CellSPU -required_libraries = MC Support Target -add_to_library_groups = CellSPU diff --git a/llvm/lib/Target/CellSPU/TargetInfo/Makefile b/llvm/lib/Target/CellSPU/TargetInfo/Makefile deleted file mode 100644 index 9cb6827..0000000 --- a/llvm/lib/Target/CellSPU/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/CellSPU/TargetInfo/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMCellSPUInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/llvm/lib/Target/LLVMBuild.txt b/llvm/lib/Target/LLVMBuild.txt index 8995080..eb6c779 100644 --- a/llvm/lib/Target/LLVMBuild.txt +++ b/llvm/lib/Target/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore +subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the diff --git a/llvm/test/CodeGen/CellSPU/2009-01-01-BrCond.ll b/llvm/test/CodeGen/CellSPU/2009-01-01-BrCond.ll deleted file mode 100644 index 3542231..0000000 --- a/llvm/test/CodeGen/CellSPU/2009-01-01-BrCond.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: llc < %s -march=cellspu -o - | grep brz -; PR3274 - -target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128" -target triple = "spu" - %struct.anon = type { i64 } - %struct.fp_number_type = type { i32, i32, i32, [4 x i8], %struct.anon } - -define double @__floatunsidf(i32 %arg_a) nounwind { -entry: - %in = alloca %struct.fp_number_type, align 16 - %0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1 - store i32 0, i32* %0, align 4 - %1 = icmp eq i32 %arg_a, 0 - %2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0 - br i1 %1, label %bb, label %bb1 - -bb: ; preds = %entry - store i32 2, i32* %2, align 8 - br label %bb7 - -bb1: ; preds = %entry - ret double 0.0 - -bb7: ; preds = %bb5, %bb1, %bb - ret double 1.0 -} - -; declare i32 @llvm.ctlz.i32(i32) nounwind readnone - -declare double @__pack_d(%struct.fp_number_type*) diff --git a/llvm/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll b/llvm/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll deleted file mode 100644 index 401399f..0000000 --- a/llvm/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llc -O0 -march=cellspu -asm-verbose < %s | FileCheck %s -; Check that DEBUG_VALUE comments come through on a variety of targets. - -define i32 @main() nounwind ssp { -entry: -; CHECK: DEBUG_VALUE - call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 - ret i32 0, !dbg !10 -} - -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone - -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -!llvm.dbg.sp = !{!0} - -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 0} -!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!9 = metadata !{i32 3, i32 11, metadata !8, null} -!10 = metadata !{i32 4, i32 2, metadata !8, null} - diff --git a/llvm/test/CodeGen/CellSPU/and_ops.ll b/llvm/test/CodeGen/CellSPU/and_ops.ll deleted file mode 100644 index 4203e91..0000000 --- a/llvm/test/CodeGen/CellSPU/and_ops.ll +++ /dev/null @@ -1,282 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep and %t1.s | count 234 -; RUN: grep andc %t1.s | count 85 -; RUN: grep andi %t1.s | count 37 -; RUN: grep andhi %t1.s | count 30 -; RUN: grep andbi %t1.s | count 4 - -; CellSPU legalization is over-sensitive to Legalize's traversal order. -; XFAIL: * - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; AND instruction generation: -define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = and <4 x i32> %arg1, %arg2 - ret <4 x i32> %A -} - -define <4 x i32> @and_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = and <4 x i32> %arg2, %arg1 - ret <4 x i32> %A -} - -define <8 x i16> @and_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = and <8 x i16> %arg1, %arg2 - ret <8 x i16> %A -} - -define <8 x i16> @and_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = and <8 x i16> %arg2, %arg1 - ret <8 x i16> %A -} - -define <16 x i8> @and_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = and <16 x i8> %arg2, %arg1 - ret <16 x i8> %A -} - -define <16 x i8> @and_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = and <16 x i8> %arg1, %arg2 - ret <16 x i8> %A -} - -define i32 @and_i32_1(i32 %arg1, i32 %arg2) { - %A = and i32 %arg2, %arg1 - ret i32 %A -} - -define i32 @and_i32_2(i32 %arg1, i32 %arg2) { - %A = and i32 %arg1, %arg2 - ret i32 %A -} - -define i16 @and_i16_1(i16 %arg1, i16 %arg2) { - %A = and i16 %arg2, %arg1 - ret i16 %A -} - -define i16 @and_i16_2(i16 %arg1, i16 %arg2) { - %A = and i16 %arg1, %arg2 - ret i16 %A -} - -define i8 @and_i8_1(i8 %arg1, i8 %arg2) { - %A = and i8 %arg2, %arg1 - ret i8 %A -} - -define i8 @and_i8_2(i8 %arg1, i8 %arg2) { - %A = and i8 %arg1, %arg2 - ret i8 %A -} - -; ANDC instruction generation: -define <4 x i32> @andc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = and <4 x i32> %arg1, %A - ret <4 x i32> %B -} - -define <4 x i32> @andc_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = and <4 x i32> %arg2, %A - ret <4 x i32> %B -} - -define <4 x i32> @andc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = and <4 x i32> %A, %arg2 - ret <4 x i32> %B -} - -define <8 x i16> @andc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %arg1, %A - ret <8 x i16> %B -} - -define <8 x i16> @andc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %arg2, %A - ret <8 x i16> %B -} - -define <16 x i8> @andc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %arg2, %A - ret <16 x i8> %B -} - -define <16 x i8> @andc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %arg1, %A - ret <16 x i8> %B -} - -define <16 x i8> @andc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %arg1 - ret <16 x i8> %B -} - -define i32 @andc_i32_1(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg2, -1 - %B = and i32 %A, %arg1 - ret i32 %B -} - -define i32 @andc_i32_2(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg1, -1 - %B = and i32 %A, %arg2 - ret i32 %B -} - -define i32 @andc_i32_3(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg2, -1 - %B = and i32 %arg1, %A - ret i32 %B -} - -define i16 @andc_i16_1(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg2, -1 - %B = and i16 %A, %arg1 - ret i16 %B -} - -define i16 @andc_i16_2(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg1, -1 - %B = and i16 %A, %arg2 - ret i16 %B -} - -define i16 @andc_i16_3(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg2, -1 - %B = and i16 %arg1, %A - ret i16 %B -} - -define i8 @andc_i8_1(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg2, -1 - %B = and i8 %A, %arg1 - ret i8 %B -} - -define i8 @andc_i8_2(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg1, -1 - %B = and i8 %A, %arg2 - ret i8 %B -} - -define i8 @andc_i8_3(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg2, -1 - %B = and i8 %arg1, %A - ret i8 %B -} - -; ANDI instruction generation (i32 data type): -define <4 x i32> @andi_v4i32_1(<4 x i32> %in) { - %tmp2 = and <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 > - ret <4 x i32> %tmp2 -} - -define <4 x i32> @andi_v4i32_2(<4 x i32> %in) { - %tmp2 = and <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 > - ret <4 x i32> %tmp2 -} - -define <4 x i32> @andi_v4i32_3(<4 x i32> %in) { - %tmp2 = and <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 > - ret <4 x i32> %tmp2 -} - -define <4 x i32> @andi_v4i32_4(<4 x i32> %in) { - %tmp2 = and <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 > - ret <4 x i32> %tmp2 -} - -define zeroext i32 @andi_u32(i32 zeroext %in) { - %tmp37 = and i32 %in, 37 - ret i32 %tmp37 -} - -define signext i32 @andi_i32(i32 signext %in) { - %tmp38 = and i32 %in, 37 - ret i32 %tmp38 -} - -define i32 @andi_i32_1(i32 %in) { - %tmp37 = and i32 %in, 37 - ret i32 %tmp37 -} - -; ANDHI instruction generation (i16 data type): -define <8 x i16> @andhi_v8i16_1(<8 x i16> %in) { - %tmp2 = and <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511, - i16 511, i16 511, i16 511, i16 511 > - ret <8 x i16> %tmp2 -} - -define <8 x i16> @andhi_v8i16_2(<8 x i16> %in) { - %tmp2 = and <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510, - i16 510, i16 510, i16 510, i16 510 > - ret <8 x i16> %tmp2 -} - -define <8 x i16> @andhi_v8i16_3(<8 x i16> %in) { - %tmp2 = and <8 x i16> %in, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1 > - ret <8 x i16> %tmp2 -} - -define <8 x i16> @andhi_v8i16_4(<8 x i16> %in) { - %tmp2 = and <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512, - i16 -512, i16 -512, i16 -512, i16 -512 > - ret <8 x i16> %tmp2 -} - -define zeroext i16 @andhi_u16(i16 zeroext %in) { - %tmp37 = and i16 %in, 37 ; [#uses=1] - ret i16 %tmp37 -} - -define signext i16 @andhi_i16(i16 signext %in) { - %tmp38 = and i16 %in, 37 ; [#uses=1] - ret i16 %tmp38 -} - -; i8 data type (s/b ANDBI if 8-bit registers were supported): -define <16 x i8> @and_v16i8(<16 x i8> %in) { - ; ANDBI generated for vector types - %tmp2 = and <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, - i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, - i8 42, i8 42, i8 42, i8 42 > - ret <16 x i8> %tmp2 -} - -define zeroext i8 @and_u8(i8 zeroext %in) { - ; ANDBI generated: - %tmp37 = and i8 %in, 37 - ret i8 %tmp37 -} - -define signext i8 @and_sext8(i8 signext %in) { - ; ANDBI generated - %tmp38 = and i8 %in, 37 - ret i8 %tmp38 -} - -define i8 @and_i8(i8 %in) { - ; ANDBI generated - %tmp38 = and i8 %in, 205 - ret i8 %tmp38 -} diff --git a/llvm/test/CodeGen/CellSPU/arg_ret.ll b/llvm/test/CodeGen/CellSPU/arg_ret.ll deleted file mode 100644 index 7410b72..0000000 --- a/llvm/test/CodeGen/CellSPU/arg_ret.ll +++ /dev/null @@ -1,34 +0,0 @@ -; Test parameter passing and return values -;RUN: llc --march=cellspu %s -o - | FileCheck %s - -; this fits into registers r3-r74 -%paramstruct = type { i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, - i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, - i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, - i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, - i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, - i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32} -define ccc i32 @test_regs( %paramstruct %prm ) -{ -;CHECK: lr $3, $74 -;CHECK: bi $lr - %1 = extractvalue %paramstruct %prm, 71 - ret i32 %1 -} - -define ccc i32 @test_regs_and_stack( %paramstruct %prm, i32 %stackprm ) -{ -;CHECK-NOT: a $3, $74, $75 - %1 = extractvalue %paramstruct %prm, 71 - %2 = add i32 %1, %stackprm - ret i32 %2 -} - -define ccc %paramstruct @test_return( i32 %param, %paramstruct %prm ) -{ -;CHECK: lqd {{\$[0-9]+}}, 80($sp) -;CHECK-NOT: ori {{\$[0-9]+, \$[0-9]+, 0}} -;CHECK: lr $3, $4 - ret %paramstruct %prm -} - diff --git a/llvm/test/CodeGen/CellSPU/bigstack.ll b/llvm/test/CodeGen/CellSPU/bigstack.ll deleted file mode 100644 index 63293e2..0000000 --- a/llvm/test/CodeGen/CellSPU/bigstack.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llc < %s -march=cellspu -o %t1.s -; RUN: grep lqx %t1.s | count 3 -; RUN: grep il %t1.s | grep -v file | count 5 -; RUN: grep stqx %t1.s | count 1 - -define i32 @bigstack() nounwind { -entry: - %avar = alloca i32 - %big_data = alloca [2048 x i32] - store i32 3840, i32* %avar, align 4 - br label %return - -return: - %retval = load i32* %avar - ret i32 %retval -} - diff --git a/llvm/test/CodeGen/CellSPU/bss.ll b/llvm/test/CodeGen/CellSPU/bss.ll deleted file mode 100644 index 327800d..0000000 --- a/llvm/test/CodeGen/CellSPU/bss.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -@bssVar = global i32 zeroinitializer -; CHECK: .section .bss -; CHECK-NEXT: .globl - -@localVar= internal global i32 zeroinitializer -; CHECK-NOT: .lcomm -; CHECK: .local -; CHECK-NEXT: .comm - diff --git a/llvm/test/CodeGen/CellSPU/call.ll b/llvm/test/CodeGen/CellSPU/call.ll deleted file mode 100644 index 11cf770..0000000 --- a/llvm/test/CodeGen/CellSPU/call.ll +++ /dev/null @@ -1,49 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define i32 @main() { -entry: - %a = call i32 @stub_1(i32 1, float 0x400921FA00000000) - call void @extern_stub_1(i32 %a, i32 4) - ret i32 %a -} - -declare void @extern_stub_1(i32, i32) - -define i32 @stub_1(i32 %x, float %y) { - ; CHECK: il $3, 0 - ; CHECK: bi $lr -entry: - ret i32 0 -} - -; vararg call: ensure that all caller-saved registers are spilled to the -; stack: -define i32 @stub_2(...) { -entry: - ret i32 0 -} - -; check that struct is passed in r3-> -; assert this by changing the second field in the struct -%0 = type { i32, i32, i32 } -declare %0 @callee() -define %0 @test_structret() -{ -;CHECK: stqd $lr, 16($sp) -;CHECK: stqd $sp, -48($sp) -;CHECK: ai $sp, $sp, -48 -;CHECK: brasl $lr, callee - %rv = call %0 @callee() -;CHECK: ai $4, $4, 1 -;CHECK: lqd $lr, 64($sp) -;CHECK: ai $sp, $sp, 48 -;CHECK: bi $lr - %oldval = extractvalue %0 %rv, 1 - %newval = add i32 %oldval,1 - %newrv = insertvalue %0 %rv, i32 %newval, 1 - ret %0 %newrv -} - diff --git a/llvm/test/CodeGen/CellSPU/crash.ll b/llvm/test/CodeGen/CellSPU/crash.ll deleted file mode 100644 index cc2ab71..0000000 --- a/llvm/test/CodeGen/CellSPU/crash.ll +++ /dev/null @@ -1,8 +0,0 @@ -; RUN: llc %s -march=cellspu -o - -declare i8 @return_i8() -declare i16 @return_i16() -define void @testfunc() { - %rv1 = call i8 @return_i8() - %rv2 = call i16 @return_i16() - ret void -} \ No newline at end of file diff --git a/llvm/test/CodeGen/CellSPU/ctpop.ll b/llvm/test/CodeGen/CellSPU/ctpop.ll deleted file mode 100644 index e1a6cd8..0000000 --- a/llvm/test/CodeGen/CellSPU/ctpop.ll +++ /dev/null @@ -1,30 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep cntb %t1.s | count 3 -; RUN: grep andi %t1.s | count 3 -; RUN: grep rotmi %t1.s | count 2 -; RUN: grep rothmi %t1.s | count 1 -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -declare i8 @llvm.ctpop.i8(i8) -declare i16 @llvm.ctpop.i16(i16) -declare i32 @llvm.ctpop.i32(i32) - -define i32 @test_i8(i8 %X) { - call i8 @llvm.ctpop.i8(i8 %X) - %Y = zext i8 %1 to i32 - ret i32 %Y -} - -define i32 @test_i16(i16 %X) { - call i16 @llvm.ctpop.i16(i16 %X) - %Y = zext i16 %1 to i32 - ret i32 %Y -} - -define i32 @test_i32(i32 %X) { - call i32 @llvm.ctpop.i32(i32 %X) - %Y = bitcast i32 %1 to i32 - ret i32 %Y -} - diff --git a/llvm/test/CodeGen/CellSPU/div_ops.ll b/llvm/test/CodeGen/CellSPU/div_ops.ll deleted file mode 100644 index 0c93d83..0000000 --- a/llvm/test/CodeGen/CellSPU/div_ops.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: llc --march=cellspu %s -o - | FileCheck %s - -; signed division rounds towards zero, rotma don't. -define i32 @sdivide (i32 %val ) -{ -; CHECK: rotmai -; CHECK: rotmi -; CHECK: a -; CHECK: rotmai -; CHECK: bi $lr - %rv = sdiv i32 %val, 4 - ret i32 %rv -} - -define i32 @udivide (i32 %val ) -{ -; CHECK: rotmi -; CHECK: bi $lr - %rv = udiv i32 %val, 4 - ret i32 %rv -} - diff --git a/llvm/test/CodeGen/CellSPU/dp_farith.ll b/llvm/test/CodeGen/CellSPU/dp_farith.ll deleted file mode 100644 index 66bff3eb..0000000 --- a/llvm/test/CodeGen/CellSPU/dp_farith.ll +++ /dev/null @@ -1,102 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep dfa %t1.s | count 2 -; RUN: grep dfs %t1.s | count 2 -; RUN: grep dfm %t1.s | count 6 -; RUN: grep dfma %t1.s | count 2 -; RUN: grep dfms %t1.s | count 2 -; RUN: grep dfnms %t1.s | count 4 -; -; This file includes double precision floating point arithmetic instructions -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define double @fadd(double %arg1, double %arg2) { - %A = fadd double %arg1, %arg2 - ret double %A -} - -define <2 x double> @fadd_vec(<2 x double> %arg1, <2 x double> %arg2) { - %A = fadd <2 x double> %arg1, %arg2 - ret <2 x double> %A -} - -define double @fsub(double %arg1, double %arg2) { - %A = fsub double %arg1, %arg2 - ret double %A -} - -define <2 x double> @fsub_vec(<2 x double> %arg1, <2 x double> %arg2) { - %A = fsub <2 x double> %arg1, %arg2 - ret <2 x double> %A -} - -define double @fmul(double %arg1, double %arg2) { - %A = fmul double %arg1, %arg2 - ret double %A -} - -define <2 x double> @fmul_vec(<2 x double> %arg1, <2 x double> %arg2) { - %A = fmul <2 x double> %arg1, %arg2 - ret <2 x double> %A -} - -define double @fma(double %arg1, double %arg2, double %arg3) { - %A = fmul double %arg1, %arg2 - %B = fadd double %A, %arg3 - ret double %B -} - -define <2 x double> @fma_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { - %A = fmul <2 x double> %arg1, %arg2 - %B = fadd <2 x double> %A, %arg3 - ret <2 x double> %B -} - -define double @fms(double %arg1, double %arg2, double %arg3) { - %A = fmul double %arg1, %arg2 - %B = fsub double %A, %arg3 - ret double %B -} - -define <2 x double> @fms_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { - %A = fmul <2 x double> %arg1, %arg2 - %B = fsub <2 x double> %A, %arg3 - ret <2 x double> %B -} - -; - (a * b - c) -define double @d_fnms_1(double %arg1, double %arg2, double %arg3) { - %A = fmul double %arg1, %arg2 - %B = fsub double %A, %arg3 - %C = fsub double -0.000000e+00, %B ; [#uses=1] - ret double %C -} - -; Annother way of getting fnms -; - ( a * b ) + c => c - (a * b) -define double @d_fnms_2(double %arg1, double %arg2, double %arg3) { - %A = fmul double %arg1, %arg2 - %B = fsub double %arg3, %A - ret double %B -} - -; FNMS: - (a * b - c) => c - (a * b) -define <2 x double> @d_fnms_vec_1(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { - %A = fmul <2 x double> %arg1, %arg2 - %B = fsub <2 x double> %arg3, %A - ret <2 x double> %B -} - -; Another way to get fnms using a constant vector -; - ( a * b - c) -define <2 x double> @d_fnms_vec_2(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { - %A = fmul <2 x double> %arg1, %arg2 ; <<2 x double>> [#uses=1] - %B = fsub <2 x double> %A, %arg3 ; <<2 x double>> [#uses=1] - %C = fsub <2 x double> < double -0.00000e+00, double -0.00000e+00 >, %B - ret <2 x double> %C -} - -;define double @fdiv_1(double %arg1, double %arg2) { -; %A = fdiv double %arg1, %arg2 ; [#uses=1] -; ret double %A -;} diff --git a/llvm/test/CodeGen/CellSPU/eqv.ll b/llvm/test/CodeGen/CellSPU/eqv.ll deleted file mode 100644 index 7967681..0000000 --- a/llvm/test/CodeGen/CellSPU/eqv.ll +++ /dev/null @@ -1,152 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep eqv %t1.s | count 18 -; RUN: grep xshw %t1.s | count 6 -; RUN: grep xsbh %t1.s | count 3 -; RUN: grep andi %t1.s | count 3 - -; Test the 'eqv' instruction, whose boolean expression is: -; (a & b) | (~a & ~b), which simplifies to -; (a & b) | ~(a | b) -; Alternatively, a ^ ~b, which the compiler will also match. - -; ModuleID = 'eqv.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = and <4 x i32> %arg1, %arg2 - %B = or <4 x i32> %arg1, %arg2 - %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > - %C = or <4 x i32> %A, %Bnot - ret <4 x i32> %C -} - -define <4 x i32> @equiv_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { - %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] - ret <4 x i32> %C -} - -define <4 x i32> @equiv_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { - %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] - ret <4 x i32> %C -} - -define <4 x i32> @equiv_v4i32_4(<4 x i32> %arg1, <4 x i32> %arg2) { - %arg2not = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > - %C = xor <4 x i32> %arg1, %arg2not - ret <4 x i32> %C -} - -define i32 @equiv_i32_1(i32 %arg1, i32 %arg2) { - %A = and i32 %arg1, %arg2 ; [#uses=1] - %B = or i32 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i32 %B, -1 ; [#uses=1] - %C = or i32 %A, %Bnot ; [#uses=1] - ret i32 %C -} - -define i32 @equiv_i32_2(i32 %arg1, i32 %arg2) { - %B = or i32 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i32 %B, -1 ; [#uses=1] - %A = and i32 %arg1, %arg2 ; [#uses=1] - %C = or i32 %A, %Bnot ; [#uses=1] - ret i32 %C -} - -define i32 @equiv_i32_3(i32 %arg1, i32 %arg2) { - %B = or i32 %arg1, %arg2 ; [#uses=1] - %A = and i32 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i32 %B, -1 ; [#uses=1] - %C = or i32 %A, %Bnot ; [#uses=1] - ret i32 %C -} - -define i32 @equiv_i32_4(i32 %arg1, i32 %arg2) { - %arg2not = xor i32 %arg2, -1 - %C = xor i32 %arg1, %arg2not - ret i32 %C -} - -define i32 @equiv_i32_5(i32 %arg1, i32 %arg2) { - %arg1not = xor i32 %arg1, -1 - %C = xor i32 %arg2, %arg1not - ret i32 %C -} - -define signext i16 @equiv_i16_1(i16 signext %arg1, i16 signext %arg2) { - %A = and i16 %arg1, %arg2 ; [#uses=1] - %B = or i16 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i16 %B, -1 ; [#uses=1] - %C = or i16 %A, %Bnot ; [#uses=1] - ret i16 %C -} - -define signext i16 @equiv_i16_2(i16 signext %arg1, i16 signext %arg2) { - %B = or i16 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i16 %B, -1 ; [#uses=1] - %A = and i16 %arg1, %arg2 ; [#uses=1] - %C = or i16 %A, %Bnot ; [#uses=1] - ret i16 %C -} - -define signext i16 @equiv_i16_3(i16 signext %arg1, i16 signext %arg2) { - %B = or i16 %arg1, %arg2 ; [#uses=1] - %A = and i16 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i16 %B, -1 ; [#uses=1] - %C = or i16 %A, %Bnot ; [#uses=1] - ret i16 %C -} - -define signext i8 @equiv_i8_1(i8 signext %arg1, i8 signext %arg2) { - %A = and i8 %arg1, %arg2 ; [#uses=1] - %B = or i8 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i8 %B, -1 ; [#uses=1] - %C = or i8 %A, %Bnot ; [#uses=1] - ret i8 %C -} - -define signext i8 @equiv_i8_2(i8 signext %arg1, i8 signext %arg2) { - %B = or i8 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i8 %B, -1 ; [#uses=1] - %A = and i8 %arg1, %arg2 ; [#uses=1] - %C = or i8 %A, %Bnot ; [#uses=1] - ret i8 %C -} - -define signext i8 @equiv_i8_3(i8 signext %arg1, i8 signext %arg2) { - %B = or i8 %arg1, %arg2 ; [#uses=1] - %A = and i8 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i8 %B, -1 ; [#uses=1] - %C = or i8 %A, %Bnot ; [#uses=1] - ret i8 %C -} - -define zeroext i8 @equiv_u8_1(i8 zeroext %arg1, i8 zeroext %arg2) { - %A = and i8 %arg1, %arg2 ; [#uses=1] - %B = or i8 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i8 %B, -1 ; [#uses=1] - %C = or i8 %A, %Bnot ; [#uses=1] - ret i8 %C -} - -define zeroext i8 @equiv_u8_2(i8 zeroext %arg1, i8 zeroext %arg2) { - %B = or i8 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i8 %B, -1 ; [#uses=1] - %A = and i8 %arg1, %arg2 ; [#uses=1] - %C = or i8 %A, %Bnot ; [#uses=1] - ret i8 %C -} - -define zeroext i8 @equiv_u8_3(i8 zeroext %arg1, i8 zeroext %arg2) { - %B = or i8 %arg1, %arg2 ; [#uses=1] - %A = and i8 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i8 %B, -1 ; [#uses=1] - %C = or i8 %A, %Bnot ; [#uses=1] - ret i8 %C -} diff --git a/llvm/test/CodeGen/CellSPU/extract_elt.ll b/llvm/test/CodeGen/CellSPU/extract_elt.ll deleted file mode 100644 index 0ac971c..0000000 --- a/llvm/test/CodeGen/CellSPU/extract_elt.ll +++ /dev/null @@ -1,277 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep shufb %t1.s | count 39 -; RUN: grep ilhu %t1.s | count 27 -; RUN: grep iohl %t1.s | count 27 -; RUN: grep lqa %t1.s | count 10 -; RUN: grep shlqby %t1.s | count 12 -; RUN: grep 515 %t1.s | count 1 -; RUN: grep 1029 %t1.s | count 2 -; RUN: grep 1543 %t1.s | count 2 -; RUN: grep 2057 %t1.s | count 2 -; RUN: grep 2571 %t1.s | count 2 -; RUN: grep 3085 %t1.s | count 2 -; RUN: grep 3599 %t1.s | count 2 -; RUN: grep 32768 %t1.s | count 1 -; RUN: grep 32769 %t1.s | count 1 -; RUN: grep 32770 %t1.s | count 1 -; RUN: grep 32771 %t1.s | count 1 -; RUN: grep 32772 %t1.s | count 1 -; RUN: grep 32773 %t1.s | count 1 -; RUN: grep 32774 %t1.s | count 1 -; RUN: grep 32775 %t1.s | count 1 -; RUN: grep 32776 %t1.s | count 1 -; RUN: grep 32777 %t1.s | count 1 -; RUN: grep 32778 %t1.s | count 1 -; RUN: grep 32779 %t1.s | count 1 -; RUN: grep 32780 %t1.s | count 1 -; RUN: grep 32781 %t1.s | count 1 -; RUN: grep 32782 %t1.s | count 1 -; RUN: grep 32783 %t1.s | count 1 -; RUN: grep 32896 %t1.s | count 24 - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define i32 @i32_extract_0(<4 x i32> %v) { -entry: - %a = extractelement <4 x i32> %v, i32 0 - ret i32 %a -} - -define i32 @i32_extract_1(<4 x i32> %v) { -entry: - %a = extractelement <4 x i32> %v, i32 1 - ret i32 %a -} - -define i32 @i32_extract_2(<4 x i32> %v) { -entry: - %a = extractelement <4 x i32> %v, i32 2 - ret i32 %a -} - -define i32 @i32_extract_3(<4 x i32> %v) { -entry: - %a = extractelement <4 x i32> %v, i32 3 - ret i32 %a -} - -define i16 @i16_extract_0(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 0 - ret i16 %a -} - -define i16 @i16_extract_1(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 1 - ret i16 %a -} - -define i16 @i16_extract_2(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 2 - ret i16 %a -} - -define i16 @i16_extract_3(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 3 - ret i16 %a -} - -define i16 @i16_extract_4(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 4 - ret i16 %a -} - -define i16 @i16_extract_5(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 5 - ret i16 %a -} - -define i16 @i16_extract_6(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 6 - ret i16 %a -} - -define i16 @i16_extract_7(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 7 - ret i16 %a -} - -define i8 @i8_extract_0(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 0 - ret i8 %a -} - -define i8 @i8_extract_1(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 1 - ret i8 %a -} - -define i8 @i8_extract_2(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 2 - ret i8 %a -} - -define i8 @i8_extract_3(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 3 - ret i8 %a -} - -define i8 @i8_extract_4(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 4 - ret i8 %a -} - -define i8 @i8_extract_5(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 5 - ret i8 %a -} - -define i8 @i8_extract_6(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 6 - ret i8 %a -} - -define i8 @i8_extract_7(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 7 - ret i8 %a -} - -define i8 @i8_extract_8(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 8 - ret i8 %a -} - -define i8 @i8_extract_9(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 9 - ret i8 %a -} - -define i8 @i8_extract_10(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 10 - ret i8 %a -} - -define i8 @i8_extract_11(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 11 - ret i8 %a -} - -define i8 @i8_extract_12(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 12 - ret i8 %a -} - -define i8 @i8_extract_13(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 13 - ret i8 %a -} - -define i8 @i8_extract_14(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 14 - ret i8 %a -} - -define i8 @i8_extract_15(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 15 - ret i8 %a -} - -;;-------------------------------------------------------------------------- -;; extract element, variable index: -;;-------------------------------------------------------------------------- - -define i8 @extract_varadic_i8(i32 %i) nounwind readnone { -entry: - %0 = extractelement <16 x i8> < i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i32 %i - ret i8 %0 -} - -define i8 @extract_varadic_i8_1(<16 x i8> %v, i32 %i) nounwind readnone { -entry: - %0 = extractelement <16 x i8> %v, i32 %i - ret i8 %0 -} - -define i16 @extract_varadic_i16(i32 %i) nounwind readnone { -entry: - %0 = extractelement <8 x i16> < i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i32 %i - ret i16 %0 -} - -define i16 @extract_varadic_i16_1(<8 x i16> %v, i32 %i) nounwind readnone { -entry: - %0 = extractelement <8 x i16> %v, i32 %i - ret i16 %0 -} - -define i32 @extract_varadic_i32(i32 %i) nounwind readnone { -entry: - %0 = extractelement <4 x i32> < i32 0, i32 1, i32 2, i32 3>, i32 %i - ret i32 %0 -} - -define i32 @extract_varadic_i32_1(<4 x i32> %v, i32 %i) nounwind readnone { -entry: - %0 = extractelement <4 x i32> %v, i32 %i - ret i32 %0 -} - -define float @extract_varadic_f32(i32 %i) nounwind readnone { -entry: - %0 = extractelement <4 x float> < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >, i32 %i - ret float %0 -} - -define float @extract_varadic_f32_1(<4 x float> %v, i32 %i) nounwind readnone { -entry: - %0 = extractelement <4 x float> %v, i32 %i - ret float %0 -} - -define i64 @extract_varadic_i64(i32 %i) nounwind readnone { -entry: - %0 = extractelement <2 x i64> < i64 0, i64 1>, i32 %i - ret i64 %0 -} - -define i64 @extract_varadic_i64_1(<2 x i64> %v, i32 %i) nounwind readnone { -entry: - %0 = extractelement <2 x i64> %v, i32 %i - ret i64 %0 -} - -define double @extract_varadic_f64(i32 %i) nounwind readnone { -entry: - %0 = extractelement <2 x double> < double 1.000000e+00, double 2.000000e+00>, i32 %i - ret double %0 -} - -define double @extract_varadic_f64_1(<2 x double> %v, i32 %i) nounwind readnone { -entry: - %0 = extractelement <2 x double> %v, i32 %i - ret double %0 -} diff --git a/llvm/test/CodeGen/CellSPU/fcmp32.ll b/llvm/test/CodeGen/CellSPU/fcmp32.ll deleted file mode 100644 index f6b028d..0000000 --- a/llvm/test/CodeGen/CellSPU/fcmp32.ll +++ /dev/null @@ -1,36 +0,0 @@ -; RUN: llc --mtriple=cellspu-unknown-elf %s -o - | FileCheck %s - -; Exercise the floating point comparison operators for f32: - -declare double @fabs(double) -declare float @fabsf(float) - -define i1 @fcmp_eq(float %arg1, float %arg2) { -; CHECK: fceq -; CHECK: bi $lr - %A = fcmp oeq float %arg1, %arg2 - ret i1 %A -} - -define i1 @fcmp_mag_eq(float %arg1, float %arg2) { -; CHECK: fcmeq -; CHECK: bi $lr - %1 = call float @fabsf(float %arg1) readnone - %2 = call float @fabsf(float %arg2) readnone - %3 = fcmp oeq float %1, %2 - ret i1 %3 -} - -define i1 @test_ogt(float %a, float %b) { -; CHECK: fcgt -; CHECK: bi $lr - %cmp = fcmp ogt float %a, %b - ret i1 %cmp -} - -define i1 @test_ugt(float %a, float %b) { -; CHECK: fcgt -; CHECK: bi $lr - %cmp = fcmp ugt float %a, %b - ret i1 %cmp -} diff --git a/llvm/test/CodeGen/CellSPU/fcmp64.ll b/llvm/test/CodeGen/CellSPU/fcmp64.ll deleted file mode 100644 index 2b61fa6..0000000 --- a/llvm/test/CodeGen/CellSPU/fcmp64.ll +++ /dev/null @@ -1,7 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s - -define i1 @fcmp_eq_setcc_f64(double %arg1, double %arg2) nounwind { -entry: - %A = fcmp oeq double %arg1, %arg2 - ret i1 %A -} diff --git a/llvm/test/CodeGen/CellSPU/fdiv.ll b/llvm/test/CodeGen/CellSPU/fdiv.ll deleted file mode 100644 index 9921626..0000000 --- a/llvm/test/CodeGen/CellSPU/fdiv.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep frest %t1.s | count 2 -; RUN: grep -w fi %t1.s | count 2 -; RUN: grep -w fm %t1.s | count 2 -; RUN: grep fma %t1.s | count 2 -; RUN: grep fnms %t1.s | count 4 -; RUN: grep cgti %t1.s | count 2 -; RUN: grep selb %t1.s | count 2 -; -; This file includes standard floating point arithmetic instructions -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define float @fdiv32(float %arg1, float %arg2) { - %A = fdiv float %arg1, %arg2 - ret float %A -} - -define <4 x float> @fdiv_v4f32(<4 x float> %arg1, <4 x float> %arg2) { - %A = fdiv <4 x float> %arg1, %arg2 - ret <4 x float> %A -} diff --git a/llvm/test/CodeGen/CellSPU/fneg-fabs.ll b/llvm/test/CodeGen/CellSPU/fneg-fabs.ll deleted file mode 100644 index 6e01906..0000000 --- a/llvm/test/CodeGen/CellSPU/fneg-fabs.ll +++ /dev/null @@ -1,42 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep 32768 %t1.s | count 2 -; RUN: grep xor %t1.s | count 4 -; RUN: grep and %t1.s | count 2 - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define double @fneg_dp(double %X) { - %Y = fsub double -0.000000e+00, %X - ret double %Y -} - -define <2 x double> @fneg_dp_vec(<2 x double> %X) { - %Y = fsub <2 x double> < double -0.0000e+00, double -0.0000e+00 >, %X - ret <2 x double> %Y -} - -define float @fneg_sp(float %X) { - %Y = fsub float -0.000000e+00, %X - ret float %Y -} - -define <4 x float> @fneg_sp_vec(<4 x float> %X) { - %Y = fsub <4 x float> , %X - ret <4 x float> %Y -} - -declare double @fabs(double) - -declare float @fabsf(float) - -define double @fabs_dp(double %X) { - %Y = call double @fabs( double %X ) readnone - ret double %Y -} - -define float @fabs_sp(float %X) { - %Y = call float @fabsf( float %X ) readnone - ret float %Y -} diff --git a/llvm/test/CodeGen/CellSPU/i64ops.ll b/llvm/test/CodeGen/CellSPU/i64ops.ll deleted file mode 100644 index 3553cbb..0000000 --- a/llvm/test/CodeGen/CellSPU/i64ops.ll +++ /dev/null @@ -1,57 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep xswd %t1.s | count 3 -; RUN: grep xsbh %t1.s | count 1 -; RUN: grep xshw %t1.s | count 2 -; RUN: grep shufb %t1.s | count 7 -; RUN: grep cg %t1.s | count 4 -; RUN: grep addx %t1.s | count 4 -; RUN: grep fsmbi %t1.s | count 3 -; RUN: grep il %t1.s | count 2 -; RUN: grep mpy %t1.s | count 10 -; RUN: grep mpyh %t1.s | count 6 -; RUN: grep mpyhhu %t1.s | count 2 -; RUN: grep mpyu %t1.s | count 4 - -; ModuleID = 'stores.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define i64 @sext_i64_i8(i8 %a) nounwind { - %1 = sext i8 %a to i64 - ret i64 %1 -} - -define i64 @sext_i64_i16(i16 %a) nounwind { - %1 = sext i16 %a to i64 - ret i64 %1 -} - -define i64 @sext_i64_i32(i32 %a) nounwind { - %1 = sext i32 %a to i64 - ret i64 %1 -} - -define i64 @zext_i64_i8(i8 %a) nounwind { - %1 = zext i8 %a to i64 - ret i64 %1 -} - -define i64 @zext_i64_i16(i16 %a) nounwind { - %1 = zext i16 %a to i64 - ret i64 %1 -} - -define i64 @zext_i64_i32(i32 %a) nounwind { - %1 = zext i32 %a to i64 - ret i64 %1 -} - -define i64 @add_i64(i64 %a, i64 %b) nounwind { - %1 = add i64 %a, %b - ret i64 %1 -} - -define i64 @mul_i64(i64 %a, i64 %b) nounwind { - %1 = mul i64 %a, %b - ret i64 %1 -} diff --git a/llvm/test/CodeGen/CellSPU/i8ops.ll b/llvm/test/CodeGen/CellSPU/i8ops.ll deleted file mode 100644 index 57a2aa8..0000000 --- a/llvm/test/CodeGen/CellSPU/i8ops.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s - -; ModuleID = 'i8ops.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define i8 @add_i8(i8 %a, i8 %b) nounwind { - %1 = add i8 %a, %b - ret i8 %1 -} - -define i8 @add_i8_imm(i8 %a, i8 %b) nounwind { - %1 = add i8 %a, 15 - ret i8 %1 -} - -define i8 @sub_i8(i8 %a, i8 %b) nounwind { - %1 = sub i8 %a, %b - ret i8 %1 -} - -define i8 @sub_i8_imm(i8 %a, i8 %b) nounwind { - %1 = sub i8 %a, 15 - ret i8 %1 -} diff --git a/llvm/test/CodeGen/CellSPU/icmp16.ll b/llvm/test/CodeGen/CellSPU/icmp16.ll deleted file mode 100644 index 853ae1d..0000000 --- a/llvm/test/CodeGen/CellSPU/icmp16.ll +++ /dev/null @@ -1,574 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 -; $3 = %arg1, $4 = %val1, $5 = %val2 -; -; For "positive" comparisons: -; selb $3, $6, $5, -; selb $3, $5, $4, -; -; For "negative" comparisons, i.e., those where the result of the comparison -; must be inverted (setne, for example): -; selb $3, $5, $6, -; selb $3, $4, $5, - -; i16 integer comparisons: -define i16 @icmp_eq_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_eq_select_i16: -; CHECK: ceqh -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp eq i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_eq_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_eq_setcc_i16: -; CHECK: ilhu -; CHECK: ceqh -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp eq i16 %arg1, %arg2 - ret i1 %A -} - -define i16 @icmp_eq_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_eq_immed01_i16: -; CHECK: ceqhi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i16 %arg1, 511 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_eq_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_eq_immed02_i16: -; CHECK: ceqhi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i16 %arg1, -512 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_eq_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_eq_immed03_i16: -; CHECK: ceqhi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i16 %arg1, -1 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_eq_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_eq_immed04_i16: -; CHECK: ilh -; CHECK: ceqh -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i16 %arg1, 32768 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ne_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ne_select_i16: -; CHECK: ceqh -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp ne i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_ne_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ne_setcc_i16: -; CHECK: ceqh -; CHECK: ilhu -; CHECK: xorhi -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ne i16 %arg1, %arg2 - ret i1 %A -} - -define i16 @icmp_ne_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ne_immed01_i16: -; CHECK: ceqhi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i16 %arg1, 511 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ne_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ne_immed02_i16: -; CHECK: ceqhi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i16 %arg1, -512 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ne_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ne_immed03_i16: -; CHECK: ceqhi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i16 %arg1, -1 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ne_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ne_immed04_i16: -; CHECK: ilh -; CHECK: ceqh -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i16 %arg1, 32768 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ugt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ugt_select_i16: -; CHECK: clgth -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp ugt i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_ugt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ugt_setcc_i16: -; CHECK: ilhu -; CHECK: clgth -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ugt i16 %arg1, %arg2 - ret i1 %A -} - -define i16 @icmp_ugt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ugt_immed01_i16: -; CHECK: clgthi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i16 %arg1, 500 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ugt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ugt_immed02_i16: -; CHECK: ceqhi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ugt i16 %arg1, 0 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ugt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ugt_immed03_i16: -; CHECK: clgthi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i16 %arg1, 65024 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ugt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ugt_immed04_i16: -; CHECK: ilh -; CHECK: clgth -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i16 %arg1, 32768 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_uge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_uge_select_i16: -; CHECK: ceqh -; CHECK: clgth -; CHECK: or -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp uge i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_uge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_uge_setcc_i16: -; CHECK: ceqh -; CHECK: clgth -; CHECK: ilhu -; CHECK: or -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp uge i16 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp uge i16 %arg1, can always be transformed into -;; icmp ugt i16 %arg1, -1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. - -define i16 @icmp_ult_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ult_select_i16: -; CHECK: ceqh -; CHECK: clgth -; CHECK: nor -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp ult i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_ult_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ult_setcc_i16: -; CHECK: ceqh -; CHECK: clgth -; CHECK: ilhu -; CHECK: nor -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ult i16 %arg1, %arg2 - ret i1 %A -} - -define i16 @icmp_ult_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ult_immed01_i16: -; CHECK: ceqhi -; CHECK: clgthi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i16 %arg1, 511 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ult_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ult_immed02_i16: -; CHECK: ceqhi -; CHECK: clgthi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i16 %arg1, 65534 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ult_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ult_immed03_i16: -; CHECK: ceqhi -; CHECK: clgthi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i16 %arg1, 65024 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ult_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ult_immed04_i16: -; CHECK: ilh -; CHECK: ceqh -; CHECK: clgth -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i16 %arg1, 32769 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ule_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ule_select_i16: -; CHECK: clgth -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp ule i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_ule_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ule_setcc_i16: -; CHECK: clgth -; CHECK: ilhu -; CHECK: xorhi -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ule i16 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp ule i16 %arg1, can always be transformed into -;; icmp ult i16 %arg1, +1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. - -define i16 @icmp_sgt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sgt_select_i16: -; CHECK: cgth -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp sgt i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_sgt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sgt_setcc_i16: -; CHECK: ilhu -; CHECK: cgth -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp sgt i16 %arg1, %arg2 - ret i1 %A -} - -define i16 @icmp_sgt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sgt_immed01_i16: -; CHECK: cgthi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i16 %arg1, 511 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_sgt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sgt_immed02_i16: -; CHECK: cgthi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i16 %arg1, -1 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_sgt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sgt_immed03_i16: -; CHECK: cgthi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i16 %arg1, -512 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_sgt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sgt_immed04_i16: -; CHECK: ilh -; CHECK: ceqh -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp sgt i16 %arg1, 32768 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_sge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sge_select_i16: -; CHECK: ceqh -; CHECK: cgth -; CHECK: or -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp sge i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_sge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sge_setcc_i16: -; CHECK: ceqh -; CHECK: cgth -; CHECK: ilhu -; CHECK: or -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp sge i16 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp sge i16 %arg1, can always be transformed into -;; icmp sgt i16 %arg1, -1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. - -define i16 @icmp_slt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_slt_select_i16: -; CHECK: ceqh -; CHECK: cgth -; CHECK: nor -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp slt i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_slt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_slt_setcc_i16: -; CHECK: ceqh -; CHECK: cgth -; CHECK: ilhu -; CHECK: nor -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp slt i16 %arg1, %arg2 - ret i1 %A -} - -define i16 @icmp_slt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_slt_immed01_i16: -; CHECK: ceqhi -; CHECK: cgthi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i16 %arg1, 511 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_slt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_slt_immed02_i16: -; CHECK: ceqhi -; CHECK: cgthi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i16 %arg1, -512 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_slt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_slt_immed03_i16: -; CHECK: ceqhi -; CHECK: cgthi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i16 %arg1, -1 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_slt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_slt_immed04_i16: -; CHECK: lr -; CHECK-NEXT: bi - -entry: - %A = icmp slt i16 %arg1, 32768 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_sle_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sle_select_i16: -; CHECK: cgth -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp sle i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_sle_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sle_setcc_i16: -; CHECK: cgth -; CHECK: ilhu -; CHECK: xorhi -; CHECK: iohl -; CHECK: bi - -entry: - %A = icmp sle i16 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp sle i16 %arg1, can always be transformed into -;; icmp slt i16 %arg1, +1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. - diff --git a/llvm/test/CodeGen/CellSPU/icmp32.ll b/llvm/test/CodeGen/CellSPU/icmp32.ll deleted file mode 100644 index 1794f4c..0000000 --- a/llvm/test/CodeGen/CellSPU/icmp32.ll +++ /dev/null @@ -1,575 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 -; $3 = %arg1, $4 = %val1, $5 = %val2 -; -; For "positive" comparisons: -; selb $3, $6, $5, -; selb $3, $5, $4, -; -; For "negative" comparisons, i.e., those where the result of the comparison -; must be inverted (setne, for example): -; selb $3, $5, $6, -; selb $3, $4, $5, - -; i32 integer comparisons: -define i32 @icmp_eq_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_eq_select_i32: -; CHECK: ceq -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp eq i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_eq_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_eq_setcc_i32: -; CHECK: ilhu -; CHECK: ceq -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp eq i32 %arg1, %arg2 - ret i1 %A -} - -define i32 @icmp_eq_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_eq_immed01_i32: -; CHECK: ceqi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i32 %arg1, 511 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_eq_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_eq_immed02_i32: -; CHECK: ceqi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i32 %arg1, -512 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_eq_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_eq_immed03_i32: -; CHECK: ceqi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i32 %arg1, -1 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_eq_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_eq_immed04_i32: -; CHECK: ila -; CHECK: ceq -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i32 %arg1, 32768 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ne_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ne_select_i32: -; CHECK: ceq -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp ne i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_ne_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ne_setcc_i32: -; CHECK: ceq -; CHECK: ilhu -; CHECK: xori -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ne i32 %arg1, %arg2 - ret i1 %A -} - -define i32 @icmp_ne_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ne_immed01_i32: -; CHECK: ceqi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i32 %arg1, 511 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ne_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ne_immed02_i32: -; CHECK: ceqi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i32 %arg1, -512 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ne_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ne_immed03_i32: -; CHECK: ceqi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i32 %arg1, -1 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ne_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ne_immed04_i32: -; CHECK: ila -; CHECK: ceq -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i32 %arg1, 32768 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ugt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ugt_select_i32: -; CHECK: clgt -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp ugt i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_ugt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ugt_setcc_i32: -; CHECK: ilhu -; CHECK: clgt -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ugt i32 %arg1, %arg2 - ret i1 %A -} - -define i32 @icmp_ugt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ugt_immed01_i32: -; CHECK: clgti -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i32 %arg1, 511 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ugt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ugt_immed02_i32: -; CHECK: clgti -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i32 %arg1, 4294966784 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ugt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ugt_immed03_i32: -; CHECK: clgti -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i32 %arg1, 4294967293 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ugt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ugt_immed04_i32: -; CHECK: ila -; CHECK: clgt -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i32 %arg1, 32768 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_uge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_uge_select_i32: -; CHECK: ceq -; CHECK: clgt -; CHECK: or -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp uge i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_uge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_uge_setcc_i32: -; CHECK: ceq -; CHECK: clgt -; CHECK: ilhu -; CHECK: or -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp uge i32 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp uge i32 %arg1, can always be transformed into -;; icmp ugt i32 %arg1, -1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. - -define i32 @icmp_ult_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ult_select_i32: -; CHECK: ceq -; CHECK: clgt -; CHECK: nor -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp ult i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_ult_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ult_setcc_i32: -; CHECK: ceq -; CHECK: clgt -; CHECK: ilhu -; CHECK: nor -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ult i32 %arg1, %arg2 - ret i1 %A -} - -define i32 @icmp_ult_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ult_immed01_i32: -; CHECK: ceqi -; CHECK: clgti -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i32 %arg1, 511 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ult_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ult_immed02_i32: -; CHECK: ceqi -; CHECK: clgti -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i32 %arg1, 4294966784 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ult_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ult_immed03_i32: -; CHECK: ceqi -; CHECK: clgti -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i32 %arg1, 4294967293 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ult_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ult_immed04_i32: -; CHECK: rotmi -; CHECK: ceqi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i32 %arg1, 32768 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ule_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ule_select_i32: -; CHECK: clgt -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp ule i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_ule_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ule_setcc_i32: -; CHECK: clgt -; CHECK: ilhu -; CHECK: xori -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ule i32 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp ule i32 %arg1, can always be transformed into -;; icmp ult i32 %arg1, +1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. - -define i32 @icmp_sgt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sgt_select_i32: -; CHECK: cgt -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp sgt i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_sgt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sgt_setcc_i32: -; CHECK: ilhu -; CHECK: cgt -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp sgt i32 %arg1, %arg2 - ret i1 %A -} - -define i32 @icmp_sgt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sgt_immed01_i32: -; CHECK: cgti -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i32 %arg1, 511 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_sgt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sgt_immed02_i32: -; CHECK: cgti -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i32 %arg1, 4294966784 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_sgt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sgt_immed03_i32: -; CHECK: cgti -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i32 %arg1, 4294967293 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_sgt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sgt_immed04_i32: -; CHECK: ila -; CHECK: cgt -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i32 %arg1, 32768 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_sge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sge_select_i32: -; CHECK: ceq -; CHECK: cgt -; CHECK: or -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp sge i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_sge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sge_setcc_i32: -; CHECK: ceq -; CHECK: cgt -; CHECK: ilhu -; CHECK: or -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp sge i32 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp sge i32 %arg1, can always be transformed into -;; icmp sgt i32 %arg1, -1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. - -define i32 @icmp_slt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_slt_select_i32: -; CHECK: ceq -; CHECK: cgt -; CHECK: nor -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp slt i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_slt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_slt_setcc_i32: -; CHECK: ceq -; CHECK: cgt -; CHECK: ilhu -; CHECK: nor -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp slt i32 %arg1, %arg2 - ret i1 %A -} - -define i32 @icmp_slt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_slt_immed01_i32: -; CHECK: ceqi -; CHECK: cgti -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i32 %arg1, 511 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_slt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_slt_immed02_i32: -; CHECK: ceqi -; CHECK: cgti -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i32 %arg1, -512 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_slt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_slt_immed03_i32: -; CHECK: ceqi -; CHECK: cgti -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i32 %arg1, -1 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_slt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_slt_immed04_i32: -; CHECK: ila -; CHECK: ceq -; CHECK: cgt -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i32 %arg1, 32768 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_sle_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sle_select_i32: -; CHECK: cgt -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp sle i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_sle_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sle_setcc_i32: -; CHECK: cgt -; CHECK: ilhu -; CHECK: xori -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp sle i32 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp sle i32 %arg1, can always be transformed into -;; icmp slt i32 %arg1, +1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. - diff --git a/llvm/test/CodeGen/CellSPU/icmp64.ll b/llvm/test/CodeGen/CellSPU/icmp64.ll deleted file mode 100644 index 9dd2cdc..0000000 --- a/llvm/test/CodeGen/CellSPU/icmp64.ll +++ /dev/null @@ -1,146 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep ceq %t1.s | count 20 -; RUN: grep cgti %t1.s | count 12 -; RUN: grep cgt %t1.s | count 16 -; RUN: grep clgt %t1.s | count 12 -; RUN: grep gb %t1.s | count 12 -; RUN: grep fsm %t1.s | count 10 -; RUN: grep xori %t1.s | count 5 -; RUN: grep selb %t1.s | count 18 - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 -; $3 = %arg1, $4 = %val1, $5 = %val2 -; -; i64 integer comparisons: -define i64 @icmp_eq_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp eq i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_eq_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp eq i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_ne_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ne i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_ne_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ne i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_ugt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ugt i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_ugt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ugt i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_uge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp uge i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_uge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp uge i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_ult_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ult i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_ult_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ult i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_ule_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ule i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_ule_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ule i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_sgt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp sgt i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_sgt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp sgt i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_sge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp sge i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_sge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp sge i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_slt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp slt i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_slt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp slt i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_sle_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp sle i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_sle_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp sle i64 %arg1, %arg2 - ret i1 %A -} diff --git a/llvm/test/CodeGen/CellSPU/icmp8.ll b/llvm/test/CodeGen/CellSPU/icmp8.ll deleted file mode 100644 index 1db641e..0000000 --- a/llvm/test/CodeGen/CellSPU/icmp8.ll +++ /dev/null @@ -1,446 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 -; $3 = %arg1, $4 = %val1, $5 = %val2 -; -; For "positive" comparisons: -; selb $3, $6, $5, -; selb $3, $5, $4, -; -; For "negative" comparisons, i.e., those where the result of the comparison -; must be inverted (setne, for example): -; selb $3, $5, $6, -; selb $3, $4, $5, - -; i8 integer comparisons: -define i8 @icmp_eq_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_eq_select_i8: -; CHECK: ceqb -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp eq i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_eq_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_eq_setcc_i8: -; CHECK: ceqb -; CHECK-NEXT: bi - -entry: - %A = icmp eq i8 %arg1, %arg2 - ret i1 %A -} - -define i8 @icmp_eq_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_eq_immed01_i8: -; CHECK: ceqbi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i8 %arg1, 127 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_eq_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_eq_immed02_i8: -; CHECK: ceqbi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i8 %arg1, -128 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_eq_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_eq_immed03_i8: -; CHECK: ceqbi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i8 %arg1, -1 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_ne_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ne_select_i8: -; CHECK: ceqb -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp ne i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_ne_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ne_setcc_i8: -; CHECK: ceqb -; CHECK: xorbi -; CHECK-NEXT: bi - -entry: - %A = icmp ne i8 %arg1, %arg2 - ret i1 %A -} - -define i8 @icmp_ne_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ne_immed01_i8: -; CHECK: ceqbi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i8 %arg1, 127 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_ne_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ne_immed02_i8: -; CHECK: ceqbi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i8 %arg1, -128 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_ne_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ne_immed03_i8: -; CHECK: ceqbi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i8 %arg1, -1 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_ugt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ugt_select_i8: -; CHECK: clgtb -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp ugt i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_ugt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ugt_setcc_i8: -; CHECK: clgtb -; CHECK-NEXT: bi - -entry: - %A = icmp ugt i8 %arg1, %arg2 - ret i1 %A -} - -define i8 @icmp_ugt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ugt_immed01_i8: -; CHECK: clgtbi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i8 %arg1, 126 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_uge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_uge_select_i8: -; CHECK: ceqb -; CHECK: clgtb -; CHECK: or -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp uge i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_uge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_uge_setcc_i8: -; CHECK: ceqb -; CHECK: clgtb -; CHECK: or -; CHECK-NEXT: bi - -entry: - %A = icmp uge i8 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp uge i8 %arg1, can always be transformed into -;; icmp ugt i8 %arg1, -1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. - -define i8 @icmp_ult_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ult_select_i8: -; CHECK: ceqb -; CHECK: clgtb -; CHECK: nor -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp ult i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_ult_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ult_setcc_i8: -; CHECK: ceqb -; CHECK: clgtb -; CHECK: nor -; CHECK-NEXT: bi - -entry: - %A = icmp ult i8 %arg1, %arg2 - ret i1 %A -} - -define i8 @icmp_ult_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ult_immed01_i8: -; CHECK: ceqbi -; CHECK: clgtbi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i8 %arg1, 253 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_ult_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ult_immed02_i8: -; CHECK: ceqbi -; CHECK: clgtbi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i8 %arg1, 129 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_ule_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ule_select_i8: -; CHECK: clgtb -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp ule i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_ule_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ule_setcc_i8: -; CHECK: clgtb -; CHECK: xorbi -; CHECK-NEXT: bi - -entry: - %A = icmp ule i8 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp ule i8 %arg1, can always be transformed into -;; icmp ult i8 %arg1, +1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. - -define i8 @icmp_sgt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sgt_select_i8: -; CHECK: cgtb -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp sgt i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_sgt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sgt_setcc_i8: -; CHECK: cgtb -; CHECK-NEXT: bi - -entry: - %A = icmp sgt i8 %arg1, %arg2 - ret i1 %A -} - -define i8 @icmp_sgt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sgt_immed01_i8: -; CHECK: cgtbi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i8 %arg1, 96 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_sgt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sgt_immed02_i8: -; CHECK: cgtbi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i8 %arg1, -1 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_sgt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sgt_immed03_i8: -; CHECK: ceqbi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp sgt i8 %arg1, -128 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_sge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sge_select_i8: -; CHECK: ceqb -; CHECK: cgtb -; CHECK: or -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp sge i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_sge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sge_setcc_i8: -; CHECK: ceqb -; CHECK: cgtb -; CHECK: or -; CHECK-NEXT: bi - -entry: - %A = icmp sge i8 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp sge i8 %arg1, can always be transformed into -;; icmp sgt i8 %arg1, -1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. - -define i8 @icmp_slt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_slt_select_i8: -; CHECK: ceqb -; CHECK: cgtb -; CHECK: nor -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp slt i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_slt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_slt_setcc_i8: -; CHECK: ceqb -; CHECK: cgtb -; CHECK: nor -; CHECK-NEXT: bi - -entry: - %A = icmp slt i8 %arg1, %arg2 - ret i1 %A -} - -define i8 @icmp_slt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_slt_immed01_i8: -; CHECK: ceqbi -; CHECK: cgtbi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i8 %arg1, 96 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_slt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_slt_immed02_i8: -; CHECK: ceqbi -; CHECK: cgtbi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i8 %arg1, -120 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_slt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_slt_immed03_i8: -; CHECK: ceqbi -; CHECK: cgtbi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i8 %arg1, -1 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_sle_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sle_select_i8: -; CHECK: cgtb -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp sle i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_sle_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sle_setcc_i8: -; CHECK: cgtb -; CHECK: xorbi -; CHECK-NEXT: bi - -entry: - %A = icmp sle i8 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp sle i8 %arg1, can always be transformed into -;; icmp slt i8 %arg1, +1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. - diff --git a/llvm/test/CodeGen/CellSPU/immed16.ll b/llvm/test/CodeGen/CellSPU/immed16.ll deleted file mode 100644 index 077d071..0000000 --- a/llvm/test/CodeGen/CellSPU/immed16.ll +++ /dev/null @@ -1,40 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep "ilh" %t1.s | count 11 -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define i16 @test_1() { - %x = alloca i16, align 16 - store i16 419, i16* %x ;; ILH via pattern - ret i16 0 -} - -define i16 @test_2() { - %x = alloca i16, align 16 - store i16 1023, i16* %x ;; ILH via pattern - ret i16 0 -} - -define i16 @test_3() { - %x = alloca i16, align 16 - store i16 -1023, i16* %x ;; ILH via pattern - ret i16 0 -} - -define i16 @test_4() { - %x = alloca i16, align 16 - store i16 32767, i16* %x ;; ILH via pattern - ret i16 0 -} - -define i16 @test_5() { - %x = alloca i16, align 16 - store i16 -32768, i16* %x ;; ILH via pattern - ret i16 0 -} - -define i16 @test_6() { - ret i16 0 -} - - diff --git a/llvm/test/CodeGen/CellSPU/immed32.ll b/llvm/test/CodeGen/CellSPU/immed32.ll deleted file mode 100644 index 8e48f0b..0000000 --- a/llvm/test/CodeGen/CellSPU/immed32.ll +++ /dev/null @@ -1,83 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep ilhu %t1.s | count 9 -; RUN: grep iohl %t1.s | count 7 -; RUN: grep -w il %t1.s | count 3 -; RUN: grep 16429 %t1.s | count 1 -; RUN: grep 63572 %t1.s | count 1 -; RUN: grep 128 %t1.s | count 1 -; RUN: grep 32639 %t1.s | count 1 -; RUN: grep 65535 %t1.s | count 1 -; RUN: grep 16457 %t1.s | count 1 -; RUN: grep 4059 %t1.s | count 1 -; RUN: grep 49077 %t1.s | count 1 -; RUN: grep 1267 %t1.s | count 2 -; RUN: grep 16309 %t1.s | count 1 -; RUN: cat %t1.s | FileCheck %s -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define i32 @test_1() { - ret i32 4784128 ;; ILHU via pattern (0x49000) -} - -define i32 @test_2() { - ret i32 5308431 ;; ILHU/IOHL via pattern (0x5100f) -} - -define i32 @test_3() { - ret i32 511 ;; IL via pattern -} - -define i32 @test_4() { - ret i32 -512 ;; IL via pattern -} - -define i32 @test_5() -{ -;CHECK: test_5: -;CHECK-NOT: ila $3, 40000 -;CHECK: ilhu -;CHECK: iohl -;CHECK: bi $lr - ret i32 400000 -} - -;; double float floatval -;; 0x4005bf0a80000000 0x402d|f854 2.718282 -define float @float_const_1() { - ret float 0x4005BF0A80000000 ;; ILHU/IOHL -} - -;; double float floatval -;; 0x3810000000000000 0x0080|0000 0.000000 -define float @float_const_2() { - ret float 0x3810000000000000 ;; IL 128 -} - -;; double float floatval -;; 0x47efffffe0000000 0x7f7f|ffff NaN -define float @float_const_3() { - ret float 0x47EFFFFFE0000000 ;; ILHU/IOHL via pattern -} - -;; double float floatval -;; 0x400921fb60000000 0x4049|0fdb 3.141593 -define float @float_const_4() { - ret float 0x400921FB60000000 ;; ILHU/IOHL via pattern -} - -;; double float floatval -;; 0xbff6a09e60000000 0xbfb5|04f3 -1.414214 -define float @float_const_5() { - ret float 0xBFF6A09E60000000 ;; ILHU/IOHL via pattern -} - -;; double float floatval -;; 0x3ff6a09e60000000 0x3fb5|04f3 1.414214 -define float @float_const_6() { - ret float 0x3FF6A09E60000000 ;; ILHU/IOHL via pattern -} - -define float @float_const_7() { - ret float 0.000000e+00 ;; IL 0 via pattern -} diff --git a/llvm/test/CodeGen/CellSPU/immed64.ll b/llvm/test/CodeGen/CellSPU/immed64.ll deleted file mode 100644 index fd48365..0000000 --- a/llvm/test/CodeGen/CellSPU/immed64.ll +++ /dev/null @@ -1,95 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep lqa %t1.s | count 13 -; RUN: grep ilhu %t1.s | count 15 -; RUN: grep ila %t1.s | count 1 -; RUN: grep -w il %t1.s | count 6 -; RUN: grep shufb %t1.s | count 13 -; RUN: grep 65520 %t1.s | count 1 -; RUN: grep 43981 %t1.s | count 1 -; RUN: grep 13702 %t1.s | count 1 -; RUN: grep 28225 %t1.s | count 1 -; RUN: grep 30720 %t1.s | count 1 -; RUN: grep 3233857728 %t1.s | count 8 -; RUN: grep 2155905152 %t1.s | count 6 -; RUN: grep 66051 %t1.s | count 7 -; RUN: grep 471670303 %t1.s | count 11 - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; 1311768467750121234 => 0x 12345678 abcdef12 (4660,22136/43981,61202) -; 18446744073709551591 => 0x ffffffff ffffffe7 (-25) -; 18446744073708516742 => 0x ffffffff fff03586 (-1034874) -; 5308431 => 0x 00000000 0051000F -; 9223372038704560128 => 0x 80000000 6e417800 - -define i64 @i64_const_1() { - ret i64 1311768467750121234 ;; Constant pool spill -} - -define i64 @i64_const_2() { - ret i64 18446744073709551591 ;; IL/SHUFB -} - -define i64 @i64_const_3() { - ret i64 18446744073708516742 ;; IHLU/IOHL/SHUFB -} - -define i64 @i64_const_4() { - ret i64 5308431 ;; ILHU/IOHL/SHUFB -} - -define i64 @i64_const_5() { - ret i64 511 ;; IL/SHUFB -} - -define i64 @i64_const_6() { - ret i64 -512 ;; IL/SHUFB -} - -define i64 @i64_const_7() { - ret i64 9223372038704560128 ;; IHLU/IOHL/SHUFB -} - -define i64 @i64_const_8() { - ret i64 0 ;; IL -} - -define i64 @i64_const_9() { - ret i64 -1 ;; IL -} - -define i64 @i64_const_10() { - ret i64 281470681808895 ;; IL 65535 -} - -; 0x4005bf0a8b145769 -> -; (ILHU 0x4005 [16389]/IOHL 0xbf0a [48906]) -; (ILHU 0x8b14 [35604]/IOHL 0x5769 [22377]) -define double @f64_const_1() { - ret double 0x4005bf0a8b145769 ;; ILHU/IOHL via pattern -} - -define double @f64_const_2() { - ret double 0x0010000000000000 -} - -define double @f64_const_3() { - ret double 0x7fefffffffffffff -} - -define double @f64_const_4() { - ret double 0x400921fb54442d18 -} - -define double @f64_const_5() { - ret double 0xbff6a09e667f3bcd ;; ILHU/IOHL via pattern -} - -define double @f64_const_6() { - ret double 0x3ff6a09e667f3bcd -} - -define double @f64_const_7() { - ret double 0.000000e+00 -} diff --git a/llvm/test/CodeGen/CellSPU/int2fp.ll b/llvm/test/CodeGen/CellSPU/int2fp.ll deleted file mode 100644 index 984c017..0000000 --- a/llvm/test/CodeGen/CellSPU/int2fp.ll +++ /dev/null @@ -1,41 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep csflt %t1.s | count 5 -; RUN: grep cuflt %t1.s | count 1 -; RUN: grep xshw %t1.s | count 2 -; RUN: grep xsbh %t1.s | count 1 -; RUN: grep and %t1.s | count 2 -; RUN: grep andi %t1.s | count 1 -; RUN: grep ila %t1.s | count 1 - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define float @sitofp_i32(i32 %arg1) { - %A = sitofp i32 %arg1 to float ; [#uses=1] - ret float %A -} - -define float @uitofp_u32(i32 %arg1) { - %A = uitofp i32 %arg1 to float ; [#uses=1] - ret float %A -} - -define float @sitofp_i16(i16 %arg1) { - %A = sitofp i16 %arg1 to float ; [#uses=1] - ret float %A -} - -define float @uitofp_i16(i16 %arg1) { - %A = uitofp i16 %arg1 to float ; [#uses=1] - ret float %A -} - -define float @sitofp_i8(i8 %arg1) { - %A = sitofp i8 %arg1 to float ; [#uses=1] - ret float %A -} - -define float @uitofp_i8(i8 %arg1) { - %A = uitofp i8 %arg1 to float ; [#uses=1] - ret float %A -} diff --git a/llvm/test/CodeGen/CellSPU/intrinsics_branch.ll b/llvm/test/CodeGen/CellSPU/intrinsics_branch.ll deleted file mode 100644 index b0f6a62..0000000 --- a/llvm/test/CodeGen/CellSPU/intrinsics_branch.ll +++ /dev/null @@ -1,150 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep ceq %t1.s | count 30 -; RUN: grep ceqb %t1.s | count 10 -; RUN: grep ceqhi %t1.s | count 5 -; RUN: grep ceqi %t1.s | count 5 -; RUN: grep cgt %t1.s | count 30 -; RUN: grep cgtb %t1.s | count 10 -; RUN: grep cgthi %t1.s | count 5 -; RUN: grep cgti %t1.s | count 5 -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8) - -declare <4 x i32> @llvm.spu.si.ceq(<4 x i32>, <4 x i32>) -declare <16 x i8> @llvm.spu.si.ceqb(<16 x i8>, <16 x i8>) -declare <8 x i16> @llvm.spu.si.ceqh(<8 x i16>, <8 x i16>) -declare <4 x i32> @llvm.spu.si.ceqi(<4 x i32>, i16) -declare <8 x i16> @llvm.spu.si.ceqhi(<8 x i16>, i16) -declare <16 x i8> @llvm.spu.si.ceqbi(<16 x i8>, i8) - -declare <4 x i32> @llvm.spu.si.cgt(<4 x i32>, <4 x i32>) -declare <16 x i8> @llvm.spu.si.cgtb(<16 x i8>, <16 x i8>) -declare <8 x i16> @llvm.spu.si.cgth(<8 x i16>, <8 x i16>) -declare <4 x i32> @llvm.spu.si.cgti(<4 x i32>, i16) -declare <8 x i16> @llvm.spu.si.cgthi(<8 x i16>, i16) -declare <16 x i8> @llvm.spu.si.cgtbi(<16 x i8>, i8) - -declare <4 x i32> @llvm.spu.si.clgt(<4 x i32>, <4 x i32>) -declare <16 x i8> @llvm.spu.si.clgtb(<16 x i8>, <16 x i8>) -declare <8 x i16> @llvm.spu.si.clgth(<8 x i16>, <8 x i16>) -declare <4 x i32> @llvm.spu.si.clgti(<4 x i32>, i16) -declare <8 x i16> @llvm.spu.si.clgthi(<8 x i16>, i16) -declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8) - - - -define <4 x i32> @test(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) { - call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <8 x i16> @ceqhtest(<8 x i16> %A, <8 x i16> %B) { - call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y -} - -define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) { - call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y -} - -define <4 x i32> @ceqitest(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <8 x i16> @ceqhitest(<8 x i16> %A) { - call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y -} - -define <16 x i8> @ceqbitest(<16 x i8> %A) { - call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y -} - -define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) { - call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <8 x i16> @cgthtest(<8 x i16> %A, <8 x i16> %B) { - call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y -} - -define <16 x i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) { - call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y -} - -define <4 x i32> @cgtitest(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <8 x i16> @cgthitest(<8 x i16> %A) { - call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y -} - -define <16 x i8> @cgtbitest(<16 x i8> %A) { - call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y -} - -define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) { - call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) { - call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y -} - -define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) { - call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y -} - -define <4 x i32> @clgtitest(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <8 x i16> @clgthitest(<8 x i16> %A) { - call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y -} - -define <16 x i8> @clgtbitest(<16 x i8> %A) { - call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y -} diff --git a/llvm/test/CodeGen/CellSPU/intrinsics_float.ll b/llvm/test/CodeGen/CellSPU/intrinsics_float.ll deleted file mode 100644 index 8137347..0000000 --- a/llvm/test/CodeGen/CellSPU/intrinsics_float.ll +++ /dev/null @@ -1,94 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep fa %t1.s | count 5 -; RUN: grep fs %t1.s | count 5 -; RUN: grep fm %t1.s | count 15 -; RUN: grep fceq %t1.s | count 5 -; RUN: grep fcmeq %t1.s | count 5 -; RUN: grep fcgt %t1.s | count 5 -; RUN: grep fcmgt %t1.s | count 5 -; RUN: grep fma %t1.s | count 5 -; RUN: grep fnms %t1.s | count 5 -; RUN: grep fms %t1.s | count 5 -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8) - -declare <4 x float> @llvm.spu.si.fa(<4 x float>, <4 x float>) -declare <4 x float> @llvm.spu.si.fs(<4 x float>, <4 x float>) -declare <4 x float> @llvm.spu.si.fm(<4 x float>, <4 x float>) - -declare <4 x float> @llvm.spu.si.fceq(<4 x float>, <4 x float>) -declare <4 x float> @llvm.spu.si.fcmeq(<4 x float>, <4 x float>) -declare <4 x float> @llvm.spu.si.fcgt(<4 x float>, <4 x float>) -declare <4 x float> @llvm.spu.si.fcmgt(<4 x float>, <4 x float>) - -declare <4 x float> @llvm.spu.si.fma(<4 x float>, <4 x float>, <4 x float>) -declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>) -declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>) - -define <4 x i32> @test(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fs(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { - call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { - call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { - call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} diff --git a/llvm/test/CodeGen/CellSPU/intrinsics_logical.ll b/llvm/test/CodeGen/CellSPU/intrinsics_logical.ll deleted file mode 100644 index a29ee4c..0000000 --- a/llvm/test/CodeGen/CellSPU/intrinsics_logical.ll +++ /dev/null @@ -1,49 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep and %t1.s | count 20 -; RUN: grep andc %t1.s | count 5 -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -declare <4 x i32> @llvm.spu.si.and(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.spu.si.andc(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.spu.si.andi(<4 x i32>, i16) -declare <8 x i16> @llvm.spu.si.andhi(<8 x i16>, i16) -declare <16 x i8> @llvm.spu.si.andbi(<16 x i8>, i8) - -declare <4 x i32> @llvm.spu.si.or(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.spu.si.orc(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.spu.si.ori(<4 x i32>, i16) -declare <8 x i16> @llvm.spu.si.orhi(<8 x i16>, i16) -declare <16 x i8> @llvm.spu.si.orbi(<16 x i8>, i8) - -declare <4 x i32> @llvm.spu.si.xor(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.spu.si.xori(<4 x i32>, i16) -declare <8 x i16> @llvm.spu.si.xorhi(<8 x i16>, i16) -declare <16 x i8> @llvm.spu.si.xorbi(<16 x i8>, i8) - -declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>) - -define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) { - call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) { - call <4 x i32> @llvm.spu.si.andc(<4 x i32> %A, <4 x i32> %B) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <4 x i32> @anditest(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <8 x i16> @andhitest(<8 x i16> %A) { - call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y -} diff --git a/llvm/test/CodeGen/CellSPU/jumptable.ll b/llvm/test/CodeGen/CellSPU/jumptable.ll deleted file mode 100644 index 66c2fde..0000000 --- a/llvm/test/CodeGen/CellSPU/jumptable.ll +++ /dev/null @@ -1,21 +0,0 @@ -;RUN: llc --march=cellspu -disable-cgp-branch-opts %s -o - | FileCheck %s -; This is to check that emitting jumptables doesn't crash llc -define i32 @test(i32 %param) { -entry: -;CHECK: ai {{\$.}}, $3, -1 -;CHECK: clgti {{\$., \$.}}, 3 -;CHECK: brnz {{\$.}},.LBB0_ - switch i32 %param, label %bb2 [ - i32 1, label %bb1 - i32 2, label %bb2 - i32 3, label %bb3 - i32 4, label %bb2 - ] -;CHECK-NOT: # BB#2 -bb1: - ret i32 1 -bb2: - ret i32 2 -bb3: - ret i32 %param -} diff --git a/llvm/test/CodeGen/CellSPU/lit.local.cfg b/llvm/test/CodeGen/CellSPU/lit.local.cfg deleted file mode 100644 index ea00867..0000000 --- a/llvm/test/CodeGen/CellSPU/lit.local.cfg +++ /dev/null @@ -1,6 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -targets = set(config.root.targets_to_build.split()) -if not 'CellSPU' in targets: - config.unsupported = True - diff --git a/llvm/test/CodeGen/CellSPU/loads.ll b/llvm/test/CodeGen/CellSPU/loads.ll deleted file mode 100644 index 4771752..0000000 --- a/llvm/test/CodeGen/CellSPU/loads.ll +++ /dev/null @@ -1,59 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -; ModuleID = 'loads.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define <4 x float> @load_v4f32_1(<4 x float>* %a) nounwind readonly { -entry: - %tmp1 = load <4 x float>* %a - ret <4 x float> %tmp1 -; CHECK: lqd $3, 0($3) -} - -define <4 x float> @load_v4f32_2(<4 x float>* %a) nounwind readonly { -entry: - %arrayidx = getelementptr <4 x float>* %a, i32 1 - %tmp1 = load <4 x float>* %arrayidx - ret <4 x float> %tmp1 -; CHECK: lqd $3, 16($3) -} - - -declare <4 x i32>* @getv4f32ptr() -define <4 x i32> @func() { - ;CHECK: brasl - ; we need to have some instruction to move the result to safety. - ; which instruction (lr, stqd...) depends on the regalloc - ;CHECK: {{.*}} - ;CHECK: brasl - %rv1 = call <4 x i32>* @getv4f32ptr() - %rv2 = call <4 x i32>* @getv4f32ptr() - %rv3 = load <4 x i32>* %rv1 - ret <4 x i32> %rv3 -} - -define <4 x float> @load_undef(){ - ; CHECK: lqd $3, 0($3) - %val = load <4 x float>* undef - ret <4 x float> %val -} - -;check that 'misaligned' loads that may span two memory chunks -;have two loads. Don't check for the bitmanipulation, as that -;might change with improved algorithms or scheduling -define i32 @load_misaligned( i32* %ptr ){ -;CHECK: load_misaligned -;CHECK: lqd -;CHECK: lqd -;CHECK: bi $lr - %rv = load i32* %ptr, align 2 - ret i32 %rv -} - -define <4 x i32> @load_null_vec( ) { -;CHECK: lqa -;CHECK: bi $lr - %rv = load <4 x i32>* null - ret <4 x i32> %rv -} diff --git a/llvm/test/CodeGen/CellSPU/mul-with-overflow.ll b/llvm/test/CodeGen/CellSPU/mul-with-overflow.ll deleted file mode 100644 index c04e69e..0000000 --- a/llvm/test/CodeGen/CellSPU/mul-with-overflow.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: llc < %s -march=cellspu - -declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b) -define zeroext i1 @a(i16 %x) nounwind { - %res = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %x, i16 3) - %obil = extractvalue {i16, i1} %res, 1 - ret i1 %obil -} - -declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b) -define zeroext i1 @b(i16 %x) nounwind { - %res = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %x, i16 3) - %obil = extractvalue {i16, i1} %res, 1 - ret i1 %obil -} diff --git a/llvm/test/CodeGen/CellSPU/mul_ops.ll b/llvm/test/CodeGen/CellSPU/mul_ops.ll deleted file mode 100644 index 1e28fc7..0000000 --- a/llvm/test/CodeGen/CellSPU/mul_ops.ll +++ /dev/null @@ -1,88 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep mpy %t1.s | count 44 -; RUN: grep mpyu %t1.s | count 4 -; RUN: grep mpyh %t1.s | count 10 -; RUN: grep mpyhh %t1.s | count 2 -; RUN: grep rotma %t1.s | count 12 -; RUN: grep rotmahi %t1.s | count 4 -; RUN: grep and %t1.s | count 2 -; RUN: grep selb %t1.s | count 6 -; RUN: grep fsmbi %t1.s | count 4 -; RUN: grep shli %t1.s | count 4 -; RUN: grep shlhi %t1.s | count 4 -; RUN: grep ila %t1.s | count 2 -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; 32-bit multiply instruction generation: -define <4 x i32> @mpy_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { -entry: - %A = mul <4 x i32> %arg1, %arg2 - ret <4 x i32> %A -} - -define <4 x i32> @mpy_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { -entry: - %A = mul <4 x i32> %arg2, %arg1 - ret <4 x i32> %A -} - -define <8 x i16> @mpy_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { -entry: - %A = mul <8 x i16> %arg1, %arg2 - ret <8 x i16> %A -} - -define <8 x i16> @mpy_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { -entry: - %A = mul <8 x i16> %arg2, %arg1 - ret <8 x i16> %A -} - -define <16 x i8> @mul_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { -entry: - %A = mul <16 x i8> %arg2, %arg1 - ret <16 x i8> %A -} - -define <16 x i8> @mul_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { -entry: - %A = mul <16 x i8> %arg1, %arg2 - ret <16 x i8> %A -} - -define i32 @mul_i32_1(i32 %arg1, i32 %arg2) { -entry: - %A = mul i32 %arg2, %arg1 - ret i32 %A -} - -define i32 @mul_i32_2(i32 %arg1, i32 %arg2) { -entry: - %A = mul i32 %arg1, %arg2 - ret i32 %A -} - -define i16 @mul_i16_1(i16 %arg1, i16 %arg2) { -entry: - %A = mul i16 %arg2, %arg1 - ret i16 %A -} - -define i16 @mul_i16_2(i16 %arg1, i16 %arg2) { -entry: - %A = mul i16 %arg1, %arg2 - ret i16 %A -} - -define i8 @mul_i8_1(i8 %arg1, i8 %arg2) { -entry: - %A = mul i8 %arg2, %arg1 - ret i8 %A -} - -define i8 @mul_i8_2(i8 %arg1, i8 %arg2) { -entry: - %A = mul i8 %arg1, %arg2 - ret i8 %A -} diff --git a/llvm/test/CodeGen/CellSPU/nand.ll b/llvm/test/CodeGen/CellSPU/nand.ll deleted file mode 100644 index 57ac709..0000000 --- a/llvm/test/CodeGen/CellSPU/nand.ll +++ /dev/null @@ -1,125 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep nand %t1.s | count 90 -; RUN: grep and %t1.s | count 94 -; RUN: grep xsbh %t1.s | count 2 -; RUN: grep xshw %t1.s | count 4 - -; CellSPU legalization is over-sensitive to Legalize's traversal order. -; XFAIL: * - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1] - %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 > - ret <4 x i32> %B -} - -define <4 x i32> @nand_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 > - ret <4 x i32> %B -} - -define <8 x i16> @nand_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = and <8 x i16> %arg2, %arg1 ; <<8 x i16>> [#uses=1] - %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - ret <8 x i16> %B -} - -define <8 x i16> @nand_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = and <8 x i16> %arg1, %arg2 ; <<8 x i16>> [#uses=1] - %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - ret <8 x i16> %B -} - -define <16 x i8> @nand_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = and <16 x i8> %arg2, %arg1 ; <<16 x i8>> [#uses=1] - %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - ret <16 x i8> %B -} - -define <16 x i8> @nand_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = and <16 x i8> %arg1, %arg2 ; <<16 x i8>> [#uses=1] - %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - ret <16 x i8> %B -} - -define i32 @nand_i32_1(i32 %arg1, i32 %arg2) { - %A = and i32 %arg2, %arg1 ; [#uses=1] - %B = xor i32 %A, -1 ; [#uses=1] - ret i32 %B -} - -define i32 @nand_i32_2(i32 %arg1, i32 %arg2) { - %A = and i32 %arg1, %arg2 ; [#uses=1] - %B = xor i32 %A, -1 ; [#uses=1] - ret i32 %B -} - -define signext i16 @nand_i16_1(i16 signext %arg1, i16 signext %arg2) { - %A = and i16 %arg2, %arg1 ; [#uses=1] - %B = xor i16 %A, -1 ; [#uses=1] - ret i16 %B -} - -define signext i16 @nand_i16_2(i16 signext %arg1, i16 signext %arg2) { - %A = and i16 %arg1, %arg2 ; [#uses=1] - %B = xor i16 %A, -1 ; [#uses=1] - ret i16 %B -} - -define zeroext i16 @nand_i16u_1(i16 zeroext %arg1, i16 zeroext %arg2) { - %A = and i16 %arg2, %arg1 ; [#uses=1] - %B = xor i16 %A, -1 ; [#uses=1] - ret i16 %B -} - -define zeroext i16 @nand_i16u_2(i16 zeroext %arg1, i16 zeroext %arg2) { - %A = and i16 %arg1, %arg2 ; [#uses=1] - %B = xor i16 %A, -1 ; [#uses=1] - ret i16 %B -} - -define zeroext i8 @nand_i8u_1(i8 zeroext %arg1, i8 zeroext %arg2) { - %A = and i8 %arg2, %arg1 ; [#uses=1] - %B = xor i8 %A, -1 ; [#uses=1] - ret i8 %B -} - -define zeroext i8 @nand_i8u_2(i8 zeroext %arg1, i8 zeroext %arg2) { - %A = and i8 %arg1, %arg2 ; [#uses=1] - %B = xor i8 %A, -1 ; [#uses=1] - ret i8 %B -} - -define signext i8 @nand_i8_1(i8 signext %arg1, i8 signext %arg2) { - %A = and i8 %arg2, %arg1 ; [#uses=1] - %B = xor i8 %A, -1 ; [#uses=1] - ret i8 %B -} - -define signext i8 @nand_i8_2(i8 signext %arg1, i8 signext %arg2) { - %A = and i8 %arg1, %arg2 ; [#uses=1] - %B = xor i8 %A, -1 ; [#uses=1] - ret i8 %B -} - -define i8 @nand_i8_3(i8 %arg1, i8 %arg2) { - %A = and i8 %arg2, %arg1 ; [#uses=1] - %B = xor i8 %A, -1 ; [#uses=1] - ret i8 %B -} - -define i8 @nand_i8_4(i8 %arg1, i8 %arg2) { - %A = and i8 %arg1, %arg2 ; [#uses=1] - %B = xor i8 %A, -1 ; [#uses=1] - ret i8 %B -} diff --git a/llvm/test/CodeGen/CellSPU/or_ops.ll b/llvm/test/CodeGen/CellSPU/or_ops.ll deleted file mode 100644 index f329266..0000000 --- a/llvm/test/CodeGen/CellSPU/or_ops.ll +++ /dev/null @@ -1,278 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep and %t1.s | count 2 -; RUN: grep orc %t1.s | count 85 -; RUN: grep ori %t1.s | count 34 -; RUN: grep orhi %t1.s | count 30 -; RUN: grep orbi %t1.s | count 15 -; RUN: FileCheck %s < %t1.s - -; CellSPU legalization is over-sensitive to Legalize's traversal order. -; XFAIL: * - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; OR instruction generation: -define <4 x i32> @or_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = or <4 x i32> %arg1, %arg2 - ret <4 x i32> %A -} - -define <4 x i32> @or_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = or <4 x i32> %arg2, %arg1 - ret <4 x i32> %A -} - -define <8 x i16> @or_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = or <8 x i16> %arg1, %arg2 - ret <8 x i16> %A -} - -define <8 x i16> @or_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = or <8 x i16> %arg2, %arg1 - ret <8 x i16> %A -} - -define <16 x i8> @or_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = or <16 x i8> %arg2, %arg1 - ret <16 x i8> %A -} - -define <16 x i8> @or_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = or <16 x i8> %arg1, %arg2 - ret <16 x i8> %A -} - -define i32 @or_i32_1(i32 %arg1, i32 %arg2) { - %A = or i32 %arg2, %arg1 - ret i32 %A -} - -define i32 @or_i32_2(i32 %arg1, i32 %arg2) { - %A = or i32 %arg1, %arg2 - ret i32 %A -} - -define i16 @or_i16_1(i16 %arg1, i16 %arg2) { - %A = or i16 %arg2, %arg1 - ret i16 %A -} - -define i16 @or_i16_2(i16 %arg1, i16 %arg2) { - %A = or i16 %arg1, %arg2 - ret i16 %A -} - -define i8 @or_i8_1(i8 %arg1, i8 %arg2) { - %A = or i8 %arg2, %arg1 - ret i8 %A -} - -define i8 @or_i8_2(i8 %arg1, i8 %arg2) { - %A = or i8 %arg1, %arg2 - ret i8 %A -} - -; ORC instruction generation: -define <4 x i32> @orc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = or <4 x i32> %arg1, %A - ret <4 x i32> %B -} - -define <4 x i32> @orc_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = or <4 x i32> %arg2, %A - ret <4 x i32> %B -} - -define <4 x i32> @orc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = or <4 x i32> %A, %arg2 - ret <4 x i32> %B -} - -define <8 x i16> @orc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = or <8 x i16> %arg1, %A - ret <8 x i16> %B -} - -define <8 x i16> @orc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = or <8 x i16> %arg2, %A - ret <8 x i16> %B -} - -define <16 x i8> @orc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = or <16 x i8> %arg2, %A - ret <16 x i8> %B -} - -define <16 x i8> @orc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = or <16 x i8> %arg1, %A - ret <16 x i8> %B -} - -define <16 x i8> @orc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = or <16 x i8> %A, %arg1 - ret <16 x i8> %B -} - -define i32 @orc_i32_1(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg2, -1 - %B = or i32 %A, %arg1 - ret i32 %B -} - -define i32 @orc_i32_2(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg1, -1 - %B = or i32 %A, %arg2 - ret i32 %B -} - -define i32 @orc_i32_3(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg2, -1 - %B = or i32 %arg1, %A - ret i32 %B -} - -define i16 @orc_i16_1(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg2, -1 - %B = or i16 %A, %arg1 - ret i16 %B -} - -define i16 @orc_i16_2(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg1, -1 - %B = or i16 %A, %arg2 - ret i16 %B -} - -define i16 @orc_i16_3(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg2, -1 - %B = or i16 %arg1, %A - ret i16 %B -} - -define i8 @orc_i8_1(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg2, -1 - %B = or i8 %A, %arg1 - ret i8 %B -} - -define i8 @orc_i8_2(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg1, -1 - %B = or i8 %A, %arg2 - ret i8 %B -} - -define i8 @orc_i8_3(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg2, -1 - %B = or i8 %arg1, %A - ret i8 %B -} - -; ORI instruction generation (i32 data type): -define <4 x i32> @ori_v4i32_1(<4 x i32> %in) { - %tmp2 = or <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 > - ret <4 x i32> %tmp2 -} - -define <4 x i32> @ori_v4i32_2(<4 x i32> %in) { - %tmp2 = or <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 > - ret <4 x i32> %tmp2 -} - -define <4 x i32> @ori_v4i32_3(<4 x i32> %in) { - %tmp2 = or <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 > - ret <4 x i32> %tmp2 -} - -define <4 x i32> @ori_v4i32_4(<4 x i32> %in) { - %tmp2 = or <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 > - ret <4 x i32> %tmp2 -} - -define zeroext i32 @ori_u32(i32 zeroext %in) { - %tmp37 = or i32 %in, 37 ; [#uses=1] - ret i32 %tmp37 -} - -define signext i32 @ori_i32(i32 signext %in) { - %tmp38 = or i32 %in, 37 ; [#uses=1] - ret i32 %tmp38 -} - -define i32 @ori_i32_600(i32 %in) { - ;600 does not fit into 'ori' immediate field - ;CHECK: ori_i32_600 - ;CHECK: il - ;CHECK: ori - %tmp = or i32 %in, 600 - ret i32 %tmp -} - -; ORHI instruction generation (i16 data type): -define <8 x i16> @orhi_v8i16_1(<8 x i16> %in) { - %tmp2 = or <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511, - i16 511, i16 511, i16 511, i16 511 > - ret <8 x i16> %tmp2 -} - -define <8 x i16> @orhi_v8i16_2(<8 x i16> %in) { - %tmp2 = or <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510, - i16 510, i16 510, i16 510, i16 510 > - ret <8 x i16> %tmp2 -} - -define <8 x i16> @orhi_v8i16_3(<8 x i16> %in) { - %tmp2 = or <8 x i16> %in, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1 > - ret <8 x i16> %tmp2 -} - -define <8 x i16> @orhi_v8i16_4(<8 x i16> %in) { - %tmp2 = or <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512, - i16 -512, i16 -512, i16 -512, i16 -512 > - ret <8 x i16> %tmp2 -} - -define zeroext i16 @orhi_u16(i16 zeroext %in) { - %tmp37 = or i16 %in, 37 ; [#uses=1] - ret i16 %tmp37 -} - -define signext i16 @orhi_i16(i16 signext %in) { - %tmp38 = or i16 %in, 37 ; [#uses=1] - ret i16 %tmp38 -} - -; ORBI instruction generation (i8 data type): -define <16 x i8> @orbi_v16i8(<16 x i8> %in) { - %tmp2 = or <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, - i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, - i8 42, i8 42, i8 42, i8 42 > - ret <16 x i8> %tmp2 -} - -define zeroext i8 @orbi_u8(i8 zeroext %in) { - %tmp37 = or i8 %in, 37 ; [#uses=1] - ret i8 %tmp37 -} - -define signext i8 @orbi_i8(i8 signext %in) { - %tmp38 = or i8 %in, 37 ; [#uses=1] - ret i8 %tmp38 -} diff --git a/llvm/test/CodeGen/CellSPU/private.ll b/llvm/test/CodeGen/CellSPU/private.ll deleted file mode 100644 index 1d933ad..0000000 --- a/llvm/test/CodeGen/CellSPU/private.ll +++ /dev/null @@ -1,19 +0,0 @@ -; Test to make sure that the 'private' is used correctly. -; -; RUN: llc < %s -march=cellspu > %t -; RUN: grep .Lfoo: %t -; RUN: grep brsl.*\.Lfoo %t -; RUN: grep .Lbaz: %t -; RUN: grep ila.*\.Lbaz %t - -define private void @foo() { - ret void -} - -@baz = private global i32 4 - -define i32 @bar() { - call void @foo() - %1 = load i32* @baz, align 4 - ret i32 %1 -} diff --git a/llvm/test/CodeGen/CellSPU/rotate_ops.ll b/llvm/test/CodeGen/CellSPU/rotate_ops.ll deleted file mode 100644 index 97709352..0000000 --- a/llvm/test/CodeGen/CellSPU/rotate_ops.ll +++ /dev/null @@ -1,172 +0,0 @@ -; RUN: llc < %s -march=cellspu -o %t1.s -; RUN: grep rot %t1.s | count 86 -; RUN: grep roth %t1.s | count 8 -; RUN: grep roti.*5 %t1.s | count 1 -; RUN: grep roti.*27 %t1.s | count 1 -; RUN: grep rothi.*5 %t1.s | count 2 -; RUN: grep rothi.*11 %t1.s | count 1 -; RUN: grep rothi.*,.3 %t1.s | count 1 -; RUN: grep andhi %t1.s | count 4 -; RUN: grep shlhi %t1.s | count 4 -; RUN: cat %t1.s | FileCheck %s - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; Vector rotates are not currently supported in gcc or llvm assembly. These are -; not tested. - -; 32-bit rotates: -define i32 @rotl32_1a(i32 %arg1, i8 %arg2) { - %tmp1 = zext i8 %arg2 to i32 ; [#uses=1] - %B = shl i32 %arg1, %tmp1 ; [#uses=1] - %arg22 = sub i8 32, %arg2 ; [#uses=1] - %tmp2 = zext i8 %arg22 to i32 ; [#uses=1] - %C = lshr i32 %arg1, %tmp2 ; [#uses=1] - %D = or i32 %B, %C ; [#uses=1] - ret i32 %D -} - -define i32 @rotl32_1b(i32 %arg1, i16 %arg2) { - %tmp1 = zext i16 %arg2 to i32 ; [#uses=1] - %B = shl i32 %arg1, %tmp1 ; [#uses=1] - %arg22 = sub i16 32, %arg2 ; [#uses=1] - %tmp2 = zext i16 %arg22 to i32 ; [#uses=1] - %C = lshr i32 %arg1, %tmp2 ; [#uses=1] - %D = or i32 %B, %C ; [#uses=1] - ret i32 %D -} - -define i32 @rotl32_2(i32 %arg1, i32 %arg2) { - %B = shl i32 %arg1, %arg2 ; [#uses=1] - %tmp1 = sub i32 32, %arg2 ; [#uses=1] - %C = lshr i32 %arg1, %tmp1 ; [#uses=1] - %D = or i32 %B, %C ; [#uses=1] - ret i32 %D -} - -define i32 @rotl32_3(i32 %arg1, i32 %arg2) { - %tmp1 = sub i32 32, %arg2 ; [#uses=1] - %B = shl i32 %arg1, %arg2 ; [#uses=1] - %C = lshr i32 %arg1, %tmp1 ; [#uses=1] - %D = or i32 %B, %C ; [#uses=1] - ret i32 %D -} - -define i32 @rotl32_4(i32 %arg1, i32 %arg2) { - %tmp1 = sub i32 32, %arg2 ; [#uses=1] - %C = lshr i32 %arg1, %tmp1 ; [#uses=1] - %B = shl i32 %arg1, %arg2 ; [#uses=1] - %D = or i32 %B, %C ; [#uses=1] - ret i32 %D -} - -define i32 @rotr32_1(i32 %A, i8 %Amt) { - %tmp1 = zext i8 %Amt to i32 ; [#uses=1] - %B = lshr i32 %A, %tmp1 ; [#uses=1] - %Amt2 = sub i8 32, %Amt ; [#uses=1] - %tmp2 = zext i8 %Amt2 to i32 ; [#uses=1] - %C = shl i32 %A, %tmp2 ; [#uses=1] - %D = or i32 %B, %C ; [#uses=1] - ret i32 %D -} - -define i32 @rotr32_2(i32 %A, i8 %Amt) { - %Amt2 = sub i8 32, %Amt ; [#uses=1] - %tmp1 = zext i8 %Amt to i32 ; [#uses=1] - %B = lshr i32 %A, %tmp1 ; [#uses=1] - %tmp2 = zext i8 %Amt2 to i32 ; [#uses=1] - %C = shl i32 %A, %tmp2 ; [#uses=1] - %D = or i32 %B, %C ; [#uses=1] - ret i32 %D -} - -; Rotate left with immediate -define i32 @rotli32(i32 %A) { - %B = shl i32 %A, 5 ; [#uses=1] - %C = lshr i32 %A, 27 ; [#uses=1] - %D = or i32 %B, %C ; [#uses=1] - ret i32 %D -} - -; Rotate right with immediate -define i32 @rotri32(i32 %A) { - %B = lshr i32 %A, 5 ; [#uses=1] - %C = shl i32 %A, 27 ; [#uses=1] - %D = or i32 %B, %C ; [#uses=1] - ret i32 %D -} - -; 16-bit rotates: -define i16 @rotr16_1(i16 %arg1, i8 %arg) { - %tmp1 = zext i8 %arg to i16 ; [#uses=1] - %B = lshr i16 %arg1, %tmp1 ; [#uses=1] - %arg2 = sub i8 16, %arg ; [#uses=1] - %tmp2 = zext i8 %arg2 to i16 ; [#uses=1] - %C = shl i16 %arg1, %tmp2 ; [#uses=1] - %D = or i16 %B, %C ; [#uses=1] - ret i16 %D -} - -define i16 @rotr16_2(i16 %arg1, i16 %arg) { - %B = lshr i16 %arg1, %arg ; [#uses=1] - %tmp1 = sub i16 16, %arg ; [#uses=1] - %C = shl i16 %arg1, %tmp1 ; [#uses=1] - %D = or i16 %B, %C ; [#uses=1] - ret i16 %D -} - -define i16 @rotli16(i16 %A) { - %B = shl i16 %A, 5 ; [#uses=1] - %C = lshr i16 %A, 11 ; [#uses=1] - %D = or i16 %B, %C ; [#uses=1] - ret i16 %D -} - -define i16 @rotri16(i16 %A) { - %B = lshr i16 %A, 5 ; [#uses=1] - %C = shl i16 %A, 11 ; [#uses=1] - %D = or i16 %B, %C ; [#uses=1] - ret i16 %D -} - -define i8 @rotl8(i8 %A, i8 %Amt) { - %B = shl i8 %A, %Amt ; [#uses=1] - %Amt2 = sub i8 8, %Amt ; [#uses=1] - %C = lshr i8 %A, %Amt2 ; [#uses=1] - %D = or i8 %B, %C ; [#uses=1] - ret i8 %D -} - -define i8 @rotr8(i8 %A, i8 %Amt) { - %B = lshr i8 %A, %Amt ; [#uses=1] - %Amt2 = sub i8 8, %Amt ; [#uses=1] - %C = shl i8 %A, %Amt2 ; [#uses=1] - %D = or i8 %B, %C ; [#uses=1] - ret i8 %D -} - -define i8 @rotli8(i8 %A) { - %B = shl i8 %A, 5 ; [#uses=1] - %C = lshr i8 %A, 3 ; [#uses=1] - %D = or i8 %B, %C ; [#uses=1] - ret i8 %D -} - -define i8 @rotri8(i8 %A) { - %B = lshr i8 %A, 5 ; [#uses=1] - %C = shl i8 %A, 3 ; [#uses=1] - %D = or i8 %B, %C ; [#uses=1] - ret i8 %D -} - -define <2 x float> @test1(<4 x float> %param ) -{ -; CHECK: test1 -; CHECK: shufb - %el = extractelement <4 x float> %param, i32 1 - %vec1 = insertelement <1 x float> undef, float %el, i32 0 - %rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32> -; CHECK: bi $lr - ret <2 x float> %rv -} diff --git a/llvm/test/CodeGen/CellSPU/select_bits.ll b/llvm/test/CodeGen/CellSPU/select_bits.ll deleted file mode 100644 index 65e0aa6..0000000 --- a/llvm/test/CodeGen/CellSPU/select_bits.ll +++ /dev/null @@ -1,572 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep selb %t1.s | count 56 - -; CellSPU legalization is over-sensitive to Legalize's traversal order. -; XFAIL: * - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -; v2i64 -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -; (or (and rC, rB), (and (not rC), rA)) -define <2 x i64> @selectbits_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %C = and <2 x i64> %rC, %rB - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %A, %rA - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -; (or (and rB, rC), (and (not rC), rA)) -define <2 x i64> @selectbits_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %C = and <2 x i64> %rB, %rC - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %A, %rA - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -; (or (and (not rC), rA), (and rB, rC)) -define <2 x i64> @selectbits_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %A, %rA - %C = and <2 x i64> %rB, %rC - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -; (or (and (not rC), rA), (and rC, rB)) -define <2 x i64> @selectbits_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %A, %rA - %C = and <2 x i64> %rC, %rB - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -; (or (and rC, rB), (and rA, (not rC))) -define <2 x i64> @selectbits_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %C = and <2 x i64> %rC, %rB - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %rA, %A - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -; (or (and rB, rC), (and rA, (not rC))) -define <2 x i64> @selectbits_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %C = and <2 x i64> %rB, %rC - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %rA, %A - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -; (or (and rA, (not rC)), (and rB, rC)) -define <2 x i64> @selectbits_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %rA, %A - %C = and <2 x i64> %rB, %rC - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -; (or (and rA, (not rC)), (and rC, rB)) -define <2 x i64> @selectbits_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %rA, %A - %C = and <2 x i64> %rC, %rB - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -; v4i32 -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -; (or (and rC, rB), (and (not rC), rA)) -define <4 x i32> @selectbits_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %C = and <4 x i32> %rC, %rB - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = and <4 x i32> %A, %rA - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -; (or (and rB, rC), (and (not rC), rA)) -define <4 x i32> @selectbits_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %C = and <4 x i32> %rB, %rC - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = and <4 x i32> %A, %rA - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -; (or (and (not rC), rA), (and rB, rC)) -define <4 x i32> @selectbits_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = and <4 x i32> %A, %rA - %C = and <4 x i32> %rB, %rC - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -; (or (and (not rC), rA), (and rC, rB)) -define <4 x i32> @selectbits_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> - %B = and <4 x i32> %A, %rA - %C = and <4 x i32> %rC, %rB - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -; (or (and rC, rB), (and rA, (not rC))) -define <4 x i32> @selectbits_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %C = and <4 x i32> %rC, %rB - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> - %B = and <4 x i32> %rA, %A - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -; (or (and rB, rC), (and rA, (not rC))) -define <4 x i32> @selectbits_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %C = and <4 x i32> %rB, %rC - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> - %B = and <4 x i32> %rA, %A - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -; (or (and rA, (not rC)), (and rB, rC)) -define <4 x i32> @selectbits_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> - %B = and <4 x i32> %rA, %A - %C = and <4 x i32> %rB, %rC - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -; (or (and rA, (not rC)), (and rC, rB)) -define <4 x i32> @selectbits_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> - %B = and <4 x i32> %rA, %A - %C = and <4 x i32> %rC, %rB - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -; v8i16 -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -; (or (and rC, rB), (and (not rC), rA)) -define <8 x i16> @selectbits_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %C = and <8 x i16> %rC, %rB - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %A, %rA - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -; (or (and rB, rC), (and (not rC), rA)) -define <8 x i16> @selectbits_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %C = and <8 x i16> %rB, %rC - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %A, %rA - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -; (or (and (not rC), rA), (and rB, rC)) -define <8 x i16> @selectbits_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %A, %rA - %C = and <8 x i16> %rB, %rC - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -; (or (and (not rC), rA), (and rC, rB)) -define <8 x i16> @selectbits_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %A, %rA - %C = and <8 x i16> %rC, %rB - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -; (or (and rC, rB), (and rA, (not rC))) -define <8 x i16> @selectbits_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %C = and <8 x i16> %rC, %rB - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %rA, %A - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -; (or (and rB, rC), (and rA, (not rC))) -define <8 x i16> @selectbits_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %C = and <8 x i16> %rB, %rC - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %rA, %A - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -; (or (and rA, (not rC)), (and rB, rC)) -define <8 x i16> @selectbits_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %rA, %A - %C = and <8 x i16> %rB, %rC - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -; (or (and rA, (not rC)), (and rC, rB)) -define <8 x i16> @selectbits_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %rA, %A - %C = and <8 x i16> %rC, %rB - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -; v16i8 -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -; (or (and rC, rB), (and (not rC), rA)) -define <16 x i8> @selectbits_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %C = and <16 x i8> %rC, %rB - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %rA - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -; (or (and rB, rC), (and (not rC), rA)) -define <16 x i8> @selectbits_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %C = and <16 x i8> %rB, %rC - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %rA - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -; (or (and (not rC), rA), (and rB, rC)) -define <16 x i8> @selectbits_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %rA - %C = and <16 x i8> %rB, %rC - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -; (or (and (not rC), rA), (and rC, rB)) -define <16 x i8> @selectbits_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %rA - %C = and <16 x i8> %rC, %rB - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -; (or (and rC, rB), (and rA, (not rC))) -define <16 x i8> @selectbits_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %C = and <16 x i8> %rC, %rB - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %rA, %A - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -; (or (and rB, rC), (and rA, (not rC))) -define <16 x i8> @selectbits_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %C = and <16 x i8> %rB, %rC - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %rA, %A - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -; (or (and rA, (not rC)), (and rB, rC)) -define <16 x i8> @selectbits_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %rA, %A - %C = and <16 x i8> %rB, %rC - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -; (or (and rA, (not rC)), (and rC, rB)) -define <16 x i8> @selectbits_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %rA, %A - %C = and <16 x i8> %rC, %rB - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -; i32 -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -; (or (and rC, rB), (and (not rC), rA)) -define i32 @selectbits_i32_01(i32 %rA, i32 %rB, i32 %rC) { - %C = and i32 %rC, %rB - %A = xor i32 %rC, -1 - %B = and i32 %A, %rA - %D = or i32 %C, %B - ret i32 %D -} - -; (or (and rB, rC), (and (not rC), rA)) -define i32 @selectbits_i32_02(i32 %rA, i32 %rB, i32 %rC) { - %C = and i32 %rB, %rC - %A = xor i32 %rC, -1 - %B = and i32 %A, %rA - %D = or i32 %C, %B - ret i32 %D -} - -; (or (and (not rC), rA), (and rB, rC)) -define i32 @selectbits_i32_03(i32 %rA, i32 %rB, i32 %rC) { - %A = xor i32 %rC, -1 - %B = and i32 %A, %rA - %C = and i32 %rB, %rC - %D = or i32 %C, %B - ret i32 %D -} - -; (or (and (not rC), rA), (and rC, rB)) -define i32 @selectbits_i32_04(i32 %rA, i32 %rB, i32 %rC) { - %A = xor i32 %rC, -1 - %B = and i32 %A, %rA - %C = and i32 %rC, %rB - %D = or i32 %C, %B - ret i32 %D -} - -; (or (and rC, rB), (and rA, (not rC))) -define i32 @selectbits_i32_05(i32 %rA, i32 %rB, i32 %rC) { - %C = and i32 %rC, %rB - %A = xor i32 %rC, -1 - %B = and i32 %rA, %A - %D = or i32 %C, %B - ret i32 %D -} - -; (or (and rB, rC), (and rA, (not rC))) -define i32 @selectbits_i32_06(i32 %rA, i32 %rB, i32 %rC) { - %C = and i32 %rB, %rC - %A = xor i32 %rC, -1 - %B = and i32 %rA, %A - %D = or i32 %C, %B - ret i32 %D -} - -; (or (and rA, (not rC)), (and rB, rC)) -define i32 @selectbits_i32_07(i32 %rA, i32 %rB, i32 %rC) { - %A = xor i32 %rC, -1 - %B = and i32 %rA, %A - %C = and i32 %rB, %rC - %D = or i32 %C, %B - ret i32 %D -} - -; (or (and rA, (not rC)), (and rC, rB)) -define i32 @selectbits_i32_08(i32 %rA, i32 %rB, i32 %rC) { - %A = xor i32 %rC, -1 - %B = and i32 %rA, %A - %C = and i32 %rC, %rB - %D = or i32 %C, %B - ret i32 %D -} - -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -; i16 -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -; (or (and rC, rB), (and (not rC), rA)) -define i16 @selectbits_i16_01(i16 %rA, i16 %rB, i16 %rC) { - %C = and i16 %rC, %rB - %A = xor i16 %rC, -1 - %B = and i16 %A, %rA - %D = or i16 %C, %B - ret i16 %D -} - -; (or (and rB, rC), (and (not rC), rA)) -define i16 @selectbits_i16_02(i16 %rA, i16 %rB, i16 %rC) { - %C = and i16 %rB, %rC - %A = xor i16 %rC, -1 - %B = and i16 %A, %rA - %D = or i16 %C, %B - ret i16 %D -} - -; (or (and (not rC), rA), (and rB, rC)) -define i16 @selectbits_i16_03(i16 %rA, i16 %rB, i16 %rC) { - %A = xor i16 %rC, -1 - %B = and i16 %A, %rA - %C = and i16 %rB, %rC - %D = or i16 %C, %B - ret i16 %D -} - -; (or (and (not rC), rA), (and rC, rB)) -define i16 @selectbits_i16_04(i16 %rA, i16 %rB, i16 %rC) { - %A = xor i16 %rC, -1 - %B = and i16 %A, %rA - %C = and i16 %rC, %rB - %D = or i16 %C, %B - ret i16 %D -} - -; (or (and rC, rB), (and rA, (not rC))) -define i16 @selectbits_i16_05(i16 %rA, i16 %rB, i16 %rC) { - %C = and i16 %rC, %rB - %A = xor i16 %rC, -1 - %B = and i16 %rA, %A - %D = or i16 %C, %B - ret i16 %D -} - -; (or (and rB, rC), (and rA, (not rC))) -define i16 @selectbits_i16_06(i16 %rA, i16 %rB, i16 %rC) { - %C = and i16 %rB, %rC - %A = xor i16 %rC, -1 - %B = and i16 %rA, %A - %D = or i16 %C, %B - ret i16 %D -} - -; (or (and rA, (not rC)), (and rB, rC)) -define i16 @selectbits_i16_07(i16 %rA, i16 %rB, i16 %rC) { - %A = xor i16 %rC, -1 - %B = and i16 %rA, %A - %C = and i16 %rB, %rC - %D = or i16 %C, %B - ret i16 %D -} - -; (or (and rA, (not rC)), (and rC, rB)) -define i16 @selectbits_i16_08(i16 %rA, i16 %rB, i16 %rC) { - %A = xor i16 %rC, -1 - %B = and i16 %rA, %A - %C = and i16 %rC, %rB - %D = or i16 %C, %B - ret i16 %D -} - -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -; i8 -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -; (or (and rC, rB), (and (not rC), rA)) -define i8 @selectbits_i8_01(i8 %rA, i8 %rB, i8 %rC) { - %C = and i8 %rC, %rB - %A = xor i8 %rC, -1 - %B = and i8 %A, %rA - %D = or i8 %C, %B - ret i8 %D -} - -; (or (and rB, rC), (and (not rC), rA)) -define i8 @selectbits_i8_02(i8 %rA, i8 %rB, i8 %rC) { - %C = and i8 %rB, %rC - %A = xor i8 %rC, -1 - %B = and i8 %A, %rA - %D = or i8 %C, %B - ret i8 %D -} - -; (or (and (not rC), rA), (and rB, rC)) -define i8 @selectbits_i8_03(i8 %rA, i8 %rB, i8 %rC) { - %A = xor i8 %rC, -1 - %B = and i8 %A, %rA - %C = and i8 %rB, %rC - %D = or i8 %C, %B - ret i8 %D -} - -; (or (and (not rC), rA), (and rC, rB)) -define i8 @selectbits_i8_04(i8 %rA, i8 %rB, i8 %rC) { - %A = xor i8 %rC, -1 - %B = and i8 %A, %rA - %C = and i8 %rC, %rB - %D = or i8 %C, %B - ret i8 %D -} - -; (or (and rC, rB), (and rA, (not rC))) -define i8 @selectbits_i8_05(i8 %rA, i8 %rB, i8 %rC) { - %C = and i8 %rC, %rB - %A = xor i8 %rC, -1 - %B = and i8 %rA, %A - %D = or i8 %C, %B - ret i8 %D -} - -; (or (and rB, rC), (and rA, (not rC))) -define i8 @selectbits_i8_06(i8 %rA, i8 %rB, i8 %rC) { - %C = and i8 %rB, %rC - %A = xor i8 %rC, -1 - %B = and i8 %rA, %A - %D = or i8 %C, %B - ret i8 %D -} - -; (or (and rA, (not rC)), (and rB, rC)) -define i8 @selectbits_i8_07(i8 %rA, i8 %rB, i8 %rC) { - %A = xor i8 %rC, -1 - %B = and i8 %rA, %A - %C = and i8 %rB, %rC - %D = or i8 %C, %B - ret i8 %D -} - -; (or (and rA, (not rC)), (and rC, rB)) -define i8 @selectbits_i8_08(i8 %rA, i8 %rB, i8 %rC) { - %A = xor i8 %rC, -1 - %B = and i8 %rA, %A - %C = and i8 %rC, %rB - %D = or i8 %C, %B - ret i8 %D -} diff --git a/llvm/test/CodeGen/CellSPU/sext128.ll b/llvm/test/CodeGen/CellSPU/sext128.ll deleted file mode 100644 index 6ae9aa5..0000000 --- a/llvm/test/CodeGen/CellSPU/sext128.ll +++ /dev/null @@ -1,71 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -; ModuleID = 'sext128.bc' -target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:128:128-v128:128:128-a0:0:128-s0:128:128" -target triple = "spu" - -define i128 @sext_i64_i128(i64 %a) { -entry: - %0 = sext i64 %a to i128 - ret i128 %0 -; CHECK: long 269488144 -; CHECK: long 269488144 -; CHECK: long 66051 -; CHECK: long 67438087 -; CHECK-NOT: rotqmbyi -; CHECK: lqa -; CHECK: rotmai -; CHECK: shufb -} - -define i128 @sext_i32_i128(i32 %a) { -entry: - %0 = sext i32 %a to i128 - ret i128 %0 -; CHECK: long 269488144 -; CHECK: long 269488144 -; CHECK: long 269488144 -; CHECK: long 66051 -; CHECK-NOT: rotqmbyi -; CHECK: lqa -; CHECK: rotmai -; CHECK: shufb -} - -define i128 @sext_i32_i128a(float %a) { -entry: - %0 = call i32 @myfunc(float %a) - %1 = sext i32 %0 to i128 - ret i128 %1 -; CHECK: long 269488144 -; CHECK: long 269488144 -; CHECK: long 269488144 -; CHECK: long 66051 -; CHECK-NOT: rotqmbyi -; CHECK: lqa -; CHECK: rotmai -; CHECK: shufb -} - -declare i32 @myfunc(float) - -define i128 @func1(i8 %u) { -entry: -; CHECK: xsbh -; CHECK: xshw -; CHECK: rotmai -; CHECK: shufb -; CHECK: bi $lr - %0 = sext i8 %u to i128 - ret i128 %0 -} - -define i128 @func2(i16 %u) { -entry: -; CHECK: xshw -; CHECK: rotmai -; CHECK: shufb -; CHECK: bi $lr - %0 = sext i16 %u to i128 - ret i128 %0 -} diff --git a/llvm/test/CodeGen/CellSPU/shift_ops.ll b/llvm/test/CodeGen/CellSPU/shift_ops.ll deleted file mode 100644 index 1ccc356..0000000 --- a/llvm/test/CodeGen/CellSPU/shift_ops.ll +++ /dev/null @@ -1,348 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep "shlh " %t1.s | count 10 -; RUN: grep "shlhi " %t1.s | count 3 -; RUN: grep "shl " %t1.s | count 10 -; RUN: grep "shli " %t1.s | count 3 -; RUN: grep "xshw " %t1.s | count 5 -; RUN: grep "and " %t1.s | count 15 -; RUN: grep "andi " %t1.s | count 4 -; RUN: grep "rotmi " %t1.s | count 4 -; RUN: grep "rotqmbyi " %t1.s | count 1 -; RUN: grep "rotqmbii " %t1.s | count 2 -; RUN: grep "rotqmby " %t1.s | count 1 -; RUN: grep "rotqmbi " %t1.s | count 2 -; RUN: grep "rotqbyi " %t1.s | count 1 -; RUN: grep "rotqbii " %t1.s | count 2 -; RUN: grep "rotqbybi " %t1.s | count 1 -; RUN: grep "sfi " %t1.s | count 6 -; RUN: cat %t1.s | FileCheck %s - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; Shift left i16 via register, note that the second operand to shl is promoted -; to a 32-bit type: - -define i16 @shlh_i16_1(i16 %arg1, i16 %arg2) { - %A = shl i16 %arg1, %arg2 - ret i16 %A -} - -define i16 @shlh_i16_2(i16 %arg1, i16 %arg2) { - %A = shl i16 %arg2, %arg1 - ret i16 %A -} - -define signext i16 @shlh_i16_3(i16 signext %arg1, i16 signext %arg2) { - %A = shl i16 %arg1, %arg2 - ret i16 %A -} - -define signext i16 @shlh_i16_4(i16 signext %arg1, i16 signext %arg2) { - %A = shl i16 %arg2, %arg1 - ret i16 %A -} - -define zeroext i16 @shlh_i16_5(i16 zeroext %arg1, i16 zeroext %arg2) { - %A = shl i16 %arg1, %arg2 - ret i16 %A -} - -define zeroext i16 @shlh_i16_6(i16 zeroext %arg1, i16 zeroext %arg2) { - %A = shl i16 %arg2, %arg1 - ret i16 %A -} - -; Shift left i16 with immediate: -define i16 @shlhi_i16_1(i16 %arg1) { - %A = shl i16 %arg1, 12 - ret i16 %A -} - -; Should not generate anything other than the return, arg1 << 0 = arg1 -define i16 @shlhi_i16_2(i16 %arg1) { - %A = shl i16 %arg1, 0 - ret i16 %A -} - -define i16 @shlhi_i16_3(i16 %arg1) { - %A = shl i16 16383, %arg1 - ret i16 %A -} - -; Should generate 0, 0 << arg1 = 0 -define i16 @shlhi_i16_4(i16 %arg1) { - %A = shl i16 0, %arg1 - ret i16 %A -} - -define signext i16 @shlhi_i16_5(i16 signext %arg1) { - %A = shl i16 %arg1, 12 - ret i16 %A -} - -; Should not generate anything other than the return, arg1 << 0 = arg1 -define signext i16 @shlhi_i16_6(i16 signext %arg1) { - %A = shl i16 %arg1, 0 - ret i16 %A -} - -define signext i16 @shlhi_i16_7(i16 signext %arg1) { - %A = shl i16 16383, %arg1 - ret i16 %A -} - -; Should generate 0, 0 << arg1 = 0 -define signext i16 @shlhi_i16_8(i16 signext %arg1) { - %A = shl i16 0, %arg1 - ret i16 %A -} - -define zeroext i16 @shlhi_i16_9(i16 zeroext %arg1) { - %A = shl i16 %arg1, 12 - ret i16 %A -} - -; Should not generate anything other than the return, arg1 << 0 = arg1 -define zeroext i16 @shlhi_i16_10(i16 zeroext %arg1) { - %A = shl i16 %arg1, 0 - ret i16 %A -} - -define zeroext i16 @shlhi_i16_11(i16 zeroext %arg1) { - %A = shl i16 16383, %arg1 - ret i16 %A -} - -; Should generate 0, 0 << arg1 = 0 -define zeroext i16 @shlhi_i16_12(i16 zeroext %arg1) { - %A = shl i16 0, %arg1 - ret i16 %A -} - -; Shift left i32 via register, note that the second operand to shl is promoted -; to a 32-bit type: - -define i32 @shl_i32_1(i32 %arg1, i32 %arg2) { - %A = shl i32 %arg1, %arg2 - ret i32 %A -} - -define i32 @shl_i32_2(i32 %arg1, i32 %arg2) { - %A = shl i32 %arg2, %arg1 - ret i32 %A -} - -define signext i32 @shl_i32_3(i32 signext %arg1, i32 signext %arg2) { - %A = shl i32 %arg1, %arg2 - ret i32 %A -} - -define signext i32 @shl_i32_4(i32 signext %arg1, i32 signext %arg2) { - %A = shl i32 %arg2, %arg1 - ret i32 %A -} - -define zeroext i32 @shl_i32_5(i32 zeroext %arg1, i32 zeroext %arg2) { - %A = shl i32 %arg1, %arg2 - ret i32 %A -} - -define zeroext i32 @shl_i32_6(i32 zeroext %arg1, i32 zeroext %arg2) { - %A = shl i32 %arg2, %arg1 - ret i32 %A -} - -; Shift left i32 with immediate: -define i32 @shli_i32_1(i32 %arg1) { - %A = shl i32 %arg1, 12 - ret i32 %A -} - -; Should not generate anything other than the return, arg1 << 0 = arg1 -define i32 @shli_i32_2(i32 %arg1) { - %A = shl i32 %arg1, 0 - ret i32 %A -} - -define i32 @shli_i32_3(i32 %arg1) { - %A = shl i32 16383, %arg1 - ret i32 %A -} - -; Should generate 0, 0 << arg1 = 0 -define i32 @shli_i32_4(i32 %arg1) { - %A = shl i32 0, %arg1 - ret i32 %A -} - -define signext i32 @shli_i32_5(i32 signext %arg1) { - %A = shl i32 %arg1, 12 - ret i32 %A -} - -; Should not generate anything other than the return, arg1 << 0 = arg1 -define signext i32 @shli_i32_6(i32 signext %arg1) { - %A = shl i32 %arg1, 0 - ret i32 %A -} - -define signext i32 @shli_i32_7(i32 signext %arg1) { - %A = shl i32 16383, %arg1 - ret i32 %A -} - -; Should generate 0, 0 << arg1 = 0 -define signext i32 @shli_i32_8(i32 signext %arg1) { - %A = shl i32 0, %arg1 - ret i32 %A -} - -define zeroext i32 @shli_i32_9(i32 zeroext %arg1) { - %A = shl i32 %arg1, 12 - ret i32 %A -} - -; Should not generate anything other than the return, arg1 << 0 = arg1 -define zeroext i32 @shli_i32_10(i32 zeroext %arg1) { - %A = shl i32 %arg1, 0 - ret i32 %A -} - -define zeroext i32 @shli_i32_11(i32 zeroext %arg1) { - %A = shl i32 16383, %arg1 - ret i32 %A -} - -; Should generate 0, 0 << arg1 = 0 -define zeroext i32 @shli_i32_12(i32 zeroext %arg1) { - %A = shl i32 0, %arg1 - ret i32 %A -} - -;; i64 shift left - -define i64 @shl_i64_1(i64 %arg1) { - %A = shl i64 %arg1, 9 - ret i64 %A -} - -define i64 @shl_i64_2(i64 %arg1) { - %A = shl i64 %arg1, 3 - ret i64 %A -} - -define i64 @shl_i64_3(i64 %arg1, i32 %shift) { - %1 = zext i32 %shift to i64 - %2 = shl i64 %arg1, %1 - ret i64 %2 -} - -;; i64 shift right logical (shift 0s from the right) - -define i64 @lshr_i64_1(i64 %arg1) { - %1 = lshr i64 %arg1, 9 - ret i64 %1 -} - -define i64 @lshr_i64_2(i64 %arg1) { - %1 = lshr i64 %arg1, 3 - ret i64 %1 -} - -define i64 @lshr_i64_3(i64 %arg1, i32 %shift) { - %1 = zext i32 %shift to i64 - %2 = lshr i64 %arg1, %1 - ret i64 %2 -} - -;; i64 shift right arithmetic (shift 1s from the right) - -define i64 @ashr_i64_1(i64 %arg) { - %1 = ashr i64 %arg, 9 - ret i64 %1 -} - -define i64 @ashr_i64_2(i64 %arg) { - %1 = ashr i64 %arg, 3 - ret i64 %1 -} - -define i64 @ashr_i64_3(i64 %arg1, i32 %shift) { - %1 = zext i32 %shift to i64 - %2 = ashr i64 %arg1, %1 - ret i64 %2 -} - -define i32 @hi32_i64(i64 %arg) { - %1 = lshr i64 %arg, 32 - %2 = trunc i64 %1 to i32 - ret i32 %2 -} - -; some random tests -define i128 @test_lshr_i128( i128 %val ) { - ;CHECK: test_lshr_i128 - ;CHECK: sfi - ;CHECK: rotqmbi - ;CHECK: rotqmbybi - ;CHECK: bi $lr - %rv = lshr i128 %val, 64 - ret i128 %rv -} - -;Vector shifts -define <2 x i32> @shl_v2i32(<2 x i32> %val, <2 x i32> %sh) { -;CHECK: shl -;CHECK: bi $lr - %rv = shl <2 x i32> %val, %sh - ret <2 x i32> %rv -} - -define <4 x i32> @shl_v4i32(<4 x i32> %val, <4 x i32> %sh) { -;CHECK: shl -;CHECK: bi $lr - %rv = shl <4 x i32> %val, %sh - ret <4 x i32> %rv -} - -define <8 x i16> @shl_v8i16(<8 x i16> %val, <8 x i16> %sh) { -;CHECK: shlh -;CHECK: bi $lr - %rv = shl <8 x i16> %val, %sh - ret <8 x i16> %rv -} - -define <4 x i32> @lshr_v4i32(<4 x i32> %val, <4 x i32> %sh) { -;CHECK: rotm -;CHECK: bi $lr - %rv = lshr <4 x i32> %val, %sh - ret <4 x i32> %rv -} - -define <8 x i16> @lshr_v8i16(<8 x i16> %val, <8 x i16> %sh) { -;CHECK: sfhi -;CHECK: rothm -;CHECK: bi $lr - %rv = lshr <8 x i16> %val, %sh - ret <8 x i16> %rv -} - -define <4 x i32> @ashr_v4i32(<4 x i32> %val, <4 x i32> %sh) { -;CHECK: rotma -;CHECK: bi $lr - %rv = ashr <4 x i32> %val, %sh - ret <4 x i32> %rv -} - -define <8 x i16> @ashr_v8i16(<8 x i16> %val, <8 x i16> %sh) { -;CHECK: sfhi -;CHECK: rotmah -;CHECK: bi $lr - %rv = ashr <8 x i16> %val, %sh - ret <8 x i16> %rv -} - -define <2 x i64> @special_const() { - ret <2 x i64> -} diff --git a/llvm/test/CodeGen/CellSPU/shuffles.ll b/llvm/test/CodeGen/CellSPU/shuffles.ll deleted file mode 100644 index 973586b..0000000 --- a/llvm/test/CodeGen/CellSPU/shuffles.ll +++ /dev/null @@ -1,69 +0,0 @@ -; RUN: llc -O1 --march=cellspu < %s | FileCheck %s - -;CHECK: shuffle -define <4 x float> @shuffle(<4 x float> %param1, <4 x float> %param2) { - ; CHECK: cwd {{\$.}}, 0($sp) - ; CHECK: shufb {{\$., \$4, \$3, \$.}} - %val= shufflevector <4 x float> %param1, <4 x float> %param2, <4 x i32> - ret <4 x float> %val -} - -;CHECK: splat -define <4 x float> @splat(float %param1) { - ; CHECK: lqa - ; CHECK: shufb $3 - ; CHECK: bi - %vec = insertelement <1 x float> undef, float %param1, i32 0 - %val= shufflevector <1 x float> %vec, <1 x float> undef, <4 x i32> - ret <4 x float> %val -} - -;CHECK: test_insert -define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) { - %sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0 -;CHECK: lqa $6, -;CHECK: shufb $4, $4, $5, $6 - %sl2_17 = insertelement <2 x float> %sl2_17_tmp1, float %val2, i32 1 - -;CHECK: cdd $5, 0($3) -;CHECK: lqd $6, 0($3) -;CHECK: shufb $4, $4, $6, $5 -;CHECK: stqd $4, 0($3) -;CHECK: bi $lr - store <2 x float> %sl2_17, <2 x float>* %ptr - ret void -} - -;CHECK: test_insert_1 -define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) { -;CHECK: cwd $5, 4($sp) -;CHECK: shufb $3, $4, $3, $5 -;CHECK: bi $lr - %rv = insertelement <4 x float> %vparam, float %eltparam, i32 1 - ret <4 x float> %rv -} - -;CHECK: test_v2i32 -define <2 x i32> @test_v2i32(<4 x i32>%vec) -{ -;CHECK: rotqbyi $3, $3, 4 -;CHECK: bi $lr - %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, <2 x i32> - ret <2 x i32> %rv -} - -define <4 x i32> @test_v4i32_rot8(<4 x i32>%vec) -{ - %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, - <4 x i32> - ret <4 x i32> %rv -} - -;CHECK: test_v4i32_rot4 -define <4 x i32> @test_v4i32_rot4(<4 x i32>%vec) -{ - %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, - <4 x i32> - ret <4 x i32> %rv -} - diff --git a/llvm/test/CodeGen/CellSPU/sp_farith.ll b/llvm/test/CodeGen/CellSPU/sp_farith.ll deleted file mode 100644 index 80bf47c..0000000 --- a/llvm/test/CodeGen/CellSPU/sp_farith.ll +++ /dev/null @@ -1,90 +0,0 @@ -; RUN: llc < %s -march=cellspu -enable-unsafe-fp-math > %t1.s -; RUN: grep fa %t1.s | count 2 -; RUN: grep fs %t1.s | count 2 -; RUN: grep fm %t1.s | count 6 -; RUN: grep fma %t1.s | count 2 -; RUN: grep fms %t1.s | count 2 -; RUN: grep fnms %t1.s | count 3 -; -; This file includes standard floating point arithmetic instructions -; NOTE fdiv is tested separately since it is a compound operation -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define float @fp_add(float %arg1, float %arg2) { - %A = fadd float %arg1, %arg2 ; [#uses=1] - ret float %A -} - -define <4 x float> @fp_add_vec(<4 x float> %arg1, <4 x float> %arg2) { - %A = fadd <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - ret <4 x float> %A -} - -define float @fp_sub(float %arg1, float %arg2) { - %A = fsub float %arg1, %arg2 ; [#uses=1] - ret float %A -} - -define <4 x float> @fp_sub_vec(<4 x float> %arg1, <4 x float> %arg2) { - %A = fsub <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - ret <4 x float> %A -} - -define float @fp_mul(float %arg1, float %arg2) { - %A = fmul float %arg1, %arg2 ; [#uses=1] - ret float %A -} - -define <4 x float> @fp_mul_vec(<4 x float> %arg1, <4 x float> %arg2) { - %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - ret <4 x float> %A -} - -define float @fp_mul_add(float %arg1, float %arg2, float %arg3) { - %A = fmul float %arg1, %arg2 ; [#uses=1] - %B = fadd float %A, %arg3 ; [#uses=1] - ret float %B -} - -define <4 x float> @fp_mul_add_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { - %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - %B = fadd <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1] - ret <4 x float> %B -} - -define float @fp_mul_sub(float %arg1, float %arg2, float %arg3) { - %A = fmul float %arg1, %arg2 ; [#uses=1] - %B = fsub float %A, %arg3 ; [#uses=1] - ret float %B -} - -define <4 x float> @fp_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { - %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - %B = fsub <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1] - ret <4 x float> %B -} - -; Test the straightforward way of getting fnms -; c - a * b -define float @fp_neg_mul_sub_1(float %arg1, float %arg2, float %arg3) { - %A = fmul float %arg1, %arg2 - %B = fsub float %arg3, %A - ret float %B -} - -; Test another way of getting fnms -; - ( a *b -c ) = c - a * b -define float @fp_neg_mul_sub_2(float %arg1, float %arg2, float %arg3) { - %A = fmul float %arg1, %arg2 - %B = fsub float %A, %arg3 - %C = fsub float -0.0, %B - ret float %C -} - -define <4 x float> @fp_neg_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { - %A = fmul <4 x float> %arg1, %arg2 - %B = fsub <4 x float> %A, %arg3 - %D = fsub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B - ret <4 x float> %D -} diff --git a/llvm/test/CodeGen/CellSPU/stores.ll b/llvm/test/CodeGen/CellSPU/stores.ll deleted file mode 100644 index 43f8776..0000000 --- a/llvm/test/CodeGen/CellSPU/stores.ll +++ /dev/null @@ -1,181 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep 'stqd.*0($3)' %t1.s | count 4 -; RUN: grep 'stqd.*16($3)' %t1.s | count 4 -; RUN: grep 16256 %t1.s | count 2 -; RUN: grep 16384 %t1.s | count 1 -; RUN: grep 771 %t1.s | count 4 -; RUN: grep 515 %t1.s | count 2 -; RUN: grep 1799 %t1.s | count 2 -; RUN: grep 1543 %t1.s | count 5 -; RUN: grep 1029 %t1.s | count 3 -; RUN: grep 'shli.*, 4' %t1.s | count 4 -; RUN: grep stqx %t1.s | count 4 -; RUN: grep ilhu %t1.s | count 11 -; RUN: grep iohl %t1.s | count 8 -; RUN: grep shufb %t1.s | count 15 -; RUN: grep frds %t1.s | count 1 -; RUN: llc < %s -march=cellspu | FileCheck %s - -; ModuleID = 'stores.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define void @store_v16i8_1(<16 x i8>* %a) nounwind { -entry: - store <16 x i8> < i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1 >, <16 x i8>* %a - ret void -} - -define void @store_v16i8_2(<16 x i8>* %a) nounwind { -entry: - %arrayidx = getelementptr <16 x i8>* %a, i32 1 - store <16 x i8> < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 >, <16 x i8>* %arrayidx - ret void -} - -define void @store_v16i8_3(<16 x i8>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <16 x i8>* %a, i32 %i - store <16 x i8> < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 >, <16 x i8>* %arrayidx - ret void -} - -define void @store_v8i16_1(<8 x i16>* %a) nounwind { -entry: - store <8 x i16> < i16 1, i16 2, i16 1, i16 1, i16 1, i16 2, i16 1, i16 1 >, <8 x i16>* %a - ret void -} - -define void @store_v8i16_2(<8 x i16>* %a) nounwind { -entry: - %arrayidx = getelementptr <8 x i16>* %a, i16 1 - store <8 x i16> < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2 >, <8 x i16>* %arrayidx - ret void -} - -define void @store_v8i16_3(<8 x i16>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <8 x i16>* %a, i32 %i - store <8 x i16> < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >, <8 x i16>* %arrayidx - ret void -} - -define void @store_v4i32_1(<4 x i32>* %a) nounwind { -entry: - store <4 x i32> < i32 1, i32 2, i32 1, i32 1 >, <4 x i32>* %a - ret void -} - -define void @store_v4i32_2(<4 x i32>* %a) nounwind { -entry: - %arrayidx = getelementptr <4 x i32>* %a, i32 1 - store <4 x i32> < i32 2, i32 2, i32 2, i32 2 >, <4 x i32>* %arrayidx - ret void -} - -define void @store_v4i32_3(<4 x i32>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <4 x i32>* %a, i32 %i - store <4 x i32> < i32 1, i32 1, i32 1, i32 1 >, <4 x i32>* %arrayidx - ret void -} - -define void @store_v4f32_1(<4 x float>* %a) nounwind { -entry: - store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %a - ret void -} - -define void @store_v4f32_2(<4 x float>* %a) nounwind { -entry: - %arrayidx = getelementptr <4 x float>* %a, i32 1 - store <4 x float> < float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00 >, <4 x float>* %arrayidx - ret void -} - -define void @store_v4f32_3(<4 x float>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <4 x float>* %a, i32 %i - store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %arrayidx - ret void -} - -; Test truncating stores: - -define zeroext i8 @tstore_i16_i8(i16 signext %val, i8* %dest) nounwind { -entry: - %conv = trunc i16 %val to i8 - store i8 %conv, i8* %dest - ret i8 %conv -} - -define zeroext i8 @tstore_i32_i8(i32 %val, i8* %dest) nounwind { -entry: - %conv = trunc i32 %val to i8 - store i8 %conv, i8* %dest - ret i8 %conv -} - -define signext i16 @tstore_i32_i16(i32 %val, i16* %dest) nounwind { -entry: - %conv = trunc i32 %val to i16 - store i16 %conv, i16* %dest - ret i16 %conv -} - -define zeroext i8 @tstore_i64_i8(i64 %val, i8* %dest) nounwind { -entry: - %conv = trunc i64 %val to i8 - store i8 %conv, i8* %dest - ret i8 %conv -} - -define signext i16 @tstore_i64_i16(i64 %val, i16* %dest) nounwind { -entry: - %conv = trunc i64 %val to i16 - store i16 %conv, i16* %dest - ret i16 %conv -} - -define i32 @tstore_i64_i32(i64 %val, i32* %dest) nounwind { -entry: - %conv = trunc i64 %val to i32 - store i32 %conv, i32* %dest - ret i32 %conv -} - -define float @tstore_f64_f32(double %val, float* %dest) nounwind { -entry: - %conv = fptrunc double %val to float - store float %conv, float* %dest - ret float %conv -} - -;Check stores that might span two 16 byte memory blocks -define void @store_misaligned( i32 %val, i32* %ptr) { -;CHECK: store_misaligned -;CHECK: lqd -;CHECK: lqd -;CHECK: stqd -;CHECK: stqd -;CHECK: bi $lr - store i32 %val, i32*%ptr, align 2 - ret void -} - -define void @store_v8( <8 x float> %val, <8 x float>* %ptr ) -{ -;CHECK: stq -;CHECK: stq -;CHECK: bi $lr - store <8 x float> %val, <8 x float>* %ptr - ret void -} - -define void @store_null_vec( <4 x i32> %val ) { -; FIXME - this is for some reason compiled into a il+stqd, not a sta. -;CHECK: stqd -;CHECK: bi $lr - store <4 x i32> %val, <4 x i32>* null - ret void -} diff --git a/llvm/test/CodeGen/CellSPU/storestruct.ll b/llvm/test/CodeGen/CellSPU/storestruct.ll deleted file mode 100644 index 47185e8..0000000 --- a/llvm/test/CodeGen/CellSPU/storestruct.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -%0 = type {i32, i32} -@buffer = global [ 72 x %0 ] zeroinitializer - -define void@test( ) { -; Check that there is no illegal "a rt, ra, imm" instruction -; CHECK-NOT: a {{\$., \$., 5..}} -; CHECK: a {{\$., \$., \$.}} - store %0 {i32 1, i32 2} , - %0* getelementptr ([72 x %0]* @buffer, i32 0, i32 71) - ret void -} diff --git a/llvm/test/CodeGen/CellSPU/struct_1.ll b/llvm/test/CodeGen/CellSPU/struct_1.ll deleted file mode 100644 index 8c32750..0000000 --- a/llvm/test/CodeGen/CellSPU/struct_1.ll +++ /dev/null @@ -1,147 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s -; RUN: grep lqa %t1.s | count 5 -; RUN: grep lqd %t1.s | count 11 -; RUN: grep rotqbyi %t1.s | count 7 -; RUN: grep xshw %t1.s | count 1 -; RUN: grep andi %t1.s | count 5 -; RUN: grep cbd %t1.s | count 3 -; RUN: grep chd %t1.s | count 1 -; RUN: grep cwd %t1.s | count 3 -; RUN: grep shufb %t1.s | count 7 -; RUN: grep stqd %t1.s | count 7 -; RUN: grep iohl %t2.s | count 16 -; RUN: grep ilhu %t2.s | count 16 -; RUN: grep lqd %t2.s | count 16 -; RUN: grep rotqbyi %t2.s | count 7 -; RUN: grep xshw %t2.s | count 1 -; RUN: grep andi %t2.s | count 5 -; RUN: grep cbd %t2.s | count 3 -; RUN: grep chd %t2.s | count 1 -; RUN: grep cwd %t2.s | count 3 -; RUN: grep shufb %t2.s | count 7 -; RUN: grep stqd %t2.s | count 7 - -; CellSPU legalization is over-sensitive to Legalize's traversal order. -; XFAIL: * - -; ModuleID = 'struct_1.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; struct hackstate { -; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3) -; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3) -; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3) -; int i1; // offset 4 (rotate left by 4 bytes to byte 0) -; short s1; // offset 8 (rotate left by 6 bytes to byte 2) -; int i2; // offset 12 [ignored] -; unsigned char c4; // offset 16 [ignored] -; unsigned char c5; // offset 17 [ignored] -; unsigned char c6; // offset 18 (rotate left by 14 bytes to byte 3) -; unsigned char c7; // offset 19 (no rotate, in preferred slot) -; int i3; // offset 20 [ignored] -; int i4; // offset 24 [ignored] -; int i5; // offset 28 [ignored] -; int i6; // offset 32 (no rotate, in preferred slot) -; } -%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 } - -; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } -@state = global %struct.hackstate zeroinitializer, align 16 - -define zeroext i8 @get_hackstate_c1() nounwind { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 - ret i8 %tmp2 -} - -define zeroext i8 @get_hackstate_c2() nounwind { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 - ret i8 %tmp2 -} - -define zeroext i8 @get_hackstate_c3() nounwind { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 - ret i8 %tmp2 -} - -define i32 @get_hackstate_i1() nounwind { -entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 - ret i32 %tmp2 -} - -define signext i16 @get_hackstate_s1() nounwind { -entry: - %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 - ret i16 %tmp2 -} - -define zeroext i8 @get_hackstate_c6() nounwind { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 8), align 16 - ret i8 %tmp2 -} - -define zeroext i8 @get_hackstate_c7() nounwind { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16 - ret i8 %tmp2 -} - -define i32 @get_hackstate_i3() nounwind { -entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 - ret i32 %tmp2 -} - -define i32 @get_hackstate_i6() nounwind { -entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 - ret i32 %tmp2 -} - -define void @set_hackstate_c1(i8 zeroext %c) nounwind { -entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 - ret void -} - -define void @set_hackstate_c2(i8 zeroext %c) nounwind { -entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 - ret void -} - -define void @set_hackstate_c3(i8 zeroext %c) nounwind { -entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 - ret void -} - -define void @set_hackstate_i1(i32 %i) nounwind { -entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 - ret void -} - -define void @set_hackstate_s1(i16 signext %s) nounwind { -entry: - store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 - ret void -} - -define void @set_hackstate_i3(i32 %i) nounwind { -entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 - ret void -} - -define void @set_hackstate_i6(i32 %i) nounwind { -entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 - ret void -} diff --git a/llvm/test/CodeGen/CellSPU/sub_ops.ll b/llvm/test/CodeGen/CellSPU/sub_ops.ll deleted file mode 100644 index f0c40d3..0000000 --- a/llvm/test/CodeGen/CellSPU/sub_ops.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -define i32 @subword( i32 %param1, i32 %param2) { -; Check ordering of registers ret=param1-param2 -> rt=rb-ra -; CHECK-NOT: sf $3, $3, $4 -; CHECK: sf $3, $4, $3 - %1 = sub i32 %param1, %param2 - ret i32 %1 -} - -define i16 @subhword( i16 %param1, i16 %param2) { -; Check ordering of registers ret=param1-param2 -> rt=rb-ra -; CHECK-NOT: sfh $3, $3, $4 -; CHECK: sfh $3, $4, $3 - %1 = sub i16 %param1, %param2 - ret i16 %1 -} - -define float @subfloat( float %param1, float %param2) { -; Check ordering of registers ret=param1-param2 -> rt=ra-rb -; (yes this is reverse of i32 instruction) -; CHECK-NOT: fs $3, $4, $3 -; CHECK: fs $3, $3, $4 - %1 = fsub float %param1, %param2 - ret float %1 -} diff --git a/llvm/test/CodeGen/CellSPU/trunc.ll b/llvm/test/CodeGen/CellSPU/trunc.ll deleted file mode 100644 index e4c8fb4..0000000 --- a/llvm/test/CodeGen/CellSPU/trunc.ll +++ /dev/null @@ -1,94 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep shufb %t1.s | count 19 -; RUN: grep "ilhu.*1799" %t1.s | count 1 -; RUN: grep "ilhu.*771" %t1.s | count 2 -; RUN: grep "ilhu.*1543" %t1.s | count 1 -; RUN: grep "ilhu.*1029" %t1.s | count 1 -; RUN: grep "ilhu.*515" %t1.s | count 1 -; RUN: grep "ilhu.*3855" %t1.s | count 1 -; RUN: grep "ilhu.*3599" %t1.s | count 1 -; RUN: grep "ilhu.*3085" %t1.s | count 1 -; RUN: grep "iohl.*3855" %t1.s | count 1 -; RUN: grep "iohl.*3599" %t1.s | count 2 -; RUN: grep "iohl.*1543" %t1.s | count 2 -; RUN: grep "iohl.*771" %t1.s | count 2 -; RUN: grep "iohl.*515" %t1.s | count 1 -; RUN: grep "iohl.*1799" %t1.s | count 1 -; RUN: grep lqa %t1.s | count 1 -; RUN: grep cbd %t1.s | count 4 -; RUN: grep chd %t1.s | count 3 -; RUN: grep cwd %t1.s | count 1 -; RUN: grep cdd %t1.s | count 1 - -; ModuleID = 'trunc.bc' -target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128" -target triple = "spu" - -define <16 x i8> @trunc_i128_i8(i128 %u, <16 x i8> %v) { -entry: - %0 = trunc i128 %u to i8 - %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 15 - ret <16 x i8> %tmp1 -} - -define <8 x i16> @trunc_i128_i16(i128 %u, <8 x i16> %v) { -entry: - %0 = trunc i128 %u to i16 - %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 8 - ret <8 x i16> %tmp1 -} - -define <4 x i32> @trunc_i128_i32(i128 %u, <4 x i32> %v) { -entry: - %0 = trunc i128 %u to i32 - %tmp1 = insertelement <4 x i32> %v, i32 %0, i32 2 - ret <4 x i32> %tmp1 -} - -define <2 x i64> @trunc_i128_i64(i128 %u, <2 x i64> %v) { -entry: - %0 = trunc i128 %u to i64 - %tmp1 = insertelement <2 x i64> %v, i64 %0, i32 1 - ret <2 x i64> %tmp1 -} - -define <16 x i8> @trunc_i64_i8(i64 %u, <16 x i8> %v) { -entry: - %0 = trunc i64 %u to i8 - %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 10 - ret <16 x i8> %tmp1 -} - -define <8 x i16> @trunc_i64_i16(i64 %u, <8 x i16> %v) { -entry: - %0 = trunc i64 %u to i16 - %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 6 - ret <8 x i16> %tmp1 -} - -define i32 @trunc_i64_i32(i64 %u) { -entry: - %0 = trunc i64 %u to i32 - ret i32 %0 -} - -define <16 x i8> @trunc_i32_i8(i32 %u, <16 x i8> %v) { -entry: - %0 = trunc i32 %u to i8 - %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 7 - ret <16 x i8> %tmp1 -} - -define <8 x i16> @trunc_i32_i16(i32 %u, <8 x i16> %v) { -entry: - %0 = trunc i32 %u to i16 - %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 3 - ret <8 x i16> %tmp1 -} - -define <16 x i8> @trunc_i16_i8(i16 %u, <16 x i8> %v) { -entry: - %0 = trunc i16 %u to i8 - %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 5 - ret <16 x i8> %tmp1 -} diff --git a/llvm/test/CodeGen/CellSPU/useful-harnesses/README.txt b/llvm/test/CodeGen/CellSPU/useful-harnesses/README.txt deleted file mode 100644 index d87b398..0000000 --- a/llvm/test/CodeGen/CellSPU/useful-harnesses/README.txt +++ /dev/null @@ -1,5 +0,0 @@ -This directory contains code that's not part of the DejaGNU test suite, -but is generally useful as various test harnesses. - -vecoperations.c: Various vector operation sanity checks, e.g., shuffles, - 8-bit vector add and multiply. diff --git a/llvm/test/CodeGen/CellSPU/useful-harnesses/i32operations.c b/llvm/test/CodeGen/CellSPU/useful-harnesses/i32operations.c deleted file mode 100644 index 12fc30b..0000000 --- a/llvm/test/CodeGen/CellSPU/useful-harnesses/i32operations.c +++ /dev/null @@ -1,69 +0,0 @@ -#include - -typedef unsigned int uint32_t; -typedef int int32_t; - -const char *boolstring(int val) { - return val ? "true" : "false"; -} - -int i32_eq(int32_t a, int32_t b) { - return (a == b); -} - -int i32_neq(int32_t a, int32_t b) { - return (a != b); -} - -int32_t i32_eq_select(int32_t a, int32_t b, int32_t c, int32_t d) { - return ((a == b) ? c : d); -} - -int32_t i32_neq_select(int32_t a, int32_t b, int32_t c, int32_t d) { - return ((a != b) ? c : d); -} - -struct pred_s { - const char *name; - int (*predfunc)(int32_t, int32_t); - int (*selfunc)(int32_t, int32_t, int32_t, int32_t); -}; - -struct pred_s preds[] = { - { "eq", i32_eq, i32_eq_select }, - { "neq", i32_neq, i32_neq_select } -}; - -int main(void) { - int i; - int32_t a = 1234567890; - int32_t b = 345678901; - int32_t c = 1234500000; - int32_t d = 10001; - int32_t e = 10000; - - printf("a = %12d (0x%08x)\n", a, a); - printf("b = %12d (0x%08x)\n", b, b); - printf("c = %12d (0x%08x)\n", c, c); - printf("d = %12d (0x%08x)\n", d, d); - printf("e = %12d (0x%08x)\n", e, e); - printf("----------------------------------------\n"); - - for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) { - printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a))); - printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a))); - printf("a %s b = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, b))); - printf("a %s c = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, c))); - printf("d %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(d, e))); - printf("e %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(e, e))); - - printf("a %s a ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, a, c, d)); - printf("a %s a ? c : d == c (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, a, c, d) == c)); - printf("a %s b ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, b, c, d)); - printf("a %s b ? c : d == d (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, b, c, d) == d)); - - printf("----------------------------------------\n"); - } - - return 0; -} diff --git a/llvm/test/CodeGen/CellSPU/useful-harnesses/i64operations.c b/llvm/test/CodeGen/CellSPU/useful-harnesses/i64operations.c deleted file mode 100644 index b613bd8..0000000 --- a/llvm/test/CodeGen/CellSPU/useful-harnesses/i64operations.c +++ /dev/null @@ -1,673 +0,0 @@ -#include -#include "i64operations.h" - -int64_t tval_a = 1234567890003LL; -int64_t tval_b = 2345678901235LL; -int64_t tval_c = 1234567890001LL; -int64_t tval_d = 10001LL; -int64_t tval_e = 10000LL; -uint64_t tval_f = 0xffffff0750135eb9; -int64_t tval_g = -1; - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ - -int -i64_eq(int64_t a, int64_t b) -{ - return (a == b); -} - -int -i64_neq(int64_t a, int64_t b) -{ - return (a != b); -} - -int -i64_gt(int64_t a, int64_t b) -{ - return (a > b); -} - -int -i64_le(int64_t a, int64_t b) -{ - return (a <= b); -} - -int -i64_ge(int64_t a, int64_t b) { - return (a >= b); -} - -int -i64_lt(int64_t a, int64_t b) { - return (a < b); -} - -int -i64_uge(uint64_t a, uint64_t b) -{ - return (a >= b); -} - -int -i64_ult(uint64_t a, uint64_t b) -{ - return (a < b); -} - -int -i64_ugt(uint64_t a, uint64_t b) -{ - return (a > b); -} - -int -i64_ule(uint64_t a, uint64_t b) -{ - return (a <= b); -} - -int64_t -i64_eq_select(int64_t a, int64_t b, int64_t c, int64_t d) -{ - return ((a == b) ? c : d); -} - -int64_t -i64_neq_select(int64_t a, int64_t b, int64_t c, int64_t d) -{ - return ((a != b) ? c : d); -} - -int64_t -i64_gt_select(int64_t a, int64_t b, int64_t c, int64_t d) { - return ((a > b) ? c : d); -} - -int64_t -i64_le_select(int64_t a, int64_t b, int64_t c, int64_t d) { - return ((a <= b) ? c : d); -} - -int64_t -i64_ge_select(int64_t a, int64_t b, int64_t c, int64_t d) { - return ((a >= b) ? c : d); -} - -int64_t -i64_lt_select(int64_t a, int64_t b, int64_t c, int64_t d) { - return ((a < b) ? c : d); -} - -uint64_t -i64_ugt_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) -{ - return ((a > b) ? c : d); -} - -uint64_t -i64_ule_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) -{ - return ((a <= b) ? c : d); -} - -uint64_t -i64_uge_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) { - return ((a >= b) ? c : d); -} - -uint64_t -i64_ult_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) { - return ((a < b) ? c : d); -} - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ - -struct harness_int64_pred int64_tests_eq[] = { - {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c} -}; - -struct harness_int64_pred int64_tests_neq[] = { - {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d} -}; - -struct harness_int64_pred int64_tests_sgt[] = { - {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d} -}; - -struct harness_int64_pred int64_tests_sle[] = { - {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c} -}; - -struct harness_int64_pred int64_tests_sge[] = { - {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c} -}; - -struct harness_int64_pred int64_tests_slt[] = { - {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d} -}; - -struct int64_pred_s int64_preds[] = { - {"eq", i64_eq, i64_eq_select, - int64_tests_eq, ARR_SIZE(int64_tests_eq)}, - {"neq", i64_neq, i64_neq_select, - int64_tests_neq, ARR_SIZE(int64_tests_neq)}, - {"gt", i64_gt, i64_gt_select, - int64_tests_sgt, ARR_SIZE(int64_tests_sgt)}, - {"le", i64_le, i64_le_select, - int64_tests_sle, ARR_SIZE(int64_tests_sle)}, - {"ge", i64_ge, i64_ge_select, - int64_tests_sge, ARR_SIZE(int64_tests_sge)}, - {"lt", i64_lt, i64_lt_select, - int64_tests_slt, ARR_SIZE(int64_tests_slt)} -}; - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ - -struct harness_uint64_pred uint64_tests_ugt[] = { - {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, - {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d }, - {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c }, - {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c }, - {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d } -}; - -struct harness_uint64_pred uint64_tests_ule[] = { - {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, - {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, - {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, - {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, - {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c} -}; - -struct harness_uint64_pred uint64_tests_uge[] = { - {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, - {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, - {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, - {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, - {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c} -}; - -struct harness_uint64_pred uint64_tests_ult[] = { - {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, - {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, - {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, - {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, - {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d} -}; - -struct uint64_pred_s uint64_preds[] = { - {"ugt", i64_ugt, i64_ugt_select, - uint64_tests_ugt, ARR_SIZE(uint64_tests_ugt)}, - {"ule", i64_ule, i64_ule_select, - uint64_tests_ule, ARR_SIZE(uint64_tests_ule)}, - {"uge", i64_uge, i64_uge_select, - uint64_tests_uge, ARR_SIZE(uint64_tests_uge)}, - {"ult", i64_ult, i64_ult_select, - uint64_tests_ult, ARR_SIZE(uint64_tests_ult)} -}; - -int -compare_expect_int64(const struct int64_pred_s * pred) -{ - int j, failed = 0; - - for (j = 0; j < pred->n_tests; ++j) { - int pred_result; - - pred_result = (*pred->predfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs); - - if (pred_result != pred->tests[j].expected) { - char str[64]; - - sprintf(str, pred->tests[j].fmt_string, pred->name); - printf("%s: returned value is %d, expecting %d\n", str, - pred_result, pred->tests[j].expected); - printf(" lhs = %19lld (0x%016llx)\n", *pred->tests[j].lhs, - *pred->tests[j].lhs); - printf(" rhs = %19lld (0x%016llx)\n", *pred->tests[j].rhs, - *pred->tests[j].rhs); - ++failed; - } else { - int64_t selresult; - - selresult = (pred->selfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs, - *pred->tests[j].select_a, - *pred->tests[j].select_b); - - if (selresult != *pred->tests[j].select_expected) { - char str[64]; - - sprintf(str, pred->tests[j].fmt_string, pred->name); - printf("%s select: returned value is %d, expecting %d\n", str, - pred_result, pred->tests[j].expected); - printf(" lhs = %19lld (0x%016llx)\n", *pred->tests[j].lhs, - *pred->tests[j].lhs); - printf(" rhs = %19lld (0x%016llx)\n", *pred->tests[j].rhs, - *pred->tests[j].rhs); - printf(" true = %19lld (0x%016llx)\n", *pred->tests[j].select_a, - *pred->tests[j].select_a); - printf(" false = %19lld (0x%016llx)\n", *pred->tests[j].select_b, - *pred->tests[j].select_b); - ++failed; - } - } - } - - printf(" %d tests performed, should be %d.\n", j, pred->n_tests); - - return failed; -} - -int -compare_expect_uint64(const struct uint64_pred_s * pred) -{ - int j, failed = 0; - - for (j = 0; j < pred->n_tests; ++j) { - int pred_result; - - pred_result = (*pred->predfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs); - if (pred_result != pred->tests[j].expected) { - char str[64]; - - sprintf(str, pred->tests[j].fmt_string, pred->name); - printf("%s: returned value is %d, expecting %d\n", str, - pred_result, pred->tests[j].expected); - printf(" lhs = %19llu (0x%016llx)\n", *pred->tests[j].lhs, - *pred->tests[j].lhs); - printf(" rhs = %19llu (0x%016llx)\n", *pred->tests[j].rhs, - *pred->tests[j].rhs); - ++failed; - } else { - uint64_t selresult; - - selresult = (pred->selfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs, - *pred->tests[j].select_a, - *pred->tests[j].select_b); - if (selresult != *pred->tests[j].select_expected) { - char str[64]; - - sprintf(str, pred->tests[j].fmt_string, pred->name); - printf("%s select: returned value is %d, expecting %d\n", str, - pred_result, pred->tests[j].expected); - printf(" lhs = %19llu (0x%016llx)\n", *pred->tests[j].lhs, - *pred->tests[j].lhs); - printf(" rhs = %19llu (0x%016llx)\n", *pred->tests[j].rhs, - *pred->tests[j].rhs); - printf(" true = %19llu (0x%016llx)\n", *pred->tests[j].select_a, - *pred->tests[j].select_a); - printf(" false = %19llu (0x%016llx)\n", *pred->tests[j].select_b, - *pred->tests[j].select_b); - ++failed; - } - } - } - - printf(" %d tests performed, should be %d.\n", j, pred->n_tests); - - return failed; -} - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ - -int -test_i64_sext_i32(int in, int64_t expected) { - int64_t result = (int64_t) in; - - if (result != expected) { - char str[64]; - sprintf(str, "i64_sext_i32(%d) returns %lld\n", in, result); - return 1; - } - - return 0; -} - -int -test_i64_sext_i16(short in, int64_t expected) { - int64_t result = (int64_t) in; - - if (result != expected) { - char str[64]; - sprintf(str, "i64_sext_i16(%hd) returns %lld\n", in, result); - return 1; - } - - return 0; -} - -int -test_i64_sext_i8(signed char in, int64_t expected) { - int64_t result = (int64_t) in; - - if (result != expected) { - char str[64]; - sprintf(str, "i64_sext_i8(%d) returns %lld\n", in, result); - return 1; - } - - return 0; -} - -int -test_i64_zext_i32(unsigned int in, uint64_t expected) { - uint64_t result = (uint64_t) in; - - if (result != expected) { - char str[64]; - sprintf(str, "i64_zext_i32(%u) returns %llu\n", in, result); - return 1; - } - - return 0; -} - -int -test_i64_zext_i16(unsigned short in, uint64_t expected) { - uint64_t result = (uint64_t) in; - - if (result != expected) { - char str[64]; - sprintf(str, "i64_zext_i16(%hu) returns %llu\n", in, result); - return 1; - } - - return 0; -} - -int -test_i64_zext_i8(unsigned char in, uint64_t expected) { - uint64_t result = (uint64_t) in; - - if (result != expected) { - char str[64]; - sprintf(str, "i64_zext_i8(%u) returns %llu\n", in, result); - return 1; - } - - return 0; -} - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ - -int64_t -i64_shl_const(int64_t a) { - return a << 10; -} - -int64_t -i64_shl(int64_t a, int amt) { - return a << amt; -} - -uint64_t -u64_shl_const(uint64_t a) { - return a << 10; -} - -uint64_t -u64_shl(uint64_t a, int amt) { - return a << amt; -} - -int64_t -i64_srl_const(int64_t a) { - return a >> 10; -} - -int64_t -i64_srl(int64_t a, int amt) { - return a >> amt; -} - -uint64_t -u64_srl_const(uint64_t a) { - return a >> 10; -} - -uint64_t -u64_srl(uint64_t a, int amt) { - return a >> amt; -} - -int64_t -i64_sra_const(int64_t a) { - return a >> 10; -} - -int64_t -i64_sra(int64_t a, int amt) { - return a >> amt; -} - -uint64_t -u64_sra_const(uint64_t a) { - return a >> 10; -} - -uint64_t -u64_sra(uint64_t a, int amt) { - return a >> amt; -} - -int -test_u64_constant_shift(const char *func_name, uint64_t (*func)(uint64_t), uint64_t a, uint64_t expected) { - uint64_t result = (*func)(a); - - if (result != expected) { - printf("%s(0x%016llx) returns 0x%016llx, expected 0x%016llx\n", func_name, a, result, expected); - return 1; - } - - return 0; -} - -int -test_i64_constant_shift(const char *func_name, int64_t (*func)(int64_t), int64_t a, int64_t expected) { - int64_t result = (*func)(a); - - if (result != expected) { - printf("%s(0x%016llx) returns 0x%016llx, expected 0x%016llx\n", func_name, a, result, expected); - return 1; - } - - return 0; -} - -int -test_u64_variable_shift(const char *func_name, uint64_t (*func)(uint64_t, int), uint64_t a, unsigned int b, uint64_t expected) { - uint64_t result = (*func)(a, b); - - if (result != expected) { - printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n", func_name, a, b, result, expected); - return 1; - } - - return 0; -} - -int -test_i64_variable_shift(const char *func_name, int64_t (*func)(int64_t, int), int64_t a, unsigned int b, int64_t expected) { - int64_t result = (*func)(a, b); - - if (result != expected) { - printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n", func_name, a, b, result, expected); - return 1; - } - - return 0; -} - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ - -int64_t i64_mul(int64_t a, int64_t b) { - return a * b; -} - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ - -int -main(void) -{ - int i, j, failed = 0; - const char *something_failed = " %d tests failed.\n"; - const char *all_tests_passed = " All tests passed.\n"; - - printf("tval_a = %20lld (0x%016llx)\n", tval_a, tval_a); - printf("tval_b = %20lld (0x%016llx)\n", tval_b, tval_b); - printf("tval_c = %20lld (0x%016llx)\n", tval_c, tval_c); - printf("tval_d = %20lld (0x%016llx)\n", tval_d, tval_d); - printf("tval_e = %20lld (0x%016llx)\n", tval_e, tval_e); - printf("tval_f = %20llu (0x%016llx)\n", tval_f, tval_f); - printf("tval_g = %20llu (0x%016llx)\n", tval_g, tval_g); - printf("----------------------------------------\n"); - - for (i = 0; i < ARR_SIZE(int64_preds); ++i) { - printf("%s series:\n", int64_preds[i].name); - if ((failed = compare_expect_int64(int64_preds + i)) > 0) { - printf(something_failed, failed); - } else { - printf(all_tests_passed); - } - - printf("----------------------------------------\n"); - } - - for (i = 0; i < ARR_SIZE(uint64_preds); ++i) { - printf("%s series:\n", uint64_preds[i].name); - if ((failed = compare_expect_uint64(uint64_preds + i)) > 0) { - printf(something_failed, failed); - } else { - printf(all_tests_passed); - } - - printf("----------------------------------------\n"); - } - - /*----------------------------------------------------------------------*/ - - puts("signed/zero-extend tests:"); - - failed = 0; - failed += test_i64_sext_i32(-1, -1LL); - failed += test_i64_sext_i32(10, 10LL); - failed += test_i64_sext_i32(0x7fffffff, 0x7fffffffLL); - failed += test_i64_sext_i16(-1, -1LL); - failed += test_i64_sext_i16(10, 10LL); - failed += test_i64_sext_i16(0x7fff, 0x7fffLL); - failed += test_i64_sext_i8(-1, -1LL); - failed += test_i64_sext_i8(10, 10LL); - failed += test_i64_sext_i8(0x7f, 0x7fLL); - - failed += test_i64_zext_i32(0xffffffff, 0x00000000ffffffffLLU); - failed += test_i64_zext_i32(0x01234567, 0x0000000001234567LLU); - failed += test_i64_zext_i16(0xffff, 0x000000000000ffffLLU); - failed += test_i64_zext_i16(0x569a, 0x000000000000569aLLU); - failed += test_i64_zext_i8(0xff, 0x00000000000000ffLLU); - failed += test_i64_zext_i8(0xa0, 0x00000000000000a0LLU); - - if (failed > 0) { - printf(" %d tests failed.\n", failed); - } else { - printf(" All tests passed.\n"); - } - - printf("----------------------------------------\n"); - - failed = 0; - puts("signed left/right shift tests:"); - failed += test_i64_constant_shift("i64_shl_const", i64_shl_const, tval_a, 0x00047dc7ec114c00LL); - failed += test_i64_variable_shift("i64_shl", i64_shl, tval_a, 10, 0x00047dc7ec114c00LL); - failed += test_i64_constant_shift("i64_srl_const", i64_srl_const, tval_a, 0x0000000047dc7ec1LL); - failed += test_i64_variable_shift("i64_srl", i64_srl, tval_a, 10, 0x0000000047dc7ec1LL); - failed += test_i64_constant_shift("i64_sra_const", i64_sra_const, tval_a, 0x0000000047dc7ec1LL); - failed += test_i64_variable_shift("i64_sra", i64_sra, tval_a, 10, 0x0000000047dc7ec1LL); - - if (failed > 0) { - printf(" %d tests ailed.\n", failed); - } else { - printf(" All tests passed.\n"); - } - - printf("----------------------------------------\n"); - - failed = 0; - puts("unsigned left/right shift tests:"); - failed += test_u64_constant_shift("u64_shl_const", u64_shl_const, tval_f, 0xfffc1d404d7ae400LL); - failed += test_u64_variable_shift("u64_shl", u64_shl, tval_f, 10, 0xfffc1d404d7ae400LL); - failed += test_u64_constant_shift("u64_srl_const", u64_srl_const, tval_f, 0x003fffffc1d404d7LL); - failed += test_u64_variable_shift("u64_srl", u64_srl, tval_f, 10, 0x003fffffc1d404d7LL); - failed += test_i64_constant_shift("i64_sra_const", i64_sra_const, tval_f, 0xffffffffc1d404d7LL); - failed += test_i64_variable_shift("i64_sra", i64_sra, tval_f, 10, 0xffffffffc1d404d7LL); - failed += test_u64_constant_shift("u64_sra_const", u64_sra_const, tval_f, 0x003fffffc1d404d7LL); - failed += test_u64_variable_shift("u64_sra", u64_sra, tval_f, 10, 0x003fffffc1d404d7LL); - - if (failed > 0) { - printf(" %d tests ailed.\n", failed); - } else { - printf(" All tests passed.\n"); - } - - printf("----------------------------------------\n"); - - int64_t result; - - result = i64_mul(tval_g, tval_g); - printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_g, tval_g, result, result); - result = i64_mul(tval_d, tval_e); - printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_d, tval_e, result, result); - /* 0xba7a664f13077c9 */ - result = i64_mul(tval_a, tval_b); - printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_a, tval_b, result, result); - - printf("----------------------------------------\n"); - - return 0; -} diff --git a/llvm/test/CodeGen/CellSPU/useful-harnesses/i64operations.h b/llvm/test/CodeGen/CellSPU/useful-harnesses/i64operations.h deleted file mode 100644 index 7a02794..0000000 --- a/llvm/test/CodeGen/CellSPU/useful-harnesses/i64operations.h +++ /dev/null @@ -1,43 +0,0 @@ -#define TRUE_VAL (!0) -#define FALSE_VAL 0 -#define ARR_SIZE(arr) (sizeof(arr)/sizeof(arr[0])) - -typedef unsigned long long int uint64_t; -typedef long long int int64_t; - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ -struct harness_int64_pred { - const char *fmt_string; - int64_t *lhs; - int64_t *rhs; - int64_t *select_a; - int64_t *select_b; - int expected; - int64_t *select_expected; -}; - -struct harness_uint64_pred { - const char *fmt_string; - uint64_t *lhs; - uint64_t *rhs; - uint64_t *select_a; - uint64_t *select_b; - int expected; - uint64_t *select_expected; -}; - -struct int64_pred_s { - const char *name; - int (*predfunc) (int64_t, int64_t); - int64_t (*selfunc) (int64_t, int64_t, int64_t, int64_t); - struct harness_int64_pred *tests; - int n_tests; -}; - -struct uint64_pred_s { - const char *name; - int (*predfunc) (uint64_t, uint64_t); - uint64_t (*selfunc) (uint64_t, uint64_t, uint64_t, uint64_t); - struct harness_uint64_pred *tests; - int n_tests; -}; diff --git a/llvm/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg b/llvm/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg deleted file mode 100644 index e6f55ee..0000000 --- a/llvm/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = [] diff --git a/llvm/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c b/llvm/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c deleted file mode 100644 index c4c86e3..0000000 --- a/llvm/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c +++ /dev/null @@ -1,179 +0,0 @@ -#include - -typedef unsigned char v16i8 __attribute__((ext_vector_type(16))); -typedef short v8i16 __attribute__((ext_vector_type(16))); -typedef int v4i32 __attribute__((ext_vector_type(4))); -typedef float v4f32 __attribute__((ext_vector_type(4))); -typedef long long v2i64 __attribute__((ext_vector_type(2))); -typedef double v2f64 __attribute__((ext_vector_type(2))); - -void print_v16i8(const char *str, const v16i8 v) { - union { - unsigned char elts[16]; - v16i8 vec; - } tv; - tv.vec = v; - printf("%s = { %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, " - "%hhu, %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, " - "%hhu, %hhu }\n", - str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], tv.elts[5], - tv.elts[6], tv.elts[7], tv.elts[8], tv.elts[9], tv.elts[10], tv.elts[11], - tv.elts[12], tv.elts[13], tv.elts[14], tv.elts[15]); -} - -void print_v16i8_hex(const char *str, const v16i8 v) { - union { - unsigned char elts[16]; - v16i8 vec; - } tv; - tv.vec = v; - printf("%s = { 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, " - "0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, " - "0x%02hhx, 0x%02hhx }\n", - str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], tv.elts[5], - tv.elts[6], tv.elts[7], tv.elts[8], tv.elts[9], tv.elts[10], tv.elts[11], - tv.elts[12], tv.elts[13], tv.elts[14], tv.elts[15]); -} - -void print_v8i16_hex(const char *str, v8i16 v) { - union { - short elts[8]; - v8i16 vec; - } tv; - tv.vec = v; - printf("%s = { 0x%04hx, 0x%04hx, 0x%04hx, 0x%04hx, 0x%04hx, " - "0x%04hx, 0x%04hx, 0x%04hx }\n", - str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], - tv.elts[5], tv.elts[6], tv.elts[7]); -} - -void print_v4i32(const char *str, v4i32 v) { - printf("%s = { %d, %d, %d, %d }\n", str, v.x, v.y, v.z, v.w); -} - -void print_v4f32(const char *str, v4f32 v) { - printf("%s = { %f, %f, %f, %f }\n", str, v.x, v.y, v.z, v.w); -} - -void print_v2i64(const char *str, v2i64 v) { - printf("%s = { %lld, %lld }\n", str, v.x, v.y); -} - -void print_v2f64(const char *str, v2f64 v) { - printf("%s = { %g, %g }\n", str, v.x, v.y); -} - -/*----------------------------------------------------------------------*/ - -v16i8 v16i8_mpy(v16i8 v1, v16i8 v2) { - return v1 * v2; -} - -v16i8 v16i8_add(v16i8 v1, v16i8 v2) { - return v1 + v2; -} - -v4i32 v4i32_shuffle_1(v4i32 a) { - v4i32 c2 = a.yzwx; - return c2; -} - -v4i32 v4i32_shuffle_2(v4i32 a) { - v4i32 c2 = a.zwxy; - return c2; -} - -v4i32 v4i32_shuffle_3(v4i32 a) { - v4i32 c2 = a.wxyz; - return c2; -} - -v4i32 v4i32_shuffle_4(v4i32 a) { - v4i32 c2 = a.xyzw; - return c2; -} - -v4i32 v4i32_shuffle_5(v4i32 a) { - v4i32 c2 = a.xwzy; - return c2; -} - -v4f32 v4f32_shuffle_1(v4f32 a) { - v4f32 c2 = a.yzwx; - return c2; -} - -v4f32 v4f32_shuffle_2(v4f32 a) { - v4f32 c2 = a.zwxy; - return c2; -} - -v4f32 v4f32_shuffle_3(v4f32 a) { - v4f32 c2 = a.wxyz; - return c2; -} - -v4f32 v4f32_shuffle_4(v4f32 a) { - v4f32 c2 = a.xyzw; - return c2; -} - -v4f32 v4f32_shuffle_5(v4f32 a) { - v4f32 c2 = a.xwzy; - return c2; -} - -v2i64 v2i64_shuffle(v2i64 a) { - v2i64 c2 = a.yx; - return c2; -} - -v2f64 v2f64_shuffle(v2f64 a) { - v2f64 c2 = a.yx; - return c2; -} - -int main(void) { - v16i8 v00 = { 0xf4, 0xad, 0x01, 0xe9, 0x51, 0x78, 0xc1, 0x8a, - 0x94, 0x7c, 0x49, 0x6c, 0x21, 0x32, 0xb2, 0x04 }; - v16i8 va0 = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10 }; - v16i8 va1 = { 0x11, 0x83, 0x4b, 0x63, 0xff, 0x90, 0x32, 0xe5, - 0x5a, 0xaa, 0x20, 0x01, 0x0d, 0x15, 0x77, 0x05 }; - v8i16 v01 = { 0x1a87, 0x0a14, 0x5014, 0xfff0, - 0xe194, 0x0184, 0x801e, 0x5940 }; - v4i32 v1 = { 1, 2, 3, 4 }; - v4f32 v2 = { 1.0, 2.0, 3.0, 4.0 }; - v2i64 v3 = { 691043ll, 910301513ll }; - v2f64 v4 = { 5.8e56, 9.103e-62 }; - - puts("---- vector tests start ----"); - - print_v16i8_hex("v00 ", v00); - print_v16i8_hex("va0 ", va0); - print_v16i8_hex("va1 ", va1); - print_v16i8_hex("va0 x va1 ", v16i8_mpy(va0, va1)); - print_v16i8_hex("va0 + va1 ", v16i8_add(va0, va1)); - print_v8i16_hex("v01 ", v01); - - print_v4i32("v4i32_shuffle_1(1, 2, 3, 4)", v4i32_shuffle_1(v1)); - print_v4i32("v4i32_shuffle_2(1, 2, 3, 4)", v4i32_shuffle_2(v1)); - print_v4i32("v4i32_shuffle_3(1, 2, 3, 4)", v4i32_shuffle_3(v1)); - print_v4i32("v4i32_shuffle_4(1, 2, 3, 4)", v4i32_shuffle_4(v1)); - print_v4i32("v4i32_shuffle_5(1, 2, 3, 4)", v4i32_shuffle_5(v1)); - - print_v4f32("v4f32_shuffle_1(1, 2, 3, 4)", v4f32_shuffle_1(v2)); - print_v4f32("v4f32_shuffle_2(1, 2, 3, 4)", v4f32_shuffle_2(v2)); - print_v4f32("v4f32_shuffle_3(1, 2, 3, 4)", v4f32_shuffle_3(v2)); - print_v4f32("v4f32_shuffle_4(1, 2, 3, 4)", v4f32_shuffle_4(v2)); - print_v4f32("v4f32_shuffle_5(1, 2, 3, 4)", v4f32_shuffle_5(v2)); - - print_v2i64("v3 ", v3); - print_v2i64("v2i64_shuffle ", v2i64_shuffle(v3)); - print_v2f64("v4 ", v4); - print_v2f64("v2f64_shuffle ", v2f64_shuffle(v4)); - - puts("---- vector tests end ----"); - - return 0; -} diff --git a/llvm/test/CodeGen/CellSPU/v2f32.ll b/llvm/test/CodeGen/CellSPU/v2f32.ll deleted file mode 100644 index 09e15ff..0000000 --- a/llvm/test/CodeGen/CellSPU/v2f32.ll +++ /dev/null @@ -1,78 +0,0 @@ -;RUN: llc --march=cellspu %s -o - | FileCheck %s -%vec = type <2 x float> - -define %vec @test_ret(%vec %param) -{ -;CHECK: bi $lr - ret %vec %param -} - -define %vec @test_add(%vec %param) -{ -;CHECK: fa {{\$.}}, $3, $3 - %1 = fadd %vec %param, %param -;CHECK: bi $lr - ret %vec %1 -} - -define %vec @test_sub(%vec %param) -{ -;CHECK: fs {{\$.}}, $3, $3 - %1 = fsub %vec %param, %param - -;CHECK: bi $lr - ret %vec %1 -} - -define %vec @test_mul(%vec %param) -{ -;CHECK: fm {{\$.}}, $3, $3 - %1 = fmul %vec %param, %param - -;CHECK: bi $lr - ret %vec %1 -} - -; CHECK: test_splat: -define %vec @test_splat(float %param ) { -;CHECK: lqa -;CHECK: shufb - %sv = insertelement <1 x float> undef, float %param, i32 0 - %rv = shufflevector <1 x float> %sv, <1 x float> undef, <2 x i32> zeroinitializer -;CHECK: bi $lr - ret %vec %rv -} - -define void @test_store(%vec %val, %vec* %ptr){ -; CHECK: test_store: -;CHECK: stqd - store %vec zeroinitializer, %vec* null - -;CHECK: stqd $3, 0(${{.*}}) -;CHECK: bi $lr - store %vec %val, %vec* %ptr - ret void -} - -; CHECK: test_insert: -define %vec @test_insert(){ -;CHECK: cwd -;CHECK: shufb $3 - %rv = insertelement %vec undef, float 0.0e+00, i32 undef -;CHECK: bi $lr - ret %vec %rv -} - -; CHECK: test_unaligned_store: - -define void @test_unaligned_store() { -;CHECK: cdd -;CHECK: shufb -;CHECK: stqd - %data = alloca [4 x float], align 16 ; <[4 x float]*> [#uses=1] - %ptr = getelementptr [4 x float]* %data, i32 0, i32 2 ; [#uses=1] - %vptr = bitcast float* %ptr to <2 x float>* ; <[1 x <2 x float>]*> [#uses=1] - store <2 x float> zeroinitializer, <2 x float>* %vptr - ret void -} - diff --git a/llvm/test/CodeGen/CellSPU/v2i32.ll b/llvm/test/CodeGen/CellSPU/v2i32.ll deleted file mode 100644 index 9c5b896..0000000 --- a/llvm/test/CodeGen/CellSPU/v2i32.ll +++ /dev/null @@ -1,61 +0,0 @@ -;RUN: llc --march=cellspu %s -o - | FileCheck %s -%vec = type <2 x i32> - -define %vec @test_ret(%vec %param) -{ -;CHECK: bi $lr - ret %vec %param -} - -define %vec @test_add(%vec %param) -{ -;CHECK: shufb -;CHECK: addx - %1 = add %vec %param, %param -;CHECK: bi $lr - ret %vec %1 -} - -define %vec @test_sub(%vec %param) -{ - %1 = sub %vec %param, -;CHECK: bi $lr - ret %vec %1 -} - -define %vec @test_mul(%vec %param) -{ - %1 = mul %vec %param, %param -;CHECK: bi $lr - ret %vec %1 -} - -define <2 x i32> @test_splat(i32 %param ) { -;see svn log for why this is here... -;CHECK-NOT: or $3, $3, $3 -;CHECK: lqa -;CHECK: shufb - %sv = insertelement <1 x i32> undef, i32 %param, i32 0 - %rv = shufflevector <1 x i32> %sv, <1 x i32> undef, <2 x i32> zeroinitializer -;CHECK: bi $lr - ret <2 x i32> %rv -} - -define i32 @test_extract() { -;CHECK: shufb $3 - %rv = extractelement <2 x i32> zeroinitializer, i32 undef ; [#uses=1] -;CHECK: bi $lr - ret i32 %rv -} - -define void @test_store( %vec %val, %vec* %ptr) -{ - store %vec %val, %vec* %ptr - ret void -} - -define <2 x i32>* @test_alignment( [2 x <2 x i32>]* %ptr) -{ - %rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1 - ret <2 x i32>* %rv -} diff --git a/llvm/test/CodeGen/CellSPU/vec_const.ll b/llvm/test/CodeGen/CellSPU/vec_const.ll deleted file mode 100644 index 24c05c6..0000000 --- a/llvm/test/CodeGen/CellSPU/vec_const.ll +++ /dev/null @@ -1,154 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s -; RUN: grep -w il %t1.s | count 3 -; RUN: grep ilhu %t1.s | count 8 -; RUN: grep -w ilh %t1.s | count 5 -; RUN: grep iohl %t1.s | count 7 -; RUN: grep lqa %t1.s | count 6 -; RUN: grep 24672 %t1.s | count 2 -; RUN: grep 16429 %t1.s | count 1 -; RUN: grep 63572 %t1.s | count 1 -; RUN: grep 4660 %t1.s | count 1 -; RUN: grep 22136 %t1.s | count 1 -; RUN: grep 43981 %t1.s | count 1 -; RUN: grep 61202 %t1.s | count 1 -; RUN: grep 16393 %t1.s | count 1 -; RUN: grep 8699 %t1.s | count 1 -; RUN: grep 21572 %t1.s | count 1 -; RUN: grep 11544 %t1.s | count 1 -; RUN: grep 1311768467750121234 %t1.s | count 1 -; RUN: grep lqd %t2.s | count 6 - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128" -target triple = "spu-unknown-elf" - -; Vector constant load tests: - -; IL , 2 -define <4 x i32> @v4i32_constvec() { - ret <4 x i32> < i32 2, i32 2, i32 2, i32 2 > -} - -; Spill to constant pool -define <4 x i32> @v4i32_constpool() { - ret <4 x i32> < i32 2, i32 1, i32 1, i32 2 > -} - -; Max negative range for IL -define <4 x i32> @v4i32_constvec_2() { - ret <4 x i32> < i32 -32768, i32 -32768, i32 -32768, i32 -32768 > -} - -; ILHU , 73 (0x49) -; 4784128 = 0x490000 -define <4 x i32> @v4i32_constvec_3() { - ret <4 x i32> < i32 4784128, i32 4784128, - i32 4784128, i32 4784128 > -} - -; ILHU , 61 (0x3d) -; IOHL , 15395 (0x3c23) -define <4 x i32> @v4i32_constvec_4() { - ret <4 x i32> < i32 4013091, i32 4013091, - i32 4013091, i32 4013091 > -} - -; ILHU , 0x5050 (20560) -; IOHL , 0x5050 (20560) -; Tests for whether we expand the size of the bit pattern properly, because -; this could be interpreted as an i8 pattern (0x50) -define <4 x i32> @v4i32_constvec_5() { - ret <4 x i32> < i32 1347440720, i32 1347440720, - i32 1347440720, i32 1347440720 > -} - -; ILH -define <8 x i16> @v8i16_constvec_1() { - ret <8 x i16> < i16 32767, i16 32767, i16 32767, i16 32767, - i16 32767, i16 32767, i16 32767, i16 32767 > -} - -; ILH -define <8 x i16> @v8i16_constvec_2() { - ret <8 x i16> < i16 511, i16 511, i16 511, i16 511, i16 511, - i16 511, i16 511, i16 511 > -} - -; ILH -define <8 x i16> @v8i16_constvec_3() { - ret <8 x i16> < i16 -512, i16 -512, i16 -512, i16 -512, i16 -512, - i16 -512, i16 -512, i16 -512 > -} - -; ILH , 24672 (0x6060) -; Tests whether we expand the size of the bit pattern properly, because -; this could be interpreted as an i8 pattern (0x60) -define <8 x i16> @v8i16_constvec_4() { - ret <8 x i16> < i16 24672, i16 24672, i16 24672, i16 24672, i16 24672, - i16 24672, i16 24672, i16 24672 > -} - -; ILH , 24672 (0x6060) -; Tests whether we expand the size of the bit pattern properly, because -; this is an i8 pattern but has to be expanded out to i16 to load it -; properly into the vector register. -define <16 x i8> @v16i8_constvec_1() { - ret <16 x i8> < i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, - i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96 > -} - -define <4 x float> @v4f32_constvec_1() { -entry: - ret <4 x float> < float 0x4005BF0A80000000, - float 0x4005BF0A80000000, - float 0x4005BF0A80000000, - float 0x4005BF0A80000000 > -} - -define <4 x float> @v4f32_constvec_2() { -entry: - ret <4 x float> < float 0.000000e+00, - float 0.000000e+00, - float 0.000000e+00, - float 0.000000e+00 > -} - - -define <4 x float> @v4f32_constvec_3() { -entry: - ret <4 x float> < float 0x4005BF0A80000000, - float 0x3810000000000000, - float 0x47EFFFFFE0000000, - float 0x400921FB60000000 > -} - -; 1311768467750121234 => 0x 12345678 abcdef12 -; HI32_hi: 4660 -; HI32_lo: 22136 -; LO32_hi: 43981 -; LO32_lo: 61202 -define <2 x i64> @i64_constvec_1() { -entry: - ret <2 x i64> < i64 1311768467750121234, - i64 1311768467750121234 > -} - -define <2 x i64> @i64_constvec_2() { -entry: - ret <2 x i64> < i64 1, i64 1311768467750121234 > -} - -define <2 x double> @f64_constvec_1() { -entry: - ret <2 x double> < double 0x400921fb54442d18, - double 0xbff6a09e667f3bcd > -} - -; 0x400921fb 54442d18 -> -; (ILHU 0x4009 [16393]/IOHL 0x21fb [ 8699]) -; (ILHU 0x5444 [21572]/IOHL 0x2d18 [11544]) -define <2 x double> @f64_constvec_2() { -entry: - ret <2 x double> < double 0x400921fb54442d18, - double 0x400921fb54442d18 > -} diff --git a/llvm/test/CodeGen/CellSPU/vecinsert.ll b/llvm/test/CodeGen/CellSPU/vecinsert.ll deleted file mode 100644 index 8dcab1d..0000000 --- a/llvm/test/CodeGen/CellSPU/vecinsert.ll +++ /dev/null @@ -1,131 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep cbd %t1.s | count 5 -; RUN: grep chd %t1.s | count 5 -; RUN: grep cwd %t1.s | count 11 -; RUN: grep -w il %t1.s | count 5 -; RUN: grep -w ilh %t1.s | count 6 -; RUN: grep iohl %t1.s | count 1 -; RUN: grep ilhu %t1.s | count 4 -; RUN: grep shufb %t1.s | count 27 -; RUN: grep 17219 %t1.s | count 1 -; RUN: grep 22598 %t1.s | count 1 -; RUN: grep -- -39 %t1.s | count 1 -; RUN: grep 24 %t1.s | count 1 -; RUN: grep 1159 %t1.s | count 1 -; RUN: FileCheck %s < %t1.s - -; ModuleID = 'vecinsert.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128" -target triple = "spu-unknown-elf" - -; 67 -> 0x43, as 8-bit vector constant load = 0x4343 (17219)0x4343 -define <16 x i8> @test_v16i8(<16 x i8> %P, i8 %x) { -entry: - %tmp1 = insertelement <16 x i8> %P, i8 %x, i32 10 - %tmp1.1 = insertelement <16 x i8> %tmp1, i8 67, i32 7 - %tmp1.2 = insertelement <16 x i8> %tmp1.1, i8 %x, i32 15 - ret <16 x i8> %tmp1.2 -} - -; 22598 -> 0x5846 -define <8 x i16> @test_v8i16(<8 x i16> %P, i16 %x) { -entry: - %tmp1 = insertelement <8 x i16> %P, i16 %x, i32 5 - %tmp1.1 = insertelement <8 x i16> %tmp1, i16 22598, i32 7 - %tmp1.2 = insertelement <8 x i16> %tmp1.1, i16 %x, i32 2 - ret <8 x i16> %tmp1.2 -} - -; 1574023 -> 0x180487 (ILHU 24/IOHL 1159) -define <4 x i32> @test_v4i32_1(<4 x i32> %P, i32 %x) { -entry: - %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2 - %tmp1.1 = insertelement <4 x i32> %tmp1, i32 1574023, i32 1 - %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3 - ret <4 x i32> %tmp1.2 -} - -; Should generate IL for the load -define <4 x i32> @test_v4i32_2(<4 x i32> %P, i32 %x) { -entry: - %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2 - %tmp1.1 = insertelement <4 x i32> %tmp1, i32 -39, i32 1 - %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3 - ret <4 x i32> %tmp1.2 -} - -define void @variable_v16i8_1(<16 x i8>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <16 x i8>* %a, i32 %i - %tmp2 = load <16 x i8>* %arrayidx - %tmp3 = insertelement <16 x i8> %tmp2, i8 1, i32 1 - %tmp8 = insertelement <16 x i8> %tmp3, i8 2, i32 11 - store <16 x i8> %tmp8, <16 x i8>* %arrayidx - ret void -} - -define void @variable_v8i16_1(<8 x i16>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <8 x i16>* %a, i32 %i - %tmp2 = load <8 x i16>* %arrayidx - %tmp3 = insertelement <8 x i16> %tmp2, i16 1, i32 1 - %tmp8 = insertelement <8 x i16> %tmp3, i16 2, i32 6 - store <8 x i16> %tmp8, <8 x i16>* %arrayidx - ret void -} - -define void @variable_v4i32_1(<4 x i32>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <4 x i32>* %a, i32 %i - %tmp2 = load <4 x i32>* %arrayidx - %tmp3 = insertelement <4 x i32> %tmp2, i32 1, i32 1 - %tmp8 = insertelement <4 x i32> %tmp3, i32 2, i32 2 - store <4 x i32> %tmp8, <4 x i32>* %arrayidx - ret void -} - -define void @variable_v4f32_1(<4 x float>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <4 x float>* %a, i32 %i - %tmp2 = load <4 x float>* %arrayidx - %tmp3 = insertelement <4 x float> %tmp2, float 1.000000e+00, i32 1 - %tmp8 = insertelement <4 x float> %tmp3, float 2.000000e+00, i32 2 - store <4 x float> %tmp8, <4 x float>* %arrayidx - ret void -} - -define void @variable_v2i64_1(<2 x i64>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <2 x i64>* %a, i32 %i - %tmp2 = load <2 x i64>* %arrayidx - %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 0 - store <2 x i64> %tmp3, <2 x i64>* %arrayidx - ret void -} - -define void @variable_v2i64_2(<2 x i64>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <2 x i64>* %a, i32 %i - %tmp2 = load <2 x i64>* %arrayidx - %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 1 - store <2 x i64> %tmp3, <2 x i64>* %arrayidx - ret void -} - -define void @variable_v2f64_1(<2 x double>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <2 x double>* %a, i32 %i - %tmp2 = load <2 x double>* %arrayidx - %tmp3 = insertelement <2 x double> %tmp2, double 1.000000e+00, i32 1 - store <2 x double> %tmp3, <2 x double>* %arrayidx - ret void -} - -define <4 x i32> @undef_v4i32( i32 %param ) { - ;CHECK: cwd - ;CHECK: lqa - ;CHECK: shufb - %val = insertelement <4 x i32> , i32 %param, i32 undef - ret <4 x i32> %val -} - diff --git a/llvm/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp b/llvm/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp index 4bc58d7..2b60cb9 100644 --- a/llvm/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp +++ b/llvm/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp @@ -2,7 +2,7 @@ # Do not edit here. If you wish to override these values # edit the last section set target_triplet "x86_64-apple-darwin10" -set TARGETS_TO_BUILD "X86 Sparc PowerPC ARM Mips CellSPU PIC16 XCore MSP430 Blackfin MSIL CppBackend" +set TARGETS_TO_BUILD "X86 Sparc PowerPC ARM Mips PIC16 XCore MSP430 Blackfin MSIL CppBackend" set srcroot "/Volumes/Data/ddunbar/llvm" set objroot "/Volumes/Data/ddunbar/llvm.obj.64" set srcdir "/Volumes/Data/ddunbar/llvm/test" diff --git a/llvm/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp b/llvm/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp index 4bc58d7..2b60cb9 100644 --- a/llvm/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp +++ b/llvm/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp @@ -2,7 +2,7 @@ # Do not edit here. If you wish to override these values # edit the last section set target_triplet "x86_64-apple-darwin10" -set TARGETS_TO_BUILD "X86 Sparc PowerPC ARM Mips CellSPU PIC16 XCore MSP430 Blackfin MSIL CppBackend" +set TARGETS_TO_BUILD "X86 Sparc PowerPC ARM Mips PIC16 XCore MSP430 Blackfin MSIL CppBackend" set srcroot "/Volumes/Data/ddunbar/llvm" set objroot "/Volumes/Data/ddunbar/llvm.obj.64" set srcdir "/Volumes/Data/ddunbar/llvm/test"