From 4047343c656f27f5f5c7a6718db05dc4a2d0b9a0 Mon Sep 17 00:00:00 2001
From: David Schleef
Date: Wed, 27 Apr 2011 22:59:45 -0700
Subject: [PATCH] altivec: Fix and add various opcode rules

---
Reviewer notes (below the "---" separator, so not part of the commit message):
 * This copy of the patch had lost its line breaks and indentation.  Line
   breaks were restored so that every hunk matches the line counts in its
   @@ header and the diffstat totals (145 insertions, 19 deletions).
 * Indentation, the position of some blank context lines, spacing inside
   string literals, and the two whitespace-only -/+ pairs in
   orc/orcprogram-altivec.c could not be recovered and are a best guess.
   Compare against the upstream commit before applying; the From: header
   also carries no address in this copy.
 * powerpc_rule_loadoffX () reserves the permute temp before the const
   check, and still emits lvsl/vperm after the "bad load size" error case.
 * orc_executor_new () keeps the old assignment as commented-out code.

 orc/orcexecutor.c        |   3 +-
 orc/orcpowerpc.c         |   4 +-
 orc/orcpowerpc.h         |   1 +
 orc/orcprogram-altivec.c |  11 ++--
 orc/orcrules-altivec.c   | 145 ++++++++++++++++++++++++++++++++++++++++++++---
 5 files changed, 145 insertions(+), 19 deletions(-)

diff --git a/orc/orcexecutor.c b/orc/orcexecutor.c
index 6e2b76a..3bea0b5 100644
--- a/orc/orcexecutor.c
+++ b/orc/orcexecutor.c
@@ -24,7 +24,8 @@ orc_executor_new (OrcProgram *program)
   ex = malloc(sizeof(OrcExecutor));
   memset(ex,0,sizeof(OrcExecutor));
 
-  ex->program = program;
+  //ex->program = program;
+  ex->arrays[ORC_VAR_A2] = program->orccode;
 
   return ex;
 }
diff --git a/orc/orcpowerpc.c b/orc/orcpowerpc.c
index c951da6..982020d 100644
--- a/orc/orcpowerpc.c
+++ b/orc/orcpowerpc.c
@@ -487,10 +487,10 @@ powerpc_load_constant (OrcCompiler *p, int i, int reg)
   powerpc_emit_lwz (p,
       greg,
       POWERPC_R3,
-      (int)ORC_STRUCT_OFFSET(OrcExecutor, program));
+      (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[ORC_VAR_A2]));
   powerpc_emit_lwz (p,
       greg, greg,
-      (int)ORC_STRUCT_OFFSET(OrcProgram, code_exec));
+      (int)ORC_STRUCT_OFFSET(OrcCode, exec));
 
   powerpc_add_fixup (p, 1, p->codeptr, label_data);
   {
diff --git a/orc/orcpowerpc.h b/orc/orcpowerpc.h
index 918a26b..285f8ca 100644
--- a/orc/orcpowerpc.h
+++ b/orc/orcpowerpc.h
@@ -127,6 +127,7 @@ int powerpc_get_constant (OrcCompiler *p, int type, int value);
 int powerpc_get_constant_full (OrcCompiler *p, int value0, int value1, int value2, int value3);
 
 /* instructions */
+#define powerpc_emit_vand(p,a,b,c) powerpc_emit_VX_2 (p, "vand", 0x10000404, a, b, c)
 #define powerpc_emit_vandc(p,a,b,c) powerpc_emit_VX_2 (p, "vandc", 0x10000444, a, b, c)
 #define powerpc_emit_vor(p,a,b,c) powerpc_emit_VX_2 (p, "vor", 0x10000484, a, b, c)
 
diff --git a/orc/orcprogram-altivec.c b/orc/orcprogram-altivec.c
index 5f1755b..903c6f5 100644
--- a/orc/orcprogram-altivec.c
+++ b/orc/orcprogram-altivec.c
@@ -359,11 +359,8 @@ orc_compiler_powerpc_assemble (OrcCompiler *compiler)
           (int)ORC_STRUCT_OFFSET(OrcExecutor, accumulators[k]));
 
       if (var->size == 2) {
-        powerpc_emit_vxor (compiler,
-            POWERPC_V0, POWERPC_V0, POWERPC_V0);
-        powerpc_emit_VX_2 (compiler, "vsum4shs", 0x10000648,
-            POWERPC_V0, var->alloc, POWERPC_V0);
-        powerpc_emit_vor (compiler, var->alloc, POWERPC_V0, POWERPC_V0);
+        powerpc_emit_vxor (compiler, POWERPC_V0, POWERPC_V0, POWERPC_V0);
+        powerpc_emit_vmrghh (compiler, var->alloc, POWERPC_V0, var->alloc);
       }
 
       ORC_ASM_CODE(compiler," lvsr %s, 0, %s\n",
@@ -376,8 +373,8 @@ orc_compiler_powerpc_assemble (OrcCompiler *compiler)
           POWERPC_V0);
 
       ORC_ASM_CODE(compiler," stvewx %s, 0, %s\n",
-        powerpc_get_regname (var->alloc),
-        powerpc_get_regname (POWERPC_R0));
+          powerpc_get_regname (var->alloc),
+          powerpc_get_regname (POWERPC_R0));
       powerpc_emit_X (compiler, 0x7c00018e,
           powerpc_regnum(var->alloc),
           0, powerpc_regnum(POWERPC_R0));
diff --git a/orc/orcrules-altivec.c b/orc/orcrules-altivec.c
index f1ffe87..a405bed 100644
--- a/orc/orcrules-altivec.c
+++ b/orc/orcrules-altivec.c
@@ -156,6 +156,75 @@ powerpc_rule_loadX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
 }
 
 static void
+powerpc_rule_loadoffX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
+{
+  OrcVariable *src = compiler->vars + insn->src_args[0];
+  OrcVariable *dest = compiler->vars + insn->dest_args[0];
+  int size = src->size << compiler->loop_shift;
+  int perm = orc_compiler_get_temp_reg (compiler);
+  int offset;
+
+  if (compiler->vars[insn->src_args[1]].vartype != ORC_VAR_TYPE_CONST) {
+    ORC_COMPILER_ERROR(compiler, "Rule only works with consts");
+    return;
+  }
+
+  offset = compiler->vars[insn->src_args[1]].value.i * src->size;
+  powerpc_emit_addi (compiler, compiler->gp_tmpreg, POWERPC_R0, offset);
+  switch (size) {
+    case 1:
+      ORC_ASM_CODE(compiler," lvebx %s, %s, %s\n",
+          powerpc_get_regname (dest->alloc),
+          powerpc_get_regname (compiler->gp_tmpreg),
+          powerpc_get_regname (src->ptr_register));
+      powerpc_emit_X (compiler, 0x7c00000e, powerpc_regnum(dest->alloc),
+          powerpc_regnum(compiler->gp_tmpreg),
+          powerpc_regnum(src->ptr_register));
+      break;
+    case 2:
+      ORC_ASM_CODE(compiler," lvehx %s, %s, %s\n",
+          powerpc_get_regname (dest->alloc),
+          powerpc_get_regname (compiler->gp_tmpreg),
+          powerpc_get_regname (src->ptr_register));
+      powerpc_emit_X (compiler, 0x7c00004e, powerpc_regnum(dest->alloc),
+          powerpc_regnum(compiler->gp_tmpreg),
+          powerpc_regnum(src->ptr_register));
+      break;
+    case 4:
+      ORC_ASM_CODE(compiler," lvewx %s, %s, %s\n",
+          powerpc_get_regname (dest->alloc),
+          powerpc_get_regname (compiler->gp_tmpreg),
+          powerpc_get_regname (src->ptr_register));
+      powerpc_emit_X (compiler, 0x7c00008e, powerpc_regnum(dest->alloc),
+          powerpc_regnum(compiler->gp_tmpreg),
+          powerpc_regnum(src->ptr_register));
+      break;
+    case 8:
+    case 16:
+      ORC_ASM_CODE(compiler," lvx %s, %s, %s\n",
+          powerpc_get_regname (dest->alloc),
+          powerpc_get_regname (compiler->gp_tmpreg),
+          powerpc_get_regname (src->ptr_register));
+      powerpc_emit_X (compiler, 0x7c0000ce, powerpc_regnum(dest->alloc),
+          powerpc_regnum(compiler->gp_tmpreg),
+          powerpc_regnum(src->ptr_register));
+      break;
+    default:
+      ORC_COMPILER_ERROR(compiler,"bad load size %d",
+          src->size << compiler->loop_shift);
+      break;
+  }
+  ORC_ASM_CODE(compiler," lvsl %s, %s, %s\n",
+      powerpc_get_regname (perm),
+      powerpc_get_regname (compiler->gp_tmpreg),
+      powerpc_get_regname (src->ptr_register));
+  powerpc_emit_X (compiler, 0x7c00000c, powerpc_regnum(perm),
+      powerpc_regnum(compiler->gp_tmpreg),
+      powerpc_regnum(src->ptr_register));
+  powerpc_emit_vperm (compiler, dest->alloc, dest->alloc, dest->alloc, perm);
+}
+
+static void
 powerpc_rule_storeX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
 {
   OrcVariable *src = compiler->vars + insn->src_args[0];
@@ -331,6 +400,10 @@ RULE(subssl, "vsubsws", 0x10000780)
 RULE(subusl, "vsubuws", 0x10000680)
 RULE(xorl, "vxor", 0x100004c4)
 
+RULE(andq, "vand", 0x10000404)
+RULE(orq, "vor", 0x10000484)
+RULE(xorq, "vxor", 0x100004c4)
+
 RULE(addf, "vaddfp", 0x1000000a)
 RULE(subf, "vsubfp", 0x1000004a)
 RULE(maxf, "vmaxfp", 0x1000040a)
@@ -569,8 +642,6 @@ powerpc_rule_muluwl (OrcCompiler *p, void *user, OrcInstruction *insn)
   powerpc_emit_vmuleuh (p, dest, src1, src2);
 }
 
-#if 0
-/* doesn't work */
 static void
 powerpc_rule_accw (OrcCompiler *p, void *user, OrcInstruction *insn)
 {
@@ -579,7 +650,6 @@ powerpc_rule_accw (OrcCompiler *p, void *user, OrcInstruction *insn)
 
   powerpc_emit_vadduhm (p, dest, dest, src1);
 }
-#endif
 
 static void
 powerpc_rule_accl (OrcCompiler *p, void *user, OrcInstruction *insn)
@@ -590,8 +660,6 @@ powerpc_rule_accl (OrcCompiler *p, void *user, OrcInstruction *insn)
   powerpc_emit_vadduwm (p, dest, dest, src1);
 }
 
-#if 0
-/* doesn't work */
 static void
 powerpc_rule_accsadubl (OrcCompiler *p, void *user, OrcInstruction *insn)
 {
@@ -603,10 +671,21 @@ powerpc_rule_accsadubl (OrcCompiler *p, void *user, OrcInstruction *insn)
 
   powerpc_emit_vmaxub (p, tmp1, src1, src2);
   powerpc_emit_vminub (p, tmp2, src1, src2);
+
   powerpc_emit_vsububm (p, tmp1, tmp1, tmp2);
-  powerpc_emit_vsum4ubs (p, dest, dest, tmp1);
+  if (p->loop_shift == 0) {
+    powerpc_emit_vxor (p, tmp2, tmp2, tmp2);
+    powerpc_emit_vmrghb (p, tmp1, tmp2, tmp1);
+    powerpc_emit_vmrghh (p, tmp1, tmp2, tmp1);
+    powerpc_emit_vadduwm (p, dest, dest, tmp1);
+  } else if (p->loop_shift == 1) {
+    powerpc_emit_vxor (p, tmp2, tmp2, tmp2);
+    powerpc_emit_vmrghh (p, tmp1, tmp2, tmp1);
+    powerpc_emit_vsum4ubs (p, dest, dest, tmp1);
+  } else {
+    powerpc_emit_vsum4ubs (p, dest, dest, tmp1);
+  }
 }
-#endif
 
 static void
 powerpc_rule_signb (OrcCompiler *p, void *user, OrcInstruction *insn)
@@ -937,6 +1016,42 @@ powerpc_rule_swapl (OrcCompiler *p, void *user, OrcInstruction *insn)
 }
 
 static void
+powerpc_rule_swapwl (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src1 = ORC_SRC_ARG (p, insn, 0);
+  int dest = ORC_DEST_ARG (p, insn, 0);
+  int perm;
+
+  perm = powerpc_get_constant_full (p, 0x02030001, 0x06070405,
+      0x0a0b0809, 0x0e0f0c0d);
+  powerpc_emit_vperm (p, dest, src1, src1, perm);
+}
+
+static void
+powerpc_rule_swaplq (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src1 = ORC_SRC_ARG (p, insn, 0);
+  int dest = ORC_DEST_ARG (p, insn, 0);
+  int perm;
+
+  perm = powerpc_get_constant_full (p, 0x04050607, 0x00010203,
+      0x0c0d0e0f, 0x08090a0b);
+  powerpc_emit_vperm (p, dest, src1, src1, perm);
+}
+
+static void
+powerpc_rule_swapq (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src1 = ORC_SRC_ARG (p, insn, 0);
+  int dest = ORC_DEST_ARG (p, insn, 0);
+  int perm;
+
+  perm = powerpc_get_constant_full (p, 0x07060504, 0x03020100,
+      0x0f0e0d0c, 0x0b0a0908);
+  powerpc_emit_vperm (p, dest, src1, src1, perm);
+}
+
+static void
 powerpc_rule_splitql (OrcCompiler *p, void *user, OrcInstruction *insn)
 {
   int src1 = ORC_SRC_ARG (p, insn, 0);
@@ -1159,6 +1274,10 @@ orc_compiler_powerpc_register_rules (OrcTarget *target)
   REG(subusl);
   REG(xorl);
 
+  REG(andq);
+  REG(orq);
+  REG(xorq);
+
   REG(mullb);
   REG(mulhsb);
   REG(mulhub);
@@ -1184,9 +1303,9 @@ orc_compiler_powerpc_register_rules (OrcTarget *target)
   REG(mulswl);
   REG(muluwl);
 
-  //REG(accw);
+  REG(accw);
   REG(accl);
-  //REG(accsadubl);
+  REG(accsadubl);
 
   REG(signb);
   REG(signw);
@@ -1215,6 +1334,9 @@ orc_compiler_powerpc_register_rules (OrcTarget *target)
   REG(convql);
   REG(swapw);
   REG(swapl);
+  REG(swapwl);
+  REG(swapq);
+  REG(swaplq);
   if (0) REG(splitql);
   REG(splitlw);
   REG(splitwb);
@@ -1239,6 +1361,9 @@ orc_compiler_powerpc_register_rules (OrcTarget *target)
   orc_rule_register (rule_set, "loadw", powerpc_rule_loadX, NULL);
   orc_rule_register (rule_set, "loadl", powerpc_rule_loadX, NULL);
   orc_rule_register (rule_set, "loadq", powerpc_rule_loadX, NULL);
+  orc_rule_register (rule_set, "loadoffb", powerpc_rule_loadoffX, NULL);
+  orc_rule_register (rule_set, "loadoffw", powerpc_rule_loadoffX, NULL);
+  orc_rule_register (rule_set, "loadoffl", powerpc_rule_loadoffX, NULL);
   orc_rule_register (rule_set, "storeb", powerpc_rule_storeX, NULL);
   orc_rule_register (rule_set, "storew", powerpc_rule_storeX, NULL);
   orc_rule_register (rule_set, "storel", powerpc_rule_storeX, NULL);
@@ -1247,10 +1372,12 @@ orc_compiler_powerpc_register_rules (OrcTarget *target)
   orc_rule_register (rule_set, "andnb", powerpc_rule_andnX, NULL);
   orc_rule_register (rule_set, "andnw", powerpc_rule_andnX, NULL);
   orc_rule_register (rule_set, "andnl", powerpc_rule_andnX, NULL);
+  orc_rule_register (rule_set, "andnq", powerpc_rule_andnX, NULL);
 
   orc_rule_register (rule_set, "copyb", powerpc_rule_copyX, NULL);
   orc_rule_register (rule_set, "copyw", powerpc_rule_copyX, NULL);
   orc_rule_register (rule_set, "copyl", powerpc_rule_copyX, NULL);
+  orc_rule_register (rule_set, "copyq", powerpc_rule_copyX, NULL);
 }
 
 
-- 
2.7.4