}
void
-orc_neon_load_constants (OrcCompiler *compiler)
+orc_neon_load_constants_outer (OrcCompiler *compiler)
{
int i;
for(i=0;i<ORC_N_VARIABLES;i++){
break;
case ORC_VAR_TYPE_SRC:
case ORC_VAR_TYPE_DEST:
+ break;
+ case ORC_VAR_TYPE_ACCUMULATOR:
+ orc_neon_emit_loadil (compiler, compiler->vars[i].alloc, 0);
+ break;
+ case ORC_VAR_TYPE_TEMP:
+ break;
+ default:
+ ORC_PROGRAM_ERROR(compiler,"bad vartype");
+ break;
+ }
+ }
+}
+
+void
+orc_neon_load_constants_inner (OrcCompiler *compiler)
+{
+ int i;
+ for(i=0;i<ORC_N_VARIABLES;i++){
+ if (compiler->vars[i].name == NULL) continue;
+
+ switch (compiler->vars[i].vartype) {
+ case ORC_VAR_TYPE_CONST:
+ break;
+ case ORC_VAR_TYPE_PARAM:
+ break;
+ case ORC_VAR_TYPE_SRC:
+ case ORC_VAR_TYPE_DEST:
orc_arm_emit_load_reg (compiler,
compiler->vars[i].ptr_register,
compiler->exec_reg, ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
break;
case ORC_VAR_TYPE_ACCUMULATOR:
- orc_neon_emit_loadil (compiler, compiler->vars[i].alloc, 0);
break;
case ORC_VAR_TYPE_TEMP:
break;
orc_neon_emit_prologue (compiler);
+ orc_neon_load_constants_outer (compiler);
+
if (compiler->program->is_2d) {
if (compiler->program->constant_m > 0) {
orc_arm_emit_load_imm (compiler, ORC_ARM_A3, compiler->program->constant_m);
orc_arm_emit_label (compiler, 7);
}
- orc_neon_load_constants (compiler);
+ orc_neon_load_constants_inner (compiler);
if (compiler->loop_shift > 0) {
int save_loop_shift = compiler->loop_shift;
OrcStaticOpcode *opcode;
OrcRule *rule;
+ orc_compiler_append_code(compiler,"# LOOP shift %d\n", compiler->loop_shift);
for(j=0;j<compiler->n_insns;j++){
compiler->insn_index = j;
insn = compiler->insns + j;
static void
orc_neon_rule_accw (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- orc_neon_emit_binary (p, "vadd.i16", 0xf2100800,
- p->vars[insn->dest_args[0]].alloc,
- p->vars[insn->dest_args[0]].alloc,
- p->vars[insn->src_args[0]].alloc);
+ unsigned int code; /* hand-built instruction word for the vshl.i64 emitted below */
+
+ if (p->loop_shift < 2) { /* loop_shift 0 or 1: shift the source into tmpreg, then accumulate tmpreg instead of the source */
+ ORC_ASM_CODE(p," vshl.i64 %s, %s, #%d\n",
+ orc_neon_reg_name (p->tmpreg),
+ orc_neon_reg_name (p->vars[insn->src_args[0]].alloc), 48);
+ code = NEON_BINARY(0xf2a00590, p->tmpreg, 0,
+ p->vars[insn->src_args[0]].alloc);
+ code |= (48) << 16; /* shift amount OR'd in at bit 16; must stay in sync with the #48 printed in the asm text above */
+ orc_arm_emit (p, code); /* NOTE(review): shift is a fixed 48 for both loop_shift 0 and 1, while orc_neon_rule_accsadubl uses 64 - (16<<loop_shift) -- confirm 48 is intended when loop_shift == 1 */
+
+ orc_neon_emit_binary (p, "vadd.i16", 0xf2100800,
+ p->vars[insn->dest_args[0]].alloc,
+ p->vars[insn->dest_args[0]].alloc,
+ p->tmpreg);
+ } else { /* loop_shift >= 2: add the source register to the destination directly, as the pre-patch code always did */
+ orc_neon_emit_binary (p, "vadd.i16", 0xf2100800,
+ p->vars[insn->dest_args[0]].alloc,
+ p->vars[insn->dest_args[0]].alloc,
+ p->vars[insn->src_args[0]].alloc);
+ }
}
static void
orc_neon_rule_accsadubl (OrcCompiler *p, void *user, OrcInstruction *insn)
{
orc_uint32 x;
-
- x = 0xf3800700;
- ORC_ASM_CODE(p," vabdl.u8 %s, %s, %s\n",
- orc_neon_reg_name_quad (p->tmpreg),
- orc_neon_reg_name (p->vars[insn->src_args[0]].alloc),
- orc_neon_reg_name (p->vars[insn->src_args[1]].alloc));
- x |= (p->tmpreg&0xf)<<12;
- x |= ((p->tmpreg>>4)&0x1)<<22;
- x |= (p->vars[insn->src_args[0]].alloc&0xf)<<16;
- x |= ((p->vars[insn->src_args[0]].alloc>>4)&0x1)<<7;
- x |= (p->vars[insn->src_args[1]].alloc&0xf)<<0;
- x |= ((p->vars[insn->src_args[1]].alloc>>4)&0x1)<<5;
- orc_arm_emit (p, x);
-
- orc_neon_emit_unary (p, "vpadal.u16", 0xf3b40680,
- p->vars[insn->dest_args[0]].alloc,
- p->tmpreg);
+ unsigned int code; /* hand-built instruction word for the vshl.i64 emitted in the loop_shift < 2 branch */
+
+ if (p->loop_shift < 2) { /* loop_shift 0 or 1: same vabdl/vpadal sequence as the else branch, with a vshl.i64 on tmpreg in between */
+ x = 0xf3800700; /* base word for vabdl.u8; the register numbers are OR'd in below */
+ ORC_ASM_CODE(p," vabdl.u8 %s, %s, %s\n",
+ orc_neon_reg_name_quad (p->tmpreg),
+ orc_neon_reg_name (p->vars[insn->src_args[0]].alloc),
+ orc_neon_reg_name (p->vars[insn->src_args[1]].alloc));
+ x |= (p->tmpreg&0xf)<<12; /* tmpreg: low 4 bits at bit 12, bit 4 at bit 22 */
+ x |= ((p->tmpreg>>4)&0x1)<<22;
+ x |= (p->vars[insn->src_args[0]].alloc&0xf)<<16; /* first source: low 4 bits at bit 16, bit 4 at bit 7 */
+ x |= ((p->vars[insn->src_args[0]].alloc>>4)&0x1)<<7;
+ x |= (p->vars[insn->src_args[1]].alloc&0xf)<<0; /* second source: low 4 bits at bit 0, bit 4 at bit 5 */
+ x |= ((p->vars[insn->src_args[1]].alloc>>4)&0x1)<<5;
+ orc_arm_emit (p, x);
+
+ ORC_ASM_CODE(p," vshl.i64 %s, %s, #%d\n",
+ orc_neon_reg_name (p->tmpreg),
+ orc_neon_reg_name (p->tmpreg), 64 - (16<<p->loop_shift)); /* evaluates to 48 for loop_shift 0 and 32 for loop_shift 1 */
+ code = NEON_BINARY(0xf2a00590, p->tmpreg, 0, p->tmpreg);
+ code |= (64 - (16<<p->loop_shift)) << 16; /* same shift amount as printed above, OR'd in at bit 16 */
+ orc_arm_emit (p, code);
+
+ orc_neon_emit_unary (p, "vpadal.u16", 0xf3b40680,
+ p->vars[insn->dest_args[0]].alloc,
+ p->tmpreg); /* accumulate the shifted tmpreg into the destination */
+ } else { /* loop_shift >= 2: identical to the pre-patch sequence, no shift between vabdl and vpadal */
+ x = 0xf3800700;
+ ORC_ASM_CODE(p," vabdl.u8 %s, %s, %s\n",
+ orc_neon_reg_name_quad (p->tmpreg),
+ orc_neon_reg_name (p->vars[insn->src_args[0]].alloc),
+ orc_neon_reg_name (p->vars[insn->src_args[1]].alloc));
+ x |= (p->tmpreg&0xf)<<12;
+ x |= ((p->tmpreg>>4)&0x1)<<22;
+ x |= (p->vars[insn->src_args[0]].alloc&0xf)<<16;
+ x |= ((p->vars[insn->src_args[0]].alloc>>4)&0x1)<<7;
+ x |= (p->vars[insn->src_args[1]].alloc&0xf)<<0;
+ x |= ((p->vars[insn->src_args[1]].alloc>>4)&0x1)<<5;
+ orc_arm_emit (p, x);
+
+ orc_neon_emit_unary (p, "vpadal.u16", 0xf3b40680,
+ p->vars[insn->dest_args[0]].alloc,
+ p->tmpreg);
+ }
}
static void