ret = system (cmd);
if (ret != 0) {
ORC_ERROR ("gcc failed");
+ printf("%s\n", orc_program_get_asm_code (p));
return ORC_TEST_FAILED;
}
ret = fwrite(p->code, p->code_size, 1, file);
fclose (file);
- ret = system (PREFIX "gcc -mcpu=cortex-a8 -mfpu=neon -Wall -c tmp.s");
+ ret = system (PREFIX "gcc -march=armv6t2 -mcpu=cortex-a8 -mfpu=neon -Wall -c tmp.s");
if (ret != 0) {
printf("gcc failed\n");
+ printf("%s\n", orc_program_get_asm_code (p));
return FALSE;
}
{
uint32_t code;
int shift2;
+ unsigned int x;
- shift2 = 0;
-#if 0
- while (imm && ((imm&3)==0)) {
- imm >>= 2;
- shift2++;
- }
-#endif
- while (imm && imm > 0xffff) {
- if ((imm&3) != 0) {
- ORC_ERROR("bad immediate value");
+ if ((imm & 0xff) == imm) {
+ shift2 = 0;
+ x = imm;
+ } else {
+ shift2 = 0;
+ x = imm & 0xffffffff;
+ while ((x & 3) == 0) {
+ x >>= 2;
+ shift2++;
+ }
+ if (x > 0xff) {
+ ORC_PROGRAM_ERROR(compiler, "bad immediate value");
}
- imm >>= 2;
- shift2++;
}
code = 0xe3a00000;
code |= (dest&0xf) << 12;
code |= (((16-shift2)&0xf) << 8);
- code |= (imm&0xff);
+ code |= (x&0xff);
- ORC_ASM_CODE(compiler," mov %s, #0x%08x\n", arm_reg_name (dest), imm << (shift2*2));
+ ORC_ASM_CODE(compiler," mov %s, #0x%08x\n", arm_reg_name (dest), imm);
arm_emit (compiler, code);
}
}
void
-arm_emit_add_imm (OrcCompiler *compiler, int dest, int src1, int value)
+arm_emit_add_imm (OrcCompiler *compiler, int dest, int src1, int imm)
{
+#if 0
uint32_t code;
code = 0xe2800000;
arm_reg_name (src1),
value);
arm_emit (compiler, code);
+#endif
+ uint32_t code;
+ int shift2;
+ unsigned int x;
+
+ if ((imm & 0xff) == imm) {
+ shift2 = 0;
+ x = imm;
+ } else {
+ shift2 = 0;
+ x = imm & 0xffffffff;
+ while ((x & 3) == 0) {
+ x >>= 2;
+ shift2++;
+ }
+ if (x > 0xff) {
+ ORC_PROGRAM_ERROR(compiler, "bad immediate value");
+ }
+ }
+
+ code = 0xe2800000;
+ code |= (src1&0xf) << 16;
+ code |= (dest&0xf) << 12;
+ code |= (((16-shift2)&0xf) << 8);
+ code |= (x&0xff);
+
+ ORC_ASM_CODE(compiler," add %s, %s, #0x%08x\n", arm_reg_name (dest),
+ arm_reg_name(src1), imm);
+ arm_emit (compiler, code);
}
void
void arm_do_fixups (OrcCompiler *compiler);
+const char *neon_reg_name (int reg);
+const char *neon_reg_name_quad (int reg);
+void neon_emit_mov (OrcCompiler *compiler, int src, int dest);
#endif
void orc_compiler_rewrite_vars (OrcCompiler *compiler);
void orc_compiler_dump (OrcCompiler *compiler);
+void neon_save_accumulators (OrcCompiler *compiler);
void
compiler->used_regs[i] = 0;
}
+ compiler->exec_reg = ARM_R0;
+ compiler->gp_tmpreg = ARM_A2;
compiler->tmpreg = ORC_VEC_REG_BASE + 0;
compiler->valid_regs[compiler->tmpreg] = 0;
compiler->vars[i].ptr_register,
neon_exec_ptr, ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
break;
+ case ORC_VAR_TYPE_ACCUMULATOR:
+ neon_emit_loadil (compiler, compiler->vars[i].alloc, 0);
+ break;
+ case ORC_VAR_TYPE_TEMP:
+ break;
default:
+ ORC_PROGRAM_ERROR(compiler,"bad vartype");
break;
}
}
return -1;
}
+/*
+ * Pick the variable used as the alignment variable: the first destination
+ * (ORC_VAR_D1) when it has a non-zero size, otherwise the first source
+ * (ORC_VAR_S1).  When neither has a size, a program error is reported on
+ * the compiler and -1 is returned.
+ */
+static int
+get_align_var (OrcCompiler *compiler)
+{
+  static const int candidates[] = { ORC_VAR_D1, ORC_VAR_S1 };
+  unsigned int k;
+
+  for (k = 0; k < sizeof (candidates) / sizeof (candidates[0]); k++) {
+    if (compiler->vars[candidates[k]].size) {
+      return candidates[k];
+    }
+  }
+
+  ORC_PROGRAM_ERROR(compiler, "could not find alignment variable");
+
+  return -1;
+}
+
void
orc_compiler_neon_assemble (OrcCompiler *compiler)
{
- int dest_var;
- int dest_shift;
+ int align_var;
+ int align_shift;
- dest_var = orc_compiler_get_dest (compiler);
- dest_shift = get_shift (compiler->vars[dest_var].size);
+ align_var = get_align_var (compiler);
+ align_shift = get_shift (compiler->vars[align_var].size);
- compiler->vars[dest_var].is_aligned = FALSE;
+ compiler->vars[align_var].is_aligned = FALSE;
neon_emit_prologue (compiler);
arm_emit_load_reg (compiler, ARM_A3, neon_exec_ptr,
(int)ORC_STRUCT_OFFSET(OrcExecutor,n));
arm_emit_load_reg (compiler, ARM_A2, neon_exec_ptr,
- (int)ORC_STRUCT_OFFSET(OrcExecutor,arrays[dest_var]));
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,arrays[align_var]));
arm_emit_sub (compiler, ARM_IP, ARM_IP, ARM_A2);
arm_emit_and_imm (compiler, ARM_IP, ARM_IP, (1<<align_shift)-1);
- if (dest_shift > 0) {
- arm_emit_asr_imm (compiler, ARM_IP, ARM_IP, dest_shift);
+ if (align_shift > 0) {
+ arm_emit_asr_imm (compiler, ARM_IP, ARM_IP, align_shift);
}
arm_emit_cmp (compiler, ARM_A3, ARM_IP);
arm_emit_label (compiler, 1);
compiler->loop_shift = save_loop_shift;
- compiler->vars[dest_var].is_aligned = TRUE;
+ compiler->vars[align_var].is_aligned = TRUE;
}
if (compiler->loop_shift > 0) {
int save_loop_shift = compiler->loop_shift;
compiler->loop_shift = 0;
- compiler->vars[dest_var].is_aligned = FALSE;
+ compiler->vars[align_var].is_aligned = FALSE;
arm_emit_load_reg (compiler, ARM_IP, neon_exec_ptr,
(int)ORC_STRUCT_OFFSET(OrcExecutor,counter3));
compiler->loop_shift = save_loop_shift;
}
+ neon_save_accumulators (compiler);
+
neon_emit_epilogue (compiler);
arm_do_fixups (compiler);
rule = insn->rule;
if (rule && rule->emit) {
+#if 0
if (compiler->vars[insn->dest_args[0]].alloc !=
compiler->vars[insn->src_args[0]].alloc) {
- arm_emit_mov (compiler, compiler->vars[insn->src_args[0]].alloc,
+ neon_emit_mov (compiler, compiler->vars[insn->src_args[0]].alloc,
compiler->vars[insn->dest_args[0]].alloc);
}
+#endif
rule->emit (compiler, rule->emit_user, insn);
} else {
orc_compiler_append_code(compiler,"No rule for: %s\n", opcode->name);
#endif
}
+/*
+ * Emit "<name> Dreg, Dreg": an in-place two-register pairwise add-long.
+ * code is the opcode with the register fields zero; the register goes into
+ * Vd (bits 15:12, high bit in bit 22) and Vm (bits 3:0, high bit in bit 5).
+ */
+static void
+neon_accum_emit_pairwise_add (OrcCompiler *compiler, const char *name,
+    unsigned int code, int reg)
+{
+  ORC_ASM_CODE(compiler," %s %s, %s\n", name,
+      neon_reg_name (reg),
+      neon_reg_name (reg));
+  code |= (reg&0xf) << 12;
+  code |= ((reg>>4)&0x1) << 22;
+  code |= (reg&0xf) << 0;
+  code |= ((reg>>4)&0x1) << 5;
+  arm_emit (compiler, code);
+}
+
+/*
+ * Emit "<name> Dreg[0], [gp_tmpreg], exec_reg": store lane 0 of reg.
+ *
+ * NOTE(review): the encoding never sets the index-register field
+ * (bits 3:0), so it matches the printed text only if exec_reg encodes as
+ * register 0 -- confirm.
+ * NOTE(review): gp_tmpreg is loaded with just the struct offset by the
+ * caller; it looks like the executor pointer is never added to form the
+ * store address -- verify before relying on this.
+ */
+static void
+neon_accum_emit_store_lane0 (OrcCompiler *compiler, const char *name,
+    unsigned int code, int reg)
+{
+  ORC_ASM_CODE(compiler," %s %s[%d], [%s], %s\n", name,
+      neon_reg_name (reg), 0,
+      arm_reg_name (compiler->gp_tmpreg),
+      arm_reg_name (compiler->exec_reg));
+  code |= (compiler->gp_tmpreg&0xf) << 16;
+  code |= (reg&0xf) << 12;
+  code |= ((reg>>4)&0x1) << 22;
+  arm_emit (compiler, code);
+}
+
+/*
+ * Emit the code that reduces every accumulator variable and stores it to
+ * its OrcExecutor.accumulators[] slot.
+ *
+ * For each named variable of type ORC_VAR_TYPE_ACCUMULATOR, the offset of
+ * its slot is loaded into compiler->gp_tmpreg, the NEON register is folded
+ * with pairwise widening adds, and lane 0 is stored.  Element sizes 2 and 4
+ * are handled; any other size logs "bad size" and emits no reduction.
+ *
+ * The vpaddl opcodes were previously 0xf3b40080, which is the vrev32.16
+ * encoding, and the register was OR-ed into bits 19:16 (a fixed opcode
+ * field for this instruction form) instead of the Vm operand field.
+ */
+void
+neon_save_accumulators (OrcCompiler *compiler)
+{
+  int i;
+
+  for (i = 0; i < ORC_N_VARIABLES; i++) {
+    OrcVariable *var = compiler->vars + i;
+    int src;
+
+    if (var->name == NULL) continue;
+    if (var->vartype != ORC_VAR_TYPE_ACCUMULATOR) continue;
+
+    src = var->alloc;
+
+    arm_emit_load_imm (compiler, compiler->gp_tmpreg,
+        ORC_STRUCT_OFFSET(OrcExecutor, accumulators[i-ORC_VAR_A1]));
+
+    switch (var->size) {
+      case 2:
+        /* 16-bit lanes -> 32-bit pairs -> one 64-bit sum; low 16 bits stored */
+        neon_accum_emit_pairwise_add (compiler, "vpaddl.u16", 0xf3b40280, src);
+        neon_accum_emit_pairwise_add (compiler, "vpaddl.u32", 0xf3b80280, src);
+        neon_accum_emit_store_lane0 (compiler, "vst1.16", 0xf4800400, src);
+        break;
+      case 4:
+        /* 32-bit lanes -> one 64-bit sum; low 32 bits stored */
+        neon_accum_emit_pairwise_add (compiler, "vpaddl.u32", 0xf3b80280, src);
+        neon_accum_emit_store_lane0 (compiler, "vst1.32", 0xf4800800, src);
+        break;
+      default:
+        ORC_ERROR("bad size");
+        break;
+    }
+  }
+}
+
return vec_regs[reg&0x1f];
}
-#if 0
void
-neon_emit_mov (OrcCompiler *compiler, uint32_t code, int src, int dest)
+neon_emit_mov (OrcCompiler *compiler, int src, int dest)
{
- code |= (src&0xf) << 12;
- code |= ((src>>4)&0x1) << 22;
+ uint32_t code;
+
+ ORC_ASM_CODE(compiler," vmov %s, %s\n",
+ neon_reg_name (dest),
+ neon_reg_name (src));
+ code = 0xf2200110;
code |= (dest&0xf) << 16;
code |= ((dest>>4)&0x1) << 7;
+ code |= (src&0xf) << 12;
+ code |= ((src>>4)&0x1) << 22;
+ code |= (src&0xf) << 0;
+ code |= ((src>>4)&0x1) << 5;
arm_emit (compiler, code);
}
-#endif
void
neon_loadb (OrcCompiler *compiler, int dest, int src1, int update, int is_aligned)
if (value == 0) {
ORC_ASM_CODE(compiler," veor %s, %s, %s\n",
neon_reg_name (reg), neon_reg_name (reg), neon_reg_name (reg));
- code = 0xee000b30;
+ code = 0xf3000110;
code |= (reg&0xf) << 16;
code |= (reg&0xf) << 12;
code |= (reg&0xf) << 0;
if (value == 0) {
ORC_ASM_CODE(compiler," veor %s, %s, %s\n",
neon_reg_name (reg), neon_reg_name (reg), neon_reg_name (reg));
- code = 0xee000b30;
+ code = 0xf3000110;
code |= (reg&0xf) << 16;
code |= (reg&0xf) << 12;
code |= (reg&0xf) << 0;
if (value == 0) {
ORC_ASM_CODE(compiler," veor %s, %s, %s\n",
neon_reg_name (reg), neon_reg_name (reg), neon_reg_name (reg));
- code = 0xee000b30;
+ code = 0xf3000110;
code |= (reg&0xf) << 16;
code |= (reg&0xf) << 12;
code |= (reg&0xf) << 0;
BINARY_LONG(mulswl,"vmull.s16",0xf2900c00)
BINARY_LONG(muluwl,"vmull.u16",0xf3900c00)
+UNARY(swapw,"vrev16.i8",0xf3b00100)
+UNARY(swapl,"vrev32.i8",0xf3b00080)
+
+UNARY_NARROW(select0lw,"vmovn.i32",0xf3b60200)
+UNARY_NARROW(select0wb,"vmovn.i16",0xf3b20200)
+
+UNARY(mergebw,"vzip.8",0xf3b20180)
+UNARY(mergewl,"vzip.16",0xf3b60180)
+
+/*
+ * Emit a three-register NEON instruction "<name> dest, src1, src2".
+ *
+ * code is the opcode with all register fields zero.  Registers are placed
+ * in the three-register layout: destination in Vd (bits 15:12, high bit in
+ * bit 22), first source in Vn (bits 19:16, high bit in bit 7), second
+ * source in Vm (bits 3:0, high bit in bit 5).
+ *
+ * The destination and first source were previously written to each other's
+ * fields, which only produced the right encoding when dest == src1.
+ */
+static void
+neon_emit_binary (OrcCompiler *p, const char *name, unsigned int code,
+    int dest, int src1, int src2)
+{
+  ORC_ASM_CODE(p," %s %s, %s, %s\n", name,
+      neon_reg_name (dest), neon_reg_name (src1), neon_reg_name (src2));
+  code |= (dest&0xf)<<12;
+  code |= ((dest>>4)&0x1)<<22;
+  code |= (src1&0xf)<<16;
+  code |= ((src1>>4)&0x1)<<7;
+  code |= (src2&0xf)<<0;
+  code |= ((src2>>4)&0x1)<<5;
+  arm_emit (p, code);
+}
+
+/*
+ * Rule for "accw": emits "vadd.i16 acc, acc, src", adding the source
+ * register into the instruction's destination (accumulator) register.
+ */
+static void
+neon_rule_accw (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int acc = p->vars[insn->dest_args[0]].alloc;
+  int addend = p->vars[insn->src_args[0]].alloc;
+
+  neon_emit_binary (p, "vadd.i16", 0xf2100800, acc, acc, addend);
+}
+
+/*
+ * Rule for "accl": emits "vadd.i32 acc, acc, src", adding the source
+ * register into the instruction's destination (accumulator) register.
+ */
+static void
+neon_rule_accl (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int acc = p->vars[insn->dest_args[0]].alloc;
+  int addend = p->vars[insn->src_args[0]].alloc;
+
+  neon_emit_binary (p, "vadd.i32", 0xf2200800, acc, acc, addend);
+}
+
+/*
+ * Rule for "select1wb": emits "vrev16.i8 dest, src" followed by
+ * "vmovn.i16 dest, src" (src printed as a quad register for the vmovn).
+ * In both encodings dest goes to bits 15:12 / 22 and src to bits 3:0 / 5.
+ *
+ * NOTE(review): the vmovn reads src, not the vrev16 result held in dest,
+ * so the first instruction's output appears to be overwritten -- confirm
+ * this is intended.
+ */
+static void
+neon_rule_select1wb (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  const int dest = p->vars[insn->dest_args[0]].alloc;
+  const int src = p->vars[insn->src_args[0]].alloc;
+  uint32_t word;
+
+  ORC_ASM_CODE(p," vrev16.i8 %s, %s\n",
+      neon_reg_name (dest),
+      neon_reg_name (src));
+  word = 0xf3b00100;
+  word |= ((dest&0xf)<<12) | (((dest>>4)&0x1)<<22);
+  word |= ((src&0xf)<<0) | (((src>>4)&0x1)<<5);
+  arm_emit (p, word);
+
+  ORC_ASM_CODE(p," vmovn.i16 %s, %s\n",
+      neon_reg_name (dest),
+      neon_reg_name_quad (src));
+  word = 0xf3b20200;
+  word |= ((dest&0xf)<<12) | (((dest>>4)&0x1)<<22);
+  word |= ((src&0xf)<<0) | (((src>>4)&0x1)<<5);
+  arm_emit (p, word);
+}
+
+/*
+ * Rule for "select1lw": emits "vrev32.i16 dest, src" followed by
+ * "vmovn.i32 dest, src" (src printed as a quad register for the vmovn).
+ * In both encodings dest goes to bits 15:12 / 22 and src to bits 3:0 / 5.
+ *
+ * NOTE(review): the vmovn reads src, not the vrev32 result held in dest,
+ * so the first instruction's output appears to be overwritten -- confirm
+ * this is intended.
+ */
+static void
+neon_rule_select1lw (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  const int dest = p->vars[insn->dest_args[0]].alloc;
+  const int src = p->vars[insn->src_args[0]].alloc;
+  uint32_t word;
+
+  ORC_ASM_CODE(p," vrev32.i16 %s, %s\n",
+      neon_reg_name (dest),
+      neon_reg_name (src));
+  word = 0xf3b40080;
+  word |= ((dest&0xf)<<12) | (((dest>>4)&0x1)<<22);
+  word |= ((src&0xf)<<0) | (((src>>4)&0x1)<<5);
+  arm_emit (p, word);
+
+  ORC_ASM_CODE(p," vmovn.i32 %s, %s\n",
+      neon_reg_name (dest),
+      neon_reg_name_quad (src));
+  word = 0xf3b60200;
+  word |= ((dest&0xf)<<12) | (((dest>>4)&0x1)<<22);
+  word |= ((src&0xf)<<0) | (((src>>4)&0x1)<<5);
+  arm_emit (p, word);
+}
+
+/*
+ * Rule for "accsadubl": emits "vabdl.u8 tmp(quad), src1, src2" into
+ * p->tmpreg, then "vpadal.u16 dest, tmp" to accumulate into the
+ * instruction's destination register.
+ *
+ * The vabdl.u8 base opcode is 0xf3800700.  It was previously 0xf3840700,
+ * which has a 4 pre-set in the first-source field (bits 19:16); OR-ing the
+ * real register number on top of that selected the wrong register.
+ *
+ * NOTE(review): vabdl writes tmp as a quad register, but vpadal is emitted
+ * in its doubleword form and reads only tmp as printed by neon_reg_name --
+ * confirm the upper half of the differences is meant to be ignored.
+ */
+static void
+neon_rule_accsadubl (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  const int dest = p->vars[insn->dest_args[0]].alloc;
+  const int src1 = p->vars[insn->src_args[0]].alloc;
+  const int src2 = p->vars[insn->src_args[1]].alloc;
+  const int tmp = p->tmpreg;
+  uint32_t x;
+
+  x = 0xf3800700;
+  ORC_ASM_CODE(p," vabdl.u8 %s, %s, %s\n",
+      neon_reg_name_quad (tmp),
+      neon_reg_name (src1),
+      neon_reg_name (src2));
+  x |= (tmp&0xf)<<12;
+  x |= ((tmp>>4)&0x1)<<22;
+  x |= (src1&0xf)<<16;
+  x |= ((src1>>4)&0x1)<<7;
+  x |= (src2&0xf)<<0;
+  x |= ((src2>>4)&0x1)<<5;
+  arm_emit (p, x);
+
+  x = 0xf3b40680;
+  ORC_ASM_CODE(p," vpadal.u16 %s, %s\n",
+      neon_reg_name (dest),
+      neon_reg_name (tmp));
+  x |= (dest&0xf)<<12;
+  x |= ((dest>>4)&0x1)<<22;
+  x |= (tmp&0xf)<<0;
+  x |= ((tmp>>4)&0x1)<<5;
+  arm_emit (p, x);
+}
void
orc_compiler_neon_register_rules (OrcTarget *target)
{
OrcRuleSet *rule_set;
- rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target);
+ rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target, 0);
#define REG(x) \
orc_rule_register (rule_set, #x , neon_rule_ ## x, NULL)
REG(mulswl);
REG(muluwl);
+ REG(accw);
+ REG(accl);
+ REG(accsadubl);
+ REG(swapw);
+ REG(swapl);
+ REG(select0wb);
+ REG(select1wb);
+ REG(select0lw);
+ REG(select1lw);
+ REG(mergebw);
+ REG(mergewl);
+
orc_rule_register (rule_set, "shlb", neon_rule_shift, (void *)0);
orc_rule_register (rule_set, "shrsb", neon_rule_shift, (void *)1);
orc_rule_register (rule_set, "shrub", neon_rule_shift, (void *)2);
OrcProgram *p;
char s[40];
- if (opcode->src_size[1] == 0) {
- p = orc_program_new_ds (opcode->dest_size[0], opcode->src_size[0]);
+ if (opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR) {
+ if (opcode->src_size[1] == 0) {
+ p = orc_program_new_as (opcode->dest_size[0], opcode->src_size[0]);
+ } else {
+ p = orc_program_new_ass (opcode->dest_size[0], opcode->src_size[0],
+ opcode->src_size[1]);
+ }
} else {
- p = orc_program_new_dss (opcode->dest_size[0], opcode->src_size[0],
- opcode->src_size[1]);
+ if (opcode->src_size[1] == 0) {
+ p = orc_program_new_ds (opcode->dest_size[0], opcode->src_size[0]);
+ } else {
+ p = orc_program_new_dss (opcode->dest_size[0], opcode->src_size[0],
+ opcode->src_size[1]);
+ }
}
sprintf(s, "test_%s", opcode->name);
orc_program_set_name (p, s);
- orc_program_append_str (p, opcode->name, "d1", "s1", "s2");
+ if (opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR) {
+ orc_program_append_str (p, opcode->name, "a1", "s1", "s2");
+ } else {
+ orc_program_append_str (p, opcode->name, "d1", "s1", "s2");
+ }
orc_test_gcc_compile_neon (p);