From c47558a7e575a5f93f35b252eff39650e6914620 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Mon, 2 Aug 2010 14:59:15 -0700 Subject: [PATCH] neon: add load/store opcodes --- orc/orcprogram-neon.c | 217 ++++++------------------------------------------ orc/orcrules-neon.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 245 insertions(+), 194 deletions(-) diff --git a/orc/orcprogram-neon.c b/orc/orcprogram-neon.c index 8161f12..5c9a74b 100644 --- a/orc/orcprogram-neon.c +++ b/orc/orcprogram-neon.c @@ -235,29 +235,8 @@ orc_neon_load_constants_outer (OrcCompiler *compiler) switch (compiler->vars[i].vartype) { case ORC_VAR_TYPE_CONST: - if (compiler->vars[i].size == 1) { - orc_neon_emit_loadib (compiler, compiler->vars[i].alloc, - (int)compiler->vars[i].value); - } else if (compiler->vars[i].size == 2) { - orc_neon_emit_loadiw (compiler, compiler->vars[i].alloc, - (int)compiler->vars[i].value); - } else if (compiler->vars[i].size == 4) { - orc_neon_emit_loadil (compiler, compiler->vars[i].alloc, - (int)compiler->vars[i].value); - } else { - ORC_PROGRAM_ERROR(compiler,"unimplemented"); - } break; case ORC_VAR_TYPE_PARAM: - if (compiler->vars[i].size == 1) { - orc_neon_emit_loadpb (compiler, compiler->vars[i].alloc, i); - } else if (compiler->vars[i].size == 2) { - orc_neon_emit_loadpw (compiler, compiler->vars[i].alloc, i); - } else if (compiler->vars[i].size == 4) { - orc_neon_emit_loadpl (compiler, compiler->vars[i].alloc, i); - } else { - ORC_PROGRAM_ERROR(compiler,"unimplemented"); - } break; case ORC_VAR_TYPE_SRC: case ORC_VAR_TYPE_DEST: @@ -272,6 +251,23 @@ orc_neon_load_constants_outer (OrcCompiler *compiler) break; } } + + for(i=0;i<compiler->n_insns;i++){ + OrcInstruction *insn = compiler->insns + i; + OrcStaticOpcode *opcode = insn->opcode; + OrcRule *rule; + + if (!(compiler->insn_flags[i] & ORC_INSN_FLAG_INVARIANT)) continue; + + ORC_ASM_CODE(compiler,"# %d: %s\n", i, insn->opcode->name); + + rule = insn->rule; + if (rule && rule->emit) { +
rule->emit (compiler, rule->emit_user, insn); + } else { + ORC_COMPILER_ERROR(compiler,"No rule for: %s", opcode->name); + } + } } void @@ -303,167 +299,7 @@ orc_neon_load_constants_inner (OrcCompiler *compiler) } } -void -orc_neon_load_alignment_masks (OrcCompiler *compiler) -{ - int i; - //int j; - unsigned int code; - int size; - int b = 0; - - for(i=0;ivars[i]; - - if (var->name == NULL) continue; - - switch (var->vartype) { - case ORC_VAR_TYPE_SRC: - if (var->is_aligned) continue; - - if (compiler->loop_shift > 1) { - int j; - - size = var->size << compiler->loop_shift; - - orc_arm_emit_and_imm (compiler, compiler->gp_tmpreg, - var->ptr_register, size-1); - orc_arm_emit_lsl_imm (compiler, compiler->gp_tmpreg, - compiler->gp_tmpreg, 3); - - if (compiler->target_flags & ORC_TARGET_NEON_CLEAN_COMPILE) { - for(j=0;jmask_alloc), j, - orc_arm_reg_name (compiler->gp_tmpreg)); - code = 0xee400b10; - code |= (var->mask_alloc&0xf)<<16; - code |= ((var->mask_alloc>>4)&0x1)<<7; - code |= (compiler->gp_tmpreg&0xf)<<12; - code |= (j&3)<<5; - code |= (j>>2)<<21; - orc_arm_emit (compiler, code); - - orc_arm_emit_add_imm (compiler, compiler->gp_tmpreg, - compiler->gp_tmpreg, 1); - } - } else { - orc_arm_emit_align (compiler, 3); - orc_arm_emit_add (compiler, compiler->gp_tmpreg, - compiler->gp_tmpreg, ORC_ARM_PC); - orc_arm_emit_add_imm (compiler, compiler->gp_tmpreg, - compiler->gp_tmpreg, 16-8); - - if (size != 4 && size != 8) { - ORC_ERROR("strange size %d", size); - } - - ORC_ASM_CODE(compiler, " vld1.64 %s, [%s]\n", - orc_neon_reg_name (var->mask_alloc), - orc_arm_reg_name (compiler->gp_tmpreg)); - code = 0xf42007cf; - code |= (compiler->gp_tmpreg&0xf) << 16; - code |= (var->mask_alloc&0xf) << 12; - code |= ((var->mask_alloc>>4)&0x1) << 22; - orc_arm_emit (compiler, code); - - orc_arm_emit_branch (compiler, ORC_ARM_COND_AL, 9+b); - for(j=0;j<8;j++){ - ORC_ASM_CODE(compiler, " .word 0x%02x%02x%02x%02x\n", j+3, j+2, j+1, j+0); - orc_arm_emit (compiler, ((j+0)<<0) | 
((j+1)<<8) | ((j+2)<<16) | ((j+3)<<24)); - ORC_ASM_CODE(compiler, " .word 0x%02x%02x%02x%02x\n", j+7, j+6, j+5, j+4); - orc_arm_emit (compiler, ((j+4)<<0) | ((j+5)<<8) | ((j+6)<<16) | ((j+7)<<24)); - } - orc_arm_emit_label (compiler, 9+b); - b++; - - } - - orc_arm_emit_and_imm (compiler, var->ptr_offset, var->ptr_register, - size - 1); - orc_arm_emit_sub (compiler, var->ptr_register, var->ptr_register, - var->ptr_offset); - - if (size == 4) { - int update = 1; - ORC_ASM_CODE(compiler," vld1.32 %s[0], [%s]%s\n", - orc_neon_reg_name (var->aligned_data), - orc_arm_reg_name (var->ptr_register), - update ? "!" : ""); - code = 0xf4a0080d; - code |= (var->ptr_register&0xf) << 16; - code |= ((var->aligned_data)&0xf) << 12; - code |= (((var->aligned_data)>>4)&0x1) << 22; - code |= (!update) << 1; - orc_arm_emit (compiler, code); - } else { - int update = 1; - ORC_ASM_CODE(compiler," vld1.64 %s, [%s]%s\n", - orc_neon_reg_name (var->aligned_data), - orc_arm_reg_name (var->ptr_register), - update ? "!" 
: ""); - code = 0xf42007cd; - code |= (var->ptr_register&0xf) << 16; - code |= ((var->aligned_data)&0xf) << 12; - code |= (((var->aligned_data)>>4)&0x1) << 22; - code |= (!update) << 1; - orc_arm_emit (compiler, code); - } - } - - break; - case ORC_VAR_TYPE_DEST: - break; - case ORC_VAR_TYPE_ACCUMULATOR: - case ORC_VAR_TYPE_CONST: - case ORC_VAR_TYPE_PARAM: - case ORC_VAR_TYPE_TEMP: - break; - default: - ORC_PROGRAM_ERROR(compiler,"bad vartype"); - break; - } - } -} - -void -orc_neon_restore_unalignment (OrcCompiler *compiler) -{ - int i; - int size; - - for(i=0;ivars[i]; - - if (var->name == NULL) continue; - - switch (var->vartype) { - case ORC_VAR_TYPE_SRC: - if (var->is_aligned) continue; - - if (compiler->loop_shift > 1) { - size = var->size << compiler->loop_shift; - - orc_arm_emit_add (compiler, var->ptr_register, var->ptr_register, - var->ptr_offset); - orc_arm_emit_sub_imm (compiler, var->ptr_register, var->ptr_register, - size, FALSE); - } - break; - case ORC_VAR_TYPE_DEST: - break; - case ORC_VAR_TYPE_ACCUMULATOR: - case ORC_VAR_TYPE_CONST: - case ORC_VAR_TYPE_PARAM: - case ORC_VAR_TYPE_TEMP: - break; - default: - ORC_PROGRAM_ERROR(compiler,"bad vartype"); - break; - } - } -} - +#if 0 void orc_neon_emit_load_src (OrcCompiler *compiler, OrcVariable *var, int unroll_index) { @@ -562,6 +398,7 @@ orc_neon_emit_store_dest (OrcCompiler *compiler, OrcVariable *var) break; } } +#endif static int get_shift (int size) @@ -760,12 +597,6 @@ orc_compiler_neon_assemble (OrcCompiler *compiler) orc_arm_emit_cmp_imm (compiler, ORC_ARM_IP, 0); orc_arm_emit_branch (compiler, ORC_ARM_COND_EQ, LABEL_REGION2_SKIP); - if (0) { - /* Disable alignment masks for now. It can easily take all available - registers. 
*/ - orc_neon_load_alignment_masks (compiler); - } - orc_arm_emit_asr_imm (compiler, compiler->gp_tmpreg, ORC_ARM_IP, 17 + var_size_shift - compiler->loop_shift - compiler->unroll_shift); orc_arm_emit_cmp_imm (compiler, compiler->gp_tmpreg, 0); @@ -807,10 +638,6 @@ orc_compiler_neon_assemble (OrcCompiler *compiler) } orc_arm_emit_branch (compiler, ORC_ARM_COND_NE, LABEL_REGION2_LOOP_SMALL); - if (0) { - orc_neon_restore_unalignment (compiler); - } - orc_arm_emit_label (compiler, LABEL_REGION2_SKIP); if (compiler->loop_shift > 0) { @@ -872,6 +699,8 @@ orc_neon_emit_loop (OrcCompiler *compiler, int unroll_index) insn = compiler->insns + j; opcode = insn->opcode; + if (compiler->insn_flags[j] & ORC_INSN_FLAG_INVARIANT) continue; + orc_compiler_append_code(compiler,"# %d: %s", j, insn->opcode->name); /* set up args */ @@ -892,7 +721,7 @@ orc_neon_emit_loop (OrcCompiler *compiler, int unroll_index) switch (compiler->vars[insn->src_args[k]].vartype) { case ORC_VAR_TYPE_SRC: case ORC_VAR_TYPE_DEST: - orc_neon_emit_load_src (compiler, &compiler->vars[insn->src_args[k]], unroll_index); + //orc_neon_emit_load_src (compiler, &compiler->vars[insn->src_args[k]], unroll_index); break; case ORC_VAR_TYPE_CONST: break; @@ -924,7 +753,7 @@ orc_neon_emit_loop (OrcCompiler *compiler, int unroll_index) switch (compiler->vars[insn->dest_args[k]].vartype) { case ORC_VAR_TYPE_DEST: - orc_neon_emit_store_dest (compiler, &compiler->vars[insn->dest_args[k]]); + //orc_neon_emit_store_dest (compiler, &compiler->vars[insn->dest_args[k]]); break; case ORC_VAR_TYPE_TEMP: break; diff --git a/orc/orcrules-neon.c b/orc/orcrules-neon.c index ac32fec..9239238 100644 --- a/orc/orcrules-neon.c +++ b/orc/orcrules-neon.c @@ -236,6 +236,7 @@ orc_neon_preload (OrcCompiler *compiler, OrcVariable *var, int write, orc_arm_emit (compiler, code); } +#if 0 void orc_neon_load_halfvec_aligned (OrcCompiler *compiler, OrcVariable *var, int update) { @@ -765,6 +766,217 @@ orc_neon_storeq (OrcCompiler *compiler, int 
dest, int update, int src1, int is_a
   code |= (!update) << 1;
   orc_arm_emit (compiler, code);
 }
+#endif
+/* Rule for loadpb/loadpw/loadpl: put a constant or parameter in dest->alloc. */
+static void
+neon_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
+{
+  OrcVariable *src = compiler->vars + insn->src_args[0];
+  OrcVariable *dest = compiler->vars + insn->dest_args[0];
+
+  if (src->vartype == ORC_VAR_TYPE_CONST) { /* constants pass src->value */
+    if (src->size == 1) {
+      orc_neon_emit_loadib (compiler, dest->alloc, src->value);
+    } else if (src->size == 2) {
+      orc_neon_emit_loadiw (compiler, dest->alloc, src->value);
+    } else if (src->size == 4) {
+      orc_neon_emit_loadil (compiler, dest->alloc, src->value);
+    } else {
+      ORC_PROGRAM_ERROR(compiler,"unimplemented");
+    }
+  } else { /* anything else passes the source variable's index */
+    if (src->size == 1) {
+      orc_neon_emit_loadpb (compiler, dest->alloc, insn->src_args[0]);
+    } else if (src->size == 2) {
+      orc_neon_emit_loadpw (compiler, dest->alloc, insn->src_args[0]);
+    } else if (src->size == 4) {
+      orc_neon_emit_loadpl (compiler, dest->alloc, insn->src_args[0]);
+    } else {
+      ORC_PROGRAM_ERROR(compiler,"unimplemented");
+    }
+  }
+}
+/* Rule for loadb/loadw/loadl: emit a vld1 from src->ptr_register to dest->alloc. */
+static void
+neon_rule_loadX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
+{
+  OrcVariable *src = compiler->vars + insn->src_args[0];
+  OrcVariable *dest = compiler->vars + insn->dest_args[0];
+  int update = TRUE; /* TRUE prints the "!" suffix and leaves bit 1 clear */
+  unsigned int code = 0;
+  int size = src->size << compiler->loop_shift; /* bytes moved per emitted load */
+
+  if (src->vartype == ORC_VAR_TYPE_DEST) update = FALSE; /* no "!" when reading a DEST */
+
+  if (size >= 8) {
+    if (src->is_aligned) {
+      if (size == 32) {
+        ORC_ASM_CODE(compiler," vld1.64 { %s, %s, %s, %s }, [%s,:256]%s\n",
+            orc_neon_reg_name (dest->alloc),
+            orc_neon_reg_name (dest->alloc + 1),
+            orc_neon_reg_name (dest->alloc + 2),
+            orc_neon_reg_name (dest->alloc + 3),
+            orc_arm_reg_name (src->ptr_register),
+            update ? "!" : "");
+        code = 0xf42002dd;
+      } else if (size == 16) {
+        ORC_ASM_CODE(compiler," vld1.64 { %s, %s }, [%s,:128]%s\n",
+            orc_neon_reg_name (dest->alloc),
+            orc_neon_reg_name (dest->alloc + 1),
+            orc_arm_reg_name (src->ptr_register),
+            update ? "!" : "");
+        code = 0xf4200aed;
+      } else if (size == 8) {
+        ORC_ASM_CODE(compiler," vld1.64 %s, [%s]%s\n",
+            orc_neon_reg_name (dest->alloc),
+            orc_arm_reg_name (src->ptr_register),
+            update ? "!" : "");
+        code = 0xf42007cd;
+      } else {
+        ORC_COMPILER_ERROR(compiler,"bad aligned load size %d",
+            src->size << compiler->loop_shift);
+      }
+    } else {
+      if (size == 32) {
+        ORC_ASM_CODE(compiler," vld1.8 { %s, %s, %s, %s }, [%s]%s\n",
+            orc_neon_reg_name (dest->alloc),
+            orc_neon_reg_name (dest->alloc + 1),
+            orc_neon_reg_name (dest->alloc + 2),
+            orc_neon_reg_name (dest->alloc + 3),
+            orc_arm_reg_name (src->ptr_register),
+            update ? "!" : "");
+        code = 0xf420020d;
+      } else if (size == 16) {
+        ORC_ASM_CODE(compiler," vld1.8 { %s, %s }, [%s]%s\n",
+            orc_neon_reg_name (dest->alloc),
+            orc_neon_reg_name (dest->alloc + 1),
+            orc_arm_reg_name (src->ptr_register),
+            update ? "!" : "");
+        code = 0xf4200a0d;
+      } else if (size == 8) {
+        ORC_ASM_CODE(compiler," vld1.8 %s, [%s]%s\n",
+            orc_neon_reg_name (dest->alloc),
+            orc_arm_reg_name (src->ptr_register),
+            update ? "!" : "");
+        code = 0xf420070d;
+      } else {
+        ORC_COMPILER_ERROR(compiler,"bad unaligned load size %d",
+            src->size << compiler->loop_shift);
+      }
+    }
+  } else {
+    int shift; /* the listing prints the element width as 8<<shift */
+    if (size == 4) {
+      shift = 2;
+    } else if (size == 2) {
+      shift = 1;
+    } else {
+      shift = 0;
+    }
+    ORC_ASM_CODE(compiler," vld1.%d %s[0], [%s]%s\n",
+        8<<shift,
+        orc_neon_reg_name (dest->alloc),
+        orc_arm_reg_name (src->ptr_register),
+        update ? "!" : "");
+    code = 0xf4a0000d;
+    code |= shift<<10;
+    code |= (0&7)<<5; /* evaluates to 0; NOTE(review): presumably the lane index */
+  }
+  code |= (src->ptr_register&0xf) << 16;
+  code |= (dest->alloc&0xf) << 12;
+  code |= ((dest->alloc>>4)&0x1) << 22;
+  code |= (!update) << 1;
+  orc_arm_emit (compiler, code);
+}
+/* Rule for storeb/storew/storel: emit a vst1 of src->alloc to dest->ptr_register. */
+static void
+neon_rule_storeX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
+{
+  OrcVariable *src = compiler->vars + insn->src_args[0];
+  OrcVariable *dest = compiler->vars + insn->dest_args[0];
+  int update = TRUE; /* never cleared here: stores always print the "!" suffix */
+  unsigned int code = 0;
+  int size = dest->size << compiler->loop_shift; /* bytes moved per emitted store */
+
+  if (size >= 8) {
+    if (dest->is_aligned) {
+      if (size == 32) {
+        ORC_ASM_CODE(compiler," vst1.64 { %s, %s, %s, %s }, [%s,:256]%s\n",
+            orc_neon_reg_name (src->alloc),
+            orc_neon_reg_name (src->alloc + 1),
+            orc_neon_reg_name (src->alloc + 2),
+            orc_neon_reg_name (src->alloc + 3),
+            orc_arm_reg_name (dest->ptr_register),
+            update ? "!" : "");
+        code = 0xf40002dd;
+      } else if (size == 16) {
+        ORC_ASM_CODE(compiler," vst1.64 { %s, %s }, [%s,:128]%s\n",
+            orc_neon_reg_name (src->alloc),
+            orc_neon_reg_name (src->alloc + 1),
+            orc_arm_reg_name (dest->ptr_register),
+            update ? "!" : "");
+        code = 0xf4000aed;
+      } else if (size == 8) {
+        ORC_ASM_CODE(compiler," vst1.64 %s, [%s]%s\n",
+            orc_neon_reg_name (src->alloc),
+            orc_arm_reg_name (dest->ptr_register),
+            update ? "!" : "");
+        code = 0xf40007cd;
+      } else {
+        ORC_COMPILER_ERROR(compiler,"bad aligned store size %d", size);
+      }
+    } else {
+      if (size == 32) {
+        ORC_ASM_CODE(compiler," vst1.8 { %s, %s, %s, %s }, [%s]%s\n",
+            orc_neon_reg_name (src->alloc),
+            orc_neon_reg_name (src->alloc + 1),
+            orc_neon_reg_name (src->alloc + 2),
+            orc_neon_reg_name (src->alloc + 3),
+            orc_arm_reg_name (dest->ptr_register),
+            update ? "!" : "");
+        code = 0xf400020d;
+      } else if (size == 16) {
+        ORC_ASM_CODE(compiler," vst1.8 { %s, %s }, [%s]%s\n",
+            orc_neon_reg_name (src->alloc),
+            orc_neon_reg_name (src->alloc + 1),
+            orc_arm_reg_name (dest->ptr_register),
+            update ? "!" : "");
+        code = 0xf4000a0d;
+      } else if (size == 8) {
+        ORC_ASM_CODE(compiler," vst1.8 %s, [%s]%s\n",
+            orc_neon_reg_name (src->alloc),
+            orc_arm_reg_name (dest->ptr_register),
+            update ? "!" : "");
+        code = 0xf400070d;
+      } else {
+        ORC_COMPILER_ERROR(compiler,"bad unaligned store size %d", size);
+      }
+    }
+  } else {
+    int shift; /* the listing prints the element width as 8<<shift */
+    if (size == 4) {
+      shift = 2;
+    } else if (size == 2) {
+      shift = 1;
+    } else {
+      shift = 0;
+    }
+    ORC_ASM_CODE(compiler," vst1.%d %s[0], [%s]%s\n",
+        8<<shift,
+        orc_neon_reg_name (src->alloc),
+        orc_arm_reg_name (dest->ptr_register),
+        update ? "!" : "");
+    code = 0xf480000d;
+    code |= shift<<10;
+    code |= (0&7)<<5; /* evaluates to 0; NOTE(review): presumably the lane index */
+  }
+  code |= (dest->ptr_register&0xf) << 16;
+  code |= (src->alloc&0xf) << 12;
+  code |= ((src->alloc>>4)&0x1) << 22;
+  code |= (!update) << 1;
+  orc_arm_emit (compiler, code);
+}
+
 
 static int
 orc_neon_get_const_shift (unsigned int value)
@@ -1790,6 +2002,16 @@ orc_compiler_neon_register_rules (OrcTarget *target)
   REG(convfl);
   REG(convlf);
 
+  orc_rule_register (rule_set, "loadpb", neon_rule_loadpX, (void *)0);
+  orc_rule_register (rule_set, "loadpw", neon_rule_loadpX, (void *)0);
+  orc_rule_register (rule_set, "loadpl", neon_rule_loadpX, (void *)0);
+  orc_rule_register (rule_set, "loadb", neon_rule_loadX, (void *)0);
+  orc_rule_register (rule_set, "loadw", neon_rule_loadX, (void *)0);
+  orc_rule_register (rule_set, "loadl", neon_rule_loadX, (void *)0);
+  orc_rule_register (rule_set, "storeb", neon_rule_storeX, (void *)0);
+  orc_rule_register (rule_set, "storew", neon_rule_storeX, (void *)0);
+  orc_rule_register (rule_set, "storel", neon_rule_storeX, (void *)0);
+
   orc_rule_register (rule_set, "shlb", orc_neon_rule_shift, (void *)0);
   orc_rule_register (rule_set, "shrsb", orc_neon_rule_shift, (void *)1);
   orc_rule_register (rule_set, "shrub", orc_neon_rule_shift, (void *)2);
-- 
2.7.4