{
uint8_t *p;
uint32_t insn;
- int offset, min_offset, pc_offset, data_size;
+ int offset, min_offset, pc_offset, data_size, spare, max_pool;
uint8_t data_allocated[1024];
unsigned int data_index;
+ int type;
memset(data_allocated, 0, sizeof(data_allocated));
p = p_start;
min_offset = p_end - p_start;
+ spare = 0x7fffffff;
while (p < p_start + min_offset) {
insn = get32((uint32_t *)p);
+ /* TODO: Armv5e ldrd. */
+ /* TODO: VFP load. */
if ((insn & 0x0d5f0000) == 0x051f0000) {
/* ldr reg, [pc, #im] */
offset = insn & 0xfff;
if (!(insn & 0x00800000))
- offset = -offset;
+ offset = -offset;
+ max_pool = 4096;
+ type = 0;
+ } else if ((insn & 0x0e5f0f00) == 0x0c1f0100) {
+ /* FPA ldf. */
+ offset = (insn & 0xff) << 2;
+ if (!(insn & 0x00800000))
+ offset = -offset;
+ max_pool = 1024;
+ type = 1;
+ } else if ((insn & 0x0fff0000) == 0x028f0000) {
+ /* Some gcc versions load a doubleword immediate with
+ add regN, pc, #imm
+ ldmia regN, {regN, regM}
+ Hope and pray the compiler never generates something like
+ add reg, pc, #imm1; ldr reg, [reg, #-imm2]; */
+ int r;
+
+ r = (insn & 0xf00) >> 7;
+ offset = ((insn & 0xff) >> r) | ((insn & 0xff) << (32 - r));
+ max_pool = 1024;
+ type = 2;
+ } else {
+ max_pool = 0;
+ type = -1;
+ }
+ if (type >= 0) {
+ /* PC-relative load needs fixing up. */
+ if (spare > max_pool - offset)
+ spare = max_pool - offset;
if ((offset & 3) !=0)
- error("%s:%04x: ldr pc offset must be 32 bit aligned",
+ error("%s:%04x: pc offset must be 32 bit aligned",
+ name, start_offset + p - p_start);
+ if (offset < 0)
+ error("%s:%04x: Embedded literal value",
name, start_offset + p - p_start);
pc_offset = p - p_start + offset + 8;
if (pc_offset <= (p - p_start) ||
pc_offset >= (p_end - p_start))
- error("%s:%04x: ldr pc offset must point inside the function code",
+ error("%s:%04x: pc offset must point inside the function code",
name, start_offset + p - p_start);
if (pc_offset < min_offset)
min_offset = pc_offset;
if (outfile) {
- /* ldr position */
+ /* The instruction position */
fprintf(outfile, " arm_ldr_ptr->ptr = gen_code_ptr + %d;\n",
p - p_start);
- /* ldr data index */
- data_index = ((p_end - p_start) - pc_offset - 4) >> 2;
- fprintf(outfile, " arm_ldr_ptr->data_ptr = arm_data_ptr + %d;\n",
+ /* The position of the constant pool data. */
+ data_index = ((p_end - p_start) - pc_offset) >> 2;
+ fprintf(outfile, " arm_ldr_ptr->data_ptr = arm_data_ptr - %d;\n",
data_index);
+ fprintf(outfile, " arm_ldr_ptr->type = %d;\n", type);
fprintf(outfile, " arm_ldr_ptr++;\n");
- if (data_index >= sizeof(data_allocated))
- error("%s: too many data", name);
- if (!data_allocated[data_index]) {
- ELF_RELOC *rel;
- int i, addend, type;
- const char *sym_name, *p;
- char relname[1024];
-
- data_allocated[data_index] = 1;
-
- /* data value */
- addend = get32((uint32_t *)(p_start + pc_offset));
- relname[0] = '\0';
- for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
- if (rel->r_offset == (pc_offset + start_offset)) {
- sym_name = get_rel_sym_name(rel);
- /* the compiler leave some unnecessary references to the code */
- get_reloc_expr(relname, sizeof(relname), sym_name);
- type = ELF32_R_TYPE(rel->r_info);
- if (type != R_ARM_ABS32)
- error("%s: unsupported data relocation", name);
- break;
- }
- }
- fprintf(outfile, " arm_data_ptr[%d] = 0x%x",
- data_index, addend);
- if (relname[0] != '\0')
- fprintf(outfile, " + %s", relname);
- fprintf(outfile, ";\n");
- }
}
}
p += 4;
}
+
+ /* Copy and relocate the constant pool data. */
data_size = (p_end - p_start) - min_offset;
if (data_size > 0 && outfile) {
- fprintf(outfile, " arm_data_ptr += %d;\n", data_size >> 2);
+ spare += min_offset;
+ fprintf(outfile, " arm_data_ptr -= %d;\n", data_size >> 2);
+ fprintf(outfile, " arm_pool_ptr -= %d;\n", data_size);
+ fprintf(outfile, " if (arm_pool_ptr > gen_code_ptr + %d)\n"
+ " arm_pool_ptr = gen_code_ptr + %d;\n",
+ spare, spare);
+
+ data_index = 0;
+ for (pc_offset = min_offset;
+ pc_offset < p_end - p_start;
+ pc_offset += 4) {
+
+ ELF_RELOC *rel;
+ int i, addend, type;
+ const char *sym_name;
+ char relname[1024];
+
+ /* data value */
+ addend = get32((uint32_t *)(p_start + pc_offset));
+ relname[0] = '\0';
+ for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+ if (rel->r_offset == (pc_offset + start_offset)) {
+ sym_name = get_rel_sym_name(rel);
+ /* the compiler leaves some unnecessary references to the code */
+ get_reloc_expr(relname, sizeof(relname), sym_name);
+ type = ELF32_R_TYPE(rel->r_info);
+ if (type != R_ARM_ABS32)
+ error("%s: unsupported data relocation", name);
+ break;
+ }
+ }
+ fprintf(outfile, " arm_data_ptr[%d] = 0x%x",
+ data_index, addend);
+ if (relname[0] != '\0')
+ fprintf(outfile, " + %s", relname);
+ fprintf(outfile, ";\n");
+
+ data_index++;
+ }
}
- /* the last instruction must be a mov pc, lr */
if (p == p_start)
goto arm_ret_error;
p -= 4;
insn = get32((uint32_t *)p);
- if ((insn & 0xffff0000) != 0xe91b0000) {
+ /* The last instruction must be an ldm instruction. There are several
+ forms generated by gcc:
+ ldmib sp, {..., pc} (implies a sp adjustment of +4)
+ ldmia sp, {..., pc}
+ ldmea fp, {..., pc} */
+ if ((insn & 0xffff8000) == 0xe99d8000) {
+ if (outfile) {
+ fprintf(outfile,
+ " *(uint32_t *)(gen_code_ptr + %d) = 0xe28dd004;\n",
+ p - p_start);
+ }
+ p += 4;
+ } else if ((insn & 0xffff8000) != 0xe89d8000
+ && (insn & 0xffff8000) != 0xe91b8000) {
arm_ret_error:
if (!outfile)
printf("%s: invalid epilog\n", name);
}
- return p - p_start;
+ return p - p_start;
}
#endif
}
#elif defined(HOST_ARM)
{
+ uint32_t insn;
+
if ((p_end - p_start) <= 16)
error("%s: function too small", name);
if (get32((uint32_t *)p_start) != 0xe1a0c00d ||
error("%s: invalid prolog", name);
p_start += 12;
start_offset += 12;
+ insn = get32((uint32_t *)p_start);
+ if ((insn & 0xffffff00) == 0xe24dd000) {
+ /* Stack adjustment. Assume op uses the frame pointer. */
+ p_start -= 4;
+ start_offset -= 4;
+ }
copy_size = arm_emit_ldr_info(name, start_offset, NULL, p_start, p_end,
relocs, nb_relocs);
}
int type;
int addend;
int reloc_offset;
-
+ uint32_t insn;
+
+ insn = get32((uint32_t *)(p_start + 4));
+ /* If prologue ends in sub sp, sp, #const then assume
+ op has a stack frame and needs the frame pointer. */
+ if ((insn & 0xffffff00) == 0xe24dd000) {
+ int i;
+ uint32_t opcode;
+ opcode = 0xe28db000; /* add fp, sp, #0. */
+#if 0
+/* ??? Need to undo the extra stack adjustment at the end of the op.
+ For now just leave the stack misaligned and hope it doesn't break anything
+ too important. */
+ if ((insn & 4) != 0) {
+ /* Preserve doubleword stack alignment. */
+ fprintf(outfile,
+ " *(uint32_t *)(gen_code_ptr + 4)= 0x%x;\n",
+ insn + 4);
+ opcode -= 4;
+ }
+#endif
+ insn = get32((uint32_t *)(p_start - 4));
+ /* Calculate the size of the saved registers,
+ excluding pc. */
+ for (i = 0; i < 15; i++) {
+ if (insn & (1 << i))
+ opcode += 4;
+ }
+ fprintf(outfile,
+ " *(uint32_t *)gen_code_ptr = 0x%x;\n", opcode);
+ }
arm_emit_ldr_info(name, start_offset, outfile, p_start, p_end,
relocs, nb_relocs);
reloc_offset, name, addend);
break;
case R_ARM_PC24:
+ case R_ARM_JUMP24:
+ case R_ARM_CALL:
fprintf(outfile, " arm_reloc_pc24((uint32_t *)(gen_code_ptr + %d), 0x%x, %s);\n",
reloc_offset, addend, name);
break;
} else {
/* generate big code generation switch */
+
+#ifdef HOST_ARM
+ /* We need to know the size of all the ops so we can figure out when
+ to emit constant pools. This must be consistent with opc.h. */
+fprintf(outfile,
+"static const uint32_t arm_opc_size[] = {\n"
+" 0,\n" /* end */
+" 0,\n" /* nop */
+" 0,\n" /* nop1 */
+" 0,\n" /* nop2 */
+" 0,\n"); /* nop3 */
+ for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
+ const char *name;
+ name = get_sym_name(sym);
+ if (strstart(name, OP_PREFIX, NULL)) {
+ fprintf(outfile, " %d,\n", sym->st_size);
+ }
+ }
+fprintf(outfile,
+"};\n");
+#endif
+
fprintf(outfile,
"int dyngen_code(uint8_t *gen_code_buf,\n"
" uint16_t *label_offsets, uint16_t *jmp_offsets,\n"
" const uint32_t *opparam_ptr;\n");
#ifdef HOST_ARM
+/* Arm is tricky because it uses constant pools for loading immediate values.
+ We assume (and require) each function is code followed by a constant pool.
+ All the ops are small so this should be ok. For each op we figure
+ out how much "spare" range we have in the load instructions. This allows
+ us to insert subsequent ops in between the op and the constant pool,
+ eliminating the need to jump around the pool.
+
+ We currently generate:
+
+ [ For this example we assume merging would move op1_pool out of range.
+ In practice we should be able to combine many ops before the offset
+ limits are reached. ]
+ op1_code;
+ op2_code;
+ goto op3;
+ op2_pool;
+ op1_pool;
+op3:
+ op3_code;
+ ret;
+ op3_pool;
+
+ Ideally we'd put op1_pool before op2_pool, but that requires two passes.
+ */
fprintf(outfile,
" uint8_t *last_gen_code_ptr = gen_code_buf;\n"
" LDREntry *arm_ldr_ptr = arm_ldr_table;\n"
-" uint32_t *arm_data_ptr = arm_data_table;\n");
+" uint32_t *arm_data_ptr = arm_data_table + ARM_LDR_TABLE_SIZE;\n"
+/* Initialise the permissible pool offset to an arbitrarily large value. */
+" uint8_t *arm_pool_ptr = gen_code_buf + 0x1000000;\n");
#endif
#ifdef HOST_IA64
{
/* Generate prologue, if needed. */
fprintf(outfile,
-" for(;;) {\n"
-" switch(*opc_ptr++) {\n"
-);
+" for(;;) {\n");
+
+#ifdef HOST_ARM
+/* Generate constant pool if needed */
+fprintf(outfile,
+" if (gen_code_ptr + arm_opc_size[*opc_ptr] >= arm_pool_ptr) {\n"
+" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, "
+"arm_ldr_ptr, arm_data_ptr, arm_data_table + ARM_LDR_TABLE_SIZE, 1);\n"
+" last_gen_code_ptr = gen_code_ptr;\n"
+" arm_ldr_ptr = arm_ldr_table;\n"
+" arm_data_ptr = arm_data_table + ARM_LDR_TABLE_SIZE;\n"
+" arm_pool_ptr = gen_code_ptr + 0x1000000;\n"
+" }\n");
+#endif
+
+fprintf(outfile,
+" switch(*opc_ptr++) {\n");
for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
const char *name;
" goto the_end;\n"
" }\n");
-#ifdef HOST_ARM
-/* generate constant table if needed */
-fprintf(outfile,
-" if ((gen_code_ptr - last_gen_code_ptr) >= (MAX_FRAG_SIZE - MAX_OP_SIZE)) {\n"
-" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, arm_ldr_ptr, arm_data_table, arm_data_ptr, 1);\n"
-" last_gen_code_ptr = gen_code_ptr;\n"
-" arm_ldr_ptr = arm_ldr_table;\n"
-" arm_data_ptr = arm_data_table;\n"
-" }\n");
-#endif
-
fprintf(outfile,
" }\n"
/* generate some code patching */
#ifdef HOST_ARM
-fprintf(outfile, "gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, arm_ldr_ptr, arm_data_table, arm_data_ptr, 0);\n");
+fprintf(outfile,
+"if (arm_data_ptr != arm_data_table + ARM_LDR_TABLE_SIZE)\n"
+" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, "
+"arm_ldr_ptr, arm_data_ptr, arm_data_table + ARM_LDR_TABLE_SIZE, 0);\n");
#endif
/* flush instruction cache */
fprintf(outfile, "flush_icache_range((unsigned long)gen_code_buf, (unsigned long)gen_code_ptr);\n");