1 /* rx.c --- opcode semantics for stand-alone RX simulator.
3 Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
4 Contributed by Red Hat, Inc.
6 This file is part of the GNU simulators.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include "opcode/rx.h"
/* Human-readable names for the RXO_* opcode enum, used when printing
   cycle statistics.  NOTE(review): entry order must track enum
   RX_Opcode_ID in opcode/rx.h -- confirm before editing.  */
36 static const char * id_names[] = {
38 "RXO_mov", /* d = s (signed) */
39 "RXO_movbi", /* d = [s,s2] (signed) */
40 "RXO_movbir", /* [s,s2] = d (signed) */
41 "RXO_pushm", /* s..s2 */
42 "RXO_popm", /* s..s2 */
43 "RXO_xchg", /* s <-> d */
44 "RXO_stcc", /* d = s if cond(s2) */
45 "RXO_rtsd", /* rtsd, 1=imm, 2-0 = reg if reg type */
47 /* These are all either d OP= s or, if s2 is set, d = s OP s2. Note
48 that d may be "None". */
61 "RXO_adc", /* d = d + s + carry */
62 "RXO_sbb", /* d = d - s - ~carry */
63 "RXO_abs", /* d = |s| */
64 "RXO_max", /* d = max(d,s) */
65 "RXO_min", /* d = min(d,s) */
66 "RXO_emul", /* d:64 = d:32 * s */
67 "RXO_emulu", /* d:64 = d:32 * s (unsigned) */
69 "RXO_rolc", /* d <<= 1 through carry */
70 "RXO_rorc", /* d >>= 1 through carry */
71 "RXO_rotl", /* d <<= #s without carry */
72 "RXO_rotr", /* d >>= #s without carry */
73 "RXO_revw", /* d = revw(s) */
74 "RXO_revl", /* d = revl(s) */
75 "RXO_branch", /* pc = d if cond(s) */
76 "RXO_branchrel",/* pc += d if cond(s) */
77 "RXO_jsr", /* pc = d */
78 "RXO_jsrrel", /* pc += d */
104 "RXO_sat", /* sat(d) */
107 "RXO_fadd", /* d op= s */
116 "RXO_bset", /* d |= (1<<s) */
117 "RXO_bclr", /* d &= ~(1<<s) */
118 "RXO_btst", /* s & (1<<s2) */
119 "RXO_bnot", /* d ^= (1<<s) */
120 "RXO_bmcc", /* d<s> = cond(s2) */
122 "RXO_clrpsw", /* flag index in d */
123 "RXO_setpsw", /* flag index in d */
124 "RXO_mvtipl", /* new IPL in s */
128 "RXO_rtd", /* undocumented */
130 "RXO_dbt", /* undocumented */
131 "RXO_int", /* vector id in s */
135 "RXO_sccnd", /* d = cond(s) ? 1 : 0 */
/* Short display names for the RX_Operand_* addressing modes; indexed in
   parallel with the operand-type enum (see op_lookup below).  */
138 static const char * optype_names[] = {
140 "#Imm", /* #addend */
142 "[Rn]", /* [Rn + addend] */
145 " cc ", /* eq, gtu, etc */
146 "Flag", /* [UIOSZC] */
147 "RbRi" /* [Rb + scale * Ri] */
/* Element counts for the two tables above.  */
150 #define N_RXO (sizeof(id_names)/sizeof(id_names[0]))
151 #define N_RXT (sizeof(optype_names)/sizeof(optype_names[0]))
/* Cycle counts bracketing a benchmark region (see reset/halt_pipeline_stats).  */
154 static unsigned long long benchmark_start_cycle;
155 static unsigned long long benchmark_end_cycle;
/* Interns (a,b,c) operand-type triples to small indices; op_cache_rev
   maps an index back to its packed triple.  Index 0 means "unused".  */
157 static int op_cache[N_RXT][N_RXT][N_RXT];
158 static int op_cache_rev[N_MAP];
159 static int op_cache_idx = 0;
/* Map an operand-type triple to a small cached index, allocating a new
   slot on first sight.  Packs the triple as (a<<8)|(b<<4)|c for the
   reverse table.  NOTE(review): assumes each type fits in 4 bits.  */
162 op_lookup (int a, int b, int c)
164 if (op_cache[a][b][c])
165 return op_cache[a][b][c];
167 if (op_cache_idx >= N_MAP)
169 printf("op_cache_idx exceeds %d\n", N_MAP);
172 op_cache[a][b][c] = op_cache_idx;
173 op_cache_rev[op_cache_idx] = (a<<8) | (b<<4) | c;
/* Render an interned operand-triple index as "src src2 dst" style text.
   Uses a small ring of static buffers so several results can be alive
   in one printf call.  */
178 op_cache_string (int map)
181 static char cb[5][20];
184 map = op_cache_rev[map];
189 sprintf(cb[ci], "%s %s %s", optype_names[a], optype_names[b], optype_names[c]);
/* Per-opcode / per-operand-shape cycle accounting, plus stall counters,
   only compiled in under CYCLE_STATS.  */
193 static unsigned long long cycles_per_id[N_RXO][N_MAP];
194 static unsigned long long times_per_id[N_RXO][N_MAP];
195 static unsigned long long memory_stalls;
196 static unsigned long long register_stalls;
197 static unsigned long long branch_stalls;
198 static unsigned long long branch_alignment_stalls;
199 static unsigned long long fast_returns;
/* Counts of consecutive opcode pairs, for pairing statistics.  */
201 static unsigned long times_per_pair[N_RXO][N_MAP][N_RXO][N_MAP];
202 static int prev_opcode_id = RXO_unknown;
209 #endif /* CYCLE_STATS */
212 #ifdef CYCLE_ACCURATE
/* Register number whose value was just loaded from memory (-1 = none);
   used to model the load-use stall.  */
214 static int new_rt = -1;
216 /* Number of cycles to add if an insn spans an 8-byte boundary. */
217 static int branch_alignment_penalty = 0;
221 static int running_benchmark = 1;
/* Trace printf that is silent outside the benchmark window.  */
223 #define tprintf if (trace && running_benchmark) printf
225 jmp_buf decode_jmp_buf;
226 unsigned int rx_cycles = 0;
228 #ifdef CYCLE_ACCURATE
229 /* If nonzero, memory was read at some point and cycle latency might
231 static int memory_source = 0;
232 /* If nonzero, memory was written and extra cycles might be
234 static int memory_dest = 0;
/* Charge THROUGHPUT cycles to the current instruction.  */
237 cycles (int throughput)
239 tprintf("%d cycles\n", throughput);
240 regs.cycle_count += throughput;
243 /* Number of execution (E) cycles the op uses. For memory sources, we
244 include the load micro-op stall as two extra E cycles. */
245 #define E(c) cycles (memory_source ? c + 2 : c)
246 #define E1 cycles (1)
247 #define E2 cycles (2)
248 #define EBIT cycles (memory_source ? 2 : 1)
/* Fragments of the register load-stall bookkeeping macros: charge one
   extra cycle when an insn reads a register that the previous insn
   loaded from memory, and record the newly-loaded register.  (Macro
   bodies use line continuations -- do not insert lines here.)  */
250 /* Check to see if a read latency must be applied for a given register. */
254 tprintf("register %d load stall\n", r); \
255 regs.cycle_count ++; \
256 STATS(register_stalls ++); \
263 tprintf ("Rt now %d\n", r); \
267 #else /* !CYCLE_ACCURATE */
277 #endif /* else CYCLE_ACCURATE */
/* Byte widths indexed by the RX_Size enum (RX_AnySize..RX_Long).
   NOTE(review): the 3 entry corresponds to the 24-bit PSI size.  */
279 static int size2bytes[] = {
280 4, 1, 1, 1, 2, 2, 2, 3, 4
/* Abort with source location for "can't happen" simulator states.  */
287 #define rx_abort() _rx_abort(__FILE__, __LINE__)
/* Print "abort at file:line" (basename only) to stderr; called via the
   rx_abort() macro above.  */
289 _rx_abort (const char *file, int line)
291 if (strrchr (file, '/'))
292 file = strrchr (file, '/') + 1;
293 fprintf(stderr, "abort at %s:%d\n", file, line);
/* One-page fetch cache: get_byte_base/decode_cache_base are biased
   pointers (already offset by -get_byte_page) so they can be indexed
   directly with a target PC.  */
297 static unsigned char *get_byte_base;
298 static RX_Opcode_Decoded **decode_cache_base;
299 static SI get_byte_page;
/* Invalidate the cached decode page.  */
305 decode_cache_base = 0;
/* Refresh the fetch-page cache if TPC has left the cached page (or if
   execution counting forces a re-check every fetch).  */
310 maybe_get_mem_page (SI tpc)
312 if (((tpc ^ get_byte_page) & NONPAGE_MASK) || enable_counting)
314 get_byte_page = tpc & NONPAGE_MASK;
/* Bias the base pointers so they can be indexed by raw PC.  */
315 get_byte_base = rx_mem_ptr (get_byte_page, MPA_READING) - get_byte_page;
316 decode_cache_base = rx_mem_decode_cache (get_byte_page) - get_byte_page;
320 /* This gets called a *lot* so optimize it. */
/* Fetch callback handed to rx_decode_opcode: returns the byte at the
   decoder's current PC, via the page cache above.  */
322 rx_get_byte (void *vdata)
324 RX_Data *rx_data = (RX_Data *)vdata;
325 SI tpc = rx_data->dpc;
327 /* See load.c for an explanation of this. */
331 maybe_get_mem_page (tpc);
334 return get_byte_base [tpc];
/* Read operand I of decoded opcode RD and return its value, applying
   the operand's addressing mode (including pre-dec / post-inc side
   effects) and sign/zero extension per the operand size.  */
338 get_op (const RX_Opcode_Decoded *rd, int i)
340 const RX_Opcode_Operand *o = rd->op + i;
345 case RX_Operand_None:
348 case RX_Operand_Immediate: /* #addend */
351 case RX_Operand_Register: /* Rn */
353 rv = get_reg (o->reg);
356 case RX_Operand_Predec: /* [-Rn] */
357 put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
359 case RX_Operand_Postinc: /* [Rn+] */
360 case RX_Operand_Indirect: /* [Rn + addend] */
361 case RX_Operand_TwoReg: /* [Rn + scale * R2] */
362 #ifdef CYCLE_ACCURATE
364 if (o->type == RX_Operand_TwoReg)
/* Back-to-back memory accesses cost an extra cycle.  */
367 if (regs.m2m == M2M_BOTH)
369 tprintf("src memory stall\n");
/* Compute the effective address for the memory modes.  */
380 if (o->type == RX_Operand_TwoReg)
381 addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
383 addr = get_reg (o->reg) + o->addend;
390 case RX_Byte: /* undefined extension */
393 rv = mem_get_qi (addr);
396 case RX_Word: /* undefined extension */
399 rv = mem_get_hi (addr);
403 rv = mem_get_psi (addr);
407 rv = mem_get_si (addr);
/* Post-increment happens after the load.  */
411 if (o->type == RX_Operand_Postinc)
412 put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);
416 case RX_Operand_Condition: /* eq, gtu, etc */
417 return condition_true (o->reg);
419 case RX_Operand_Flag: /* [UIOSZC] */
420 return (regs.r_psw & (1 << o->reg)) ? 1 : 0;
423 /* if we've gotten here, we need to clip/extend the value according
430 case RX_Byte: /* undefined extension */
431 rv |= 0xdeadbe00; /* keep them honest */
439 rv = sign_ext (rv, 8);
442 case RX_Word: /* undefined extension */
443 rv |= 0xdead0000; /* keep them honest */
451 rv = sign_ext (rv, 16);
/* Write value V to operand I of decoded opcode RD, mirroring get_op:
   clip/extend V per the operand size, then store via the operand's
   addressing mode (with pre-dec / post-inc side effects).  */
465 put_op (const RX_Opcode_Decoded *rd, int i, int v)
467 const RX_Opcode_Operand *o = rd->op + i;
/* Registers hold the full clipped value; the poison bits below only
   apply to the non-register destinations.  */
473 if (o->type != RX_Operand_Register)
477 case RX_Byte: /* undefined extension */
478 v |= 0xdeadbe00; /* keep them honest */
489 case RX_Word: /* undefined extension */
490 v |= 0xdead0000; /* keep them honest */
498 v = sign_ext (v, 16);
511 case RX_Operand_None:
512 /* Opcodes like TST and CMP use this. */
515 case RX_Operand_Immediate: /* #addend */
516 case RX_Operand_Condition: /* eq, gtu, etc */
519 case RX_Operand_Register: /* Rn */
524 case RX_Operand_Predec: /* [-Rn] */
525 put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
527 case RX_Operand_Postinc: /* [Rn+] */
528 case RX_Operand_Indirect: /* [Rn + addend] */
529 case RX_Operand_TwoReg: /* [Rn + scale * R2] */
531 #ifdef CYCLE_ACCURATE
532 if (regs.m2m == M2M_BOTH)
534 tprintf("dst memory stall\n");
544 if (o->type == RX_Operand_TwoReg)
545 addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
547 addr = get_reg (o->reg) + o->addend;
554 case RX_Byte: /* undefined extension */
557 mem_put_qi (addr, v);
560 case RX_Word: /* undefined extension */
563 mem_put_hi (addr, v);
567 mem_put_psi (addr, v);
571 mem_put_si (addr, v);
/* Post-increment happens after the store.  */
575 if (o->type == RX_Operand_Postinc)
576 put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);
580 case RX_Operand_Flag: /* [UIOSZC] */
582 regs.r_psw |= (1 << o->reg);
584 regs.r_psw &= ~(1 << o->reg);
/* Shorthand accessors for the current opcode's operands:
   op[0] = destination (D), op[1] = source (S), op[2] = source 2 (S2).  */
589 #define PD(x) put_op (opcode, 0, x)
590 #define PS(x) put_op (opcode, 1, x)
591 #define PS2(x) put_op (opcode, 2, x)
592 #define GD() get_op (opcode, 0)
593 #define GS() get_op (opcode, 1)
594 #define GS2() get_op (opcode, 2)
/* NOTE(review): SSZ/S2SZ index op[0], same as DSZ -- looks deliberate
   (all operands share the insn size) but confirm against rd->size use.  */
595 #define DSZ() size2bytes[opcode->op[0].size]
596 #define SSZ() size2bytes[opcode->op[0].size]
597 #define S2SZ() size2bytes[opcode->op[0].size]
599 /* "Universal" sources. */
600 #define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS())
601 #define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2())
/* Push VAL onto the simulated stack.  */
606 int rsp = get_reg (sp);
609 mem_put_si (rsp, val);
612 /* Just like the above, but tag the memory as "pushed pc" so if anyone
613 tries to write to it, it will cause an error. */
617 int rsp = get_reg (sp);
620 mem_put_si (rsp, val);
621 mem_set_content_range (rsp, rsp+3, MC_PUSHED_PC);
/* Pop a word from the simulated stack.  */
628 int rsp = get_reg (sp);
629 rv = mem_get_si (rsp);
/* poppc: verify the word really is a pushed PC, to catch stack
   corruption, then un-tag the slot.  */
639 int rsp = get_reg (sp);
640 if (mem_get_content_type (rsp) != MC_PUSHED_PC)
641 execution_error (SIM_ERR_CORRUPT_STACK, rsp);
642 rv = mem_get_si (rsp);
643 mem_set_content_range (rsp, rsp+3, MC_UNINIT);
/* Arithmetic/logic/shift helper macros.  MATH_OP performs the op both
   unsigned (for carry) and signed (for overflow) and sets OSZC flags;
   SHIFT_OP shifts one bit at a time to track the carry-out.  (Macro
   bodies use line continuations -- do not insert lines below.)  */
649 #define MATH_OP(vop,c) \
653 ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \
654 tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \
655 ma = sign_ext (uma, DSZ() * 8); \
656 mb = sign_ext (umb, DSZ() * 8); \
657 sll = (long long) ma vop (long long) mb vop c; \
658 tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \
659 set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \
664 #define LOGIC_OP(vop) \
669 tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \
675 #define SHIFT_OP(val, type, count, OP, carry_mask) \
680 tprintf("%lld " #OP " %d\n", val, count); \
681 for (i = 0; i < count; i ++) \
683 c = val & carry_mask; \
687 set_oszc (val, 4, c); \
/* Thin wrappers over the rxfp_* soft-float ops, used via FLOAT_OP.  */
713 fop_fadd (fp_t s1, fp_t s2, fp_t *d)
715 *d = rxfp_add (s1, s2);
720 fop_fmul (fp_t s1, fp_t s2, fp_t *d)
722 *d = rxfp_mul (s1, s2);
727 fop_fdiv (fp_t s1, fp_t s2, fp_t *d)
729 *d = rxfp_div (s1, s2);
734 fop_fsub (fp_t s1, fp_t s2, fp_t *d)
736 *d = rxfp_sub (s1, s2);
/* FPPENDING: unmasked pending FP exception bits (plus unconditional CE).  */
740 #define FPPENDING() (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH))))
741 #define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR
744 return do_fp_exception (opcode_pc)
/* FLOAT_OP: run fop_<func> on the S/D operands, trace, store the result
   and set S/Z flags.  (Line continuations -- do not insert lines below.)  */
746 #define FLOAT_OP(func) \
753 do_store = fop_##func (fa, fb, &fc); \
754 tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \
759 if ((fc & 0x80000000UL) != 0) \
761 if ((fc & 0x7fffffffUL) == 0) \
763 set_flags (FLAGBIT_S | FLAGBIT_Z, mb); \
766 #define carry (FLAG_C ? 1 : 0)
/* Fixed exception vector addresses, display names and host signals.
   NOTE(review): "priviledged" is a long-standing misspelling in both the
   string and the EX_PRIVILEDGED identifier; left untouched since the
   identifier is referenced elsewhere in the file.  */
772 } exception_info[] = {
773 { 0xFFFFFFD0UL, "priviledged opcode", SIGILL },
774 { 0xFFFFFFD4UL, "access violation", SIGSEGV },
775 { 0xFFFFFFDCUL, "undefined opcode", SIGILL },
776 { 0xFFFFFFE4UL, "floating point", SIGFPE }
/* Indices into exception_info[].  */
778 #define EX_PRIVILEDGED 0
780 #define EX_UNDEFINED 2
781 #define EX_FLOATING 3
782 #define EXCEPTION(n) \
783 return generate_exception (n, opcode_pc)
785 #define PRIVILEDGED() \
787 EXCEPTION (EX_PRIVILEDGED)
/* Raise exception TYPE (index into exception_info) at OPCODE_PC.  If no
   handler is installed, report the problem and stop/exit the simulation;
   otherwise save state, mask interrupts and vector to the handler.  */
790 generate_exception (unsigned long type, SI opcode_pc)
792 SI old_psw, old_pc, new_pc;
794 new_pc = mem_get_si (exception_info[type].vaddr);
795 /* 0x00020000 is the value used to initialise the known
796 exception vectors (see rx.ld), but it is a reserved
797 area of memory so do not try to access it, and if the
798 value has not been changed by the program then the
799 vector has not been installed. */
800 if (new_pc == 0 || new_pc == 0x00020000)
803 return RX_MAKE_STOPPED (exception_info[type].signal);
805 fprintf(stderr, "Unhandled %s exception at pc = %#lx\n",
806 exception_info[type].str, (unsigned long) opcode_pc);
/* For FP exceptions, decode and print which conditions are pending.  */
807 if (type == EX_FLOATING)
809 int mask = FPPENDING ();
810 fprintf (stderr, "Pending FP exceptions:");
811 if (mask & FPSWBITS_FV)
812 fprintf(stderr, " Invalid");
813 if (mask & FPSWBITS_FO)
814 fprintf(stderr, " Overflow");
815 if (mask & FPSWBITS_FZ)
816 fprintf(stderr, " Division-by-zero");
817 if (mask & FPSWBITS_FU)
818 fprintf(stderr, " Underflow");
819 if (mask & FPSWBITS_FX)
820 fprintf(stderr, " Inexact");
821 if (mask & FPSWBITS_CE)
822 fprintf(stderr, " Unimplemented");
823 fprintf(stderr, "\n");
825 return RX_MAKE_EXITED (1);
828 tprintf ("Triggering %s exception\n", exception_info[type].str);
/* Enter handler mode: clear I, U and PM in the PSW.  */
830 old_psw = regs.r_psw;
831 regs.r_psw &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
836 return RX_MAKE_STEPPED ();
/* Raise an access-violation exception at the current PC, then unwind
   out of the decoder via longjmp with the resulting status.  */
840 generate_access_exception (void)
844 rv = generate_exception (EX_ACCESS, regs.r_pc);
846 longjmp (decode_jmp_buf, rv);
/* Raise an FP exception for the insn at OPCODE_PC.  */
850 do_fp_exception (unsigned long opcode_pc)
853 EXCEPTION (EX_FLOATING);
854 return RX_MAKE_STEPPED ();
/* Return nonzero when operand I of RD is a memory addressing mode.  */
858 op_is_memory (const RX_Opcode_Decoded *rd, int i)
860 switch (rd->op[i].type)
862 case RX_Operand_Predec:
863 case RX_Operand_Postinc:
864 case RX_Operand_Indirect:
870 #define OM(i) op_is_memory (opcode, i)
/* Non-local exit from the decode/execute loop with status X.  */
872 #define DO_RETURN(x) { longjmp (decode_jmp_buf, x); }
/* Locals of the main decode/execute function: unsigned (uma/umb) and
   signed (ma/mb implied) operand scratch, 64-bit accumulators, and the
   cached decoded opcode.  */
877 unsigned int uma=0, umb=0;
880 unsigned long long ll;
882 unsigned long opcode_pc;
884 const RX_Opcode_Decoded *opcode;
886 unsigned long long prev_cycle_count;
888 #ifdef CYCLE_ACCURATE
893 prev_cycle_count = regs.cycle_count;
896 #ifdef CYCLE_ACCURATE
/* Fetch: look the PC up in the per-page decode cache, decoding and
   caching the opcode on a miss.  */
903 maybe_get_mem_page (regs.r_pc);
905 opcode_pc = regs.r_pc;
907 /* Note that we don't word-swap this point, there's no point. */
908 if (decode_cache_base[opcode_pc] == NULL)
910 RX_Opcode_Decoded *opcode_w;
911 rx_data.dpc = opcode_pc;
912 opcode_w = decode_cache_base[opcode_pc] = calloc (1, sizeof (RX_Opcode_Decoded));
913 opcode_size = rx_decode_opcode (opcode_pc, opcode_w,
914 rx_get_byte, &rx_data);
919 opcode = decode_cache_base[opcode_pc];
920 opcode_size = opcode->n_bytes;
923 #ifdef CYCLE_ACCURATE
/* Charge the deferred branch-target alignment penalty if this insn
   straddles an 8-byte fetch boundary.  */
924 if (branch_alignment_penalty)
926 if ((regs.r_pc ^ (regs.r_pc + opcode_size - 1)) & ~7)
928 tprintf("1 cycle branch alignment penalty\n");
929 cycles (branch_alignment_penalty);
931 branch_alignment_stalls ++;
934 branch_alignment_penalty = 0;
938 regs.r_pc += opcode_size;
/* Set up the flag-update masks declared by the decoded opcode.  */
940 rx_flagmask = opcode->flags_s;
941 rx_flagand = ~(int)opcode->flags_0;
942 rx_flagor = opcode->flags_1;
948 tprintf("|%lld| = ", sll);
951 tprintf("%lld\n", sll);
/* Fragments of the big per-opcode switch.  Register-destination checks
   below gate cycle accounting / flag behavior on the operand kind.  */
972 if (opcode->op[0].type == RX_Operand_Register)
984 if (opcode->op[0].type == RX_Operand_Register)
999 if (opcode->op[0].type == RX_Operand_Register)
/* Conditional branch: taken when unconditional or cond(S) holds.  */
1009 if (opcode->op[1].type == RX_Operand_None || GS())
1011 #ifdef CYCLE_ACCURATE
1012 SI old_pc = regs.r_pc;
1016 #ifdef CYCLE_ACCURATE
1017 delta = regs.r_pc - old_pc;
/* Note: specs say 3, chip says 2 -- short forward branches are cheaper.  */
1018 if (delta >= 0 && delta < 16
1021 tprintf("near forward branch bonus\n");
1027 branch_alignment_penalty = 1;
1034 #ifdef CYCLE_ACCURATE
1045 #ifdef CYCLE_ACCURATE
1046 /* Note: specs say 3, chip says 2. */
1047 if (delta >= 0 && delta < 16
1050 tprintf("near forward branch bonus\n");
1056 branch_alignment_penalty = 1;
1063 #ifdef CYCLE_ACCURATE
/* BRK: stop if no vector table is installed, otherwise vector through INTB.  */
1071 int old_psw = regs.r_psw;
1073 DO_RETURN (RX_MAKE_HIT_BREAK ());
1074 if (regs.r_intb == 0)
1076 tprintf("BREAK hit, no vector table.\n");
1077 DO_RETURN (RX_MAKE_EXITED(1));
1079 regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
1082 regs.r_pc = mem_get_si (regs.r_intb);
1090 if (opcode->op[0].type == RX_Operand_Register)
1102 if (opcode->op[1].type == RX_Operand_Register)
/* BTST: test bit MB of MA, set Z/C from the result.  */
1106 umb = ma & (1 << mb);
1107 set_zc (! umb, umb);
1112 v = 1 << opcode->op[0].reg;
1121 case RXO_div: /* d = d / s */
1124 tprintf("%d / %d = ", mb, ma);
/* Division by zero and INT_MIN / -1 overflow both set the O flag.  */
1125 if (ma == 0 || (ma == -1 && (unsigned int) mb == 0x80000000))
1128 set_flags (FLAGBIT_O, FLAGBIT_O);
1134 set_flags (FLAGBIT_O, 0);
1137 /* Note: spec says 3 to 22 cycles, we are pessimistic. */
1141 case RXO_divu: /* d = d / s */
1144 tprintf("%u / %u = ", umb, uma);
1148 set_flags (FLAGBIT_O, FLAGBIT_O);
1154 set_flags (FLAGBIT_O, 0);
1157 /* Note: spec says 2 to 20 cycles, we are pessimistic. */
/* EMUL/EMULU: 64-bit product split across the register pair d, d+1.  */
1164 sll = (long long)ma * (long long)mb;
1165 tprintf("%d * %d = %lld\n", ma, mb, sll);
1166 put_reg (opcode->op[0].reg, sll);
1167 put_reg (opcode->op[0].reg + 1, sll >> 32);
1174 ll = (long long)uma * (long long)umb;
1175 tprintf("%#x * %#x = %#llx\n", uma, umb, ll);
1176 put_reg (opcode->op[0].reg, ll);
1177 put_reg (opcode->op[0].reg + 1, ll >> 32);
/* RTE-style return: restore PSW/PC from the backup registers.  */
1207 regs.r_psw = regs.r_bpsw;
1208 regs.r_pc = regs.r_bpc;
1209 #ifdef CYCLE_ACCURATE
1210 regs.fast_return = 0;
/* FTOI: truncate toward zero regardless of FPSW rounding mode.  */
1223 mb = rxfp_ftoi (ma, FPRM_ZERO);
1226 tprintf("(int) %g = %d\n", int2float(ma), mb);
/* INT: vector 255 (in r5) is the simulator syscall hook.  */
1235 int rc = rx_syscall (regs.r[5]);
1236 if (! RX_STEPPED (rc))
1241 int old_psw = regs.r_psw;
1242 regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
1245 regs.r_pc = mem_get_si (regs.r_intb + 4 * v);
1253 mb = rxfp_itof (ma, regs.r_fpsw);
1255 tprintf("(float) %d = %x\n", ma, mb);
1264 #ifdef CYCLE_ACCURATE
1269 #ifdef CYCLE_ACCURATE
/* JSR/JSRREL: remember the return address for the fast-return bonus.  */
1270 regs.link_register = regs.r_pc;
1272 pushpc (get_reg (pc));
1273 if (opcode->id == RXO_jsrrel)
1275 #ifdef CYCLE_ACCURATE
1276 delta = v - regs.r_pc;
1279 #ifdef CYCLE_ACCURATE
1280 /* Note: docs say 3, chip says 2 */
1281 if (delta >= 0 && delta < 16)
1283 tprintf ("near forward jsr bonus\n");
1288 branch_alignment_penalty = 1;
1291 regs.fast_return = 1;
/* MACHI/MACLO: 16x16 multiply-accumulate into the 64-bit accumulator.  */
1297 ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16);
1299 put_reg64 (acc64, ll + regs.r_acc);
1304 ll = (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ());
1306 put_reg64 (acc64, ll + regs.r_acc);
/* MVTC/LDC to PSW: PM can never be modified this way.  */
1333 if (opcode->op[0].type == RX_Operand_Register
1334 && opcode->op[0].reg == 16 /* PSW */)
1336 /* Special case, LDC and POPC can't ever modify PM. */
1337 int pm = regs.r_psw & FLAGBIT_PM;
1342 v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1348 /* various things can't be changed in user mode. */
1349 if (opcode->op[0].type == RX_Operand_Register)
1350 if (opcode->op[0].reg == 32)
1352 v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1353 v |= regs.r_psw & (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1355 if (opcode->op[0].reg == 34 /* ISP */
1356 || opcode->op[0].reg == 37 /* BPSW */
1357 || opcode->op[0].reg == 39 /* INTB */
1358 || opcode->op[0].reg == 38 /* VCT */)
1359 /* These are ignored. */
1369 #ifdef CYCLE_ACCURATE
1370 if ((opcode->op[0].type == RX_Operand_Predec
1371 && opcode->op[1].type == RX_Operand_Register)
1372 || (opcode->op[0].type == RX_Operand_Postinc
1373 && opcode->op[1].type == RX_Operand_Register))
1375 /* Special case: push reg doesn't cause a memory stall. */
1377 tprintf("push special case\n");
1444 regs.r_psw |= (GS () << FLAGSHIFT_IPL) & FLAGBITS_IPL;
1459 /* POPM cannot pop R0 (sp). */
1460 if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
1461 EXCEPTION (EX_UNDEFINED);
1462 if (opcode->op[1].reg >= opcode->op[2].reg)
1464 regs.r_pc = opcode_pc;
1465 DO_RETURN (RX_MAKE_STOPPED (SIGILL));
1467 for (v = opcode->op[1].reg; v <= opcode->op[2].reg; v++)
1471 put_reg (v, pop ());
1476 /* PUSHM cannot push R0 (sp). */
1477 if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
1478 EXCEPTION (EX_UNDEFINED);
1479 if (opcode->op[1].reg >= opcode->op[2].reg)
1481 regs.r_pc = opcode_pc;
1482 return RX_MAKE_STOPPED (SIGILL);
1484 for (v = opcode->op[2].reg; v >= opcode->op[1].reg; v--)
1489 cycles (opcode->op[2].reg - opcode->op[1].reg + 1);
1493 ll = get_reg64 (acc64) << GS ();
1494 ll += 0x80000000ULL;
1495 if ((signed long long)ll > (signed long long)0x00007fff00000000ULL)
1496 ll = 0x00007fff00000000ULL;
1497 else if ((signed long long)ll < (signed long long)0xffff800000000000ULL)
1498 ll = 0xffff800000000000ULL;
1500 ll &= 0xffffffff00000000ULL;
1501 put_reg64 (acc64, ll);
1507 regs.r_pc = poppc ();
1508 regs.r_psw = poppc ();
1510 regs.r_psw |= FLAGBIT_U;
1511 #ifdef CYCLE_ACCURATE
1512 regs.fast_return = 0;
1519 umb = (((uma >> 24) & 0xff)
1520 | ((uma >> 8) & 0xff00)
1521 | ((uma << 8) & 0xff0000)
1522 | ((uma << 24) & 0xff000000UL));
1529 umb = (((uma >> 8) & 0x00ff00ff)
1530 | ((uma << 8) & 0xff00ff00UL));
1538 #ifdef CYCLE_ACCURATE
1542 while (regs.r[3] != 0)
1546 switch (opcode->size)
1549 ma = mem_get_si (regs.r[1]);
1550 mb = mem_get_si (regs.r[2]);
1555 ma = sign_ext (mem_get_hi (regs.r[1]), 16);
1556 mb = sign_ext (mem_get_hi (regs.r[2]), 16);
1561 ma = sign_ext (mem_get_qi (regs.r[1]), 8);
1562 mb = sign_ext (mem_get_qi (regs.r[2]), 8);
1569 /* We do the multiply as a signed value. */
1570 sll = (long long)ma * (long long)mb;
1571 tprintf(" %016llx = %d * %d\n", sll, ma, mb);
1572 /* but we do the sum as unsigned, while sign extending the operands. */
1573 tmp = regs.r[4] + (sll & 0xffffffffUL);
1574 regs.r[4] = tmp & 0xffffffffUL;
1577 tmp += regs.r[5] + (sll & 0xffffffffUL);
1578 regs.r[5] = tmp & 0xffffffffUL;
1581 tmp += regs.r[6] + (sll & 0xffffffffUL);
1582 regs.r[6] = tmp & 0xffffffffUL;
1583 tprintf("%08lx\033[36m%08lx\033[0m%08lx\n",
1584 (unsigned long) regs.r[6],
1585 (unsigned long) regs.r[5],
1586 (unsigned long) regs.r[4]);
1590 if (regs.r[6] & 0x00008000)
1591 regs.r[6] |= 0xffff0000UL;
1593 regs.r[6] &= 0x0000ffff;
1594 ma = (regs.r[6] & 0x80000000UL) ? FLAGBIT_S : 0;
1595 if (regs.r[6] != 0 && regs.r[6] != 0xffffffffUL)
1596 set_flags (FLAGBIT_O|FLAGBIT_S, ma | FLAGBIT_O);
1598 set_flags (FLAGBIT_O|FLAGBIT_S, ma);
1599 #ifdef CYCLE_ACCURATE
1600 switch (opcode->size)
1603 cycles (6 + 4 * tx);
1606 cycles (6 + 5 * (tx / 2) + 4 * (tx % 2));
1609 cycles (6 + 7 * (tx / 4) + 4 * (tx % 4));
/* RMPA epilogue starts above: sign-fix r6, set O/S from the 72-bit sum,
   and charge size-dependent cycles.  (This span begins mid-fragment.)  */
1590 if (regs.r[6] & 0x00008000)
1591 regs.r[6] |= 0xffff0000UL;
1593 regs.r[6] &= 0x0000ffff;
1594 ma = (regs.r[6] & 0x80000000UL) ? FLAGBIT_S : 0;
1595 if (regs.r[6] != 0 && regs.r[6] != 0xffffffffUL)
1596 set_flags (FLAGBIT_O|FLAGBIT_S, ma | FLAGBIT_O);
1598 set_flags (FLAGBIT_O|FLAGBIT_S, ma);
1599 #ifdef CYCLE_ACCURATE
1600 switch (opcode->size)
1603 cycles (6 + 4 * tx);
1606 cycles (6 + 5 * (tx / 2) + 4 * (tx % 2));
1609 cycles (6 + 7 * (tx / 4) + 4 * (tx % 4));
/* ROLC/ROTL/ROTR: rotates, with the carried-out bit landing in C.  */
1619 ma = v & 0x80000000UL;
1631 uma |= (carry ? 0x80000000UL : 0);
1632 set_szc (uma, 4, mb);
1642 uma = (uma << mb) | (uma >> (32-mb));
1645 set_szc (uma, 4, mb);
1655 uma = (uma >> mb) | (uma << (32-mb));
1656 mb = uma & 0x80000000;
1658 set_szc (uma, 4, mb);
/* ROUND: float-to-int using the FPSW rounding mode.  */
1666 mb = rxfp_ftoi (ma, regs.r_fpsw);
1669 tprintf("(int) %g = %d\n", int2float(ma), mb);
/* RTS: fast-return bonus when returning to the recorded link address.  */
1676 #ifdef CYCLE_ACCURATE
1679 regs.r_pc = poppc ();
1680 #ifdef CYCLE_ACCURATE
1681 /* Note: specs say 5, chip says 3. */
1682 if (regs.fast_return && regs.link_register == regs.r_pc)
1687 tprintf("fast return bonus\n");
1691 regs.fast_return = 0;
1692 branch_alignment_penalty = 1;
/* RTSD: deallocate frame (immediate + saved-register block), pop regs.  */
1698 if (opcode->op[2].type == RX_Operand_Register)
1701 /* RTSD cannot pop R0 (sp). */
1702 put_reg (0, get_reg (0) + GS() - (opcode->op[0].reg-opcode->op[2].reg+1)*4);
1703 if (opcode->op[2].reg == 0)
1704 EXCEPTION (EX_UNDEFINED);
1705 #ifdef CYCLE_ACCURATE
1706 tx = opcode->op[0].reg - opcode->op[2].reg + 1;
1708 for (i = opcode->op[2].reg; i <= opcode->op[0].reg; i ++)
1711 put_reg (i, pop ());
1716 #ifdef CYCLE_ACCURATE
1719 put_reg (0, get_reg (0) + GS());
1721 put_reg (pc, poppc());
1722 #ifdef CYCLE_ACCURATE
1723 if (regs.fast_return && regs.link_register == regs.r_pc)
1725 tprintf("fast return bonus\n");
1729 cycles (tx < 3 ? 3 : tx + 1);
1733 cycles (tx < 5 ? 5 : tx + 1);
1735 regs.fast_return = 0;
1736 branch_alignment_penalty = 1;
/* SAT: saturate D to INT_MAX/INT_MIN based on O and S flags.  */
1741 if (FLAG_O && FLAG_S)
1743 else if (FLAG_O && ! FLAG_S)
1749 MATH_OP (-, ! carry);
/* SCMPU: string compare of [r1] vs [r2], length r3, stop on mismatch or NUL.  */
1761 #ifdef CYCLE_ACCURATE
1764 while (regs.r[3] != 0)
1766 uma = mem_get_qi (regs.r[1] ++);
1767 umb = mem_get_qi (regs.r[2] ++);
1769 if (uma != umb || uma == 0)
1775 set_zc (0, ((int)uma - (int)umb) >= 0);
1776 cycles (2 + 4 * (tx / 4) + 4 * (tx % 4));
1780 v = 1 << opcode->op[0].reg;
/* SMOVB/SMOVF/SMOVU: block moves, backward or forward, length r3.  */
1791 #ifdef CYCLE_ACCURATE
1796 uma = mem_get_qi (regs.r[2] --);
1797 mem_put_qi (regs.r[1]--, uma);
1800 #ifdef CYCLE_ACCURATE
1802 cycles (6 + 3 * (tx / 4) + 3 * (tx % 4));
1804 cycles (2 + 3 * (tx % 4));
1810 #ifdef CYCLE_ACCURATE
1815 uma = mem_get_qi (regs.r[2] ++);
1816 mem_put_qi (regs.r[1]++, uma);
1819 cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
1823 #ifdef CYCLE_ACCURATE
1826 while (regs.r[3] != 0)
1828 uma = mem_get_qi (regs.r[2] ++);
1829 mem_put_qi (regs.r[1]++, uma);
1834 cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
/* This span begins with the shift opcodes, then the string-store/search
   ops (SSTR/SUNTIL/SWHILE), WAIT, XCHG, and the per-insn epilogue.  */
1837 case RXO_shar: /* d = ma >> mb */
1838 SHIFT_OP (sll, int, mb, >>=, 1);
1842 case RXO_shll: /* d = ma << mb */
1843 SHIFT_OP (ll, int, mb, <<=, 0x80000000UL);
1847 case RXO_shlr: /* d = ma >> mb */
1848 SHIFT_OP (ll, unsigned int, mb, >>=, 1);
/* SSTR: fill [r1] with r2, count r3, element width from opcode->size.  */
1854 #ifdef CYCLE_ACCURATE
1857 switch (opcode->size)
1860 while (regs.r[3] != 0)
1862 mem_put_si (regs.r[1], regs.r[2]);
1869 while (regs.r[3] != 0)
1871 mem_put_hi (regs.r[1], regs.r[2]);
1875 cycles (2 + (int)(tx / 2) + tx % 2);
1878 while (regs.r[3] != 0)
1880 mem_put_qi (regs.r[1], regs.r[2]);
1884 cycles (2 + (int)(tx / 4) + tx % 4);
/* WAIT (fragment): enable interrupts and stop the simulation.  */
1899 regs.r_psw |= FLAGBIT_I;
1900 DO_RETURN (RX_MAKE_STOPPED(0));
/* SUNTIL: scan [r1] for r2, count r3, until a match is found.  */
1908 #ifdef CYCLE_ACCURATE
1916 switch (opcode->size)
1920 while (regs.r[3] != 0)
1923 umb = mem_get_si (get_reg (1));
1928 cycles (3 + 3 * tx);
1931 uma = get_reg (2) & 0xffff;
1932 while (regs.r[3] != 0)
1935 umb = mem_get_hi (get_reg (1));
1940 cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
1943 uma = get_reg (2) & 0xff;
1944 while (regs.r[3] != 0)
1947 umb = mem_get_qi (regs.r[1]);
1952 cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
1960 set_zc (0, ((int)uma - (int)umb) >= 0);
/* SWHILE: scan [r1] while elements equal r2, count r3.  */
1965 #ifdef CYCLE_ACCURATE
1970 switch (opcode->size)
1974 while (regs.r[3] != 0)
1977 umb = mem_get_si (get_reg (1));
1982 cycles (3 + 3 * tx);
1985 uma = get_reg (2) & 0xffff;
1986 while (regs.r[3] != 0)
1989 umb = mem_get_hi (get_reg (1));
1994 cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
1997 uma = get_reg (2) & 0xff;
1998 while (regs.r[3] != 0)
2001 umb = mem_get_qi (regs.r[1]);
2006 cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
2014 set_zc (0, ((int)uma - (int)umb) >= 0);
2019 regs.r_psw |= FLAGBIT_I;
2020 DO_RETURN (RX_MAKE_STOPPED(0));
/* XCHG: read memory operand first, then swap with D.  */
2023 #ifdef CYCLE_ACCURATE
2026 v = GS (); /* This is the memory operand, if any. */
2027 PS (GD ()); /* and this may change the address register. */
2030 #ifdef CYCLE_ACCURATE
2031 /* all M cycles happen during xchg's cycles. */
2042 EXCEPTION (EX_UNDEFINED);
/* Epilogue: record memory-to-memory state for next insn's stall model.  */
2045 #ifdef CYCLE_ACCURATE
2048 regs.m2m |= M2M_SRC;
2050 regs.m2m |= M2M_DST;
/* Sanity check: every opcode path must have charged cycles.  */
2057 if (prev_cycle_count == regs.cycle_count)
2059 printf("Cycle count not updated! id %s\n", id_names[opcode->id]);
2065 if (running_benchmark)
2067 int omap = op_lookup (opcode->op[0].type, opcode->op[1].type, opcode->op[2].type);
2070 cycles_per_id[opcode->id][omap] += regs.cycle_count - prev_cycle_count;
2071 times_per_id[opcode->id][omap] ++;
2073 times_per_pair[prev_opcode_id][po0][opcode->id][omap] ++;
2075 prev_opcode_id = opcode->id;
2080 return RX_MAKE_STEPPED ();
/* Zero all cycle-statistics counters and open a benchmark window at the
   current cycle count.  */
2085 reset_pipeline_stats (void)
2087 memset (cycles_per_id, 0, sizeof(cycles_per_id));
2088 memset (times_per_id, 0, sizeof(times_per_id));
2090 register_stalls = 0;
2092 branch_alignment_stalls = 0;
2094 memset (times_per_pair, 0, sizeof(times_per_pair));
2095 running_benchmark = 1;
2097 benchmark_start_cycle = regs.cycle_count;
/* Close the benchmark window; disables tprintf tracing too.  */
2101 halt_pipeline_stats (void)
2103 running_benchmark = 0;
2104 benchmark_end_cycle = regs.cycle_count;
/* Dump the collected cycle statistics: totals, per-opcode/operand-shape
   averages, opcode-pair counts, and stall summaries.  */
2109 pipeline_stats (void)
2116 #ifdef CYCLE_ACCURATE
2119 printf ("cycles: %llu\n", regs.cycle_count);
2123 printf ("cycles: %13s\n", comma (regs.cycle_count));
2127 if (benchmark_start_cycle)
2128 printf ("bmark: %13s\n", comma (benchmark_end_cycle - benchmark_start_cycle));
/* Per-opcode table: total cycles, count, and average per execution.  */
2131 for (i = 0; i < N_RXO; i++)
2132 for (o1 = 0; o1 < N_MAP; o1 ++)
2133 if (times_per_id[i][o1])
2134 printf("%13s %13s %7.2f %s %s\n",
2135 comma (cycles_per_id[i][o1]),
2136 comma (times_per_id[i][o1]),
2137 (double)cycles_per_id[i][o1] / times_per_id[i][o1],
2138 op_cache_string(o1),
/* Opcode-pair table: how often each (prev, current) pair occurred.  */
2142 for (p = 0; p < N_RXO; p ++)
2143 for (p1 = 0; p1 < N_MAP; p1 ++)
2144 for (i = 0; i < N_RXO; i ++)
2145 for (o1 = 0; o1 < N_MAP; o1 ++)
2146 if (times_per_pair[p][p1][i][o1])
2148 printf("%13s %s %-9s -> %s %s\n",
2149 comma (times_per_pair[p][p1][i][o1]),
2150 op_cache_string(p1),
2152 op_cache_string(o1),
/* Stall summaries.  */
2157 printf("%13s memory stalls\n", comma (memory_stalls));
2158 printf("%13s register stalls\n", comma (register_stalls));
2159 printf("%13s branches taken (non-return)\n", comma (branch_stalls));
2160 printf("%13s branch alignment stalls\n", comma (branch_alignment_stalls));
2161 printf("%13s fast returns\n", comma (fast_returns));