1 /* Simulation code for the MIPS MDMX ASE.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 Contributed by Broadcom Corporation (SiByte).
5 This file is part of GDB, the GNU debugger.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License along
18 with this program; if not, write to the Free Software Foundation, Inc.,
19 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
25 /* Within mdmx.c we refer to the sim_cpu directly. */
27 #define SD (CPU_STATE(CPU))
28 #define SD_ cpu, cia, -1
30 /* MDMX Representations
32 An 8-bit packed byte element (OB) is always unsigned.
33 The 24-bit accumulators are signed and are represented as 32-bit
34 signed values, which are reduced to 24-bit signed values prior to
35 Round and Clamp operations.
37 A 16-bit packed halfword element (QH) is always signed.
38 The 48-bit accumulators are signed and are represented as 64-bit
39 signed values, which are reduced to 48-bit signed values prior to
40 Round and Clamp operations.
42 The code below assumes a 2's-complement representation of signed
43 quantities. Care is required to clear extended sign bits when
46 The code (and the code for arithmetic shifts in mips.igen) also makes
47 the (not guaranteed portable) assumption that right shifts of signed
48 quantities in C do sign extension. */
50 typedef unsigned64 unsigned48;
51 #define MASK48 (UNSIGNED64 (0xffffffffffff))
53 typedef unsigned32 unsigned24;
54 #define MASK24 (UNSIGNED32 (0xffffff))
57 mdmx_ob, /* OB (octal byte) */
58 mdmx_qh /* QH (quad half-word) */
62 sel_elem, /* element select */
63 sel_vect, /* vector select */
64 sel_imm /* immediate select */
/* Largest value representable in an (unsigned, 8-bit) OB element. */
67 #define OB_MAX ((unsigned8)0xFF)
/* Smallest and largest values representable in a (signed, 16-bit) QH element. */
68 #define QH_MIN ((signed16)0x8000)
69 #define QH_MAX ((signed16)0x7FFF)
/* Saturate x to at most OB_MAX and narrow to unsigned8.  Only the upper
   bound is checked.  The argument is evaluated more than once, so it must
   not have side effects. */
71 #define OB_CLAMP(x) ((unsigned8)((x) > OB_MAX ? OB_MAX : (x)))
/* Saturate x into [QH_MIN, QH_MAX] and narrow to signed16.  The argument
   is evaluated more than once, so it must not have side effects. */
72 #define QH_CLAMP(x) ((signed16)((x) < QH_MIN ? QH_MIN : \
73 ((x) > QH_MAX ? QH_MAX : (x))))
/* Format from fmtsel: bit 0 clear selects OB, bit 0 set selects QH. */
75 #define MX_FMT(fmtsel) (((fmtsel) & 0x1) == 0 ? mdmx_ob : mdmx_qh)
/* Operand-select kind from fmtsel: bit 4 clear is element select;
   bits 4:3 == 10 is vector select; bits 4:3 == 11 is immediate select. */
76 #define MX_VT(fmtsel) (((fmtsel) & 0x10) == 0 ? sel_elem : \
77 (((fmtsel) & 0x18) == 0x10 ? sel_vect : sel_imm))
/* Extract one 16-bit element of v as signed16.  The shift count is
   (fmtsel & 0xC) << 2, i.e. 16 times the value held in fmtsel bits 3:2. */
79 #define QH_ELEM(v,fmtsel) \
80 ((signed16)(((v) >> (((fmtsel) & 0xC) << 2)) & 0xFFFF))
/* Extract one 8-bit element of v as unsigned8.  The shift count is
   (fmtsel & 0xE) << 2, i.e. 8 times the value held in fmtsel bits 3:1. */
81 #define OB_ELEM(v,fmtsel) \
82 ((unsigned8)(((v) >> (((fmtsel) & 0xE) << 2)) & 0xFF))
85 typedef signed16 (*QH_FUNC)(signed16, signed16);
86 typedef unsigned8 (*OB_FUNC)(unsigned8, unsigned8);
88 /* vectorized logical operators */
91 AndQH(signed16 ts, signed16 tt)
93 return (signed16)((unsigned16)ts & (unsigned16)tt);
97 AndOB(unsigned8 ts, unsigned8 tt)
103 NorQH(signed16 ts, signed16 tt)
105 return (signed16)(((unsigned16)ts | (unsigned16)tt) ^ 0xFFFF);
109 NorOB(unsigned8 ts, unsigned8 tt)
111 return (ts | tt) ^ 0xFF;
115 OrQH(signed16 ts, signed16 tt)
117 return (signed16)((unsigned16)ts | (unsigned16)tt);
121 OrOB(unsigned8 ts, unsigned8 tt)
127 XorQH(signed16 ts, signed16 tt)
129 return (signed16)((unsigned16)ts ^ (unsigned16)tt);
133 XorOB(unsigned8 ts, unsigned8 tt)
139 SLLQH(signed16 ts, signed16 tt)
141 unsigned32 s = (unsigned32)tt & 0xF;
142 return (signed16)(((unsigned32)ts << s) & 0xFFFF);
146 SLLOB(unsigned8 ts, unsigned8 tt)
148 unsigned32 s = tt & 0x7;
149 return (ts << s) & 0xFF;
153 SRLQH(signed16 ts, signed16 tt)
155 unsigned32 s = (unsigned32)tt & 0xF;
156 return (signed16)((unsigned16)ts >> s);
160 SRLOB(unsigned8 ts, unsigned8 tt)
162 unsigned32 s = tt & 0x7;
167 /* Vectorized arithmetic operators. */
170 AddQH(signed16 ts, signed16 tt)
172 signed32 t = (signed32)ts + (signed32)tt;
177 AddOB(unsigned8 ts, unsigned8 tt)
179 unsigned32 t = (unsigned32)ts + (unsigned32)tt;
184 SubQH(signed16 ts, signed16 tt)
186 signed32 t = (signed32)ts - (signed32)tt;
191 SubOB(unsigned8 ts, unsigned8 tt)
194 t = (signed32)ts - (signed32)tt;
201 MinQH(signed16 ts, signed16 tt)
203 return (ts < tt ? ts : tt);
207 MinOB(unsigned8 ts, unsigned8 tt)
209 return (ts < tt ? ts : tt);
213 MaxQH(signed16 ts, signed16 tt)
215 return (ts > tt ? ts : tt);
219 MaxOB(unsigned8 ts, unsigned8 tt)
221 return (ts > tt ? ts : tt);
225 MulQH(signed16 ts, signed16 tt)
227 signed32 t = (signed32)ts * (signed32)tt;
232 MulOB(unsigned8 ts, unsigned8 tt)
234 unsigned32 t = (unsigned32)ts * (unsigned32)tt;
238 /* "msgn" and "sra" are defined only for QH format. */
241 MsgnQH(signed16 ts, signed16 tt)
245 t = (tt == QH_MIN ? QH_MAX : -tt);
255 SRAQH(signed16 ts, signed16 tt)
257 unsigned32 s = (unsigned32)tt & 0xF;
258 return (signed16)((signed32)ts >> s);
262 /* Dispatch tables for operations that update a CPR. */
264 static const QH_FUNC qh_func[] = {
265 AndQH, NorQH, OrQH, XorQH, SLLQH, SRLQH,
266 AddQH, SubQH, MinQH, MaxQH,
267 MulQH, MsgnQH, SRAQH, NULL, NULL
270 static const OB_FUNC ob_func[] = {
271 AndOB, NorOB, OrOB, XorOB, SLLOB, SRLOB,
272 AddOB, SubOB, MinOB, MaxOB,
273 MulOB, NULL, NULL, NULL, NULL
276 /* Auxiliary functions for CPR updates. */
278 /* Vector mapping for QH format. */
280 qh_vector_op(unsigned64 v1, unsigned64 v2, QH_FUNC func)
282 unsigned64 result = 0;
286 for (i = 0; i < 64; i += 16)
288 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
289 h2 = (signed16)(v2 & 0xFFFF); v2 >>= 16;
291 result |= ((unsigned64)((unsigned16)h) << i);
297 qh_map_op(unsigned64 v1, signed16 h2, QH_FUNC func)
299 unsigned64 result = 0;
303 for (i = 0; i < 64; i += 16)
305 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
307 result |= ((unsigned64)((unsigned16)h) << i);
313 /* Vector operations for OB format. */
316 ob_vector_op(unsigned64 v1, unsigned64 v2, OB_FUNC func)
318 unsigned64 result = 0;
322 for (i = 0; i < 64; i += 8)
324 b1 = v1 & 0xFF; v1 >>= 8;
325 b2 = v2 & 0xFF; v2 >>= 8;
327 result |= ((unsigned64)b << i);
333 ob_map_op(unsigned64 v1, unsigned8 b2, OB_FUNC func)
335 unsigned64 result = 0;
339 for (i = 0; i < 64; i += 8)
341 b1 = v1 & 0xFF; v1 >>= 8;
343 result |= ((unsigned64)b << i);
349 /* Primary entry for operations that update CPRs. */
351 mdmx_cpr_op(sim_cpu *cpu,
359 unsigned64 result = 0;
361 switch (MX_FMT (fmtsel))
364 switch (MX_VT (fmtsel))
367 op2 = ValueFPR(vt, fmt_mdmx);
368 result = qh_map_op(op1, QH_ELEM(op2, fmtsel), qh_func[op]);
371 result = qh_vector_op(op1, ValueFPR(vt, fmt_mdmx), qh_func[op]);
374 result = qh_map_op(op1, vt, qh_func[op]);
379 switch (MX_VT (fmtsel))
382 op2 = ValueFPR(vt, fmt_mdmx);
383 result = ob_map_op(op1, OB_ELEM(op2, fmtsel), ob_func[op]);
386 result = ob_vector_op(op1, ValueFPR(vt, fmt_mdmx), ob_func[op]);
389 result = ob_map_op(op1, vt, ob_func[op]);
401 /* Operations that update CCs */
404 qh_vector_test(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int cond)
410 for (i = 0; i < 4; i++)
412 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
413 h2 = (signed16)(v2 & 0xFFFF); v2 >>= 16;
414 boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
415 ((cond & MX_C_LT) && (h1 < h2));
421 qh_map_test(sim_cpu *cpu, unsigned64 v1, signed16 h2, int cond)
427 for (i = 0; i < 4; i++)
429 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
430 boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
431 ((cond & MX_C_LT) && (h1 < h2));
437 ob_vector_test(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int cond)
443 for (i = 0; i < 8; i++)
445 b1 = v1 & 0xFF; v1 >>= 8;
446 b2 = v2 & 0xFF; v2 >>= 8;
447 boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
448 ((cond & MX_C_LT) && (b1 < b2));
454 ob_map_test(sim_cpu *cpu, unsigned64 v1, unsigned8 b2, int cond)
460 for (i = 0; i < 8; i++)
462 b1 = (unsigned8)(v1 & 0xFF); v1 >>= 8;
463 boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
464 ((cond & MX_C_LT) && (b1 < b2));
471 mdmx_cc_op(sim_cpu *cpu,
480 switch (MX_FMT (fmtsel))
483 switch (MX_VT (fmtsel))
486 op2 = ValueFPR(vt, fmt_mdmx);
487 qh_map_test(cpu, v1, QH_ELEM(op2, fmtsel), cond);
490 qh_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
493 qh_map_test(cpu, v1, vt, cond);
498 switch (MX_VT (fmtsel))
501 op2 = ValueFPR(vt, fmt_mdmx);
502 ob_map_test(cpu, v1, OB_ELEM(op2, fmtsel), cond);
505 ob_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
508 ob_map_test(cpu, v1, vt, cond);
518 /* Pick operations. */
521 qh_vector_pick(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int tf)
523 unsigned64 result = 0;
528 for (i = 0; i < 4; i++)
530 h = ((GETFCC(i) == tf) ? (v1 & 0xFFFF) : (v2 & 0xFFFF));
531 v1 >>= 16; v2 >>= 16;
532 result |= ((unsigned64)h << s);
539 qh_map_pick(sim_cpu *cpu, unsigned64 v1, signed16 h2, int tf)
541 unsigned64 result = 0;
546 for (i = 0; i < 4; i++)
548 h = (GETFCC(i) == tf) ? (v1 & 0xFFFF) : (unsigned16)h2;
550 result |= ((unsigned64)h << s);
557 ob_vector_pick(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int tf)
559 unsigned64 result = 0;
564 for (i = 0; i < 8; i++)
566 b = (GETFCC(i) == tf) ? (v1 & 0xFF) : (v2 & 0xFF);
568 result |= ((unsigned64)b << s);
575 ob_map_pick(sim_cpu *cpu, unsigned64 v1, unsigned8 b2, int tf)
577 unsigned64 result = 0;
582 for (i = 0; i < 8; i++)
584 b = (GETFCC(i) == tf) ? (v1 & 0xFF) : b2;
586 result |= ((unsigned64)b << s);
594 mdmx_pick_op(sim_cpu *cpu,
601 unsigned64 result = 0;
604 switch (MX_FMT (fmtsel))
607 switch (MX_VT (fmtsel))
610 op2 = ValueFPR(vt, fmt_mdmx);
611 result = qh_map_pick(cpu, v1, QH_ELEM(op2, fmtsel), tf);
614 result = qh_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
617 result = qh_map_pick(cpu, v1, vt, tf);
622 switch (MX_VT (fmtsel))
625 op2 = ValueFPR(vt, fmt_mdmx);
626 result = ob_map_pick(cpu, v1, OB_ELEM(op2, fmtsel), tf);
629 result = ob_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
632 result = ob_map_pick(cpu, v1, vt, tf);
645 typedef void (*QH_ACC)(signed48 *a, signed16 ts, signed16 tt);
648 AccAddAQH(signed48 *a, signed16 ts, signed16 tt)
650 *a += (signed48)ts + (signed48)tt;
654 AccAddLQH(signed48 *a, signed16 ts, signed16 tt)
656 *a = (signed48)ts + (signed48)tt;
660 AccMulAQH(signed48 *a, signed16 ts, signed16 tt)
662 *a += (signed48)ts * (signed48)tt;
666 AccMulLQH(signed48 *a, signed16 ts, signed16 tt)
668 *a = (signed48)ts * (signed48)tt;
672 SubMulAQH(signed48 *a, signed16 ts, signed16 tt)
674 *a -= (signed48)ts * (signed48)tt;
678 SubMulLQH(signed48 *a, signed16 ts, signed16 tt)
680 *a = -((signed48)ts * (signed48)tt);
684 AccSubAQH(signed48 *a, signed16 ts, signed16 tt)
686 *a += (signed48)ts - (signed48)tt;
690 AccSubLQH(signed48 *a, signed16 ts, signed16 tt)
692 *a = (signed48)ts - (signed48)tt;
696 typedef void (*OB_ACC)(signed24 *acc, unsigned8 ts, unsigned8 tt);
699 AccAddAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
701 *a += (signed24)ts + (signed24)tt;
705 AccAddLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
707 *a = (signed24)ts + (signed24)tt;
711 AccMulAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
713 *a += (signed24)ts * (signed24)tt;
717 AccMulLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
719 *a = (signed24)ts * (signed24)tt;
723 SubMulAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
725 *a -= (signed24)ts * (signed24)tt;
729 SubMulLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
731 *a = -((signed24)ts * (signed24)tt);
735 AccSubAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
737 *a += (signed24)ts - (signed24)tt;
741 AccSubLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
743 *a = (signed24)ts - (signed24)tt;
747 /* Dispatch tables for operations that update the accumulators. */
/* QH accumulator handlers, indexed by op.  Entries alternate between the
   accumulating form (A: combines with the current *a) and the loading form
   (L: overwrites *a), in the same slot order as ob_acc[].  Slot 1 must be
   AccAddLQH; it previously repeated AccAddAQH, so the add-and-load
   operation accumulated instead of loading. */
749 static const QH_ACC qh_acc[] = {
750 AccAddAQH, AccAddLQH, AccMulAQH, AccMulLQH,
751 SubMulAQH, SubMulLQH, AccSubAQH, AccSubLQH
754 static const OB_ACC ob_acc[] = {
755 AccAddAOB, AccAddLOB, AccMulAOB, AccMulLOB,
756 SubMulAOB, SubMulLOB, AccSubAOB, AccSubLOB
761 qh_vector_acc(signed48 a[], unsigned64 v1, unsigned64 v2, QH_ACC acc)
766 for (i = 0; i < 4; i++)
768 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
769 h2 = (signed16)(v2 & 0xFFFF); v2 >>= 16;
770 (*acc)(&a[i], h1, h2);
775 qh_map_acc(signed48 a[], unsigned64 v1, signed16 h2, QH_ACC acc)
780 for (i = 0; i < 4; i++)
782 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
783 (*acc)(&a[i], h1, h2);
788 ob_vector_acc(signed24 a[], unsigned64 v1, unsigned64 v2, OB_ACC acc)
793 for (i = 0; i < 8; i++)
795 b1 = v1 & 0xFF; v1 >>= 8;
796 b2 = v2 & 0xFF; v2 >>= 8;
797 (*acc)(&a[i], b1, b2);
802 ob_map_acc(signed24 a[], unsigned64 v1, unsigned8 b2, OB_ACC acc)
807 for (i = 0; i < 8; i++)
809 b1 = v1 & 0xFF; v1 >>= 8;
810 (*acc)(&a[i], b1, b2);
815 /* Primary entry for operations that accumulate.  Selects the QH or OB path from MX_FMT (fmtsel), then the element / vector / immediate operand form from MX_VT (fmtsel), and applies qh_acc[op] or ob_acc[op] to ACC. */
817 mdmx_acc_op(sim_cpu *cpu,
826 switch (MX_FMT (fmtsel))
829 switch (MX_VT (fmtsel))
832 op2 = ValueFPR(vt, fmt_mdmx);
833 qh_map_acc(ACC.qh, op1, QH_ELEM(op2, fmtsel), qh_acc[op]);
836 qh_vector_acc(ACC.qh, op1, ValueFPR(vt, fmt_mdmx), qh_acc[op]);
839 qh_map_acc(ACC.qh, op1, vt, qh_acc[op]);
844 switch (MX_VT (fmtsel))
847 op2 = ValueFPR(vt, fmt_mdmx);
848 ob_map_acc(ACC.ob, op1, OB_ELEM(op2, fmtsel), ob_acc[op]);
851 ob_vector_acc(ACC.ob, op1, ValueFPR(vt, fmt_mdmx), ob_acc[op]);
854 ob_map_acc(ACC.ob, op1, vt, ob_acc[op]); /* Immediate form: the operand is the vt field itself, as in the QH path; op2 is only loaded for element select. */
864 /* Reading and writing accumulator (no conversion). */
867 mdmx_rac_op(sim_cpu *cpu,
876 shift = op; /* L = 00, M = 01, H = 10. */
882 shift <<= 4; /* 16 bits per element. */
883 for (i = 3; i >= 0; --i)
886 result |= ((ACC.qh[i] >> shift) & 0xFFFF);
890 shift <<= 3; /* 8 bits per element. */
891 for (i = 7; i >= 0; --i)
894 result |= ((ACC.ob[i] >> shift) & 0xFF);
904 mdmx_wacl(sim_cpu *cpu,
915 for (i = 0; i < 4; i++)
917 signed32 s = (signed16)(vs & 0xFFFF);
918 ACC.qh[i] = ((signed48)s << 16) | (vt & 0xFFFF);
919 vs >>= 16; vt >>= 16;
923 for (i = 0; i < 8; i++)
925 signed16 s = (signed8)(vs & 0xFF);
926 ACC.ob[i] = ((signed24)s << 8) | (vt & 0xFF);
936 mdmx_wach(sim_cpu *cpu,
946 for (i = 0; i < 4; i++)
948 signed32 s = (signed16)(vs & 0xFFFF);
949 ACC.qh[i] &= ~((signed48)0xFFFF << 32);
950 ACC.qh[i] |= ((signed48)s << 32);
955 for (i = 0; i < 8; i++)
957 ACC.ob[i] &= ~((signed24)0xFF << 16);
958 ACC.ob[i] |= ((signed24)(vs & 0xFF) << 16);
968 /* Reading and writing accumulator (rounding conversions).
969 Enumerating function guarantees s >= 0 for QH ops. */
971 typedef signed16 (*QH_ROUND)(signed48 a, signed16 s);
973 #define QH_BIT(n) ((unsigned48)1 << (n))
974 #define QH_ONES(n) (((unsigned48)1 << (n))-1)
977 RNASQH(signed48 a, signed16 s)
987 if ((a & QH_BIT(47)) == 0)
989 if (s > 0 && ((a >> (s-1)) & 1) == 1)
996 if (s > 0 && ((a >> (s-1)) & 1) == 1)
998 if (s > 1 && ((unsigned48)a & QH_ONES(s-1)) != 0)
1004 result = (signed16)t;
1010 RNAUQH(signed48 a, signed16 s)
1018 result = ((unsigned48)a & MASK48) >> 47;
1021 t = ((unsigned48)a & MASK48) >> s;
1022 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1026 result = (signed16)t;
1032 RNESQH(signed48 a, signed16 s)
1035 signed16 result = 0;
1042 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1044 if (s == 1 || (a & QH_ONES(s-1)) == 0)
1049 if ((a & QH_BIT(47)) == 0)
1059 result = (signed16)t;
1065 RNEUQH(signed48 a, signed16 s)
1073 result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1076 t = ((unsigned48)a & MASK48) >> s;
1077 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1079 if (s > 1 && (a & QH_ONES(s-1)) != 0)
1086 result = (signed16)t;
1092 RZSQH(signed48 a, signed16 s)
1095 signed16 result = 0;
1102 if ((a & QH_BIT(47)) == 0)
1112 result = (signed16)t;
1118 RZUQH(signed48 a, signed16 s)
1121 signed16 result = 0;
1126 result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1129 t = ((unsigned48)a & MASK48) >> s;
1132 result = (signed16)t;
1138 typedef unsigned8 (*OB_ROUND)(signed24 a, unsigned8 s);
1140 #define OB_BIT(n) ((unsigned24)1 << (n))
1141 #define OB_ONES(n) (((unsigned24)1 << (n))-1)
1144 RNAUOB(signed24 a, unsigned8 s)
1152 result = ((unsigned24)a & MASK24) >> 23;
1155 t = ((unsigned24)a & MASK24) >> s;
1156 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1158 result = OB_CLAMP(t);
1164 RNEUOB(signed24 a, unsigned8 s)
1172 result = (((unsigned24)a & MASK24) > OB_BIT(23) ? 1 : 0);
1175 t = ((unsigned24)a & MASK24) >> s;
1176 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1178 if (s > 1 && (a & OB_ONES(s-1)) != 0)
1183 result = OB_CLAMP(t);
1189 RZUOB(signed24 a, unsigned8 s)
1198 t = ((unsigned24)a & MASK24) >> s;
1199 result = OB_CLAMP(t);
1205 static const QH_ROUND qh_round[] = {
1206 RNASQH, RNAUQH, RNESQH, RNEUQH, RZSQH, RZUQH
1209 static const OB_ROUND ob_round[] = {
1210 NULL, RNAUOB, NULL, RNEUOB, NULL, RZUOB
1215 qh_vector_round(sim_cpu *cpu, address_word cia, unsigned64 v2, QH_ROUND round)
1217 unsigned64 result = 0;
1222 for (i = 0; i < 4; i++)
1224 h2 = (signed16)(v2 & 0xFFFF);
1226 h = (*round)(ACC.qh[i], h2);
1229 UnpredictableResult ();
1233 result |= ((unsigned64)((unsigned16)h) << s);
1240 qh_map_round(sim_cpu *cpu, address_word cia, signed16 h2, QH_ROUND round)
1242 unsigned64 result = 0;
1247 for (i = 0; i < 4; i++)
1250 h = (*round)(ACC.qh[i], h2);
1253 UnpredictableResult ();
1256 result |= ((unsigned64)((unsigned16)h) << s);
1263 ob_vector_round(sim_cpu *cpu, address_word cia, unsigned64 v2, OB_ROUND round)
1265 unsigned64 result = 0;
1270 for (i = 0; i < 8; i++)
1272 b2 = v2 & 0xFF; v2 >>= 8;
1273 b = (*round)(ACC.ob[i], b2);
1274 result |= ((unsigned64)b << s);
1281 ob_map_round(sim_cpu *cpu, address_word cia, unsigned8 b2, OB_ROUND round)
1283 unsigned64 result = 0;
1288 for (i = 0; i < 8; i++)
1290 b = (*round)(ACC.ob[i], b2);
1291 result |= ((unsigned64)b << s);
1299 mdmx_round_op(sim_cpu *cpu,
1306 unsigned64 result = 0;
1308 switch (MX_FMT (fmtsel))
1311 switch (MX_VT (fmtsel))
1314 op2 = ValueFPR(vt, fmt_mdmx);
1315 result = qh_map_round(cpu, cia, QH_ELEM(op2, fmtsel), qh_round[rm]);
1318 op2 = ValueFPR(vt, fmt_mdmx);
1319 result = qh_vector_round(cpu, cia, op2, qh_round[rm]);
1322 result = qh_map_round(cpu, cia, vt, qh_round[rm]);
1327 switch (MX_VT (fmtsel))
1330 op2 = ValueFPR(vt, fmt_mdmx);
1331 result = ob_map_round(cpu, cia, OB_ELEM(op2, fmtsel), ob_round[rm]);
1334 op2 = ValueFPR(vt, fmt_mdmx);
1335 result = ob_vector_round(cpu, cia, op2, ob_round[rm]);
1338 result = ob_map_round(cpu, cia, vt, ob_round[rm]);
1350 /* Shuffle operation. */
1353 enum {vs, ss, vt} source;
1357 static const sh_map ob_shuffle[][8] = {
1358 /* MDMX 2.0 encodings (3-4, 6-7). */
1359 /* vr5400 encoding (5), otherwise. */
1361 {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}}, /* RSVD */
1362 {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}}, /* RSVD */
1363 {{vs,0}, {ss,0}, {vs,1}, {ss,1}, {vs,2}, {ss,2}, {vs,3}, {ss,3}}, /* upsl */
1364 {{vt,1}, {vt,3}, {vt,5}, {vt,7}, {vs,1}, {vs,3}, {vs,5}, {vs,7}}, /* pach */
1365 {{vt,0}, {vt,2}, {vt,4}, {vt,6}, {vs,0}, {vs,2}, {vs,4}, {vs,6}}, /* pacl */
1366 {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}}, /* mixh */
1367 {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}} /* mixl */
1370 static const sh_map qh_shuffle[][4] = {
1371 {{vt,2}, {vs,2}, {vt,3}, {vs,3}}, /* mixh */
1372 {{vt,0}, {vs,0}, {vt,1}, {vs,1}}, /* mixl */
1373 {{vt,1}, {vt,3}, {vs,1}, {vs,3}}, /* pach */
1375 {{vt,1}, {vs,0}, {vt,3}, {vs,2}}, /* bfla */
1377 {{vt,2}, {vt,3}, {vs,2}, {vs,3}}, /* repa */
1378 {{vt,0}, {vt,1}, {vs,0}, {vs,1}} /* repb */
1383 mdmx_shuffle(sim_cpu *cpu,
1389 unsigned64 result = 0;
1393 if ((shop & 0x3) == 0x1) /* QH format. */
1397 for (i = 0; i < 4; i++)
1401 switch (qh_shuffle[op][i].source)
1413 result |= (((v >> 16*qh_shuffle[op][i].index) & 0xFFFF) << s);
1417 else if ((shop & 0x1) == 0x0) /* OB format. */
1421 for (i = 0; i < 8; i++)
1424 unsigned int ishift = 8*ob_shuffle[op][i].index;
1426 switch (ob_shuffle[op][i].source)
1429 b = (op1 >> ishift) & 0xFF;
1432 b = ((op1 >> ishift) & 0x80) ? 0xFF : 0;
1435 b = (op2 >> ishift) & 0xFF;
1441 result |= ((unsigned64)b << s);