1 /* Simulation code for the MIPS MDMX ASE.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 Contributed by Broadcom Corporation (SiByte).
5 This file is part of GDB, the GNU debugger.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License along
18 with this program; if not, write to the Free Software Foundation, Inc.,
19 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
25 /* Within mdmx.c we refer to the sim_cpu directly. */
27 #define SD (CPU_STATE(CPU))
28 #define SD_ cpu, cia, -1
30 /* MDMX Representations
32 An 8-bit packed byte element (OB) is always unsigned.
33 The 24-bit accumulators are signed and are represented as 32-bit
34 signed values, which are reduced to 24-bit signed values prior to
35 Round and Clamp operations.
37 A 16-bit packed halfword element (QH) is always signed.
38 The 48-bit accumulators are signed and are represented as 64-bit
39 signed values, which are reduced to 48-bit signed values prior to
40 Round and Clamp operations.
42 The code below assumes a 2's-complement representation of signed
quantities. Care is required to clear extended sign bits when
repacking fields.
46 The code (and the code for arithmetic shifts in mips.igen) also makes
47 the (not guaranteed portable) assumption that right shifts of signed
48 quantities in C do sign extension. */
50 typedef unsigned64 unsigned48;
51 #define MASK48 (UNSIGNED64 (0xffffffffffff))
53 typedef unsigned32 unsigned24;
54 #define MASK24 (UNSIGNED32 (0xffffff))
57 mdmx_ob, /* OB (octal byte) */
58 mdmx_qh /* QH (quad half-word) */
62 sel_elem, /* element select */
63 sel_vect, /* vector select */
64 sel_imm /* immediate select */
/* Saturation bounds: OB elements are unsigned 8-bit values, QH
   elements are signed 16-bit values.  */
#define OB_MAX  ((unsigned8)0xFF)
#define QH_MIN  ((signed16)0x8000)
#define QH_MAX  ((signed16)0x7FFF)

/* Saturate X to the OB / QH element range.  X is evaluated more than
   once, so it must be free of side effects.  OB_CLAMP only bounds X
   from above.  */
#define OB_CLAMP(x) \
  ((unsigned8)((x) > OB_MAX ? OB_MAX : (x)))
#define QH_CLAMP(x) \
  ((signed16)((x) < QH_MIN ? QH_MIN : ((x) > QH_MAX ? QH_MAX : (x))))

/* Decode the format-select field.  Bit 0 picks the element format
   (0 = OB, 1 = QH).  Bit 4 clear means element select; bits 4:3 == 10
   means vector select; bits 4:3 == 11 means immediate select.  */
#define MX_FMT(fmtsel) \
  (((fmtsel) & 0x1) == 0 ? mdmx_ob : mdmx_qh)
#define MX_VT(fmtsel) \
  (((fmtsel) & 0x10) == 0 ? sel_elem : \
   (((fmtsel) & 0x18) == 0x10 ? sel_vect : sel_imm))

/* Extract the element of V chosen by FMTSEL.  Bits 3:2 index one of
   four halfwords (shift = index * 16); bits 3:1 index one of eight
   bytes (shift = index * 8).  */
#define QH_ELEM(v,fmtsel) \
  ((signed16)(((v) >> (((fmtsel) & 0xC) << 2)) & 0xFFFF))
#define OB_ELEM(v,fmtsel) \
  ((unsigned8)(((v) >> (((fmtsel) & 0xE) << 2)) & 0xFF))
85 typedef signed16 (*QH_FUNC)(signed16, signed16);
86 typedef unsigned8 (*OB_FUNC)(unsigned8, unsigned8);
88 /* vectorized logical operators */
91 AndQH(signed16 ts, signed16 tt)
93 return (signed16)((unsigned16)ts & (unsigned16)tt);
97 AndOB(unsigned8 ts, unsigned8 tt)
103 NorQH(signed16 ts, signed16 tt)
105 return (signed16)(((unsigned16)ts | (unsigned16)tt) ^ 0xFFFF);
109 NorOB(unsigned8 ts, unsigned8 tt)
111 return (ts | tt) ^ 0xFF;
115 OrQH(signed16 ts, signed16 tt)
117 return (signed16)((unsigned16)ts | (unsigned16)tt);
121 OrOB(unsigned8 ts, unsigned8 tt)
127 XorQH(signed16 ts, signed16 tt)
129 return (signed16)((unsigned16)ts ^ (unsigned16)tt);
133 XorOB(unsigned8 ts, unsigned8 tt)
139 SLLQH(signed16 ts, signed16 tt)
141 unsigned32 s = (unsigned32)tt & 0xF;
142 return (signed16)(((unsigned32)ts << s) & 0xFFFF);
146 SLLOB(unsigned8 ts, unsigned8 tt)
148 unsigned32 s = tt & 0x7;
149 return (ts << s) & 0xFF;
153 SRLQH(signed16 ts, signed16 tt)
155 unsigned32 s = (unsigned32)tt & 0xF;
156 return (signed16)((unsigned16)ts >> s);
160 SRLOB(unsigned8 ts, unsigned8 tt)
162 unsigned32 s = tt & 0x7;
167 /* Vectorized arithmetic operators. */
170 AddQH(signed16 ts, signed16 tt)
172 signed32 t = (signed32)ts + (signed32)tt;
177 AddOB(unsigned8 ts, unsigned8 tt)
179 unsigned32 t = (unsigned32)ts + (unsigned32)tt;
184 SubQH(signed16 ts, signed16 tt)
186 signed32 t = (signed32)ts - (signed32)tt;
191 SubOB(unsigned8 ts, unsigned8 tt)
194 t = (signed32)ts - (signed32)tt;
201 MinQH(signed16 ts, signed16 tt)
203 return (ts < tt ? ts : tt);
207 MinOB(unsigned8 ts, unsigned8 tt)
209 return (ts < tt ? ts : tt);
213 MaxQH(signed16 ts, signed16 tt)
215 return (ts > tt ? ts : tt);
219 MaxOB(unsigned8 ts, unsigned8 tt)
221 return (ts > tt ? ts : tt);
225 MulQH(signed16 ts, signed16 tt)
227 signed32 t = (signed32)ts * (signed32)tt;
232 MulOB(unsigned8 ts, unsigned8 tt)
234 unsigned32 t = (unsigned32)ts * (unsigned32)tt;
238 /* "msgn" and "sra" are defined only for QH format. */
241 MsgnQH(signed16 ts, signed16 tt)
245 t = (tt == QH_MIN ? QH_MAX : -tt);
254 SRAQH(signed16 ts, signed16 tt)
256 unsigned32 s = (unsigned32)tt & 0xF;
257 return (signed16)((signed32)ts >> s);
261 /* "pabsdiff" and "pavg" are defined only for OB format. */
264 AbsDiffOB(unsigned8 ts, unsigned8 tt)
266 return (ts >= tt ? ts - tt : tt - ts);
270 AvgOB(unsigned8 ts, unsigned8 tt)
272 return ((unsigned32)ts + (unsigned32)tt + 1) >> 1;
276 /* Dispatch tables for operations that update a CPR. */
278 static const QH_FUNC qh_func[] = {
279 AndQH, NorQH, OrQH, XorQH, SLLQH, SRLQH,
280 AddQH, SubQH, MinQH, MaxQH,
281 MulQH, MsgnQH, SRAQH, NULL, NULL
284 static const OB_FUNC ob_func[] = {
285 AndOB, NorOB, OrOB, XorOB, SLLOB, SRLOB,
286 AddOB, SubOB, MinOB, MaxOB,
287 MulOB, NULL, NULL, AbsDiffOB, AvgOB
290 /* Auxiliary functions for CPR updates. */
292 /* Vector mapping for QH format. */
294 qh_vector_op(unsigned64 v1, unsigned64 v2, QH_FUNC func)
296 unsigned64 result = 0;
300 for (i = 0; i < 64; i += 16)
302 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
303 h2 = (signed16)(v2 & 0xFFFF); v2 >>= 16;
305 result |= ((unsigned64)((unsigned16)h) << i);
311 qh_map_op(unsigned64 v1, signed16 h2, QH_FUNC func)
313 unsigned64 result = 0;
317 for (i = 0; i < 64; i += 16)
319 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
321 result |= ((unsigned64)((unsigned16)h) << i);
327 /* Vector operations for OB format. */
330 ob_vector_op(unsigned64 v1, unsigned64 v2, OB_FUNC func)
332 unsigned64 result = 0;
336 for (i = 0; i < 64; i += 8)
338 b1 = v1 & 0xFF; v1 >>= 8;
339 b2 = v2 & 0xFF; v2 >>= 8;
341 result |= ((unsigned64)b << i);
347 ob_map_op(unsigned64 v1, unsigned8 b2, OB_FUNC func)
349 unsigned64 result = 0;
353 for (i = 0; i < 64; i += 8)
355 b1 = v1 & 0xFF; v1 >>= 8;
357 result |= ((unsigned64)b << i);
363 /* Primary entry for operations that update CPRs. */
365 mdmx_cpr_op(sim_cpu *cpu,
373 unsigned64 result = 0;
375 switch (MX_FMT (fmtsel))
378 switch (MX_VT (fmtsel))
381 op2 = ValueFPR(vt, fmt_mdmx);
382 result = qh_map_op(op1, QH_ELEM(op2, fmtsel), qh_func[op]);
385 result = qh_vector_op(op1, ValueFPR(vt, fmt_mdmx), qh_func[op]);
388 result = qh_map_op(op1, vt, qh_func[op]);
393 switch (MX_VT (fmtsel))
396 op2 = ValueFPR(vt, fmt_mdmx);
397 result = ob_map_op(op1, OB_ELEM(op2, fmtsel), ob_func[op]);
400 result = ob_vector_op(op1, ValueFPR(vt, fmt_mdmx), ob_func[op]);
403 result = ob_map_op(op1, vt, ob_func[op]);
415 /* Operations that update CCs */
418 qh_vector_test(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int cond)
424 for (i = 0; i < 4; i++)
426 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
427 h2 = (signed16)(v2 & 0xFFFF); v2 >>= 16;
428 boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
429 ((cond & MX_C_LT) && (h1 < h2));
435 qh_map_test(sim_cpu *cpu, unsigned64 v1, signed16 h2, int cond)
441 for (i = 0; i < 4; i++)
443 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
444 boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
445 ((cond & MX_C_LT) && (h1 < h2));
451 ob_vector_test(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int cond)
457 for (i = 0; i < 8; i++)
459 b1 = v1 & 0xFF; v1 >>= 8;
460 b2 = v2 & 0xFF; v2 >>= 8;
461 boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
462 ((cond & MX_C_LT) && (b1 < b2));
468 ob_map_test(sim_cpu *cpu, unsigned64 v1, unsigned8 b2, int cond)
474 for (i = 0; i < 8; i++)
476 b1 = (unsigned8)(v1 & 0xFF); v1 >>= 8;
477 boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
478 ((cond & MX_C_LT) && (b1 < b2));
485 mdmx_cc_op(sim_cpu *cpu,
494 switch (MX_FMT (fmtsel))
497 switch (MX_VT (fmtsel))
500 op2 = ValueFPR(vt, fmt_mdmx);
501 qh_map_test(cpu, v1, QH_ELEM(op2, fmtsel), cond);
504 qh_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
507 qh_map_test(cpu, v1, vt, cond);
512 switch (MX_VT (fmtsel))
515 op2 = ValueFPR(vt, fmt_mdmx);
516 ob_map_test(cpu, v1, OB_ELEM(op2, fmtsel), cond);
519 ob_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
522 ob_map_test(cpu, v1, vt, cond);
532 /* Pick operations. */
535 qh_vector_pick(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int tf)
537 unsigned64 result = 0;
542 for (i = 0; i < 4; i++)
544 h = ((GETFCC(i) == tf) ? (v1 & 0xFFFF) : (v2 & 0xFFFF));
545 v1 >>= 16; v2 >>= 16;
546 result |= ((unsigned64)h << s);
553 qh_map_pick(sim_cpu *cpu, unsigned64 v1, signed16 h2, int tf)
555 unsigned64 result = 0;
560 for (i = 0; i < 4; i++)
562 h = (GETFCC(i) == tf) ? (v1 & 0xFFFF) : (unsigned16)h2;
564 result |= ((unsigned64)h << s);
571 ob_vector_pick(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int tf)
573 unsigned64 result = 0;
578 for (i = 0; i < 8; i++)
580 b = (GETFCC(i) == tf) ? (v1 & 0xFF) : (v2 & 0xFF);
582 result |= ((unsigned64)b << s);
589 ob_map_pick(sim_cpu *cpu, unsigned64 v1, unsigned8 b2, int tf)
591 unsigned64 result = 0;
596 for (i = 0; i < 8; i++)
598 b = (GETFCC(i) == tf) ? (v1 & 0xFF) : b2;
600 result |= ((unsigned64)b << s);
608 mdmx_pick_op(sim_cpu *cpu,
615 unsigned64 result = 0;
618 switch (MX_FMT (fmtsel))
621 switch (MX_VT (fmtsel))
624 op2 = ValueFPR(vt, fmt_mdmx);
625 result = qh_map_pick(cpu, v1, QH_ELEM(op2, fmtsel), tf);
628 result = qh_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
631 result = qh_map_pick(cpu, v1, vt, tf);
636 switch (MX_VT (fmtsel))
639 op2 = ValueFPR(vt, fmt_mdmx);
640 result = ob_map_pick(cpu, v1, OB_ELEM(op2, fmtsel), tf);
643 result = ob_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
646 result = ob_map_pick(cpu, v1, vt, tf);
659 typedef void (*QH_ACC)(signed48 *a, signed16 ts, signed16 tt);
662 AccAddAQH(signed48 *a, signed16 ts, signed16 tt)
664 *a += (signed48)ts + (signed48)tt;
668 AccAddLQH(signed48 *a, signed16 ts, signed16 tt)
670 *a = (signed48)ts + (signed48)tt;
674 AccMulAQH(signed48 *a, signed16 ts, signed16 tt)
676 *a += (signed48)ts * (signed48)tt;
680 AccMulLQH(signed48 *a, signed16 ts, signed16 tt)
682 *a = (signed48)ts * (signed48)tt;
686 SubMulAQH(signed48 *a, signed16 ts, signed16 tt)
688 *a -= (signed48)ts * (signed48)tt;
692 SubMulLQH(signed48 *a, signed16 ts, signed16 tt)
694 *a = -((signed48)ts * (signed48)tt);
698 AccSubAQH(signed48 *a, signed16 ts, signed16 tt)
700 *a += (signed48)ts - (signed48)tt;
704 AccSubLQH(signed48 *a, signed16 ts, signed16 tt)
706 *a = (signed48)ts - (signed48)tt;
710 typedef void (*OB_ACC)(signed24 *acc, unsigned8 ts, unsigned8 tt);
713 AccAddAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
715 *a += (signed24)ts + (signed24)tt;
719 AccAddLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
721 *a = (signed24)ts + (signed24)tt;
725 AccMulAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
727 *a += (signed24)ts * (signed24)tt;
731 AccMulLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
733 *a = (signed24)ts * (signed24)tt;
737 SubMulAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
739 *a -= (signed24)ts * (signed24)tt;
743 SubMulLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
745 *a = -((signed24)ts * (signed24)tt);
749 AccSubAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
751 *a += (signed24)ts - (signed24)tt;
755 AccSubLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
757 *a = (signed24)ts - (signed24)tt;
761 AccAbsDiffOB(signed24 *a, unsigned8 ts, unsigned8 tt)
763 unsigned8 t = (ts >= tt ? ts - tt : tt - ts);
/* Dispatch tables for operations that update the accumulator. */
770 static const QH_ACC qh_acc[] = {
771 AccAddAQH, AccAddAQH, AccMulAQH, AccMulLQH,
772 SubMulAQH, SubMulLQH, AccSubAQH, AccSubLQH,
776 static const OB_ACC ob_acc[] = {
777 AccAddAOB, AccAddLOB, AccMulAOB, AccMulLOB,
778 SubMulAOB, SubMulLOB, AccSubAOB, AccSubLOB,
784 qh_vector_acc(signed48 a[], unsigned64 v1, unsigned64 v2, QH_ACC acc)
789 for (i = 0; i < 4; i++)
791 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
792 h2 = (signed16)(v2 & 0xFFFF); v2 >>= 16;
793 (*acc)(&a[i], h1, h2);
798 qh_map_acc(signed48 a[], unsigned64 v1, signed16 h2, QH_ACC acc)
803 for (i = 0; i < 4; i++)
805 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
806 (*acc)(&a[i], h1, h2);
811 ob_vector_acc(signed24 a[], unsigned64 v1, unsigned64 v2, OB_ACC acc)
816 for (i = 0; i < 8; i++)
818 b1 = v1 & 0xFF; v1 >>= 8;
819 b2 = v2 & 0xFF; v2 >>= 8;
820 (*acc)(&a[i], b1, b2);
825 ob_map_acc(signed24 a[], unsigned64 v1, unsigned8 b2, OB_ACC acc)
830 for (i = 0; i < 8; i++)
832 b1 = v1 & 0xFF; v1 >>= 8;
833 (*acc)(&a[i], b1, b2);
838 /* Primary entry for operations that accumulate */
840 mdmx_acc_op(sim_cpu *cpu,
849 switch (MX_FMT (fmtsel))
852 switch (MX_VT (fmtsel))
855 op2 = ValueFPR(vt, fmt_mdmx);
856 qh_map_acc(ACC.qh, op1, QH_ELEM(op2, fmtsel), qh_acc[op]);
859 qh_vector_acc(ACC.qh, op1, ValueFPR(vt, fmt_mdmx), qh_acc[op]);
862 qh_map_acc(ACC.qh, op1, vt, qh_acc[op]);
867 switch (MX_VT (fmtsel))
870 op2 = ValueFPR(vt, fmt_mdmx);
871 ob_map_acc(ACC.ob, op1, OB_ELEM(op2, fmtsel), ob_acc[op]);
874 ob_vector_acc(ACC.ob, op1, ValueFPR(vt, fmt_mdmx), ob_acc[op]);
877 ob_map_acc(ACC.ob, op1, op2, ob_acc[op]);
887 /* Reading and writing accumulator (no conversion). */
890 mdmx_rac_op(sim_cpu *cpu,
899 shift = op; /* L = 00, M = 01, H = 10. */
905 shift <<= 4; /* 16 bits per element. */
906 for (i = 3; i >= 0; --i)
909 result |= ((ACC.qh[i] >> shift) & 0xFFFF);
913 shift <<= 3; /* 8 bits per element. */
914 for (i = 7; i >= 0; --i)
917 result |= ((ACC.ob[i] >> shift) & 0xFF);
927 mdmx_wacl(sim_cpu *cpu,
938 for (i = 0; i < 4; i++)
940 signed32 s = (signed16)(vs & 0xFFFF);
941 ACC.qh[i] = ((signed48)s << 16) | (vt & 0xFFFF);
942 vs >>= 16; vt >>= 16;
946 for (i = 0; i < 8; i++)
948 signed16 s = (signed8)(vs & 0xFF);
949 ACC.ob[i] = ((signed24)s << 8) | (vt & 0xFF);
959 mdmx_wach(sim_cpu *cpu,
969 for (i = 0; i < 4; i++)
971 signed32 s = (signed16)(vs & 0xFFFF);
972 ACC.qh[i] &= ~((signed48)0xFFFF << 32);
973 ACC.qh[i] |= ((signed48)s << 32);
978 for (i = 0; i < 8; i++)
980 ACC.ob[i] &= ~((signed24)0xFF << 16);
981 ACC.ob[i] |= ((signed24)(vs & 0xFF) << 16);
991 /* Reading and writing accumulator (rounding conversions).
992 Enumerating function guarantees s >= 0 for QH ops. */
994 typedef signed16 (*QH_ROUND)(signed48 a, signed16 s);
996 #define QH_BIT(n) ((unsigned48)1 << (n))
997 #define QH_ONES(n) (((unsigned48)1 << (n))-1)
1000 RNASQH(signed48 a, signed16 s)
1003 signed16 result = 0;
1010 if ((a & QH_BIT(47)) == 0)
1012 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1019 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1021 if (s > 1 && ((unsigned48)a & QH_ONES(s-1)) != 0)
1027 result = (signed16)t;
1033 RNAUQH(signed48 a, signed16 s)
1041 result = ((unsigned48)a & MASK48) >> 47;
1044 t = ((unsigned48)a & MASK48) >> s;
1045 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1049 result = (signed16)t;
1055 RNESQH(signed48 a, signed16 s)
1058 signed16 result = 0;
1065 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1067 if (s == 1 || (a & QH_ONES(s-1)) == 0)
1072 if ((a & QH_BIT(47)) == 0)
1082 result = (signed16)t;
1088 RNEUQH(signed48 a, signed16 s)
1096 result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1099 t = ((unsigned48)a & MASK48) >> s;
1100 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1102 if (s > 1 && (a & QH_ONES(s-1)) != 0)
1109 result = (signed16)t;
1115 RZSQH(signed48 a, signed16 s)
1118 signed16 result = 0;
1125 if ((a & QH_BIT(47)) == 0)
1135 result = (signed16)t;
1141 RZUQH(signed48 a, signed16 s)
1144 signed16 result = 0;
1149 result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1152 t = ((unsigned48)a & MASK48) >> s;
1155 result = (signed16)t;
1161 typedef unsigned8 (*OB_ROUND)(signed24 a, unsigned8 s);
1163 #define OB_BIT(n) ((unsigned24)1 << (n))
1164 #define OB_ONES(n) (((unsigned24)1 << (n))-1)
1167 RNAUOB(signed24 a, unsigned8 s)
1175 result = ((unsigned24)a & MASK24) >> 23;
1178 t = ((unsigned24)a & MASK24) >> s;
1179 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1181 result = OB_CLAMP(t);
1187 RNEUOB(signed24 a, unsigned8 s)
1195 result = (((unsigned24)a & MASK24) > OB_BIT(23) ? 1 : 0);
1198 t = ((unsigned24)a & MASK24) >> s;
1199 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1201 if (s > 1 && (a & OB_ONES(s-1)) != 0)
1206 result = OB_CLAMP(t);
1212 RZUOB(signed24 a, unsigned8 s)
1221 t = ((unsigned24)a & MASK24) >> s;
1222 result = OB_CLAMP(t);
1228 static const QH_ROUND qh_round[] = {
1229 RNASQH, RNAUQH, RNESQH, RNEUQH, RZSQH, RZUQH
1232 static const OB_ROUND ob_round[] = {
1233 NULL, RNAUOB, NULL, RNEUOB, NULL, RZUOB
1238 qh_vector_round(sim_cpu *cpu, address_word cia, unsigned64 v2, QH_ROUND round)
1240 unsigned64 result = 0;
1245 for (i = 0; i < 4; i++)
1247 h2 = (signed16)(v2 & 0xFFFF);
1249 h = (*round)(ACC.qh[i], h2);
1252 UnpredictableResult ();
1256 result |= ((unsigned64)((unsigned16)h) << s);
1263 qh_map_round(sim_cpu *cpu, address_word cia, signed16 h2, QH_ROUND round)
1265 unsigned64 result = 0;
1270 for (i = 0; i < 4; i++)
1273 h = (*round)(ACC.qh[i], h2);
1276 UnpredictableResult ();
1279 result |= ((unsigned64)((unsigned16)h) << s);
1286 ob_vector_round(sim_cpu *cpu, address_word cia, unsigned64 v2, OB_ROUND round)
1288 unsigned64 result = 0;
1293 for (i = 0; i < 8; i++)
1295 b2 = v2 & 0xFF; v2 >>= 8;
1296 b = (*round)(ACC.ob[i], b2);
1297 result |= ((unsigned64)b << s);
1304 ob_map_round(sim_cpu *cpu, address_word cia, unsigned8 b2, OB_ROUND round)
1306 unsigned64 result = 0;
1311 for (i = 0; i < 8; i++)
1313 b = (*round)(ACC.ob[i], b2);
1314 result |= ((unsigned64)b << s);
1322 mdmx_round_op(sim_cpu *cpu,
1329 unsigned64 result = 0;
1331 switch (MX_FMT (fmtsel))
1334 switch (MX_VT (fmtsel))
1337 op2 = ValueFPR(vt, fmt_mdmx);
1338 result = qh_map_round(cpu, cia, QH_ELEM(op2, fmtsel), qh_round[rm]);
1341 op2 = ValueFPR(vt, fmt_mdmx);
1342 result = qh_vector_round(cpu, cia, op2, qh_round[rm]);
1345 result = qh_map_round(cpu, cia, vt, qh_round[rm]);
1350 switch (MX_VT (fmtsel))
1353 op2 = ValueFPR(vt, fmt_mdmx);
1354 result = ob_map_round(cpu, cia, OB_ELEM(op2, fmtsel), ob_round[rm]);
1357 op2 = ValueFPR(vt, fmt_mdmx);
1358 result = ob_vector_round(cpu, cia, op2, ob_round[rm]);
1361 result = ob_map_round(cpu, cia, vt, ob_round[rm]);
1373 /* Shuffle operation. */
1376 enum {vs, ss, vt} source;
1380 static const sh_map ob_shuffle[][8] = {
1381 /* MDMX 2.0 encodings (3-4, 6-7). */
1382 /* vr5400 encoding (5), otherwise. */
1384 {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}}, /* RSVD */
1385 {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}}, /* RSVD */
1386 {{vs,0}, {ss,0}, {vs,1}, {ss,1}, {vs,2}, {ss,2}, {vs,3}, {ss,3}}, /* upsl */
1387 {{vt,1}, {vt,3}, {vt,5}, {vt,7}, {vs,1}, {vs,3}, {vs,5}, {vs,7}}, /* pach */
1388 {{vt,0}, {vt,2}, {vt,4}, {vt,6}, {vs,0}, {vs,2}, {vs,4}, {vs,6}}, /* pacl */
1389 {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}}, /* mixh */
1390 {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}} /* mixl */
1393 static const sh_map qh_shuffle[][4] = {
1394 {{vt,2}, {vs,2}, {vt,3}, {vs,3}}, /* mixh */
1395 {{vt,0}, {vs,0}, {vt,1}, {vs,1}}, /* mixl */
1396 {{vt,1}, {vt,3}, {vs,1}, {vs,3}}, /* pach */
1398 {{vt,1}, {vs,0}, {vt,3}, {vs,2}}, /* bfla */
1400 {{vt,2}, {vt,3}, {vs,2}, {vs,3}}, /* repa */
1401 {{vt,0}, {vt,1}, {vs,0}, {vs,1}} /* repb */
1406 mdmx_shuffle(sim_cpu *cpu,
1412 unsigned64 result = 0;
1416 if ((shop & 0x3) == 0x1) /* QH format. */
1420 for (i = 0; i < 4; i++)
1424 switch (qh_shuffle[op][i].source)
1436 result |= (((v >> 16*qh_shuffle[op][i].index) & 0xFFFF) << s);
1440 else if ((shop & 0x1) == 0x0) /* OB format. */
1444 for (i = 0; i < 8; i++)
1447 unsigned int ishift = 8*ob_shuffle[op][i].index;
1449 switch (ob_shuffle[op][i].source)
1452 b = (op1 >> ishift) & 0xFF;
1455 b = ((op1 >> ishift) & 0x80) ? 0xFF : 0;
1458 b = (op2 >> ishift) & 0xFF;
1464 result |= ((unsigned64)b << s);