1 /* Copyright (C) 1998, Cygnus Solutions */
6 #include "sim-assert.h"
12 /* Imported functions */
/* Error reporting hook; called below when the FIFO buffer cannot be enlarged. */
14 void device_error (device *me, char* message); /* device.c */
17 /* Internal function declarations */
/* Device I/O callbacks shared by both PKE units: handle a read / a write
   directed at a PKE register window or FIFO address. */
19 static int pke_io_read_buffer(device*, void*, int, address_word,
20 unsigned, sim_cpu*, sim_cia);
21 static int pke_io_write_buffer(device*, const void*, int, address_word,
22 unsigned, sim_cpu*, sim_cia);
/* Issue & swallow the next PKE opcode if possible/available. */
23 static void pke_issue(struct pke_device*);
/* Advance the PKE program counter by `num_words' words. */
24 static void pke_pc_advance(struct pke_device*, int num_words);
/* Locate operand word `word_num' (counted from 1) in the FIFO ... */
25 static unsigned_4* pke_pc_operand(struct pke_device*, int word_num);
/* ... and the FIFO quadword that contains that operand word. */
26 static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int word_num);
/* Write bytes into simulator memory while publishing `sourceaddr' in the
   PKE source-address tracking word. */
27 static int pke_track_write(struct pke_device*, const void* src, int len,
28 address_word dest, unsigned_4 sourceaddr);
/* Attach the memory regions of one PKE unit to the simulator. */
29 static void pke_attach(SIM_DESC sd, struct pke_device* me);
/* Device record for PKE unit 0.  The embedded `device' part names the unit
   and binds the read/write callbacks shared by both units. */
35 struct pke_device pke0_device =
37 { "pke0", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */
40 NULL, 0, 0, NULL, /* FIFO: starts out with NULL pointers and zero counters */
/* Device record for PKE unit 1; same layout and callbacks as PKE0. */
45 struct pke_device pke1_device =
47 { "pke1", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */
50 NULL, 0, 0, NULL, /* FIFO: starts out with NULL pointers and zero counters */
56 /* External functions */
59 /* Attach PKE addresses to main memory */
/* PKE0 entry point: forwards to pke_attach() with the PKE0 device record. */
62 pke0_attach(SIM_DESC sd)
64 pke_attach(sd, & pke0_device);
/* PKE1 entry point: forwards to pke_attach() with the PKE1 device record. */
68 pke1_attach(SIM_DESC sd)
70 pke_attach(sd, & pke1_device);
75 /* Issue a PKE instruction if possible */
/* One pke_issue() call per PKE unit, each on its own device record
   (mirrors the pke0_attach / pke1_attach pair). */
80 pke_issue(& pke0_device);
/* Second unit: must issue from pke1_device; passing pke0_device here would
   step PKE0 twice and never execute anything queued in PKE1's FIFO. */
86 pke_issue(& pke1_device);
91 /* Internal functions */
94 /* Attach PKE memory regions to simulator */
/* `me' selects the unit: every address below is chosen by me->pke_number,
   so the device bound to each region must be `me' as well.  Binding a fixed
   pke0_device / pke1_device would route one unit's register or FIFO
   accesses to the other unit's state. */
97 pke_attach(SIM_DESC sd, struct pke_device* me)
/* register window */
105 (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START,
106 PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/,
108 (device*) me,
/* FIFO port: one quadword wide */
117 (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR,
118 sizeof(quadword) /*nr_bytes*/,
120 (device*) me,
123 /* source-addr tracking word */
129 (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR,
130 sizeof(unsigned_4) /*nr_bytes*/,
133 zalloc(sizeof(unsigned_4)) /*buffer*/);
138 /* Handle a PKE read; return no. of bytes read */
/* Device read callback.  `me_' is the generic device pointer embedded in a
   struct pke_device; the access is classified as either a register-window
   read or a FIFO read for the unit identified by me->pke_number. */
141 pke_io_read_buffer(device *me_,
149 /* downcast to gather embedding pke_device struct */
150 struct pke_device* me = (struct pke_device*) me_;
152 /* find my address ranges */
153 address_word my_reg_start =
154 (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
155 address_word my_fifo_addr =
156 (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;
158 /* enforce that an access does not span more than one quadword */
/* `low' / `high' are the quadword-truncated addresses of the first and last
   byte of the access. */
159 address_word low = ADDR_TRUNC_QW(addr);
160 address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
164 /* classify address & handle */
165 if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
/* one register per quadword: register index = quadword offset / 16 */
168 int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
169 int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */
/* start from an all-zero quadword; only word 0 is filled from the register */
174 result[0] = result[1] = result[2] = result[3] = 0;
176 /* handle reads to individual registers; clear `readable' on error */
179 /* handle common case of register reading, side-effect free */
180 /* PKE1-only registers*/
186 if(me->pke_number == 0)
189 /* PKE0 & PKE1 common registers*/
208 result[0] = me->regs[reg_num][0];
211 /* handle common case of write-only registers */
217 ASSERT(0); /* test above should prevent this possibility */
220 /* perform transfer & return */
/* copy only the requested bytes, starting at the byte offset inside the
   quadword that the caller addressed */
224 memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes);
236 else if(addr >= my_fifo_addr &&
237 addr < my_fifo_addr + sizeof(quadword))
241 /* FIFO is not readable: return a word of zeroes */
242 memset(dest, 0, nr_bytes);
251 /* Handle a PKE write; return no. of bytes written */
/* Device write callback.  `me_' is the generic device pointer embedded in a
   struct pke_device.  Writes inside the register window update control
   state; a full-quadword write to the FIFO address appends one element to
   the instruction FIFO (growing its buffer on demand). */
254 pke_io_write_buffer(device *me_,
262 /* downcast to gather embedding pke_device struct */
263 struct pke_device* me = (struct pke_device*) me_;
265 /* find my address ranges */
266 address_word my_reg_start =
267 (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
268 address_word my_fifo_addr =
269 (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;
271 /* enforce that an access does not span more than one quadword */
272 address_word low = ADDR_TRUNC_QW(addr);
273 address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
277 /* classify address & handle */
278 if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
/* one register per quadword: register index = quadword offset / 16 */
281 int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
282 int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */
287 input[0] = input[1] = input[2] = input[3] = 0;
289 /* write user-given bytes into input */
290 memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes);
292 /* handle writes to individual registers; clear `writeable' on error */
/* Control bits are tested in ascending bit order: RST=0, FBK=1, STP=2, STC=3. */
296 /* XXX: order of evaluation? STP && STC ?? */
297 if(BIT_MASK_GET(input[0], 0, 0)) /* RST bit */
299 /* clear FIFO: also prevents re-execution attempt of
300 possible stalled instruction */
301 me->fifo_num_elements = me->fifo_pc;
302 /* clear registers */
303 memset(me->regs, 0, sizeof(me->regs));
307 if(BIT_MASK_GET(input[0], 1, 1)) /* FBK bit */
309 PKE_REG_MASK_SET(me, STAT, PFS, 1);
311 if(BIT_MASK_GET(input[0], 2, 2)) /* STP bit */
313 /* XXX: how to safely abort "currently executing" (=> stalled) instruction? */
314 PKE_REG_MASK_SET(me, STAT, PSS, 1);
/* STC is the bit after STP.  Testing bit 2 again would make every STP write
   immediately clear the PSS stall it had just set, and would make a real
   STC request (bit 3) a no-op. */
316 if(BIT_MASK_GET(input[0], 3, 3)) /* STC bit */
318 /* clear a bunch of status bits */
319 PKE_REG_MASK_SET(me, STAT, PSS, 0);
320 PKE_REG_MASK_SET(me, STAT, PFS, 0);
321 PKE_REG_MASK_SET(me, STAT, PIS, 0);
322 PKE_REG_MASK_SET(me, STAT, INT, 0);
323 PKE_REG_MASK_SET(me, STAT, ER0, 0);
324 PKE_REG_MASK_SET(me, STAT, ER1, 0);
325 /* will allow resumption of possible stalled instruction */
330 /* copy bottom three bits */
331 BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input[0], 0, 2));
335 /* copy bottom sixteen bits */
336 PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input[0], 0, 15));
337 /* reset MRK bit in STAT */
338 PKE_REG_MASK_SET(me, STAT, MRK, 0);
341 /* handle common case of read-only registers */
342 /* PKE1-only registers - not really necessary to handle separately */
348 if(me->pke_number == 0)
351 /* PKE0 & PKE1 common registers*/
353 /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */
373 ASSERT(0); /* test above should prevent this possibility */
390 else if(addr >= my_fifo_addr &&
391 addr < my_fifo_addr + sizeof(quadword))
394 struct fifo_quadword* fqw;
396 /* assert transfer size == 128 bits */
397 if(nr_bytes != sizeof(quadword))
400 /* ensure FIFO has enough elements */
401 if(me->fifo_num_elements == me->fifo_buffer_size)
/* buffer is full: grow it by a fixed increment of 20 entries */
404 int new_fifo_buffer_size = me->fifo_buffer_size + 20;
405 void* ptr = realloc((void*) me->fifo, new_fifo_buffer_size*sizeof(quadword));
/* NOTE(review): the allocation is sized in sizeof(quadword) units while the
   buffer is indexed as struct fifo_quadword elements (which also carry
   source_address and dma_tag_present) -- confirm the element size. */
409 /* oops, cannot enlarge FIFO any more */
410 device_error(me_, "Cannot enlarge FIFO buffer\n");
414 me->fifo_buffer_size = new_fifo_buffer_size;
417 /* add new quadword at end of FIFO */
418 fqw = & me->fifo[me->fifo_num_elements];
419 memcpy((void*) fqw->data, src, nr_bytes);
/* record where the DMA channel fetched this quadword from ... */
420 sim_read(CPU_STATE(cpu),
421 (SIM_ADDR) (me->pke_number == 0 ? DMA_CHANNEL0_SRCADDR : DMA_CHANNEL1_SRCADDR),
422 (void*) & fqw->source_address,
423 sizeof(address_word));
/* ... and whether the channel flagged a DMA tag for it */
424 sim_read(CPU_STATE(cpu),
425 (SIM_ADDR) (me->pke_number == 0 ? DMA_CHANNEL0_PKTFLAG : DMA_CHANNEL1_PKTFLAG),
426 (void*) & fqw->dma_tag_present,
430 me->fifo_num_elements++;
432 /* set FQC to "1" as FIFO is now not empty */
433 PKE_REG_MASK_SET(me, STAT, FQC, 1);
445 /* Issue & swallow next PKE opcode if possible/available */
/* Runs one issue attempt for the given unit in three stages: (1) decide
   whether the unit may execute at all, (2) fetch the instruction word at the
   current FIFO position, (3) decode it.  The per-opcode handlers follow. */
448 pke_issue(struct pke_device* me)
450 struct fifo_quadword* fqw;
452 unsigned_4 cmd, intr, num;
454 int next_pps_state; /* PPS after this instruction issue attempt */
456 /* 1 -- test go / no-go for PKE execution */
458 /* check for stall/halt control bits */
459 /* XXX: What is the PEW bit for? */
/* PSS and PFS block unconditionally; ER0, ER1 and PIS block only while the
   corresponding ERR bit (ME0, ME1, MII) is clear. */
460 if(PKE_REG_MASK_GET(me, STAT, PSS) ||
461 PKE_REG_MASK_GET(me, STAT, PFS) ||
462 /* maskable stall controls: ER0, ER1, PIS */
463 (PKE_REG_MASK_GET(me, STAT, ER0) && !PKE_REG_MASK_GET(me, ERR, ME0)) ||
464 (PKE_REG_MASK_GET(me, STAT, ER1) && !PKE_REG_MASK_GET(me, ERR, ME1)) ||
465 (PKE_REG_MASK_GET(me, STAT, PIS) && !PKE_REG_MASK_GET(me, ERR, MII)))
469 /* XXX: handle PSS by *skipping* instruction? */
471 /* confirm availability of new quadword of PKE instructions */
472 if(me->fifo_num_elements <= me->fifo_pc)
476 /* 2 -- fetch PKE instruction */
478 /* "fetch" instruction quadword */
479 fqw = & me->fifo[me->fifo_pc];
481 /* skip over DMA tags, if present */
/* only taken while the word PC is still inside the first two words of a
   quadword that was flagged as carrying a DMA tag */
482 if((fqw->dma_tag_present != 0) && (me->qw_pc < 2))
484 ASSERT(me->qw_pc == 0);
485 /* XXX: check validity of DMA tag; if bad, set ER0 flag */
489 /* "fetch" instruction word */
490 fw = fqw->data[me->qw_pc];
492 /* store it in PKECODE register */
493 me->regs[PKE_REG_CODE][0] = fw;
496 /* 3 -- decode PKE instruction */
498 /* PKE instruction format: [intr 0:0][pke-command 6:0][num 7:0][immediate 15:0],
499 so op-code is in top byte. */
500 intr = BIT_MASK_GET(fw, PKE_OPCODE_I_B, PKE_OPCODE_I_E);
501 cmd = BIT_MASK_GET(fw, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
502 num = BIT_MASK_GET(fw, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
503 imm = BIT_MASK_GET(fw, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
507 /* set INT flag in STAT register */
508 PKE_REG_MASK_SET(me, STAT, INT, 1);
509 /* XXX: send interrupt to R5900? */
/* report "decoding" while the handler runs; the final PPS value is written
   from next_pps_state once the handler is done */
513 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE);
514 next_pps_state = PKE_REG_STAT_PPS_IDLE; /* assume instruction completes */
/* Single-word opcodes: each handler updates registers from `imm' and then
   advances the PC by one word. */
517 if(IS_PKE_CMD(cmd, PKENOP))
519 /* no work required, yey */
520 pke_pc_advance(me, 1);
522 else if(IS_PKE_CMD(cmd, STCYCL))
524 /* copy immediate value into CYCLE reg */
525 me->regs[PKE_REG_CYCLE][0] = imm;
526 pke_pc_advance(me, 1);
528 else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET))
530 /* copy 10 bits to OFFSET field */
531 PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9));
533 PKE_REG_MASK_SET(me, DBF, DF, 0);
534 /* clear other DBF bit */
535 PKE_REG_MASK_SET(me, STAT, DBF, 0);
536 /* set TOPS = BASE */
537 PKE_REG_MASK_SET(me, TOPS, TOPS,
538 PKE_REG_MASK_GET(me, BASE, BASE));
539 pke_pc_advance(me, 1);
541 else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE))
543 /* copy 10 bits to BASE field */
544 PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9));
546 PKE_REG_MASK_SET(me, DBF, DF, 0);
547 /* clear other DBF bit */
548 PKE_REG_MASK_SET(me, STAT, DBF, 0);
549 /* set TOPS = BASE */
550 PKE_REG_MASK_SET(me, TOPS, TOPS,
551 PKE_REG_MASK_GET(me, BASE, BASE));
552 pke_pc_advance(me, 1);
554 else if(IS_PKE_CMD(cmd, ITOP))
556 /* copy 10 bits to ITOPS field */
557 PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9));
558 pke_pc_advance(me, 1);
560 else if(IS_PKE_CMD(cmd, STMOD))
/* bit ranges are inclusive throughout this file ("10 bits" = 0..9,
   "16 bits" = 0..15), so two bits are 0..1 */
562 /* copy 2 bits to MODE register */
563 PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 1));
564 pke_pc_advance(me, 1);
566 else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3)) /* MSKPATH3 */
568 /* XXX: what to do with this? DMA control register? */
569 pke_pc_advance(me, 1);
571 else if(IS_PKE_CMD(cmd, PKEMARK))
573 /* copy 16 bits to MARK register */
574 PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15));
575 /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
576 PKE_REG_MASK_SET(me, STAT, MRK, 1);
577 pke_pc_advance(me, 1);
/* Flush opcodes: read the VU status word and advance past the opcode only
   when the tested busy bits are clear; otherwise the opcode is retried. */
579 else if(IS_PKE_CMD(cmd, FLUSHE))
581 /* read VU status word */
584 (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
589 /* check if VBS bit is clear, i.e., VU is idle */
590 if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0)
594 pke_pc_advance(me, 1);
599 next_pps_state = PKE_REG_STAT_PPS_WAIT;
600 /* retry this instruction next clock */
/* FLUSH is accepted on PKE1 only, so the pke_number selector below always
   picks VPE1_STAT */
603 else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH))
605 /* read VU status word */
608 (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
613 /* check if VGW bit is clear, i.e., PATH1 is idle */
614 /* simulator design implies PATH2 is always "idle" */
615 if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 &&
616 BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 &&
617 1 /* PATH2 always idle */)
623 pke_pc_advance(me, 1);
/* NOTE(review): unlike FLUSHE, no `next_pps_state = PKE_REG_STAT_PPS_WAIT'
   is visible on this stall path -- confirm PPS is not left at IDLE. */
628 /* retry this instruction next clock */
631 else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA))
633 /* read VU status word */
636 (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
641 /* check if VGW bit is clear, i.e., PATH1 is idle */
642 /* simulator design implies PATH2 is always "idle" */
643 /* XXX: simulator design implies PATH3 is always "idle" */
644 if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 &&
645 BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 &&
646 1 /* PATH2 always idle */ &&
647 1 /* PATH3 always idle */)
654 pke_pc_advance(me, 1);
/* NOTE(review): same as FLUSH -- no WAIT state assignment is visible here. */
659 /* retry this instruction next clock */
/* Microprogram-call opcodes (MSCAL, MSCNT, MSCALF).  Each waits for the VU
   status bits it tests to clear, performs the PKE1-only double-buffer
   bookkeeping (flip DF, mirror it into STAT.DBF, recompute TOPS from BASE
   and OFFSET, latch ITOP and TOP), then writes a VU PC-start address. */
662 else if(IS_PKE_CMD(cmd, PKEMSCAL))
664 /* read VU status word */
667 (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
672 /* check if VBS bit is clear, i.e., VU is idle */
673 if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0)
678 /* perform PKE1-unique processing for microprogram calls */
679 if(me->pke_number == 1)
/* toggle the double-buffer flag and mirror it into STAT */
682 PKE_REG_MASK_SET(me, DBF, DF,
683 PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
684 PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
685 /* compute new TOPS */
/* TOPS = BASE + DF * OFFSET: OFFSET is added only while DF is set */
686 PKE_REG_MASK_SET(me, TOPS, TOPS,
687 (PKE_REG_MASK_GET(me, BASE, BASE) +
688 (PKE_REG_MASK_GET(me, DBF, DF) *
689 PKE_REG_MASK_GET(me, OFST, OFFSET))));
690 /* compute new ITOP and TOP */
691 PKE_REG_MASK_SET(me, ITOP, ITOP,
692 PKE_REG_MASK_GET(me, ITOPS, ITOPS));
693 PKE_REG_MASK_SET(me, TOP, TOP,
694 PKE_REG_MASK_GET(me, TOPS, TOPS));
698 vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */
699 /* write new PC; callback function gets VU running */
701 (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
705 pke_pc_advance(me, 1);
710 next_pps_state = PKE_REG_STAT_PPS_WAIT;
711 /* retry this instruction next clock */
/* MSCNT: same bookkeeping, but the current PC-start value is read back and
   rewritten instead of taking a new PC from `imm' */
714 else if(IS_PKE_CMD(cmd, PKEMSCNT))
716 /* read VU status word */
719 (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
724 /* check if VBS bit is clear, i.e., VU is idle */
725 if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0)
730 /* flip DBF etc. for PKE1 */
731 if(me->pke_number == 1)
733 PKE_REG_MASK_SET(me, DBF, DF,
734 PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
735 PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
736 PKE_REG_MASK_SET(me, TOPS, TOPS,
737 (PKE_REG_MASK_GET(me, BASE, BASE) +
738 (PKE_REG_MASK_GET(me, DBF, DF) *
739 PKE_REG_MASK_GET(me, OFST, OFFSET))));
740 PKE_REG_MASK_SET(me, ITOP, ITOP,
741 PKE_REG_MASK_GET(me, ITOPS, ITOPS));
742 PKE_REG_MASK_SET(me, TOP, TOP,
743 PKE_REG_MASK_GET(me, TOPS, TOPS));
748 (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
751 /* rewrite its PC; callback function gets VU running */
753 (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
757 pke_pc_advance(me, 1);
762 next_pps_state = PKE_REG_STAT_PPS_WAIT;
763 /* retry this instruction next clock */
/* MSCALF: PKE1 only; additionally requires the VGW bit to be clear */
766 else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF))
768 /* read VU status word */
771 (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
776 /* check if VGW bit is clear, i.e., PATH1 is idle */
777 /* simulator design implies PATH2 is always "idle" */
778 if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 &&
779 BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 &&
780 1 /* PATH2 always idle */)
787 /* flip DBF etc. for PKE1 */
/* always true here: this branch is only entered with pke_number == 1 */
788 if(me->pke_number == 1)
790 PKE_REG_MASK_SET(me, DBF, DF,
791 PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
792 PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
793 PKE_REG_MASK_SET(me, TOPS, TOPS,
794 (PKE_REG_MASK_GET(me, BASE, BASE) +
795 (PKE_REG_MASK_GET(me, DBF, DF) *
796 PKE_REG_MASK_GET(me, OFST, OFFSET))));
797 PKE_REG_MASK_SET(me, ITOP, ITOP,
798 PKE_REG_MASK_GET(me, ITOPS, ITOPS));
799 PKE_REG_MASK_SET(me, TOP, TOP,
800 PKE_REG_MASK_GET(me, TOPS, TOPS));
804 vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */
805 /* write new PC; callback function gets VU running */
807 (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
811 pke_pc_advance(me, 1);
816 next_pps_state = PKE_REG_STAT_PPS_WAIT;
817 /* retry this instruction next clock */
/* Register-load opcodes with in-stream operands.  pke_pc_operand() is used
   as the "is the FIFO full enough" probe: the handler proceeds only once the
   last operand word is available, otherwise it waits and retries. */
820 else if(IS_PKE_CMD(cmd, STMASK))
822 /* check that FIFO has one more word for STMASK operand */
825 mask = pke_pc_operand(me, 1);
828 /* "transferring" operand */
829 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
830 /* fill the register */
831 PKE_REG_MASK_SET(me, MASK, MASK, *mask);
/* skip the opcode word plus its single operand word */
833 pke_pc_advance(me, 2);
837 /* need to wait for another word */
838 next_pps_state = PKE_REG_STAT_PPS_WAIT;
839 /* retry this instruction next clock */
842 else if(IS_PKE_CMD(cmd, STROW))
844 /* check that FIFO has four more words for STROW operand */
847 last_op = pke_pc_operand(me, 4);
850 /* "transferring" operand */
851 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
853 /* copy ROW registers: must all exist if 4th operand exists */
854 me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1);
855 me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2);
856 me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3);
857 me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4);
/* skip the opcode word plus four operand words */
860 pke_pc_advance(me, 5);
864 /* need to wait for another word */
865 next_pps_state = PKE_REG_STAT_PPS_WAIT;
866 /* retry this instruction next clock */
869 else if(IS_PKE_CMD(cmd, STCOL))
871 /* check that FIFO has four more words for STCOL operand */
874 last_op = pke_pc_operand(me, 4);
877 /* "transferring" operand */
878 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
880 /* copy COL registers: must all exist if 4th operand exists */
881 me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1);
882 me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2);
883 me->regs[PKE_REG_C2][0] = * pke_pc_operand(me, 3);
884 me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4);
/* skip the opcode word plus four operand words */
887 pke_pc_advance(me, 5);
891 /* need to wait for another word */
892 next_pps_state = PKE_REG_STAT_PPS_WAIT;
893 /* retry this instruction next clock */
/* MPG: copy num*2 operand words (num 64-bit VU instructions) from the FIFO
   into VU instruction memory, recording for each word the source address of
   the FIFO quadword it came from.  Requires the whole operand to be in the
   FIFO and the VU to be idle; otherwise waits and retries. */
896 else if(IS_PKE_CMD(cmd, MPG))
898 unsigned_4* last_mpg_word;
900 /* map zero to max+1 */
901 if(num==0) num=0x100;
903 /* check that FIFO has a few more words for MPG operand */
904 last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */
905 if(last_mpg_word != NULL)
907 /* perform implied FLUSHE */
908 /* read VU status word */
911 (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
916 /* check if VBS bit is clear, i.e., VU is idle */
917 if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0)
922 /* "transferring" operand */
923 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
925 /* transfer VU instructions, one word per iteration */
926 for(i=0; i<num*2; i++)
928 address_word vu_addr_base, vu_addr;
929 address_word vutrack_addr_base, vutrack_addr;
/* operand words are numbered from 1, so iteration i consumes word i+1;
   indexing by `num' would re-read one fixed word on every pass */
930 struct fifo_quadword* fq = pke_pc_fifo(me, i+1);
931 unsigned_4* operand = pke_pc_operand(me, i+1);
933 /* imm: in 64-bit units for MPG instruction */
937 /* VU*_MEM0 : instruction memory */
/* PKE0 feeds VU0 and PKE1 feeds VU1 */
938 vu_addr_base = (me->pke_number == 0) ?
939 VU0_MEM0_WINDOW_START : VU1_MEM0_WINDOW_START;
/* NOTE(review): destination advances by `i' and `imm*2' while each write is
   sizeof(unsigned_4) bytes -- confirm the address units of this window. */
940 vu_addr = vu_addr_base + (imm*2) + i;
942 /* VU*_MEM0_TRACK : source-addr tracking table */
943 vutrack_addr_base = (me->pke_number == 0) ?
944 VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START;
/* the tracking entry lives in the tracking table, not in instruction memory */
945 vutrack_addr = vutrack_addr_base + (imm*2) + i;
947 /* write data into VU memory */
948 pke_track_write(me, operand, sizeof(unsigned_4),
949 vu_addr, fq->source_address);
951 /* write srcaddr into VU srcaddr tracking table */
953 (SIM_ADDR) vutrack_addr,
954 (void*) & fq->source_address,
/* skip the opcode word plus all operand words */
960 pke_pc_advance(me, 1 + num*2);
965 next_pps_state = PKE_REG_STAT_PPS_WAIT;
966 /* retry this instruction next clock */
968 } /* if FIFO full enough */
971 /* need to wait for another word */
972 next_pps_state = PKE_REG_STAT_PPS_WAIT;
973 /* retry this instruction next clock */
/* DIRECT / DIRECTHL: forward imm quadwords (imm*4 operand words) from the
   FIFO to the GPUIF PATH2 FIFO, one assembled quadword at a time. */
976 else if(IS_PKE_CMD(cmd, DIRECT) || IS_PKE_CMD(cmd, DIRECTHL)) /* treat identically */
978 /* check that FIFO has a few more words for DIRECT operand */
979 unsigned_4* last_direct_word;
981 /* map zero to max+1 */
982 if(imm==0) imm=0x10000;
984 last_direct_word = pke_pc_operand(me, imm*4); /* imm: number of 128-bit words */
985 if(last_direct_word != NULL)
991 /* "transferring" operand */
992 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
994 /* transfer GPUIF quadwords, one word per iteration */
995 for(i=0; i<imm*4; i++)
/* operand words are numbered from 1, so iteration i consumes word i+1.
   Indexing by `num' is wrong twice over: DIRECT sizes its operand with
   `imm', and num == 0 would trip ASSERT(word_num > 0) in the lookup. */
997 struct fifo_quadword* fq = pke_pc_fifo(me, i+1);
998 unsigned_4* operand = pke_pc_operand(me, i+1);
1000 /* collect word into quadword */
1001 fifo_data[i%4] = *operand;
1003 /* write to GPUIF FIFO only with full quadword */
/* NOTE(review): the FIFO address advances by i/4 per quadword -- confirm
   that this is the intended addressing of the PATH2 FIFO. */
1006 address_word gpuif_fifo = GPUIF_PATH2_FIFO_ADDR+(i/4);
1007 pke_track_write(me, fifo_data, sizeof(quadword),
1008 (SIM_ADDR) gpuif_fifo, fq->source_address);
1010 } /* write collected quadword */
1012 } /* GPUIF xfer loop */
/* skip the opcode word plus all operand words */
1015 pke_pc_advance(me, 1 + imm*4);
1016 } /* if FIFO full enough */
1019 /* need to wait for another word */
1020 next_pps_state = PKE_REG_STAT_PPS_WAIT;
1021 /* retry this instruction next clock */
/* UNPACK: expand packed operand data from the FIFO into VU data-memory
   quadwords.  For every destination vector: compute its address, read the
   old quadword, build the unpacked quadword (or zeroes for the fill part of
   a cyclic write), optionally apply the per-column mask register, apply the
   MODE accumulation, and write the result back with source tracking. */
1024 else if(IS_PKE_CMD(cmd, UNPACK)) /* warning: monster complexity */
/* sub-fields of the command byte: vl = bits 0..1, vn = bits 2..3,
   m (use mask register) = bit 4 */
1026 short vn = BIT_MASK_GET(cmd, 2, 3);
1027 short vl = BIT_MASK_GET(cmd, 0, 1);
1028 short vnvl = BIT_MASK_GET(cmd, 0, 3);
1029 int m = BIT_MASK_GET(cmd, 4, 4);
1030 short cl = PKE_REG_MASK_GET(me, CYCLE, CL);
1031 short wl = PKE_REG_MASK_GET(me, CYCLE, WL);
1032 int n, num_operands;
1033 unsigned_4* last_operand_word;
1035 /* map zero to max+1 */
1036 if(num==0) num=0x100;
1038 /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
/* NOTE(review): this divides by wl before the wl == 0 mapping further down;
   confirm the lines elided above keep wl == 0 away from this expression. */
1042 n = cl * (num/wl) + PKE_LIMIT(num % wl, cl);
1043 num_operands = (((sizeof(unsigned_4) >> vl) * (vn+1) * n)/sizeof(unsigned_4));
1045 /* confirm that FIFO has enough words in it */
1046 last_operand_word = pke_pc_operand(me, num_operands);
1047 if(last_operand_word != NULL)
1049 address_word vu_addr_base;
1050 int operand_num, vector_num;
1052 /* "transferring" operand */
1053 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
1055 /* XXX: don't check whether VU is idle?? */
/* destination base: the unit's VU MEM1 window plus the low 10 bits of imm */
1057 if(me->pke_number == 0)
1058 vu_addr_base = VU0_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9);
1061 vu_addr_base = VU1_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9);
1062 if(BIT_MASK_GET(imm, 15, 15)) /* fetch R flag from imm word */
1063 vu_addr_base += PKE_REG_MASK_GET(me, TOPS, TOPS);
1066 /* XXX: vu_addr overflow check */
1068 /* transfer given number of vectors */
1069 operand_num = 1; /* word index into instruction stream: 1..num_operands */
1070 vector_num = 0; /* vector number being processed: 0..num-1 */
1071 while(operand_num <= num_operands)
1073 quadword vu_old_data;
1074 quadword vu_new_data;
1075 quadword unpacked_data;
1076 address_word vu_addr;
1077 struct fifo_quadword* fq;
1082 /* compute VU destination address, as bytes in R5900 memory */
1085 /* map zero to max+1 */
1086 if(wl == 0) wl = 0x0100;
1087 vu_addr = vu_addr_base + 16*(cl*(vector_num/wl) + (vector_num%wl));
1090 vu_addr = vu_addr_base + 16*vector_num;
1092 /* read old VU data word at address */
1093 sim_read(NULL, (SIM_ADDR) vu_addr, (void*) & vu_old_data, sizeof(vu_old_data));
1095 /* Let sourceaddr track the first operand */
1096 fq = pke_pc_fifo(me, operand_num);
1098 /* For cyclic unpack, next operand quadword may come from instruction stream
1100 if((cl < wl) && ((vector_num % wl) >= cl)) /* wl != 0, set above */
1102 /* clear operand - used only in a "indeterminate" state */
1103 for(i = 0; i < 4; i++)
1104 unpacked_data[i] = 0;
1108 /* compute unpacked words from instruction stream */
1111 case PKE_UNPACK_S_32:
1112 case PKE_UNPACK_V2_32:
1113 case PKE_UNPACK_V3_32:
1114 case PKE_UNPACK_V4_32:
1115 /* copy (vn+1) 32-bit values */
1116 for(i = 0; i < vn+1; i++)
1118 unsigned_4* operand = pke_pc_operand(me, operand_num);
1119 unpacked_data[i] = *operand;
1124 case PKE_UNPACK_S_16:
1125 case PKE_UNPACK_V2_16:
1126 case PKE_UNPACK_V3_16:
1127 case PKE_UNPACK_V4_16:
1128 /* copy (vn+1) 16-bit values, packed two-per-word */
1129 for(i=0; i<vn+1; i+=2)
1131 unsigned_4* operand = pke_pc_operand(me, operand_num);
1132 unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 15, 31);
1133 unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 16, 31, 31);
1138 case PKE_UNPACK_S_8:
1139 case PKE_UNPACK_V2_8:
1140 case PKE_UNPACK_V3_8:
1141 case PKE_UNPACK_V4_8:
1142 /* copy (vn+1) 8-bit values, packed four-per-word */
1143 for(i=0; i<vn+1; i+=4)
1145 unsigned_4* operand = pke_pc_operand(me, operand_num);
1146 unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 7, 31);
1147 unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 8, 15, 31);
1148 unpacked_data[i+2] = BIT_MASK_GET_SX(*operand, 16, 23, 31);
1149 unpacked_data[i+3] = BIT_MASK_GET_SX(*operand, 24, 31, 31);
1154 case PKE_UNPACK_V4_5:
1155 /* copy four 1/5/5/5-bit values, packed into a sixteen-bit */
1156 for(i=0; i<vn+1; i+=4)
1158 unsigned_4* operand = pke_pc_operand(me, operand_num);
1159 unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 4, 31);
1160 unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 5, 9, 31);
1161 unpacked_data[i+2] = BIT_MASK_GET_SX(*operand, 10, 14, 31);
1162 unpacked_data[i+3] = BIT_MASK_GET_SX(*operand, 15, 15, 31);
1163 /* ignore other 16 bits in operand */
1168 default: /* bad UNPACK code */
1170 /* XXX: how to handle? */
1171 /* set ER1 flag in STAT register */
1172 PKE_REG_MASK_SET(me, STAT, ER1, 1);
1177 /* compute replacement word - function of vn, vl, mask */
1178 if(m) /* use mask register? */
1180 /* compute index into mask register for this word */
1181 int mask_index = PKE_LIMIT(vector_num % wl, 3); /* wl != 0, set above */
/* a quadword has four columns (indices 0..3); stopping at 3 would leave
   vu_new_data[3] uninitialized and still write it to VU memory below */
1183 for(i=0; i<4; i++) /* loop over columns */
1185 int mask_op = PKE_MASKREG_GET(me, mask_index, i);
1186 unsigned_4* masked_value = NULL;
1187 unsigned_4 zero = 0;
1191 case PKE_MASKREG_INPUT:
1192 /* for vn == 0, all columns are copied from column 0 */
1194 masked_value = & unpacked_data[0];
1196 masked_value = & zero; /* XXX: what to put here? */
1198 masked_value = & unpacked_data[i];
1201 case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */
1202 masked_value = & me->regs[PKE_REG_R0 + i][0];
1205 case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */
1206 masked_value = & me->regs[PKE_REG_C0 + PKE_LIMIT(vector_num,3)][0];
1209 case PKE_MASKREG_NOTHING:
1210 /* "write inhibit" by re-copying old data */
1211 masked_value = & vu_old_data[i];
1216 /* no other cases possible */
1219 /* copy masked value for column */
1220 memcpy(& vu_new_data[i], masked_value, sizeof(unsigned_4));
1221 } /* loop over columns */
1225 /* no mask - just copy over entire unpacked quadword */
1226 memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data));
1229 /* process STMOD register for accumulation operations */
1230 switch(PKE_REG_MASK_GET(me, MODE, MDE))
1232 case PKE_MODE_ADDROW: /* add row registers to output data */
1234 /* exploit R0..R3 contiguity */
1235 vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
1238 case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */
1241 /* exploit R0..R3 contiguity */
1242 vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
1243 me->regs[PKE_REG_R0 + i][0] = vu_new_data[i];
1247 case PKE_MODE_INPUT: /* pass data through */
1252 /* write replacement word */
1253 pke_track_write(me, vu_new_data, sizeof(vu_new_data),
1254 (SIM_ADDR) vu_addr, fq->source_address);
1256 /* next vector please */
1258 } /* vector transfer loop */
1259 } /* PKE FIFO full enough */
1262 /* need to wait for another word */
1263 next_pps_state = PKE_REG_STAT_PPS_WAIT;
1264 /* retry this instruction next clock */
/* Fallback for a command that matched none of the handlers above. */
1270 /* set ER1 flag in STAT register */
1271 PKE_REG_MASK_SET(me, STAT, ER1, 1);
1272 /* advance over faulty word */
1273 pke_pc_advance(me, 1);
/* Common exit: publish the state chosen by the handler (next_pps_state is
   preset to IDLE and switched to WAIT by handlers that must retry). */
1276 /* PKE is now idle or waiting */
1277 PKE_REG_MASK_SET(me, STAT, PPS, next_pps_state);
1285 /* advance the PC by given number of words; update STAT/FQC field */
/* The PC is a (fifo_pc, qw_pc) pair: fifo_pc indexes a FIFO quadword and
   qw_pc a word inside it.  `num_words' must be positive. */
1288 pke_pc_advance(struct pke_device* me, int num_words)
1290 ASSERT(num_words > 0);
1292 me->qw_pc += num_words;
1293 /* handle overflow */
/* qw_pc must end up below 4, the number of words in a quadword */
1294 while(me->qw_pc >= 4)
1300 /* clear FQC if FIFO is now empty */
/* empty means the PC has reached the element count */
1301 if(me->fifo_num_elements == me->fifo_pc)
1303 PKE_REG_MASK_SET(me, STAT, FQC, 0);
1310 /* Return pointer to given operand# in FIFO. `word_num' starts at 1.
1311 If FIFO is not full enough, return 0. */
/* The operand position is computed relative to the current PC
   (fifo_pc, qw_pc) without modifying it; callers also use the result as an
   "is the FIFO full enough" probe. */
1314 pke_pc_operand(struct pke_device* me, int word_num)
1316 int new_qw_pc, new_fifo_pc;
1317 unsigned_4* operand;
1319 ASSERT(word_num > 0);
1321 new_fifo_pc = me->fifo_pc;
/* plain assignment: new_qw_pc has no prior value to accumulate into */
1322 new_qw_pc = me->qw_pc + word_num;
1324 /* handle overflow */
1325 while(new_qw_pc >= 4)
1331 /* not enough elements */
/* test the quadword that holds the operand, not the current PC; any index
   at or past the element count lies outside the filled FIFO */
1332 if(me->fifo_num_elements <= new_fifo_pc)
1335 operand = & me->fifo[new_fifo_pc].data[new_qw_pc];
1342 /* Return pointer to FIFO quadword containing given operand# in FIFO.
1343 `word_num' starts at 1. If FIFO is not full enough, return 0. */
/* Same position arithmetic as pke_pc_operand(), but yields the enclosing
   FIFO element so that callers can reach its source_address. */
1345 struct fifo_quadword*
1346 pke_pc_fifo(struct pke_device* me, int word_num)
1348 int new_qw_pc, new_fifo_pc;
1349 struct fifo_quadword* operand;
1351 ASSERT(word_num > 0);
1353 new_fifo_pc = me->fifo_pc;
/* plain assignment: new_qw_pc has no prior value to accumulate into */
1354 new_qw_pc = me->qw_pc + word_num;
1356 /* handle overflow */
1357 while(new_qw_pc >= 4)
1363 /* not enough elements */
/* test the quadword that holds the operand, not the current PC; any index
   at or past the element count lies outside the filled FIFO */
1364 if(me->fifo_num_elements <= new_fifo_pc)
1367 operand = & me->fifo[new_fifo_pc];
1374 /* Write a bunch of bytes into simulator memory. Store the given source address into the
1375 PKE sourceaddr tracking word. */
/* The tracking word of the selected unit holds `sourceaddr' only for the
   duration of the data write and is reset to zero afterwards.  `rc' receives
   the result of the data write. */
1377 pke_track_write(struct pke_device* me, const void* src, int len,
1378 address_word dest, unsigned_4 sourceaddr)
1381 unsigned_4 no_sourceaddr = 0;
1383 /* write srcaddr into PKE srcaddr tracking */
/* the cast covers the whole conditional, so it applies to the selected
   address rather than to the comparison result */
1385 (SIM_ADDR) (me->pke_number == 0 ? PKE0_SRCADDR : PKE1_SRCADDR),
1386 (void*) & sourceaddr,
1387 sizeof(unsigned_4));
1389 /* write bytes into simulator */
1390 rc = sim_write(NULL,
1395 /* clear srcaddr from PKE srcaddr tracking */
1397 (SIM_ADDR) (me->pke_number == 0 ? PKE0_SRCADDR : PKE1_SRCADDR),
1398 (void*) & no_sourceaddr,
1399 sizeof(unsigned_4));