1 /**************************************************************************
3 * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved.
4 * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
20 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 **************************************************************************/
26 * PPC code generation.
27 * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf
28 * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf
31 * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2
32 * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html
33 * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf
40 #include "util/u_memory.h"
41 #include "util/u_debug.h"
42 #include "rtasm_execmem.h"
43 #include "rtasm_ppc.h"
/* Reset *p, allocate the initial instruction buffer and reserve the general
 * purpose registers that generated code is not allowed to use. */
47 ppc_init_func(struct ppc_function *p)
51 memset(p, 0, sizeof(*p));
54 p->max_inst = 100; /* first guess at buffer size */
55 p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
63 /* only allow using gp registers 3..11 for now:
 * the loops below reserve 0..2 and 12..PPC_NUM_REGS-1.
 * NOTE(review): this comment used to say 3..12; if register 12 is meant to
 * be allocatable the second loop should start at 13 -- confirm. */
64 for (i = 0; i < 3; i++)
65 ppc_reserve_register(p, i);
66 for (i = 12; i < PPC_NUM_REGS; i++)
67 ppc_reserve_register(p, i);
72 ppc_release_func(struct ppc_function *p)
74 assert(p->num_inst <= p->max_inst);
75 if (p->store != NULL) {
76 rtasm_exec_free(p->store);
83 ppc_num_instructions(const struct ppc_function *p)
89 void (*ppc_get_func(struct ppc_function *p))(void)
93 if (DISASSEM && p->store)
94 debug_printf("disassemble %p %p\n", p->store, p->csr);
96 if (p->store == p->error_overflow)
97 return (void (*)(void)) NULL;
100 return (void (*)(void)) pointer_to_func(p->store);
105 ppc_dump_func(const struct ppc_function *p)
108 for (i = 0; i < p->num_inst; i++) {
109 debug_printf("%3u: 0x%08x\n", i, p->store[i]);
115 ppc_print_code(struct ppc_function *p, boolean enable)
122 ppc_indent(struct ppc_function *p, int spaces)
129 indent(const struct ppc_function *p)
132 for (i = 0; i < p->indent; i++) {
139 ppc_comment(struct ppc_function *p, int rel_indent, const char *s)
142 p->indent += rel_indent;
144 p->indent -= rel_indent;
151 * Mark a register as being unavailable.
154 ppc_reserve_register(struct ppc_function *p, int reg)
156 assert(reg < PPC_NUM_REGS);
/* Set the register's bit in the in-use mask.  Shift an unsigned 1: shifting a
 * signed int into bit 31 is undefined behaviour in C. */
157 p->reg_used |= (1u << reg);
163 * Allocate a general purpose register.
164 * \return register index or -1 if none left.
167 ppc_allocate_register(struct ppc_function *p)
/* Scan the in-use mask for the first clear bit. */
170 for (i = 0; i < PPC_NUM_REGS; i++) {
/* unsigned 1 so the mask for bit 31 is built without signed-shift overflow */
171 const uint32_t mask = 1u << i;
172 if ((p->reg_used & mask) == 0) {
/* reached only when every register is already marked as used */
177 printf("OUT OF PPC registers!\n");
183 * Mark the given general purpose register as "unallocated".
186 ppc_release_register(struct ppc_function *p, int reg)
188 assert(reg < PPC_NUM_REGS);
/* The register must currently be marked as used.  An unsigned 1 is shifted so
 * that bit 31 can be addressed without signed-shift overflow. */
189 assert(p->reg_used & (1u << reg));
/* clear the register's bit in the in-use mask */
190 p->reg_used &= ~(1u << reg);
195 * Allocate a floating point register.
196 * \return register index or -1 if none left.
199 ppc_allocate_fp_register(struct ppc_function *p)
/* Scan the floating point in-use mask for the first clear bit. */
202 for (i = 0; i < PPC_NUM_FP_REGS; i++) {
/* unsigned 1 so the mask for bit 31 is built without signed-shift overflow */
203 const uint32_t mask = 1u << i;
204 if ((p->fp_used & mask) == 0) {
/* reached only when every FP register is already marked as used */
209 printf("OUT OF PPC FP registers!\n");
215 * Mark the given floating point register as "unallocated".
218 ppc_release_fp_register(struct ppc_function *p, int reg)
220 assert(reg < PPC_NUM_FP_REGS);
/* The register must currently be marked as used.  An unsigned 1 is shifted so
 * that bit 31 can be addressed without signed-shift overflow. */
221 assert(p->fp_used & (1u << reg));
/* clear the register's bit in the FP in-use mask */
222 p->fp_used &= ~(1u << reg);
227 * Allocate a vector register.
228 * \return register index or -1 if none left.
231 ppc_allocate_vec_register(struct ppc_function *p)
/* Scan the vector in-use mask for the first clear bit. */
234 for (i = 0; i < PPC_NUM_VEC_REGS; i++) {
/* unsigned 1 so the mask for bit 31 is built without signed-shift overflow */
235 const uint32_t mask = 1u << i;
236 if ((p->vec_used & mask) == 0) {
/* reached only when every vector register is already marked as used */
241 printf("OUT OF PPC VEC registers!\n");
247 * Mark the given vector register as "unallocated".
250 ppc_release_vec_register(struct ppc_function *p, int reg)
252 assert(reg < PPC_NUM_VEC_REGS);
/* The register must currently be marked as used.  An unsigned 1 is shifted so
 * that bit 31 can be addressed without signed-shift overflow. */
253 assert(p->vec_used & (1u << reg));
/* clear the register's bit in the vector in-use mask */
254 p->vec_used &= ~(1u << reg);
259 * Append instruction to instruction buffer. Grow buffer if out of room.
262 emit_instruction(struct ppc_function *p, uint32_t inst_bits)
265 return; /* out of memory, drop the instruction */
/* Buffer is full: double its capacity before appending. */
267 if (p->num_inst == p->max_inst) {
268 /* allocate larger buffer */
270 p->max_inst *= 2; /* 2x larger */
271 newbuf = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
/* carry over the instructions emitted so far, then release the old buffer */
273 memcpy(newbuf, p->store, p->num_inst * PPC_INST_SIZE);
275 rtasm_exec_free(p->store);
/* append the new instruction word and advance the instruction count */
284 p->store[p->num_inst++] = inst_bits;
300 emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
301 const char *format, boolean transpose)
309 emit_instruction(p, inst.bits);
313 printf(format, vD, vB, vA);
315 printf(format, vD, vA, vB);
333 emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
343 emit_instruction(p, inst.bits);
346 printf(format, vD, vA, vB);
364 emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC,
374 emit_instruction(p, inst.bits);
377 printf(format, vD, vA, vB, vC);
393 emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk)
400 emit_instruction(p, inst.bits);
418 emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh,
425 inst.inst.unused = 0x0;
429 emit_instruction(p, inst.bits);
433 dump_xl(const char *name, uint inst)
438 debug_printf("%s = 0x%08x\n", name, inst);
439 debug_printf(" op: %d 0x%x\n", i.inst.op, i.inst.op);
440 debug_printf(" bo: %d 0x%x\n", i.inst.bo, i.inst.bo);
441 debug_printf(" bi: %d 0x%x\n", i.inst.bi, i.inst.bi);
442 debug_printf(" unused: %d 0x%x\n", i.inst.unused, i.inst.unused);
443 debug_printf(" bh: %d 0x%x\n", i.inst.bh, i.inst.bh);
444 debug_printf(" op2: %d 0x%x\n", i.inst.op2, i.inst.op2);
445 debug_printf(" lk: %d 0x%x\n", i.inst.lk, i.inst.lk);
462 emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2,
471 inst.inst.unused = 0x0;
472 emit_instruction(p, inst.bits);
475 printf(format, vrs, ra, rb);
491 emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si,
492 const char *format, boolean transpose)
495 assert(si >= -32768);
500 inst.inst.si = (unsigned) (si & 0xffff);
501 emit_instruction(p, inst.bits);
505 printf(format, rt, si, ra);
507 printf(format, rt, ra, si);
526 emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2,
527 uint rc, const char *format)
534 inst.inst.unused = 0x0;
537 emit_instruction(p, inst.bits);
540 printf(format, frt, fra, frb);
559 emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe,
560 uint op2, uint rc, const char *format)
570 emit_instruction(p, inst.bits);
573 printf(format, rt, ra, rb);
582 ** float vector arithmetic
585 /** vector float add */
587 ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB)
/* all three operands are vector registers, so each one is printed with a 'v' prefix */
589 emit_vx(p, 10, vD, vA, vB, "vaddfp\tv%u, v%u, v%u\n", FALSE);
592 /** vector float subtract */
594 ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB)
596 emit_vx(p, 74, vD, vA, vB, "vsubfp\tv%u, v%u, v%u\n", FALSE);
599 /** vector float min */
601 ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB)
603 emit_vx(p, 1098, vD, vA, vB, "vminfp\tv%u, v%u, v%u\n", FALSE);
606 /** vector float max */
608 ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB)
610 emit_vx(p, 1034, vD, vA, vB, "vmaxfp\tv%u, v%u, v%u\n", FALSE);
613 /** vector float mult add: vD = vA * vB + vC */
615 ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
618 emit_va(p, 46, vD, vA, vC, vB, "vmaddfp\tv%u, v%u, v%u, v%u\n");
621 /** vector float negative mult subtract: vD = vA - vB * vC */
623 ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
626 emit_va(p, 47, vD, vB, vA, vC, "vnmsubfp\tv%u, v%u, v%u, v%u\n");
629 /** vector float compare greater than */
631 ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
/* emit_vxr() prints the format verbatim, so it must end in '\n' like the other mnemonics */
633 emit_vxr(p, 710, vD, vA, vB, "vcmpgtfpx\tv%u, v%u, v%u\n");
636 /** vector float compare greater than or equal to */
638 ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB)
/* emit_vxr() prints the format verbatim, so it must end in '\n' like the other mnemonics */
640 emit_vxr(p, 454, vD, vA, vB, "vcmpgefpx\tv%u, v%u, v%u\n");
643 /** vector float compare equal */
645 ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
/* emit_vxr() prints the format verbatim, so it must end in '\n' like the other mnemonics */
647 emit_vxr(p, 198, vD, vA, vB, "vcmpeqfpx\tv%u, v%u, v%u\n");
650 /** vector float 2^x */
652 ppc_vexptefp(struct ppc_function *p, uint vD, uint vB)
654 emit_vx(p, 394, vD, 0, vB, "vexptefp\tv%u, 0%u, v%u\n", FALSE);
657 /** vector float log2(x) */
659 ppc_vlogefp(struct ppc_function *p, uint vD, uint vB)
661 emit_vx(p, 458, vD, 0, vB, "vlogefp\tv%u, 0%u, v%u\n", FALSE);
664 /** vector float reciprocal */
666 ppc_vrefp(struct ppc_function *p, uint vD, uint vB)
668 emit_vx(p, 266, vD, 0, vB, "vrefp\tv%u, 0%u, v%u\n", FALSE);
671 /** vector float reciprocal sqrt estimate */
673 ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB)
675 emit_vx(p, 330, vD, 0, vB, "vrsqrtefp\tv%u, 0%u, v%u\n", FALSE);
678 /** vector float round to negative infinity */
680 ppc_vrfim(struct ppc_function *p, uint vD, uint vB)
682 emit_vx(p, 714, vD, 0, vB, "vrfim\tv%u, 0%u, v%u\n", FALSE);
685 /** vector float round to positive infinity */
687 ppc_vrfip(struct ppc_function *p, uint vD, uint vB)
689 emit_vx(p, 650, vD, 0, vB, "vrfip\tv%u, 0%u, v%u\n", FALSE);
692 /** vector float round to nearest int */
694 ppc_vrfin(struct ppc_function *p, uint vD, uint vB)
696 emit_vx(p, 522, vD, 0, vB, "vrfin\tv%u, 0%u, v%u\n", FALSE);
699 /** vector float round to int toward zero */
701 ppc_vrfiz(struct ppc_function *p, uint vD, uint vB)
703 emit_vx(p, 586, vD, 0, vB, "vrfiz\tv%u, 0%u, v%u\n", FALSE);
706 /** vector store: store vR at mem[rA+rB] */
708 ppc_stvx(struct ppc_function *p, uint vR, uint rA, uint rB)
710 emit_x(p, 31, vR, rA, rB, 231, "stvx\tv%u, r%u, r%u\n");
713 /** vector load: vR = mem[rA+rB] */
715 ppc_lvx(struct ppc_function *p, uint vR, uint rA, uint rB)
717 emit_x(p, 31, vR, rA, rB, 103, "lvx\tv%u, r%u, r%u\n");
720 /** load vector element word: vR = mem_word[ra+rb] */
722 ppc_lvewx(struct ppc_function *p, uint vR, uint rA, uint rB)
724 emit_x(p, 31, vR, rA, rB, 71, "lvewx\tv%u, r%u, r%u\n");
731 ** vector bitwise operations
736 ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB)
738 emit_vx(p, 1028, vD, vA, vB, "vand\tv%u, v%u, v%u\n", FALSE);
741 /** vector and complement */
743 ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB)
745 emit_vx(p, 1092, vD, vA, vB, "vandc\tv%u, v%u, v%u\n", FALSE);
750 ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB)
752 emit_vx(p, 1156, vD, vA, vB, "vor\tv%u, v%u, v%u\n", FALSE);
757 ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB)
759 emit_vx(p, 1284, vD, vA, vB, "vnor\tv%u, v%u, v%u\n", FALSE);
764 ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB)
766 emit_vx(p, 1220, vD, vA, vB, "vxor\tv%u, v%u, v%u\n", FALSE);
769 /** Pseudo-instruction: vector move */
771 ppc_vmove(struct ppc_function *p, uint vD, uint vA)
773 boolean print = p->print;
775 ppc_vor(p, vD, vA, vA);
778 printf("vor\tv%u, v%u, v%u \t# v%u = v%u\n", vD, vA, vA, vD, vA);
783 /** Set vector register to {0,0,0,0} */
785 ppc_vzero(struct ppc_function *p, uint vr)
787 boolean print = p->print;
789 ppc_vxor(p, vr, vr, vr);
792 printf("vxor\tv%u, v%u, v%u \t# v%u = {0,0,0,0}\n", vr, vr, vr, vr);
801 ** Vector shuffle / select / splat / etc
804 /** vector permute */
806 ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
/* operands are vector registers: print them with a 'v' prefix and terminate the line */
808 emit_va(p, 43, vD, vA, vB, vC, "vperm\tv%u, v%u, v%u, v%u\n");
813 ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
/* Emits the VA-form instruction with extended opcode 42.  Operands are vector
 * registers: print them with a 'v' prefix and terminate the line. */
815 emit_va(p, 42, vD, vA, vB, vC, "vsel\tv%u, v%u, v%u, v%u\n");
818 /** vector splat byte */
820 ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm)
/* The VX-form extended opcode of vspltb is 524; 42 is the VA-form opcode that
 * ppc_vsel() passes to emit_va() and selects a different instruction here. */
822 emit_vx(p, 524, vD, imm, vB, "vspltb\tv%u, v%u, %u\n", TRUE);
825 /** vector splat half word */
827 ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm)
829 emit_vx(p, 588, vD, imm, vB, "vsplthw\tv%u, v%u, %u\n", TRUE);
832 /** vector splat word */
834 ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm)
836 emit_vx(p, 652, vD, imm, vB, "vspltw\tv%u, v%u, %u\n", TRUE);
839 /** vector splat signed immediate word */
841 ppc_vspltisw(struct ppc_function *p, uint vD, int imm)
845 emit_vx(p, 908, vD, imm, 0, "vspltisw\tv%u, %d, %u\n", FALSE);
848 /** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */
850 ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB)
852 emit_vx(p, 388, vD, vA, vB, "vslw\tv%u, v%u, v%u\n", FALSE);
859 ** integer arithmetic
864 ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm)
866 emit_d(p, 14, rt, ra, imm, "addi\tr%u, r%u, %d\n", FALSE);
869 /** rt = ra + (imm << 16) */
871 ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm)
873 emit_d(p, 15, rt, ra, imm, "addis\tr%u, r%u, %d\n", FALSE);
878 ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb)
880 emit_xo(p, 31, rt, ra, rb, 0, 266, 0, "add\tr%u, r%u, r%u\n");
883 /** rt = ra AND rb */
885 ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb)
887 emit_x(p, 31, ra, rt, rb, 28, "and\tr%u, r%u, r%u\n"); /* note argument order */
890 /** rt = ra AND imm */
892 ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm)
894 /* note argument order */
895 emit_d(p, 28, ra, rt, imm, "andi\tr%u, r%u, %d\n", FALSE);
900 ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb)
902 emit_x(p, 31, ra, rt, rb, 444, "or\tr%u, r%u, r%u\n"); /* note argument order */
905 /** rt = ra OR imm */
907 ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm)
909 /* note argument order */
910 emit_d(p, 24, ra, rt, imm, "ori\tr%u, r%u, %d\n", FALSE);
913 /** rt = ra XOR rb */
915 ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb)
917 emit_x(p, 31, ra, rt, rb, 316, "xor\tr%u, r%u, r%u\n"); /* note argument order */
920 /** rt = ra XOR imm */
922 ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm)
924 /* note argument order */
925 emit_d(p, 26, ra, rt, imm, "xori\tr%u, r%u, %d\n", FALSE);
928 /** pseudo instruction: move: rt = ra */
930 ppc_mr(struct ppc_function *p, uint rt, uint ra)
932 ppc_or(p, rt, ra, ra);
935 /** pseudo instruction: load immediate: rt = imm */
937 ppc_li(struct ppc_function *p, uint rt, int imm)
939 boolean print = p->print;
941 ppc_addi(p, rt, 0, imm);
944 printf("addi\tr%u, r0, %d \t# r%u = %d\n", rt, imm, rt, imm);
949 /** rt = imm << 16 */
951 ppc_lis(struct ppc_function *p, uint rt, int imm)
953 ppc_addis(p, rt, 0, imm);
958 ppc_load_int(struct ppc_function *p, uint rt, int imm)
/* Build the constant in two steps: upper half via lis, lower half via ori. */
960 ppc_lis(p, rt, (imm >> 16)); /* rt = (imm >> 16) << 16 */
/* NOTE(review): (imm & 0xffff) can be as large as 65535, while emit_d() asserts a
 * signed lower bound on its immediate -- confirm values above 32767 are accepted. */
961 ppc_ori(p, rt, rt, (imm & 0xffff)); /* rt = rt | (imm & 0xffff) */
968 ** integer load/store
971 /** store rs at memory[(ra)+d],
972 * then update ra = (ra)+d
975 ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d)
977 emit_d(p, 37, rs, ra, d, "stwu\tr%u, %d(r%u)\n", TRUE);
980 /** store rs at memory[(ra)+d] */
982 ppc_stw(struct ppc_function *p, uint rs, uint ra, int d)
984 emit_d(p, 36, rs, ra, d, "stw\tr%u, %d(r%u)\n", TRUE);
987 /** Load rt = mem[(ra)+d]; then set the high 32 bits to zero. */
989 ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d)
991 emit_d(p, 32, rt, ra, d, "lwz\tr%u, %d(r%u)\n", TRUE);
997 ** Float (non-vector) arithmetic
1000 /** add: frt = fra + frb */
1002 ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb)
1004 emit_a(p, 63, frt, fra, frb, 21, 0, "fadd\tf%u, f%u, f%u\n");
1007 /** sub: frt = fra - frb */
1009 ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb)
1011 emit_a(p, 63, frt, fra, frb, 20, 0, "fsub\tf%u, f%u, f%u\n");
1014 /** convert to int: rt = (int) ra */
1016 ppc_fctiwz(struct ppc_function *p, uint rt, uint fra)
1018 emit_x(p, 63, rt, 0, fra, 15, "fctiwz\tr%u, r%u, r%u\n");
1021 /** store frs at mem[(ra)+offset] */
1023 ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset)
/* frs is a floating point register: print it as f%u, as ppc_fadd()/ppc_fsub() do */
1025 emit_d(p, 52, frs, ra, offset, "stfs\tf%u, %d(r%u)\n", TRUE);
1028 /** store frs at mem[(ra)+(rb)] */
1030 ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb)
/* frs is a floating point register: print it as f%u, as ppc_fadd()/ppc_fsub() do */
1032 emit_x(p, 31, frs, ra, rb, 983, "stfiwx\tf%u, r%u, r%u\n");
1035 /** load frt = mem[(ra)+offset] */
1037 ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset)
/* Print the mnemonic of the instruction actually emitted ("lfs", not "stfs");
 * frt is a floating point register, so it is printed as f%u. */
1039 emit_d(p, 48, frt, ra, offset, "lfs\tf%u, %d(r%u)\n", TRUE);
1047 ** branch instructions
1050 /** BLR: Branch to link register (p. 35) */
1052 ppc_blr(struct ppc_function *p)
/* blr is the XL-form "bclr" (primary opcode 19, extended opcode 16) with an
 * always-taken condition, the same encoding ppc_bclr() produces for
 * ppc_return().  The previous I-form opcode 18 with LK=1 encoded a
 * branch-and-link to offset 0 instead of a branch to the link register. */
1054 emit_xl(p, 19, BRANCH_COND_ALWAYS, 0, BRANCH_HINT_SUB_RETURN, 16, 0);
1061 /** Branch Conditional to Link Register (p. 36) */
1063 ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg)
1065 emit_xl(p, 19, condOp, condReg, branchHint, 16, 0);
1068 printf("bclr\t%u %u %u\n", condOp, branchHint, condReg);
1072 /** Pseudo instruction: return from subroutine */
1074 ppc_return(struct ppc_function *p)
1076 ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0);