From: Brian Paul Date: Wed, 23 May 2001 14:27:03 +0000 (+0000) Subject: SPARC assembly optimizations from David Miller. X-Git-Tag: 062012170305~27118 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7943b349d696f8030f0d2f836ad42a762f4c6026;p=profile%2Fivi%2Fmesa.git SPARC assembly optimizations from David Miller. --- diff --git a/src/mesa/math/m_debug_util.h b/src/mesa/math/m_debug_util.h index dc8024a..efedda9 100644 --- a/src/mesa/math/m_debug_util.h +++ b/src/mesa/math/m_debug_util.h @@ -1,4 +1,4 @@ -/* $Id: m_debug_util.h,v 1.3 2001/03/30 14:44:43 gareth Exp $ */ +/* $Id: m_debug_util.h,v 1.4 2001/05/23 14:27:03 brianp Exp $ */ /* * Mesa 3-D graphics library @@ -38,7 +38,9 @@ * NOTE: it works only on CPUs which know the 'rdtsc' command (586 or higher) * (hope, you don't try to debug Mesa on a 386 ;) */ -#if defined(__GNUC__) && defined(__i386__) && defined(USE_X86_ASM) +#if defined(__GNUC__) && \ + ((defined(__i386__) && defined(USE_X86_ASM)) || \ + (defined(__sparc__) && defined(USE_SPARC_ASM))) #define RUN_DEBUG_BENCHMARK #endif @@ -67,6 +69,8 @@ extern char *mesa_profile; * It is assumed that all calculations are done in the cache. 
*/ +#if defined(__i386__) + #if 1 /* PPro, PII, PIII version */ /* Profiling on the P6 architecture requires a little more work, due to @@ -183,6 +187,30 @@ extern char *mesa_profile; #endif +#elif defined(__sparc__) + +#define INIT_COUNTER() \ + do { counter_overhead = 5; } while(0) + +#define BEGIN_RACE(x) \ +x = LONG_MAX; \ +for (cycle_i = 0; cycle_i <10; cycle_i++) { \ + register long cycle_tmp1 asm("l0"); \ + register long cycle_tmp2 asm("l1"); \ + /* rd %tick, %l0 */ \ + __asm__ __volatile__ (".word 0xa1410000" : "=r" (cycle_tmp1)); /* save timestamp */ + +#define END_RACE(x) \ + /* rd %tick, %l1 */ \ + __asm__ __volatile__ (".word 0xa3410000" : "=r" (cycle_tmp2)); \ + if (x > (cycle_tmp2-cycle_tmp1)) x = cycle_tmp2 - cycle_tmp1; \ +} \ +x -= counter_overhead; + +#else +#error Your processor is not supported for RUN_XFORM_BENCHMARK +#endif + #else #define BEGIN_RACE(x) diff --git a/src/mesa/math/m_xform.c b/src/mesa/math/m_xform.c index 6943084..c043b09 100644 --- a/src/mesa/math/m_xform.c +++ b/src/mesa/math/m_xform.c @@ -1,4 +1,4 @@ -/* $Id: m_xform.c,v 1.13 2001/05/21 16:33:41 gareth Exp $ */ +/* $Id: m_xform.c,v 1.14 2001/05/23 14:27:03 brianp Exp $ */ /* * Mesa 3-D graphics library @@ -56,6 +56,10 @@ #include "X86/common_x86_asm.h" #endif +#ifdef USE_SPARC_ASM +#include "SPARC/sparc.h" +#endif + clip_func _mesa_clip_tab[5]; clip_func _mesa_clip_np_tab[5]; dotprod_func _mesa_dotprod_tab[5]; @@ -206,6 +210,9 @@ _math_init_transformation( void ) #ifdef USE_X86_ASM _mesa_init_all_x86_transform_asm(); #endif +#ifdef USE_SPARC_ASM + _mesa_init_all_sparc_transform_asm(); +#endif } void diff --git a/src/mesa/sparc/clip.S b/src/mesa/sparc/clip.S new file mode 100644 index 0000000..a569428 --- /dev/null +++ b/src/mesa/sparc/clip.S @@ -0,0 +1,234 @@ +/* $Id: clip.S,v 1.1 2001/05/23 14:27:03 brianp Exp $ */ + +#ifdef __sparc_v9__ +#define LDPTR ldx +#define V4F_DATA 0x00 +#define V4F_START 0x08 +#define V4F_COUNT 0x10 +#define V4F_STRIDE 0x14 +#define V4F_SIZE 0x18 
+#define V4F_FLAGS	0x1c
+#else	/* not __sparc_v9__: pointer-sized fields are fetched with a 32-bit ld */
+#define LDPTR		ld
+#define V4F_DATA	0x00	/* V4F_*: byte offsets applied to GLvector4f pointers; presumably mirror the C struct - verify */
+#define V4F_START	0x04
+#define V4F_COUNT	0x08
+#define V4F_STRIDE	0x0c
+#define V4F_SIZE	0x10
+#define V4F_FLAGS	0x14
+#endif
+/* Masks OR-ed into the word at V4F_FLAGS; VEC_SIZE_n has its n low bits set. */
+#define VEC_SIZE_1	1
+#define VEC_SIZE_2	3
+#define VEC_SIZE_3	7
+#define VEC_SIZE_4	15
+
+	.text
+	.align		64
+/* Constant 1.0f, immediately followed by clip_table; both are located PC-relatively by the routines below. */
+one_dot_zero:
+	.word		0x3f800000	/* 1.0f */
+
+	/* This trick is shamelessly stolen from the x86
+	 * Mesa asm.  Very clever, and we can do it too
+	 * since we have the necessary add with carry
+	 * instructions on Sparc.  The 128 bytes below are indexed by a 7-bit code built from carry bits.
+	 */
+clip_table:
+	.byte	 0,  1,  0,  2,  4,  5,  4,  6
+	.byte	 0,  1,  0,  2,  8,  9,  8, 10
+	.byte	32, 33, 32, 34, 36, 37, 36, 38
+	.byte	32, 33, 32, 34, 40, 41, 40, 42
+	.byte	 0,  1,  0,  2,  4,  5,  4,  6
+	.byte	 0,  1,  0,  2,  8,  9,  8, 10
+	.byte	16, 17, 16, 18, 20, 21, 20, 22
+	.byte	16, 17, 16, 18, 24, 25, 24, 26
+	.byte	63, 61, 63, 62, 55, 53, 55, 54
+	.byte	63, 61, 63, 62, 59, 57, 59, 58
+	.byte	47, 45, 47, 46, 39, 37, 39, 38
+	.byte	47, 45, 47, 46, 43, 41, 43, 42
+	.byte	63, 61, 63, 62, 55, 53, 55, 54
+	.byte	63, 61, 63, 62, 59, 57, 59, 58
+	.byte	31, 29, 31, 30, 23, 21, 23, 22
+	.byte	31, 29, 31, 30, 27, 25, 27, 26
+
+/* GLvector4f *clip_vec, GLvector4f *proj_vec,
+   GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask */
+
+	.align		64
+__pc_tramp:	/* returns at once; callers only want the %o7 value the call leaves behind, to locate one_dot_zero */
+	retl
+	 nop
+/* Clip test plus divide by w for 4-component points; arguments as listed in the comment above. */
+	.globl	_mesa_sparc_cliptest_points4
+_mesa_sparc_cliptest_points4:
+	save	%sp, -64, %sp
+	call	__pc_tramp
+	 sub	%o7, (.
- one_dot_zero - 4), %g1
+	ld	[%g1 + 0x0], %f4
+	add	%g1, 0x4, %g1
+/* From here %f4 = 1.0f and %g1 = clip_table; next fetch stride, count, both data pointers and the incoming masks. */
+	ld	[%i0 + V4F_STRIDE], %l1
+	ld	[%i0 + V4F_COUNT], %g7
+	LDPTR	[%i0 + V4F_START], %i0
+	LDPTR	[%i1 + V4F_START], %i5
+	ldub	[%i3], %g2
+	ldub	[%i4], %g3
+	sll	%g3, 8, %g3
+	or	%g2, %g3, %g2
+/* proj_vec: flags |= VEC_SIZE_4, size = 3, count = clip_vec count.  NOTE(review): size 3 beside VEC_SIZE_4 - confirm intended. */
+	ld	[%i1 + V4F_FLAGS], %g3
+	or	%g3, VEC_SIZE_4, %g3
+	st	%g3, [%i1 + V4F_FLAGS]
+	mov	3, %g3
+	st	%g3, [%i1 + V4F_SIZE]
+	st	%g7, [%i1 + V4F_COUNT]
+	clr	%l2
+	clr	%l0
+/* Mask code: addcc v,v puts the float sign bit in carry, subcc (w+w)-(v+v) puts its borrow in carry; each addx shifts carry into %g3. */
+	/* l0:	i
+	 * g7:	count
+	 * l1:	stride
+	 * l2:	c (vertices whose table lookup was non-zero)
+	 * g2:	(tmpAndMask << 8) | tmpOrMask
+	 * g1:	clip_table
+	 * i0:	from[stride][i]
+	 * i2:	clipMask
+	 * i5:	vProj[4][i]
+	 */
+/* NOTE(review): the body at 1: runs before i is compared with count, so count == 0 is not guarded here - verify callers. */
+1:	ld	[%i0 + 0x0c], %f3		! LSU	Group
+	ld	[%i0 + 0x0c], %g5		! LSU	Group
+	ld	[%i0 + 0x08], %g4		! LSU	Group
+	fdivs	%f4, %f3, %f8			! FGM
+	addcc	%g5, %g5, %g5			! IEU1	Group
+	addx	%g0, 0x0, %g3			! IEU1	Group
+	addcc	%g4, %g4, %g4			! IEU1	Group
+	addx	%g3, %g3, %g3			! IEU1	Group
+	subcc	%g5, %g4, %g0			! IEU1	Group
+	ld	[%i0 + 0x04], %g4		! LSU	Group
+	addx	%g3, %g3, %g3			! IEU1	Group
+	addcc	%g4, %g4, %g4			! IEU1	Group
+	addx	%g3, %g3, %g3			! IEU1	Group
+	subcc	%g5, %g4, %g0			! IEU1	Group
+	ld	[%i0 + 0x00], %g4		! LSU	Group
+	addx	%g3, %g3, %g3			! IEU1	Group
+	addcc	%g4, %g4, %g4			! IEU1	Group
+	addx	%g3, %g3, %g3			! IEU1	Group
+	subcc	%g5, %g4, %g0			! IEU1	Group
+	addx	%g3, %g3, %g3			! IEU1	Group
+	ldub	[%g1 + %g3], %g3		! LSU	Group
+	cmp	%g3, 0				! IEU1	Group, stall
+	be	2f				! CTI /* zero mask: go project the vertex; the delay-slot stb stores the mask either way */
+	 stb	%g3, [%i2]			! LSU
+	sll	%g3, 8, %g4			! IEU1	Group
+	add	%l2, 1, %l2			! IEU0
+	st	%g0, [%i5 + 0x00]		! LSU
+	or	%g4, 0xff, %g4			! IEU0	Group
+	or	%g2, %g3, %g2			! IEU1
+	st	%g0, [%i5 + 0x04]		! LSU
+	and	%g2, %g4, %g2			! IEU0	Group
+	st	%g0, [%i5 + 0x08]		! LSU
+	b	3f				! CTI /* non-zero mask: vProj becomes (0, 0, 0, 1.0f); low byte of g2 |= mask, next byte &= mask */
+	 st	%f4, [%i5 + 0x0c]		! LSU	Group
+2:	ld	[%i0 + 0x00], %f0		! LSU	Group /* vProj = (x, y, z) * (1.0f / w), with 1.0f / w stored in the fourth slot */
+	ld	[%i0 + 0x04], %f1		! LSU	Group
+	ld	[%i0 + 0x08], %f2		! LSU	Group
+	fmuls	%f0, %f8, %f0			! FGM
+	st	%f0, [%i5 + 0x00]		! LSU	Group
+	fmuls	%f1, %f8, %f1			! FGM
+	st	%f1, [%i5 + 0x04]		! LSU	Group
+	fmuls	%f2, %f8, %f2			! FGM
+	st	%f2, [%i5 + 0x08]		! LSU	Group
+	st	%f8, [%i5 + 0x0c]		! LSU	Group
+3:	add	%i5, 0x10, %i5			! IEU1
+	add	%l0, 1, %l0			! IEU0	Group
+	add	%i2, 1, %i2			! IEU0	Group
+	cmp	%l0, %g7			! IEU1	Group
+	bne	1b				! CTI
+	 add	%i0, %l1, %i0			! IEU0	Group
+	stb	%g2, [%i3]			! LSU /* *orMask = low byte of g2 */
+	srl	%g2, 8, %g3			! IEU0	Group
+	cmp	%l2, %g7			! IEU1	Group
+	bl,a	1f				! CTI /* c < count: the annulled delay slot clears the and-mask result */
+	 clr	%g3				! IEU0
+1:	stb	%g3, [%i4]			! LSU	Group
+	ret					! CTI	Group
+	 restore	%i1, 0x0, %o0
+/* Variant without projection: same mask computation, but no divide and no stores through %i5; also returns %i1. */
+	.globl	_mesa_sparc_cliptest_points4_np
+_mesa_sparc_cliptest_points4_np:
+	save	%sp, -64, %sp
+
+	call	__pc_tramp
+	 sub	%o7, (. - one_dot_zero - 4), %g1
+	add	%g1, 0x4, %g1
+/* %g1 = clip_table; the 1.0f constant is not loaded in this variant. */
+	ld	[%i0 + V4F_STRIDE], %l1
+	ld	[%i0 + V4F_COUNT], %g7
+	LDPTR	[%i0 + V4F_START], %i0
+	LDPTR	[%i1 + V4F_START], %i5
+	ldub	[%i3], %g2
+	ldub	[%i4], %g3
+	sll	%g3, 8, %g3
+	or	%g2, %g3, %g2
+/* proj_vec header is still updated exactly as in the projecting routine above. */
+	ld	[%i1 + V4F_FLAGS], %g3
+	or	%g3, VEC_SIZE_4, %g3
+	st	%g3, [%i1 + V4F_FLAGS]
+	mov	3, %g3
+	st	%g3, [%i1 + V4F_SIZE]
+	st	%g7, [%i1 + V4F_COUNT]
+	clr	%l2
+	clr	%l0
+
+	/* l0:	i
+	 * g7:	count
+	 * l1:	stride
+	 * l2:	c (vertices whose table lookup was non-zero)
+	 * g2:	(tmpAndMask << 8) | tmpOrMask
+	 * g1:	clip_table
+	 * i0:	from[stride][i]
+	 * i2:	clipMask
+	 */
+/* NOTE(review): as above, the loop body runs once before count is tested - verify callers never pass count == 0. */
+1:	ld	[%i0 + 0x0c], %g5		! LSU	Group
+	ld	[%i0 + 0x08], %g4		! LSU	Group
+	addcc	%g5, %g5, %g5			! IEU1	Group
+	addx	%g0, 0x0, %g3			! IEU1	Group
+	addcc	%g4, %g4, %g4			! IEU1	Group
+	addx	%g3, %g3, %g3			! IEU1	Group
+	subcc	%g5, %g4, %g0			! IEU1	Group
+	ld	[%i0 + 0x04], %g4		! LSU	Group
+	addx	%g3, %g3, %g3			! IEU1	Group
+	addcc	%g4, %g4, %g4			! IEU1	Group
+	addx	%g3, %g3, %g3			! IEU1	Group
+	subcc	%g5, %g4, %g0			! IEU1	Group
+	ld	[%i0 + 0x00], %g4		! LSU	Group
+	addx	%g3, %g3, %g3			! IEU1	Group
+	addcc	%g4, %g4, %g4			! IEU1	Group
+	addx	%g3, %g3, %g3			! IEU1	Group
+	subcc	%g5, %g4, %g0			! IEU1	Group
+	addx	%g3, %g3, %g3			! IEU1	Group
+	ldub	[%g1 + %g3], %g3		! LSU	Group
+	cmp	%g3, 0				! IEU1	Group, stall
+	be	2f				! CTI
+	 stb	%g3, [%i2]			! LSU
+	sll	%g3, 8, %g4			! IEU1	Group
+	add	%l2, 1, %l2			! IEU0
+	or	%g4, 0xff, %g4			! IEU0	Group
+	or	%g2, %g3, %g2			! IEU1
+	and	%g2, %g4, %g2			! IEU0	Group
+2:	add	%l0, 1, %l0			! IEU0	Group
+	add	%i2, 1, %i2			! IEU0	Group
+	cmp	%l0, %g7			! IEU1	Group
+	bne	1b				! CTI
+	 add	%i0, %l1, %i0			! IEU0	Group
+	stb	%g2, [%i3]			! LSU
+	srl	%g2, 8, %g3			! IEU0	Group
+	cmp	%l2, %g7			! IEU1	Group
+	bl,a	1f				! CTI
+	 clr	%g3				! IEU0
+1:	stb	%g3, [%i4]			! LSU	Group
+	ret					! CTI	Group
+	 restore	%i1, 0x0, %o0
diff --git a/src/mesa/sparc/sparc.c b/src/mesa/sparc/sparc.c
new file mode 100644
index 0000000..83741cf
--- /dev/null
+++ b/src/mesa/sparc/sparc.c
@@ -0,0 +1,109 @@
+/* $Id: sparc.c,v 1.1 2001/05/23 14:27:03 brianp Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.1
+ *
+ * Copyright (C) 1999 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Sparc assembly code by David S.
Miller
+ */
+/* Installs the SPARC assembly transform and clip-test routines into the math module dispatch tables. */
+
+#include "glheader.h"
+#include "context.h"
+#include "math/m_vertices.h"
+#include "math/m_xform.h"
+#include "tnl/t_context.h"
+#include "sparc.h"
+
+#ifdef DEBUG
+#include "math/m_debug.h"
+#endif
+/* Parameter list shared by every transform routine: destination vector, 16-float matrix, source vector. */
+#define XFORM_ARGS	GLvector4f *to_vec,		\
+			const GLfloat m[16],		\
+			const GLvector4f *from_vec
+/* Declares the seven per-matrix-type assembly entry points for input size sz; the expansion ends in its own ';'. */
+#define DECLARE_XFORM_GROUP(pfx, sz)					\
+ extern void _mesa_##pfx##_transform_points##sz##_general(XFORM_ARGS);	\
+ extern void _mesa_##pfx##_transform_points##sz##_identity(XFORM_ARGS);	\
+ extern void _mesa_##pfx##_transform_points##sz##_3d_no_rot(XFORM_ARGS);	\
+ extern void _mesa_##pfx##_transform_points##sz##_perspective(XFORM_ARGS);	\
+ extern void _mesa_##pfx##_transform_points##sz##_2d(XFORM_ARGS);		\
+ extern void _mesa_##pfx##_transform_points##sz##_2d_no_rot(XFORM_ARGS);	\
+ extern void _mesa_##pfx##_transform_points##sz##_3d(XFORM_ARGS);
+/* Stores those seven routines into row sz of _mesa_transform_tab, one slot per MATRIX_* type; also ends in ';'. */
+#define ASSIGN_XFORM_GROUP(pfx, sz)					\
+   _mesa_transform_tab[sz][MATRIX_GENERAL] =				\
+      _mesa_##pfx##_transform_points##sz##_general;			\
+   _mesa_transform_tab[sz][MATRIX_IDENTITY] =				\
+      _mesa_##pfx##_transform_points##sz##_identity;			\
+   _mesa_transform_tab[sz][MATRIX_3D_NO_ROT] =				\
+      _mesa_##pfx##_transform_points##sz##_3d_no_rot;			\
+   _mesa_transform_tab[sz][MATRIX_PERSPECTIVE] =			\
+      _mesa_##pfx##_transform_points##sz##_perspective;		\
+   _mesa_transform_tab[sz][MATRIX_2D] =				\
+      _mesa_##pfx##_transform_points##sz##_2d;				\
+   _mesa_transform_tab[sz][MATRIX_2D_NO_ROT] =				\
+      _mesa_##pfx##_transform_points##sz##_2d_no_rot;			\
+   _mesa_transform_tab[sz][MATRIX_3D] =				\
+      _mesa_##pfx##_transform_points##sz##_3d;
+
+/* Prototypes for the assembly routines; compiled only when the SPARC assembly is part of the build. */
+#ifdef USE_SPARC_ASM
+DECLARE_XFORM_GROUP(sparc, 1)
+DECLARE_XFORM_GROUP(sparc, 2)
+DECLARE_XFORM_GROUP(sparc, 3)
+DECLARE_XFORM_GROUP(sparc, 4)
+/* Clip testers implemented in clip.S; both return a GLvector4f pointer (the assembly returns proj_vec). */
+extern GLvector4f *_mesa_sparc_cliptest_points4(GLvector4f *clip_vec,
+						 GLvector4f *proj_vec,
+						 GLubyte clipMask[],
+						 GLubyte *orMask,
+						 GLubyte *andMask);
+
+extern GLvector4f *_mesa_sparc_cliptest_points4_np(GLvector4f *clip_vec,
+						    GLvector4f *proj_vec,
+						    GLubyte clipMask[],
+						    GLubyte *orMask,
+						    GLubyte *andMask);
+#endif
+/* Overrides the transform/clip table entries with the SPARC versions; an empty function unless USE_SPARC_ASM is defined. */
+void _mesa_init_all_sparc_transform_asm(void)
+{
+#ifdef USE_SPARC_ASM
+   ASSIGN_XFORM_GROUP(sparc, 1)
+   ASSIGN_XFORM_GROUP(sparc, 2)
+   ASSIGN_XFORM_GROUP(sparc, 3)
+   ASSIGN_XFORM_GROUP(sparc, 4)
+/* Only slot 4 of each clip table is replaced here. */
+   _mesa_clip_tab[4] = _mesa_sparc_cliptest_points4;
+   _mesa_clip_np_tab[4] = _mesa_sparc_cliptest_points4_np;
+/* DEBUG builds run the test entry points (presumably declared in math/m_debug.h) with the label "sparc". */
+#ifdef DEBUG
+   _math_test_all_transform_functions("sparc");
+   _math_test_all_cliptest_functions("sparc");
+#endif
+
+#endif
+}
diff --git a/src/mesa/sparc/sparc.h b/src/mesa/sparc/sparc.h
new file mode 100644
index 0000000..64422a3
--- /dev/null
+++ b/src/mesa/sparc/sparc.h
@@ -0,0 +1,37 @@
+/* $Id: sparc.h,v 1.1 2001/05/23 14:27:03 brianp Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.1
+ *
+ * Copyright (C) 1999 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Sparc assembly code by David S.
Miller + */ + + +#ifndef SPARC_H +#define SPARC_H + +extern void _mesa_init_all_sparc_transform_asm(void); + +#endif /* !(SPARC_H) */ diff --git a/src/mesa/sparc/sparc_matrix.h b/src/mesa/sparc/sparc_matrix.h new file mode 100644 index 0000000..4b452fd --- /dev/null +++ b/src/mesa/sparc/sparc_matrix.h @@ -0,0 +1,277 @@ +/* $Id: sparc_matrix.h,v 1.1 2001/05/23 14:27:03 brianp Exp $ */ + +#define M0 %f16 +#define M1 %f17 +#define M2 %f18 +#define M3 %f19 +#define M4 %f20 +#define M5 %f21 +#define M6 %f22 +#define M7 %f23 +#define M8 %f24 +#define M9 %f25 +#define M10 %f26 +#define M11 %f27 +#define M12 %f28 +#define M13 %f29 +#define M14 %f30 +#define M15 %f31 + +/* Seems to work, disable if unaligned traps begin to appear... -DaveM */ +#define USE_LD_DOUBLE + +#ifndef USE_LD_DOUBLE + +#define LDMATRIX_0_1_2_3_12_13_14_15(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 1 * 0x4)], M1; \ + ld [BASE + ( 2 * 0x4)], M2; \ + ld [BASE + ( 3 * 0x4)], M3; \ + ld [BASE + (12 * 0x4)], M12; \ + ld [BASE + (13 * 0x4)], M13; \ + ld [BASE + (14 * 0x4)], M14; \ + ld [BASE + (15 * 0x4)], M15 + +#define LDMATRIX_0_1_12_13(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 1 * 0x4)], M1; \ + ld [BASE + (12 * 0x4)], M12; \ + ld [BASE + (13 * 0x4)], M13 + +#define LDMATRIX_0_12_13(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + (12 * 0x4)], M12; \ + ld [BASE + (13 * 0x4)], M13 + +#define LDMATRIX_0_1_2_12_13_14(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 1 * 0x4)], M1; \ + ld [BASE + ( 2 * 0x4)], M2; \ + ld [BASE + (12 * 0x4)], M12; \ + ld [BASE + (13 * 0x4)], M13; \ + ld [BASE + (14 * 0x4)], M14 + +#define LDMATRIX_0_12_13_14(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + (12 * 0x4)], M12; \ + ld [BASE + (13 * 0x4)], M13; \ + ld [BASE + (14 * 0x4)], M14 + +#define LDMATRIX_0_14(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + (14 * 0x4)], M14 + +#define LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 1 * 
0x4)], M1; \ + ld [BASE + ( 2 * 0x4)], M2; \ + ld [BASE + ( 3 * 0x4)], M3; \ + ld [BASE + ( 4 * 0x4)], M4; \ + ld [BASE + ( 5 * 0x4)], M5; \ + ld [BASE + ( 6 * 0x4)], M6; \ + ld [BASE + ( 7 * 0x4)], M7; \ + ld [BASE + (12 * 0x4)], M12; \ + ld [BASE + (13 * 0x4)], M13; \ + ld [BASE + (14 * 0x4)], M14; \ + ld [BASE + (15 * 0x4)], M15 + +#define LDMATRIX_0_1_4_5_12_13(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 1 * 0x4)], M1; \ + ld [BASE + ( 4 * 0x4)], M4; \ + ld [BASE + ( 5 * 0x4)], M5; \ + ld [BASE + (12 * 0x4)], M12; \ + ld [BASE + (13 * 0x4)], M13 + +#define LDMATRIX_0_5_12_13(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 5 * 0x4)], M5; \ + ld [BASE + (12 * 0x4)], M12; \ + ld [BASE + (13 * 0x4)], M13 + +#define LDMATRIX_0_1_2_3_4_5_6_12_13_14(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 1 * 0x4)], M1; \ + ld [BASE + ( 2 * 0x4)], M2; \ + ld [BASE + ( 3 * 0x4)], M3; \ + ld [BASE + ( 4 * 0x4)], M4; \ + ld [BASE + ( 5 * 0x4)], M5; \ + ld [BASE + ( 6 * 0x4)], M6; \ + ld [BASE + (12 * 0x4)], M12; \ + ld [BASE + (13 * 0x4)], M13; \ + ld [BASE + (14 * 0x4)], M14 + +#define LDMATRIX_0_5_12_13_14(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 5 * 0x4)], M5; \ + ld [BASE + (12 * 0x4)], M12; \ + ld [BASE + (13 * 0x4)], M13; \ + ld [BASE + (14 * 0x4)], M14 + +#define LDMATRIX_0_5_14(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 5 * 0x4)], M5; \ + ld [BASE + (14 * 0x4)], M14 + +#define LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 1 * 0x4)], M1; \ + ld [BASE + ( 2 * 0x4)], M2; \ + ld [BASE + ( 3 * 0x4)], M3; \ + ld [BASE + ( 4 * 0x4)], M4; \ + ld [BASE + ( 5 * 0x4)], M5; \ + ld [BASE + ( 6 * 0x4)], M6; \ + ld [BASE + ( 7 * 0x4)], M7; \ + ld [BASE + ( 8 * 0x4)], M8; \ + ld [BASE + ( 9 * 0x4)], M9; \ + ld [BASE + (10 * 0x4)], M10; \ + ld [BASE + (11 * 0x4)], M11; \ + ld [BASE + (12 * 0x4)], M12; \ + ld [BASE + (13 * 0x4)], M13; \ + ld [BASE + (14 * 0x4)], M14; \ + ld [BASE + 
(15 * 0x4)], M15
+/* Second definition of LDMATRIX_0_5_12_13 in this branch; it lists the same four loads as the earlier one. */
+#define LDMATRIX_0_5_12_13(BASE)			\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13
+
+#define LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(BASE)	\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 1 * 0x4)], M1;	\
+	ld	[BASE + ( 2 * 0x4)], M2;	\
+	ld	[BASE + ( 4 * 0x4)], M4;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + ( 6 * 0x4)], M6;	\
+	ld	[BASE + ( 8 * 0x4)], M8;	\
+	ld	[BASE + ( 9 * 0x4)], M9;	\
+	ld	[BASE + (10 * 0x4)], M10;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_5_10_12_13_14(BASE)			\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + (10 * 0x4)], M10;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_5_8_9_10_14(BASE)			\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + ( 8 * 0x4)], M8;	\
+	ld	[BASE + ( 9 * 0x4)], M9;	\
+	ld	[BASE + (10 * 0x4)], M10;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#else /* USE_LD_DOUBLE is defined: same macro names, built from double-word loads where possible */
+/* ldd fills an even/odd register pair (e.g. M0/M1) from two adjacent matrix words; lone elements still use ld. */
+#define LDMATRIX_0_1_2_3_12_13_14_15(BASE)		\
+	ldd	[BASE + ( 0 * 0x4)], M0;	\
+	ldd	[BASE + ( 2 * 0x4)], M2;	\
+	ldd	[BASE + (12 * 0x4)], M12;	\
+	ldd	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_1_12_13(BASE)			\
+	ldd	[BASE + ( 0 * 0x4)], M0;	\
+	ldd	[BASE + (12 * 0x4)], M12
+
+#define LDMATRIX_0_12_13(BASE)				\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ldd	[BASE + (12 * 0x4)], M12
+
+#define LDMATRIX_0_1_2_12_13_14(BASE)			\
+	ldd	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 2 * 0x4)], M2;	\
+	ldd	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_12_13_14(BASE)			\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ldd	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_14(BASE)				\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(BASE)	\
+	ldd	[BASE + ( 0 * 0x4)], M0;	\
+
ldd [BASE + ( 2 * 0x4)], M2; \ + ldd [BASE + ( 4 * 0x4)], M4; \ + ldd [BASE + ( 6 * 0x4)], M6; \ + ldd [BASE + (12 * 0x4)], M12; \ + ldd [BASE + (14 * 0x4)], M14 + +#define LDMATRIX_0_5_12_13(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 5 * 0x4)], M5; \ + ldd [BASE + (12 * 0x4)], M12 + +#define LDMATRIX_0_1_2_3_4_5_6_12_13_14(BASE) \ + ldd [BASE + ( 0 * 0x4)], M0; \ + ldd [BASE + ( 2 * 0x4)], M2; \ + ldd [BASE + ( 4 * 0x4)], M4; \ + ld [BASE + ( 6 * 0x4)], M6; \ + ldd [BASE + (12 * 0x4)], M12; \ + ld [BASE + (14 * 0x4)], M14 + +#define LDMATRIX_0_5_12_13_14(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 5 * 0x4)], M5; \ + ldd [BASE + (12 * 0x4)], M12; \ + ld [BASE + (14 * 0x4)], M14 + +#define LDMATRIX_0_5_14(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 5 * 0x4)], M5; \ + ld [BASE + (14 * 0x4)], M14 + +#define LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(BASE) \ + ldd [BASE + ( 0 * 0x4)], M0; \ + ldd [BASE + ( 2 * 0x4)], M2; \ + ldd [BASE + ( 4 * 0x4)], M4; \ + ldd [BASE + ( 6 * 0x4)], M6; \ + ldd [BASE + ( 8 * 0x4)], M8; \ + ldd [BASE + (10 * 0x4)], M10; \ + ldd [BASE + (12 * 0x4)], M12; \ + ldd [BASE + (14 * 0x4)], M14 + +#define LDMATRIX_0_1_4_5_12_13(BASE) \ + ldd [BASE + ( 0 * 0x4)], M0; \ + ldd [BASE + ( 4 * 0x4)], M4; \ + ldd [BASE + (12 * 0x4)], M12 + +#define LDMATRIX_0_5_12_13(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 5 * 0x4)], M5; \ + ldd [BASE + (12 * 0x4)], M12 + +#define LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(BASE) \ + ldd [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 2 * 0x4)], M2; \ + ldd [BASE + ( 4 * 0x4)], M4; \ + ld [BASE + ( 6 * 0x4)], M6; \ + ldd [BASE + ( 8 * 0x4)], M8; \ + ld [BASE + (10 * 0x4)], M10; \ + ldd [BASE + (12 * 0x4)], M12; \ + ld [BASE + (14 * 0x4)], M14 + +#define LDMATRIX_0_5_10_12_13_14(BASE) \ + ld [BASE + ( 0 * 0x4)], M0; \ + ld [BASE + ( 5 * 0x4)], M5; \ + ld [BASE + (10 * 0x4)], M10; \ + ldd [BASE + (12 * 0x4)], M12; \ + ld [BASE + (14 * 0x4)], M14 + +#define 
LDMATRIX_0_5_8_9_10_14(BASE)			\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ldd	[BASE + ( 8 * 0x4)], M8;	\
+	ld	[BASE + (10 * 0x4)], M10;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#endif /* USE_LD_DOUBLE */
diff --git a/src/mesa/sparc/xform.S b/src/mesa/sparc/xform.S
new file mode 100644
index 0000000..368fdd9
--- /dev/null
+++ b/src/mesa/sparc/xform.S
@@ -0,0 +1,1410 @@
+/* $Id: xform.S,v 1.1 2001/05/23 14:27:03 brianp Exp $ */
+
+	/* TODO
+	 *
+	 * 1) It would be nice if load/store double could be used
+	 *    at least for the matrix parts.  I think for the matrices
+	 *    it is safe, but for the vertices it probably is not due to
+	 *    things like glInterleavedArrays etc.
+	 *
+	 *    UPDATE: Trying this now in sparc_matrix.h -DaveM_990624
+	 *
+	 * 2) One extremely slick trick would be if we could enclose
+	 *    groups of xform calls on the same vertices such that
+	 *    we just load the matrix into f16-->f31 before the calls
+	 *    and then we would not have to do them here.  This may be
+	 *    tricky and not much of a gain though.
+	 */
+/* Same GLvector4f byte offsets and VEC_SIZE_n flag masks as the definitions at the top of clip.S. */
+#ifdef __sparc_v9__
+#define LDPTR		ldx
+#define V4F_DATA	0x00
+#define V4F_START	0x08
+#define V4F_COUNT	0x10
+#define V4F_STRIDE	0x14
+#define V4F_SIZE	0x18
+#define V4F_FLAGS	0x1c
+#else
+#define LDPTR		ld
+#define V4F_DATA	0x00
+#define V4F_START	0x04
+#define V4F_COUNT	0x08
+#define V4F_STRIDE	0x0c
+#define V4F_SIZE	0x10
+#define V4F_FLAGS	0x14
+#endif
+
+#define VEC_SIZE_1	1
+#define VEC_SIZE_2	3
+#define VEC_SIZE_3	7
+#define VEC_SIZE_4	15
+
+	.text
+	.align		64
+/* __set_v4f_N: shared exits reached with ba; write N to V4F_SIZE and OR VEC_SIZE_N into V4F_FLAGS of the vector in %o0, then retl. */
+__set_v4f_1:
+	ld	[%o0 + V4F_FLAGS], %g2
+	mov	1, %g1
+	st	%g1, [%o0 + V4F_SIZE]
+	or	%g2, VEC_SIZE_1, %g2
+	retl
+	 st	%g2, [%o0 + V4F_FLAGS]
+__set_v4f_2:
+	ld	[%o0 + V4F_FLAGS], %g2
+	mov	2, %g1
+	st	%g1, [%o0 + V4F_SIZE]
+	or	%g2, VEC_SIZE_2, %g2
+	retl
+	 st	%g2, [%o0 + V4F_FLAGS]
+__set_v4f_3:
+	ld	[%o0 + V4F_FLAGS], %g2
+	mov	3, %g1
+	st	%g1, [%o0 + V4F_SIZE]
+	or	%g2, VEC_SIZE_3, %g2
+	retl
+	 st	%g2, [%o0 + V4F_FLAGS]
+__set_v4f_4:
+	ld	[%o0 + V4F_FLAGS], %g2
+	mov	4, %g1
+	st	%g1, [%o0 + V4F_SIZE]
+	or	%g2, VEC_SIZE_4, %g2
+	retl
+	 st	%g2, [%o0 + V4F_FLAGS]
+/* Provides the M0..M15 register names (%f16..%f31) and the LDMATRIX_* load macros used below. */
+#include "sparc_matrix.h"
+
+	/* First the raw versions. */
+/* %o0 = destination vector, %o1 = matrix, %o2 = source vector; per vertex out[k] = x * M[k] + M[12 + k] for k = 0..3. */
+	.globl	_mesa_sparc_transform_points1_general
+_mesa_sparc_transform_points1_general:
+	ld	[%o2 + V4F_STRIDE], %o5
+	LDPTR	[%o2 + V4F_START], %g1
+	LDPTR	[%o0 + V4F_START], %g2
+	ld	[%o2 + V4F_COUNT], %g3
+/* Matrix elements 0-3 and 12-15 are the only ones this routine reads. */
+	LDMATRIX_0_1_2_3_12_13_14_15(%o1)
+/* count < 1: exit at 3:.  count == 1: single-vertex tail at 2:.  Otherwise %o2 = count & ~1 bounds the two-per-pass loop. */
+	cmp	%g3, 1
+	st	%g3, [%o0 + V4F_COUNT]
+	bl	3f
+	 clr	%o1
+
+	be	2f
+	 andn	%g3, 1, %o2
+
+1:	ld	[%g1 + 0x00], %f0		! LSU	Group
+	add	%g1, %o5, %g1			! IEU0
+	ld	[%g1 + 0x00], %f8		! LSU	Group
+	add	%o1, 2, %o1			! IEU0
+	add	%g1, %o5, %g1			! IEU1
+	fmuls	%f0, M0, %f1			! FGM	Group 1-cycle stall on %f0
+	fmuls	%f0, M1, %f2			! FGM	Group
+	fmuls	%f0, M2, %f3			! FGM	Group
+	fmuls	%f0, M3, %f4			! FGM	Group
+	fmuls	%f8, M0, %f9			! FGM	Group f1 available
+	fadds	%f1, M12, %f1			! FGA
+	st	%f1, [%g2 + 0x00]		! LSU
+	fmuls	%f8, M1, %f10			! FGM	Group f2 available
+	fadds	%f2, M13, %f2			! FGA
+	st	%f2, [%g2 + 0x04]		! LSU
+	fmuls	%f8, M2, %f11			!
FGM Group f3 available + fadds %f3, M14, %f3 ! FGA + st %f3, [%g2 + 0x08] ! LSU + fmuls %f8, M3, %f12 ! FGM Group f4 available + fadds %f4, M15, %f4 ! FGA + st %f4, [%g2 + 0x0c] ! LSU + fadds %f9, M12, %f9 ! FGA Group f9 available + st %f9, [%g2 + 0x10] ! LSU + fadds %f10, M13, %f10 ! FGA Group f10 available + st %f10, [%g2 + 0x14] ! LSU + fadds %f11, M14, %f11 ! FGA Group f11 available + st %f11, [%g2 + 0x18] ! LSU + fadds %f12, M15, %f12 ! FGA Group f12 available + st %f12, [%g2 + 0x1c] ! LSU + cmp %o1, %o2 ! IEU1 + bne 1b ! CTI + add %g2, 0x20, %g2 ! IEU0 Group + + cmp %o1, %g3 + be 3f + nop + +2: ld [%g1 + 0x00], %f0 ! LSU Group + fmuls %f0, M0, %f1 ! FGM Group 1-cycle stall on %f0 + fmuls %f0, M1, %f2 ! FGM Group + fmuls %f0, M2, %f3 ! FGM Group + fmuls %f0, M3, %f4 ! FGM Group + fadds %f1, M12, %f1 ! FGA Group + st %f1, [%g2 + 0x00] ! LSU + fadds %f2, M13, %f2 ! FGA Group + st %f2, [%g2 + 0x04] ! LSU + fadds %f3, M14, %f3 ! FGA Group + st %f3, [%g2 + 0x08] ! LSU + fadds %f4, M15, %f4 ! FGA Group + st %f4, [%g2 + 0x0c] ! LSU + +3: + ba __set_v4f_4 + nop + + .globl _mesa_sparc_transform_points1_identity +_mesa_sparc_transform_points1_identity: + cmp %o0, %o2 + be 4f + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + cmp %g3, 1 + st %g3, [%o0 + V4F_COUNT] + bl 3f + clr %o1 + + be 2f + andn %g3, 1, %o2 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + add %g1, %o5, %g1 ! IEU0 + ld [%g1 + 0x00], %f1 ! LSU Group + add %o1, 2, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + st %f0, [%g2 + 0x00] ! LSU Group + cmp %o1, %o2 ! IEU1 + st %f1, [%g2 + 0x10] ! LSU Group + bne 1b ! CTI + add %g2, 0x20, %g2 ! 
IEU0 + + cmp %o1, %g3 + be 3f + nop + +2: ld [%g1 + 0x00], %f0 + addx %g0, %g0, %g0 + st %f0, [%g2 + 0x00] + +3: + ba __set_v4f_1 + nop + +4: retl + nop + + .globl _mesa_sparc_transform_points1_2d +_mesa_sparc_transform_points1_2d: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_1_12_13(%o1) + + cmp %g3, 1 + st %g3, [%o0 + V4F_COUNT] + bl 3f + clr %o1 + + be 2f + andn %g3, 1, %o2 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + add %g1, %o5, %g1 ! IEU0 + ld [%g1 + 0x00], %f8 ! LSU Group + add %o1, 2, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f1 ! FGM Group + fmuls %f0, M1, %f2 ! FGM Group + fmuls %f8, M0, %f9 ! FGM Group + fmuls %f8, M1, %f10 ! FGM Group + fadds %f1, M12, %f3 ! FGA Group f1 available + st %f3, [%g2 + 0x00] ! LSU + fadds %f2, M13, %f4 ! FGA Group f2 available + st %f4, [%g2 + 0x04] ! LSU + fadds %f9, M12, %f11 ! FGA Group f9 available + st %f11, [%g2 + 0x10] ! LSU + fadds %f10, M13, %f12 ! FGA Group f10 available + st %f12, [%g2 + 0x14] ! LSU + cmp %o1, %o2 ! IEU1 + bne 1b ! CTI + add %g2, 0x20, %g2 ! IEU0 Group + + cmp %o1, %g3 + be 3f + nop + +2: ld [%g1 + 0x00], %f0 + fmuls %f0, M0, %f1 + fmuls %f0, M1, %f2 + fadds %f1, M12, %f3 + st %f3, [%g2 + 0x00] + fadds %f2, M13, %f4 + st %f4, [%g2 + 0x04] + +3: + ba __set_v4f_2 + nop + + .globl _mesa_sparc_transform_points1_2d_no_rot +_mesa_sparc_transform_points1_2d_no_rot: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_12_13(%o1) + + cmp %g3, 1 + st %g3, [%o0 + V4F_COUNT] + bl 3f + clr %o1 + + be 2f + andn %g3, 1, %o2 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + add %g1, %o5, %g1 ! IEU0 + ld [%g1 + 0x00], %f4 ! LSU Group + add %o1, 2, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f1 ! FGM Group + fmuls %f4, M0, %f5 ! FGM Group + fadds %f1, M12, %f3 ! FGA Group, 2 cycle stall, f1 available + st %f3, [%g2 + 0x00] ! 
LSU + st M13, [%g2 + 0x04] ! LSU Group, f5 available + fadds %f5, M12, %f6 ! FGA + st %f6, [%g2 + 0x10] ! LSU Group + st M13, [%g2 + 0x14] ! LSU Group + cmp %o1, %o2 ! IEU1 + bne 1b ! CTI + add %g2, 0x20, %g2 ! IEU0 Group + + cmp %o1, %g3 + be 3f + nop + +2: ld [%g1 + 0x00], %f0 + fmuls %f0, M0, %f1 + fadds %f1, M12, %f3 + st %f3, [%g2 + 0x00] + st M13, [%g2 + 0x04] + +3: + ba __set_v4f_2 + nop + + .globl _mesa_sparc_transform_points1_3d +_mesa_sparc_transform_points1_3d: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_1_2_12_13_14(%o1) + + cmp %g3, 1 + st %g3, [%o0 + V4F_COUNT] + bl 3f + clr %o1 + + be 2f + andn %g3, 1, %o2 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + add %g1, %o5, %g1 ! IEU0 + ld [%g1 + 0x00], %f4 ! LSU Group + add %o1, 2, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f1 ! FGM Group + fmuls %f0, M1, %f2 ! FGM Group + fmuls %f0, M2, %f3 ! FGM Group + fmuls %f4, M0, %f5 ! FGM Group + fadds %f1, M12, %f1 ! FGA Group, f1 available + st %f1, [%g2 + 0x00] ! LSU + fmuls %f4, M1, %f6 ! FGM + fadds %f2, M13, %f2 ! FGA Group, f2 available + st %f2, [%g2 + 0x04] ! LSU + fmuls %f4, M2, %f7 ! FGM + fadds %f3, M14, %f3 ! FGA Group, f3 available + st %f3, [%g2 + 0x08] ! LSU + fadds %f5, M12, %f5 ! FGA Group, f5 available + st %f5, [%g2 + 0x10] ! LSU + fadds %f6, M13, %f6 ! FGA Group, f6 available + st %f6, [%g2 + 0x14] ! LSU + fadds %f7, M14, %f7 ! FGA Group, f7 available + st %f7, [%g2 + 0x18] ! LSU + cmp %o1, %o2 ! IEU1 + bne 1b ! CTI + add %g2, 0x20, %g2 ! 
IEU0 Group + + cmp %o1, %g3 + be 3f + nop + +2: ld [%g1 + 0x00], %f0 + fmuls %f0, M0, %f1 + fmuls %f0, M1, %f2 + fmuls %f0, M2, %f3 + fadds %f1, M12, %f1 + st %f1, [%g2 + 0x00] + fadds %f2, M13, %f2 + st %f2, [%g2 + 0x04] + fadds %f3, M14, %f3 + st %f3, [%g2 + 0x08] + +3: + ba __set_v4f_3 + nop + + .globl _mesa_sparc_transform_points1_3d_no_rot +_mesa_sparc_transform_points1_3d_no_rot: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_12_13_14(%o1) + + cmp %g3, 1 + st %g3, [%o0 + V4F_COUNT] + bl 3f + clr %o1 + + be 2f + andn %g3, 1, %o2 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + add %g1, %o5, %g1 ! IEU0 + ld [%g1 + 0x00], %f2 ! LSU Group + add %o1, 2, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f1 ! FGM Group + fmuls %f2, M0, %f3 ! FGM Group + fadds %f1, M12, %f1 ! FGA Group, 2 cycle stall, f1 available + st %f1, [%g2 + 0x00] ! LSU + fadds %f3, M12, %f3 ! FGA Group, f3 available + st M13, [%g2 + 0x04] ! LSU + st M14, [%g2 + 0x08] ! LSU Group + st %f3, [%g2 + 0x10] ! LSU Group + st M13, [%g2 + 0x14] ! LSU Group + st M14, [%g2 + 0x18] ! LSU Group + cmp %o1, %o2 ! IEU1 + bne 1b ! CTI + add %g2, 0x20, %g2 ! IEU0 Group + + cmp %o1, %g3 + be 3f + nop + +2: ld [%g1 + 0x00], %f0 + fmuls %f0, M0, %f1 + fadds %f1, M12, %f1 + st %f1, [%g2 + 0x00] + st M13, [%g2 + 0x04] + st M14, [%g2 + 0x08] + +3: + ba __set_v4f_3 + nop + + .globl _mesa_sparc_transform_points1_perspective +_mesa_sparc_transform_points1_perspective: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_14(%o1) + + cmp %g3, 1 + st %g3, [%o0 + V4F_COUNT] + bl 3f + clr %o1 + + be 2f + andn %g3, 1, %o2 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + add %g1, %o5, %g1 ! IEU0 + ld [%g1 + 0x00], %f2 ! LSU Group + add %o1, 2, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f1 ! FGM Group + st %f1, [%g2 + 0x00] ! LSU + fmuls %f2, M0, %f3 ! 
FGM Group + st %g0, [%g2 + 0x04] ! LSU + st M14, [%g2 + 0x08] ! LSU Group + st %g0, [%g2 + 0x0c] ! LSU Group + st %f3, [%g2 + 0x10] ! LSU Group + st %g0, [%g2 + 0x14] ! LSU Group + st M14, [%g2 + 0x18] ! LSU Group + st %g0, [%g2 + 0x1c] ! LSU Group + cmp %o1, %o2 ! IEU1 + bne 1b ! CTI + add %g2, 0x20, %g2 ! IEU0 Group + + cmp %o1, %g3 + be 3f + nop + +2: ld [%g1 + 0x00], %f0 + fmuls %f0, M0, %f1 + st %f1, [%g2 + 0x00] + st %g0, [%g2 + 0x04] + st M14, [%g2 + 0x08] + st %g0, [%g2 + 0x0c] + +3: + ba __set_v4f_4 + nop + + .globl _mesa_sparc_transform_points2_general +_mesa_sparc_transform_points2_general: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(%o1) + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + add %o1, 1, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f2 ! FGM Group + fmuls %f0, M1, %f3 ! FGM Group + fmuls %f0, M2, %f4 ! FGM Group + fmuls %f0, M3, %f5 ! FGM Group + fadds %f2, M12, %f2 ! FGA Group f2 available + fmuls %f1, M4, %f6 ! FGM + fadds %f3, M13, %f3 ! FGA Group f3 available + fmuls %f1, M5, %f7 ! FGM + fadds %f4, M14, %f4 ! FGA Group f4 available + fmuls %f1, M6, %f8 ! FGM + fadds %f5, M15, %f5 ! FGA Group f5 available + fmuls %f1, M7, %f9 ! FGM + fadds %f2, %f6, %f2 ! FGA Group f6 available + st %f2, [%g2 + 0x00] ! LSU + fadds %f3, %f7, %f3 ! FGA Group f7 available + st %f3, [%g2 + 0x04] ! LSU + fadds %f4, %f8, %f4 ! FGA Group f8 available + st %f4, [%g2 + 0x08] ! LSU + fadds %f5, %f9, %f5 ! FGA Group f9 available + st %f5, [%g2 + 0x0c] ! LSU + cmp %o1, %g3 ! IEU1 + bne 1b ! CTI + add %g2, 0x10, %g2 ! 
IEU0 Group +2: + ba __set_v4f_4 + nop + + .globl _mesa_sparc_transform_points2_identity +_mesa_sparc_transform_points2_identity: + cmp %o2, %o0 + be 3f + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + add %o1, 1, %o1 ! IEU0 + ld [%g1 + 0x04], %f1 ! LSU Group + add %g1, %o5, %g1 ! IEU0 + cmp %o1, %g3 ! IEU1 + st %f0, [%g2 + 0x00] ! LSU Group + st %f1, [%g2 + 0x04] ! LSU Group + bne 1b ! CTI + add %g2, 0x10, %g2 ! IEU0 +2: + ba __set_v4f_2 + nop + +3: retl + nop + + .globl _mesa_sparc_transform_points2_2d +_mesa_sparc_transform_points2_2d: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_1_4_5_12_13(%o1) + + cmp %g3, 1 + st %g3, [%o0 + V4F_COUNT] + bl 3f + clr %o1 + + be 2f + andn %g3, 1, %o2 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + add %o1, 2, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f2 ! FGM + ld [%g1 + 0x00], %f8 ! LSU Group + fmuls %f0, M1, %f3 ! FGM + ld [%g1 + 0x04], %f9 ! LSU Group + fmuls %f1, M4, %f6 ! FGM + fmuls %f1, M5, %f7 ! FGM Group + add %g1, %o5, %g1 ! IEU0 + fmuls %f8, M0, %f10 ! FGM Group f2 available + fadds %f2, M12, %f2 ! FGA + fmuls %f8, M1, %f11 ! FGM Group f3 available + fadds %f3, M13, %f3 ! FGA + fmuls %f9, M4, %f12 ! FGM Group + fmuls %f9, M5, %f13 ! FGM Group + fadds %f10, M12, %f10 ! FGA Group f2, f10 available + fadds %f2, %f6, %f2 ! FGA Group f3, f11 available + st %f2, [%g2 + 0x00] ! LSU + fadds %f11, M13, %f11 ! FGA Group f12 available + fadds %f3, %f7, %f3 ! FGA Group f13 available + st %f3, [%g2 + 0x04] ! LSU + fadds %f10, %f12, %f10 ! FGA Group f10 available + st %f10, [%g2 + 0x10] ! LSU + fadds %f11, %f13, %f11 ! FGA Group f11 available + st %f11, [%g2 + 0x14] ! LSU + cmp %o1, %o2 ! IEU1 + bne 1b ! CTI + add %g2, 0x20, %g2 ! 
IEU0 Group + + cmp %o1, %g3 + be 3f + nop + +2: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + fmuls %f0, M0, %f2 ! FGM Group + fmuls %f0, M1, %f3 ! FGM Group + fmuls %f1, M4, %f6 ! FGM Group + fmuls %f1, M5, %f7 ! FGM Group + fadds %f2, M12, %f2 ! FGA Group f2 available + fadds %f3, M13, %f3 ! FGA Group f3 available + fadds %f2, %f6, %f2 ! FGA Group 2 cycle stall, f2 available + st %f2, [%g2 + 0x00] ! LSU + fadds %f3, %f7, %f3 ! FGA Group f3 available + st %f3, [%g2 + 0x04] ! LSU + +3: + ba __set_v4f_2 + nop + + .globl _mesa_sparc_transform_points2_2d_no_rot +_mesa_sparc_transform_points2_2d_no_rot: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_5_12_13(%o1) + + cmp %g3, 1 + st %g3, [%o0 + V4F_COUNT] + bl 3f + clr %o1 + + be 2f + andn %g3, 1, %o2 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + add %o1, 2, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + ld [%g1 + 0x00], %f4 ! LSU Group + fmuls %f0, M0, %f2 ! FGM + ld [%g1 + 0x04], %f5 ! LSU Group + fmuls %f1, M5, %f3 ! FGM + fmuls %f4, M0, %f6 ! FGM Group + add %g1, %o5, %g1 ! IEU0 + fmuls %f5, M5, %f7 ! FGM Group + fadds %f2, M12, %f2 ! FGA Group f2 available + st %f2, [%g2 + 0x00] ! LSU + fadds %f3, M13, %f3 ! FGA Group f3 available + st %f3, [%g2 + 0x04] ! LSU + fadds %f6, M12, %f6 ! FGA Group f6 available + st %f6, [%g2 + 0x10] ! LSU + fadds %f7, M13, %f7 ! FGA Group f7 available + st %f7, [%g2 + 0x14] ! LSU + cmp %o1, %o2 ! IEU1 + bne 1b ! CTI + add %g2, 0x20, %g2 ! IEU0 Group + + cmp %o1, %g3 + be 3f + nop + +2: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + fmuls %f0, M0, %f2 ! FGM Group + fmuls %f1, M5, %f3 ! FGM Group + fadds %f2, M12, %f2 ! FGA Group, 2 cycle stall, f2 available + st %f2, [%g2 + 0x00] ! LSU + fadds %f3, M13, %f3 ! FGA Group f3 available + st %f3, [%g2 + 0x04] ! 
LSU + +3: + ba __set_v4f_2 + nop + + /* points2_3d: for each 2-component input point (x, y) store out.x = x*M0 + y*M4 + M12, out.y = x*M1 + y*M5 + M13, out.z = x*M2 + y*M6 + M14. The main loop at 1: handles two points per pass; an odd leftover point is handled at 2:. */ + /* The source and destination data pointers are loaded with LDPTR, exactly as in every sibling routine, so the full pointer width is read when LDPTR is not a plain ld (the companion clip.S defines it as ldx for __sparc_v9__). */ + /* orig: 12 cycles */ + .globl _mesa_sparc_transform_points2_3d +_mesa_sparc_transform_points2_3d: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_1_2_3_4_5_6_12_13_14(%o1) + + cmp %g3, 1 + st %g3, [%o0 + V4F_COUNT] + bl 3f + clr %o1 + + be 2f + andn %g3, 1, %o2 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + add %o1, 2, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + ld [%g1 + 0x00], %f9 ! LSU Group + fmuls %f0, M0, %f2 ! FGM + ld [%g1 + 0x04], %f10 ! LSU Group + fmuls %f0, M1, %f3 ! FGM + fmuls %f0, M2, %f4 ! FGM Group + add %g1, %o5, %g1 ! IEU0 + fmuls %f1, M4, %f6 ! FGM Group + fmuls %f1, M5, %f7 ! FGM Group f2 available + fadds %f2, M12, %f2 ! FGA + fmuls %f1, M6, %f8 ! FGM Group f3 available + fadds %f3, M13, %f3 ! FGA + fmuls %f9, M0, %f11 ! FGM Group f4 available + fadds %f4, M14, %f4 ! FGA + fmuls %f9, M1, %f12 ! FGM Group f6 available + fmuls %f9, M2, %f13 ! FGM Group f2, f7 available + fadds %f2, %f6, %f2 ! FGA + st %f2, [%g2 + 0x00] ! LSU + fmuls %f10, M4, %f14 ! FGM Group f3, f8 available + fadds %f3, %f7, %f3 ! FGA + st %f3, [%g2 + 0x04] ! LSU + fmuls %f10, M5, %f15 ! FGM Group f4, f11 available + fadds %f11, M12, %f11 ! FGA + fmuls %f10, M6, %f0 ! FGM Group f12 available + fadds %f12, M13, %f12 ! FGA + fadds %f13, M14, %f13 ! FGA Group f13 available + fadds %f4, %f8, %f4 ! FGA Group f14 available + st %f4, [%g2 + 0x08] ! LSU + fadds %f11, %f14, %f11 ! FGA Group f15, f11 available + st %f11, [%g2 + 0x10] ! LSU + fadds %f12, %f15, %f12 ! FGA Group f0, f12 available + st %f12, [%g2 + 0x14] ! LSU + fadds %f13, %f0, %f13 ! FGA Group f13 available + st %f13, [%g2 + 0x18] ! LSU + + cmp %o1, %o2 ! IEU1 + bne 1b ! CTI + add %g2, 0x20, %g2 ! IEU0 Group + + cmp %o1, %g3 + be 3f + nop + +2: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + fmuls %f0, M0, %f2 ! FGM Group + fmuls %f0, M1, %f3 !
FGM Group + fmuls %f0, M2, %f4 ! FGM Group + fmuls %f1, M4, %f6 ! FGM Group + fmuls %f1, M5, %f7 ! FGM Group f2 available + fadds %f2, M12, %f2 ! FGA + fmuls %f1, M6, %f8 ! FGM Group f3 available + fadds %f3, M13, %f3 ! FGA + fadds %f4, M14, %f4 ! FGA Group f4 available + fadds %f2, %f6, %f2 ! FGA Group stall, f2, f6, f7 available + st %f2, [%g2 + 0x00] ! LSU + fadds %f3, %f7, %f3 ! FGA Group f3, f8 available + st %f3, [%g2 + 0x04] ! LSU + fadds %f4, %f8, %f4 ! FGA Group f4 available + st %f4, [%g2 + 0x08] ! LSU + +3: + ba __set_v4f_3 + nop + + .globl _mesa_sparc_transform_points2_3d_no_rot +_mesa_sparc_transform_points2_3d_no_rot: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_5_12_13_14(%o1) + + cmp %g3, 1 + st %g3, [%o0 + V4F_COUNT] + bl 3f + clr %o3 + + be 2f + andn %g3, 1, %o2 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + add %o3, 2, %o3 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + ld [%g1 + 0x00], %f4 ! LSU Group + fmuls %f0, M0, %f2 ! FGM + ld [%g1 + 0x04], %f5 ! LSU Group + fmuls %f1, M5, %f3 ! FGM + fmuls %f4, M0, %f6 ! FGM Group + add %g1, %o5, %g1 ! IEU0 + fmuls %f5, M5, %f7 ! FGM Group + fadds %f2, M12, %f2 ! FGA Group f2 available + st %f2, [%g2 + 0x00] ! LSU + fadds %f3, M13, %f3 ! FGA Group f3 available + st %f3, [%g2 + 0x04] ! LSU + fadds %f6, M12, %f6 ! FGA Group f6 available + st M14, [%g2 + 0x08] ! LSU + fadds %f7, M13, %f7 ! FGA Group f7 available + st %f6, [%g2 + 0x10] ! LSU + st %f7, [%g2 + 0x14] ! LSU Group + st M14, [%g2 + 0x18] ! LSU Group + cmp %o3, %o2 ! IEU1 + bne 1b ! CTI + add %g2, 0x20, %g2 ! IEU0 Group + + cmp %o3, %g3 + be 3f + nop + +2: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + fmuls %f0, M0, %f2 ! FGM Group + fmuls %f1, M5, %f3 ! FGM Group + fadds %f2, M12, %f2 ! FGA Group, 2 cycle stall, f2 available + st %f2, [%g2 + 0x00] ! LSU + fadds %f3, M13, %f3 ! 
FGA Group f3 available + st %f3, [%g2 + 0x04] ! LSU + st M14, [%g2 + 0x08] ! LSU Group + +3: ld [%o1 + (14 * 0x4)], %g3 + cmp %g3, 0 + bne __set_v4f_3 + nop + ba __set_v4f_2 + nop + + .globl _mesa_sparc_transform_points2_perspective +_mesa_sparc_transform_points2_perspective: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_5_14(%o1) + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 + ld [%g1 + 0x04], %f1 + add %o1, 1, %o1 + add %g1, %o5, %g1 + fmuls %f0, M0, %f2 + st %f2, [%g2 + 0x00] + fmuls %f1, M5, %f3 + st %f3, [%g2 + 0x04] + st M14, [%g2 + 0x08] + st %g0, [%g2 + 0x0c] + cmp %o1, %g3 + bne 1b + add %g2, 0x10, %g2 +2: + ba __set_v4f_4 + nop + + .globl _mesa_sparc_transform_points3_general +_mesa_sparc_transform_points3_general: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1) + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + ld [%g1 + 0x08], %f2 ! LSU Group + add %o1, 1, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f3 ! FGM + fmuls %f1, M4, %f7 ! FGM Group + fmuls %f0, M1, %f4 ! FGM Group + fmuls %f1, M5, %f8 ! FGM Group + fmuls %f0, M2, %f5 ! FGM Group f3 available + fmuls %f1, M6, %f9 ! FGM Group f7 available + fadds %f3, %f7, %f3 ! FGA + fmuls %f0, M3, %f6 ! FGM Group f4 available + fmuls %f1, M7, %f10 ! FGM Group f8 available + fadds %f4, %f8, %f4 ! FGA + fmuls %f2, M8, %f7 ! FGM Group f5 available + fmuls %f2, M9, %f8 ! FGM Group f9,f3 available + fadds %f5, %f9, %f5 ! FGA + fmuls %f2, M10, %f9 ! FGM Group f6 available + fadds %f6, %f10, %f6 ! FGA Group f10,f4 available + fmuls %f2, M11, %f10 ! FGM + fadds %f3, M12, %f3 ! FGA Group f7 available + fadds %f4, M13, %f4 ! 
FGA Group f8,f5 available + fadds %f5, M14, %f5 ! FGA Group f9 available + fadds %f6, M15, %f6 ! FGA Group f10,f6 available + fadds %f3, %f7, %f3 ! FGA Group f3 available + st %f3, [%g2 + 0x00] ! LSU + fadds %f4, %f8, %f4 ! FGA Group f4 available + st %f4, [%g2 + 0x04] ! LSU + fadds %f5, %f9, %f5 ! FGA Group f5 available + st %f5, [%g2 + 0x08] ! LSU + fadds %f6, %f10, %f6 ! FGA Group f6 available + st %f6, [%g2 + 0x0c] ! LSU + cmp %o1, %g3 ! IEU1 + bne 1b ! CTI + add %g2, 0x10, %g2 ! IEU0 Group +2: + ba __set_v4f_4 + nop + + .globl _mesa_sparc_transform_points3_identity +_mesa_sparc_transform_points3_identity: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 + ld [%g1 + 0x04], %f1 + ld [%g1 + 0x08], %f2 + add %o1, 1, %o1 + add %g1, %o5, %g1 + cmp %o1, %g3 + st %f0, [%g2 + 0x00] + st %f1, [%g2 + 0x04] + st %f2, [%g2 + 0x08] + bne 1b + add %g2, 0x10, %g2 +2: + ba __set_v4f_3 + nop + + .globl _mesa_sparc_transform_points3_2d +_mesa_sparc_transform_points3_2d: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_1_4_5_12_13(%o1) + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + ld [%g1 + 0x08], %f2 ! LSU Group + add %o1, 1, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f3 ! FGM + fmuls %f0, M1, %f4 ! FGM Group + fmuls %f1, M4, %f6 ! FGM Group + fmuls %f1, M5, %f7 ! FGM Group + fadds %f3, M12, %f3 ! FGA Group f3 available + fadds %f4, M13, %f4 ! FGA Group f4 available + fadds %f3, %f6, %f3 ! FGA Group f6 available + st %f3, [%g2 + 0x00] ! LSU + fadds %f4, %f7, %f4 ! FGA Group f7 available + st %f4, [%g2 + 0x04] ! LSU + st %f2, [%g2 + 0x08] ! LSU Group + cmp %o1, %g3 ! IEU1 + bne 1b ! CTI + add %g2, 0x10, %g2 ! 
IEU0 Group +2: + ba __set_v4f_3 + nop + + .globl _mesa_sparc_transform_points3_2d_no_rot +_mesa_sparc_transform_points3_2d_no_rot: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_5_12_13(%o1) + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + ld [%g1 + 0x08], %f2 ! LSU Group + add %o1, 1, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f3 ! FGM + fmuls %f1, M5, %f4 ! FGM Group + st %f2, [%g2 + 0x08] ! LSU + fadds %f3, M12, %f3 ! FGA Group + st %f3, [%g2 + 0x00] ! LSU + fadds %f4, M13, %f4 ! FGA Group + st %f4, [%g2 + 0x04] ! LSU + cmp %o1, %g3 ! IEU1 + bne 1b ! CTI + add %g2, 0x10, %g2 ! IEU0 Group +2: + ba __set_v4f_3 + nop + + .globl _mesa_sparc_transform_points3_3d +_mesa_sparc_transform_points3_3d: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1) + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + ld [%g1 + 0x08], %f2 ! LSU Group + add %o1, 1, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f3 ! FGM + fmuls %f1, M4, %f6 ! FGM Group + fmuls %f0, M1, %f4 ! FGM Group + fmuls %f1, M5, %f7 ! FGM Group + fmuls %f0, M2, %f5 ! FGM Group f3 available + fmuls %f1, M6, %f8 ! FGM Group f6 available + fadds %f3, %f6, %f3 ! FGA + fmuls %f2, M8, %f9 ! FGM Group f4 available + fmuls %f2, M9, %f10 ! FGM Group f7 available + fadds %f4, %f7, %f4 ! FGA + fmuls %f2, M10, %f11 ! FGM Group f5 available + fadds %f5, %f8, %f5 ! FGA Group f8, f3 available + fadds %f3, %f9, %f3 ! FGA Group f9 available + fadds %f4, %f10, %f4 ! FGA Group f10, f4 available + fadds %f5, %f11, %f5 ! FGA Group stall, f11, f5 available + fadds %f3, M12, %f3 ! FGA Group f3 available + st %f3, [%g2 + 0x00] ! 
LSU + fadds %f4, M13, %f4 ! FGA Group f4 available + st %f4, [%g2 + 0x04] ! LSU + fadds %f5, M14, %f5 ! FGA Group f5 available + st %f5, [%g2 + 0x08] ! LSU + cmp %o1, %g3 ! IEU1 + bne 1b ! CTI + add %g2, 0x10, %g2 ! IEU0 Group +2: + ba __set_v4f_3 + nop + + /* points3_3d_no_rot: for each 3-component input point (x, y, z) store out.x = x*M0 + M12, out.y = y*M5 + M13, out.z = z*M10 + M14. The input pointer advances by the stride read from V4F_STRIDE, the output pointer by 0x10 per point; the count is copied to the destination vector and the routine finishes by branching to __set_v4f_3. */ + .globl _mesa_sparc_transform_points3_3d_no_rot +_mesa_sparc_transform_points3_3d_no_rot: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_5_10_12_13_14(%o1) + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + ld [%g1 + 0x08], %f2 ! LSU Group + add %o1, 1, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + /* The loop-exit compare is issued early; the bne at the bottom of the loop consumes its result, so nothing between here and that bne may change the integer condition codes. */ + cmp %o1, %g3 ! IEU1 Group + fmuls %f0, M0, %f3 ! FGM + fmuls %f1, M5, %f4 ! FGM Group + fmuls %f2, M10, %f5 ! FGM Group + fadds %f3, M12, %f3 ! FGA Group, stall, f3 available + st %f3, [%g2 + 0x00] ! LSU + fadds %f4, M13, %f4 ! FGA Group, f4 available + st %f4, [%g2 + 0x04] ! LSU + fadds %f5, M14, %f5 ! FGA Group, f5 available + st %f5, [%g2 + 0x08] ! LSU + bne 1b ! CTI + add %g2, 0x10, %g2 ! IEU0 Group +2: + ba __set_v4f_3 + nop + + .globl _mesa_sparc_transform_points3_perspective +_mesa_sparc_transform_points3_perspective: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_5_8_9_10_14(%o1) + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + ld [%g1 + 0x08], %f2 ! LSU Group + add %o1, 1, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f3 ! FGM + fmuls %f2, M8, %f6 ! FGM Group + fmuls %f1, M5, %f4 ! FGM Group + fmuls %f2, M9, %f7 ! FGM Group + fmuls %f2, M10, %f5 ! FGM Group f3 available + fadds %f3, %f6, %f3 ! FGA Group f6 available + st %f3, [%g2 + 0x00] ! LSU + fadds %f4, %f7, %f4 ! FGA Group stall, f4, f7 available + st %f4, [%g2 + 0x04] ! LSU + fadds %f5, M14, %f5 !
FGA Group + st %f5, [%g2 + 0x08] ! LSU + fnegs %f2, %f6 ! FGA Group + st %f6, [%g2 + 0x0c] ! LSU + cmp %o1, %g3 ! IEU1 + bne 1b ! CTI + add %g2, 0x10, %g2 ! IEU0 Group +2: + ba __set_v4f_4 + nop + + .globl _mesa_sparc_transform_points4_general +_mesa_sparc_transform_points4_general: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1) + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + ld [%g1 + 0x08], %f2 ! LSU Group + ld [%g1 + 0x0c], %f3 ! LSU Group + add %o1, 1, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f4 ! FGM Group + fmuls %f1, M4, %f8 ! FGM Group + fmuls %f0, M1, %f5 ! FGM Group + fmuls %f1, M5, %f9 ! FGM Group + fmuls %f0, M2, %f6 ! FGM Group f4 available + fmuls %f1, M6, %f10 ! FGM Group f8 available + fadds %f4, %f8, %f4 ! FGA + fmuls %f0, M3, %f7 ! FGM Group f5 available + fmuls %f1, M7, %f11 ! FGM Group f9 available + fadds %f5, %f9, %f5 ! FGA + fmuls %f2, M8, %f12 ! FGM Group f6 available + fmuls %f2, M9, %f13 ! FGM Group f10, f4 available + fadds %f6, %f10, %f6 ! FGA + fmuls %f2, M10, %f14 ! FGM Group f7 available + fmuls %f2, M11, %f15 ! FGM Group f11, f5 available + fadds %f7, %f11, %f7 ! FGA + fmuls %f3, M12, %f8 ! FGM Group f12 available + fadds %f4, %f12, %f4 ! FGA + fmuls %f3, M13, %f9 ! FGM Group f13, f6 available + fadds %f5, %f13, %f5 ! FGA + fmuls %f3, M14, %f10 ! FGM Group f14 available + fadds %f6, %f14, %f6 ! FGA + fmuls %f3, M15, %f11 ! FGM Group f15, f7 available + fadds %f7, %f15, %f7 ! FGA + fadds %f4, %f8, %f4 ! FGA Group f8, f4 available + st %f4, [%g2 + 0x00] ! LSU + fadds %f5, %f9, %f5 ! FGA Group f9, f5 available + st %f5, [%g2 + 0x04] ! LSU + fadds %f6, %f10, %f6 ! FGA Group f10, f6 available + st %f6, [%g2 + 0x08] ! LSU + fadds %f7, %f11, %f7 ! FGA Group f11, f7 available + st %f7, [%g2 + 0x0c] ! 
LSU + cmp %o1, %g3 ! IEU1 + bne 1b ! CTI + add %g2, 0x10, %g2 ! IEU0 Group +2: + ba __set_v4f_4 + nop + + .globl _mesa_sparc_transform_points4_identity +_mesa_sparc_transform_points4_identity: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 + ld [%g1 + 0x04], %f1 + ld [%g1 + 0x08], %f2 + add %o1, 1, %o1 + ld [%g1 + 0x0c], %f3 + add %g1, %o5, %g1 + st %f0, [%g2 + 0x00] + st %f1, [%g2 + 0x04] + st %f2, [%g2 + 0x08] + cmp %o1, %g3 + st %f3, [%g2 + 0x0c] + bne 1b + add %g2, 0x10, %g2 +2: + ba __set_v4f_4 + nop + + .globl _mesa_sparc_transform_points4_2d +_mesa_sparc_transform_points4_2d: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_1_4_5_12_13(%o1) + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + ld [%g1 + 0x08], %f2 ! LSU Group + ld [%g1 + 0x0c], %f3 ! LSU Group + add %o1, 1, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f4 ! FGM + fmuls %f1, M4, %f8 ! FGM Group + fmuls %f0, M1, %f5 ! FGM Group + fmuls %f1, M5, %f9 ! FGM Group f4 available + fmuls %f3, M12, %f12 ! FGM Group + fmuls %f3, M13, %f13 ! FGM Group f8 available + fadds %f4, %f8, %f4 ! FGA + fadds %f5, %f9, %f5 ! FGA Group stall, f5, f9 available + fadds %f4, %f12, %f4 ! FGA Group 2 cycle stall, f4, f12, f13 avail + st %f4, [%g2 + 0x00] ! LSU + fadds %f5, %f13, %f5 ! FGA Group f5 available + st %f5, [%g2 + 0x04] ! LSU + st %f2, [%g2 + 0x08] ! LSU Group + st %f3, [%g2 + 0x0c] ! LSU Group + cmp %o1, %g3 ! IEU1 + bne 1b ! CTI + add %g2, 0x10, %g2 ! 
IEU0 Group +2: + ba __set_v4f_4 + nop + + /* points4_2d_no_rot: for each 4-component input point (x, y, z, w) store out.x = x*M0 + w*M12, out.y = y*M5 + w*M13, and copy z and w through unchanged. Only M0, M5, M12 and M13 are read, so the matrix is loaded with LDMATRIX_0_5_12_13, the same macro the points2 and points3 2d_no_rot routines use. */ + .globl _mesa_sparc_transform_points4_2d_no_rot +_mesa_sparc_transform_points4_2d_no_rot: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_5_12_13(%o1) + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 + ld [%g1 + 0x04], %f1 + ld [%g1 + 0x08], %f2 + ld [%g1 + 0x0c], %f3 + add %o1, 1, %o1 + add %g1, %o5, %g1 + fmuls %f0, M0, %f4 + fmuls %f3, M12, %f8 + fmuls %f1, M5, %f5 + fmuls %f3, M13, %f9 + fadds %f4, %f8, %f4 + st %f4, [%g2 + 0x00] + fadds %f5, %f9, %f5 + st %f5, [%g2 + 0x04] + st %f2, [%g2 + 0x08] + st %f3, [%g2 + 0x0c] + cmp %o1, %g3 + bne 1b + add %g2, 0x10, %g2 +2: + ba __set_v4f_4 + nop + + .globl _mesa_sparc_transform_points4_3d +_mesa_sparc_transform_points4_3d: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1) + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + ld [%g1 + 0x08], %f2 ! LSU Group + ld [%g1 + 0x0c], %f3 ! LSU Group + add %o1, 1, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f4 ! FGM + fmuls %f1, M4, %f7 ! FGM Group + fmuls %f0, M1, %f5 ! FGM Group + fmuls %f1, M5, %f8 ! FGM Group + fmuls %f0, M2, %f6 ! FGM Group f4 available + fmuls %f1, M6, %f9 ! FGM Group f7 available + fadds %f4, %f7, %f4 ! FGA + fmuls %f2, M8, %f10 ! FGM Group f5 available + fmuls %f2, M9, %f11 ! FGM Group f8 available + fadds %f5, %f8, %f5 ! FGA + fmuls %f2, M10, %f12 ! FGM Group f6 available + fmuls %f3, M12, %f13 ! FGM Group f9, f4 available + fadds %f6, %f9, %f6 ! FGA + fmuls %f3, M13, %f14 ! FGM Group f10 available + fadds %f4, %f10, %f4 ! FGA + fmuls %f3, M14, %f15 ! FGM Group f11, f5 available + fadds %f5, %f11, %f5 ! FGA + fadds %f6, %f12, %f6 !
FGA Group stall, f12, f13, f6 available + fadds %f4, %f13, %f4 ! FGA Group f14, f4 available + st %f4, [%g2 + 0x00] ! LSU + fadds %f5, %f14, %f5 ! FGA Group f15, f5 available + st %f5, [%g2 + 0x04] ! LSU + fadds %f6, %f15, %f6 ! FGA Group f6 available + st %f6, [%g2 + 0x08] ! LSU + st %f3, [%g2 + 0x0c] ! LSU Group + cmp %o1, %g3 ! IEU1 + bne 1b ! CTI + add %g2, 0x10, %g2 ! IEU0 Group +2: + ba __set_v4f_4 + nop + + .globl _mesa_sparc_transform_points4_3d_no_rot +_mesa_sparc_transform_points4_3d_no_rot: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_5_10_12_13_14(%o1) + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + ld [%g1 + 0x08], %f2 ! LSU Group + ld [%g1 + 0x0c], %f3 ! LSU Group + add %o1, 1, %o1 ! IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f4 ! FGM + fmuls %f3, M12, %f7 ! FGM Group + fmuls %f1, M5, %f5 ! FGM Group + fmuls %f3, M13, %f8 ! FGM Group + fmuls %f2, M10, %f6 ! FGM Group f4 available + fmuls %f3, M14, %f9 ! FGM Group f7 available + fadds %f4, %f7, %f4 ! FGA + st %f4, [%g2 + 0x00] ! LSU + fadds %f5, %f8, %f5 ! FGA Group stall, f5, f8 available + st %f5, [%g2 + 0x04] ! LSU + fadds %f6, %f9, %f6 ! FGA Group stall, f6, f9 available + st %f6, [%g2 + 0x08] ! LSU + st %f3, [%g2 + 0x0c] ! LSU Group + cmp %o1, %g3 ! IEU1 + bne 1b ! CTI + add %g2, 0x10, %g2 ! IEU0 Group +2: + ba __set_v4f_4 + nop + + .globl _mesa_sparc_transform_points4_perspective +_mesa_sparc_transform_points4_perspective: + ld [%o2 + V4F_STRIDE], %o5 + LDPTR [%o2 + V4F_START], %g1 + LDPTR [%o0 + V4F_START], %g2 + ld [%o2 + V4F_COUNT], %g3 + + LDMATRIX_0_5_8_9_10_14(%o1) + + cmp %g3, 0 + st %g3, [%o0 + V4F_COUNT] + be 2f + clr %o1 + +1: ld [%g1 + 0x00], %f0 ! LSU Group + ld [%g1 + 0x04], %f1 ! LSU Group + ld [%g1 + 0x08], %f2 ! LSU Group + ld [%g1 + 0x0c], %f3 ! LSU Group + add %o1, 1, %o1 ! 
IEU0 + add %g1, %o5, %g1 ! IEU1 + fmuls %f0, M0, %f4 ! FGM + fmuls %f2, M8, %f7 ! FGM Group + fmuls %f1, M5, %f5 ! FGM Group + fmuls %f2, M9, %f8 ! FGM Group + fmuls %f2, M10, %f6 ! FGM Group f4 available + fmuls %f3, M14, %f9 ! FGM Group f7 available + fadds %f4, %f7, %f4 ! FGA + st %f4, [%g2 + 0x00] ! LSU + fadds %f5, %f8, %f5 ! FGA Group stall, f5, f8 available + st %f5, [%g2 + 0x04] ! LSU + fadds %f6, %f9, %f6 ! FGA Group stall, f6, f9 available + st %f6, [%g2 + 0x08] ! LSU + fnegs %f2, %f7 ! FGA Group + st %f7, [%g2 + 0x0c] ! LSU + cmp %o1, %g3 ! IEU1 + bne 1b ! CTI + add %g2, 0x10, %g2 ! IEU0 Group +2: + ba __set_v4f_4 + nop