From fc2427e81b1c648550d0368652d6a475df785027 Mon Sep 17 00:00:00 2001 From: Gareth Hughes Date: Mon, 23 Oct 2000 00:16:28 +0000 Subject: [PATCH] Major audit of all Mesa's x86 assembly code. This round is basically general cleanups - more to come. Added P6 architecture timing to debug_xform routines. Still need to add test_all_vertex_functions test for the v16 asm. Dynamic reconfiguration of counter overhead for more accurate benchmarking. --- src/mesa/main/blend.c | 8 +- src/mesa/main/context.c | 7 +- src/mesa/x86/3dnow.c | 175 ++++++++++++-------------- src/mesa/x86/3dnow.h | 75 ++--------- src/mesa/x86/assyntax.h | 2 +- src/mesa/x86/clip_args.h | 76 ++++++++++++ src/mesa/x86/common_x86.c | 109 +++++++++------- src/mesa/x86/common_x86_asm.S | 152 +++++++++++++++++++++++ src/mesa/x86/common_x86_asm.h | 63 ++++++++++ src/mesa/x86/common_x86_features.h | 77 ++++++++++++ src/mesa/x86/mmx.h | 11 +- src/mesa/x86/mmx_blend.S | 4 - src/mesa/x86/x86.c | 154 ++++++++++++----------- src/mesa/x86/x86.h | 20 +-- src/mesa/x86/x86_cliptest.S | 248 +++++++++++++++++++++++++++++++++++++ src/mesa/x86/xform_args.h | 74 +++++++++++ 16 files changed, 955 insertions(+), 300 deletions(-) create mode 100644 src/mesa/x86/clip_args.h create mode 100644 src/mesa/x86/common_x86_asm.S create mode 100644 src/mesa/x86/common_x86_asm.h create mode 100644 src/mesa/x86/common_x86_features.h create mode 100644 src/mesa/x86/x86_cliptest.S create mode 100644 src/mesa/x86/xform_args.h diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c index c4e8e86..8ea3297 100644 --- a/src/mesa/main/blend.c +++ b/src/mesa/main/blend.c @@ -1,4 +1,4 @@ -/* $Id: blend.c,v 1.19 2000/10/19 18:08:05 brianp Exp $ */ +/* $Id: blend.c,v 1.20 2000/10/23 00:16:28 gareth Exp $ */ /* * Mesa 3-D graphics library @@ -826,7 +826,7 @@ blend_general( GLcontext *ctx, GLuint n, const GLubyte mask[], #if defined(USE_MMX_ASM) #include "X86/mmx.h" -#include "X86/common_x86asm.h" +#include "X86/common_x86_asm.h" #endif @@ -846,8 
+846,8 @@ static void set_blend_function( GLcontext *ctx ) /* Hmm. A table here would have 12^4 == way too many entries. * Provide a hook for MMX instead. */ - if (gl_x86_cpu_features & GL_CPU_MMX) { - gl_mmx_set_blend_function (ctx); + if ( cpu_has_mmx ) { + gl_mmx_set_blend_function( ctx ); } else #endif diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index c57f37c..8a5bf55 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1,4 +1,4 @@ -/* $Id: context.c,v 1.94 2000/10/21 00:02:47 brianp Exp $ */ +/* $Id: context.c,v 1.95 2000/10/23 00:16:28 gareth Exp $ */ /* * Mesa 3-D graphics library @@ -454,6 +454,7 @@ one_time_init( void ) gl_init_translate(); gl_init_vbrender(); gl_init_vbxform(); + gl_init_vertices(); if (getenv("MESA_DEBUG")) { _glapi_noop_enable_warnings(GL_TRUE); @@ -884,7 +885,7 @@ init_attrib_groups( GLcontext *ctx ) ctx->Current.Primitive = (GLenum) (GL_POLYGON + 1); ctx->Current.Flag = (VERT_NORM | - VERT_INDEX | + VERT_INDEX | VERT_RGBA | VERT_EDGE | VERT_TEX0_1 | @@ -2114,4 +2115,4 @@ GLenum gl_reduce_prim[GL_POLYGON+1] = { GL_TRIANGLES, GL_TRIANGLES, GL_TRIANGLES, -}; +}; diff --git a/src/mesa/x86/3dnow.c b/src/mesa/x86/3dnow.c index 5dc3b38..3becc8c 100644 --- a/src/mesa/x86/3dnow.c +++ b/src/mesa/x86/3dnow.c @@ -1,4 +1,4 @@ -/* $Id: 3dnow.c,v 1.7 2000/09/17 21:12:40 gareth Exp $ */ +/* $Id: 3dnow.c,v 1.8 2000/10/23 00:16:28 gareth Exp $ */ /* * Mesa 3-D graphics library @@ -24,88 +24,75 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - /* * 3DNow! 
optimizations contributed by * Holger Waechtler */ -#if defined(USE_3DNOW_ASM) && defined(USE_X86_ASM) -#include "3dnow.h" - -#include -#include -#include -#include +#include "glheader.h" #include "context.h" #include "types.h" -#include "xform.h" #include "vertices.h" +#include "xform.h" +#include "3dnow.h" #ifdef DEBUG #include "debug_xform.h" #endif - - -#define XFORM_ARGS GLvector4f *to_vec, \ - const GLfloat m[16], \ - const GLvector4f *from_vec, \ - const GLubyte *mask, \ +#define XFORM_ARGS GLvector4f *to_vec, \ + const GLfloat m[16], \ + const GLvector4f *from_vec, \ + const GLubyte *mask, \ const GLubyte flag - -#define DECLARE_XFORM_GROUP( pfx, v, masked ) \ - extern void _ASMAPI gl_##pfx##_transform_points##v##_general_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##v##_identity_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##v##_3d_no_rot_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##v##_perspective_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##v##_2d_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##v##_2d_no_rot_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##v##_3d_##masked(XFORM_ARGS); - - - -#define ASSIGN_XFORM_GROUP( pfx, cma, vsize, masked ) \ - gl_transform_tab[cma][vsize][MATRIX_GENERAL] = \ - gl_##pfx##_transform_points##vsize##_general_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_IDENTITY] = \ - gl_##pfx##_transform_points##vsize##_identity_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_3D_NO_ROT] = \ - gl_##pfx##_transform_points##vsize##_3d_no_rot_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_PERSPECTIVE] = \ - gl_##pfx##_transform_points##vsize##_perspective_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_2D] = \ - gl_##pfx##_transform_points##vsize##_2d_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_2D_NO_ROT] = \ - 
gl_##pfx##_transform_points##vsize##_2d_no_rot_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_3D] = \ - gl_##pfx##_transform_points##vsize##_3d_##masked; - - - - -#define NORM_ARGS const GLmatrix *mat, \ - GLfloat scale, \ - const GLvector3f *in, \ - const GLfloat *lengths, \ - const GLubyte mask[], \ +#define DECLARE_XFORM_GROUP( pfx, sz, masked ) \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_general_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_identity_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_no_rot_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_perspective_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_no_rot_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS ); + + +#define ASSIGN_XFORM_GROUP( pfx, cma, sz, masked ) \ + gl_transform_tab[cma][sz][MATRIX_GENERAL] = \ + gl_##pfx##_transform_points##sz##_general_##masked; \ + gl_transform_tab[cma][sz][MATRIX_IDENTITY] = \ + gl_##pfx##_transform_points##sz##_identity_##masked; \ + gl_transform_tab[cma][sz][MATRIX_3D_NO_ROT] = \ + gl_##pfx##_transform_points##sz##_3d_no_rot_##masked; \ + gl_transform_tab[cma][sz][MATRIX_PERSPECTIVE] = \ + gl_##pfx##_transform_points##sz##_perspective_##masked; \ + gl_transform_tab[cma][sz][MATRIX_2D] = \ + gl_##pfx##_transform_points##sz##_2d_##masked; \ + gl_transform_tab[cma][sz][MATRIX_2D_NO_ROT] = \ + gl_##pfx##_transform_points##sz##_2d_no_rot_##masked; \ + gl_transform_tab[cma][sz][MATRIX_3D] = \ + gl_##pfx##_transform_points##sz##_3d_##masked; + + + +#define NORM_ARGS const GLmatrix *mat, \ + GLfloat scale, \ + const GLvector3f *in, \ + const GLfloat *lengths, \ + const GLubyte mask[], \ GLvector3f *dest - #define DECLARE_NORM_GROUP( pfx, masked ) \ - extern void _ASMAPI 
gl_##pfx##_rescale_normals_##masked(NORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_normalize_normals_##masked(NORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_normals_##masked(NORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_normals_no_rot_##masked(NORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_rescale_normals_##masked(NORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_rescale_normals_no_rot_##masked(NORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_normalize_normals_##masked(NORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_normalize_normals_no_rot_##masked(NORM_ARGS); - + extern void _ASMAPI gl_##pfx##_rescale_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_normalize_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_normals_no_rot_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_rescale_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_rescale_normals_no_rot_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_normalize_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_normalize_normals_no_rot_##masked( NORM_ARGS ); #define ASSIGN_NORM_GROUP( pfx, cma, masked ) \ @@ -127,24 +114,7 @@ gl_##pfx##_transform_normalize_normals_no_rot_##masked; -extern void _ASMAPI gl_3dnow_project_vertices( GLfloat *first, - GLfloat *last, - const GLfloat *m, - GLuint stride ); - -extern void _ASMAPI gl_3dnow_project_clipped_vertices( GLfloat *first, - GLfloat *last, - const GLfloat *m, - GLuint stride, - const GLubyte *clipmask ); - -extern void _ASMAPI gl_v16_3dnow_general_xform( GLfloat *first_vert, - const GLfloat *m, - const GLfloat *src, - GLuint src_stride, - GLuint count ); - - +#ifdef USE_3DNOW_ASM DECLARE_XFORM_GROUP( 3dnow, 1, raw ) DECLARE_XFORM_GROUP( 3dnow, 2, raw ) DECLARE_XFORM_GROUP( 3dnow, 3, raw ) @@ -159,8 +129,28 @@ 
DECLARE_NORM_GROUP( 3dnow, raw ) /*DECLARE_NORM_GROUP( 3dnow, masked )*/ -void gl_init_3dnow_asm_transforms( void ) +extern void _ASMAPI gl_v16_3dnow_general_xform( GLfloat *first_vert, + const GLfloat *m, + const GLfloat *src, + GLuint src_stride, + GLuint count ); + +extern void _ASMAPI gl_3dnow_project_vertices( GLfloat *first, + GLfloat *last, + const GLfloat *m, + GLuint stride ); + +extern void _ASMAPI gl_3dnow_project_clipped_vertices( GLfloat *first, + GLfloat *last, + const GLfloat *m, + GLuint stride, + const GLubyte *clipmask ); +#endif + + +void gl_init_3dnow_transform_asm( void ) { +#ifdef USE_3DNOW_ASM ASSIGN_XFORM_GROUP( 3dnow, 0, 1, raw ); ASSIGN_XFORM_GROUP( 3dnow, 0, 2, raw ); ASSIGN_XFORM_GROUP( 3dnow, 0, 3, raw ); @@ -178,21 +168,18 @@ void gl_init_3dnow_asm_transforms( void ) gl_test_all_transform_functions( "3DNow!" ); gl_test_all_normal_transform_functions( "3DNow!" ); #endif - - /* Hook in some stuff for vertices.c. - */ - gl_xform_points3_v16_general = gl_v16_3dnow_general_xform; - gl_project_v16 = gl_3dnow_project_vertices; - gl_project_clipped_v16 = gl_3dnow_project_clipped_vertices; +#endif } -#else - -/* silence compiler warning */ -extern void _mesa_3dnow_dummy_function( void ); - -void _mesa_3dnow_dummy_function( void ) +void gl_init_3dnow_vertex_asm( void ) { -} +#ifdef USE_3DNOW_ASM + gl_xform_points3_v16_general = gl_v16_3dnow_general_xform; + gl_project_v16 = gl_3dnow_project_vertices; + gl_project_clipped_v16 = gl_3dnow_project_clipped_vertices; +#if 0 + gl_test_all_vertex_functions( "3DNow!" ); +#endif #endif +} diff --git a/src/mesa/x86/3dnow.h b/src/mesa/x86/3dnow.h index b06cc5b..1e17cc4 100644 --- a/src/mesa/x86/3dnow.h +++ b/src/mesa/x86/3dnow.h @@ -1,21 +1,21 @@ -/* $Id: 3dnow.h,v 1.1 1999/08/19 00:55:42 jtg Exp $ */ +/* $Id: 3dnow.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */ /* * Mesa 3-D graphics library - * Version: 3.1 - * + * Version: 3.5 + * * Copyright (C) 1999 Brian Paul All Rights Reserved. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -24,72 +24,17 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - /* * 3DNow! optimizations contributed by * Holger Waechtler */ - -#ifndef _3dnow_h -#define _3dnow_h - - +#ifndef __3DNOW_H__ +#define __3DNOW_H__ #include "xform.h" - -void gl_init_3dnow_asm_transforms (void); - - - - -#if 0 -GLvector4f *gl_project_points( GLvector4f *proj_vec, - const GLvector4f *clip_vec ) -{ - __asm__ ( - " femms \n" - " \n" - " movq (%0), %%mm0 # x1 | x0 \n" - " movq 8(%0), %%mm1 # oow | x2 \n" - " \n" - "1: movq %%mm1, %%mm2 # oow | x2 \n" - " addl %2, %0 # next point \n" - " \n" - " punpckhdq %%mm2, %%mm2 # oow | oow \n" - " addl $16, %1 # next point \n" - " \n" - " pfrcp %%mm2, %%mm3 # 1/oow | 1/oow \n" - " decl %3 \n" - " \n" - " pfmul %%mm3, %%mm0 # x1/oow | x0/oow \n" - " movq %%mm0, -16(%1) # write r0, r1 \n" - " \n" - " pfmul %%mm3, %%mm1 # 1 | x2/oow \n" - " movq (%0), %%mm0 # x1 | x0 \n" - " \n" - " movd %%mm1, 8(%1) # write r2 \n" - " movd %%mm3, 12(%1) # write r3 \n" - " \n" - " movq 8(%0), %%mm1 # oow | x2 \n" - " ja 1b \n" - " \n" - " femms \n" - " " - ::"a" (clip_vec->start), - "c" (proj_vec->start), - "g" 
(clip_vec->stride), - "d" (clip_vec->count) - ); - - proj_vec->flags |= VEC_SIZE_4; - proj_vec->size = 3; - proj_vec->count = clip_vec->count; - return proj_vec; -} -#endif - - +void gl_init_3dnow_transform_asm( void ); +void gl_init_3dnow_vertex_asm( void ); #endif diff --git a/src/mesa/x86/assyntax.h b/src/mesa/x86/assyntax.h index 34bd11f..b9bb1f7 100644 --- a/src/mesa/x86/assyntax.h +++ b/src/mesa/x86/assyntax.h @@ -1,4 +1,4 @@ -/* $Id: assyntax.h,v 1.15 2000/09/18 22:49:04 gareth Exp $ */ +/* $Id: assyntax.h,v 1.16 2000/10/23 00:16:28 gareth Exp $ */ #ifndef __ASSYNTAX_H__ #define __ASSYNTAX_H__ diff --git a/src/mesa/x86/clip_args.h b/src/mesa/x86/clip_args.h new file mode 100644 index 0000000..0829ec7 --- /dev/null +++ b/src/mesa/x86/clip_args.h @@ -0,0 +1,76 @@ +/* $Id: clip_args.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Clip test function interface for assembly code. Simply define + * FRAME_OFFSET to the number of bytes pushed onto the stack before + * using the ARG_* argument macros. + * + * Gareth Hughes + */ + +#ifndef __CLIP_ARGS_H__ +#define __CLIP_ARGS_H__ + +/* Offsets into GLvector4f + */ +#define V4F_DATA 0 +#define V4F_START 4 +#define V4F_COUNT 8 +#define V4F_STRIDE 12 +#define V4F_SIZE 16 +#define V4F_FLAGS 20 + +/* GLvector4f flags + */ +#define VEC_SIZE_1 1 +#define VEC_SIZE_2 3 +#define VEC_SIZE_3 7 +#define VEC_SIZE_4 15 + +/* + * Offsets for clip_func arguments + * + * typedef GLvector4f *(*clip_func)( GLvector4f *vClip, + * GLvector4f *vProj, + * GLubyte clipMask[], + * GLubyte *orMask, + * GLubyte *andMask ); + */ + +#define OFFSET_SOURCE 4 +#define OFFSET_DEST 8 +#define OFFSET_CLIP 12 +#define OFFSET_OR 16 +#define OFFSET_AND 20 + +#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP) +#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) +#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP) +#define ARG_OR REGOFF(FRAME_OFFSET+OFFSET_OR, ESP) +#define ARG_AND REGOFF(FRAME_OFFSET+OFFSET_AND, ESP) + +#endif diff --git a/src/mesa/x86/common_x86.c b/src/mesa/x86/common_x86.c index e779fe1..93d2116 100644 --- a/src/mesa/x86/common_x86.c +++ b/src/mesa/x86/common_x86.c @@ -1,21 +1,21 @@ -/* $Id: common_x86.c,v 1.6 2000/01/25 17:04:47 brianp Exp $ */ +/* $Id: common_x86.c,v 1.7 2000/10/23 00:16:28 gareth Exp $ */ /* * Mesa 3-D graphics library - * Version: 3.3 - * + * Version: 3.5 + * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
- * + * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -26,81 +26,102 @@ /* - * Check CPU capabilities & initialize optimized funtions for this particular - * processor. + * Check CPU capabilities & initialize optimized functions for this particular + * processor. * - * Written by Holger Waechtler - * Changed by Andre Werthmann for using the - * new Katmai functions + * Written by Holger Waechtler + * Changed by Andre Werthmann for using the + * new Katmai functions. */ #include #include -#include "common_x86asm.h" + +#include "common_x86_asm.h" + int gl_x86_cpu_features = 0; -static void message(const char *msg) +/* No reason for this to be public. 
+ */ +extern int gl_identify_x86_cpu_features( void ); + + +static void message( const char *msg ) { - if (getenv("MESA_DEBUG")) - fprintf(stderr, "%s\n", msg); + if ( getenv( "MESA_DEBUG" ) ) { + fprintf( stderr, "%s\n", msg ); + } } -void gl_init_all_x86_asm (void) +void gl_init_all_x86_transform_asm( void ) { #ifdef USE_X86_ASM - gl_x86_cpu_features = gl_identify_x86_cpu_features (); - gl_x86_cpu_features |= GL_CPU_AnyX86; + gl_x86_cpu_features = gl_identify_x86_cpu_features(); - if (getenv("MESA_NO_ASM") != 0) + if ( getenv( "MESA_NO_ASM" ) ) { gl_x86_cpu_features = 0; - - if (gl_x86_cpu_features & GL_CPU_GenuineIntel) { - message("GenuineIntel cpu detected."); } - if (gl_x86_cpu_features) { - gl_init_x86_asm_transforms (); + if ( gl_x86_cpu_features ) { + gl_init_x86_transform_asm(); } #ifdef USE_MMX_ASM - if (gl_x86_cpu_features & GL_CPU_MMX) { - char *s = getenv( "MESA_NO_MMX" ); - if (s == NULL) { - message("MMX cpu detected."); + if ( cpu_has_mmx ) { + if ( getenv( "MESA_NO_MMX" ) == 0 ) { + message( "MMX cpu detected." ); } else { - gl_x86_cpu_features &= (~GL_CPU_MMX); + gl_x86_cpu_features &= ~(X86_FEATURE_MMX); } } #endif - #ifdef USE_3DNOW_ASM - if (gl_x86_cpu_features & GL_CPU_3Dnow) { - char *s = getenv( "MESA_NO_3DNOW" ); - if (s == NULL) { - message("3Dnow cpu detected."); - gl_init_3dnow_asm_transforms (); + if ( cpu_has_3dnow ) { + if ( getenv( "MESA_NO_3DNOW" ) == 0 ) { + message( "3Dnow cpu detected." ); + gl_init_3dnow_transform_asm(); } else { - gl_x86_cpu_features &= (~GL_CPU_3Dnow); + gl_x86_cpu_features &= ~(X86_FEATURE_3DNOW); } } #endif - #ifdef USE_KATMAI_ASM - if (gl_x86_cpu_features & GL_CPU_Katmai) { - char *s = getenv( "MESA_NO_KATMAI" ); - if (s == NULL) { - message("Katmai cpu detected."); - gl_init_katmai_asm_transforms (); + if ( cpu_has_xmm ) { + if ( getenv( "MESA_NO_KATMAI" ) == 0 ) { + message( "Katmai cpu detected." 
); + gl_init_katmai_transform_asm(); } else { - gl_x86_cpu_features &= (~GL_CPU_Katmai); + gl_x86_cpu_features &= ~(X86_FEATURE_XMM); } } #endif - #endif } +/* Note: the above function must be called before this one, so that + * gl_x86_cpu_features gets correctly initialized. + */ +void gl_init_all_x86_vertex_asm( void ) +{ +#ifdef USE_X86_ASM + if ( gl_x86_cpu_features ) { + gl_init_x86_vertex_asm(); + } + +#ifdef USE_3DNOW_ASM + if ( cpu_has_3dnow && getenv( "MESA_NO_3DNOW" ) == 0 ) { + gl_init_3dnow_vertex_asm(); + } +#endif + +#ifdef USE_KATMAI_ASM + if ( cpu_has_xmm && getenv( "MESA_NO_KATMAI" ) == 0 ) { + gl_init_katmai_vertex_asm(); + } +#endif +#endif +} diff --git a/src/mesa/x86/common_x86_asm.S b/src/mesa/x86/common_x86_asm.S new file mode 100644 index 0000000..675711e --- /dev/null +++ b/src/mesa/x86/common_x86_asm.S @@ -0,0 +1,152 @@ +/* $Id: common_x86_asm.S,v 1.2 2000/10/23 00:16:28 gareth Exp $ */ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Check extended CPU capabilities. Now just returns the raw CPUID + * feature information, allowing the higher level code to interpret the + * results. + * + * Written by Holger Waechtler + * + * Cleaned up and simplified by Gareth Hughes + */ + +#include "assyntax.h" +#include "common_x86_features.h" + + +/* Intel vendor string + */ +#define GENU 0x756e6547 /* "Genu" */ +#define INEI 0x49656e69 /* "ineI" */ +#define NTEL 0x6c65746e /* "ntel" */ + +/* AMD vendor string + */ +#define AUTH 0x68747541 /* "Auth" */ +#define ENTI 0x69746e65 /* "enti" */ +#define CAMD 0x444d4163 /* "cAMD" */ + + + SEG_DATA + +/* We might want to print out some useful messages. + */ +LLBL( found_intel ): STRING( "Genuine Intel processor found\n\0" ) +LLBL( found_amd ): STRING( "Authentic AMD processor found\n\0" ) + + + SEG_TEXT + +ALIGNTEXT4 +GLOBL GLNAME( gl_identify_x86_cpu_features ) +GLNAME( gl_identify_x86_cpu_features ): + + PUSH_L ( EBX ) + + /* Test for the CPUID command. If the ID Flag bit in EFLAGS + * (bit 21) is writable, the CPUID command is present. + */ + PUSHF_L + POP_L ( EAX ) + MOV_L ( EAX, ECX ) + XOR_L ( CONST(0x00200000), EAX ) + PUSH_L ( EAX ) + POPF_L + PUSHF_L + POP_L ( EAX ) + + /* Verify the ID Flag bit has been written. + */ + CMP_L ( ECX, EAX ) + JZ ( LLBL ( cpuid_done ) ) + + /* Get the CPU vendor info. + */ + XOR_L ( EAX, EAX ) + CPUID + + /* Test for Intel processors. We must look for the + * "GenuineIntel" string in EBX, ECX and EDX. 
+ */ + CMP_L ( CONST(GENU), EBX ) + JNE ( LLBL( cpuid_amd ) ) + CMP_L ( CONST(INEI), EDX ) + JNE ( LLBL( cpuid_amd ) ) + CMP_L ( CONST(NTEL), ECX ) + JNE ( LLBL( cpuid_amd ) ) + + /* We have an Intel processor, so we can get the feature + * information with an CPUID input value of 1. + */ + MOV_L ( CONST(0x1), EAX ) + CPUID + MOV_L ( EDX, EAX ) + JMP ( LLBL( cpuid_done ) ) + +LLBL( cpuid_amd ): + + /* Test for AMD processors. We must look for the + * "AuthenticAMD" string in EBX, ECX and EDX. + */ + CMP_L ( CONST(AUTH), EBX ) + JNE ( LLBL( cpuid_other ) ) + CMP_L ( CONST(ENTI), EDX ) + JNE ( LLBL( cpuid_other ) ) + CMP_L ( CONST(CAMD), ECX ) + JNE ( LLBL( cpuid_other ) ) + + /* We have an AMD processor, so we can get the feature + * information after we verify that the extended functions are + * supported. + */ + MOV_L ( CONST(0x80000000), EAX ) + CPUID + TEST_L ( EAX, EAX ) + JZ ( LLBL ( cpuid_failed ) ) + + MOV_L ( CONST(0x80000001), EAX ) + CPUID + MOV_L ( EDX, EAX ) + JMP ( LLBL ( cpuid_done ) ) + +LLBL( cpuid_other ): + + /* Test for other processors here when required. + */ + +LLBL( cpuid_failed ): + + /* If we can't determine the feature information, we must + * return zero to indicate that no platform-specific + * optimizations can be used. + */ + MOV_L ( CONST(0), EAX ) + +LLBL ( cpuid_done ): + + POP_L ( EBX ) + RET diff --git a/src/mesa/x86/common_x86_asm.h b/src/mesa/x86/common_x86_asm.h new file mode 100644 index 0000000..880be22 --- /dev/null +++ b/src/mesa/x86/common_x86_asm.h @@ -0,0 +1,63 @@ +/* $Id: common_x86_asm.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Check CPU capabilities & initialize optimized functions for this particular + * processor. + * + * Written by Holger Waechtler + * Changed by Andre Werthmann for using the + * new Katmai functions + * + * Reimplemented by Gareth Hughes in a more + * future-proof manner, based on code in the Linux kernel. 
+ */ + +#ifndef __COMMON_X86_ASM_H__ +#define __COMMON_X86_ASM_H__ + +#include "common_x86_features.h" + +#ifdef HAVE_CONFIG_H +#include "conf.h" +#endif + +#ifdef USE_X86_ASM +#include "x86.h" +#ifdef USE_3DNOW_ASM +#include "3dnow.h" +#endif +#ifdef USE_KATMAI_ASM +#include "katmai.h" +#endif +#endif + +extern int gl_x86_cpu_features; + +extern void gl_init_all_x86_transform_asm( void ); +extern void gl_init_all_x86_vertex_asm( void ); + +#endif diff --git a/src/mesa/x86/common_x86_features.h b/src/mesa/x86/common_x86_features.h new file mode 100644 index 0000000..2f575c8 --- /dev/null +++ b/src/mesa/x86/common_x86_features.h @@ -0,0 +1,77 @@ +/* $Id: common_x86_features.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * x86 CPUID feature information. 
The raw data is returned by + * gl_identify_x86_cpu_features() and interpreted with the cpu_has_* + * helper macros. + * + * Gareth Hughes + */ + +#ifndef __COMMON_X86_FEATURES_H__ +#define __COMMON_X86_FEATURES_H__ + +/* Capabilities of CPUs + */ +#define X86_FEATURE_FPU 0x00000001 +#define X86_FEATURE_VME 0x00000002 +#define X86_FEATURE_DE 0x00000004 +#define X86_FEATURE_PSE 0x00000008 +#define X86_FEATURE_TSC 0x00000010 +#define X86_FEATURE_MSR 0x00000020 +#define X86_FEATURE_PAE 0x00000040 +#define X86_FEATURE_MCE 0x00000080 +#define X86_FEATURE_CX8 0x00000100 +#define X86_FEATURE_APIC 0x00000200 +#define X86_FEATURE_10 0x00000400 +#define X86_FEATURE_SEP 0x00000800 +#define X86_FEATURE_MTRR 0x00001000 +#define X86_FEATURE_PGE 0x00002000 +#define X86_FEATURE_MCA 0x00004000 +#define X86_FEATURE_CMOV 0x00008000 +#define X86_FEATURE_PAT 0x00010000 +#define X86_FEATURE_PSE36 0x00020000 +#define X86_FEATURE_18 0x00040000 +#define X86_FEATURE_19 0x00080000 +#define X86_FEATURE_20 0x00100000 +#define X86_FEATURE_21 0x00200000 +#define X86_FEATURE_MMXEXT 0x00400000 +#define X86_FEATURE_MMX 0x00800000 +#define X86_FEATURE_FXSR 0x01000000 +#define X86_FEATURE_XMM 0x02000000 +#define X86_FEATURE_26 0x04000000 +#define X86_FEATURE_27 0x08000000 +#define X86_FEATURE_28 0x10000000 +#define X86_FEATURE_29 0x20000000 +#define X86_FEATURE_3DNOWEXT 0x40000000 +#define X86_FEATURE_3DNOW 0x80000000 + +#define cpu_has_mmx (gl_x86_cpu_features & X86_FEATURE_MMX) +#define cpu_has_xmm (gl_x86_cpu_features & X86_FEATURE_XMM) +#define cpu_has_3dnow (gl_x86_cpu_features & X86_FEATURE_3DNOW) + +#endif diff --git a/src/mesa/x86/mmx.h b/src/mesa/x86/mmx.h index f0e05cf..0fad398 100644 --- a/src/mesa/x86/mmx.h +++ b/src/mesa/x86/mmx.h @@ -1,20 +1,21 @@ +/* $Id: mmx.h,v 1.3 2000/10/23 00:16:28 gareth Exp $ */ /* * Mesa 3-D graphics library - * Version: 3.1 - * + * Version: 3.5 + * * Copyright (C) 1999 Brian Paul All Rights Reserved. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL diff --git a/src/mesa/x86/mmx_blend.S b/src/mesa/x86/mmx_blend.S index e7d6e11..21fa36e 100644 --- a/src/mesa/x86/mmx_blend.S +++ b/src/mesa/x86/mmx_blend.S @@ -350,7 +350,3 @@ LLBL(GMBT_1): MOV_L ( EBP, ESP ) POP_L ( EBP ) RET - - - - diff --git a/src/mesa/x86/x86.c b/src/mesa/x86/x86.c index 2db200d..fcd0978 100644 --- a/src/mesa/x86/x86.c +++ b/src/mesa/x86/x86.c @@ -1,21 +1,21 @@ -/* $Id: x86.c,v 1.8 2000/06/27 22:10:01 brianp Exp $ */ +/* $Id: x86.c,v 1.9 2000/10/23 00:16:28 gareth Exp $ */ /* * Mesa 3-D graphics library - * Version: 3.1 - * + * Version: 3.5 + * * Copyright (C) 1999 Brian Paul All Rights Reserved. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -28,7 +28,6 @@ * Intel x86 assembly code by Josh Vanderhoof */ - #include "glheader.h" #include "context.h" #include "types.h" @@ -36,89 +35,104 @@ #include "xform.h" #include "x86.h" -#ifdef USE_X86_ASM -extern void _ASMAPI gl_v16_x86_cliptest_points4( GLfloat *first_vert, - GLfloat *last_vert, - GLubyte *or_mask, - GLubyte *and_mask, - GLubyte *clip_mask ); - - -extern void _ASMAPI gl_v16_x86_general_xform( GLfloat *dest, - const GLfloat *m, - const GLfloat *src, - GLuint src_stride, - GLuint count ); +#ifdef DEBUG +#include "debug_xform.h" #endif -#define XFORM_ARGS GLvector4f *to_vec, \ - const GLfloat m[16], \ - const GLvector4f *from_vec, \ - const GLubyte *mask, \ +#define XFORM_ARGS GLvector4f *to_vec, \ + const GLfloat m[16], \ + const GLvector4f *from_vec, \ + const GLubyte *mask, \ const GLubyte flag -#define DECLARE_XFORM_GROUP(pfx, vsize, masked) \ - extern void _ASMAPI gl_##pfx##_transform_points##vsize##_general_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##vsize##_identity_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##vsize##_3d_no_rot_##masked(XFORM_ARGS); \ - extern void 
_ASMAPI gl_##pfx##_transform_points##vsize##_perspective_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##vsize##_2d_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##vsize##_2d_no_rot_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##vsize##_3d_##masked(XFORM_ARGS); - -#define ASSIGN_XFORM_GROUP( pfx, cma, vsize, masked ) \ - gl_transform_tab[cma][vsize][MATRIX_GENERAL] \ - = gl_##pfx##_transform_points##vsize##_general_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_IDENTITY] \ - = gl_##pfx##_transform_points##vsize##_identity_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_3D_NO_ROT] \ - = gl_##pfx##_transform_points##vsize##_3d_no_rot_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_PERSPECTIVE] \ - = gl_##pfx##_transform_points##vsize##_perspective_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_2D] \ - = gl_##pfx##_transform_points##vsize##_2d_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_2D_NO_ROT] \ - = gl_##pfx##_transform_points##vsize##_2d_no_rot_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_3D] \ - = gl_##pfx##_transform_points##vsize##_3d_##masked; + +#define DECLARE_XFORM_GROUP( pfx, sz, masked ) \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_general_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_identity_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_no_rot_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_perspective_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_no_rot_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS ); + + +#define ASSIGN_XFORM_GROUP( pfx, cma, sz, masked ) \ + gl_transform_tab[cma][sz][MATRIX_GENERAL] = \ + 
gl_##pfx##_transform_points##sz##_general_##masked; \ + gl_transform_tab[cma][sz][MATRIX_IDENTITY] = \ + gl_##pfx##_transform_points##sz##_identity_##masked; \ + gl_transform_tab[cma][sz][MATRIX_3D_NO_ROT] = \ + gl_##pfx##_transform_points##sz##_3d_no_rot_##masked; \ + gl_transform_tab[cma][sz][MATRIX_PERSPECTIVE] = \ + gl_##pfx##_transform_points##sz##_perspective_##masked; \ + gl_transform_tab[cma][sz][MATRIX_2D] = \ + gl_##pfx##_transform_points##sz##_2d_##masked; \ + gl_transform_tab[cma][sz][MATRIX_2D_NO_ROT] = \ + gl_##pfx##_transform_points##sz##_2d_no_rot_##masked; \ + gl_transform_tab[cma][sz][MATRIX_3D] = \ + gl_##pfx##_transform_points##sz##_3d_##masked; #ifdef USE_X86_ASM - DECLARE_XFORM_GROUP( x86, 2, raw ) - DECLARE_XFORM_GROUP( x86, 3, raw ) - DECLARE_XFORM_GROUP( x86, 4, raw ) - DECLARE_XFORM_GROUP( x86, 2, masked ) - DECLARE_XFORM_GROUP( x86, 3, masked ) - DECLARE_XFORM_GROUP( x86, 4, masked ) - - extern GLvector4f * _ASMAPI gl_x86_cliptest_points4( GLvector4f *clip_vec, - GLvector4f *proj_vec, - GLubyte clipMask[], - GLubyte *orMask, - GLubyte *andMask ); +DECLARE_XFORM_GROUP( x86, 2, raw ) +DECLARE_XFORM_GROUP( x86, 3, raw ) +DECLARE_XFORM_GROUP( x86, 4, raw ) +DECLARE_XFORM_GROUP( x86, 2, masked ) +DECLARE_XFORM_GROUP( x86, 3, masked ) +DECLARE_XFORM_GROUP( x86, 4, masked ) + + +extern GLvector4f * _ASMAPI gl_x86_cliptest_points4( GLvector4f *clip_vec, + GLvector4f *proj_vec, + GLubyte clipMask[], + GLubyte *orMask, + GLubyte *andMask ); + + +extern void _ASMAPI gl_v16_x86_cliptest_points4( GLfloat *first_vert, + GLfloat *last_vert, + GLubyte *or_mask, + GLubyte *and_mask, + GLubyte *clip_mask ); + + +extern void _ASMAPI gl_v16_x86_general_xform( GLfloat *dest, + const GLfloat *m, + const GLfloat *src, + GLuint src_stride, + GLuint count ); #endif -void gl_init_x86_asm_transforms( void ) +void gl_init_x86_transform_asm( void ) { #ifdef USE_X86_ASM - ASSIGN_XFORM_GROUP( x86, 0, 2, raw ) - ASSIGN_XFORM_GROUP( x86, 0, 3, raw ) - ASSIGN_XFORM_GROUP( 
x86, 0, 4, raw ) + ASSIGN_XFORM_GROUP( x86, 0, 2, raw ); + ASSIGN_XFORM_GROUP( x86, 0, 3, raw ); + ASSIGN_XFORM_GROUP( x86, 0, 4, raw ); - ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 2, masked ) - ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 3, masked ) - ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 4, masked ) + ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 2, masked ); + ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 3, masked ); + ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 4, masked ); /* XXX this function has been found to cause FP overflow exceptions */ gl_clip_tab[4] = gl_x86_cliptest_points4; #ifdef DEBUG - gl_test_all_transform_functions("x86"); + gl_test_all_transform_functions( "x86" ); +#endif #endif +} - gl_cliptest_points4_v16 = gl_v16_x86_cliptest_points4; - gl_xform_points3_v16_general = gl_v16_x86_general_xform; +void gl_init_x86_vertex_asm( void ) +{ +#ifdef USE_X86_ASM + gl_xform_points3_v16_general = gl_v16_x86_general_xform; + gl_cliptest_points4_v16 = gl_v16_x86_cliptest_points4; + +#if 0 + gl_test_all_vertex_functions( "x86" ); +#endif #endif } diff --git a/src/mesa/x86/x86.h b/src/mesa/x86/x86.h index c7aca91..88afd18 100644 --- a/src/mesa/x86/x86.h +++ b/src/mesa/x86/x86.h @@ -1,21 +1,21 @@ -/* $Id: x86.h,v 1.1 1999/08/19 00:55:42 jtg Exp $ */ +/* $Id: x86.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */ /* * Mesa 3-D graphics library - * Version: 3.1 - * + * Version: 3.5 + * * Copyright (C) 1999 Brian Paul All Rights Reserved. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -28,10 +28,10 @@ * Intel x86 assembly code by Josh Vanderhoof */ +#ifndef __X86_H__ +#define __X86_H__ -#ifndef X86_H -#define X86_H - -extern void gl_init_x86_asm_transforms(void); +extern void gl_init_x86_transform_asm( void ); +extern void gl_init_x86_vertex_asm( void ); #endif diff --git a/src/mesa/x86/x86_cliptest.S b/src/mesa/x86/x86_cliptest.S new file mode 100644 index 0000000..b1dd844 --- /dev/null +++ b/src/mesa/x86/x86_cliptest.S @@ -0,0 +1,248 @@ +/* $Id: x86_cliptest.S,v 1.2 2000/10/23 00:16:28 gareth Exp $ */ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "assyntax.h" +#include "clip_args.h" + +#define FP_ONE 1065353216 +#define FP_ZERO 0 + +#define SRC(i) REGOFF(i * 4, ESI) +#define DST(i) REGOFF(i * 4, EDI) +#define MAT(i) REGOFF(i * 4, EDX) + + +/* + * Table for clip test. 
+ * + * bit6 = SRC(3) < 0 + * bit5 = SRC(2) < 0 + * bit4 = abs(S(2)) > abs(S(3)) + * bit3 = SRC(1) < 0 + * bit2 = abs(S(1)) > abs(S(3)) + * bit1 = SRC(0) < 0 + * bit0 = abs(S(0)) > abs(S(3)) + */ + + SEG_DATA + +clip_table: + D_BYTE 0, 1, 0, 2, 4, 5, 4, 6 + D_BYTE 0, 1, 0, 2, 8, 9, 8, 10 + D_BYTE 32, 33, 32, 34, 36, 37, 36, 38 + D_BYTE 32, 33, 32, 34, 40, 41, 40, 42 + D_BYTE 0, 1, 0, 2, 4, 5, 4, 6 + D_BYTE 0, 1, 0, 2, 8, 9, 8, 10 + D_BYTE 16, 17, 16, 18, 20, 21, 20, 22 + D_BYTE 16, 17, 16, 18, 24, 25, 24, 26 + D_BYTE 63, 61, 63, 62, 55, 53, 55, 54 + D_BYTE 63, 61, 63, 62, 59, 57, 59, 58 + D_BYTE 47, 45, 47, 46, 39, 37, 39, 38 + D_BYTE 47, 45, 47, 46, 43, 41, 43, 42 + D_BYTE 63, 61, 63, 62, 55, 53, 55, 54 + D_BYTE 63, 61, 63, 62, 59, 57, 59, 58 + D_BYTE 31, 29, 31, 30, 23, 21, 23, 22 + D_BYTE 31, 29, 31, 30, 27, 25, 27, 26 + + + SEG_TEXT + +/* + * gl_x86_cliptest_points4 + * + * AL: ormask + * AH: andmask + * EBX: temp0 + * ECX: temp1 + * EDX: clipmask[] + * ESI: clip[] + * EDI: proj[] + * EBP: temp2 + */ + +#if defined(__ELF__) && defined(__PIC__) && !defined(ELFPIC) +#define ELFPIC +#endif + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_cliptest_points4 ) +GLNAME( gl_x86_cliptest_points4 ): + +#ifdef ELFPIC +#define FRAME_OFFSET 20 +#else +#define FRAME_OFFSET 16 +#endif + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBP ) + PUSH_L( EBX ) + +#ifdef ELFPIC + /* store pointer to clip_table on stack */ + CALL( LLBL( ctp4_get_eip ) ) + ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX ) + MOV_L( REGOFF(clip_table@GOT, EBX), EBX ) + PUSH_L( EBX ) + JMP( LLBL( ctp4_clip_table_ready ) ) + +LLBL( ctp4_get_eip ): + /* store eip in ebx */ + MOV_L( REGIND(ESP), EBX ) + RET + +LLBL( ctp4_clip_table_ready ): +#endif + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_CLIP, EDX ) + MOV_L( ARG_OR, EBX ) + + MOV_L( ARG_AND, EBP ) + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + OR_L( CONST(VEC_SIZE_4), 
REGOFF(V4F_FLAGS, EDI) ) + MOV_L( EAX, ARG_SOURCE ) /* put stride in ARG_SOURCE */ + + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDX, ECX ) + + MOV_L( ECX, ARG_CLIP ) /* put clipmask + count in ARG_CLIP */ + CMP_L( ECX, EDX ) + + MOV_B( REGIND(EBX), AL ) + MOV_B( REGIND(EBP), AH ) + + JZ( LLBL( ctp4_finish ) ) + +ALIGNTEXT16 +LLBL( ctp4_top ): + + FLD1 /* F3 */ + FDIV_S( SRC(3) ) + + MOV_L( SRC(3), EBP ) + MOV_L( SRC(2), EBX ) + + XOR_L( ECX, ECX ) + ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */ + + ADC_L( ECX, ECX ) + ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */ + + ADC_L( ECX, ECX ) + CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > abs(S(3))*2 */ + + ADC_L( ECX, ECX ) + MOV_L( SRC(1), EBX ) + + ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */ + + ADC_L( ECX, ECX ) + CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */ + + ADC_L( ECX, ECX ) + MOV_L( SRC(0), EBX ) + + ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */ + + ADC_L( ECX, ECX ) + CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */ + + ADC_L( ECX, ECX ) + +#ifdef ELFPIC + MOV_L( REGIND(ESP), EBP ) /* clip_table */ + + MOV_B( REGBI(EBP, ECX), CL ) +#else + MOV_B( REGOFF(clip_table,ECX), CL ) +#endif + + OR_B( CL, AL ) + AND_B( CL, AH ) + + TEST_B( CL, CL ) + MOV_B( CL, REGIND(EDX) ) + + JZ( LLBL( ctp4_proj ) ) + + FSTP( ST(0) ) /* */ + JMP( LLBL( ctp4_next ) ) + +LLBL( ctp4_proj ): + + FLD_S( SRC(0) ) /* F0 F3 */ + FMUL2( ST(1), ST(0) ) + + FLD_S( SRC(1) ) /* F1 F0 F3 */ + FMUL2( ST(2), ST(0) ) + + FLD_S( SRC(2) ) /* F2 F1 F0 F3 */ + FMUL2( ST(3), ST(0) ) + + FXCH( ST(2) ) /* F0 F1 F2 F3 */ + FSTP_S( DST(0) ) /* F1 F2 F3 */ + FSTP_S( DST(1) ) /* F2 F3 */ + FSTP_S( DST(2) ) /* F3 */ + FSTP_S( DST(3) ) /* */ + +LLBL( ctp4_next ): + + INC_L( EDX ) + ADD_L( CONST(16), EDI ) + + ADD_L( ARG_SOURCE, ESI ) + CMP_L( EDX, ARG_CLIP ) + + JNZ( LLBL( 
ctp4_top ) ) + + MOV_L( ARG_OR, ECX ) + MOV_L( ARG_AND, EDX ) + + MOV_B( AL, REGIND(ECX) ) + MOV_B( AH, REGIND(EDX) ) + +LLBL( ctp4_finish ): + + MOV_L( ARG_DEST, EAX ) +#ifdef ELFPIC + POP_L( ESI ) /* discard ptr to clip_table */ +#endif + POP_L( EBX ) + POP_L( EBP ) + POP_L( EDI ) + POP_L( ESI ) + + RET diff --git a/src/mesa/x86/xform_args.h b/src/mesa/x86/xform_args.h new file mode 100644 index 0000000..b69f0b1 --- /dev/null +++ b/src/mesa/x86/xform_args.h @@ -0,0 +1,74 @@ +/* $Id: xform_args.h,v 1.2 2000/10/23 00:16:29 gareth Exp $ */ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Transform function interface for assembly code. Simply define + * FRAME_OFFSET to the number of bytes pushed onto the stack before + * using the ARG_* argument macros. 
+ * + * Gareth Hughes + */ + +#ifndef __XFORM_ARGS_H__ +#define __XFORM_ARGS_H__ + +/* Offsets into GLvector4f + */ +#define V4F_DATA 0 +#define V4F_START 4 +#define V4F_COUNT 8 +#define V4F_STRIDE 12 +#define V4F_SIZE 16 +#define V4F_FLAGS 20 + +/* GLvector4f flags + */ +#define VEC_SIZE_1 1 +#define VEC_SIZE_2 3 +#define VEC_SIZE_3 7 +#define VEC_SIZE_4 15 + +/* Offsets for transform_func arguments + * + * typedef void (*transform_func)( GLvector4f *to_vec, + * const GLfloat m[16], + * const GLvector4f *from_vec, + * const GLubyte *clipmask, + * const GLubyte flag ); + */ +#define OFFSET_DEST 4 +#define OFFSET_MATRIX 8 +#define OFFSET_SOURCE 12 +#define OFFSET_CLIP 16 +#define OFFSET_FLAG 20 + +#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) +#define ARG_MATRIX REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP) +#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP) +#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP) +#define ARG_FLAG REGOFF(FRAME_OFFSET+OFFSET_FLAG, ESP) + +#endif -- 2.7.4