From 93a458840c77b784fb74738f734ba2c5d22ca7a7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Aug 2003 18:12:22 +0000 Subject: [PATCH] Shared dri driver resources --- src/mesa/drivers/dri/common/depthtmp.h | 176 +++++ src/mesa/drivers/dri/common/hwlog.c | 142 ++++ src/mesa/drivers/dri/common/hwlog.h | 101 +++ src/mesa/drivers/dri/common/mm.c | 197 +++++ src/mesa/drivers/dri/common/mm.h | 82 +++ src/mesa/drivers/dri/common/mmx.h | 560 ++++++++++++++ src/mesa/drivers/dri/common/spantmp.h | 259 +++++++ src/mesa/drivers/dri/common/stenciltmp.h | 147 ++++ src/mesa/drivers/dri/common/texmem.c | 1173 ++++++++++++++++++++++++++++++ src/mesa/drivers/dri/common/texmem.h | 293 ++++++++ src/mesa/drivers/dri/common/utils.c | 186 +++++ src/mesa/drivers/dri/common/utils.h | 54 ++ src/mesa/drivers/dri/common/vblank.c | 325 +++++++++ src/mesa/drivers/dri/common/vblank.h | 62 ++ 14 files changed, 3757 insertions(+) create mode 100644 src/mesa/drivers/dri/common/depthtmp.h create mode 100644 src/mesa/drivers/dri/common/hwlog.c create mode 100644 src/mesa/drivers/dri/common/hwlog.h create mode 100644 src/mesa/drivers/dri/common/mm.c create mode 100644 src/mesa/drivers/dri/common/mm.h create mode 100644 src/mesa/drivers/dri/common/mmx.h create mode 100644 src/mesa/drivers/dri/common/spantmp.h create mode 100644 src/mesa/drivers/dri/common/stenciltmp.h create mode 100644 src/mesa/drivers/dri/common/texmem.c create mode 100644 src/mesa/drivers/dri/common/texmem.h create mode 100644 src/mesa/drivers/dri/common/utils.c create mode 100644 src/mesa/drivers/dri/common/utils.h create mode 100644 src/mesa/drivers/dri/common/vblank.c create mode 100644 src/mesa/drivers/dri/common/vblank.h diff --git a/src/mesa/drivers/dri/common/depthtmp.h b/src/mesa/drivers/dri/common/depthtmp.h new file mode 100644 index 0000000..f3da61e --- /dev/null +++ b/src/mesa/drivers/dri/common/depthtmp.h @@ -0,0 +1,176 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/common/depthtmp.h,v 1.5 2001/03/21 16:14:20 dawes Exp $ */ + +#ifndef DBG +#define DBG 0 +#endif + + +#ifndef HAVE_HW_DEPTH_SPANS +#define HAVE_HW_DEPTH_SPANS 0 +#endif +#ifndef HAVE_HW_DEPTH_PIXELS +#define HAVE_HW_DEPTH_PIXELS 0 +#endif + +#ifndef HW_READ_LOCK +#define HW_READ_LOCK() HW_LOCK() +#endif +#ifndef HW_READ_UNLOCK +#define HW_READ_UNLOCK() HW_UNLOCK() +#endif + +static void TAG(WriteDepthSpan)( GLcontext *ctx, + GLuint n, GLint x, GLint y, + const GLdepth *depth, + const GLubyte mask[] ) +{ + HW_WRITE_LOCK() + { + GLint x1; + GLint n1; + LOCAL_DEPTH_VARS; + + y = Y_FLIP( y ); + +#if HAVE_HW_DEPTH_SPANS + (void) x1; (void) n1; + + if ( DBG ) fprintf( stderr, "WriteDepthSpan 0..%d (x1 %d)\n", + (int)n, (int)x ); + + WRITE_DEPTH_SPAN(); +#else + HW_CLIPLOOP() + { + GLint i = 0; + CLIPSPAN( x, y, n, x1, n1, i ); + + if ( DBG ) fprintf( stderr, "WriteDepthSpan %d..%d (x1 %d)\n", + (int)i, (int)n1, (int)x1 ); + + if ( mask ) { + for ( ; i < n1 ; i++, x1++ ) { + if ( mask[i] ) WRITE_DEPTH( x1, y, depth[i] ); + } + } else { + for ( ; i < n1 ; i++, x1++ ) { + WRITE_DEPTH( x1, y, depth[i] ); + } + } + } + HW_ENDCLIPLOOP(); +#endif + } + HW_WRITE_UNLOCK(); +} + +static void TAG(WriteDepthPixels)( GLcontext *ctx, + GLuint n, + const GLint x[], + const GLint y[], + const GLdepth depth[], + const GLubyte mask[] ) +{ + HW_WRITE_LOCK() + { + GLint i; + LOCAL_DEPTH_VARS; + + if ( DBG ) fprintf( stderr, "WriteDepthPixels\n" ); + +#if HAVE_HW_DEPTH_PIXELS + (void) i; + + WRITE_DEPTH_PIXELS(); +#else + HW_CLIPLOOP() + { + for ( i = 0 ; i < n ; i++ ) { + if ( mask[i] ) { + const int fy 
= Y_FLIP( y[i] ); + if ( CLIPPIXEL( x[i], fy ) ) + WRITE_DEPTH( x[i], fy, depth[i] ); + } + } + } + HW_ENDCLIPLOOP(); +#endif + } + HW_WRITE_UNLOCK(); +} + + +/* Read depth spans and pixels + */ +static void TAG(ReadDepthSpan)( GLcontext *ctx, + GLuint n, GLint x, GLint y, + GLdepth depth[] ) +{ + HW_READ_LOCK() + { + GLint x1, n1; + LOCAL_DEPTH_VARS; + + y = Y_FLIP( y ); + + if ( DBG ) fprintf( stderr, "ReadDepthSpan\n" ); + +#if HAVE_HW_DEPTH_SPANS + (void) x1; (void) n1; + + READ_DEPTH_SPAN(); +#else + HW_CLIPLOOP() + { + GLint i = 0; + CLIPSPAN( x, y, n, x1, n1, i ); + for ( ; i < n1 ; i++ ) + READ_DEPTH( depth[i], (x1+i), y ); + } + HW_ENDCLIPLOOP(); +#endif + } + HW_READ_UNLOCK(); +} + +static void TAG(ReadDepthPixels)( GLcontext *ctx, GLuint n, + const GLint x[], const GLint y[], + GLdepth depth[] ) +{ + HW_READ_LOCK() + { + GLint i; + LOCAL_DEPTH_VARS; + + if ( DBG ) fprintf( stderr, "ReadDepthPixels\n" ); + +#if HAVE_HW_DEPTH_PIXELS + (void) i; + + READ_DEPTH_PIXELS(); +#else + HW_CLIPLOOP() + { + for ( i = 0 ; i < n ;i++ ) { + int fy = Y_FLIP( y[i] ); + if ( CLIPPIXEL( x[i], fy ) ) + READ_DEPTH( depth[i], x[i], fy ); + } + } + HW_ENDCLIPLOOP(); +#endif + } + HW_READ_UNLOCK(); +} + + +#if HAVE_HW_DEPTH_SPANS +#undef WRITE_DEPTH_SPAN +#undef WRITE_DEPTH_PIXELS +#undef READ_DEPTH_SPAN +#undef READ_DEPTH_PIXELS +#else +#undef WRITE_DEPTH +#undef READ_DEPTH +#endif +#undef TAG diff --git a/src/mesa/drivers/dri/common/hwlog.c b/src/mesa/drivers/dri/common/hwlog.c new file mode 100644 index 0000000..ff6e7d9 --- /dev/null +++ b/src/mesa/drivers/dri/common/hwlog.c @@ -0,0 +1,142 @@ +/* + * GLX Hardware Device Driver common code + * + * Based on the original MGA G200 driver (c) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * + * Wittawat Yamwong + */ +/* $XFree86: xc/lib/GL/mesa/src/drv/common/hwlog.c,v 1.3 2001/08/18 02:51:03 dawes Exp $ */ + +#include "hwlog.h" +hwlog_t hwlog = { 0,0,0, "[???] "}; + + +/* Should be shared, but is this a good place for it? 
+ */ +#include +#include + + +int usec( void ) +{ + struct timeval tv; + struct timezone tz; + + gettimeofday( &tv, &tz ); + + return (tv.tv_sec & 2047) * 1000000 + tv.tv_usec; +} + + +#ifdef HW_LOG_ENABLED +int hwOpenLog(const char *filename, char *prefix) +{ + hwCloseLog(); + hwSetLogLevel(0); + hwlog.prefix=prefix; + if (!filename) + return -1; + if ((hwlog.file = fopen(filename,"w")) == NULL) + return -1; + return 0; +} + +void hwCloseLog() +{ + if (hwlog.file) { + fclose(hwlog.file); + hwlog.file = NULL; + } +} + +int hwIsLogReady() +{ + return (hwlog.file != NULL); +} + +void hwSetLogLevel(int level) +{ + hwlog.level = level; +} + +int hwGetLogLevel() +{ + return hwlog.level; +} + +void hwLog(int level, const char *format, ...) +{ + va_list ap; + va_start(ap,format); + hwLogv(level,format,ap); + va_end(ap); +} + +void hwLogv(int l, const char *format, va_list ap) +{ + if (hwlog.file && (l <= hwlog.level)) { + vfprintf(hwlog.file,format,ap); + fflush(hwlog.file); + } +} + +void hwMsg(int l, const char *format, ...) +{ + va_list ap; + va_start(ap, format); + + if (l <= hwlog.level) { + if (hwIsLogReady()) { + int t = usec(); + + hwLog(l, "%6i:", t - hwlog.timeTemp); + hwlog.timeTemp = t; + hwLogv(l, format, ap); + } else { + fprintf(stderr, hwlog.prefix); + vfprintf(stderr, format, ap); + } + } + + va_end(ap); +} + +#else /* ifdef HW_LOG_ENABLED */ + +int hwlogdummy() +{ + return 0; +} + +#endif + +void hwError(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + + fprintf(stderr, hwlog.prefix); + vfprintf(stderr, format, ap); + hwLogv(0, format, ap); + + va_end(ap); +} diff --git a/src/mesa/drivers/dri/common/hwlog.h b/src/mesa/drivers/dri/common/hwlog.h new file mode 100644 index 0000000..6c5a185 --- /dev/null +++ b/src/mesa/drivers/dri/common/hwlog.h @@ -0,0 +1,101 @@ +/* + * GLX Hardware Device Driver common code + * + * Based on the original MGA G200 driver (c) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * + * Wittawat Yamwong + */ +/* $XFree86: xc/lib/GL/mesa/src/drv/common/hwlog.h,v 1.5 2001/10/31 23:22:57 tsi Exp $ */ + +/* Usage: + * - use mgaError for error messages. Always write to X error and log file. + * - use mgaMsg for debugging. Can be disabled by undefining MGA_LOG_ENABLED. 
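Note that the entry points declared in this header are hwError() and hwMsg() (the mga* names in the note above are historical), and the logging code is only compiled in when HW_LOG_ENABLED is defined. A minimal sketch of how a driver might drive this API, assuming HW_LOG_ENABLED is defined; the log path, prefix and message text are illustrative only:

#include "hwlog.h"

static void exampleSetupLogging( void )
{
   static char prefix[] = "[drv] ";   /* illustrative prefix string */

   /* Open a log file and pick a verbosity (DBG_LEVEL_* are defined below). */
   if ( hwOpenLog( "/tmp/drv.log", prefix ) == 0 )
      hwSetLogLevel( DBG_LEVEL_VERBOSE );

   /* Written to the log file, or to stderr when no log file is open. */
   hwMsg( DBG_LEVEL_BASE, "context created\n" );

   /* Always written to stderr, and to the log file as well if one is open. */
   hwError( "unsupported depth: %d\n", 8 );

   hwCloseLog();
}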
+ */ + +#ifndef HWLOG_INC +#define HWLOG_INC + + +#include +#include + +#define DBG_LEVEL_BASE 1 +#define DBG_LEVEL_VERBOSE 10 +#define DBG_LEVEL_ENTEREXIT 20 + +typedef struct +{ + FILE *file; + int level; + unsigned int timeTemp; + char *prefix; +} hwlog_t; + +extern hwlog_t hwlog; + + +#ifdef HW_LOG_ENABLED + +/* open and close log file. */ +int hwOpenLog(const char *filename, char *prefix); +void hwCloseLog(void); + +/* return 1 if log file is succesfully opened */ +int hwIsLogReady(void); + +/* set current log level to 'level'. Messages with level less than or equal + the current log level will be written to the log file. */ +void hwSetLogLevel(int level); +int hwGetLogLevel(void); + +/* hwLog and hwLogv write a message to the log file. */ +/* do not call these directly, use hwMsg() instead */ +void hwLog(int level, const char *format, ...); +void hwLogv(int level, const char *format, va_list ap); + +int usec( void ); + +/* hwMsg writes a message to the log file or to the standard X error file. */ +void hwMsg(int level, const char *format, ...); + + +#else + + +static __inline__ int hwOpenLog(const char *f, char *prefix) { hwlog.prefix=prefix; return -1; } +#define hwIsLogReady() (0) +#define hwGetLogLevel() (-1) +#define hwLogLevel(n) (0) +#define hwLog() +#define hwMsg() + +#define hwCloseLog() +#define hwSetLogLevel(x) +#define hwLogv(l,f,a) + + +#endif + +void hwError(const char *format, ...); + + +#endif diff --git a/src/mesa/drivers/dri/common/mm.c b/src/mesa/drivers/dri/common/mm.c new file mode 100644 index 0000000..b3be954 --- /dev/null +++ b/src/mesa/drivers/dri/common/mm.c @@ -0,0 +1,197 @@ +/* + * GLX Hardware Device Driver common code + * Copyright (C) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ +/* $XFree86: xc/lib/GL/mesa/src/drv/common/mm.c,v 1.4 2002/10/30 12:51:27 alanh Exp $ */ + +#include +#include + +#include "mm.h" +#include "hwlog.h" + + +void mmDumpMemInfo( memHeap_t *heap ) +{ + TMemBlock *p; + + fprintf(stderr, "Memory heap %p:\n", heap); + if (heap == 0) { + fprintf(stderr, " heap == 0\n"); + } else { + p = (TMemBlock *)heap; + while (p) { + fprintf(stderr, " Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + p->free ? '.':'U', + p->reserved ? 
'R':'.'); + p = p->next; + } + } + fprintf(stderr, "End of memory blocks\n"); +} + +memHeap_t *mmInit(int ofs, + int size) +{ + PMemBlock blocks; + + if (size <= 0) { + return 0; + } + blocks = (TMemBlock *) calloc(1,sizeof(TMemBlock)); + if (blocks) { + blocks->ofs = ofs; + blocks->size = size; + blocks->free = 1; + return (memHeap_t *)blocks; + } else + return 0; +} + + +static TMemBlock* SliceBlock(TMemBlock *p, + int startofs, int size, + int reserved, int alignment) +{ + TMemBlock *newblock; + + /* break left */ + if (startofs > p->ofs) { + newblock = (TMemBlock*) calloc(1,sizeof(TMemBlock)); + if (!newblock) + return NULL; + newblock->ofs = startofs; + newblock->size = p->size - (startofs - p->ofs); + newblock->free = 1; + newblock->next = p->next; + p->size -= newblock->size; + p->next = newblock; + p = newblock; + } + + /* break right */ + if (size < p->size) { + newblock = (TMemBlock*) calloc(1,sizeof(TMemBlock)); + if (!newblock) + return NULL; + newblock->ofs = startofs + size; + newblock->size = p->size - size; + newblock->free = 1; + newblock->next = p->next; + p->size = size; + p->next = newblock; + } + + /* p = middle block */ + p->align = alignment; + p->free = 0; + p->reserved = reserved; + return p; +} + +PMemBlock mmAllocMem( memHeap_t *heap, int size, int align2, int startSearch) +{ + int mask,startofs,endofs; + TMemBlock *p; + + if (!heap || align2 < 0 || size <= 0) + return NULL; + mask = (1 << align2)-1; + startofs = 0; + p = (TMemBlock *)heap; + while (p) { + if ((p)->free) { + startofs = (p->ofs + mask) & ~mask; + if ( startofs < startSearch ) { + startofs = startSearch; + } + endofs = startofs+size; + if (endofs <= (p->ofs+p->size)) + break; + } + p = p->next; + } + if (!p) + return NULL; + p = SliceBlock(p,startofs,size,0,mask+1); + p->heap = heap; + return p; +} + +static __inline__ int Join2Blocks(TMemBlock *p) +{ + if (p->free && p->next && p->next->free) { + TMemBlock *q = p->next; + p->size += q->size; + p->next = q->next; + free(q); + return 1; + } + return 0; +} + +int mmFreeMem(PMemBlock b) +{ + TMemBlock *p,*prev; + + if (!b) + return 0; + if (!b->heap) { + fprintf(stderr, "no heap\n"); + return -1; + } + p = b->heap; + prev = NULL; + while (p && p != b) { + prev = p; + p = p->next; + } + if (!p || p->free || p->reserved) { + if (!p) + fprintf(stderr, "block not found in heap\n"); + else if (p->free) + fprintf(stderr, "block already free\n"); + else + fprintf(stderr, "block is reserved\n"); + return -1; + } + p->free = 1; + Join2Blocks(p); + if (prev) + Join2Blocks(prev); + return 0; +} + + +void mmDestroy(memHeap_t *heap) +{ + TMemBlock *p,*q; + + if (!heap) + return; + p = (TMemBlock *)heap; + while (p) { + q = p->next; + free(p); + p = q; + } +} diff --git a/src/mesa/drivers/dri/common/mm.h b/src/mesa/drivers/dri/common/mm.h new file mode 100644 index 0000000..d52871d --- /dev/null +++ b/src/mesa/drivers/dri/common/mm.h @@ -0,0 +1,82 @@ +/* + * GLX Hardware Device Driver common code + * Copyright (C) 1999 Keith Whitwell + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included 
+ * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef MM_INC +#define MM_INC + +struct mem_block_t { + struct mem_block_t *next; + struct mem_block_t *heap; + int ofs,size; + int align; + int free:1; + int reserved:1; +}; +typedef struct mem_block_t TMemBlock; +typedef struct mem_block_t *PMemBlock; + +/* a heap is just the first block in a chain */ +typedef struct mem_block_t memHeap_t; + +static __inline__ int mmBlockSize(PMemBlock b) +{ return b->size; } + +static __inline__ int mmOffset(PMemBlock b) +{ return b->ofs; } + +/* + * input: total size in bytes + * return: a heap pointer if OK, NULL if error + */ +memHeap_t *mmInit( int ofs, int size ); + +/* + * Allocate 'size' bytes with 2^align2 bytes alignment, + * restrict the search to free memory after 'startSearch' + * depth and back buffers should be in different 4mb banks + * to get better page hits if possible + * input: size = size of block + * align2 = 2^align2 bytes alignment + * startSearch = linear offset from start of heap to begin search + * return: pointer to the allocated block, 0 if error + */ +PMemBlock mmAllocMem( memHeap_t *heap, int size, int align2, + int startSearch ); + +/* + * Free block starts at offset + * input: pointer to a block + * return: 0 if OK, -1 if error + */ +int mmFreeMem( PMemBlock b ); + +/* + * destroy MM + */ +void mmDestroy( memHeap_t *mmInit ); + +/* For debuging purpose. */ +void mmDumpMemInfo( memHeap_t *mmInit ); + +#endif diff --git a/src/mesa/drivers/dri/common/mmx.h b/src/mesa/drivers/dri/common/mmx.h new file mode 100644 index 0000000..49ce7e3 --- /dev/null +++ b/src/mesa/drivers/dri/common/mmx.h @@ -0,0 +1,560 @@ +/* mmx.h + + MultiMedia eXtensions GCC interface library for IA32. + + To use this library, simply include this header file + and compile with GCC. You MUST have inlining enabled + in order for mmx_ok() to work; this can be done by + simply using -O on the GCC command line. + + Compiling with -DMMX_TRACE will cause detailed trace + output to be sent to stderr for each mmx operation. + This adds lots of code, and obviously slows execution to + a crawl, but can be very useful for debugging. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT + LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR ANY PARTICULAR PURPOSE. + + 1997-98 by H. Dietz and R. Fisher + + History: + 97-98* R.Fisher Early versions + 980501 R.Fisher Original Release + 980611* H.Dietz Rewrite, correctly implementing inlines, and + R.Fisher including direct register accesses. + 980616 R.Fisher Release of 980611 as 980616. + 980714 R.Fisher Minor corrections to Makefile, etc. + 980715 R.Fisher mmx_ok() now prevents optimizer from using + clobbered values. + mmx_ok() now checks if cpuid instruction is + available before trying to use it. + 980726* R.Fisher mm_support() searches for AMD 3DNow, Cyrix + Extended MMX, and standard MMX. 
It returns a + value which is positive if any of these are + supported, and can be masked with constants to + see which. mmx_ok() is now a call to this + 980726* R.Fisher Added i2r support for shift functions + 980919 R.Fisher Fixed AMD extended feature recognition bug. + 980921 R.Fisher Added definition/check for _MMX_H. + Added "float s[2]" to mmx_t for use with + 3DNow and EMMX. So same mmx_t can be used. + 981013 R.Fisher Fixed cpuid function 1 bug (looked at wrong reg) + Fixed psllq_i2r error in mmxtest.c + + * Unreleased (internal or interim) versions + + Notes: + It appears that the latest gas has the pand problem fixed, therefore + I'll undefine BROKEN_PAND by default. + String compares may be quicker than the multiple test/jumps in vendor + test sequence in mmx_ok(), but I'm not concerned with that right now. + + Acknowledgments: + Jussi Laako for pointing out the errors ultimately found to be + connected to the failure to notify the optimizer of clobbered values. + Roger Hardiman for reminding us that CPUID isn't everywhere, and that + someone may actually try to use this on a machine without CPUID. + Also for suggesting code for checking this. + Robert Dale for pointing out the AMD recognition bug. + Jimmy Mayfield and Carl Witty for pointing out the Intel recognition + bug. + Carl Witty for pointing out the psllq_i2r test bug. +*/ + +#ifndef _MMX_H +#define _MMX_H + +//#define MMX_TRACE + +/* Warning: at this writing, the version of GAS packaged + with most Linux distributions does not handle the + parallel AND operation mnemonic correctly. If the + symbol BROKEN_PAND is defined, a slower alternative + coding will be used. If execution of mmxtest results + in an illegal instruction fault, define this symbol. +*/ +#undef BROKEN_PAND + + +/* The type of an value that fits in an MMX register + (note that long long constant values MUST be suffixed + by LL and unsigned long long values by ULL, lest + they be truncated by the compiler) +*/ +typedef union { + long long q; /* Quadword (64-bit) value */ + unsigned long long uq; /* Unsigned Quadword */ + int d[2]; /* 2 Doubleword (32-bit) values */ + unsigned int ud[2]; /* 2 Unsigned Doubleword */ + short w[4]; /* 4 Word (16-bit) values */ + unsigned short uw[4]; /* 4 Unsigned Word */ + char b[8]; /* 8 Byte (8-bit) values */ + unsigned char ub[8]; /* 8 Unsigned Byte */ + float s[2]; /* Single-precision (32-bit) value */ +} mmx_t; + +/* Helper functions for the instruction macros that follow... + (note that memory-to-register, m2r, instructions are nearly + as efficient as register-to-register, r2r, instructions; + however, memory-to-memory instructions are really simulated + as a convenience, and are only 1/3 as efficient) +*/ +#ifdef MMX_TRACE + +/* Include the stuff for printing a trace to stderr... 
+*/ + +#include + +#define mmx_i2r(op, imm, reg) \ + { \ + mmx_t mmx_trace; \ + mmx_trace = (imm); \ + fprintf(stderr, #op "_i2r(" #imm "=0x%016llx, ", mmx_trace.q); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%016llx) => ", mmx_trace.q); \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (imm)); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%016llx\n", mmx_trace.q); \ + } + +#define mmx_m2r(op, mem, reg) \ + { \ + mmx_t mmx_trace; \ + mmx_trace = (mem); \ + fprintf(stderr, #op "_m2r(" #mem "=0x%016llx, ", mmx_trace.q); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%016llx) => ", mmx_trace.q); \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (mem)); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%016llx\n", mmx_trace.q); \ + } + +#define mmx_r2m(op, reg, mem) \ + { \ + mmx_t mmx_trace; \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #op "_r2m(" #reg "=0x%016llx, ", mmx_trace.q); \ + mmx_trace = (mem); \ + fprintf(stderr, #mem "=0x%016llx) => ", mmx_trace.q); \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=X" (mem) \ + : /* nothing */ ); \ + mmx_trace = (mem); \ + fprintf(stderr, #mem "=0x%016llx\n", mmx_trace.q); \ + } + +#define mmx_r2r(op, regs, regd) \ + { \ + mmx_t mmx_trace; \ + __asm__ __volatile__ ("movq %%" #regs ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #op "_r2r(" #regs "=0x%016llx, ", mmx_trace.q); \ + __asm__ __volatile__ ("movq %%" #regd ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #regd "=0x%016llx) => ", mmx_trace.q); \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd); \ + __asm__ __volatile__ ("movq %%" #regd ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #regd "=0x%016llx\n", mmx_trace.q); \ + } + +#define mmx_m2m(op, mems, memd) \ + { \ + mmx_t mmx_trace; \ + mmx_trace = (mems); \ + fprintf(stderr, #op "_m2m(" #mems "=0x%016llx, ", mmx_trace.q); \ + mmx_trace = (memd); \ + fprintf(stderr, #memd "=0x%016llx) => ", mmx_trace.q); \ + __asm__ __volatile__ ("movq %0, %%mm0\n\t" \ + #op " %1, %%mm0\n\t" \ + "movq %%mm0, %0" \ + : "=X" (memd) \ + : "X" (mems)); \ + mmx_trace = (memd); \ + fprintf(stderr, #memd "=0x%016llx\n", mmx_trace.q); \ + } + +#else + +/* These macros are a lot simpler without the tracing... +*/ + +#define mmx_i2r(op, imm, reg) \ + __asm__ __volatile__ (#op " $" #imm ", %%" #reg \ + : /* nothing */ \ + : /* nothing */); + +#define mmx_m2r(op, mem, reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (mem)) + +#define mmx_r2m(op, reg, mem) \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=X" (mem) \ + : /* nothing */ ) + +#define mmx_r2r(op, regs, regd) \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd) + +#define mmx_m2m(op, mems, memd) \ + __asm__ __volatile__ ("movq %0, %%mm0\n\t" \ + #op " %1, %%mm0\n\t" \ + "movq %%mm0, %0" \ + : "=X" (memd) \ + : "X" (mems)) + +#endif + + +/* 1x64 MOVe Quadword + (this is both a load and a store... 
+ in fact, it is the only way to store) +*/ +#define movq_m2r(var, reg) mmx_m2r(movq, var, reg) +#define movq_r2m(reg, var) mmx_r2m(movq, reg, var) +#define movq_r2r(regs, regd) mmx_r2r(movq, regs, regd) +#define movq(vars, vard) \ + __asm__ __volatile__ ("movq %1, %%mm0\n\t" \ + "movq %%mm0, %0" \ + : "=X" (vard) \ + : "X" (vars)) + + +/* 1x32 MOVe Doubleword + (like movq, this is both load and store... + but is most useful for moving things between + mmx registers and ordinary registers) +*/ +#define movd_m2r(var, reg) mmx_m2r(movd, var, reg) +#define movd_r2m(reg, var) mmx_r2m(movd, reg, var) +#define movd_r2r(regs, regd) mmx_r2r(movd, regs, regd) +#define movd(vars, vard) \ + __asm__ __volatile__ ("movd %1, %%mm0\n\t" \ + "movd %%mm0, %0" \ + : "=X" (vard) \ + : "X" (vars)) + + +/* 2x32, 4x16, and 8x8 Parallel ADDs +*/ +#define paddd_m2r(var, reg) mmx_m2r(paddd, var, reg) +#define paddd_r2r(regs, regd) mmx_r2r(paddd, regs, regd) +#define paddd(vars, vard) mmx_m2m(paddd, vars, vard) + +#define paddw_m2r(var, reg) mmx_m2r(paddw, var, reg) +#define paddw_r2r(regs, regd) mmx_r2r(paddw, regs, regd) +#define paddw(vars, vard) mmx_m2m(paddw, vars, vard) + +#define paddb_m2r(var, reg) mmx_m2r(paddb, var, reg) +#define paddb_r2r(regs, regd) mmx_r2r(paddb, regs, regd) +#define paddb(vars, vard) mmx_m2m(paddb, vars, vard) + + +/* 4x16 and 8x8 Parallel ADDs using Saturation arithmetic +*/ +#define paddsw_m2r(var, reg) mmx_m2r(paddsw, var, reg) +#define paddsw_r2r(regs, regd) mmx_r2r(paddsw, regs, regd) +#define paddsw(vars, vard) mmx_m2m(paddsw, vars, vard) + +#define paddsb_m2r(var, reg) mmx_m2r(paddsb, var, reg) +#define paddsb_r2r(regs, regd) mmx_r2r(paddsb, regs, regd) +#define paddsb(vars, vard) mmx_m2m(paddsb, vars, vard) + + +/* 4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic +*/ +#define paddusw_m2r(var, reg) mmx_m2r(paddusw, var, reg) +#define paddusw_r2r(regs, regd) mmx_r2r(paddusw, regs, regd) +#define paddusw(vars, vard) mmx_m2m(paddusw, vars, vard) + +#define paddusb_m2r(var, reg) mmx_m2r(paddusb, var, reg) +#define paddusb_r2r(regs, regd) mmx_r2r(paddusb, regs, regd) +#define paddusb(vars, vard) mmx_m2m(paddusb, vars, vard) + + +/* 2x32, 4x16, and 8x8 Parallel SUBs +*/ +#define psubd_m2r(var, reg) mmx_m2r(psubd, var, reg) +#define psubd_r2r(regs, regd) mmx_r2r(psubd, regs, regd) +#define psubd(vars, vard) mmx_m2m(psubd, vars, vard) + +#define psubw_m2r(var, reg) mmx_m2r(psubw, var, reg) +#define psubw_r2r(regs, regd) mmx_r2r(psubw, regs, regd) +#define psubw(vars, vard) mmx_m2m(psubw, vars, vard) + +#define psubb_m2r(var, reg) mmx_m2r(psubb, var, reg) +#define psubb_r2r(regs, regd) mmx_r2r(psubb, regs, regd) +#define psubb(vars, vard) mmx_m2m(psubb, vars, vard) + + +/* 4x16 and 8x8 Parallel SUBs using Saturation arithmetic +*/ +#define psubsw_m2r(var, reg) mmx_m2r(psubsw, var, reg) +#define psubsw_r2r(regs, regd) mmx_r2r(psubsw, regs, regd) +#define psubsw(vars, vard) mmx_m2m(psubsw, vars, vard) + +#define psubsb_m2r(var, reg) mmx_m2r(psubsb, var, reg) +#define psubsb_r2r(regs, regd) mmx_r2r(psubsb, regs, regd) +#define psubsb(vars, vard) mmx_m2m(psubsb, vars, vard) + + +/* 4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic +*/ +#define psubusw_m2r(var, reg) mmx_m2r(psubusw, var, reg) +#define psubusw_r2r(regs, regd) mmx_r2r(psubusw, regs, regd) +#define psubusw(vars, vard) mmx_m2m(psubusw, vars, vard) + +#define psubusb_m2r(var, reg) mmx_m2r(psubusb, var, reg) +#define psubusb_r2r(regs, regd) mmx_r2r(psubusb, regs, regd) +#define psubusb(vars, vard) 
mmx_m2m(psubusb, vars, vard) + + +/* 4x16 Parallel MULs giving Low 4x16 portions of results +*/ +#define pmullw_m2r(var, reg) mmx_m2r(pmullw, var, reg) +#define pmullw_r2r(regs, regd) mmx_r2r(pmullw, regs, regd) +#define pmullw(vars, vard) mmx_m2m(pmullw, vars, vard) + + +/* 4x16 Parallel MULs giving High 4x16 portions of results +*/ +#define pmulhw_m2r(var, reg) mmx_m2r(pmulhw, var, reg) +#define pmulhw_r2r(regs, regd) mmx_r2r(pmulhw, regs, regd) +#define pmulhw(vars, vard) mmx_m2m(pmulhw, vars, vard) + + +/* 4x16->2x32 Parallel Mul-ADD + (muls like pmullw, then adds adjacent 16-bit fields + in the multiply result to make the final 2x32 result) +*/ +#define pmaddwd_m2r(var, reg) mmx_m2r(pmaddwd, var, reg) +#define pmaddwd_r2r(regs, regd) mmx_r2r(pmaddwd, regs, regd) +#define pmaddwd(vars, vard) mmx_m2m(pmaddwd, vars, vard) + + +/* 1x64 bitwise AND +*/ +#ifdef BROKEN_PAND +#define pand_m2r(var, reg) \ + { \ + mmx_m2r(pandn, (mmx_t) -1LL, reg); \ + mmx_m2r(pandn, var, reg); \ + } +#define pand_r2r(regs, regd) \ + { \ + mmx_m2r(pandn, (mmx_t) -1LL, regd); \ + mmx_r2r(pandn, regs, regd) \ + } +#define pand(vars, vard) \ + { \ + movq_m2r(vard, mm0); \ + mmx_m2r(pandn, (mmx_t) -1LL, mm0); \ + mmx_m2r(pandn, vars, mm0); \ + movq_r2m(mm0, vard); \ + } +#else +#define pand_m2r(var, reg) mmx_m2r(pand, var, reg) +#define pand_r2r(regs, regd) mmx_r2r(pand, regs, regd) +#define pand(vars, vard) mmx_m2m(pand, vars, vard) +#endif + + +/* 1x64 bitwise AND with Not the destination +*/ +#define pandn_m2r(var, reg) mmx_m2r(pandn, var, reg) +#define pandn_r2r(regs, regd) mmx_r2r(pandn, regs, regd) +#define pandn(vars, vard) mmx_m2m(pandn, vars, vard) + + +/* 1x64 bitwise OR +*/ +#define por_m2r(var, reg) mmx_m2r(por, var, reg) +#define por_r2r(regs, regd) mmx_r2r(por, regs, regd) +#define por(vars, vard) mmx_m2m(por, vars, vard) + + +/* 1x64 bitwise eXclusive OR +*/ +#define pxor_m2r(var, reg) mmx_m2r(pxor, var, reg) +#define pxor_r2r(regs, regd) mmx_r2r(pxor, regs, regd) +#define pxor(vars, vard) mmx_m2m(pxor, vars, vard) + + +/* 2x32, 4x16, and 8x8 Parallel CoMPare for EQuality + (resulting fields are either 0 or -1) +*/ +#define pcmpeqd_m2r(var, reg) mmx_m2r(pcmpeqd, var, reg) +#define pcmpeqd_r2r(regs, regd) mmx_r2r(pcmpeqd, regs, regd) +#define pcmpeqd(vars, vard) mmx_m2m(pcmpeqd, vars, vard) + +#define pcmpeqw_m2r(var, reg) mmx_m2r(pcmpeqw, var, reg) +#define pcmpeqw_r2r(regs, regd) mmx_r2r(pcmpeqw, regs, regd) +#define pcmpeqw(vars, vard) mmx_m2m(pcmpeqw, vars, vard) + +#define pcmpeqb_m2r(var, reg) mmx_m2r(pcmpeqb, var, reg) +#define pcmpeqb_r2r(regs, regd) mmx_r2r(pcmpeqb, regs, regd) +#define pcmpeqb(vars, vard) mmx_m2m(pcmpeqb, vars, vard) + + +/* 2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than + (resulting fields are either 0 or -1) +*/ +#define pcmpgtd_m2r(var, reg) mmx_m2r(pcmpgtd, var, reg) +#define pcmpgtd_r2r(regs, regd) mmx_r2r(pcmpgtd, regs, regd) +#define pcmpgtd(vars, vard) mmx_m2m(pcmpgtd, vars, vard) + +#define pcmpgtw_m2r(var, reg) mmx_m2r(pcmpgtw, var, reg) +#define pcmpgtw_r2r(regs, regd) mmx_r2r(pcmpgtw, regs, regd) +#define pcmpgtw(vars, vard) mmx_m2m(pcmpgtw, vars, vard) + +#define pcmpgtb_m2r(var, reg) mmx_m2r(pcmpgtb, var, reg) +#define pcmpgtb_r2r(regs, regd) mmx_r2r(pcmpgtb, regs, regd) +#define pcmpgtb(vars, vard) mmx_m2m(pcmpgtb, vars, vard) + + +/* 1x64, 2x32, and 4x16 Parallel Shift Left Logical +*/ +#define psllq_i2r(imm, reg) mmx_i2r(psllq, imm, reg) +#define psllq_m2r(var, reg) mmx_m2r(psllq, var, reg) +#define psllq_r2r(regs, regd) mmx_r2r(psllq, regs, regd) 
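As a small usage illustration for the macro families defined in this header (a sketch only: it assumes an MMX-capable CPU and inlining enabled as described at the top of the file, and the function name and operands are made up):

#include "mmx.h"

/* dst = a + b with unsigned per-byte saturation. */
static void example_padd_bytes( mmx_t *dst, const mmx_t *a, const mmx_t *b )
{
   movq_m2r( *a, mm0 );       /* load a into mm0                     */
   paddusb_m2r( *b, mm0 );    /* mm0 += b, saturating each byte      */
   movq_r2m( mm0, *dst );     /* store the result                    */
   emms();                    /* clear MMX state before any FPU code */
}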
+#define psllq(vars, vard) mmx_m2m(psllq, vars, vard) + +#define pslld_i2r(imm, reg) mmx_i2r(pslld, imm, reg) +#define pslld_m2r(var, reg) mmx_m2r(pslld, var, reg) +#define pslld_r2r(regs, regd) mmx_r2r(pslld, regs, regd) +#define pslld(vars, vard) mmx_m2m(pslld, vars, vard) + +#define psllw_i2r(imm, reg) mmx_i2r(psllw, imm, reg) +#define psllw_m2r(var, reg) mmx_m2r(psllw, var, reg) +#define psllw_r2r(regs, regd) mmx_r2r(psllw, regs, regd) +#define psllw(vars, vard) mmx_m2m(psllw, vars, vard) + + +/* 1x64, 2x32, and 4x16 Parallel Shift Right Logical +*/ +#define psrlq_i2r(imm, reg) mmx_i2r(psrlq, imm, reg) +#define psrlq_m2r(var, reg) mmx_m2r(psrlq, var, reg) +#define psrlq_r2r(regs, regd) mmx_r2r(psrlq, regs, regd) +#define psrlq(vars, vard) mmx_m2m(psrlq, vars, vard) + +#define psrld_i2r(imm, reg) mmx_i2r(psrld, imm, reg) +#define psrld_m2r(var, reg) mmx_m2r(psrld, var, reg) +#define psrld_r2r(regs, regd) mmx_r2r(psrld, regs, regd) +#define psrld(vars, vard) mmx_m2m(psrld, vars, vard) + +#define psrlw_i2r(imm, reg) mmx_i2r(psrlw, imm, reg) +#define psrlw_m2r(var, reg) mmx_m2r(psrlw, var, reg) +#define psrlw_r2r(regs, regd) mmx_r2r(psrlw, regs, regd) +#define psrlw(vars, vard) mmx_m2m(psrlw, vars, vard) + + +/* 2x32 and 4x16 Parallel Shift Right Arithmetic +*/ +#define psrad_i2r(imm, reg) mmx_i2r(psrad, imm, reg) +#define psrad_m2r(var, reg) mmx_m2r(psrad, var, reg) +#define psrad_r2r(regs, regd) mmx_r2r(psrad, regs, regd) +#define psrad(vars, vard) mmx_m2m(psrad, vars, vard) + +#define psraw_i2r(imm, reg) mmx_i2r(psraw, imm, reg) +#define psraw_m2r(var, reg) mmx_m2r(psraw, var, reg) +#define psraw_r2r(regs, regd) mmx_r2r(psraw, regs, regd) +#define psraw(vars, vard) mmx_m2m(psraw, vars, vard) + + +/* 2x32->4x16 and 4x16->8x8 PACK and Signed Saturate + (packs source and dest fields into dest in that order) +*/ +#define packssdw_m2r(var, reg) mmx_m2r(packssdw, var, reg) +#define packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd) +#define packssdw(vars, vard) mmx_m2m(packssdw, vars, vard) + +#define packsswb_m2r(var, reg) mmx_m2r(packsswb, var, reg) +#define packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd) +#define packsswb(vars, vard) mmx_m2m(packsswb, vars, vard) + + +/* 4x16->8x8 PACK and Unsigned Saturate + (packs source and dest fields into dest in that order) +*/ +#define packuswb_m2r(var, reg) mmx_m2r(packuswb, var, reg) +#define packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd) +#define packuswb(vars, vard) mmx_m2m(packuswb, vars, vard) + + +/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low + (interleaves low half of dest with low half of source + as padding in each result field) +*/ +#define punpckldq_m2r(var, reg) mmx_m2r(punpckldq, var, reg) +#define punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd) +#define punpckldq(vars, vard) mmx_m2m(punpckldq, vars, vard) + +#define punpcklwd_m2r(var, reg) mmx_m2r(punpcklwd, var, reg) +#define punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd) +#define punpcklwd(vars, vard) mmx_m2m(punpcklwd, vars, vard) + +#define punpcklbw_m2r(var, reg) mmx_m2r(punpcklbw, var, reg) +#define punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd) +#define punpcklbw(vars, vard) mmx_m2m(punpcklbw, vars, vard) + + +/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High + (interleaves high half of dest with high half of source + as padding in each result field) +*/ +#define punpckhdq_m2r(var, reg) mmx_m2r(punpckhdq, var, reg) +#define punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd) +#define punpckhdq(vars, vard) 
mmx_m2m(punpckhdq, vars, vard) + +#define punpckhwd_m2r(var, reg) mmx_m2r(punpckhwd, var, reg) +#define punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd) +#define punpckhwd(vars, vard) mmx_m2m(punpckhwd, vars, vard) + +#define punpckhbw_m2r(var, reg) mmx_m2r(punpckhbw, var, reg) +#define punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd) +#define punpckhbw(vars, vard) mmx_m2m(punpckhbw, vars, vard) + + +/* Empty MMx State + (used to clean-up when going from mmx to float use + of the registers that are shared by both; note that + there is no float-to-mmx operation needed, because + only the float tag word info is corruptible) +*/ +#ifdef MMX_TRACE + +#define emms() \ + { \ + fprintf(stderr, "emms()\n"); \ + __asm__ __volatile__ ("emms"); \ + } + +#else + +#define emms() __asm__ __volatile__ ("emms") + +#endif + +#endif + diff --git a/src/mesa/drivers/dri/common/spantmp.h b/src/mesa/drivers/dri/common/spantmp.h new file mode 100644 index 0000000..888be04 --- /dev/null +++ b/src/mesa/drivers/dri/common/spantmp.h @@ -0,0 +1,259 @@ +#ifndef DBG +#define DBG 0 +#endif + +#ifndef HW_WRITE_LOCK +#define HW_WRITE_LOCK() HW_LOCK() +#endif + +#ifndef HW_WRITE_UNLOCK +#define HW_WRITE_UNLOCK() HW_UNLOCK() +#endif + +#ifndef HW_READ_LOCK +#define HW_READ_LOCK() HW_LOCK() +#endif + +#ifndef HW_READ_UNLOCK +#define HW_READ_UNLOCK() HW_UNLOCK() +#endif + +#ifndef HW_READ_CLIPLOOP +#define HW_READ_CLIPLOOP() HW_CLIPLOOP() +#endif + +#ifndef HW_WRITE_CLIPLOOP +#define HW_WRITE_CLIPLOOP() HW_CLIPLOOP() +#endif + + +static void TAG(WriteRGBASpan)( const GLcontext *ctx, + GLuint n, GLint x, GLint y, + const GLubyte rgba[][4], + const GLubyte mask[] ) +{ + HW_WRITE_LOCK() + { + GLint x1; + GLint n1; + LOCAL_VARS; + + y = Y_FLIP(y); + + HW_WRITE_CLIPLOOP() + { + GLint i = 0; + CLIPSPAN(x,y,n,x1,n1,i); + + if (DBG) fprintf(stderr, "WriteRGBASpan %d..%d (x1 %d)\n", + (int)i, (int)n1, (int)x1); + + if (mask) + { + for (;n1>0;i++,x1++,n1--) + if (mask[i]) + WRITE_RGBA( x1, y, + rgba[i][0], rgba[i][1], + rgba[i][2], rgba[i][3] ); + } + else + { + for (;n1>0;i++,x1++,n1--) + WRITE_RGBA( x1, y, + rgba[i][0], rgba[i][1], + rgba[i][2], rgba[i][3] ); + } + } + HW_ENDCLIPLOOP(); + } + HW_WRITE_UNLOCK(); +} + +static void TAG(WriteRGBSpan)( const GLcontext *ctx, + GLuint n, GLint x, GLint y, + const GLubyte rgb[][3], + const GLubyte mask[] ) +{ + HW_WRITE_LOCK() + { + GLint x1; + GLint n1; + LOCAL_VARS; + + y = Y_FLIP(y); + + HW_WRITE_CLIPLOOP() + { + GLint i = 0; + CLIPSPAN(x,y,n,x1,n1,i); + + if (DBG) fprintf(stderr, "WriteRGBSpan %d..%d (x1 %d)\n", + (int)i, (int)n1, (int)x1); + + if (mask) + { + for (;n1>0;i++,x1++,n1--) + if (mask[i]) + WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 ); + } + else + { + for (;n1>0;i++,x1++,n1--) + WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 ); + } + } + HW_ENDCLIPLOOP(); + } + HW_WRITE_UNLOCK(); +} + +static void TAG(WriteRGBAPixels)( const GLcontext *ctx, + GLuint n, + const GLint x[], + const GLint y[], + const GLubyte rgba[][4], + const GLubyte mask[] ) +{ + HW_WRITE_LOCK() + { + GLint i; + LOCAL_VARS; + + if (DBG) fprintf(stderr, "WriteRGBAPixels\n"); + + HW_WRITE_CLIPLOOP() + { + for (i=0;i0;i++,x1++,n1--) + if (mask[i]) + WRITE_PIXEL( x1, y, p ); + } + HW_ENDCLIPLOOP(); + } + HW_WRITE_UNLOCK(); +} + + +static void TAG(WriteMonoRGBAPixels)( const GLcontext *ctx, + GLuint n, + const GLint x[], const GLint y[], + const GLchan color[], + const GLubyte mask[] ) +{ + HW_WRITE_LOCK() + { + GLint i; + LOCAL_VARS; + INIT_MONO_PIXEL(p, color); + + if (DBG) 
fprintf(stderr, "WriteMonoRGBAPixels\n"); + + HW_WRITE_CLIPLOOP() + { + for (i=0;i0;i++,x1++,n1--) + READ_RGBA( rgba[i], x1, y ); + } + HW_ENDCLIPLOOP(); + } + HW_READ_UNLOCK(); +} + + +static void TAG(ReadRGBAPixels)( const GLcontext *ctx, + GLuint n, const GLint x[], const GLint y[], + GLubyte rgba[][4], const GLubyte mask[] ) +{ + HW_READ_LOCK() + { + GLint i; + LOCAL_VARS; + + if (DBG) fprintf(stderr, "ReadRGBAPixels\n"); + + HW_READ_CLIPLOOP() + { + for (i=0;i + * Keith Whitwell + * Kevin E. Martin + * Gareth Hughes + */ +/* $XFree86:$ */ + +/** \file texmem.c + * Implements all of the device-independent texture memory management. + * + * Currently, only a simple LRU texture memory management policy is + * implemented. In the (hopefully very near) future, better policies will be + * implemented. The idea is that the DRI should be able to run in one of two + * modes. In the default mode the DRI will dynamically attempt to discover + * the best texture management policy for the running application. In the + * other mode, the user (via some sort of as yet TBD mechanism) will select + * a texture management policy that is known to work well with the + * application. + */ + +#include "texmem.h" +#include "simple_list.h" +#include "imports.h" + +#include + + + +static unsigned dummy_swap_counter; + + +/** + * Calculate \f$\log_2\f$ of a value. This is a particularly poor + * implementation of this function. However, since system performance is in + * no way dependent on this function, the slowness of the implementation is + * irrelevent. + * + * \param n Value whose \f$\log_2\f$ is to be calculated + */ + +static unsigned +driLog2( unsigned n ) +{ + unsigned log2; + + + for ( log2 = 1 ; n > 1 ; log2++ ) { + n >>= 1; + } + + return log2; +} + + + + +/** + * Determine if a texture is resident in textureable memory. Depending on + * the driver, this may or may not be on-card memory. It could be AGP memory + * or anyother type of memory from which the hardware can directly read + * texels. + * + * This function is intended to be used as the \c IsTextureResident function + * in the device's \c dd_function_table. + * + * \param ctx GL context pointer (currently unused) + * \param texObj Texture object to be tested + */ + +GLboolean +driIsTextureResident( GLcontext * ctx, + struct gl_texture_object * texObj ) +{ + driTextureObject * t; + + + t = (driTextureObject *) texObj->DriverData; + return( (t != NULL) && (t->memBlock != NULL) ); +} + + + + +/** + * (Re)initialize the global circular LRU list. The last element + * in the array (\a heap->nrRegions) is the sentinal. Keeping it + * at the end of the array allows the other elements of the array + * to be addressed rationally when looking up objects at a particular + * location in texture memory. + * + * \param heap Texture heap to be reset + */ + +static void resetGlobalLRU( driTexHeap * heap ) +{ + drmTextureRegionPtr list = heap->global_regions; + unsigned sz = 1U << heap->logGranularity; + unsigned i; + + for (i = 0 ; (i+1) * sz <= heap->size ; i++) { + list[i].prev = i-1; + list[i].next = i+1; + list[i].age = 0; + } + + i--; + list[0].prev = heap->nrRegions; + list[i].prev = i-1; + list[i].next = heap->nrRegions; + list[heap->nrRegions].prev = i; + list[heap->nrRegions].next = 0; + heap->global_age[0] = 0; +} + +/** + * Print out debugging information about the local texture LRU. 
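A note on how the span templates above are used: spantmp.h, depthtmp.h and stenciltmp.h are never compiled on their own. The driver defines the access macros and a TAG, then includes the template once per drawing format. A rough sketch for a hypothetical 16-bit depth buffer follows; every macro body and variable below is a placeholder, not taken from any real driver:

#define LOCAL_DEPTH_VARS \
   char *zbuf = drv_depth_base; GLuint zpitch = drv_depth_pitch
#define Y_FLIP( y )              (drv_buffer_height - (y) - 1)
#define HW_LOCK()                /* take the hardware lock    */
#define HW_UNLOCK()              /* release the hardware lock */
#define HW_WRITE_LOCK()          HW_LOCK()
#define HW_WRITE_UNLOCK()        HW_UNLOCK()
#define HW_CLIPLOOP()            {     /* loop over cliprects here */
#define HW_ENDCLIPLOOP()         }
#define CLIPSPAN( x, y, n, x1, n1, i )  do { x1 = x; n1 = n; i = 0; } while (0)
#define CLIPPIXEL( x, y )        (1)

#define WRITE_DEPTH( x, y, d ) \
   *(GLushort *)(zbuf + (y)*zpitch + (x)*2) = (GLushort)(d)
#define READ_DEPTH( d, x, y ) \
   d = *(GLushort *)(zbuf + (y)*zpitch + (x)*2)

#define TAG(x) drv##x##_16
#include "depthtmp.h"

spantmp.h follows the same pattern but expects LOCAL_VARS, WRITE_RGBA, WRITE_PIXEL, INIT_MONO_PIXEL and READ_RGBA in place of the depth macros.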
+ * + * \param heap Texture heap to be printed + * \param callername Name of calling function + */ +static void printLocalLRU( driTexHeap * heap, const char *callername ) +{ + driTextureObject *t; + unsigned sz = 1U << heap->logGranularity; + + fprintf( stderr, "%s in %s:\nLocal LRU, heap %d:\n", + __FUNCTION__, callername, heap->heapId ); + + foreach ( t, &heap->texture_objects ) { + if (!t->memBlock) + continue; + if (!t->tObj) { + fprintf( stderr, "Placeholder (%p) %d at 0x%x sz 0x%x\n", + t, + t->memBlock->ofs / sz, + t->memBlock->ofs, + t->memBlock->size ); + } else { + fprintf( stderr, "Texture (%p) at 0x%x sz 0x%x\n", + t, + t->memBlock->ofs, + t->memBlock->size ); + } + } + foreach ( t, heap->swapped_objects ) { + if (!t->tObj) { + fprintf( stderr, "Swapped Placeholder (%p)\n", t ); + } else { + fprintf( stderr, "Swapped Texture (%p)\n", t ); + } + } + + fprintf( stderr, "\n" ); +} + +/** + * Print out debugging information about the global texture LRU. + * + * \param heap Texture heap to be printed + * \param callername Name of calling function + */ +static void printGlobalLRU( driTexHeap * heap, const char *callername ) +{ + drmTextureRegionPtr list = heap->global_regions; + int i, j; + + fprintf( stderr, "%s in %s:\nGlobal LRU, heap %d list %p:\n", + __FUNCTION__, callername, heap->heapId, list ); + + for ( i = 0, j = heap->nrRegions ; i < heap->nrRegions ; i++ ) { + fprintf( stderr, "list[%d] age %d next %d prev %d in_use %d\n", + j, list[j].age, list[j].next, list[j].prev, list[j].in_use ); + j = list[j].next; + if ( j == heap->nrRegions ) break; + } + + if ( j != heap->nrRegions ) { + fprintf( stderr, "Loop detected in global LRU\n" ); + for ( i = 0 ; i < heap->nrRegions ; i++ ) { + fprintf( stderr, "list[%d] age %d next %d prev %d in_use %d\n", + i, list[i].age, list[i].next, list[i].prev, list[i].in_use ); + } + } + + fprintf( stderr, "\n" ); +} + + +/** + * Called by the client whenever it touches a local texture. + * + * \param t Texture object that the client has accessed + */ + +void driUpdateTextureLRU( driTextureObject * t ) +{ + driTexHeap * heap; + drmTextureRegionPtr list; + unsigned shift; + unsigned start; + unsigned end; + unsigned i; + + + heap = t->heap; + if ( heap != NULL ) { + shift = heap->logGranularity; + start = t->memBlock->ofs >> shift; + end = (t->memBlock->ofs + t->memBlock->size - 1) >> shift; + + + heap->local_age = ++heap->global_age[0]; + list = heap->global_regions; + + + /* Update the context's local LRU + */ + + move_to_head( & heap->texture_objects, t ); + + + for (i = start ; i <= end ; i++) { + list[i].in_use = 1; + list[i].age = heap->local_age; + + /* remove_from_list(i) + */ + list[(unsigned)list[i].next].prev = list[i].prev; + list[(unsigned)list[i].prev].next = list[i].next; + + /* insert_at_head(list, i) + */ + list[i].prev = heap->nrRegions; + list[i].next = list[heap->nrRegions].next; + list[(unsigned)list[heap->nrRegions].next].prev = i; + list[heap->nrRegions].next = i; + } + + if ( 0 ) { + printGlobalLRU( heap, __FUNCTION__ ); + printLocalLRU( heap, __FUNCTION__ ); + } + } +} + + + + +/** + * Keep track of swapped out texture objects. 
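For context, these helpers are meant to be called from the driver's own texture path: driIsTextureResident is plugged into the dd_function_table as noted above, and a validate-before-draw flow might look roughly like the sketch below. This is hypothetical driver-side code (it would include "texmem.h"); driAllocateTexture and driAgeTextures are defined further down in this file, and the upload step is only indicated by a comment:

/* e.g. in the driver's context setup:
 *    ctx->Driver.IsTextureResident = driIsTextureResident;
 */

static void exampleValidateTexture( driTexHeap * const * heaps,
                                    unsigned nr_heaps,
                                    driTextureObject * t )
{
   if ( t->memBlock == NULL ) {
      /* Not resident: grab space, evicting LRU textures if necessary, */
      /* then (re)upload the dirty mipmap images.                      */
      if ( driAllocateTexture( heaps, nr_heaps, t ) < 0 )
         return;
      /* ... driver-specific upload of t->tObj's images goes here ...  */
   }

   /* Mark the texture as most-recently-used on every access. */
   driUpdateTextureLRU( t );
}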
+ * + * \param t Texture object to be "swapped" out of its texture heap + */ + +void driSwapOutTextureObject( driTextureObject * t ) +{ + unsigned face; + + + if ( t->memBlock != NULL ) { + assert( t->heap != NULL ); + mmFreeMem( t->memBlock ); + t->memBlock = NULL; + + if (t->timestamp > t->heap->timestamp) + t->heap->timestamp = t->timestamp; + + t->heap->texture_swaps[0]++; + move_to_tail( t->heap->swapped_objects, t ); + t->heap = NULL; + } + else { + assert( t->heap == NULL ); + } + + + for ( face = 0 ; face < 6 ; face++ ) { + t->dirty_images[face] = ~0; + } +} + + + + +/** + * Destroy hardware state associated with texture \a t. Calls the + * \a destroy_texture_object method associated with the heap from which + * \a t was allocated. + * + * \param t Texture object to be destroyed + */ + +void driDestroyTextureObject( driTextureObject * t ) +{ + driTexHeap * heap; + + + if ( 0 ) { + fprintf( stderr, "[%s:%d] freeing %p (tObj = %p, DriverData = %p)\n", + __FILE__, __LINE__, + t, + (t != NULL) ? t->tObj : NULL, + (t != NULL && t->tObj != NULL) ? t->tObj->DriverData : NULL ); + } + + if ( t != NULL ) { + if ( t->memBlock ) { + heap = t->heap; + assert( heap != NULL ); + + heap->texture_swaps[0]++; + + mmFreeMem( t->memBlock ); + t->memBlock = NULL; + + if (t->timestamp > t->heap->timestamp) + t->heap->timestamp = t->timestamp; + + heap->destroy_texture_object( heap->driverContext, t ); + t->heap = NULL; + } + + if ( t->tObj != NULL ) { + assert( t->tObj->DriverData == t ); + t->tObj->DriverData = NULL; + } + + remove_from_list( t ); + FREE( t ); + } + + if ( 0 ) { + fprintf( stderr, "[%s:%d] done freeing %p\n", __FILE__, __LINE__, t ); + } +} + + + + +/** + * Update the local heap's representation of texture memory based on + * data in the SAREA. This is done each time it is detected that some other + * direct rendering client has held the lock. This pertains to both our local + * textures and the textures belonging to other clients. Keep track of other + * client's textures by pushing a placeholder texture onto the LRU list -- + * these are denoted by \a tObj being \a NULL. + * + * \param heap Heap whose state is to be updated + * \param offset Byte offset in the heap that has been stolen + * \param size Size, in bytes, of the stolen block + * \param in_use Non-zero if the block is in-use by another context + */ + +static void driTexturesGone( driTexHeap * heap, int offset, int size, + int in_use ) +{ + driTextureObject * t; + driTextureObject * tmp; + + + foreach_s ( t, tmp, & heap->texture_objects ) { + if ( (t->memBlock->ofs < (offset + size)) + && ((t->memBlock->ofs + t->memBlock->size) > offset) ) { + /* It overlaps - kick it out. If the texture object is just a + * place holder, then destroy it all together. Otherwise, mark + * it as being swapped out. 
+ */ + + if ( t->tObj != NULL ) { + driSwapOutTextureObject( t ); + } + else { + if ( in_use && + offset == t->memBlock->ofs && size == t->memBlock->size ) { + /* Matching placeholder already exists */ + return; + } else { + driDestroyTextureObject( t ); + } + } + } + } + + + if ( in_use ) { + t = (driTextureObject *) CALLOC( heap->texture_object_size ); + if ( t == NULL ) return; + + t->memBlock = mmAllocMem( heap->memory_heap, size, 0, offset ); + if ( t->memBlock == NULL ) { + fprintf( stderr, "Couldn't alloc placeholder: heap %u sz %x ofs %x\n", heap->heapId, + (int)size, (int)offset ); + mmDumpMemInfo( heap->memory_heap ); + return; + } + t->heap = heap; + insert_at_head( & heap->texture_objects, t ); + } +} + + + + +/** + * Called by the client on lock contention to determine whether textures have + * been stolen. If another client has modified a region in which we have + * textures, then we need to figure out which of our textures have been + * removed and update our global LRU. + * + * \param heap Texture heap to be updated + */ + +void driAgeTextures( driTexHeap * heap ) +{ + drmTextureRegionPtr list = heap->global_regions; + unsigned sz = 1U << (heap->logGranularity); + unsigned i, nr = 0; + + + /* Have to go right round from the back to ensure stuff ends up + * LRU in the local list... Fix with a cursor pointer. + */ + + for (i = list[heap->nrRegions].prev ; + i != heap->nrRegions && nr < heap->nrRegions ; + i = list[i].prev, nr++) { + /* If switching texturing schemes, then the SAREA might not have been + * properly cleared, so we need to reset the global texture LRU. + */ + + if ( (i * sz) > heap->size ) { + nr = heap->nrRegions; + break; + } + + if (list[i].age > heap->local_age) + driTexturesGone( heap, i * sz, sz, list[i].in_use); + } + + /* Loop or uninitialized heap detected. Reset. + */ + + if (nr == heap->nrRegions) { + driTexturesGone( heap, 0, heap->size, 0); + resetGlobalLRU( heap ); + } + + if ( 0 ) { + printGlobalLRU( heap, __FUNCTION__ ); + printLocalLRU( heap, __FUNCTION__ ); + } + + heap->local_age = heap->global_age[0]; +} + + + + +/** + * Allocate memory from a texture heap to hold a texture object. This + * routine will attempt to allocate memory for the texture from the heaps + * specified by \c heap_array in order. That is, first it will try to + * allocate from \c heap_array[0], then \c heap_array[1], and so on. + * + * \param heap_array Array of pointers to texture heaps to use + * \param nr_heaps Number of heap pointer in \a heap_array + * \param t Texture object for which space is needed + * \return The ID of the heap from which memory was allocated, or -1 if + * memory could not be allocated. + * + * \bug The replacement policy implemented by this function is horrible. + */ + + +int +driAllocateTexture( driTexHeap * const * heap_array, unsigned nr_heaps, + driTextureObject * t ) +{ + driTexHeap * heap; + driTextureObject * temp; + driTextureObject * cursor; + unsigned id; + + + /* In case it already has texture space, initialize heap. This also + * prevents GCC from issuing a warning that heap might be used + * uninitialized. + */ + + heap = t->heap; + + + /* Run through each of the existing heaps and try to allocate a buffer + * to hold the texture. + */ + + for ( id = 0 ; (t->memBlock == NULL) && (id < nr_heaps) ; id++ ) { + heap = heap_array[ id ]; + if ( heap != NULL ) { + t->memBlock = mmAllocMem( heap->memory_heap, t->totalSize, + heap->alignmentShift, 0 ); + } + } + + + /* Kick textures out until the requested texture fits. 
+ */ + + if ( t->memBlock == NULL ) { + for ( id = 0 ; (t->memBlock == NULL) && (id < nr_heaps) ; id++ ) { + heap = heap_array[ id ]; + if ( t->totalSize <= heap->size ) { + + for ( cursor = heap->texture_objects.prev, temp = cursor->prev; + cursor != &heap->texture_objects ; + cursor = temp, temp = cursor->prev ) { + + /* The the LRU element. If the texture is bound to one of + * the texture units, then we cannot kick it out. + */ + if ( cursor->bound /* || cursor->reserved */ ) { + continue; + } + + /* If this is a placeholder, there's no need to keep it */ + if (cursor->tObj) + driSwapOutTextureObject( cursor ); + else + driDestroyTextureObject( cursor ); + + t->memBlock = mmAllocMem( heap->memory_heap, t->totalSize, + heap->alignmentShift, 0 ); + + if (t->memBlock) + break; + } + } /* if ( t->totalSize <= heap->size ) ... */ + } + } + + + if ( t->memBlock != NULL ) { + /* id and heap->heapId may or may not be the same value here. + */ + + assert( heap != NULL ); + assert( (t->heap == NULL) || (t->heap == heap) ); + + t->heap = heap; + return heap->heapId; + } + else { + assert( t->heap == NULL ); + + fprintf( stderr, "[%s:%d] unable to allocate texture\n", + __FUNCTION__, __LINE__ ); + return -1; + } +} + + + + + + +/** + * Set the location where the texture-swap counter is stored. + */ + +void +driSetTextureSwapCounterLocation( driTexHeap * heap, unsigned * counter ) +{ + heap->texture_swaps = (counter == NULL) ? & dummy_swap_counter : counter; +} + + + + +/** + * Create a new heap for texture data. + * + * \param heap_id Device-dependent heap identifier. This value + * will returned by driAllocateTexture when memory + * is allocated from this heap. + * \param context Device-dependent driver context. This is + * supplied as the first parameter to the + * \c destroy_tex_obj function. + * \param size Size, in bytes, of the texture region + * \param alignmentShift Alignment requirement for textures. If textures + * must be allocated on a 4096 byte boundry, this + * would be 12. + * \param nr_regions Number of regions into which this texture space + * should be partitioned + * \param global_regions Array of \c drmTextureRegion structures in the SAREA + * \param global_age Pointer to the global texture age in the SAREA + * \param swapped_objects Pointer to the list of texture objects that are + * not in texture memory (i.e., have been swapped + * out). 
+ * \param texture_object_size Size, in bytes, of a device-dependent texture + * object + * \param destroy_tex_obj Function used to destroy a device-dependent + * texture object + * + * \sa driDestroyTextureHeap + */ + +driTexHeap * +driCreateTextureHeap( unsigned heap_id, void * context, unsigned size, + unsigned alignmentShift, unsigned nr_regions, + drmTextureRegionPtr global_regions, unsigned * global_age, + driTextureObject * swapped_objects, + unsigned texture_object_size, + destroy_texture_object_t * destroy_tex_obj + ) +{ + driTexHeap * heap; + unsigned l; + + + if ( 0 ) + fprintf( stderr, "%s( %u, %p, %u, %u, %u )\n", + __FUNCTION__, + heap_id, context, size, alignmentShift, nr_regions ); + + heap = (driTexHeap *) CALLOC( sizeof( driTexHeap ) ); + if ( heap != NULL ) { + l = driLog2( (size - 1) / nr_regions ); + if ( l < alignmentShift ) + { + l = alignmentShift; + } + + heap->logGranularity = l; + heap->size = size & ~((1L << l) - 1); + + heap->memory_heap = mmInit( 0, heap->size ); + if ( heap->memory_heap != NULL ) { + heap->heapId = heap_id; + heap->driverContext = context; + + heap->alignmentShift = alignmentShift; + heap->nrRegions = nr_regions; + heap->global_regions = global_regions; + heap->global_age = global_age; + heap->swapped_objects = swapped_objects; + heap->texture_object_size = texture_object_size; + heap->destroy_texture_object = destroy_tex_obj; + + /* Force global heap init */ + if (heap->global_age == 0) + heap->local_age = ~0; + else + heap->local_age = 0; + + make_empty_list( & heap->texture_objects ); + driSetTextureSwapCounterLocation( heap, NULL ); + } + else { + FREE( heap ); + heap = NULL; + } + } + + + if ( 0 ) + fprintf( stderr, "%s returning %p\n", __FUNCTION__, heap ); + + return heap; +} + + + + +/** Destroys a texture heap + * + * \param heap Texture heap to be destroyed + */ + +void +driDestroyTextureHeap( driTexHeap * heap ) +{ + driTextureObject * t; + driTextureObject * temp; + + + if ( heap != NULL ) { + foreach_s( t, temp, & heap->texture_objects ) { + driDestroyTextureObject( t ); + } + foreach_s( t, temp, heap->swapped_objects ) { + driDestroyTextureObject( t ); + } + + mmDestroy( heap->memory_heap ); + FREE( heap ); + } +} + + + + +/****************************************************************************/ +/** + * Determine how many texels (including all mipmap levels) would be required + * for a texture map of size \f$2^^\c base_size_log2\f$ would require. + * + * \param base_size_log2 \f$log_2\f$ of the size of a side of the texture + * \param dimensions Number of dimensions of the texture. Either 2 or 3. + * \param faces Number of faces of the texture. Either 1 or 6 (for cube maps). + * \return Number of texels + */ + +static unsigned +texels_this_map_size( int base_size_log2, unsigned dimensions, unsigned faces ) +{ + unsigned texels; + + + assert( (faces == 1) || (faces == 6) ); + assert( (dimensions == 2) || (dimensions == 3) ); + + texels = 0; + if ( base_size_log2 >= 0 ) { + texels = (1U << (dimensions * base_size_log2)); + + /* See http://www.mail-archive.com/dri-devel@lists.sourceforge.net/msg03636.html + * for the complete explaination of why this formulation is used. + * Basically, the smaller mipmap levels sum to 0.333 the size of the + * level 0 map. The total size is therefore the size of the map + * multipled by 1.333. The +2 is there to round up. 
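As a quick worked example of that closed form: a 256x256 two-dimensional map with one face has base_size_log2 = 8, so level 0 holds 1 << 16 = 65536 texels, and the expression below yields (65536 * 4 * 1 + 2) / 3 = 87382. Summing all nine mipmap levels explicitly (65536 + 16384 + 4096 + ... + 4 + 1) gives 87381, so the approximation is within one texel of the exact total and errs on the high side, as intended.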
+ */ + + texels = (texels * 4 * faces + 2) / 3; + } + + return texels; +} + + + + +struct maps_per_heap { + unsigned c[32]; +}; + +static void +fill_in_maximums( driTexHeap * const * heaps, unsigned nr_heaps, + unsigned max_bytes_per_texel, unsigned max_size, + unsigned mipmaps_at_once, unsigned dimensions, + unsigned faces, struct maps_per_heap * max_textures ) +{ + unsigned heap; + unsigned log2_size; + unsigned mask; + + + /* Determine how many textures of each size can be stored in each + * texture heap. + */ + + for ( heap = 0 ; heap < nr_heaps ; heap++ ) { + if ( heaps[ heap ] == NULL ) { + (void) memset( max_textures[ heap ].c, 0, + sizeof( max_textures[ heap ].c ) ); + continue; + } + + mask = (1U << heaps[ heap ]->logGranularity) - 1; + + if ( 0 ) { + fprintf( stderr, "[%s:%d] heap[%u] = %u bytes, mask = 0x%08x\n", + __FILE__, __LINE__, + heap, heaps[ heap ]->size, mask ); + } + + for ( log2_size = max_size ; log2_size > 0 ; log2_size-- ) { + unsigned total; + + + /* Determine the total number of bytes required by a texture of + * size log2_size. + */ + + total = texels_this_map_size( log2_size, dimensions, faces ) + - texels_this_map_size( log2_size - mipmaps_at_once, + dimensions, faces ); + total *= max_bytes_per_texel; + total = (total + mask) & ~mask; + + /* The number of textures of a given size that will fit in a heap + * is equal to the size of the heap divided by the size of the + * texture. + */ + + max_textures[ heap ].c[ log2_size ] = heaps[ heap ]->size / total; + + if ( 0 ) { + fprintf( stderr, "[%s:%d] max_textures[%u].c[%02u] " + "= 0x%08x / 0x%08x " + "= %u (%u)\n", + __FILE__, __LINE__, + heap, log2_size, + heaps[ heap ]->size, total, + heaps[ heap ]->size / total, + max_textures[ heap ].c[ log2_size ] ); + } + } + } +} + + +static unsigned +get_max_size( unsigned nr_heaps, + unsigned texture_units, + unsigned max_size, + int all_textures_one_heap, + struct maps_per_heap * max_textures ) +{ + unsigned heap; + unsigned log2_size; + + + /* Determine the largest texture size such that a texture of that size + * can be bound to each texture unit at the same time. Some hardware + * may require that all textures be in the same texture heap for + * multitexturing. + */ + + for ( log2_size = max_size ; log2_size > 0 ; log2_size-- ) { + unsigned total = 0; + + for ( heap = 0 ; heap < nr_heaps ; heap++ ) + { + total += max_textures[ heap ].c[ log2_size ]; + + if ( 0 ) { + fprintf( stderr, "[%s:%d] max_textures[%u].c[%02u] = %u, " + "total = %u\n", __FILE__, __LINE__, heap, log2_size, + max_textures[ heap ].c[ log2_size ], total ); + } + + if ( (max_textures[ heap ].c[ log2_size ] >= texture_units) + || (!all_textures_one_heap && (total >= texture_units)) ) { + /* The number of mipmap levels is the log-base-2 of the + * maximum texture size plus 1. If the maximum texture size + * is 1x1, the log-base-2 is 0 and 1 mipmap level (the base + * level) is available. + */ + + return log2_size + 1; + } + } + } + + /* This should NEVER happen. It should always be possible to have at + * *least* a 1x1 texture in memory! + */ + assert( log2_size != 0 ); + return 0; +} + +#define SET_MAX(f,v) \ + do { if ( max_sizes[v] != 0 ) { limits-> f = max_sizes[v]; } } while( 0 ) + +#define SET_MAX_RECT(f,v) \ + do { if ( max_sizes[v] != 0 ) { limits-> f = 1 << max_sizes[v]; } } while( 0 ) + + +/** + * Given the amount of texture memory, the number of texture units, and the + * maximum size of a texel, calculate the maximum texture size the driver can + * adverteise. 
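+ *
+ * For example, with a single 8 MB heap, one texture unit, and 4 bytes per
+ * texel, a fully mipmapped 1024x1024 2D texture needs roughly 5.6 MB
+ * (\c texels_this_map_size(10, 2, 1) texels at 4 bytes each), so the
+ * largest usable \f$log_2\f$ size is 10 and \c MaxTextureLevels is set
+ * to 11.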
+ *
+ * \param heaps         Texture heaps for this card
+ * \param nr_heaps      Number of texture heaps
+ * \param limits        OpenGL constants.  MaxTextureUnits must be set.
+ * \param max_bytes_per_texel  Maximum size of a single texel, in bytes
+ * \param max_2D_size   \f$\log_2\f$ of the maximum 2D texture size (e.g.,
+ *                      for 1024x1024 textures, this would be 10)
+ * \param max_3D_size   \f$\log_2\f$ of the maximum 3D texture size (e.g.,
+ *                      for 1024x1024x1024 textures, this would be 10)
+ * \param max_cube_size \f$\log_2\f$ of the maximum cube texture size (e.g.,
+ *                      for 1024x1024 textures, this would be 10)
+ * \param max_rect_size \f$\log_2\f$ of the maximum texture rectangle size
+ *                      (e.g., for 1024x1024 textures, this would be 10).
+ *                      This is a power-of-2 even though texture rectangles
+ *                      need not be a power-of-2.
+ * \param mipmaps_at_once  Total number of mipmaps that can be used
+ *                      at one time.  For most hardware this will be the
+ *                      largest \c max_*_size value plus 1.  For hardware
+ *                      that does not support mipmapping, this will be 1.
+ * \param all_textures_one_heap  True if the hardware requires that all
+ *                      textures be in a single texture heap for
+ *                      multitexturing.
+ */
+
+void
+driCalculateMaxTextureLevels( driTexHeap * const * heaps,
+                              unsigned nr_heaps,
+                              struct gl_constants * limits,
+                              unsigned max_bytes_per_texel,
+                              unsigned max_2D_size,
+                              unsigned max_3D_size,
+                              unsigned max_cube_size,
+                              unsigned max_rect_size,
+                              unsigned mipmaps_at_once,
+                              int all_textures_one_heap )
+{
+   struct maps_per_heap  max_textures[8];
+   unsigned  i;
+   const unsigned  dimensions[4] = { 2, 3, 2, 2 };
+   const unsigned  faces[4]      = { 1, 1, 6, 1 };
+   unsigned  max_sizes[4];
+   unsigned  mipmaps[4];
+
+
+   max_sizes[0] = max_2D_size;
+   max_sizes[1] = max_3D_size;
+   max_sizes[2] = max_cube_size;
+   max_sizes[3] = max_rect_size;
+
+   mipmaps[0] = mipmaps_at_once;
+   mipmaps[1] = mipmaps_at_once;
+   mipmaps[2] = 1;
+   mipmaps[3] = mipmaps_at_once;
+
+
+   /* Calculate the maximum number of texture levels in two passes.  The
+    * first pass determines how many textures of each power-of-two size
+    * (including all mipmap levels for that size) can fit in each texture
+    * heap.  The second pass finds the largest texture size that allows
+    * a texture of that size to be bound to every texture unit.
+    */
+
+   for ( i = 0 ; i < 4 ; i++ ) {
+      if ( max_sizes[ i ] != 0 ) {
+         fill_in_maximums( heaps, nr_heaps, max_bytes_per_texel,
+                           max_sizes[ i ], mipmaps[ i ],
+                           dimensions[ i ], faces[ i ],
+                           max_textures );
+
+         max_sizes[ i ] = get_max_size( nr_heaps,
+                                        limits->MaxTextureUnits,
+                                        max_sizes[ i ],
+                                        all_textures_one_heap,
+                                        max_textures );
+      }
+   }
+
+   SET_MAX( MaxTextureLevels,        0 );
+   SET_MAX( Max3DTextureLevels,      1 );
+   SET_MAX( MaxCubeTextureLevels,    2 );
+   SET_MAX_RECT( MaxTextureRectSize, 3 );
+}
+
+
+
+
+/**
+ * Perform initial binding of default texture objects on a per unit, per
+ * texture target basis.
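+ *
+ * A driver will typically call this once at context creation, after its
+ * texture heaps have been created.  An illustrative call ("mydrv" stands
+ * for the driver's private context) might be:
+ *
+ *     driInitTextureObjects( ctx, & mydrv->swapped,
+ *                            DRI_TEXMGR_DO_TEXTURE_1D | DRI_TEXMGR_DO_TEXTURE_2D );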
+ *
+ * \param ctx Current OpenGL context
+ * \param swapped List of swapped-out textures
+ * \param targets Bit-mask of valid texture targets
+ */
+
+void driInitTextureObjects( GLcontext *ctx, driTextureObject * swapped,
+                            GLuint targets )
+{
+   struct gl_texture_object *texObj;
+   GLuint tmp = ctx->Texture.CurrentUnit;
+   unsigned   i;
+
+
+   for ( i = 0 ; i < ctx->Const.MaxTextureUnits ; i++ ) {
+      ctx->Texture.CurrentUnit = i;
+
+      if ( (targets & DRI_TEXMGR_DO_TEXTURE_1D) != 0 ) {
+         texObj = ctx->Texture.Unit[i].Current1D;
+         ctx->Driver.BindTexture( ctx, GL_TEXTURE_1D, texObj );
+         move_to_tail( swapped, (driTextureObject *) texObj->DriverData );
+      }
+
+      if ( (targets & DRI_TEXMGR_DO_TEXTURE_2D) != 0 ) {
+         texObj = ctx->Texture.Unit[i].Current2D;
+         ctx->Driver.BindTexture( ctx, GL_TEXTURE_2D, texObj );
+         move_to_tail( swapped, (driTextureObject *) texObj->DriverData );
+      }
+
+      if ( (targets & DRI_TEXMGR_DO_TEXTURE_3D) != 0 ) {
+         texObj = ctx->Texture.Unit[i].Current3D;
+         ctx->Driver.BindTexture( ctx, GL_TEXTURE_3D, texObj );
+         move_to_tail( swapped, (driTextureObject *) texObj->DriverData );
+      }
+
+      if ( (targets & DRI_TEXMGR_DO_TEXTURE_CUBE) != 0 ) {
+         texObj = ctx->Texture.Unit[i].CurrentCubeMap;
+         ctx->Driver.BindTexture( ctx, GL_TEXTURE_CUBE_MAP_ARB, texObj );
+         move_to_tail( swapped, (driTextureObject *) texObj->DriverData );
+      }
+
+      if ( (targets & DRI_TEXMGR_DO_TEXTURE_RECT) != 0 ) {
+         texObj = ctx->Texture.Unit[i].CurrentRect;
+         ctx->Driver.BindTexture( ctx, GL_TEXTURE_RECTANGLE_NV, texObj );
+         move_to_tail( swapped, (driTextureObject *) texObj->DriverData );
+      }
+   }
+
+   ctx->Texture.CurrentUnit = tmp;
+}
+
+
+
+
+/**
+ * Verify that the specified texture is in the specified heap.
+ *
+ * \param tex   Texture to be tested.
+ * \param heap  Texture memory heap to be tested.
+ * \return True if the texture is in the heap, false otherwise.
+ */
+
+static GLboolean
+check_in_heap( const driTextureObject * tex, const driTexHeap * heap )
+{
+#if 1
+   return tex->heap == heap;
+#else
+   driTextureObject * curr;
+
+   foreach( curr, & heap->texture_objects ) {
+      if ( curr == tex ) {
+         break;
+      }
+   }
+
+   return curr == tex;
+#endif
+}
+
+
+
+/****************************************************************************/
+/**
+ * Validate the consistency of a set of texture heaps.
+ * Original version by Keith Whitwell in r200/r200_sanity.c.
+ */
+
+GLboolean
+driValidateTextureHeaps( driTexHeap * const * texture_heaps,
+                         unsigned nr_heaps, const driTextureObject * swapped )
+{
+   driTextureObject *t;
+   unsigned  i;
+
+   for ( i = 0 ; i < nr_heaps ; i++ ) {
+      int last_end = 0;
+      unsigned textures_in_heap = 0;
+      unsigned blocks_in_mempool = 0;
+      const driTexHeap * heap = texture_heaps[i];
+      const memHeap_t * p = heap->memory_heap;
+
+      /* Check that each texture object has a MemBlock, and is linked into
+       * the correct heap.
+       *
+       * Check that the texobj base address corresponds to the MemBlock
+       * range.  Check that the texobj size (recalculate???) fits within
+       * the MemBlock.
+       *
+       * Count the number of texobj's using this heap.
+       */
+
+      foreach ( t, &heap->texture_objects ) {
+         if ( !check_in_heap( t, heap ) ) {
+            fprintf( stderr, "%s: memory block for texture object @ %p not "
+                     "found in heap #%d\n",
+                     __FUNCTION__, t, i );
+            return GL_FALSE;
+         }
+
+
+         if ( t->totalSize > t->memBlock->size ) {
+            fprintf( stderr, "%s: Memory block for texture object @ %p is "
+                     "only %u bytes, but %u are required\n",
+                     __FUNCTION__, t, t->memBlock->size, t->totalSize );
+            return GL_FALSE;
+         }
+
+         textures_in_heap++;
+      }
+
+      /* Validate the contents of the heap:
+       *  - Ordering
+       *  - Overlaps
+       *  - Bounds
+       */
+
+      while ( p != NULL ) {
+         if (p->reserved) {
+            fprintf( stderr, "%s: Block (%08x,%x) is reserved?!\n",
+                     __FUNCTION__, p->ofs, p->size );
+            return GL_FALSE;
+         }
+
+         if (p->ofs != last_end) {
+            fprintf( stderr, "%s: blocks_in_mempool = %d, last_end = %d, p->ofs = %d\n",
+                     __FUNCTION__, blocks_in_mempool, last_end, p->ofs );
+            return GL_FALSE;
+         }
+
+         if (!p->reserved && !p->free) {
+            blocks_in_mempool++;
+         }
+
+         last_end = p->ofs + p->size;
+         p = p->next;
+      }
+
+      if (textures_in_heap != blocks_in_mempool) {
+         fprintf( stderr, "%s: Different number of texture objects (%u) and "
+                  "in-use memory blocks (%u)\n",
+                  __FUNCTION__, textures_in_heap, blocks_in_mempool );
+         return GL_FALSE;
+      }
+
+#if 0
+      fprintf( stderr, "%s: textures_in_heap = %u\n",
+               __FUNCTION__, textures_in_heap );
+#endif
+   }
+
+
+   /* Check that swapped texobj's have no memblocks
+    */
+   i = 0;
+   foreach ( t, swapped ) {
+      if ( t->memBlock != NULL ) {
+         fprintf( stderr, "%s: Swapped texobj %p has non-NULL memblock %p\n",
+                  __FUNCTION__, t, t->memBlock );
+         return GL_FALSE;
+      }
+      i++;
+   }
+
+#if 0
+   fprintf( stderr, "%s: swapped texture count = %u\n", __FUNCTION__, i );
+#endif
+
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/common/texmem.h b/src/mesa/drivers/dri/common/texmem.h
new file mode 100644
index 0000000..7199704
--- /dev/null
+++ b/src/mesa/drivers/dri/common/texmem.h
@@ -0,0 +1,293 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick
+ *    Keith Whitwell
+ *    Kevin E. Martin
+ *    Gareth Hughes
+ */
+/* $XFree86:$ */
+
+/** \file texmem.h
+ * Public interface to the DRI texture memory management routines.
+ * + * \sa texmem.c + */ + +#ifndef DRI_TEXMEM_H +#define DRI_TEXMEM_H + +#include "mtypes.h" +#include "mm.h" +#include "xf86drm.h" + +struct dri_tex_heap; +typedef struct dri_tex_heap driTexHeap; + +struct dri_texture_object; +typedef struct dri_texture_object driTextureObject; + + +/** + * Base texture object type. Each driver will extend this type with its own + * private data members. + */ + +struct dri_texture_object { + struct dri_texture_object * next; + struct dri_texture_object * prev; + + driTexHeap * heap; /**< Texture heap currently stored in */ + struct gl_texture_object * tObj;/**< Pointer to Mesa texture object + * If NULL, this texture object is a + * "placeholder" object representing + * texture memory in use by another context. + * A placeholder should have a heap and a memBlock. + */ + PMemBlock memBlock; /**< Memory block containing texture */ + unsigned bound; /**< Texture unit currently bound to */ + + unsigned totalSize; /**< Total size of the texture, + * including all mipmap levels + */ + + unsigned dirty_images[6]; /**< Flags for whether or not images + * need to be uploaded to local or + * AGP texture space. One flag set + * for each cube face for cubic + * textures. Bit zero corresponds to + * the base-level, which may or may + * not be the level zero mipmap. + */ + + unsigned timestamp; /**< Timestamp used to + * synchronize with 3d engine + * in hardware where textures + * are uploaded directly to + * the framebuffer. + */ + + unsigned firstLevel; /**< Image in \c tObj->Image that + * corresponds to the base-level of + * this texture object. + */ + + unsigned lastLevel; /**< Last image in \c tObj->Image used + * by the current LOD settings of this + * texture object. This value must be + * greater than or equal to + * \c firstLevel. + */ +}; + + +typedef void (destroy_texture_object_t)( void * driverContext, + driTextureObject * t ); + +/** + * Client-private representation of texture memory state. + * + * Clients will place one or more of these structs in their driver + * context struct to manage one or more global texture heaps. + */ + +struct dri_tex_heap { + + /** Client-supplied heap identifier + */ + unsigned heapId; + + /** Pointer to the client's private context + */ + void *driverContext; + + /** Total size of the heap, in bytes + */ + unsigned size; + + /** \brief \f$log_2\f$ of size of single heap region + * + * Each context takes memory from the global texture heap in + * \f$2^{logGranularity}\f$ byte blocks. The value of + * \a logGranularity is based on the amount of memory represented + * by the heap and the maximum number of regions in the SAREA. Given + * \a b bytes of texture memory an \a n regions in the SAREA, + * \a logGranularity will be \f$\lfloor\log_2( b / n )\rfloor\f$. + */ + unsigned logGranularity; + + /** \brief Required alignment of allocations in this heap + * + * The alignment shift is supplied to \a mmAllocMem when memory is + * allocated from this heap. The value of \a alignmentShift will + * typically reflect some require of the hardware. This value has + * \b no \b relation to \a logGranularity. \a alignmentShift is a + * per-context value. + * + * \sa mmAllocMem + */ + unsigned alignmentShift; + + /** Number of elements in global list (the SAREA). 
+ */ + unsigned nrRegions; + + /** Pointer to SAREA \a driTexRegion array + */ + drmTextureRegionPtr global_regions; + + /** Pointer to the texture state age (generation number) in the SAREA + */ + unsigned * global_age; + + /** Local age (generation number) of texture state + */ + unsigned local_age; + + /** Memory heap used to manage texture memory represented by + * this texture heap. + */ + memHeap_t * memory_heap; + + /** List of objects that we currently believe to be in texture + * memory. + */ + driTextureObject texture_objects; + + /** Pointer to the list of texture objects that are not in + * texture memory. + */ + driTextureObject * swapped_objects; + + /** Size of the driver-speicific texture object. + */ + unsigned texture_object_size; + + + /** + * \brief Function to destroy driver-specific texture object data. + * + * This function is supplied by the driver so that the texture manager + * can release all resources associated with a texture object. This + * function should only release driver-specific data. That is, + * \a driDestroyTextureObject will release the texture memory + * associated with the texture object, it will release the memory + * for the texture object itself, and it will unlink the texture + * object from the texture object lists. + * + * \param driverContext Pointer to the driver supplied context + * \param t Texture object that is to be destroyed + * \sa driDestroyTextureObject + */ + + destroy_texture_object_t * destroy_texture_object; + + + /** + */ + unsigned * texture_swaps; + + /** + * Timestamp used to synchronize with 3d engine in hardware + * where textures are uploaded directly to the + * framebuffer. + */ + unsigned timestamp; +}; + + + + +/** + * Called by the client on lock contention to determine whether textures have + * been stolen. If another client has modified a region in which we have + * textures, then we need to figure out which of our textures have been + * removed and update our global LRU. + * + * \param heap Texture heap to be updated + * \hideinitializer + */ + +#define DRI_AGE_TEXTURES( heap ) \ + do { \ + if ( ((heap) != NULL) \ + && ((heap)->local_age != (heap)->global_age[0]) ) \ + driAgeTextures( heap ); \ + } while( 0 ) + + + + +/* This should be called whenever there has been contention on the hardware + * lock. driAgeTextures should not be called directly. Instead, clients + * should use DRI_AGE_TEXTURES, above. 
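+ *
+ * A typical call site ("mydrv" here stands for the driver's private
+ * context) is the code path that re-acquires the hardware lock after
+ * contention, for example:
+ *
+ *     DRI_AGE_TEXTURES( mydrv->texture_heaps[0] );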
+ */ + +void driAgeTextures( driTexHeap * heap ); + +void driUpdateTextureLRU( driTextureObject * t ); +void driSwapOutTextureObject( driTextureObject * t ); +void driDestroyTextureObject( driTextureObject * t ); +int driAllocateTexture( driTexHeap * const * heap_array, unsigned nr_heaps, + driTextureObject * t ); + +GLboolean driIsTextureResident( GLcontext * ctx, + struct gl_texture_object * texObj ); + +driTexHeap * driCreateTextureHeap( unsigned heap_id, void * context, + unsigned size, unsigned alignmentShift, unsigned nr_regions, + drmTextureRegionPtr global_regions, unsigned * global_age, + driTextureObject * swapped_objects, unsigned texture_object_size, + destroy_texture_object_t * destroy_tex_obj ); +void driDestroyTextureHeap( driTexHeap * heap ); + +void +driCalculateMaxTextureLevels( driTexHeap * const * heaps, + unsigned nr_heaps, + struct gl_constants * limits, + unsigned max_bytes_per_texel, + unsigned max_2D_size, + unsigned max_3D_size, + unsigned max_cube_size, + unsigned max_rect_size, + unsigned mipmaps_at_once, + int all_textures_one_heap ); + +void +driSetTextureSwapCounterLocation( driTexHeap * heap, unsigned * counter ); + +#define DRI_TEXMGR_DO_TEXTURE_1D 0x0001 +#define DRI_TEXMGR_DO_TEXTURE_2D 0x0002 +#define DRI_TEXMGR_DO_TEXTURE_3D 0x0004 +#define DRI_TEXMGR_DO_TEXTURE_CUBE 0x0008 +#define DRI_TEXMGR_DO_TEXTURE_RECT 0x0010 + +void driInitTextureObjects( GLcontext *ctx, driTextureObject * swapped, + GLuint targets ); + +GLboolean driValidateTextureHeaps( driTexHeap * const * texture_heaps, + unsigned nr_heaps, const driTextureObject * swapped ); + +#endif /* DRI_TEXMEM_H */ diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c new file mode 100644 index 0000000..bc83fef --- /dev/null +++ b/src/mesa/drivers/dri/common/utils.c @@ -0,0 +1,186 @@ +/* + * (c) Copyright IBM Corporation 2002 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Ian Romanick + */ +/* $XFree86:$ */ + +#include +#include +#include "mtypes.h" +#include "extensions.h" +#include "utils.h" + +#if defined(USE_X86_ASM) +#include "X86/common_x86_asm.h" +#endif + +unsigned +driParseDebugString( const char * debug, + const struct dri_debug_control * control ) +{ + unsigned flag; + + + flag = 0; + if ( debug != NULL ) { + while( control->string != NULL ) { + if ( strstr( debug, control->string ) != NULL ) { + flag |= control->flag; + } + + control++; + } + } + + return flag; +} + + + + +unsigned +driGetRendererString( char * buffer, const char * hardware_name, + const char * driver_date, GLuint agp_mode ) +{ +#ifdef USE_X86_ASM + char * x86_str = ""; + char * mmx_str = ""; + char * tdnow_str = ""; + char * sse_str = ""; +#endif + unsigned offset; + + + offset = sprintf( buffer, "Mesa DRI %s %s", hardware_name, driver_date ); + + /* Append any AGP-specific information. + */ + switch ( agp_mode ) { + case 1: + case 2: + case 4: + case 8: + offset += sprintf( & buffer[ offset ], " AGP %ux", agp_mode ); + break; + + default: + break; + } + + /* Append any CPU-specific information. + */ +#ifdef USE_X86_ASM + if ( _mesa_x86_cpu_features ) { + x86_str = " x86"; + } +# ifdef USE_MMX_ASM + if ( cpu_has_mmx ) { + mmx_str = (cpu_has_mmxext) ? "/MMX+" : "/MMX"; + } +# endif +# ifdef USE_3DNOW_ASM + if ( cpu_has_3dnow ) { + tdnow_str = (cpu_has_3dnowext) ? "/3DNow!+" : "/3DNow!"; + } +# endif +# ifdef USE_SSE_ASM + if ( cpu_has_xmm ) { + sse_str = (cpu_has_xmm2) ? "/SSE2" : "/SSE"; + } +# endif + + offset += sprintf( & buffer[ offset ], "%s%s%s%s", + x86_str, mmx_str, tdnow_str, sse_str ); + +#elif defined(USE_SPARC_ASM) + + offset += sprintf( & buffer[ offset ], " Sparc" ); + +#endif + + return offset; +} + + + + +void driInitExtensions( GLcontext * ctx, + const char * const extensions_to_enable[], + GLboolean enable_imaging ) +{ + unsigned i; + + if ( enable_imaging ) { + _mesa_enable_imaging_extensions( ctx ); + } + + for ( i = 0 ; extensions_to_enable[i] != NULL ; i++ ) { + _mesa_enable_extension( ctx, extensions_to_enable[i] ); + } +} + + + + +GLboolean +driCheckDriDdxDrmVersions(__DRIscreenPrivate *sPriv, + const char * driver_name, + int dri_major, int dri_minor, + int ddx_major, int ddx_minor, + int drm_major, int drm_minor) +{ + static const char format[] = "%s DRI driver expected %s version %d.%d.x " + "but got version %d.%d.%d"; + int major, minor, patch; + +#ifndef _SOLO + /* Check the DRI version */ + if (XF86DRIQueryVersion(sPriv->display, &major, &minor, &patch)) { + if (major != dri_major || minor < dri_minor) { + __driUtilMessage(format, "DRI", driver_name, dri_major, dri_minor, + major, minor, patch); + return GL_FALSE; + } + } +#else + (void)major;(void)minor;(void)patch; +#endif + + /* Check that the DDX driver version is compatible */ + if (sPriv->ddxMajor != ddx_major || sPriv->ddxMinor < ddx_minor) { + __driUtilMessage(format, "DDX", driver_name, ddx_major, ddx_minor, + sPriv->ddxMajor, sPriv->ddxMinor, sPriv->ddxPatch); + return GL_FALSE; + } + + /* Check that the DRM driver version is compatible */ + if (sPriv->drmMajor != drm_major || sPriv->drmMinor < drm_minor) { + __driUtilMessage(format, "DRM", driver_name, drm_major, drm_minor, + sPriv->drmMajor, sPriv->drmMinor, sPriv->drmPatch); + return GL_FALSE; + } + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/common/utils.h b/src/mesa/drivers/dri/common/utils.h new file mode 100644 index 0000000..d6506c5 --- /dev/null +++ b/src/mesa/drivers/dri/common/utils.h @@ -0,0 
+1,54 @@ +/* + * (c) Copyright IBM Corporation 2002 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Ian Romanick + */ +/* $XFree86:$ */ + +#ifndef DRI_DEBUG_H +#define DRI_DEBUG_H + +#include "context.h" +#include "dri_util.h" + +struct dri_debug_control +{ + const char * string; + unsigned flag; +}; + +extern unsigned driParseDebugString( const char * debug, + const struct dri_debug_control * control ); + +extern unsigned driGetRendererString( char * buffer, + const char * hardware_name, const char * driver_date, GLuint agp_mode ); + +extern void driInitExtensions( GLcontext * ctx, + const char * const card_extensions[], GLboolean enable_imaging ); + +extern GLboolean driCheckDriDdxDrmVersions( __DRIscreenPrivate *sPriv, + const char * driver_name, int dri_major, int dri_minor, + int ddx_major, int ddx_minor, int drm_major, int drm_minor ); + +#endif /* DRI_DEBUG_H */ diff --git a/src/mesa/drivers/dri/common/vblank.c b/src/mesa/drivers/dri/common/vblank.c new file mode 100644 index 0000000..e91dd65 --- /dev/null +++ b/src/mesa/drivers/dri/common/vblank.c @@ -0,0 +1,325 @@ +/* -*- mode: c; c-basic-offset: 3 -*- */ +/* + * (c) Copyright IBM Corporation 2002 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Ian Romanick + */ +/* $XFree86:$ */ + +#include "glheader.h" +#include "xf86drm.h" +#include "mtypes.h" +#include "macros.h" +#include "dd.h" +#include "vblank.h" + + +/****************************************************************************/ +/** + * Get the current MSC refresh counter. + * + * Stores the 64-bit count of vertical refreshes since some (arbitrary) + * point in time in \c count. Unless the value wraps around, which it + * may, it will never decrease. + * + * \warning This function is called from \c glXGetVideoSyncSGI, which expects + * a \c count of type \c unsigned (32-bit), and \c glXGetSyncValuesOML, which + * expects a \c count of type \c int64_t (signed 64-bit). The kernel ioctl + * currently always returns a \c sequence of type \c unsigned. + * + * \param priv Pointer to the DRI screen private struct. + * \param count Storage to hold MSC counter. + * \return Zero is returned on success. A negative errno value + * is returned on failure. + */ +int driGetMSC32( __DRIscreenPrivate * priv, int64_t * count ) +{ + drmVBlank vbl; + int ret; + + /* Don't wait for anything. Just get the current refresh count. */ + + vbl.request.type = DRM_VBLANK_RELATIVE; + vbl.request.sequence = 0; + + ret = drmWaitVBlank( priv->fd, &vbl ); + *count = (int64_t)vbl.reply.sequence; + + return ret; +} + + +/****************************************************************************/ +/** + * Wait for a specified refresh count. This implements most of the + * functionality of \c glXWaitForMscOML from the GLX_OML_sync_control spec. + * Waits for the \c target_msc refresh. If that has already passed, it + * waits until \f$(MSC \bmod divisor)\f$ is equal to \c remainder. If + * \c target_msc is 0, use the behavior of glXWaitVideoSyncSGI(), which + * omits the initial check against a target MSC value. + * + * This function is actually something of a hack. The problem is that, at + * the time of this writing, none of the existing DRM modules support an + * ioctl that returns a 64-bit count (at least not on 32-bit platforms). + * However, this function exists to support a GLX function that requires + * the use of 64-bit counts. As such, there is a little bit of ugly + * hackery at the end of this function to make the 32-bit count act like + * a 64-bit count. There are still some cases where this will break, but + * I believe it catches the most common cases. + * + * The real solution is to provide an ioctl that uses a 64-bit count. + * + * \param dpy Pointer to the \c Display. + * \param priv Pointer to the DRI drawable private. + * \param target_msc Desired refresh count to wait for. A value of 0 + * means to use the glXWaitVideoSyncSGI() behavior. + * \param divisor MSC divisor if \c target_msc is already reached. + * \param remainder Desired MSC remainder if \c target_msc is already + * reached. + * \param msc Buffer to hold MSC when done waiting. + * + * \return Zero on success or \c GLX_BAD_CONTEXT on failure. + */ + +int driWaitForMSC32( __DRIdrawablePrivate *priv, + int64_t target_msc, int64_t divisor, int64_t remainder, + int64_t * msc ) +{ + drmVBlank vbl; + + + if ( divisor != 0 ) { + unsigned int target = (unsigned int)target_msc; + unsigned int next = target; + unsigned int r; + int dont_wait = (target_msc == 0); + + do { + /* dont_wait means we're using the glXWaitVideoSyncSGI() behavior. + * The first time around, just get the current count and proceed + * to the test for (MSC % divisor) == remainder. + */ + vbl.request.type = dont_wait ? 
DRM_VBLANK_RELATIVE :
+            DRM_VBLANK_ABSOLUTE;
+         vbl.request.sequence = next;
+
+         if ( drmWaitVBlank( priv->driScreenPriv->fd, &vbl ) != 0 ) {
+            /* FIXME: This doesn't seem like the right thing to return here.
+             */
+#ifndef _SOLO
+            return GLX_BAD_CONTEXT;
+#else
+            return -1;
+#endif
+         }
+
+         dont_wait = 0;
+         if (target_msc != 0 && vbl.reply.sequence == target)
+            break;
+
+         /* Assuming the wait-done test fails, the next refresh to wait for
+          * will be one that satisfies (MSC % divisor) == remainder.  The
+          * value (MSC - (MSC % divisor) + remainder) is the refresh value
+          * closest to the current value that would satisfy the equation.
+          * If this refresh has already happened, we add divisor to obtain
+          * the next refresh after the current one that will satisfy it.
+          */
+         r = (vbl.reply.sequence % (unsigned int)divisor);
+         next = (vbl.reply.sequence - r + (unsigned int)remainder);
+         if (next <= vbl.reply.sequence) next += (unsigned int)divisor;
+
+      } while ( r != (unsigned int)remainder );
+   }
+   else {
+      /* If the \c divisor is zero, just wait until the MSC is greater
+       * than or equal to \c target_msc.
+       */
+
+      vbl.request.type = DRM_VBLANK_ABSOLUTE;
+      vbl.request.sequence = target_msc;
+
+      if ( drmWaitVBlank( priv->driScreenPriv->fd, &vbl ) != 0 ) {
+         /* FIXME: This doesn't seem like the right thing to return here.
+          */
+#ifndef _SOLO
+         return GLX_BAD_CONTEXT;
+#else
+         return -1;
+#endif
+      }
+   }
+
+   *msc = (target_msc & 0xffffffff00000000LL);
+   *msc |= vbl.reply.sequence;
+   if ( *msc < target_msc ) {
+      *msc += 0x0000000100000000LL;
+   }
+
+   return 0;
+}
+
+
+/****************************************************************************/
+/**
+ * Gets a set of default vertical-blank-wait flags based on the internal GLX
+ * API version and several environment variables.
+ */
+
+GLuint driGetDefaultVBlankFlags( void )
+{
+   GLuint  flags = 0;
+
+
+   flags |= (driCompareGLXAPIVersion( 20030317 ) >= 0)
+       ? VBLANK_FLAG_INTERVAL : 0;
+   flags |= (getenv("LIBGL_SYNC_REFRESH") != NULL)
+       ? VBLANK_FLAG_SYNC : 0;
+   flags |= (getenv("LIBGL_THROTTLE_REFRESH") != NULL)
+       ? VBLANK_FLAG_THROTTLE : 0;
+
+   return flags;
+}
+
+
+/****************************************************************************/
+/**
+ * Wrapper to call \c drmWaitVBlank.  The main purpose of this function is to
+ * wrap the error message logging.  The error message should only be logged
+ * the first time \c drmWaitVBlank fails.  If \c drmWaitVBlank is
+ * successful, \c vbl_seq will be set to the sequence value in the reply.
+ *
+ * \param vbl      Pointer to drmVBlank packet describing how to wait.
+ * \param vbl_seq  Location to store the current refresh counter.
+ * \param fd       File descriptor used to call into the DRM.
+ * \return         Zero on success or -1 on failure.
+ */
+
+static int do_wait( drmVBlank * vbl, GLuint * vbl_seq, int fd )
+{
+   int  ret;
+
+
+   ret = drmWaitVBlank( fd, vbl );
+   if ( ret != 0 ) {
+      static GLboolean first_time = GL_TRUE;
+
+      if ( first_time ) {
+         fprintf(stderr,
+                 "%s: drmWaitVBlank returned %d, IRQs don't seem to be"
+                 " working correctly.\nTry running with LIBGL_THROTTLE_REFRESH"
+                 " and LIBGL_SYNC_REFRESH unset.\n", __FUNCTION__, ret);
+         first_time = GL_FALSE;
+      }
+
+      return -1;
+   }
+
+   *vbl_seq = vbl->reply.sequence;
+   return 0;
+}
+
+
+/****************************************************************************/
+/**
+ * Waits for the vertical blank for use with glXSwapBuffers.
+ *
+ * \param vbl_seq  Vertical blank sequence number (MSC) after the last buffer
+ *                 swap.  Updated after this wait.
+ * \param flags \c VBLANK_FLAG bits that control how long to wait. + * \param missed_deadline Set to \c GL_TRUE if the MSC after waiting is later + * than the "target" based on \c flags. The idea is that if + * \c missed_deadline is set, then the application is not + * achieving its desired framerate. + * \return Zero on success, -1 on error. + */ + +int +driWaitForVBlank( const __DRIdrawablePrivate *priv, GLuint * vbl_seq, + GLuint flags, GLboolean * missed_deadline ) +{ + drmVBlank vbl; + unsigned original_seq; + unsigned deadline; + unsigned interval; + + + *missed_deadline = GL_FALSE; + if ( (flags & VBLANK_FLAG_NO_IRQ) != 0 ) { + return 0; + } + + + /* VBLANK_FLAG_SYNC means to wait for at least one vertical blank. If + * that flag is not set, do a fake wait for zero vertical blanking + * periods so that we can get the current MSC. + * + * VBLANK_FLAG_INTERVAL and VBLANK_FLAG_THROTTLE mean to wait for at + * least one vertical blank since the last wait. Since do_wait modifies + * vbl_seq, we have to save the original value of vbl_seq for the + * VBLANK_FLAG_INTERVAL / VBLANK_FLAG_THROTTLE calculation later. + */ + + original_seq = *vbl_seq; + + vbl.request.sequence = ((flags & VBLANK_FLAG_SYNC) != 0) ? 1 : 0; + vbl.request.type = DRM_VBLANK_RELATIVE; + + if ( do_wait( & vbl, vbl_seq, priv->driScreenPriv->fd ) != 0 ) { + return -1; + } + + + vbl.request.type = DRM_VBLANK_ABSOLUTE; + + if ( (flags & VBLANK_FLAG_INTERVAL) != 0 ) { +#ifndef _SOLO + interval = priv->pdraw->swap_interval; +#else + interval = 0; +#endif + } + else if ( (flags & VBLANK_FLAG_THROTTLE) != 0 ) { + interval = 1; + } + else { + interval = 0; + } + + + /* Wait until the next vertical blank. If the interval is zero, then + * the deadline is one vertical blank after the previous wait. + */ + + vbl.request.sequence = original_seq + interval; + if ( *vbl_seq < vbl.request.sequence ) { + if ( do_wait( & vbl, vbl_seq, priv->driScreenPriv->fd ) != 0 ) { + return -1; + } + } + + deadline = original_seq + ((interval == 0) ? 1 : interval); + *missed_deadline = ( *vbl_seq > deadline ); + + return 0; +} diff --git a/src/mesa/drivers/dri/common/vblank.h b/src/mesa/drivers/dri/common/vblank.h new file mode 100644 index 0000000..3dab4ea --- /dev/null +++ b/src/mesa/drivers/dri/common/vblank.h @@ -0,0 +1,62 @@ +/* -*- mode: c; c-basic-offset: 3 -*- */ +/* + * (c) Copyright IBM Corporation 2002 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick
+ */
+/* $XFree86:$ */
+
+#ifndef DRI_VBLANK_H
+#define DRI_VBLANK_H
+
+#include "context.h"
+#include "dri_util.h"
+
+#define VBLANK_FLAG_INTERVAL  (1U << 0)  /* Respect the swap_interval setting
+                                          */
+#define VBLANK_FLAG_THROTTLE  (1U << 1)  /* Wait 1 refresh since last call.
+                                          */
+#define VBLANK_FLAG_SYNC      (1U << 2)  /* Sync to the next refresh.
+                                          */
+#define VBLANK_FLAG_NO_IRQ    (1U << 7)  /* DRM has no IRQ to wait on.
+                                          */
+
+extern int driGetMSC32( __DRIscreenPrivate * priv, int64_t * count );
+extern int driWaitForMSC32( __DRIdrawablePrivate *priv,
+    int64_t target_msc, int64_t divisor, int64_t remainder, int64_t * msc );
+extern GLuint driGetDefaultVBlankFlags( void );
+extern int driWaitForVBlank( const __DRIdrawablePrivate *priv,
+    GLuint * vbl_seq, GLuint flags, GLboolean * missed_deadline );
+
+#undef usleep
+#include <unistd.h>   /* for usleep() */
+
+#define DO_USLEEP(nr)                                                   \
+   do {                                                                 \
+      if (0) fprintf(stderr, "%s: usleep for %u\n", __FUNCTION__, nr ); \
+      if (1) usleep( nr );                                              \
+      sched_yield();                                                    \
+   } while( 0 )
+
+#endif /* DRI_VBLANK_H */
-- 
2.7.4
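
The texture-memory helpers introduced by this patch are meant to be wired into a driver's screen and context setup. The sketch below is an illustrative outline of that wiring and is not part of the patch: the struct my_context type, its fields, and the specific heap parameters (heap id 0, 64 SAREA regions, 4 KiB alignment) are hypothetical placeholders; only functions and types declared in texmem.h are used.

/* Illustrative sketch only -- hypothetical driver glue, not patch content. */
#include "texmem.h"

struct my_context {
   driTexHeap         *texHeap;       /* heap created below                  */
   driTextureObject    swapped;       /* list head for swapped-out textures  */
   drmTextureRegionPtr sareaRegions;  /* region array in the SAREA           */
   unsigned           *sareaTexAge;   /* global texture age in the SAREA     */
};

/* Destructor handed to driCreateTextureHeap(); would free any
 * driver-private data hanging off the texture object.
 */
static void my_destroy_tex_obj( void *driverContext, driTextureObject *t )
{
   (void) driverContext;
   (void) t;   /* nothing extra to free in this sketch */
}

static GLboolean my_init_texmem( struct my_context *mydrv, unsigned heap_bytes )
{
   /* Empty sentinel list for swapped-out objects (same effect as Mesa's
    * make_empty_list() helper).
    */
   mydrv->swapped.next = & mydrv->swapped;
   mydrv->swapped.prev = & mydrv->swapped;

   /* Heap id 0, 64 SAREA regions, 4 KiB (1 << 12) alignment: example values,
    * not requirements of the interface.  A real driver would pass the size
    * of its extended texture-object struct instead of
    * sizeof( driTextureObject ).
    */
   mydrv->texHeap = driCreateTextureHeap( 0, mydrv, heap_bytes, 12, 64,
                                          mydrv->sareaRegions,
                                          mydrv->sareaTexAge,
                                          & mydrv->swapped,
                                          sizeof( driTextureObject ),
                                          my_destroy_tex_obj );
   return (mydrv->texHeap != NULL);
}

From there, such a driver would call driAllocateTexture( & mydrv->texHeap, 1, t ) whenever it needs texture memory for an object, and DRI_AGE_TEXTURES( mydrv->texHeap ) whenever the hardware lock was contended.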