Small optimization for big-endian (e.g., PowerPC) systems.
[profile/ivi/mesa.git] / src / mesa / drivers / dri / common / spantmp2.h
1 /*
2  * Copyright 2000-2001 VA Linux Systems, Inc.
3  * (C) Copyright IBM Corporation 2004
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * on the rights to use, copy, modify, merge, publish, distribute, sub
10  * license, and/or sell copies of the Software, and to permit persons to whom
11  * the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
20  * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23  * USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25
26 /**
27  * \file spantmp2.h
28  *
29  * Template file of span read / write functions.
30  *
31  * \author Keith Whitwell <keithw@tungstengraphics.com>
32  * \author Gareth Hughes <gareth@nvidia.com>
33  * \author Ian Romanick <idr@us.ibm.com>
34  */
35
36 #include "colormac.h"
37
/* Debug tracing stays disabled unless the including file defines DBG. */
#if !defined( DBG )
#  define DBG 0
#endif

/*
 * Read- and write-specific hooks.  Any hook the including file leaves
 * undefined falls back to the matching generic HW_LOCK / HW_UNLOCK /
 * HW_CLIPLOOP macro, which the including file is expected to provide.
 */
#if !defined( HW_WRITE_LOCK )
#  define HW_WRITE_LOCK()         HW_LOCK()
#endif

#if !defined( HW_WRITE_UNLOCK )
#  define HW_WRITE_UNLOCK()       HW_UNLOCK()
#endif

#if !defined( HW_READ_LOCK )
#  define HW_READ_LOCK()          HW_LOCK()
#endif

#if !defined( HW_READ_UNLOCK )
#  define HW_READ_UNLOCK()        HW_UNLOCK()
#endif

#if !defined( HW_READ_CLIPLOOP )
#  define HW_READ_CLIPLOOP()      HW_CLIPLOOP()
#endif

#if !defined( HW_WRITE_CLIPLOOP )
#  define HW_WRITE_CLIPLOOP()     HW_CLIPLOOP()
#endif
65
/*
 * Per-format pixel access macros.
 *
 *   INIT_MONO_PIXEL(p, color)      pack a constant color into p
 *   WRITE_RGBA(x, y, r, g, b, a)   pack the components and store one pixel
 *   WRITE_PIXEL(x, y, p)           store an already packed pixel
 *   READ_RGBA(rgba, x, y)          load one pixel and unpack it into rgba[0..3]
 *
 * GET_DST_PTR / GET_SRC_PTR and the PACK_COLOR_* helpers are not defined
 * here; they must come from the including file or its headers.
 */
#if (SPANTMP_PIXEL_FMT == GL_RGB)  && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)

#define INIT_MONO_PIXEL(p, color) \
  p = PACK_COLOR_565( color[0], color[1], color[2] )

/* The packed value uses all 16 bits, so it is stored through an unsigned
 * 16-bit pointer.  Alpha is not stored by this format.
 */
#define WRITE_RGBA( _x, _y, r, g, b, a )                                \
   do {                                                                 \
      GLushort * _p = (GLushort *) GET_DST_PTR(_x, _y);                 \
      _p[0] = ((((int)(r) & 0xf8) << 8) | (((int)(g) & 0xfc) << 3) |    \
               (((int)(b) & 0xf8) >> 3));                               \
   } while(0)

#define WRITE_PIXEL( _x, _y, p )                                        \
   do {                                                                 \
      GLushort * _p = (GLushort *) GET_DST_PTR(_x, _y);                 \
      _p[0] = (p);                                                      \
   } while(0)

/* Each field is moved to the top of a byte and rescaled so that the
 * field's maximum value maps to 255.  Alpha always reads back as 0xff.
 */
#define READ_RGBA( rgba, _x, _y )                                       \
   do {                                                                 \
      GLushort p = *(volatile GLushort *) GET_SRC_PTR(_x, _y);          \
      rgba[0] = ((p >> 8) & 0xf8) * 255 / 0xf8;                         \
      rgba[1] = ((p >> 3) & 0xfc) * 255 / 0xfc;                         \
      rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8;                         \
      rgba[3] = 0xff;                                                   \
   } while (0)

#elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)

# define INIT_MONO_PIXEL(p, color)                       \
     p = PACK_COLOR_8888(color[3], color[0], color[1], color[2])

/* Components are widened to GLuint before shifting: a GLubyte promotes to
 * (signed) int, and shifting a value >= 0x80 left by 24 overflows it.
 */
# define WRITE_RGBA(_x, _y, r, g, b, a)                                 \
    do {                                                                \
       GLuint * _p = (GLuint *) GET_DST_PTR(_x, _y);                    \
       _p[0] = (((GLuint)(r) << 16) | ((GLuint)(g) <<  8) |             \
                ((GLuint)(b) <<  0) | ((GLuint)(a) << 24));             \
    } while(0)

#define WRITE_PIXEL(_x, _y, p)                                          \
    do {                                                                \
       GLuint * _p = (GLuint *) GET_DST_PTR(_x, _y);                    \
       _p[0] = (p);                                                     \
    } while(0)

# if defined( USE_X86_ASM )
    /* bswap followed by an 8-bit rotate right turns the loaded pixel into
     * the word that is stored to rgba in one 32-bit write.  The input uses
     * the matching constraint "0" so that it is guaranteed to live in the
     * same register as the output; the template only ever touches %0.
     */
#  define READ_RGBA(rgba, _x, _y)                                       \
    do {                                                                \
        GLuint p = *(volatile GLuint *) GET_SRC_PTR(_x, _y);            \
       __asm__ __volatile__( "bswap     %0; rorl $8, %0"                \
                                : "=r" (p) : "0" (p) );                 \
       ((GLuint *)rgba)[0] = p;                                         \
    } while (0)
# elif defined( MESA_BIG_ENDIAN )
    /* On PowerPC with GCC 3.4.2 the shift madness below becomes a single
     * rotlwi instruction.  It also produces good code on SPARC.
     */
#  define READ_RGBA( rgba, _x, _y )                                     \
     do {                                                               \
        GLuint p = *(volatile GLuint *) GET_SRC_PTR(_x, _y);            \
        GLuint t = p;                                                   \
        *((uint32_t *) rgba) = (t >> 24) | (p << 8);                    \
     } while (0)
# else
#  define READ_RGBA( rgba, _x, _y )                                     \
     do {                                                               \
        GLuint p = *(volatile GLuint *) GET_SRC_PTR(_x, _y);            \
        rgba[0] = (p >> 16) & 0xff;                                     \
        rgba[1] = (p >>  8) & 0xff;                                     \
        rgba[2] = (p >>  0) & 0xff;                                     \
        rgba[3] = (p >> 24) & 0xff;                                     \
     } while (0)
# endif

#else
#error SPANTMP_PIXEL_FMT must be set to a valid value!
#endif
142
143 #if defined( USE_MMX_ASM ) || defined( USE_SSE_ASM )
144 #include "x86/read_rgba_span_x86.h"
145 #include "x86/common_x86_asm.h"
146 #endif
147
148 static void TAG(WriteRGBASpan)( const GLcontext *ctx,
149                                 GLuint n, GLint x, GLint y,
150                                 const GLubyte rgba[][4],
151                                 const GLubyte mask[] )
152 {
153    HW_WRITE_LOCK()
154       {
155          GLint x1;
156          GLint n1;
157          LOCAL_VARS;
158
159          y = Y_FLIP(y);
160
161          HW_WRITE_CLIPLOOP()
162             {
163                GLint i = 0;
164                CLIPSPAN(x,y,n,x1,n1,i);
165
166                if (DBG) fprintf(stderr, "WriteRGBASpan %d..%d (x1 %d)\n",
167                                 (int)i, (int)n1, (int)x1);
168
169                if (mask)
170                {
171                   for (;n1>0;i++,x1++,n1--)
172                      if (mask[i])
173                         WRITE_RGBA( x1, y,
174                                     rgba[i][0], rgba[i][1],
175                                     rgba[i][2], rgba[i][3] );
176                }
177                else
178                {
179                   for (;n1>0;i++,x1++,n1--)
180                      WRITE_RGBA( x1, y,
181                                  rgba[i][0], rgba[i][1],
182                                  rgba[i][2], rgba[i][3] );
183                }
184             }
185          HW_ENDCLIPLOOP();
186       }
187    HW_WRITE_UNLOCK();
188 }
189
190 static void TAG(WriteRGBSpan)( const GLcontext *ctx,
191                                GLuint n, GLint x, GLint y,
192                                const GLubyte rgb[][3],
193                                const GLubyte mask[] )
194 {
195    HW_WRITE_LOCK()
196       {
197          GLint x1;
198          GLint n1;
199          LOCAL_VARS;
200
201          y = Y_FLIP(y);
202
203          HW_WRITE_CLIPLOOP()
204             {
205                GLint i = 0;
206                CLIPSPAN(x,y,n,x1,n1,i);
207
208                if (DBG) fprintf(stderr, "WriteRGBSpan %d..%d (x1 %d)\n",
209                                 (int)i, (int)n1, (int)x1);
210
211                if (mask)
212                {
213                   for (;n1>0;i++,x1++,n1--)
214                      if (mask[i])
215                         WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
216                }
217                else
218                {
219                   for (;n1>0;i++,x1++,n1--)
220                      WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
221                }
222             }
223          HW_ENDCLIPLOOP();
224       }
225    HW_WRITE_UNLOCK();
226 }
227
228 static void TAG(WriteRGBAPixels)( const GLcontext *ctx,
229                                GLuint n,
230                                const GLint x[],
231                                const GLint y[],
232                                const GLubyte rgba[][4],
233                                const GLubyte mask[] )
234 {
235    HW_WRITE_LOCK()
236       {
237          GLint i;
238          LOCAL_VARS;
239
240          if (DBG) fprintf(stderr, "WriteRGBAPixels\n");
241
242          HW_WRITE_CLIPLOOP()
243             {
244                if (mask)
245                {
246                   for (i=0;i<n;i++)
247                   {
248                      if (mask[i]) {
249                         const int fy = Y_FLIP(y[i]);
250                         if (CLIPPIXEL(x[i],fy))
251                            WRITE_RGBA( x[i], fy,
252                                        rgba[i][0], rgba[i][1],
253                                        rgba[i][2], rgba[i][3] );
254                      }
255                   }
256                }
257                else
258                {
259                   for (i=0;i<n;i++)
260                   {
261                      const int fy = Y_FLIP(y[i]);
262                      if (CLIPPIXEL(x[i],fy))
263                         WRITE_RGBA( x[i], fy,
264                                     rgba[i][0], rgba[i][1],
265                                     rgba[i][2], rgba[i][3] );
266                   }
267                }
268             }
269          HW_ENDCLIPLOOP();
270       }
271    HW_WRITE_UNLOCK();
272 }
273
274
275 static void TAG(WriteMonoRGBASpan)( const GLcontext *ctx,       
276                                     GLuint n, GLint x, GLint y, 
277                                     const GLchan color[4],
278                                     const GLubyte mask[] )
279 {
280    HW_WRITE_LOCK()
281       {
282          GLint x1;
283          GLint n1;
284          LOCAL_VARS;
285          INIT_MONO_PIXEL(p, color);
286
287          y = Y_FLIP( y );
288
289          if (DBG) fprintf(stderr, "WriteMonoRGBASpan\n");
290
291          HW_WRITE_CLIPLOOP()
292             {
293                GLint i = 0;
294                CLIPSPAN(x,y,n,x1,n1,i);
295                if (mask)
296                {
297                   for (;n1>0;i++,x1++,n1--)
298                      if (mask[i])
299                         WRITE_PIXEL( x1, y, p );
300                }
301                else
302                {
303                   for (;n1>0;i++,x1++,n1--)
304                      WRITE_PIXEL( x1, y, p );
305                }
306             }
307          HW_ENDCLIPLOOP();
308       }
309    HW_WRITE_UNLOCK();
310 }
311
312
313 static void TAG(WriteMonoRGBAPixels)( const GLcontext *ctx,
314                                       GLuint n,
315                                       const GLint x[], const GLint y[],
316                                       const GLchan color[],
317                                       const GLubyte mask[] ) 
318 {
319    HW_WRITE_LOCK()
320       {
321          GLint i;
322          LOCAL_VARS;
323          INIT_MONO_PIXEL(p, color);
324
325          if (DBG) fprintf(stderr, "WriteMonoRGBAPixels\n");
326
327          HW_WRITE_CLIPLOOP()
328             {
329                if (mask)
330                {
331                   for (i=0;i<n;i++)
332                      if (mask[i]) {
333                         int fy = Y_FLIP(y[i]);
334                         if (CLIPPIXEL( x[i], fy ))
335                            WRITE_PIXEL( x[i], fy, p );
336                      }
337                }
338                else
339                {
340                   for (i=0;i<n;i++) {
341                      int fy = Y_FLIP(y[i]);
342                      if (CLIPPIXEL( x[i], fy ))
343                         WRITE_PIXEL( x[i], fy, p );
344                   }
345                }
346             }
347          HW_ENDCLIPLOOP();
348       }
349    HW_WRITE_UNLOCK();
350 }
351
352
353 static void TAG(ReadRGBASpan)( const GLcontext *ctx,
354                                GLuint n, GLint x, GLint y,
355                                GLubyte rgba[][4])
356 {
357    HW_READ_LOCK()
358       {
359          GLint x1,n1;
360          LOCAL_VARS;
361
362          y = Y_FLIP(y);
363
364          if (DBG) fprintf(stderr, "ReadRGBASpan\n");
365
366          HW_READ_CLIPLOOP()
367             {
368                GLint i = 0;
369                CLIPSPAN(x,y,n,x1,n1,i);
370                for (;n1>0;i++,x1++,n1--)
371                   READ_RGBA( rgba[i], x1, y );
372             }
373          HW_ENDCLIPLOOP();
374       }
375    HW_READ_UNLOCK();
376 }
377
378
#if defined(USE_MMX_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/**
 * MMX variant of ReadRGBASpan for BGRA / 8_8_8_8_REV buffers.
 *
 * Same contract as the C ReadRGBASpan, but each clipped run is converted
 * by _generic_read_RGBA_span_BGRA8888_REV_MMX instead of a per-pixel
 * READ_RGBA loop.  It takes the read lock hooks (HW_READ_LOCK /
 * HW_READ_UNLOCK) like the C version, so a driver that overrides them is
 * honoured on this path too.
 *
 * \param ctx   not referenced directly here; presumably used by the
 *              driver-supplied macros -- confirm.
 * \param n     requested span length.
 * \param x     requested start column.
 * \param y     requested row, passed through Y_FLIP before use.
 * \param rgba  destination; the run is written starting at rgba[i].
 *
 * NOTE(review): x1, n1 and i keep their names on purpose; CLIPSPAN is
 * supplied by the including file and may refer to them by name.
 */
static void TAG2(ReadRGBASpan,_MMX)( const GLcontext *ctx,
                               GLuint n, GLint x, GLint y,
                               GLubyte rgba[][4])
{
#ifndef USE_INNER_EMMS
   /* The EMMS instruction is directly in-lined here because using GCC's
    * built-in _mm_empty function was found to utterly destroy performance.
    */
   __asm__ __volatile__( "emms" );
#endif

   HW_READ_LOCK()
     {
        GLint x1,n1;
        LOCAL_VARS;

        y = Y_FLIP(y);

        if (DBG) fprintf(stderr, "ReadRGBASpan\n");

        HW_READ_CLIPLOOP()
          {
             GLint i = 0;
             CLIPSPAN(x,y,n,x1,n1,i);

             /* The C path's loop does nothing when n1 <= 0, so CLIPSPAN
              * can presumably produce such a count.  Skip the call rather
              * than hand it to the assembly routine, whose handling of a
              * zero or negative count is not visible here.
              */
             if (n1 > 0)
               {
                  const char * src = GET_SRC_PTR( x1, y );
                  _generic_read_RGBA_span_BGRA8888_REV_MMX( src, rgba[i], n1 );
               }
          }
        HW_ENDCLIPLOOP();
     }
   HW_READ_UNLOCK();
#ifndef USE_INNER_EMMS
   __asm__ __volatile__( "emms" );
#endif
}
#endif
420
421
#if defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/**
 * SSE2 variant of ReadRGBASpan for BGRA / 8_8_8_8_REV buffers.
 *
 * Same contract as the C ReadRGBASpan, but each clipped run is converted
 * by _generic_read_RGBA_span_BGRA8888_REV_SSE2 instead of a per-pixel
 * READ_RGBA loop.  It takes the read lock hooks (HW_READ_LOCK /
 * HW_READ_UNLOCK) like the C version, so a driver that overrides them is
 * honoured on this path too.
 *
 * \param ctx   not referenced directly here; presumably used by the
 *              driver-supplied macros -- confirm.
 * \param n     requested span length.
 * \param x     requested start column.
 * \param y     requested row, passed through Y_FLIP before use.
 * \param rgba  destination; the run is written starting at rgba[i].
 *
 * NOTE(review): x1, n1 and i keep their names on purpose; CLIPSPAN is
 * supplied by the including file and may refer to them by name.
 */
static void TAG2(ReadRGBASpan,_SSE2)( const GLcontext *ctx,
                               GLuint n, GLint x, GLint y,
                               GLubyte rgba[][4])
{
   HW_READ_LOCK()
     {
        GLint x1,n1;
        LOCAL_VARS;

        y = Y_FLIP(y);

        if (DBG) fprintf(stderr, "ReadRGBASpan\n");

        HW_READ_CLIPLOOP()
          {
             GLint i = 0;
             CLIPSPAN(x,y,n,x1,n1,i);

             /* The C path's loop does nothing when n1 <= 0, so CLIPSPAN
              * can presumably produce such a count.  Skip the call rather
              * than hand it to the assembly routine, whose handling of a
              * zero or negative count is not visible here.
              */
             if (n1 > 0)
               {
                  const char * src = GET_SRC_PTR( x1, y );
                  _generic_read_RGBA_span_BGRA8888_REV_SSE2( src, rgba[i], n1 );
               }
          }
        HW_ENDCLIPLOOP();
     }
   HW_READ_UNLOCK();
}
#endif
453
#if defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
/**
 * SSE variant of ReadRGBASpan for BGRA / 8_8_8_8_REV buffers.
 *
 * Same contract as the C ReadRGBASpan, but each clipped run is converted
 * by _generic_read_RGBA_span_BGRA8888_REV_SSE instead of a per-pixel
 * READ_RGBA loop.  It takes the read lock hooks (HW_READ_LOCK /
 * HW_READ_UNLOCK) like the C version, so a driver that overrides them is
 * honoured on this path too.
 *
 * \param ctx   not referenced directly here; presumably used by the
 *              driver-supplied macros -- confirm.
 * \param n     requested span length.
 * \param x     requested start column.
 * \param y     requested row, passed through Y_FLIP before use.
 * \param rgba  destination; the run is written starting at rgba[i].
 *
 * NOTE(review): x1, n1 and i keep their names on purpose; CLIPSPAN is
 * supplied by the including file and may refer to them by name.
 */
static void TAG2(ReadRGBASpan,_SSE)( const GLcontext *ctx,
                               GLuint n, GLint x, GLint y,
                               GLubyte rgba[][4])
{
#ifndef USE_INNER_EMMS
   /* The EMMS instruction is directly in-lined here because using GCC's
    * built-in _mm_empty function was found to utterly destroy performance.
    */
   __asm__ __volatile__( "emms" );
#endif

   HW_READ_LOCK()
     {
        GLint x1,n1;
        LOCAL_VARS;

        y = Y_FLIP(y);

        if (DBG) fprintf(stderr, "ReadRGBASpan\n");

        HW_READ_CLIPLOOP()
          {
             GLint i = 0;
             CLIPSPAN(x,y,n,x1,n1,i);

             /* The C path's loop does nothing when n1 <= 0, so CLIPSPAN
              * can presumably produce such a count.  Skip the call rather
              * than hand it to the assembly routine, whose handling of a
              * zero or negative count is not visible here.
              */
             if (n1 > 0)
               {
                  const char * src = GET_SRC_PTR( x1, y );
                  _generic_read_RGBA_span_BGRA8888_REV_SSE( src, rgba[i], n1 );
               }
          }
        HW_ENDCLIPLOOP();
     }
   HW_READ_UNLOCK();
#ifndef USE_INNER_EMMS
   __asm__ __volatile__( "emms" );
#endif
}
#endif
495
496
497 static void TAG(ReadRGBAPixels)( const GLcontext *ctx,
498                                  GLuint n, const GLint x[], const GLint y[],
499                                  GLubyte rgba[][4], const GLubyte mask[] )
500 {
501    HW_READ_LOCK()
502       {
503          GLint i;
504          LOCAL_VARS;
505
506          if (DBG) fprintf(stderr, "ReadRGBAPixels\n");
507
508          HW_READ_CLIPLOOP()
509             {
510                if (mask)
511                {
512                   for (i=0;i<n;i++)
513                      if (mask[i]) {
514                         int fy = Y_FLIP( y[i] );
515                         if (CLIPPIXEL( x[i], fy ))
516                            READ_RGBA( rgba[i], x[i], fy );
517                      }
518                }
519                else
520                {
521                   for (i=0;i<n;i++) {
522                      int fy = Y_FLIP( y[i] );
523                      if (CLIPPIXEL( x[i], fy ))
524                         READ_RGBA( rgba[i], x[i], fy );
525                   }
526                }
527             }
528          HW_ENDCLIPLOOP();
529       }
530    HW_READ_UNLOCK();
531 }
532
533 static void TAG(InitPointers)(struct swrast_device_driver *swdd)
534 {
535    swdd->WriteRGBASpan = TAG(WriteRGBASpan);
536    swdd->WriteRGBSpan = TAG(WriteRGBSpan);
537    swdd->WriteMonoRGBASpan = TAG(WriteMonoRGBASpan);
538    swdd->WriteRGBAPixels = TAG(WriteRGBAPixels);
539    swdd->WriteMonoRGBAPixels = TAG(WriteMonoRGBAPixels);
540    swdd->ReadRGBAPixels = TAG(ReadRGBAPixels);
541
542 #if (SPANTMP_PIXEL_FMT == GL_BGRA) && \
543      (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
544 #if defined(USE_SSE_ASM)
545    if ( cpu_has_xmm2 ) {
546       if (DBG) fprintf( stderr, "Using %s version of ReadRGBASpan\n", "SSE2" );
547       swdd->ReadRGBASpan = TAG2(ReadRGBASpan, _SSE2);
548    }
549    else
550 #endif
551 #if defined(USE_SSE_ASM)
552    if ( cpu_has_xmm ) {
553       if (DBG) fprintf( stderr, "Using %s version of ReadRGBASpan\n", "SSE" );
554       swdd->ReadRGBASpan = TAG2(ReadRGBASpan, _SSE);
555    }
556    else
557 #endif
558 #if defined(USE_MMX_ASM)
559    if ( cpu_has_mmx ) {
560       if (DBG) fprintf( stderr, "Using %s version of ReadRGBASpan\n", "MMX" );
561       swdd->ReadRGBASpan = TAG2(ReadRGBASpan, _MMX);
562    }
563    else
564 #endif
565 #endif
566    {
567       if (DBG) fprintf( stderr, "Using %s version of ReadRGBASpan\n", "C" );
568       swdd->ReadRGBASpan = TAG(ReadRGBASpan);
569    }
570
571 }
572
573
/*
 * Remove the macros that parameterize or are defined by one instantiation
 * of this template.
 * NOTE(review): presumably so the file can be included again with a
 * different format, tag or pointer macros -- confirm against the callers.
 */

/* Parameters supplied by the including file. */
#undef SPANTMP_PIXEL_FMT
#undef SPANTMP_PIXEL_TYPE
#undef GET_SRC_PTR
#undef GET_DST_PTR
#undef TAG
#undef TAG2

/* Pixel access macros defined by this file. */
#undef INIT_MONO_PIXEL
#undef WRITE_RGBA
#undef WRITE_PIXEL
#undef READ_RGBA