/*
 * Tizen 2.0 Release — profile/ivi/osmesa.git
 * src/mesa/drivers/dri/unichrome/via_memcpy.c
 */
/*
 * Copyright (C) 2004 Thomas Hellstrom, All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE CODE SUPPLIER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/* Thomas' original, gutted for Mesa by Keith Whitwell
 */
26
27 #include "via_tex.h"
28
29 #if defined( USE_SSE_ASM )
30
31 #define SSE_PREFETCH "  prefetchnta "
32 #define FENCE __asm__ __volatile__ ("sfence":::"memory");
33
34
35 #define PREFETCH1(arch_prefetch,from)                   \
36     __asm__ __volatile__ (                              \
37                           "1:  " arch_prefetch "(%0)\n" \
38                           arch_prefetch "32(%0)\n"      \
39                           arch_prefetch "64(%0)\n"      \
40                           arch_prefetch "96(%0)\n"      \
41                           arch_prefetch "128(%0)\n"     \
42                           arch_prefetch "160(%0)\n"     \
43                           arch_prefetch "192(%0)\n"     \
44                           arch_prefetch "256(%0)\n"     \
45                           arch_prefetch "288(%0)\n"     \
46                           "2:\n"                        \
47                           : : "r" (from) );
48
49
50
/* Copy the trailing n (n < 64) bytes with string-move instructions:
 * n/4 dwords via "rep movsl", then the leftover word and/or byte
 * depending on bits 1 and 0 of n.  Note this is a macro that writes
 * the advanced pointers back into the caller's "to"/"from" variables
 * (they are output operands), and it evaluates its arguments more
 * than once — callers pass plain variables on purpose.
 * Wrapped in do { } while (0) so it expands to a single statement
 * and composes safely with if/else.
 */
#define small_memcpy(to,from,n)                                         \
    do {                                                                \
        __asm__ __volatile__(                                           \
                             "movl %2,%%ecx\n\t"                        \
                             "sarl $2,%%ecx\n\t"                        \
                             "rep ; movsl\n\t"                          \
                             "testb $2,%b2\n\t"                         \
                             "je 1f\n\t"                                \
                             "movsw\n"                                  \
                             "1:\ttestb $1,%b2\n\t"                     \
                             "je 2f\n\t"                                \
                             "movsb\n"                                  \
                             "2:"                                       \
                             :"=&D" (to), "=&S" (from)                  \
                             :"q" (n),"0" ((long) to),"1" ((long) from) \
                             : "%ecx","memory");                        \
    } while (0)
68
69
/* Copy lcnt 64-byte chunks from "from" to "to" using SSE registers
 * and non-temporal stores (movntps), advancing both pointers and
 * counting lcnt down to zero in "dummy"-backed registers.
 *
 * Two variants are selected at runtime on the alignment of "from":
 * unaligned sources use movups loads, 16-byte-aligned sources use
 * the faster movaps.  Stores use movntps in both cases, so "to"
 * must be 16-byte aligned.  "prefetch" is either SSE_PREFETCH or
 * "#" (an asm comment) to disable prefetching for data already
 * pulled in by a previous pass.  A FENCE is required afterwards
 * before the copied data may be read (see via_sse_memcpy).
 */
#define SSE_CPY(prefetch,from,to,dummy,lcnt)                            \
    if ((unsigned long) from & 15)                       {              \
        __asm__ __volatile__ (                                          \
                              "1:\n"                                    \
                              prefetch "320(%1)\n"                      \
                              "  movups (%1), %%xmm0\n"                 \
                              "  movups 16(%1), %%xmm1\n"               \
                              "  movntps %%xmm0, (%0)\n"                \
                              "  movntps %%xmm1, 16(%0)\n"              \
                              prefetch "352(%1)\n"                      \
                              "  movups 32(%1), %%xmm2\n"               \
                              "  movups 48(%1), %%xmm3\n"               \
                              "  movntps %%xmm2, 32(%0)\n"              \
                              "  movntps %%xmm3, 48(%0)\n"              \
                              "  addl $64,%0\n"                         \
                              "  addl $64,%1\n"                         \
                              "  decl %2\n"                             \
                              "  jne 1b\n"                              \
                              :"=&D"(to), "=&S"(from), "=&r"(dummy)     \
                              :"0" (to), "1" (from), "2" (lcnt): "memory"); \
    } else {                                                            \
        __asm__ __volatile__ (                                          \
                              "2:\n"                                    \
                              prefetch "320(%1)\n"                      \
                              "  movaps (%1), %%xmm0\n"                 \
                              "  movaps 16(%1), %%xmm1\n"               \
                              "  movntps %%xmm0, (%0)\n"                \
                              "  movntps %%xmm1, 16(%0)\n"              \
                              prefetch "352(%1)\n"                      \
                              "  movaps 32(%1), %%xmm2\n"               \
                              "  movaps 48(%1), %%xmm3\n"               \
                              "  movntps %%xmm2, 32(%0)\n"              \
                              "  movntps %%xmm3, 48(%0)\n"              \
                              "  addl $64,%0\n"                         \
                              "  addl $64,%1\n"                         \
                              "  decl %2\n"                             \
                              "  jne 2b\n"                              \
                              :"=&D"(to), "=&S"(from), "=&r"(dummy)     \
                              :"0" (to), "1" (from), "2" (lcnt): "memory"); \
    }
110
111
112
113 /*
114  */
115 void via_sse_memcpy(void *to,
116                     const void *from,
117                     size_t sz)
118
119 {
120    int dummy;
121    int lcnt = sz >> 6;
122    int rest = sz & 63;
123
124    PREFETCH1(SSE_PREFETCH,from);
125
126    if (lcnt > 5) {
127       lcnt -= 5;
128       SSE_CPY(SSE_PREFETCH,from,to,dummy,lcnt);
129       lcnt = 5;
130    }
131    if (lcnt) {
132       SSE_CPY("#",from,to,dummy,lcnt);
133    }
134    if (rest) small_memcpy(to, from, rest);
135    FENCE;
136 }
137
138 #endif /* defined( USE_SSE_ASM ) */