/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION                               */
/* This prolog section is automatically generated.                 */
/*                                                                 */
/* (C) Copyright 2001,2006,                                        */
/* International Business Machines Corporation,                    */
/*                                                                 */
/* All Rights Reserved.                                            */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx                                              */
/* nway-opt.h
 *
 * Copyright (C) 2006 IBM Corp.
 *
 * "Optimized" lookup operations for an n-way set-associative,
 * software-managed cache.
 */
#include <spu_intrinsics.h>

#ifndef __SPE_CACHE_NWAY_OPT_H_
#define __SPE_CACHE_NWAY_OPT_H_

/* __spe_cache_rd
 *      Look up and return data from the cache.  If the data
 *      is not currently in the cache, transfer it from main
 *      storage first.
 *
 *      This code uses a conditional branch to the cache-miss
 *      handler in the event that the requested data is not
 *      in the cache.  A branch hint is used to avoid paying
 *      the branch-stall penalty.
 */
#define __spe_cache_rd(type, ea)                                \
({                                                              \
    int set, idx, lnum, byte;                                   \
    type ret;                                                   \
    _spe_cache_nway_lookup_(ea, set, idx);                      \
    if (unlikely(idx < 0)) {                                    \
        /* Miss: have the handler pick a victim way, then   */  \
        /* wait for the DMA into this set to complete.      */  \
        idx = _spe_cache_miss_(ea, set, -1);                    \
        spu_writech(22, SPE_CACHE_SET_TAGMASK(set));            \
        spu_mfcstat(MFC_TAG_UPDATE_ALL);                        \
    }                                                           \
    lnum = _spe_cacheline_num_(set, idx);                       \
    byte = _spe_cacheline_byte_offset_(ea);                     \
    ret = *((type *) (&spe_cache_mem[lnum + byte]));            \
    ret;                                                        \
})

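/*
 * Usage sketch (illustrative; not part of the original header).
 * Reads one 32-bit word from effective address `ea` through the
 * software cache.  Assumes the cache storage and tag structures
 * have already been set up by the surrounding cache framework;
 * the function name is hypothetical.
 */
#if 0
static inline unsigned int read_word(unsigned int ea)
{
    /* Hit: a handful of local-store loads.  Miss: blocks on the DMA. */
    return __spe_cache_rd(unsigned int, ea);
}
#endif
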
/**
 * __spe_cache_rd_x4
 *      Fetch four data elements from the cache.
 *
 *      This code uses a single conditional branch in
 *      the event that any of the four elements is
 *      missing.
 *
 *      On a miss, lightweight locking is used to
 *      avoid casting out entries that were found.
 *      Further, we wait just once for the transfers,
 *      allowing them to proceed in parallel rather
 *      than serially.
 */

#define __spe_cache_rd_x4(type, ea_x4)                          \
({                                                              \
    vector unsigned int missing;                                \
    unsigned int ms;                                            \
    vector unsigned int cindex;                                 \
    unsigned int d0, d1, d2, d3;                                \
    vector unsigned int s_x4;                                   \
    vector signed int i_x4;                                     \
    vector unsigned int ibyte, iline;                           \
    vector unsigned int ret;                                    \
    unsigned int idx0, idx1, idx2, idx3;                        \
                                                                \
    /* Look up all four addresses at once; a negative way   */  \
    /* index marks a miss.  `ms` is nonzero if any missed.  */  \
    _spe_cache_nway_lookup_x4(ea_x4, s_x4, i_x4);               \
    missing = spu_rlmask((vector unsigned int)i_x4, -8);        \
    ms = spu_extract(spu_gather(missing), 0);                   \
                                                                \
    ibyte = _spe_cacheline_byte_offset_x4(ea_x4);               \
    iline = _spe_cacheline_num_x4(s_x4,                         \
                                  (vector unsigned int)i_x4);   \
    cindex = spu_add(iline, ibyte);                             \
                                                                \
    idx0 = spu_extract(cindex, 0);                              \
    idx1 = spu_extract(cindex, 1);                              \
    idx2 = spu_extract(cindex, 2);                              \
    idx3 = spu_extract(cindex, 3);                              \
                                                                \
    /* Speculatively load all four elements, assuming hits. */  \
    d0 = *((type *) (&spe_cache_mem[idx0]));                    \
    d1 = *((type *) (&spe_cache_mem[idx1]));                    \
    d2 = *((type *) (&spe_cache_mem[idx2]));                    \
    d3 = *((type *) (&spe_cache_mem[idx3]));                    \
                                                                \
    ret = _load_vec_uint4(d0, d1, d2, d3);                      \
                                                                \
    if (unlikely(ms)) {                                         \
        int b0 = spu_extract(ibyte, 0);                         \
        int b1 = spu_extract(ibyte, 1);                         \
        int b2 = spu_extract(ibyte, 2);                         \
        int b3 = spu_extract(ibyte, 3);                         \
        int lnum0, lnum1, lnum2, lnum3;                         \
        int s0 = spu_extract(s_x4, 0);                          \
        int s1 = spu_extract(s_x4, 1);                          \
        int s2 = spu_extract(s_x4, 2);                          \
        int s3 = spu_extract(s_x4, 3);                          \
        int i0 = spu_extract(i_x4, 0);                          \
        int i1 = spu_extract(i_x4, 1);                          \
        int i2 = spu_extract(i_x4, 2);                          \
        int i3 = spu_extract(i_x4, 3);                          \
        unsigned int ea0 = spu_extract(ea_x4, 0);               \
        unsigned int ea1 = spu_extract(ea_x4, 1);               \
        unsigned int ea2 = spu_extract(ea_x4, 2);               \
        unsigned int ea3 = spu_extract(ea_x4, 3);               \
        int avail = -1;                                         \
                                                                \
        /* Lock the ways that hit so the miss handler does  */  \
        /* not cast out entries we still need.              */  \
        avail &= ~(((i0 < 0) ? 0 : (1 << i0)) |                 \
                   ((i1 < 0) ? 0 : (1 << i1)) |                 \
                   ((i2 < 0) ? 0 : (1 << i2)) |                 \
                   ((i3 < 0) ? 0 : (1 << i3)));                 \
                                                                \
        i0 = _spe_cache_miss_(ea0, s0, avail);                  \
        avail &= ~(1 << i0);                                    \
        i1 = _spe_cache_miss_(ea1, s1, avail);                  \
        avail &= ~(1 << i1);                                    \
        i2 = _spe_cache_miss_(ea2, s2, avail);                  \
        avail &= ~(1 << i2);                                    \
        i3 = _spe_cache_miss_(ea3, s3, avail);                  \
                                                                \
        lnum0 = _spe_cacheline_num_(s0, i0);                    \
        lnum1 = _spe_cacheline_num_(s1, i1);                    \
        lnum2 = _spe_cacheline_num_(s2, i2);                    \
        lnum3 = _spe_cacheline_num_(s3, i3);                    \
                                                                \
        /* Wait once on the tag groups of all four sets so  */  \
        /* the four transfers complete in parallel.         */  \
        spu_writech(22, SPE_CACHE_SET_TAGMASK(s0) |             \
                        SPE_CACHE_SET_TAGMASK(s1) |             \
                        SPE_CACHE_SET_TAGMASK(s2) |             \
                        SPE_CACHE_SET_TAGMASK(s3));             \
        spu_mfcstat(MFC_TAG_UPDATE_ALL);                        \
                                                                \
        d0 = *((type *) (&spe_cache_mem[lnum0 + b0]));          \
        d1 = *((type *) (&spe_cache_mem[lnum1 + b1]));          \
        d2 = *((type *) (&spe_cache_mem[lnum2 + b2]));          \
        d3 = *((type *) (&spe_cache_mem[lnum3 + b3]));          \
                                                                \
        ret = _load_vec_uint4(d0, d1, d2, d3);                  \
    }                                                           \
    ret;                                                        \
})
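
/*
 * Usage sketch (illustrative; not part of the original header).
 * Gathers four 32-bit words from four independent effective
 * addresses packed into one vector.  Any misses are resolved by
 * the macro with overlapped DMA transfers.  The function name is
 * hypothetical.
 */
#if 0
static inline vector unsigned int gather4(vector unsigned int ea_x4)
{
    return __spe_cache_rd_x4(unsigned int, ea_x4);
}
#endif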

#endif /* __SPE_CACHE_NWAY_OPT_H_ */