/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/*
 * Copyright (C) 2006 IBM Corp.
 *
 * "Optimized" lookup operations for n-way set associative
 * software managed cache.
 */
#include <spu_intrinsics.h>

#ifndef __SPE_CACHE_NWAY_OPT_H_
#define __SPE_CACHE_NWAY_OPT_H_
/**
 * Look up and return data from the cache. If the data
 * is not currently in cache then transfer it from main
 * memory.
 *
 * This code uses a conditional branch to the cache miss
 * handler in the event that the requested data is not
 * in the cache. A branch hint is used to avoid paying
 * the branch stall penalty.
 */
#define __spe_cache_rd(type, ea)                                \
({                                                              \
    int set, idx, lnum, byte;                                   \
    type ret;                                                   \
                                                                \
    _spe_cache_nway_lookup_(ea, set, idx);                      \
                                                                \
    if (unlikely(idx < 0)) {                                    \
        /* Miss: allocate a way, then wait for the DMA. */      \
        idx = _spe_cache_miss_(ea, set, -1);                    \
        spu_writech(22, SPE_CACHE_SET_TAGMASK(set));            \
        spu_mfcstat(MFC_TAG_UPDATE_ALL);                        \
    }                                                           \
    lnum = _spe_cacheline_num_(set, idx);                       \
    byte = _spe_cacheline_byte_offset_(ea);                     \
    ret = *((type *) (&spe_cache_mem[lnum + byte]));            \
    ret;                                                        \
})
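
/*
 * Illustrative usage sketch (documentation only, not part of the
 * cache API): reading elements from main storage through the
 * software cache one at a time. The helper name sum_floats and
 * the effective-address arithmetic below are hypothetical.
 *
 *     static float sum_floats(unsigned int ea, int n)
 *     {
 *         float sum = 0.0f;
 *         int i;
 *         for (i = 0; i < n; i++) {
 *             sum += __spe_cache_rd(float, ea + i * sizeof(float));
 *         }
 *         return sum;
 *     }
 *
 * Because the macro expands to a GNU C statement expression, it can
 * be used wherever an expression of the requested type is expected.
 */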
/**
 * Fetch four data elements from the cache.
 *
 * This code uses one conditional branch in
 * the event that any of the four elements
 * is not present in the cache.
 *
 * On a miss, lightweight locking is used to
 * avoid casting out entries that were found.
 * Further, we wait just once for the transfers,
 * allowing for parallel [rather than serial]
 * completion of the misses.
 */
#define __spe_cache_rd_x4(type, ea_x4)                              \
({                                                                  \
    vector unsigned int missing;                                    \
    unsigned int ms;                                                \
    vector unsigned int cindex;                                     \
    unsigned int d0, d1, d2, d3;                                    \
    vector unsigned int s_x4;                                       \
    vector signed int i_x4;                                         \
    vector unsigned int ibyte, iline;                               \
    vector unsigned int ret;                                        \
    unsigned int idx0, idx1, idx2, idx3;                            \
                                                                    \
    _spe_cache_nway_lookup_x4(ea_x4, s_x4, i_x4);                   \
                                                                    \
    /* A miss is flagged by a negative way index; build a mask. */  \
    missing = spu_rlmask((vector unsigned int)i_x4, -8);            \
    ms = spu_extract(spu_gather(missing), 0);                       \
                                                                    \
    ibyte = _spe_cacheline_byte_offset_x4(ea_x4);                   \
    iline = _spe_cacheline_num_x4(s_x4,                             \
                                  (vector unsigned int)i_x4);       \
    cindex = spu_add(iline, ibyte);                                 \
                                                                    \
    idx0 = spu_extract(cindex, 0);                                  \
    idx1 = spu_extract(cindex, 1);                                  \
    idx2 = spu_extract(cindex, 2);                                  \
    idx3 = spu_extract(cindex, 3);                                  \
                                                                    \
    /* Speculatively load all four elements as if they hit. */      \
    d0 = *((type *) (&spe_cache_mem[idx0]));                        \
    d1 = *((type *) (&spe_cache_mem[idx1]));                        \
    d2 = *((type *) (&spe_cache_mem[idx2]));                        \
    d3 = *((type *) (&spe_cache_mem[idx3]));                        \
                                                                    \
    ret = _load_vec_uint4(d0, d1, d2, d3);                          \
                                                                    \
    if (unlikely(ms)) {                                             \
        int b0 = spu_extract(ibyte, 0);                             \
        int b1 = spu_extract(ibyte, 1);                             \
        int b2 = spu_extract(ibyte, 2);                             \
        int b3 = spu_extract(ibyte, 3);                             \
        int lnum0, lnum1, lnum2, lnum3;                             \
        int avail = -1;                                             \
        int s0 = spu_extract(s_x4, 0);                              \
        int s1 = spu_extract(s_x4, 1);                              \
        int s2 = spu_extract(s_x4, 2);                              \
        int s3 = spu_extract(s_x4, 3);                              \
        int i0 = spu_extract(i_x4, 0);                              \
        int i1 = spu_extract(i_x4, 1);                              \
        int i2 = spu_extract(i_x4, 2);                              \
        int i3 = spu_extract(i_x4, 3);                              \
        unsigned int ea0 = spu_extract(ea_x4, 0);                   \
        unsigned int ea1 = spu_extract(ea_x4, 1);                   \
        unsigned int ea2 = spu_extract(ea_x4, 2);                   \
        unsigned int ea3 = spu_extract(ea_x4, 3);                   \
                                                                    \
        /* Lightweight locking: mark the ways that hit as           \
           unavailable so the miss handler does not cast them out. */\
        avail &= ~(((i0 < 0) ? 0 : (1 << i0)) |                     \
                   ((i1 < 0) ? 0 : (1 << i1)) |                     \
                   ((i2 < 0) ? 0 : (1 << i2)) |                     \
                   ((i3 < 0) ? 0 : (1 << i3)));                     \
                                                                    \
        i0 = _spe_cache_miss_(ea0, s0, avail);                      \
        avail &= ~(1 << i0);                                        \
        i1 = _spe_cache_miss_(ea1, s1, avail);                      \
        avail &= ~(1 << i1);                                        \
        i2 = _spe_cache_miss_(ea2, s2, avail);                      \
        avail &= ~(1 << i2);                                        \
        i3 = _spe_cache_miss_(ea3, s3, avail);                      \
                                                                    \
        lnum0 = _spe_cacheline_num_(s0, i0);                        \
        lnum1 = _spe_cacheline_num_(s1, i1);                        \
        lnum2 = _spe_cacheline_num_(s2, i2);                        \
        lnum3 = _spe_cacheline_num_(s3, i3);                        \
                                                                    \
        /* Wait once for all four sets' transfers, letting the      \
           misses complete in parallel. */                          \
        spu_writech(22, SPE_CACHE_SET_TAGMASK(s0) |                 \
                        SPE_CACHE_SET_TAGMASK(s1) |                 \
                        SPE_CACHE_SET_TAGMASK(s2) |                 \
                        SPE_CACHE_SET_TAGMASK(s3));                 \
        spu_mfcstat(MFC_TAG_UPDATE_ALL);                            \
                                                                    \
        d0 = *((type *) (&spe_cache_mem[lnum0 + b0]));              \
        d1 = *((type *) (&spe_cache_mem[lnum1 + b1]));              \
        d2 = *((type *) (&spe_cache_mem[lnum2 + b2]));              \
        d3 = *((type *) (&spe_cache_mem[lnum3 + b3]));              \
                                                                    \
        ret = _load_vec_uint4(d0, d1, d2, d3);                      \
    }                                                               \
    ret;                                                            \
})
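
/*
 * Illustrative usage sketch (documentation only, not part of the
 * cache API): gathering four 32-bit elements whose effective
 * addresses were computed earlier. The variable names below are
 * hypothetical.
 *
 *     unsigned int ea0, ea1, ea2, ea3;
 *     ...
 *     vector unsigned int ea_x4 =
 *         (vector unsigned int){ ea0, ea1, ea2, ea3 };
 *     vector unsigned int v =
 *         __spe_cache_rd_x4(unsigned int, ea_x4);
 *
 * All four lookups share a single conditional branch; any lines that
 * miss are fetched with concurrent DMA transfers and the loads are
 * then retried, as described in the comment above.
 */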
#endif /* __SPE_CACHE_NWAY_OPT_H_ */