src/slm/thread/ValueCompress.cpp

   1 /*
   2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
   3  *
   4  * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
   5  *
   6  * The contents of this file are subject to the terms of either the GNU Lesser
   7  * General Public License Version 2.1 only ("LGPL") or the Common Development and
   8  * Distribution License ("CDDL")(collectively, the "License"). You may not use this
   9  * file except in compliance with the License. You can obtain a copy of the CDDL at
  10  * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
  11  * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
  12  * specific language governing permissions and limitations under the License. When
  13  * distributing the software, include this License Header Notice in each file and
  14  * include the full text of the License in the License file as well as the
  15  * following notice:
  16  *
  17  * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
  18  * (CDDL)
  19  * For Covered Software in this distribution, this License shall be governed by the
  20  * laws of the State of California (excluding conflict-of-law provisions).
  21  * Any litigation relating to this License shall be subject to the jurisdiction of
  22  * the Federal Courts of the Northern District of California and the state courts
  23  * of the State of California, with venue lying in Santa Clara County, California.
  24  *
  25  * Contributor(s):
  26  *
  27  * If you wish your version of this file to be governed by only the CDDL or only
  28  * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
  29  * include this software in this distribution under the [CDDL or LGPL Version 2.1]
  30  * license." If you don't indicate a single choice of license, a recipient has the
  31  * option to distribute your version of this file under either the CDDL or the LGPL
  32  * Version 2.1, or to extend the choice of license to its licensees as provided
  33  * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
  34  * Version 2 license, then the option applies only if the new code is made subject
  35  * to such option by the copyright holder.
  36  */
  37
  38 #ifdef HAVE_CONFIG_H
  39 #include "config.h"
  40 #endif
  41
  42 #include <stdio.h>
  43
  44 #include "ValueCompress.h"
  45
  46 #ifdef HAVE_LIMIT_H
  47 #include <limit.h>
  48 #endif
  49
  50 #ifndef DBL_MAX
  51 #define DBL_MAX (1E+300)
  52 #endif
  53
  54 struct TVCArrItem {
  55     float m_val;
  56     unsigned m_heapIdx;
  57
  58     TVCArrItem(float val = 0.0, unsigned idx = 0)
  59         : m_val(val), m_heapIdx(idx)
  60     {
  61     }
  62 };
  63
  64 struct TVCHeapItem {
  65     unsigned m_first;
  66     unsigned m_last;
  67     unsigned m_count;
  68     double m_appval;
  69     double m_sum;
  70     double m_dist;
  71
  72     bool
  73     operator<(const TVCHeapItem& b) const
  74     {
  75         return m_appval < b.m_appval;
  76     }
  77
  78     TVCHeapItem(unsigned first = 0, unsigned last = 0, unsigned count = 0,
  79                 double val = 0.0, double sum = 0.0, double dist = 0.0)
  80         : m_first(first), m_last(last), m_count(count),
  81           m_appval(val), m_sum(sum), m_dist(dist)
  82     {
  83     }
  84 };
  85
  86 typedef std::vector<TVCArrItem>     CVCArr;
  87 typedef std::vector<TVCHeapItem>    CVCHeap;
  88
  89
  90 static void
  91 BubbleUpVal(CVCHeap& heap, CVCArr& arr, int idx)
  92 {
  93     while (idx > 0) {
  94         int parent = (idx - 1) / 2;
  95         if (heap[idx] < heap[parent])
  96             break;
  97         for (int h = heap[idx].m_first, t = heap[idx].m_last; h < t; ++h)
  98             arr[h].m_heapIdx = parent;
  99         for (int h = heap[parent].m_first, t = heap[parent].m_last; h < t; ++h)
 100             arr[h].m_heapIdx = idx;
 101         TVCHeapItem hitem = heap[parent];
 102         heap[parent] = heap[idx];
 103         heap[idx] = hitem;
 104         idx = parent;
 105     }
 106 }
 107
 108 static void
 109 IronDownVal(CVCHeap& heap, CVCArr& arr, int idx, int bottom)
 110 {
 111     int left;
 112     while ((left = 2 * idx + 1) < bottom) {
 113         int max = idx;
 114         if (heap[max] < heap[left])
 115             max = left;
 116         if (left + 1 < bottom && heap[max] < heap[left + 1])
 117             max = left + 1;
 118         if (max == idx) break;
 119
 120         for (int h = heap[idx].m_first, t = heap[idx].m_last; h < t; ++h)
 121             arr[h].m_heapIdx = max;
 122         for (int h = heap[max].m_first, t = heap[max].m_last; h < t; ++h)
 123             arr[h].m_heapIdx = idx;
 124         TVCHeapItem hitem = heap[max];
 125         heap[max] = heap[idx];
 126         heap[idx] = hitem;
 127
 128         idx = max;
 129     }
 130 }
 131
 132
 133 /**
 134  * Bubble idx up according to distance
 135  */
 136 static void
 137 BubbleUp(CVCHeap& heap, CVCArr& arr, int idx)
 138 {
 139     while (idx > 0) {
 140         int parent = (idx - 1) / 2;
 141         if (heap[parent].m_dist <= heap[idx].m_dist)
 142             break;
 143         for (int h = heap[idx].m_first, t = heap[idx].m_last; h < t; ++h)
 144             arr[h].m_heapIdx = parent;
 145         for (int h = heap[parent].m_first, t = heap[parent].m_last; h < t; ++h)
 146             arr[h].m_heapIdx = idx;
 147         TVCHeapItem hitem = heap[parent];
 148         heap[parent] = heap[idx];
 149         heap[idx] = hitem;
 150         idx = parent;
 151     }
 152 }
 153
 154 /**
 155  * Iron idx down, but do not let it lower than bottom (< bottom)
 156  */
 157 static void
 158 IronDown(CVCHeap& heap, CVCArr& arr, int idx, int bottom)
 159 {
 160     int left;
 161     while ((left = 2 * idx + 1) < bottom) {
 162         int min = idx;
 163         if (heap[left].m_dist < heap[min].m_dist)
 164             min = left;
 165         if (left + 1 < bottom && heap[left + 1].m_dist < heap[min].m_dist)
 166             min = left + 1;
 167         if (min == idx) break;
 168
 169         for (int h = heap[idx].m_first, t = heap[idx].m_last; h < t; ++h)
 170             arr[h].m_heapIdx = min;
 171         for (int h = heap[min].m_first, t = heap[min].m_last; h < t; ++h)
 172             arr[h].m_heapIdx = idx;
 173         TVCHeapItem hitem = heap[min];
 174         heap[min] = heap[idx];
 175         heap[idx] = hitem;
 176
 177         idx = min;
 178     }
 179 }
 180
 181 void
 182 CValueCompressor::operator()(std::map<float, int>& values,
 183                              std::map<float, int>& map,
 184                              std::vector<float>& table,
 185                              unsigned N) const
 186 {
 187     CVCArr arr;
 188     CVCHeap heap;
 189
 190     std::map<float, int>::const_iterator itv = values.begin();
 191     std::map<float, int>::const_iterator itve = values.end();
 192     for (; itv != itve; ++itv) {
 193         arr.push_back(TVCArrItem(itv->first, arr.size()));
 194         double sum = double(itv->first);
 195         if (itv->second > 0)
 196             sum *= itv->second;
 197         heap.push_back(TVCHeapItem(heap.size(), heap.size() + 1, itv->second,
 198                                    itv->first, sum));
 199     }
 200
 201     for (int i = 0, sz = heap.size() - 1; i < sz; ++i) {
 202         if (heap[i].m_count == 0 || heap[i + 1].m_count == 0) {
 203             heap[i].m_dist = DBL_MAX;
 204         } else {
 205             heap[i].m_dist = heap[i + 1].m_appval - heap[i].m_appval;
 206         }
 207         BubbleUp(heap, arr, i);
 208     }
 209     if (heap.size() > 0) {
 210         heap[heap.size() - 1].m_dist = DBL_MAX;
 211         BubbleUp(heap, arr, heap.size() - 1);
 212     }
 213
 214     int cur, next, hiprev, hinext;
 215
 216     while (heap.size() > N) {
 217         cur = heap[0].m_first;
 218         if (cur == 0) {
 219             hiprev = -1;
 220         } else {
 221             hiprev = arr[cur - 1].m_heapIdx;
 222         }
 223         next = heap[0].m_last;
 224         hinext = arr[next].m_heapIdx;
 225
 226         for (int h = cur; h < next; ++h)
 227             arr[h].m_heapIdx = hinext;
 228         double newval = (heap[0].m_sum + heap[hinext].m_sum) /
 229                         (heap[0].m_count + heap[hinext].m_count);
 230         if (hiprev >= 0)
 231             heap[hiprev].m_dist += (newval - heap[0].m_appval);
 232         heap[hinext].m_first = heap[0].m_first;
 233         heap[hinext].m_count += heap[0].m_count;
 234         heap[hinext].m_sum += heap[0].m_sum;
 235         heap[hinext].m_dist += (heap[hinext].m_appval - newval);
 236         heap[hinext].m_appval = newval;
 237
 238         if (hiprev > hinext)
 239             cur = hiprev, hiprev = hinext, hinext = cur;
 240         IronDown(heap, arr, hinext, heap.size());
 241         if (hiprev > 0)
 242             IronDown(heap, arr, hiprev, heap.size());
 243
 244         heap[0] = heap[heap.size() - 1];
 245         for (int h = heap[0].m_first, t = heap[0].m_last; h < t; ++h)
 246             arr[h].m_heapIdx = 0;
 247         heap.pop_back();
 248         IronDown(heap, arr, 0, heap.size());
 249     }
 250
 251     for (int i = 1, sz = heap.size(); i < sz; ++i)
 252         BubbleUpVal(heap, arr, i);
 253     for (int i = heap.size() - 1; i > 0; --i) {
 254         for (int h = heap[0].m_first, t = heap[0].m_last; h < t; ++h)
 255             arr[h].m_heapIdx = i;
 256         for (int h = heap[i].m_first, t = heap[i].m_last; h < t; ++h)
 257             arr[h].m_heapIdx = 0;
 258         TVCHeapItem hitem = heap[0];
 259         heap[0] = heap[i];
 260         heap[i] = hitem;
 261         IronDownVal(heap, arr, 0, i);
 262     }
 263
 264     map.clear();
 265     for (int i = 0, sz = arr.size(); i < sz; ++i)
 266         map[arr[i].m_val] = arr[i].m_heapIdx;
 267
 268     table.clear();
 269     table.reserve(heap.size());
 270     for (int i = 0, sz = heap.size(); i < sz; ++i)
 271         table.push_back(float(heap[i].m_appval));
 272
 273     /*
 274        for (int i = 0, sz = heap.size(); i < sz; ++i) {
 275         printf("%12lf:\n", heap[i].m_appval);
 276         for (int h = heap[i].m_first, t = heap[i].m_last; h < t; ++h) {
 277             printf("    %12f\n", arr[h].m_val);
 278             if (arr[h].m_heapIdx != i) {
 279                 printf("error, non-consistence found\n");
 280                 return;
 281             }
 282         }
 283         printf("\n");
 284        }
 285      */
 286 }
 287
 288
 289 void
 290 CValueCompressor::operator()(std::map<float, float>& eff2val,
 291                              std::map<float, int>& values,
 292                              std::map<float, int>& v2idx,
 293                              std::vector<float>& table,
 294                              unsigned N) const
 295 {
 296     std::map<float, int> tmp_map;
 297     this->operator()(values, tmp_map, table, N);
 298
 299     v2idx.clear();
 300     std::map<float, int>::iterator itm = tmp_map.begin();
 301     std::map<float, int>::iterator itme = tmp_map.end();
 302     for (; itm != itme; ++itm) {
 303         v2idx[eff2val[itm->first]] = itm->second;
 304     }
 305
 306 /* // Can not be maped back, because some value could not be in the eff2val maps
 307     std::vector<float>::iterator itt = table.begin();
 308     std::vector<float>::iterator itte = table.end();
 309     for (; itt != itte; ++itt)
 310    *itt = eff2val[*itt];
 311  */
 312 }
 313
 314
 315
 316
 317