libs/bimap/example/mi_to_b_path/mi_hashed_indices.cpp

   1 // Boost.Bimap
   2 //
   3 // Copyright (c) 2006-2007 Matias Capeletto
   4 //
   5 // Distributed under the Boost Software License, Version 1.0.
   6 // (See accompanying file LICENSE_1_0.txt or copy at
   7 // http://www.boost.org/LICENSE_1_0.txt)
   8
   9
  10 /*****************************************************************************
  11 Boost.MultiIndex
  12 *****************************************************************************/
  13
  14 #include <boost/config.hpp>
  15
  16 //[ code_mi_to_b_path_mi_hashed_indices
  17
  18 #include <iostream>
  19 #include <iomanip>
  20
  21 #include <boost/tokenizer.hpp>
  22
  23 #include <boost/multi_index_container.hpp>
  24 #include <boost/multi_index/key_extractors.hpp>
  25 #include <boost/multi_index/ordered_index.hpp>
  26 #include <boost/multi_index/hashed_index.hpp>
  27 #include <boost/lambda/lambda.hpp>
  28
  29 using namespace boost::multi_index;
  30 namespace bl = boost::lambda;
  31
  32 // word_counter keeps the occurrences of words inserted. A hashed
  33 // index allows for fast checking of preexisting entries.
  34
  35 struct word_counter_entry
  36 {
  37     std::string  word;
  38     unsigned int occurrences;
  39
  40     word_counter_entry( std::string word_ ) : word(word_), occurrences(0) {}
  41 };
  42
  43 typedef multi_index_container
  44 <
  45     word_counter_entry,
  46     indexed_by
  47     <
  48         ordered_non_unique
  49         <
  50             BOOST_MULTI_INDEX_MEMBER(
  51                 word_counter_entry,unsigned int,occurrences),
  52             std::greater<unsigned int>
  53         >,
  54         hashed_unique
  55         <
  56             BOOST_MULTI_INDEX_MEMBER(word_counter_entry,std::string,word)
  57         >
  58   >
  59
  60 > word_counter;
  61
  62 typedef boost::tokenizer<boost::char_separator<char> > text_tokenizer;
  63
  64 int main()
  65 {
  66     std::string text=
  67         "En un lugar de la Mancha, de cuyo nombre no quiero acordarme... "
  68         "...snip..."
  69         "...no se salga un punto de la verdad.";
  70
  71     // feed the text into the container
  72
  73     word_counter   wc;
  74     text_tokenizer tok(text,boost::char_separator<char>(" \t\n.,;:!?'\"-"));
  75     unsigned int   total_occurrences = 0;
  76
  77     for( text_tokenizer::iterator it = tok.begin(), it_end = tok.end();
  78          it != it_end ; ++it )
  79     {
  80         ++total_occurrences;
  81         word_counter::iterator wit = wc.insert(*it).first;
  82         wc.modify_key( wit, ++ bl::_1 );
  83     }
  84
  85     // list words by frequency of appearance
  86
  87     std::cout << std::fixed << std::setprecision(2);
  88
  89     for( word_counter::iterator wit = wc.begin(), wit_end=wc.end();
  90          wit != wit_end; ++wit )
  91     {
  92         std::cout << std::setw(11) << wit->word << ": "
  93                   << std::setw(5)
  94                   << 100.0 * wit->occurrences / total_occurrences << "%"
  95                   << std::endl;
  96     }
  97
  98     return 0;
  99 }
 100 //]