boost/date_time/tz_db_base.hpp

   1 #ifndef DATE_TIME_TZ_DB_BASE_HPP__
   2 #define DATE_TIME_TZ_DB_BASE_HPP__
   3
   4 /* Copyright (c) 2003-2005 CrystalClear Software, Inc.
   5  * Subject to the Boost Software License, Version 1.0.
   6  * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
   7  * Author: Jeff Garland, Bart Garst
   8  * $Date$
   9  */
  10
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iterator>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/tokenizer.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/throw_exception.hpp>
#include <boost/date_time/compiler_config.hpp>
#include <boost/date_time/time_zone_names.hpp>
#include <boost/date_time/time_zone_base.hpp>
#include <boost/date_time/time_parsing.hpp>
#include <boost/algorithm/string.hpp>
  25
  26 namespace boost {
  27   namespace date_time {
  28
  29     //! Exception thrown when tz database cannot locate requested data file
  30     class data_not_accessible : public std::logic_error
  31     {
  32      public:
  33        data_not_accessible() :
  34          std::logic_error(std::string("Unable to locate or access the required datafile."))
  35        {}
  36        data_not_accessible(const std::string& filespec) :
  37          std::logic_error(std::string("Unable to locate or access the required datafile. Filespec: " + filespec))
  38        {}
  39     };
  40
  41     //! Exception thrown when tz database locates incorrect field structure in data file
  42     class bad_field_count : public std::out_of_range
  43     {
  44      public:
  45        bad_field_count(const std::string& s) :
  46          std::out_of_range(s)
  47       {}
  48     };
  49
  50     //! Creates a database of time_zones from csv datafile
  51     /*! The csv file containing the zone_specs used by the
  52      * tz_db_base is intended to be customized by the
  53      * library user. When customizing this file (or creating your own) the
  54      * file must follow a specific format.
  55      *
     * The first line is expected to contain column headings and is therefore
     * not processed: load_from_file() discards it before parsing records.
  58      *
  59      * Each record (line) must have eleven fields. Some of those fields can
  60      * be empty. Every field (even empty ones) must be enclosed in
  61      * double-quotes.
  62      * Ex:
  63      * @code
  64      * "America/Phoenix" <- string enclosed in quotes
  65      * ""                <- empty field
  66      * @endcode
  67      *
  68      * Some fields represent a length of time. The format of these fields
  69      * must be:
  70      * @code
  71      * "{+|-}hh:mm[:ss]" <- length-of-time format
  72      * @endcode
  73      * Where the plus or minus is mandatory and the seconds are optional.
  74      *
  75      * Since some time zones do not use daylight savings it is not always
  76      * necessary for every field in a zone_spec to contain a value. All
  77      * zone_specs must have at least ID and GMT offset. Zones that use
  78      * daylight savings must have all fields filled except:
  79      * STD ABBR, STD NAME, DST NAME. You should take note
  80      * that DST ABBR is mandatory for zones that use daylight savings
  81      * (see field descriptions for further details).
  82      *
  83      * ******* Fields and their description/details *********
  84      *
  85      * ID:
  86      * Contains the identifying string for the zone_spec. Any string will
  87      * do as long as it's unique. No two ID's can be the same.
  88      *
  89      * STD ABBR:
  90      * STD NAME:
  91      * DST ABBR:
  92      * DST NAME:
  93      * These four are all the names and abbreviations used by the time
  94      * zone being described. While any string will do in these fields,
  95      * care should be taken. These fields hold the strings that will be
  96      * used in the output of many of the local_time classes.
  97      * Ex:
  98      * @code
  99      * time_zone nyc = tz_db.time_zone_from_region("America/New_York");
 100      * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc);
 101      * cout << ny_time.to_long_string() << endl;
 102      * // 2004-Aug-30 00:00:00 Eastern Daylight Time
 103      * cout << ny_time.to_short_string() << endl;
 104      * // 2004-Aug-30 00:00:00 EDT
 105      * @endcode
 106      *
 107      * NOTE: The exact format/function names may vary - see local_time
 108      * documentation for further details.
 109      *
 110      * GMT offset:
 111      * This is the number of hours added to utc to get the local time
 112      * before any daylight savings adjustments are made. Some examples
 113      * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours.
 114      * The format must follow the length-of-time format described above.
 115      *
 116      * DST adjustment:
 117      * The amount of time added to gmt_offset when daylight savings is in
 118      * effect. The format must follow the length-of-time format described
 119      * above.
 120      *
 121      * DST Start Date rule:
     * This is a specially formatted string that describes the day of year
     * on which the transition takes place. It holds three fields of its own,
     * separated by semicolons.
 125      * The first field indicates the "nth" weekday of the month. The possible
 126      * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth),
 127      * and -1 (last).
 128      * The second field indicates the day-of-week from 0-6 (Sun=0).
 129      * The third field indicates the month from 1-12 (Jan=1).
 130      *
 131      * Examples are: "-1;5;9"="Last Friday of September",
 132      * "2;1;3"="Second Monday of March"
 133      *
 134      * Start time:
     * Start time is the time of day, measured from midnight on the day of
     * the start transition, at which the transition takes place (in
     * 24-hour format). The format must follow the length-of-time format
     * described above with the exception that it must always be positive.
 140      *
 141      * DST End date rule:
 142      * See DST Start date rule. The difference here is this is the day
 143      * daylight savings ends (transition to STD).
 144      *
 145      * End time:
 146      * Same as Start time.
 147      */
 148     template<class time_zone_type, class rule_type>
 149     class tz_db_base {
 150     public:
 151       /* Having CharT as a template parameter created problems
 152        * with posix_time::duration_from_string. Templatizing
 153        * duration_from_string was not possible at this time, however,
 154        * it should be possible in the future (when poor compilers get
 155        * fixed or stop being used).
 156        * Since this class was designed to use CharT as a parameter it
 157        * is simply typedef'd here to ease converting in back to a
 158        * parameter the future */
 159       typedef char char_type;
 160
 161       typedef typename time_zone_type::base_type time_zone_base_type;
 162       typedef typename time_zone_type::time_duration_type time_duration_type;
 163       typedef time_zone_names_base<char_type> time_zone_names;
 164       typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets;
 165       typedef std::basic_string<char_type> string_type;
 166
 167       //! Constructs an empty database
 168       tz_db_base() {}
 169
 170       //! Process csv data file, may throw exceptions
 171       /*! May throw bad_field_count exceptions */
 172       void load_from_stream(std::istream &in)
 173       {
 174         std::string buff;
 175         while( std::getline(in, buff)) {
 176           boost::trim_right(buff);
 177           parse_string(buff);
 178         }
 179       }
 180
 181       //! Process csv data file, may throw exceptions
 182       /*! May throw data_not_accessible, or bad_field_count exceptions */
 183       void load_from_file(const std::string& pathspec)
 184       {
 185         std::string  buff;
 186
 187         std::ifstream ifs(pathspec.c_str());
 188         if(!ifs){
 189           boost::throw_exception(data_not_accessible(pathspec));
 190         }
 191         std::getline(ifs, buff); // first line is column headings
 192         this->load_from_stream(ifs);
 193       }
 194
 195       //! returns true if record successfully added to map
 196       /*! Takes a region name in the form of "America/Phoenix", and a
 197        * time_zone object for that region. The id string must be a unique
 198        * name that does not already exist in the database. */
 199       bool add_record(const string_type& region,
 200                       boost::shared_ptr<time_zone_base_type> tz)
 201       {
 202         typename map_type::value_type p(region, tz);
 203         return (m_zone_map.insert(p)).second;
 204       }
 205
 206       //! Returns a time_zone object built from the specs for the given region
 207       /*! Returns a time_zone object built from the specs for the given
 208        * region. If region does not exist a local_time::record_not_found
 209        * exception will be thrown */
 210       boost::shared_ptr<time_zone_base_type>
 211       time_zone_from_region(const string_type& region) const
 212       {
 213         // get the record
 214         typename map_type::const_iterator record = m_zone_map.find(region);
 215         if(record == m_zone_map.end()){
 216           return boost::shared_ptr<time_zone_base_type>(); //null pointer
 217         }
 218         return record->second;
 219       }
 220
 221       //! Returns a vector of strings holding the time zone regions in the database
 222       std::vector<std::string> region_list() const
 223       {
 224         typedef std::vector<std::string> vector_type;
 225         vector_type regions;
 226         typename map_type::const_iterator itr = m_zone_map.begin();
 227         while(itr != m_zone_map.end()) {
 228           regions.push_back(itr->first);
 229           ++itr;
 230         }
 231         return regions;
 232       }
 233
 234     private:
 235       typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type;
 236       map_type m_zone_map;
 237
 238       // start and end rule are of the same type
 239       typedef typename rule_type::start_rule::week_num week_num;
 240
 241       /* TODO: mechanisms need to be put in place to handle different
 242        * types of rule specs. parse_rules() only handles nth_kday
 243        * rule types. */
 244
 245       //! parses rule specs for transition day rules
 246       rule_type* parse_rules(const string_type& sr, const string_type& er) const
 247       {
 248         using namespace gregorian;
 249         // start and end rule are of the same type,
 250         // both are included here for readability
 251         typedef typename rule_type::start_rule start_rule;
 252         typedef typename rule_type::end_rule end_rule;
 253
 254         // these are: [start|end] nth, day, month
 255         int s_nth = 0, s_d = 0, s_m = 0;
 256         int e_nth = 0, e_d = 0, e_m = 0;
 257         split_rule_spec(s_nth, s_d, s_m, sr);
 258         split_rule_spec(e_nth, e_d, e_m, er);
 259
 260         typename start_rule::week_num s_wn, e_wn;
 261         s_wn = get_week_num(s_nth);
 262         e_wn = get_week_num(e_nth);
 263
 264
 265         return new rule_type(start_rule(s_wn,
 266                                         static_cast<unsigned short>(s_d),
 267                                         static_cast<unsigned short>(s_m)),
 268                              end_rule(e_wn,
 269                                       static_cast<unsigned short>(e_d),
 270                                       static_cast<unsigned short>(e_m)));
 271       }
 272       //! helper function for parse_rules()
 273       week_num get_week_num(int nth) const
 274       {
 275         typedef typename rule_type::start_rule start_rule;
 276         switch(nth){
 277         case 1:
 278           return start_rule::first;
 279         case 2:
 280           return start_rule::second;
 281         case 3:
 282           return start_rule::third;
 283         case 4:
 284           return start_rule::fourth;
 285         case 5:
 286         case -1:
 287           return start_rule::fifth;
 288         default:
 289           // shouldn't get here - add error handling later
 290           break;
 291         }
 292         return start_rule::fifth; // silence warnings
 293       }
 294
 295       //! splits the [start|end]_date_rule string into 3 ints
 296       void split_rule_spec(int& nth, int& d, int& m, string_type rule) const
 297       {
 298         typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type;
 299         typedef boost::tokenizer<char_separator_type,
 300                                  std::basic_string<char_type>::const_iterator,
 301                                  std::basic_string<char_type> > tokenizer;
 302         typedef boost::tokenizer<char_separator_type,
 303                                  std::basic_string<char_type>::const_iterator,
 304                                  std::basic_string<char_type> >::iterator tokenizer_iterator;
 305
 306         const char_type sep_char[] = { ';', '\0'};
 307         char_separator_type sep(sep_char);
 308         tokenizer tokens(rule, sep); // 3 fields
 309
 310         if ( std::distance ( tokens.begin(), tokens.end ()) != 3 ) {
 311           std::ostringstream msg;
 312           msg << "Expecting 3 fields, got "
 313               << std::distance ( tokens.begin(), tokens.end ())
 314               << " fields in line: " << rule;
 315           boost::throw_exception(bad_field_count(msg.str()));
 316         }
 317
 318         tokenizer_iterator tok_iter = tokens.begin();
 319         nth = std::atoi(tok_iter->c_str()); ++tok_iter;
 320         d   = std::atoi(tok_iter->c_str()); ++tok_iter;
 321         m   = std::atoi(tok_iter->c_str());
 322       }
 323
 324
 325       //! Take a line from the csv, turn it into a time_zone_type.
 326       /*! Take a line from the csv, turn it into a time_zone_type,
 327        * and add it to the map. Zone_specs in csv file are expected to
 328        * have eleven fields that describe the time zone. Returns true if
 329        * zone_spec successfully added to database */
 330       bool parse_string(string_type& s)
 331       {
 332         std::vector<string_type> result;
 333         typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type;
 334
 335         token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<char_type>());
 336
 337         token_iter_type end;
 338         while (i != end) {
 339           result.push_back(*i);
 340           i++;
 341         }
 342
 343         enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET,
 344                          DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE,
 345                          END_TIME, FIELD_COUNT };
 346
 347         //take a shot at fixing gcc 4.x error
 348         const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT);
 349         if (result.size() != expected_fields) {
 350           std::ostringstream msg;
 351           msg << "Expecting " << FIELD_COUNT << " fields, got "
 352             << result.size() << " fields in line: " << s;
 353           boost::throw_exception(bad_field_count(msg.str()));
 354           BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach
 355         }
 356
 357         // initializations
 358         bool has_dst = true;
 359         if(result[DSTABBR] == std::string()){
 360           has_dst = false;
 361         }
 362
 363
 364         // start building components of a time_zone
 365         time_zone_names names(result[STDNAME], result[STDABBR],
 366                               result[DSTNAME], result[DSTABBR]);
 367
 368         time_duration_type utc_offset =
 369           str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]);
 370
 371         dst_adjustment_offsets adjust(time_duration_type(0,0,0),
 372                                       time_duration_type(0,0,0),
 373                                       time_duration_type(0,0,0));
 374
 375         boost::shared_ptr<rule_type> rules;
 376
 377         if(has_dst){
 378           adjust = dst_adjustment_offsets(
 379                                           str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]),
 380                                           str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]),
 381                                           str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME])
 382                                           );
 383
 384           rules =
 385             boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE],
 386                                                      result[END_DATE_RULE]));
 387         }
 388         string_type id(result[ID]);
 389         boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules));
 390         return (add_record(id, zone));
 391
 392       }
 393
 394     };
 395
 396 } } // namespace
 397
 398 #endif // DATE_TIME_TZ_DB_BASE_HPP__