source/common/unicode/rbbi.h

   1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 ***************************************************************************
   5 *   Copyright (C) 1999-2016 International Business Machines Corporation   *
   6 *   and others. All rights reserved.                                      *
   7 ***************************************************************************
   8
   9 **********************************************************************
  10 *   Date        Name        Description
  11 *   10/22/99    alan        Creation.
  12 *   11/11/99    rgillam     Complete port from Java.
  13 **********************************************************************
  14 */
  15
  16 #ifndef RBBI_H
  17 #define RBBI_H
  18
  19 #include "unicode/utypes.h"
  20
  21 /**
  22  * \file
  23  * \brief C++ API: Rule Based Break Iterator
  24  */
  25
  26 #if !UCONFIG_NO_BREAK_ITERATION
  27
  28 #include "unicode/brkiter.h"
  29 #include "unicode/udata.h"
  30 #include "unicode/parseerr.h"
  31 #include "unicode/schriter.h"
  32 #include "unicode/uchriter.h"
  33
  34
  35 struct UTrie;  // forward declaration; NOTE(review): not referenced in the visible part of this header - confirm it is still needed
  36
  37 U_NAMESPACE_BEGIN
  38
  39 /** @internal */
  40 struct RBBIDataHeader;                 // flattened rule data adopted by the private constructor
  41 class  RuleBasedBreakIteratorTables;   // NOTE(review): not referenced in the visible part of this header - confirm it is still needed
  42 class  BreakIterator;                  // base class of RuleBasedBreakIterator
  43 class  RBBIDataWrapper;                // type of fData, the rule data of an iterator instance
  44 class  UStack;                         // type of fLanguageBreakEngines
  45 class  LanguageBreakEngine;            // type returned by getLanguageBreakEngine()
  46 class  UnhandledEngine;                // type of fUnhandledBreakEngine
  47 struct RBBIStateTable;                 // state table type taken by handleNext() / handlePrevious()
  48
  49
  50
  51
  52 /**
  53  *
  54  * A subclass of BreakIterator whose behavior is specified using a list of rules.
  55  * <p>Instances of this class are most commonly created by the factory methods of
  56  *  BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc.,
  57  *  and then used via the abstract API in class BreakIterator</p>
  58  *
  59  * <p>See the ICU User Guide for information on Break Iterator Rules.</p>
  60  *
  61  * <p>This class is not intended to be subclassed.</p>
  62  */
  63 class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator {
  64
  65 private:
  66     /**
  67      * The UText through which this BreakIterator accesses the text
  68      * @internal
  69      */
  70     UText  *fText;
  71
  72     /**
  73      *   A character iterator that refers to the same text as the UText, above.
  74      *   Only included for compatibility with old API, which was based on CharacterIterators.
  75      *   Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
  76      */
  77     CharacterIterator  *fCharIter;
  78
  79     /**
  80      *   When the input text is provided by a UnicodeString, this will point to
  81      *    a characterIterator that wraps that data.  Needed only for the
  82      *    implementation of getText(), a backwards compatibility issue.
  83      */
  84     StringCharacterIterator *fSCharIter;
  85
  86     /**
  87      *  When the input text is provided by a UText, this
  88      *    dummy CharacterIterator over an empty string will
  89      *    be returned from getText()
  90      */
  91     UCharCharacterIterator *fDCharIter;
  92
  93     /**
  94      * The rule data for this BreakIterator instance
  95      * @internal
  96      */
  97     RBBIDataWrapper    *fData;
  98
  99     /** Index of the Rule {tag} values for the most recent match.
 100      *  @internal
 101     */
 102     int32_t             fLastRuleStatusIndex;
 103
 104     /**
 105      * Rule tag value valid flag.
 106      * Some iterator operations don't intrinsically set the correct tag value.
 107      * This flag lets us lazily compute the value if we are ever asked for it.
 108      * @internal
 109      */
 110     UBool               fLastStatusIndexValid;
 111
 112     /**
 113      * Counter for the number of characters encountered with the "dictionary"
 114      *   flag set.
 115      * @internal
 116      */
 117     uint32_t            fDictionaryCharCount;
 118
 119     /**
 120      * When a range of characters is divided up using the dictionary, the break
 121      * positions that are discovered are stored here, preventing us from having
 122      * to use either the dictionary or the state table again until the iterator
 123      * leaves this range of text. Has the most impact for line breaking.
 124      * @internal
 125      */
 126     int32_t*            fCachedBreakPositions;
 127
 128     /**
 129      * The number of elements in fCachedBreakPositions
 130      * @internal
 131      */
 132     int32_t             fNumCachedBreakPositions;
 133
 134     /**
 135      * if fCachedBreakPositions is not null, this indicates which item in the
 136      * cache the current iteration position refers to
 137      * @internal
 138      */
 139     int32_t             fPositionInCache;
 140
 141     /**
 142      *
 143      * If present, UStack of LanguageBreakEngine objects that might handle
 144      * dictionary characters. Searched from top to bottom to find an object to
 145      * handle a given character.
 146      * @internal
 147      */
 148     UStack              *fLanguageBreakEngines;
 149
 150     /**
 151      *
 152      * If present, the special LanguageBreakEngine used for handling
 153      * characters that are in the dictionary set, but not handled by any
 154      * LanguageBreakEngine.
 155      * @internal
 156      */
 157     UnhandledEngine     *fUnhandledBreakEngine;
 158
 159     /**
 160      *
 161      * The type of the break iterator, or -1 if it has not been set.
 162      * @internal
 163      */
 164     int32_t             fBreakType;
 165
 166     //=======================================================================
 167     // constructors
 168     //=======================================================================
 169
 170     /**
 171      * Constructor from a flattened set of RBBI data in malloced memory.
 172      *             RuleBasedBreakIterators built from a custom set of rules
 173      *             are created via this constructor; the rules are compiled
 174      *             into memory, then the break iterator is constructed here.
 175      *
 176      *             The break iterator adopts the memory, and will
 177      *             free it when done.
 178      * @internal
 179      */
 180     RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
 181
 182
 183     friend class RBBIRuleBuilder;
 184     /** @internal */
 185     friend class BreakIterator;
 186
 187
 188
 189 public:
 190
 191     /** Default constructor.  Creates an empty shell of an iterator, with no
 192      *  rules or text to iterate over.   Object can subsequently be assigned to.
 193      *  @stable ICU 2.2
 194      */
 195     RuleBasedBreakIterator();
 196
 197     /**
 198      * Copy constructor.  Will produce a break iterator with the same behavior,
 199      * and which iterates over the same text, as the one passed in.
 200      * @param that The RuleBasedBreakIterator passed to be copied
 201      * @stable ICU 2.0
 202      */
 203     RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
 204
 205     /**
 206      * Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
 207      * @param rules The break rules to be used.
 208      * @param parseError  In the event of a syntax error in the rules, provides the location
 209      *                    within the rules of the problem.
 210      * @param status Information on any errors encountered.
 211      * @stable ICU 2.2
 212      */
 213     RuleBasedBreakIterator( const UnicodeString    &rules,
 214                              UParseError           &parseError,
 215                              UErrorCode            &status);
 216
 217     /**
 218      * Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
 219      * Binary rules are obtained from RuleBasedBreakIterator::getBinaryRules().
 220      * Construction of a break iterator in this way is substantially faster than
 221      * construction from source rules.
 222      *
 223      * Ownership of the storage containing the compiled rules remains with the
 224      * caller of this function.  The compiled rules must not be modified or
 225      * deleted during the life of the break iterator.
 226      *
 227      * The compiled rules are not compatible across different major versions of ICU.
 228      * The compiled rules are compatible only between machines with the same
 229      * byte ordering (little or big endian) and the same base character set family
 230      * (ASCII or EBCDIC).
 231      *
 232      * @see #getBinaryRules
 233      * @param compiledRules A pointer to the compiled break rules to be used.
 234      * @param ruleLength The length of the compiled break rules, in bytes.  This
 235      *   corresponds to the length value produced by getBinaryRules().
 236      * @param status Information on any errors encountered, including invalid
 237      *   binary rules.
 238      * @stable ICU 4.8
 239      */
 240     RuleBasedBreakIterator(const uint8_t *compiledRules,
 241                            uint32_t       ruleLength,
 242                            UErrorCode    &status);
 243
 244     /**
 245      * This constructor uses the udata interface to create a BreakIterator
 246      * whose internal tables live in a memory-mapped file.  "image" is an
 247      * ICU UDataMemory handle for the pre-compiled break iterator tables.
 248      * @param image handle to the memory image for the break iterator data.
 249      *        Ownership of the UDataMemory handle passes to the Break Iterator,
 250      *        which will be responsible for closing it when it is no longer needed.
 251      * @param status Information on any errors encountered.
 252      * @see udata_open
 253      * @see #getBinaryRules
 254      * @stable ICU 2.8
 255      */
 256     RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
 257
 258     /**
 259      * Destructor
 260      *  @stable ICU 2.0
 261      */
 262     virtual ~RuleBasedBreakIterator();
 263
 264     /**
 265      * Assignment operator.  Sets this iterator to have the same behavior,
 266      * and iterate over the same text, as the one passed in.
 267      * @param that The RuleBasedBreakIterator passed in
 268      * @return a reference to a RuleBasedBreakIterator; presumably *this (confirm in the implementation)
 269      *  @stable ICU 2.0
 270      */
 271     RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
 272
 273     /**
 274      * Equality operator.  Returns TRUE if both BreakIterators are of the
 275      * same class, have the same behavior, and iterate over the same text.
 276      * @param that The BreakIterator to be compared for equality
 277      * @return TRUE if both BreakIterators are of the
 278      * same class, have the same behavior, and iterate over the same text.
 279      *  @stable ICU 2.0
 280      */
 281     virtual UBool operator==(const BreakIterator& that) const;
 282
 283     /**
 284      * Not-equal operator.  If operator== returns TRUE, this returns FALSE,
 285      * and vice versa.
 286      * @param that The BreakIterator to be compared for inequality
 287      * @return TRUE if both BreakIterators are not the same.
 288      *  @stable ICU 2.0
 289      */
 290     UBool operator!=(const BreakIterator& that) const;
 291
 292     /**
 293      * Returns a newly-constructed RuleBasedBreakIterator with the same
 294      * behavior, and iterating over the same text, as this one.
 295      * Differs from the copy constructor in that it is polymorphic, and
 296      * will correctly clone (copy) a derived class.
 297      * clone() is thread safe.  Multiple threads may simultaneously
 298      * clone the same source break iterator.
 299      * @return a newly-constructed RuleBasedBreakIterator
 300      * @stable ICU 2.0
 301      */
 302     virtual BreakIterator* clone() const;
 303
 304     /**
 305      * Compute a hash code for this BreakIterator
 306      * @return A hash code
 307      *  @stable ICU 2.0
 308      */
 309     virtual int32_t hashCode(void) const;
 310
 311     /**
 312      * Returns the description used to create this iterator
 313      * @return the description used to create this iterator
 314      *  @stable ICU 2.0
 315      */
 316     virtual const UnicodeString& getRules(void) const;
 317
 318     //=======================================================================
 319     // BreakIterator overrides
 320     //=======================================================================
 321
 322     /**
 323      * <p>
 324      * Return a CharacterIterator over the text being analyzed.
 325      * The returned character iterator is owned by the break iterator, and must
 326      * not be deleted by the caller.  Repeated calls to this function may
 327      * return the same CharacterIterator.
 328      * </p>
 329      * <p>
 330      * The returned character iterator must not be used concurrently with
 331      * the break iterator.  If concurrent operation is needed, clone the
 332      * returned character iterator first and operate on the clone.
 333      * </p>
 334      * <p>
 335      * When the break iterator is operating on text supplied via a UText,
 336      * this function will fail.  Lacking any way to signal failures, it
 337      * returns a CharacterIterator containing no text.
 338      * The function getUText() provides similar functionality,
 339      * is reliable, and is more efficient.
 340      * </p>
 341      *
 342      * TODO:  deprecate this function?
 343      *
 344      * @return An iterator over the text being analyzed.
 345      * @stable ICU 2.0
 346      */
 347     virtual  CharacterIterator& getText(void) const;
 348
 349
 350     /**
 351       *  Get a UText for the text being analyzed.
 352       *  The returned UText is a shallow clone of the UText used internally
 353       *  by the break iterator implementation.  It can safely be used to
 354       *  access the text without impacting any break iterator operations,
 355       *  but the underlying text itself must not be altered.
 356       *
 357       * @param fillIn A UText to be filled in.  If NULL, a new UText will be
 358       *           allocated to hold the result.
 359       * @param status receives any error codes.
 360       * @return   The current UText for this break iterator.  If an input
 361       *           UText was provided, it will always be returned.
 362       * @stable ICU 3.4
 363       */
 364      virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
 365
 366     /**
 367      * Set the iterator to analyze a new piece of text.  This function resets
 368      * the current iteration position to the beginning of the text.
 369      * @param newText An iterator over the text to analyze.  The BreakIterator
 370      * takes ownership of the character iterator.  The caller MUST NOT delete it!
 371      *  @stable ICU 2.0
 372      */
 373     virtual void adoptText(CharacterIterator* newText);
 374
 375     /**
 376      * Set the iterator to analyze a new piece of text.  This function resets
 377      * the current iteration position to the beginning of the text.
 378      *
 379      * The BreakIterator will retain a reference to the supplied string.
 380      * The caller must not modify or delete the text while the BreakIterator
 381      * retains the reference.
 382      *
 383      * @param newText The text to analyze.
 384      *  @stable ICU 2.0
 385      */
 386     virtual void setText(const UnicodeString& newText);
 387
 388     /**
 389      * Reset the break iterator to operate over the text represented by
 390      * the UText.  The iterator position is reset to the start.
 391      *
 392      * This function makes a shallow clone of the supplied UText.  This means
 393      * that the caller is free to immediately close or otherwise reuse the
 394      * UText that was passed as a parameter, but that the underlying text itself
 395      * must not be altered while being referenced by the break iterator.
 396      *
 397      * @param text    The UText used to change the text.
 398      * @param status  Receives any error codes.
 399      * @stable ICU 3.4
 400      */
 401     virtual void  setText(UText *text, UErrorCode &status);
 402
 403     /**
 404      * Sets the current iteration position to the beginning of the text, position zero.
 405      * @return The offset of the beginning of the text, zero.
 406      *  @stable ICU 2.0
 407      */
 408     virtual int32_t first(void);
 409
 410     /**
 411      * Sets the current iteration position to the end of the text.
 412      * @return The text's past-the-end offset.
 413      *  @stable ICU 2.0
 414      */
 415     virtual int32_t last(void);
 416
 417     /**
 418      * Advances the iterator either forward or backward the specified number of steps.
 419      * Negative values move backward, and positive values move forward.  This is
 420      * equivalent to repeatedly calling next() or previous().
 421      * @param n The number of steps to move.  The sign indicates the direction
 422      * (negative is backwards, and positive is forwards).
 423      * @return The character offset of the boundary position n boundaries away from
 424      * the current one.
 425      *  @stable ICU 2.0
 426      */
 427     virtual int32_t next(int32_t n);
 428
 429     /**
 430      * Advances the iterator to the next boundary position.
 431      * @return The position of the first boundary after this one.
 432      *  @stable ICU 2.0
 433      */
 434     virtual int32_t next(void);
 435
 436     /**
 437      * Moves the iterator backwards, to the last boundary preceding this one.
 438      * @return The position of the last boundary position preceding this one.
 439      *  @stable ICU 2.0
 440      */
 441     virtual int32_t previous(void);
 442
 443     /**
 444      * Sets the iterator to refer to the first boundary position following
 445      * the specified position.
 446      * @param offset The position from which to begin searching for a break position.
 447      * @return The position of the first break after the current position.
 448      *  @stable ICU 2.0
 449      */
 450     virtual int32_t following(int32_t offset);
 451
 452     /**
 453      * Sets the iterator to refer to the last boundary position before the
 454      * specified position.
 455      * @param offset The position to begin searching for a break from.
 456      * @return The position of the last boundary before the starting position.
 457      *  @stable ICU 2.0
 458      */
 459     virtual int32_t preceding(int32_t offset);
 460
 461     /**
 462      * Returns true if the specified position is a boundary position.  As a side
 463      * effect, leaves the iterator pointing to the first boundary position at
 464      * or after "offset".
 465      * @param offset the offset to check.
 466      * @return True if "offset" is a boundary position.
 467      *  @stable ICU 2.0
 468      */
 469     virtual UBool isBoundary(int32_t offset);
 470
 471     /**
 472      * Returns the current iteration position.
 473      * @return The current iteration position.
 474      * @stable ICU 2.0
 475      */
 476     virtual int32_t current(void) const;
 477
 478
 479     /**
 480      * Return the status tag from the break rule that determined the most recently
 481      * returned break position.  For break rules that do not specify a
 482      * status, a default value of 0 is returned.  If more than one break rule
 483      * would cause a boundary to be located at some position in the text,
 484      * the numerically largest of the applicable status values is returned.
 485      * <p>
 486      * Of the standard types of ICU break iterators, only word break and
 487      * line break provide status values.  The values are defined in
 488      * the header file ubrk.h.  For Word breaks, the status allows distinguishing between words
 489      * that contain alphabetic letters, "words" that appear to be numbers,
 490      * punctuation and spaces, words containing ideographic characters, and
 491      * more.  For Line Break, the status distinguishes between hard (mandatory) breaks
 492      * and soft (potential) break positions.
 493      * <p>
 494      * <code>getRuleStatus()</code> can be called after obtaining a boundary
 495      * position from <code>next()</code>, <code>previous()</code>, or
 496      * any other break iterator function that returns a boundary position.
 497      * <p>
 498      * When creating custom break rules, one is free to define whatever
 499      * status values may be convenient for the application.
 500      * <p>
 501      * Note: this function is not thread safe.  It should not have been
 502      *       declared const, and the const remains only for compatibility
 503      *       reasons.  (The function is logically const, but not bit-wise const).
 504      * <p>
 505      * @return the status from the break rule that determined the most recently
 506      * returned break position.
 507      *
 508      * @see UWordBreak
 509      * @stable ICU 2.2
 510      */
 511     virtual int32_t getRuleStatus() const;
 512
 513    /**
 514     * Get the status (tag) values from the break rule(s) that determined the most
 515     * recently returned break position.
 516     * <p>
 517     * The returned status value(s) are stored into an array provided by the caller.
 518     * The values are stored in sorted (ascending) order.
 519     * If the capacity of the output array is insufficient to hold the data,
 520     *  the output will be truncated to the available length, and a
 521     *  U_BUFFER_OVERFLOW_ERROR will be signaled.
 522     *
 523     * @param fillInVec an array to be filled in with the status values.
 524     * @param capacity  the length of the supplied vector.  A length of zero causes
 525     *                  the function to return the number of status values, in the
 526     *                  normal way, without attempting to store any values.
 527     * @param status    receives error codes.
 528     * @return          The number of rule status values from rules that determined
 529     *                  the most recent boundary returned by the break iterator.
 530     *                  In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
 531     *                  is the total number of status values that were available,
 532     *                  not the reduced number that were actually returned.
 533     * @see getRuleStatus
 534     * @stable ICU 3.0
 535     */
 536     virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
 537
 538     /**
 539      * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
 540      * This method is to implement a simple version of RTTI, since not all
 541      * C++ compilers support genuine RTTI.  Polymorphic operator==() and
 542      * clone() methods call this method.
 543      *
 544      * @return          The class ID for this object. All objects of a
 545      *                  given class have the same class ID.  Objects of
 546      *                  other classes have different class IDs.
 547      * @stable ICU 2.0
 548      */
 549     virtual UClassID getDynamicClassID(void) const;
 550
 551     /**
 552      * Returns the class ID for this class.  This is useful only for
 553      * comparing to a return value from getDynamicClassID().  For example:
 554      *
 555      *      Base* polymorphic_pointer = createPolymorphicObject();
 556      *      if (polymorphic_pointer->getDynamicClassID() ==
 557      *          Derived::getStaticClassID()) ...
 558      *
 559      * @return          The class ID for all objects of this class.
 560      * @stable ICU 2.0
 561      */
 562     static UClassID U_EXPORT2 getStaticClassID(void);
 563
 564     /**
 565      * Deprecated functionality. Use clone() instead.
 566      *
 567      * Create a clone (copy) of this break iterator in memory provided
 568      *  by the caller.  The idea is to increase performance by avoiding
 569      *  a storage allocation.  Use of this function is NOT RECOMMENDED.
 570      *  Performance gains are minimal, and correct buffer management is
 571      *  tricky.  Use clone() instead.
 572      *
 573      * @param stackBuffer  The pointer to the memory into which the cloned object
 574      *                     should be placed.  If NULL,  allocate heap memory
 575      *                     for the cloned object.
 576      * @param BufferSize   The size of the buffer.  If zero, return the required
 577      *                     buffer size, but do not clone the object.  If the
 578      *                     size was too small (but not zero), allocate heap
 579      *                     storage for the cloned object.
 580      *
 581      * @param status       Error status.  U_SAFECLONE_ALLOCATED_WARNING will be
 582      *                     returned if the provided buffer was too small, and
 583      *                     the clone was therefore put on the heap.
 584      *
 585      * @return  Pointer to the clone object.  This may differ from the stackBuffer
 586      *          address if the byte alignment of the stack buffer was not suitable
 587      *          or if the stackBuffer was too small to hold the clone.
 588      * @deprecated ICU 52. Use clone() instead.
 589      */
 590     virtual BreakIterator *  createBufferClone(void *stackBuffer,
 591                                                int32_t &BufferSize,
 592                                                UErrorCode &status);
 593
 594
 595     /**
 596      * Return the binary form of compiled break rules,
 597      * which can then be used to create a new break iterator at some
 598      * time in the future.  Creating a break iterator from pre-compiled rules
 599      * is much faster than building one from the source form of the
 600      * break rules.
 601      *
 602      * The binary data can only be used with the same version of ICU
 603      *  and on the same platform type (processor endian-ness)
 604      *
 605      * @param length Returns the length of the binary data.  (Out parameter.)
 606      *
 607      * @return   A pointer to the binary (compiled) rule data.  The storage
 608      *           belongs to the RuleBasedBreakIterator object, not the
 609      *           caller, and must not be modified or deleted.
 610      * @stable ICU 4.8
 611      */
 612     virtual const uint8_t *getBinaryRules(uint32_t &length);
 613
 614     /**
 615      *  Set the subject text string upon which the break iterator is operating
 616      *  without changing any other aspect of the matching state.
 617      *  The new and previous text strings must have the same content.
 618      *
 619      *  This function is intended for use in environments where ICU is operating on
 620      *  strings that may move around in memory.  It provides a mechanism for notifying
 621      *  ICU that the string has been relocated, and providing a new UText to access the
 622      *  string in its new position.
 623      *
 624      *  Note that the break iterator implementation never copies the underlying text
 625      *  of a string being processed, but always operates directly on the original text
 626      *  provided by the user. Refreshing simply drops the references to the old text
 627      *  and replaces them with references to the new.
 628      *
 629      *  Caution:  this function is normally used only by very specialized,
 630      *  system-level code.  One example use case is with garbage collection that moves
 631      *  the text in memory.
 632      *
 633      * @param input      The new (moved) text string.
 634      * @param status     Receives errors detected by this function.
 635      * @return           *this
 636      *
 637      * @stable ICU 49
 638      */
 639     virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status);
 640
 641
 642 private:
 643     //=======================================================================
 644     // implementation
 645     //=======================================================================
 646     /**
 647      * Dumps caches and performs other actions associated with a complete change
 648      * in text or iteration position.
 649      * @internal
 650      */
 651     void reset(void);
 652
 653     /**
 654       * Set the type of the break iterator.
 655       * @internal
 656       */
 657     void setBreakType(int32_t type);
 658
 659     /**
 660       * Common initialization function, used by constructors and bufferClone.
 661       * @internal
 662       */
 663     void init();
 664
 665     /**
 666      * This method backs the iterator back up to a "safe position" in the text.
 667      * This is a position that we know, without any context, must be a break position.
 668      * The various calling methods then iterate forward from this safe position to
 669      * the appropriate position to return.  (For more information, see the description
 670      * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
 671      * @param statetable state table used for moving backwards
 672      * @internal
 673      */
 674     int32_t handlePrevious(const RBBIStateTable *statetable);
 675
 676     /**
 677      * This method is the actual implementation of the next() method.  All iteration
 678      * vectors through here.  This method initializes the state machine to state 1
 679      * and advances through the text character by character until we reach the end
 680      * of the text or the state machine transitions to state 0.  We update our return
 681      * value every time the state machine passes through a possible end state.
 682      * @param statetable state table used for moving forwards
 683      * @internal
 684      */
 685     int32_t handleNext(const RBBIStateTable *statetable);
 686
 687
 688     /**
 689      * This is the function that actually implements dictionary-based
 690      * breaking.  Covering at least the range from startPos to endPos,
 691      * it checks for dictionary characters, and if it finds them determines
 692      * the appropriate object to deal with them. It may cache found breaks in
 693      * fCachedBreakPositions as it goes. It may well also look at text outside
 694      * the range startPos to endPos.
 695      * If going forward, endPos is the normal Unicode break result, and
 696      * if going in reverse, startPos is the normal Unicode break result
 697      * @param startPos  The start position of a range of text
 698      * @param endPos    The end position of a range of text
 699      * @param reverse   The call is for the reverse direction
 700      * @internal
 701      */
 702     int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
 703
 704
 705     /**
 706      * This function returns the appropriate LanguageBreakEngine for a
 707      * given character c.
 708      * @param c         A character in the dictionary set
 709      * @internal
 710      */
 711     const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
 712
 713     /**
 714      *  @internal
 715      */
 716     void makeRuleStatusValid();
 717
 718 };
 719
 720 //------------------------------------------------------------------------------
 721 //
 722 //   Inline Functions Definitions ...
 723 //
 724 //------------------------------------------------------------------------------
 725
 726 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
 727     return !(this->operator==(that));   // inequality is the logical negation of the virtual operator==
 728 }
 729
 730 U_NAMESPACE_END
 731
 732 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
 733
 734 #endif