/**
- **********************************************************************************
- * Copyright (C) 2006-2010, International Business Machines Corporation and others.
- * All Rights Reserved.
- **********************************************************************************
+ *******************************************************************************
+ * Copyright (C) 2006,2012-2013, International Business Machines Corporation *
+ * and others. All Rights Reserved. *
+ *******************************************************************************
*/
#ifndef DICTBE_H
U_NAMESPACE_BEGIN
-class TrieWordDictionary;
+class DictionaryMatcher;
/*******************************************************************
* DictionaryBreakEngine
/**
* <p>ThaiBreakEngine is a kind of DictionaryBreakEngine that uses a
- * TrieWordDictionary and heuristics to determine Thai-specific breaks.</p>
+ * dictionary and heuristics to determine Thai-specific breaks.</p>
*
* <p>After it is constructed a ThaiBreakEngine may be shared between
* threads without synchronization.</p>
UnicodeSet fBeginWordSet;
UnicodeSet fSuffixSet;
UnicodeSet fMarkSet;
- const TrieWordDictionary *fDictionary;
+ DictionaryMatcher *fDictionary;
public:
/**
* <p>Default constructor.</p>
*
- * @param adoptDictionary A TrieWordDictionary to adopt. Deleted when the
+ * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
* engine is deleted.
*/
- ThaiBreakEngine(const TrieWordDictionary *adoptDictionary, UErrorCode &status);
+ ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
};
/*******************************************************************
+ * LaoBreakEngine
+ */
+
+/**
+ * <p>LaoBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * dictionary and heuristics to determine Lao-specific breaks.</p>
+ *
+ * <p>After it is constructed a LaoBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class LaoBreakEngine : public DictionaryBreakEngine {
+ private:
+ /**
+ * Character sets and dictionary used by this engine.
+ *
+ * The original note here read "The set of characters handled by this
+ * engine"; that presumably describes fLaoWordSet only.
+ * NOTE(review): the roles of fEndWordSet, fBeginWordSet and fMarkSet are
+ * not visible in this header - confirm against the implementation.
+ * fDictionary is presumably the DictionaryMatcher adopted by the
+ * constructor (see the constructor's adoptDictionary parameter).
+ * @internal
+ */
+
+ UnicodeSet fLaoWordSet;
+ UnicodeSet fEndWordSet;
+ UnicodeSet fBeginWordSet;
+ UnicodeSet fMarkSet;
+ DictionaryMatcher *fDictionary;
+
+ public:
+
+ /**
+ * <p>Constructor. Note that this is not a default constructor: both a
+ * dictionary and a status argument are required.</p>
+ *
+ * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
+ * engine is deleted.
+ * @param status A UErrorCode passed by reference. Presumably used to
+ * report a failure during construction - confirm in the implementation.
+ */
+ LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
+
+ /**
+ * <p>Virtual destructor.</p>
+ *
+ * <p>Per the constructor's contract the adopted dictionary is deleted
+ * together with the engine; the deletion itself is not visible in this
+ * header.</p>
+ */
+ virtual ~LaoBreakEngine();
+
+ protected:
+ /**
+ * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
+ *
+ * <p>Declared const, so it can be called on a const engine.</p>
+ *
+ * @param text A UText representing the text
+ * @param rangeStart The start of the range of dictionary characters
+ * @param rangeEnd The end of the range of dictionary characters
+ * @param foundBreaks Output UStack receiving the break positions. It is
+ * declared as a reference, so (unlike a C array pointer) it cannot be 0.
+ * @return The number of breaks found
+ */
+ virtual int32_t divideUpDictionaryRange( UText *text,
+ int32_t rangeStart,
+ int32_t rangeEnd,
+ UStack &foundBreaks ) const;
+
+};
+
+/*******************************************************************
+ * KhmerBreakEngine
+ */
+
+/**
+ * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p>
+ *
+ * <p>After it is constructed a KhmerBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class KhmerBreakEngine : public DictionaryBreakEngine {
+ private:
+ /**
+ * Character sets and dictionary used by this engine.
+ *
+ * The original note here read "The set of characters handled by this
+ * engine"; that presumably describes fKhmerWordSet only.
+ * NOTE(review): the roles of fEndWordSet, fBeginWordSet and fMarkSet are
+ * not visible in this header - confirm against the implementation.
+ * fDictionary is presumably the DictionaryMatcher adopted by the
+ * constructor (see the constructor's adoptDictionary parameter).
+ * @internal
+ */
+
+ UnicodeSet fKhmerWordSet;
+ UnicodeSet fEndWordSet;
+ UnicodeSet fBeginWordSet;
+ UnicodeSet fMarkSet;
+ DictionaryMatcher *fDictionary;
+
+ public:
+
+ /**
+ * <p>Constructor. Note that this is not a default constructor: both a
+ * dictionary and a status argument are required.</p>
+ *
+ * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
+ * engine is deleted.
+ * @param status A UErrorCode passed by reference. Presumably used to
+ * report a failure during construction - confirm in the implementation.
+ */
+ KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
+
+ /**
+ * <p>Virtual destructor.</p>
+ *
+ * <p>Per the constructor's contract the adopted dictionary is deleted
+ * together with the engine; the deletion itself is not visible in this
+ * header.</p>
+ */
+ virtual ~KhmerBreakEngine();
+
+ protected:
+ /**
+ * <p>Divide up a range of known dictionary characters.</p>
+ *
+ * <p>Declared const, so it can be called on a const engine.</p>
+ *
+ * @param text A UText representing the text
+ * @param rangeStart The start of the range of dictionary characters
+ * @param rangeEnd The end of the range of dictionary characters
+ * @param foundBreaks Output UStack receiving the break positions. It is
+ * declared as a reference, so (unlike a C array pointer) it cannot be 0.
+ * @return The number of breaks found
+ */
+ virtual int32_t divideUpDictionaryRange( UText *text,
+ int32_t rangeStart,
+ int32_t rangeEnd,
+ UStack &foundBreaks ) const;
+
+};
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/*******************************************************************
* CjkBreakEngine
*/
/**
* <p>CjkBreakEngine is a kind of DictionaryBreakEngine that uses a
- * TrieWordDictionary with costs associated with each word and
+ * dictionary with costs associated with each word and
* Viterbi decoding to determine CJK-specific breaks.</p>
*/
class CjkBreakEngine : public DictionaryBreakEngine {
UnicodeSet fKatakanaWordSet;
UnicodeSet fHiraganaWordSet;
- const TrieWordDictionary *fDictionary;
+ DictionaryMatcher *fDictionary;
public:
/**
* <p>Default constructor.</p>
*
- * @param adoptDictionary A TrieWordDictionary to adopt. Deleted when the
- * engine is deleted. The TrieWordDictionary must contain costs for each word
+ * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
+ * engine is deleted. The DictionaryMatcher must contain costs for each word
* in order for the dictionary to work properly.
*/
- CjkBreakEngine(const TrieWordDictionary *adoptDictionary, LanguageType type, UErrorCode &status);
+ CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
};
+#endif
+
U_NAMESPACE_END
/* DICTBE_H */