2 *******************************************************************************
3 * Copyright (C) 2006, International Business Machines Corporation and others. *
4 * All Rights Reserved. *
5 *******************************************************************************
11 #include "unicode/utypes.h"
12 #include "unicode/uobject.h"
13 #include "unicode/utext.h"
20 * <p>UDataSwapFn function for use in swapping a compact dictionary.</p>
22 * @param ds Pointer to UDataSwapper containing global data about the
23 * transformation and function pointers for handling primitive
25 * @param inData Pointer to the input data to be transformed or examined.
26 * @param length Length of the data, counting bytes. May be -1 for preflighting.
27 * If length>=0, then transform the data.
28 * If length==-1, then only determine the length of the data.
29 * The length cannot be determined from the data itself for all
30 * types of data (e.g., not for simple arrays of integers).
31 * @param outData Pointer to the output data buffer.
32 * If length>=0 (transformation), then the output buffer must
33 * have a capacity of at least length.
34 * If length==-1, then outData will not be used and can be NULL.
35 * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
36 * fulfill U_SUCCESS on input.
37 * @return The actual length of the data.
// Declaration only; the definition is not part of this header.
// `length` is a signed byte count so that -1 can request preflighting, as the
// parameter notes above describe; the int32_t result is the data length.
42 U_CAPI int32_t U_EXPORT2
43 triedict_swap(const UDataSwapper *ds,
44 const void *inData, int32_t length, void *outData,
45 UErrorCode *pErrorCode);
49 class StringEnumeration;
51 /*******************************************************************
56 * <p>TrieWordDictionary is an abstract class that represents a word
57 * dictionary based on a trie. The base protocol is read-only.
58 * Subclasses may allow writing.</p>
60 class U_COMMON_API TrieWordDictionary : public UMemory {
64 * <p>Default constructor.</p>
70 * <p>Virtual destructor.</p>
72 virtual ~TrieWordDictionary();
75 * <p>Returns true if the dictionary contains values associated with each word.</p>
77 virtual UBool getValued() const = 0;
80 * <p>Find dictionary words that match the text.</p>
82 * @param text A UText representing the text. The
83 * iterator is left after the longest prefix match in the dictionary.
84 * @param maxLength The maximum number of code units to match.
85 * @param lengths An array that is filled with the lengths of words that matched.
86 * @param count Filled with the number of elements output in lengths.
87 * @param limit The size of the lengths array; this limits the number of words output.
88 * @param values An array that is filled with the values associated with the matched words.
89 * @return The number of characters in text that were matched.
91 virtual int32_t matches( UText *text,
96 uint16_t *values = NULL) const = 0;
99 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
101 * @param status A status code recording the success of the call.
102 * @return A StringEnumeration that will iterate through the whole dictionary.
103 * The caller is responsible for closing it. The order is unspecified.
105 virtual StringEnumeration *openWords( UErrorCode &status ) const = 0;
109 /*******************************************************************
110 * MutableTrieDictionary
114 * <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be
118 struct TernaryNode; // Forward declaration
120 class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
123 * The root node of the trie
130 * A UText for internal use
137 * A UText for internal use
142 friend class CompactTrieDictionary; // For fast conversion
147 * <p>Constructor.</p>
149 * @param median A UChar around which to balance the trie. Ideally, it should
150 * begin at least one word that is near the median of the set in the dictionary.
151 * @param status A status code recording the success of the call.
152 * @param containsValue True if the dictionary stores values associated with each word.
154 MutableTrieDictionary( UChar median, UErrorCode &status, UBool containsValue = FALSE );
157 * <p>Virtual destructor.</p>
159 virtual ~MutableTrieDictionary();
162 * Indicate whether the MutableTrieDictionary stores values associated with each word
164 void setValued(UBool valued){
169 * <p>Returns true if the dictionary contains values associated with each word.</p>
171 virtual UBool getValued() const {
176 * <p>Find dictionary words that match the text.</p>
178 * @param text A UText representing the text. The
179 * iterator is left after the longest prefix match in the dictionary.
180 * @param maxLength The maximum number of code units to match.
181 * @param lengths An array that is filled with the lengths of words that matched.
182 * @param count Filled with the number of elements output in lengths.
183 * @param limit The size of the lengths array; this limits the number of words output.
184 * @param values An array that is filled with the values associated with the matched words.
185 * @return The number of characters in text that were matched.
187 virtual int32_t matches( UText *text,
192 uint16_t *values = NULL) const;
195 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
197 * @param status A status code recording the success of the call.
198 * @return A StringEnumeration that will iterate through the whole dictionary.
199 * The caller is responsible for closing it. The order is unspecified.
201 virtual StringEnumeration *openWords( UErrorCode &status ) const;
204 * <p>Add one word to the dictionary with an optional associated value.</p>
206 * @param word A UChar buffer containing the word.
207 * @param length The length of the word.
208 * @param status The resultant status.
209 * @param value The nonzero value associated with this word.
211 virtual void addWord( const UChar *word,
218 * <p>Add all strings from a UEnumeration to the dictionary.</p>
220 * @param words A UEnumeration that will return the desired words.
221 * @param status The resultant status
223 virtual void addWords( UEnumeration *words, UErrorCode &status );
228 * <p>Search the dictionary for matches.</p>
230 * @param text A UText representing the text. The
231 * iterator is left after the longest prefix match in the dictionary.
232 * @param maxLength The maximum number of code units to match.
233 * @param lengths An array that is filled with the lengths of words that matched.
234 * @param count Filled with the number of elements output in lengths.
235 * @param limit The size of the lengths array; this limits the number of words output.
236 * @param parent The parent of the current node.
237 * @param pMatched The returned parent node matched the input.
238 * @param values An array that is filled with the values associated with the matched words.
239 * @return The number of characters in text that were matched.
241 virtual int32_t search( UText *text,
246 TernaryNode *&parent,
248 uint16_t *values = NULL) const;
252 * <p>Private constructor. The root node is not allocated.</p>
254 * @param status A status code recording the success of the call.
255 * @param containsValues True if the dictionary will store a value associated
256 * with each word added.
258 MutableTrieDictionary( UErrorCode &status, UBool containsValues = false );
261 /*******************************************************************
262 * CompactTrieDictionary
265 //forward declarations
266 struct CompactTrieHeader;
267 struct CompactTrieInfo;
270 * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted
273 class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary {
276 * The header of the CompactTrieDictionary which contains all info
279 CompactTrieInfo *fInfo;
282 * A UBool indicating whether or not we own the fData.
289 * <p>Construct a dictionary from a UDataMemory.</p>
291 * @param dataObj A pointer to a UDataMemory, which is adopted
292 * @param status A status code giving the result of the constructor
294 CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status);
297 * <p>Construct a dictionary from raw saved data.</p>
299 * @param dataObj A pointer to the raw data, which is still owned by the caller
300 * @param status A status code giving the result of the constructor
302 CompactTrieDictionary(const void *dataObj, UErrorCode &status);
305 * <p>Construct a dictionary from a MutableTrieDictionary.</p>
307 * @param dict The dictionary to use as input.
308 * @param status A status code recording the success of the call.
310 CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status );
313 * <p>Virtual destructor.</p>
315 virtual ~CompactTrieDictionary();
318 * <p>Returns true if the dictionary contains values associated with each word.</p>
320 virtual UBool getValued() const;
323 * <p>Find dictionary words that match the text.</p>
325 * @param text A UText representing the text. The
326 * iterator is left after the longest prefix match in the dictionary.
327 * @param maxLength The maximum number of code units to match.
328 * @param lengths An array that is filled with the lengths of words that matched.
329 * @param count Filled with the number of elements output in lengths.
330 * @param limit The size of the lengths array; this limits the number of words output.
331 * @param values An array that is filled with the values associated with the matched words.
332 * @return The number of characters in text that were matched.
334 virtual int32_t matches( UText *text,
339 uint16_t *values = NULL) const;
342 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
344 * @param status A status code recording the success of the call.
345 * @return A StringEnumeration that will iterate through the whole dictionary.
346 * The caller is responsible for closing it. The order is unspecified.
348 virtual StringEnumeration *openWords( UErrorCode &status ) const;
351 * <p>Return the size of the compact data.</p>
353 * @return The size of the dictionary's compact data.
355 virtual uint32_t dataSize() const;
358 * <p>Return a void * pointer to the (unmanaged) compact data, platform-endian.</p>
360 * @return The data for the compact dictionary, suitable for passing to the
363 virtual const void *data() const;
366 * <p>Return a MutableTrieDictionary clone of this dictionary.</p>
368 * @param status A status code recording the success of the call.
369 * @return A MutableTrieDictionary with the same data as this dictionary
371 virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const;
376 * <p>Convert a MutableTrieDictionary into a compact data blob.</p>
378 * @param dict The dictionary to convert.
379 * @param status A status code recording the success of the call.
380 * @return A single data blob starting with a CompactTrieHeader.
382 static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict,
383 UErrorCode &status );