2 * Copyright (c) 2015 Samsung Electronics Co., Ltd All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 * Copyright (C) 1996-2013, International Business Machines Corporation and others.
17 * All Rights Reserved.
20 #ifndef __UTILS_I18N_UBRK_PRODUCT_H__
21 #define __UTILS_I18N_UBRK_PRODUCT_H__
23 #include <utils_i18n_types.h>
26 * @file utils_i18n_ubrk.h
28 * @brief utils_i18n_ubrk
32 * @ingroup CAPI_BASE_UTILS_I18N_MODULE
33 * @defgroup CAPI_BASE_UTILS_I18N_UBRK_MODULE Ubrk
34 * @brief Ubrk defines methods for finding the location of boundaries in text.
36 * @section CAPI_BASE_UTILS_I18N_UBRK_MODULE_HEADER Required Header
37 * \#include <utils_i18n.h>
39 * @section CAPI_BASE_UTILS_I18N_UBRK_MODULE_OVERVIEW Overview
40 * @details A #i18n_ubreak_iterator_h maintains a current position and scans over text, returning the index of characters where boundaries occur.
49 * @addtogroup CAPI_BASE_UTILS_I18N_UBRK_MODULE
54 * @brief Opens a new #i18n_ubreak_iterator_h for locating text boundaries for a specified locale.
55 * @details A #i18n_ubreak_iterator_h may be used for detecting character, line, word,
56 * and sentence breaks in text.
57 * @remarks Error codes are described in #i18n_error_code_e description.
59 * @param[in] type The type of #i18n_ubreak_iterator_h to open: one of #I18N_UBRK_CHARACTER, #I18N_UBRK_WORD,
60 * #I18N_UBRK_LINE, #I18N_UBRK_SENTENCE
61 * @param[in] locale The locale specifying the text-breaking conventions.
62 * If @c NULL, the default locale will be used.
63 * @param[in] text The text to be iterated over. May be @c NULL, then the iterator will be created without any text.
64 * The text can be set later with i18n_ubrk_set_text() function.
65 * @param[in] text_length The number of characters in text, or -1 if NULL-terminated.
66 * @param[out] break_iter A pointer to the #i18n_ubreak_iterator_h for the specified locale.
67 * @return The obtained error code.
68 * @retval #I18N_ERROR_NONE Successful
69 * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
70 * @see i18n_ubrk_create_rules()
72 int i18n_ubrk_create (i18n_ubreak_iterator_type_e type, const char *locale, const i18n_uchar *text, int32_t text_length, i18n_ubreak_iterator_h *break_iter);
75 * @brief Opens a new #i18n_ubreak_iterator_h for locating text boundaries using specified breaking rules.
76 * @remarks Error codes are described in #i18n_error_code_e description.
78 * @param[in] rules A set of rules specifying the text breaking conventions.
79 * @param[in] rules_length The number of characters in rules, or -1 if NULL-terminated.
80 * @param[in] text The text to be iterated over. May be @c NULL, in which case i18n_ubrk_set_text() is
81 * used to specify the text to be iterated.
82 * @param[in] text_length The number of characters in text, or -1 if NULL-terminated.
83 * @param[out] break_iter A pointer to the #i18n_ubreak_iterator_h for the specified rules.
84 * @param[out] parse_err Receives position and context information for any syntax errors
85 * detected while parsing the rules.
86 * @return The obtained error code.
87 * @retval #I18N_ERROR_NONE Successful
88 * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
89 * @see i18n_ubrk_create()
91 int i18n_ubrk_create_rules (const i18n_uchar *rules, int32_t rules_length, const i18n_uchar *text, int32_t text_length, i18n_ubreak_iterator_h *break_iter, i18n_uparse_error_s *parse_err);
94 * @brief Thread safe cloning operation.
95 * @remarks Error codes are described in #i18n_error_code_e description.
97 * @param[in] break_iter iterator to be cloned. Must not be @c NULL.
98 * @param[in] stack_buffer User allocated space for the new clone. If @c NULL new memory will be allocated.
99 * If buffer is not large enough, new memory will be allocated.
100 * Clients can use the #I18N_U_BRK_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
101 * @param[in,out] p_buffer_size A pointer to the size of the allocated space.
102 * If <code>*p_buffer_size == 0</code>, a sufficient size for use in cloning will
103 * be returned ('pre-flighting').
104 * If <code>*p_buffer_size</code> is not enough for a stack-based safe clone,
105 * new memory will be allocated.
106 * @param[out] break_iter_clone A pointer to the cloned #i18n_ubreak_iterator_h.
107 * @return The obtained error code.
108 * @retval #I18N_ERROR_NONE Successful
109 * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
111 int i18n_ubrk_safe_clone (const i18n_ubreak_iterator_h break_iter, void *stack_buffer, int32_t *p_buffer_size, i18n_ubreak_iterator_h *break_iter_clone);
114 * @brief Closes a #i18n_ubreak_iterator_h.
115 * @details Once closed, a #i18n_ubreak_iterator_h may no longer be used.
116 * @remarks Error codes are described in #i18n_error_code_e description.
118 * @param[in] break_iter The break iterator to close. Must not be @c NULL.
119 * @return The obtained error code.
120 * @retval #I18N_ERROR_NONE Successful
121 * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
123 int i18n_ubrk_destroy (i18n_ubreak_iterator_h break_iter);
126 * @brief Sets an existing iterator to point to a new piece of text.
127 * @remarks Error codes are described in #i18n_error_code_e description.
129 * @param[in] break_iter The iterator to use. Must not be @c NULL.
130 * @param[in] text The text to be set. Must not be @c NULL.
131 * @param[in] text_length The length of the text.
132 * @return The obtained error code.
133 * @retval #I18N_ERROR_NONE Successful
134 * @retval #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
136 int i18n_ubrk_set_text (i18n_ubreak_iterator_h break_iter, const i18n_uchar *text, int32_t text_length);
139 * @brief Determines the most recently-returned text boundary.
140 * @remarks The specific error code can be obtained using the get_last_result() method.
141 * Error codes are described in Exceptions section.
143 * @param[in] break_iter The break iterator to use. Must not be @c NULL.
144 * @return The character index most recently returned by i18n_ubrk_next(), i18n_ubrk_previous(),
145 * i18n_ubrk_first(), or i18n_ubrk_last().
146 * @exception #I18N_ERROR_NONE Successful
147 * @exception #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
149 int32_t i18n_ubrk_current (const i18n_ubreak_iterator_h break_iter);
152 * @brief Advances the iterator to the boundary following the current boundary.
153 * @remarks The specific error code can be obtained using the get_last_result() method.
154 * Error codes are described in Exceptions section.
156 * @param[in] break_iter The break iterator to use. Must not be @c NULL.
157 * @return The character index of the next text boundary, or #I18N_UBRK_DONE
158 * if all text boundaries have been returned.
159 * @exception #I18N_ERROR_NONE Successful
160 * @exception #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
161 * @see i18n_ubrk_previous()
163 int32_t i18n_ubrk_next (i18n_ubreak_iterator_h break_iter);
166 * @brief Sets the iterator position to the boundary preceding the current boundary.
167 * @remarks The specific error code can be obtained using the get_last_result() method.
168 * Error codes are described in Exceptions section.
170 * @param[in] break_iter The break iterator to use. Must not be @c NULL.
171 * @return The character index of the preceding text boundary, or #I18N_UBRK_DONE
172 * if all text boundaries have been returned.
173 * @exception #I18N_ERROR_NONE Successful
174 * @exception #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
175 * @see i18n_ubrk_next()
177 int32_t i18n_ubrk_previous (i18n_ubreak_iterator_h break_iter);
180 * @brief Sets the iterator position to the index of the first character in the text being scanned.
181 * @details This is not always the same as index @c 0 of the text.
182 * @remarks The specific error code can be obtained using the get_last_result() method.
183 * Error codes are described in Exceptions section.
185 * @param[in] break_iter The break iterator to use. Must not be @c NULL.
186 * @return The character index of the first character in the text being scanned.
187 * @exception #I18N_ERROR_NONE Successful
188 * @exception #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
189 * @see i18n_ubrk_last()
191 int32_t i18n_ubrk_first (i18n_ubreak_iterator_h break_iter);
194 * @brief Sets the iterator position to the index immediately <EM>beyond</EM> the last character in the text being scanned.
195 * @details This is not the same as the last character.
196 * @remarks The specific error code can be obtained using the get_last_result() method.
197 * Error codes are described in Exceptions section.
199 * @param[in] break_iter The break iterator to use. Must not be @c NULL.
200 * @return The character offset immediately <EM>beyond</EM> the last character in the
201 * text being scanned.
202 * @exception #I18N_ERROR_NONE Successful
203 * @exception #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
204 * @see i18n_ubrk_first()
206 int32_t i18n_ubrk_last (i18n_ubreak_iterator_h break_iter);
209 * @brief Sets the iterator position to the first boundary preceding the specified @c offset.
210 * @details The new position is always smaller than @c offset, or #I18N_UBRK_DONE.
211 * @remarks The specific error code can be obtained using the get_last_result() method.
212 * Error codes are described in Exceptions section.
214 * @param[in] break_iter The break iterator to use. Must not be @c NULL.
215 * @param[in] offset The offset to begin scanning.
216 * @return The text boundary preceding offset, or #I18N_UBRK_DONE.
217 * @exception #I18N_ERROR_NONE Successful
218 * @exception #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
219 * @see i18n_ubrk_following()
221 int32_t i18n_ubrk_preceding (i18n_ubreak_iterator_h break_iter, int32_t offset);
224 * @brief Advances the iterator to the first boundary following the specified @c offset.
225 * @details The value returned is always greater than @c offset, or #I18N_UBRK_DONE.
226 * @remarks The specific error code can be obtained using the get_last_result() method.
227 * Error codes are described in Exceptions section.
229 * @param[in] break_iter The break iterator to use. Must not be @c NULL.
230 * @param[in] offset The offset to begin scanning.
231 * @return The text boundary following offset, or #I18N_UBRK_DONE.
232 * @exception #I18N_ERROR_NONE Successful
233 * @exception #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
234 * @see i18n_ubrk_preceding()
236 int32_t i18n_ubrk_following (i18n_ubreak_iterator_h break_iter, int32_t offset);
239 * @brief Gets a locale for which text breaking information is available.
240 * @details A #i18n_ubreak_iterator_h in a locale returned by this function will perform the correct
241 * text breaking for the locale.
242 * @remarks The specific error code can be obtained using the get_last_result() method.
243 * Error codes are described in Exceptions section.
245 * @param[in] index The index of the desired locale.
246 * @return A locale for which text breaking information is available, or @c 0 if none.
247 * @exception #I18N_ERROR_NONE Successful
248 * @exception #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
249 * @see i18n_ubrk_count_available()
251 const char *i18n_ubrk_get_available (int32_t index);
254 * @brief Determines how many locales have text breaking information available.
255 * @details This function is most useful for determining the loop ending condition for
256 * calls to i18n_ubrk_get_available().
257 * @remarks The specific error code can be obtained using the get_last_result() method.
258 * Error codes are described in Exceptions section.
260 * @return The number of locales for which text breaking information is available.
261 * @exception #I18N_ERROR_NONE Successful
262 * @see i18n_ubrk_get_available()
264 int32_t i18n_ubrk_count_available (void);
267 * @brief Returns true if the specified position is a boundary position.
268 * @details As a side effect, leaves the iterator pointing to the first boundary position at
269 * or after @c offset.
270 * @remarks The specific error code can be obtained using the get_last_result() method. Error codes are
271 * described in Exceptions section.
273 * @param[in] break_iter The break iterator to use. Must not be @c NULL.
274 * @param[in] offset The offset to check.
275 * @return True if "offset" is a boundary position.
276 * @exception #I18N_ERROR_NONE Successful
277 * @exception #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
279 i18n_ubool i18n_ubrk_is_boundary (i18n_ubreak_iterator_h break_iter, int32_t offset);
282 * @brief Returns the status from the break rule that determined the most recently
283 * returned break position.
284 * @details The values appear in the rule source
285 * within brackets, {123}, for example. For rules that do not specify a
286 * status, a default value of 0 is returned.
288 * For word break iterators, the possible values are defined in enum #i18n_uchar_u_word_break_values_e.
289 * @remarks The specific error code can be obtained using the get_last_result() method.
290 * Error codes are described in Exceptions section.
292 * @param[in] break_iter The break iterator to use. Must not be @c NULL.
293 * @return The status from the break rule that determined the most recently returned break position.
294 * @exception #I18N_ERROR_NONE Successful
295 * @exception #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
297 int32_t i18n_ubrk_get_rule_status (i18n_ubreak_iterator_h break_iter);
300 * @brief Gets the statuses from the break rules that determined the most recently
301 * returned break position.
302 * @details The values appear in the rule source within brackets, {123}, for example. The default status value for rules
303 * that do not explicitly provide one is zero.
305 * For word break iterators, the possible values are defined in enum #i18n_uchar_u_word_break_values_e.
306 * @remarks The specific error code can be obtained using the get_last_result() method.
307 * Error codes are described in Exceptions section and in #i18n_error_code_e description.
310 * @param[in] break_iter The break iterator to use. Must not be @c NULL.
311 * @param[out] fill_in_vec An array to be filled in with the status values.
312 * @param[in] capacity The length of the supplied vector. A length of zero causes
313 * the function to return the number of status values, in the
314 * normal way, without attempting to store any values.
315 * @return The number of rule status values from rules that determined
316 * the most recent boundary returned by the break iterator.
317 * @exception #I18N_ERROR_NONE Successful
318 * @exception #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
320 int32_t i18n_ubrk_get_rule_status_vec (i18n_ubreak_iterator_h break_iter, int32_t *fill_in_vec, int32_t capacity);
323 * @brief Returns the locale of the break iterator. You can choose between the valid and the actual locale.
325 * @remarks The specific error code can be obtained using the get_last_result() method.
326 * Error codes are described in Exceptions section and in #i18n_error_code_e description.
329 * @param[in] break_iter Break iterator. Must not be @c NULL.
330 * @param[in] type Locale type (valid or actual).
331 * @return locale string
332 * @exception #I18N_ERROR_NONE Successful
333 * @exception #I18N_ERROR_INVALID_PARAMETER Invalid function parameter
335 const char *i18n_ubrk_get_locale_by_type (const i18n_ubreak_iterator_h break_iter, i18n_ulocale_data_locale_type_e type);