for (i = 0; i < len; ++i) {
const term_seq *seq;
- const uint32_t *str;
+ uint32_t *str;
size_t n_str, j;
- str = term_utf8_decode(&t->utf8, &n_str, buf[i]);
+ n_str = term_utf8_decode(&t->utf8, &str, buf[i]);
for (j = 0; j < n_str; ++j) {
type = term_parser_feed(t->parser, &seq, str[j]);
if (type < 0) {
/**
* term_utf8_decode() - Try decoding the next UCS-4 character
* @p: decoder object to operate on or NULL
- * @out_len: output buffer for length of decoded UCS-4 string or NULL
+ * @out_buf: output storage for pointer to decoded UCS-4 string or NULL
* @c: next char to push into decoder
*
* This decodes a UTF-8 stream. It must be called for each input-byte of the
- * UTF-8 stream and returns a UCS-4 stream. The length of the returned UCS-4
- * string (number of parsed characters) is stored in @out_len if non-NULL. A
- * pointer to the string is returned (or NULL if none was parsed). The string
- * is not zero-terminated! Furthermore, the string is only valid until the next
- * invokation of this function. It is also bound to the parser-state @p.
+ * UTF-8 stream and returns a UCS-4 stream. A pointer to the parsed UCS-4
+ * string is stored in @out_buf if non-NULL. The length of this string (number
+ * of parsed UCS-4 characters) is returned as result. The string is not
+ * zero-terminated! Furthermore, the string is only valid until the next
+ * invocation of this function. It is also bound to the parser state @p and
+ * must not be freed nor written to by the caller.
*
* This function is highly optimized to work with terminal-emulators. Instead
* of being strict about UTF-8 validity, this tries to perform a fallback to
* no helpers to do that for you. To initialize it, simply reset it to all
* zero. You can reset or free the object at any point in time.
*
- * Returns: Pointer to the UCS-4 string or NULL.
+ * Returns: Number of parsed UCS-4 characters
*/
-const uint32_t *term_utf8_decode(term_utf8 *p, size_t *out_len, char c) {
+size_t term_utf8_decode(term_utf8 *p, uint32_t **out_buf, char c) {
+ static uint32_t ucs4_null = 0;
uint32_t t, *res = NULL;
uint8_t byte;
size_t len = 0;
p->n_bytes = 0;
out:
- if (out_len)
- *out_len = len;
- return len > 0 ? res : NULL;
+ if (out_buf)
+ *out_buf = res ? : &ucs4_null;
+ return len;
}
/*
}
int term_screen_feed_text(term_screen *screen, const uint8_t *in, size_t size) {
- const uint32_t *ucs4_str;
+ uint32_t *ucs4_str;
size_t i, j, ucs4_len;
const term_seq *seq;
int r;
* 8bit mode if the stream is not valid UTF-8. This should be more than
* enough to support old 7bit/8bit modes. */
for (i = 0; i < size; ++i) {
- ucs4_str = term_utf8_decode(&screen->utf8, &ucs4_len, in[i]);
+ ucs4_len = term_utf8_decode(&screen->utf8, &ucs4_str, in[i]);
for (j = 0; j < ucs4_len; ++j) {
r = term_parser_feed(screen->parser, &seq, ucs4_str[j]);
if (r < 0) {
};
size_t term_utf8_encode(char *out_utf8, uint32_t g);
-const uint32_t *term_utf8_decode(term_utf8 *p, size_t *out_len, char c);
+size_t term_utf8_decode(term_utf8 *p, uint32_t **out_buf, char c);
/*
* Parsers
static void test_term_utf8_invalid(void) {
term_utf8 p = { };
- const uint32_t *res;
+ uint32_t *res;
size_t len;
- res = term_utf8_decode(NULL, NULL, 0);
- assert_se(res == NULL);
+ len = term_utf8_decode(NULL, NULL, 0);
+ assert_se(!len);
- res = term_utf8_decode(&p, NULL, 0);
- assert_se(res != NULL);
-
- len = 5;
- res = term_utf8_decode(NULL, &len, 0);
- assert_se(res == NULL);
- assert_se(len == 0);
+ len = term_utf8_decode(&p, NULL, 0);
+ assert_se(len == 1);
- len = 5;
- res = term_utf8_decode(&p, &len, 0);
+ res = NULL;
+ len = term_utf8_decode(NULL, &res, 0);
+ assert_se(!len);
assert_se(res != NULL);
+ assert_se(!*res);
+
+ len = term_utf8_decode(&p, &res, 0);
assert_se(len == 1);
+ assert_se(res != NULL);
+ assert_se(!*res);
- len = 5;
- res = term_utf8_decode(&p, &len, 0xCf);
- assert_se(res == NULL);
+ len = term_utf8_decode(&p, &res, 0xCf);
assert_se(len == 0);
-
- len = 5;
- res = term_utf8_decode(&p, &len, 0x0);
assert_se(res != NULL);
+ assert_se(!*res);
+
+ len = term_utf8_decode(&p, &res, 0);
assert_se(len == 2);
+ assert_se(res != NULL);
+ assert_se(res[0] == 0xCf && res[1] == 0);
}
static void test_term_utf8_range(void) {
term_utf8 p = { };
- const uint32_t *res;
+ uint32_t *res;
char u8[4];
uint32_t i, j;
size_t ulen, len;
continue;
for (j = 0; j < ulen; ++j) {
- res = term_utf8_decode(&p, &len, u8[j]);
- if (!res) {
+ len = term_utf8_decode(&p, &res, u8[j]);
+ if (len < 1) {
assert_se(j + 1 != ulen);
continue;
}
0x00F0, 0x0080, 0x0080, 0x0001,
};
term_utf8 p = { };
- const uint32_t *res;
+ uint32_t *res;
unsigned int i, j;
size_t len;
for (i = 0, j = 0; i < sizeof(source); ++i) {
- res = term_utf8_decode(&p, &len, source[i]);
- if (!res)
+ len = term_utf8_decode(&p, &res, source[i]);
+ if (len < 1)
continue;
assert_se(j + len <= ELEMENTSOF(result));