From: Colin Walters
Date: Thu, 16 Jun 2011 18:00:36 +0000 (-0400)
Subject: GDataInputStream: Add _utf8() variants of _read_line
X-Git-Tag: 2.29.10~75
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=28254a38a7f077d5fc03939ea7c03260aabe5188;p=platform%2Fupstream%2Fglib.git

GDataInputStream: Add _utf8() variants of _read_line

These will validate the resulting line, and throw a conversion error.

In practice these will likely be used by bindings, but it's good for
even C apps too that don't want to explode if that text file they're
reading into Pango actually has invalid UTF-8.

https://bugzilla.gnome.org/show_bug.cgi?id=652758
---

diff --git a/gio/gdatainputstream.c b/gio/gdatainputstream.c
index d528c13..ff866a6 100644
--- a/gio/gdatainputstream.c
+++ b/gio/gdatainputstream.c
@@ -813,6 +813,51 @@ g_data_input_stream_read_line (GDataInputStream *stream,
   return line;
 }
 
+/**
+ * g_data_input_stream_read_line_utf8:
+ * @stream: a given #GDataInputStream.
+ * @length: (out): a #gsize to get the length of the data read in.
+ * @cancellable: (allow-none): optional #GCancellable object, %NULL to ignore.
+ * @error: #GError for error reporting.
+ *
+ * Reads a UTF-8 encoded line from the data input stream.
+ *
+ * If @cancellable is not %NULL, then the operation can be cancelled by
+ * triggering the cancellable object from another thread. If the operation
+ * was cancelled, the error %G_IO_ERROR_CANCELLED will be returned.
+ *
+ * Returns: (transfer full): a NUL terminated UTF-8 string with the
+ *  line that was read in (without the newlines).  Set @length to a
+ *  #gsize to get the length of the read line.  On an error, it will
+ *  return %NULL and @error will be set.  For UTF-8 conversion errors,
+ *  the set error domain is %G_CONVERT_ERROR.  If there's no content to
+ *  read, it will still return %NULL, but @error won't be set.
+ *
+ * Since: 2.30
+ **/
+char *
+g_data_input_stream_read_line_utf8 (GDataInputStream  *stream,
+                                    gsize             *length,
+                                    GCancellable      *cancellable,
+                                    GError           **error)
+{
+  char *res;
+
+  res = g_data_input_stream_read_line (stream, length, cancellable, error);
+  if (!res)
+    return NULL;
+
+  if (!g_utf8_validate (res, -1, NULL))
+    {
+      g_set_error_literal (error, G_CONVERT_ERROR,
+                           G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+                           _("Invalid byte sequence in conversion input"));
+      g_free (res);
+      return NULL;
+    }
+  return res;
+}
+
 static gssize
 scan_for_chars (GDataInputStream *stream,
                 gsize            *checked_out,
@@ -1212,6 +1257,49 @@ g_data_input_stream_read_line_finish (GDataInputStream *stream,
 }
 
 /**
+ * g_data_input_stream_read_line_finish_utf8:
+ * @stream: a given #GDataInputStream.
+ * @result: the #GAsyncResult that was provided to the callback.
+ * @length: (out): a #gsize to get the length of the data read in.
+ * @error: #GError for error reporting.
+ *
+ * Finish an asynchronous call started by
+ * g_data_input_stream_read_line_async().
+ *
+ * Returns: (transfer full): a string with the line that was read in
+ *  (without the newlines).  Set @length to a #gsize to get the length
+ *  of the read line.  On an error, it will return %NULL and @error
+ *  will be set.  For UTF-8 conversion errors, the set error domain is
+ *  %G_CONVERT_ERROR.  If there's no content to read, it will still
+ *  return %NULL, but @error won't be set.
+ *
+ * Since: 2.30
+ */
+gchar *
+g_data_input_stream_read_line_finish_utf8 (GDataInputStream  *stream,
+                                           GAsyncResult      *result,
+                                           gsize             *length,
+                                           GError           **error)
+{
+  gchar *res;
+
+  res = g_data_input_stream_read_line_finish (stream, result, length, error);
+  /* NULL means end of stream or a failure already reported in @error. */
+  if (!res)
+    return NULL;
+
+  if (!g_utf8_validate (res, -1, NULL))
+    {
+      g_set_error_literal (error, G_CONVERT_ERROR,
+                           G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+                           _("Invalid byte sequence in conversion input"));
+      g_free (res);
+      return NULL;
+    }
+  return res;
+}
+
+/**
  * g_data_input_stream_read_until_finish:
  * @stream: a given #GDataInputStream.
* @result: the #GAsyncResult that was provided to the callback. diff --git a/gio/gdatainputstream.h b/gio/gdatainputstream.h index e5724d5..9a38dff 100644 --- a/gio/gdatainputstream.h +++ b/gio/gdatainputstream.h @@ -102,6 +102,10 @@ char * g_data_input_stream_read_line (GDataInputStrea gsize *length, GCancellable *cancellable, GError **error); +char * g_data_input_stream_read_line_utf8 (GDataInputStream *stream, + gsize *length, + GCancellable *cancellable, + GError **error); void g_data_input_stream_read_line_async (GDataInputStream *stream, gint io_priority, GCancellable *cancellable, @@ -111,6 +115,10 @@ char * g_data_input_stream_read_line_finish (GDataInputStrea GAsyncResult *result, gsize *length, GError **error); +char * g_data_input_stream_read_line_finish_utf8(GDataInputStream *stream, + GAsyncResult *result, + gsize *length, + GError **error); char * g_data_input_stream_read_until (GDataInputStream *stream, const gchar *stop_chars, gsize *length, diff --git a/gio/gio.symbols b/gio/gio.symbols index 942cf4c..726f67c 100644 --- a/gio/gio.symbols +++ b/gio/gio.symbols @@ -180,8 +180,10 @@ g_data_input_stream_read_uint32 g_data_input_stream_read_int64 g_data_input_stream_read_uint64 g_data_input_stream_read_line +g_data_input_stream_read_line_utf8 g_data_input_stream_read_line_async g_data_input_stream_read_line_finish +g_data_input_stream_read_line_finish_utf8 g_data_input_stream_read_until g_data_input_stream_read_until_async g_data_input_stream_read_until_finish diff --git a/gio/tests/data-input-stream.c b/gio/tests/data-input-stream.c index 1a34205..3d01a73 100644 --- a/gio/tests/data-input-stream.c +++ b/gio/tests/data-input-stream.c @@ -151,6 +151,78 @@ test_read_lines_any (void) } static void +test_read_lines_LF_valid_utf8 (void) +{ + GInputStream *stream; + GInputStream *base_stream; + GError *error = NULL; + char *line; + guint n_lines = 0; + + base_stream = g_memory_input_stream_new (); + stream = G_INPUT_STREAM (g_data_input_stream_new 
(base_stream)); + + g_memory_input_stream_add_data (G_MEMORY_INPUT_STREAM (base_stream), + "foo\nthis is valid UTF-8 ☺!\nbar\n", -1, NULL); + + /* Test read line */ + error = NULL; + while (TRUE) + { + gsize length = -1; + line = g_data_input_stream_read_line_utf8 (G_DATA_INPUT_STREAM (stream), &length, NULL, &error); + g_assert_no_error (error); + if (line == NULL) + break; + n_lines++; + g_free (line); + } + g_assert_cmpint (n_lines, ==, 3); + + g_object_unref (base_stream); + g_object_unref (stream); +} + +static void +test_read_lines_LF_invalid_utf8 (void) +{ + GInputStream *stream; + GInputStream *base_stream; + GError *error = NULL; + char *line; + guint n_lines = 0; + + base_stream = g_memory_input_stream_new (); + stream = G_INPUT_STREAM (g_data_input_stream_new (base_stream)); + + g_memory_input_stream_add_data (G_MEMORY_INPUT_STREAM (base_stream), + "foo\nthis is not valid UTF-8 \xE5 =(\nbar\n", -1, NULL); + + /* Test read line */ + error = NULL; + while (TRUE) + { + gsize length = -1; + line = g_data_input_stream_read_line_utf8 (G_DATA_INPUT_STREAM (stream), &length, NULL, &error); + if (n_lines == 0) + g_assert_no_error (error); + else + { + g_assert (error != NULL); + g_clear_error (&error); + g_free (line); + break; + } + n_lines++; + g_free (line); + } + g_assert_cmpint (n_lines, ==, 1); + + g_object_unref (base_stream); + g_object_unref (stream); +} + +static void test_read_until (void) { GInputStream *stream; @@ -417,6 +489,8 @@ main (int argc, g_test_add_func ("/data-input-stream/basic", test_basic); g_test_add_func ("/data-input-stream/read-lines-LF", test_read_lines_LF); + g_test_add_func ("/data-input-stream/read-lines-LF-valid-utf8", test_read_lines_LF_valid_utf8); + g_test_add_func ("/data-input-stream/read-lines-LF-invalid-utf8", test_read_lines_LF_invalid_utf8); g_test_add_func ("/data-input-stream/read-lines-CR", test_read_lines_CR); g_test_add_func ("/data-input-stream/read-lines-CR-LF", test_read_lines_CR_LF); g_test_add_func 
("/data-input-stream/read-lines-any", test_read_lines_any);