From: Mark H Weaver Date: Tue, 7 May 2019 01:11:26 +0000 (-0400) Subject: Strings, i18n: Limit the use of alloca to approximately 8 kilobytes. X-Git-Tag: v2.9.2~2^2~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7c2b48a6bd4b7ccd043b2e19471b498dc66a073d;p=platform%2Fupstream%2Fguile.git Strings, i18n: Limit the use of alloca to approximately 8 kilobytes. * libguile/i18n.c (SCM_MAX_ALLOCA): New macro. (SCM_STRING_TO_U32_BUF): Accept an additional variable to remember whether we used malloc to allocate the buffer. Use malloc if the allocation size is greater than SCM_MAX_ALLOCA. (SCM_CLEANUP_U32_BUF): New macro. (compare_u32_strings, compare_u32_strings_ci, str_to_case): Adapt. * libguile/strings.c (SCM_MAX_ALLOCA): New macro. (normalize_str, unistring_escapes_to_r6rs_escapes): Use malloc if the allocation size is greater than SCM_MAX_ALLOCA. * test-suite/tests/i18n.test, test-suite/tests/strings.test: Add tests. --- diff --git a/libguile/i18n.c b/libguile/i18n.c index 6c87fdaae..5e6783700 100644 --- a/libguile/i18n.c +++ b/libguile/i18n.c @@ -40,6 +40,10 @@ #include #include +#ifndef SCM_MAX_ALLOCA +# define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */ +#endif + #if defined HAVE_NEWLOCALE && defined HAVE_STRCOLL_L && defined HAVE_USELOCALE /* The GNU thread-aware locale API is documented in ``Thread-Aware Locale Model, a Proposal'', by Ulrich Drepper: @@ -743,23 +747,35 @@ SCM_DEFINE (scm_locale_p, "locale?", 1, 0, 0, A similar API can be found in MzScheme starting from version 200: http://download.plt-scheme.org/chronology/mzmr200alpha14.html . */ -#define SCM_STRING_TO_U32_BUF(s1, c_s1) \ - do \ - { \ - if (scm_i_is_narrow_string (s1)) \ - { \ - size_t i, len; \ - const char *buf = scm_i_string_chars (s1); \ - \ - len = scm_i_string_length (s1); \ - c_s1 = alloca (sizeof (scm_t_wchar) * (len + 1)); \ - \ - for (i = 0; i < len; i ++) \ - c_s1[i] = (unsigned char ) buf[i]; \ - c_s1[len] = 0; \ - } \ - else \ - c_s1 = (scm_t_wchar *) scm_i_string_wide_chars (s1); \ +#define SCM_STRING_TO_U32_BUF(str, c_str, c_str_malloc_p) \ + do \ + { \ + if (scm_i_is_narrow_string (str)) \ + { \ + size_t i, len, bytes; \ + const char *buf = scm_i_string_chars (str); \ + \ + len = scm_i_string_length (str); \ + bytes = (len + 1) * sizeof (scm_t_wchar); \ + c_str_malloc_p = (bytes > SCM_MAX_ALLOCA); \ + c_str = c_str_malloc_p ? malloc (bytes) : alloca (bytes); \ + \ + for (i = 0; i < len; i ++) \ + c_str[i] = (unsigned char ) buf[i]; \ + c_str[len] = 0; \ + } \ + else \ + { \ + c_str_malloc_p = 0; \ + c_str = (scm_t_wchar *) scm_i_string_wide_chars (str); \ + } \ + } while (0) + +#define SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p) \ + do \ + { \ + if (c_str_malloc_p) \ + free (c_str); \ } while (0) @@ -773,10 +789,11 @@ compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name) int result; scm_t_locale c_locale; scm_t_wchar *c_s1, *c_s2; + int c_s1_malloc_p, c_s2_malloc_p; SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale); - SCM_STRING_TO_U32_BUF (s1, c_s1); - SCM_STRING_TO_U32_BUF (s2, c_s2); + SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p); + SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p); if (c_locale) RUN_IN_LOCALE_SECTION (c_locale, @@ -786,6 +803,9 @@ compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name) result = u32_strcoll ((const scm_t_uint32 *) c_s1, (const scm_t_uint32 *) c_s2); + SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p); + SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p); + scm_remember_upto_here_2 (s1, s2); scm_remember_upto_here (locale); return result; @@ -828,10 +848,11 @@ compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name) int result, ret = 0; scm_t_locale c_locale; scm_t_wchar *c_s1, *c_s2; + int c_s1_malloc_p, c_s2_malloc_p; SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale); - SCM_STRING_TO_U32_BUF (s1, c_s1); - SCM_STRING_TO_U32_BUF (s2, c_s2); + SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p); + SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p); if (c_locale) RUN_IN_LOCALE_SECTION @@ -846,6 +867,9 @@ compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name) (const scm_t_uint32 *) c_s2, &result); + SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p); + SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p); + if (SCM_UNLIKELY (ret != 0)) { errno = ret; @@ -1212,13 +1236,13 @@ str_to_case (SCM str, scm_t_locale c_locale, scm_t_wchar *c_str, *c_buf; scm_t_uint32 *c_convstr; size_t len, convlen; - int ret; + int ret, c_str_malloc_p; SCM convstr; len = scm_i_string_length (str); if (len == 0) return scm_nullstr; - SCM_STRING_TO_U32_BUF (str, c_str); + SCM_STRING_TO_U32_BUF (str, c_str, c_str_malloc_p); if (c_locale) RUN_IN_LOCALE_SECTION (c_locale, ret = @@ -1230,6 +1254,8 @@ str_to_case (SCM str, scm_t_locale c_locale, u32_locale_tocase ((scm_t_uint32 *) c_str, len, &c_convstr, &convlen, func); + SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p); + scm_remember_upto_here (str); if (SCM_UNLIKELY (ret != 0)) diff --git a/libguile/strings.c b/libguile/strings.c index 5a150278d..e5c7f87d6 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -45,6 +45,10 @@ #include "libguile/validate.h" #include "libguile/private-options.h" +#ifndef SCM_MAX_ALLOCA +# define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */ +#endif + /* {Strings} @@ -1808,6 +1812,7 @@ static void unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp) { char *before, *after; + int malloc_p; size_t i, j; /* The worst case is if the input string contains all 4-digit hex escapes. "\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */ @@ -1815,7 +1820,8 @@ unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp) size_t nzeros, ndigits; before = buf; - after = alloca (max_out_len); + malloc_p = (max_out_len > SCM_MAX_ALLOCA); + after = malloc_p ? malloc (max_out_len) : alloca (max_out_len); i = 0; j = 0; while (i < *lenp) @@ -1873,6 +1879,8 @@ unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp) } *lenp = j; memcpy (before, after, j); + if (malloc_p) + free (after); } char * @@ -2313,28 +2321,37 @@ normalize_str (SCM string, uninorm_t form) { SCM ret; scm_t_uint32 *w_str; + scm_t_uint32 *w_norm_str; scm_t_wchar *cbuf; - size_t rlen, len = scm_i_string_length (string); + int malloc_p; + size_t norm_len, len = scm_i_string_length (string); if (scm_i_is_narrow_string (string)) { - size_t i; + size_t i, bytes; const char *buf = scm_i_string_chars (string); - - w_str = alloca (sizeof (scm_t_wchar) * (len + 1)); - + + bytes = (len + 1) * sizeof (scm_t_wchar); + malloc_p = (bytes > SCM_MAX_ALLOCA); + w_str = malloc_p ? malloc (bytes) : alloca (bytes); + for (i = 0; i < len; i ++) w_str[i] = (unsigned char) buf[i]; w_str[len] = 0; } - else - w_str = (scm_t_uint32 *) scm_i_string_wide_chars (string); + else + { + malloc_p = 0; + w_str = (scm_t_uint32 *) scm_i_string_wide_chars (string); + } - w_str = u32_normalize (form, w_str, len, NULL, &rlen); - - ret = scm_i_make_wide_string (rlen, &cbuf, 0); - u32_cpy ((scm_t_uint32 *) cbuf, w_str, rlen); - free (w_str); + w_norm_str = u32_normalize (form, w_str, len, NULL, &norm_len); + + ret = scm_i_make_wide_string (norm_len, &cbuf, 0); + u32_cpy ((scm_t_uint32 *) cbuf, w_norm_str, norm_len); + free (w_norm_str); + if (malloc_p) + free (w_str); scm_i_try_narrow_string (ret); diff --git a/test-suite/tests/i18n.test b/test-suite/tests/i18n.test index 811be7b10..427aef4f5 100644 --- a/test-suite/tests/i18n.test +++ b/test-suite/tests/i18n.test @@ -78,7 +78,13 @@ (pass-if "string-locale-ci