#include <unicase.h>
#include <unistr.h>
+#ifndef SCM_MAX_ALLOCA
+# define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */
+#endif
+
#if defined HAVE_NEWLOCALE && defined HAVE_STRCOLL_L && defined HAVE_USELOCALE
/* The GNU thread-aware locale API is documented in ``Thread-Aware Locale
Model, a Proposal'', by Ulrich Drepper:
A similar API can be found in MzScheme starting from version 200:
http://download.plt-scheme.org/chronology/mzmr200alpha14.html . */
-#define SCM_STRING_TO_U32_BUF(s1, c_s1) \
- do \
- { \
- if (scm_i_is_narrow_string (s1)) \
- { \
- size_t i, len; \
- const char *buf = scm_i_string_chars (s1); \
- \
- len = scm_i_string_length (s1); \
- c_s1 = alloca (sizeof (scm_t_wchar) * (len + 1)); \
- \
- for (i = 0; i < len; i ++) \
- c_s1[i] = (unsigned char ) buf[i]; \
- c_s1[len] = 0; \
- } \
- else \
- c_s1 = (scm_t_wchar *) scm_i_string_wide_chars (s1); \
+/* Set C_STR to an array of `scm_t_wchar' holding the characters of the
+   Scheme string STR.
+
+   For a narrow string, a fresh zero-terminated buffer of (length + 1)
+   elements is filled in.  It comes from `alloca' when it needs at most
+   SCM_MAX_ALLOCA bytes and from the heap otherwise.  C_STR_MALLOC_P is set
+   to non-zero exactly when the buffer was heap-allocated; such a buffer
+   must be released with SCM_CLEANUP_U32_BUF.
+
+   For a wide string, C_STR is the pointer returned by
+   `scm_i_string_wide_chars' (no copy is made here) and C_STR_MALLOC_P is
+   set to 0.
+
+   The heap buffer is obtained with `scm_malloc' rather than plain
+   `malloc': it signals an error instead of returning NULL, so the copy
+   loop below never writes through a null pointer.  Its memory can still
+   be released with `free'.
+   NOTE(review): if that error is signalled while an earlier heap buffer
+   from this macro is still live in the caller, that buffer is not freed.
+
+   Because `alloca' is used, this has to stay a macro that is expanded
+   directly in the function consuming C_STR.  */
+#define SCM_STRING_TO_U32_BUF(str, c_str, c_str_malloc_p) \
+  do \
+    { \
+      if (scm_i_is_narrow_string (str)) \
+        { \
+          size_t i, len, bytes; \
+          const char *buf = scm_i_string_chars (str); \
+ \
+          len = scm_i_string_length (str); \
+          bytes = (len + 1) * sizeof (scm_t_wchar); \
+          c_str_malloc_p = (bytes > SCM_MAX_ALLOCA); \
+          if (c_str_malloc_p) \
+            c_str = scm_malloc (bytes); \
+          else \
+            c_str = alloca (bytes); \
+ \
+          for (i = 0; i < len; i ++) \
+            c_str[i] = (unsigned char ) buf[i]; \
+          c_str[len] = 0; \
+        } \
+      else \
+        { \
+          c_str_malloc_p = 0; \
+          c_str = (scm_t_wchar *) scm_i_string_wide_chars (str); \
+        } \
+    } while (0)
+
+/* Release the buffer C_STR set up by SCM_STRING_TO_U32_BUF.  Only a buffer
+   flagged as heap-allocated through C_STR_MALLOC_P is passed to `free';
+   when the flag is zero (an `alloca' buffer, or the pointer taken from a
+   wide string) nothing is done.  */
+#define SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p) \
+  do \
+    { \
+      if ((c_str_malloc_p) != 0) \
+        { \
+          free (c_str); \
+        } \
} while (0)
int result;
scm_t_locale c_locale;
scm_t_wchar *c_s1, *c_s2;
+ int c_s1_malloc_p, c_s2_malloc_p;
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
- SCM_STRING_TO_U32_BUF (s1, c_s1);
- SCM_STRING_TO_U32_BUF (s2, c_s2);
+ SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p);
+ SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p);
if (c_locale)
RUN_IN_LOCALE_SECTION (c_locale,
result = u32_strcoll ((const scm_t_uint32 *) c_s1,
(const scm_t_uint32 *) c_s2);
+ SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p);
+ SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p);
+
scm_remember_upto_here_2 (s1, s2);
scm_remember_upto_here (locale);
return result;
int result, ret = 0;
scm_t_locale c_locale;
scm_t_wchar *c_s1, *c_s2;
+ int c_s1_malloc_p, c_s2_malloc_p;
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
- SCM_STRING_TO_U32_BUF (s1, c_s1);
- SCM_STRING_TO_U32_BUF (s2, c_s2);
+ SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p);
+ SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p);
if (c_locale)
RUN_IN_LOCALE_SECTION
(const scm_t_uint32 *) c_s2,
&result);
+ SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p);
+ SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p);
+
if (SCM_UNLIKELY (ret != 0))
{
errno = ret;
scm_t_wchar *c_str, *c_buf;
scm_t_uint32 *c_convstr;
size_t len, convlen;
- int ret;
+ int ret, c_str_malloc_p;
SCM convstr;
len = scm_i_string_length (str);
if (len == 0)
return scm_nullstr;
- SCM_STRING_TO_U32_BUF (str, c_str);
+ SCM_STRING_TO_U32_BUF (str, c_str, c_str_malloc_p);
if (c_locale)
RUN_IN_LOCALE_SECTION (c_locale, ret =
u32_locale_tocase ((scm_t_uint32 *) c_str, len,
&c_convstr, &convlen, func);
+ SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p);
+
scm_remember_upto_here (str);
if (SCM_UNLIKELY (ret != 0))
#include "libguile/validate.h"
#include "libguile/private-options.h"
+#ifndef SCM_MAX_ALLOCA
+# define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */
+#endif
+
\f
/* {Strings}
unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
{
char *before, *after;
+ int malloc_p;
size_t i, j;
/* The worst case is if the input string contains all 4-digit hex escapes.
"\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */
size_t nzeros, ndigits;
before = buf;
- after = alloca (max_out_len);
+ malloc_p = (max_out_len > SCM_MAX_ALLOCA);
+ after = malloc_p ? malloc (max_out_len) : alloca (max_out_len);
i = 0;
j = 0;
while (i < *lenp)
}
*lenp = j;
memcpy (before, after, j);
+ if (malloc_p)
+ free (after);
}
char *
{
SCM ret;
scm_t_uint32 *w_str;
+ scm_t_uint32 *w_norm_str;
scm_t_wchar *cbuf;
- size_t rlen, len = scm_i_string_length (string);
+ int malloc_p;
+ size_t norm_len, len = scm_i_string_length (string);
if (scm_i_is_narrow_string (string))
{
- size_t i;
+ size_t i, bytes;
const char *buf = scm_i_string_chars (string);
-
- w_str = alloca (sizeof (scm_t_wchar) * (len + 1));
-
+
+ bytes = (len + 1) * sizeof (scm_t_wchar);
+ malloc_p = (bytes > SCM_MAX_ALLOCA);
+ w_str = malloc_p ? malloc (bytes) : alloca (bytes);
+
for (i = 0; i < len; i ++)
w_str[i] = (unsigned char) buf[i];
w_str[len] = 0;
}
- else
- w_str = (scm_t_uint32 *) scm_i_string_wide_chars (string);
+ else
+ {
+ malloc_p = 0;
+ w_str = (scm_t_uint32 *) scm_i_string_wide_chars (string);
+ }
- w_str = u32_normalize (form, w_str, len, NULL, &rlen);
-
- ret = scm_i_make_wide_string (rlen, &cbuf, 0);
- u32_cpy ((scm_t_uint32 *) cbuf, w_str, rlen);
- free (w_str);
+ w_norm_str = u32_normalize (form, w_str, len, NULL, &norm_len);
+
+ ret = scm_i_make_wide_string (norm_len, &cbuf, 0);
+ u32_cpy ((scm_t_uint32 *) cbuf, w_norm_str, norm_len);
+ free (w_norm_str);
+ if (malloc_p)
+ free (w_str);
scm_i_try_narrow_string (ret);
(pass-if "string-locale-ci<?"
(and (string-locale-ci<? "hello" "WORLD")
(string-locale-ci<? "hello" "WORLD"
- (make-locale (list LC_COLLATE) "C")))))
+ (make-locale (list LC_COLLATE) "C"))))
+    (pass-if "large strings"
+      ;; Compare a 4000000-character string with itself.  In Guile <= 2.2.4,
+      ;; these would overflow the C stack and crash.
+      (let ((big (make-string 4000000 #\a)))
+        (and (string-locale-ci=? big big)
+             (not (or (string-locale-ci<? big big)
+                      (string-locale<? big big)))))))
\f
(define mingw?
(string=? "Hello, World" (string-locale-titlecase
"hello, world" (make-locale LC_ALL "C")))))
+  (pass-if "large strings"
+    ;; Each input is 700000 copies of a five-letter word combined with
+    ;; `string-join'.  In Guile <= 2.2.4, these would overflow the C stack
+    ;; and crash.
+    (let* ((copies 700000)
+           (repeat (lambda (word) (string-join (make-list copies word))))
+           (lower (repeat "hello"))
+           (upper (repeat "HELLO"))
+           (title (repeat "Hello")))
+      (and (string=? lower (string-locale-downcase title))
+           (string=? upper (string-locale-upcase title))
+           (string=? title (string-locale-titlecase lower)))))
+
(pass-if "string-locale-upcase German"
(under-german-utf8-locale-or-unresolved
(lambda ()