libidn: stringprep_utf8_to_ucs4 now rejects invalid UTF-8. CVE-2015-2059

author Seonah Moon <seonah1.moon@samsung.com>

Thu, 4 Feb 2021 02:22:46 +0000 (11:22 +0900)

committer Seonah Moon <seonah1.moon@samsung.com>

Thu, 4 Feb 2021 02:30:39 +0000 (11:30 +0900)
author Seonah Moon <seonah1.moon@samsung.com>
Thu, 4 Feb 2021 02:22:46 +0000 (11:22 +0900)
committer Seonah Moon <seonah1.moon@samsung.com>
Thu, 4 Feb 2021 02:30:39 +0000 (11:30 +0900)
diff --git a/NEWS b/NEWS

index 46106623c2da4ce9420ca6986da60850e475d855..fb372fd1ece9ba56644470f3bac7ee6320aacdf8 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,37 @@ Libidn NEWS -- History of user-visible changes.                 -*- outline -*-
  Copyright (C) 2002-2011 Simon Josefsson
  See the end for copying conditions.
  
+* Backported
+
+** libidn: stringprep_utf8_to_ucs4 now rejects invalid UTF-8. CVE-2015-2059
+This function has always been documented to not validate that the
+input UTF-8 string is actually valid UTF-8.  Like the rest of the API,
+when you call a function that works on UTF-8 data, you have to pass it
+valid UTF-8 data.  Application writers appear to have difficulties
+using interfaces designed like that, as bugs triggered by invalid
+UTF-8 have been identified in a number of projects (jabberd2, gnutls,
+wget, and curl).  While we could introduce a new API to perform UTF-8
+validation, so that applications can easily implement the proper
+checks, this appears error prone because there is a risk that the check
+will be forgotten.  Instead, we took the more radical approach of
+modifying the documentation and the implementation of the API.  The
+intention is that all functions that accept UTF-8 data should
+validate it before use.  This will solve the problem for applications,
+without needing to change them.  This change has the unfortunate
+side-effect that Surrogate codes (see section 5.5 of RFC 3454) no
+longer trigger the STRINGPREP_CONTAINS_PROHIBITED error code but
+instead will trigger the newly introduced STRINGPREP_ICONV_ERROR error
+code, as the gnulib/libunistring-based code that we use to test
+UTF-8-compliance rejects Surrogate codes.  We hope that this is an
+acceptable cost to live with in order to improve application security.
+We welcome feedback on this solution, and we are marking this release
+as beta rather than stable to signal that we may reconsider this
+approach if people disagree.  Reported by several people including
+Thijs Alkemade, Gustavo Grieco, Daniel Stenberg, and Nikos
+Mavrogiannopoulos.
+
+** libidn: Added STRINGPREP_ICONV_ERROR error code.
+
  * Version 1.23 (released 2011-11-25) [stable]
  
  ** stringprep.h: Now #include's sys/types.h instead of unistd.h for ssize_t.
diff --git a/configure b/configure

index 37d75342927979eee32f906d344a34f5b2876b06..94747334262f08b212c7f416aa0ad547ba8dd1be 100755 (executable)
--- a/configure
+++ b/configure
@@ -692,6 +692,8 @@ LIBUNISTRING_COMPILE_UNISTR_U8_UCTOMB_FALSE
  LIBUNISTRING_COMPILE_UNISTR_U8_UCTOMB_TRUE
  LIBUNISTRING_COMPILE_UNISTR_U8_MBTOUCR_FALSE
  LIBUNISTRING_COMPILE_UNISTR_U8_MBTOUCR_TRUE
+LIBUNISTRING_COMPILE_UNISTR_U8_CHECK_FALSE
+LIBUNISTRING_COMPILE_UNISTR_U8_CHECK_TRUE
  LIBUNISTRING_UNISTR_H
  HAVE_VISIBILITY
  CFLAG_VISIBILITY
@@ -25170,6 +25172,35 @@ $as_echo "#define GNULIB_TEST_STRVERSCMP 1" >>confdefs.h
  
  
  
+       if  { test "$HAVE_LIBUNISTRING" != yes \
+    || {
+
+
+
+            test $LIBUNISTRING_VERSION_MAJOR -lt 0 \
+            || { test $LIBUNISTRING_VERSION_MAJOR -eq 0 \
+                 && { test $LIBUNISTRING_VERSION_MINOR -lt 9 \
+                      || { test $LIBUNISTRING_VERSION_MINOR -eq 9 \
+                           && test $LIBUNISTRING_VERSION_SUBMINOR -lt 0
+                         }
+                    }
+               }
+
+
+
+
+       }
+  }; then
+  LIBUNISTRING_COMPILE_UNISTR_U8_CHECK_TRUE=
+  LIBUNISTRING_COMPILE_UNISTR_U8_CHECK_FALSE='#'
+else
+  LIBUNISTRING_COMPILE_UNISTR_U8_CHECK_TRUE='#'
+  LIBUNISTRING_COMPILE_UNISTR_U8_CHECK_FALSE=
+fi
+
+
+
+
  cat >>confdefs.h <<_ACEOF
  #define GNULIB_UNISTR_U8_MBTOUCR 1
  _ACEOF
@@ -31405,6 +31436,10 @@ if test -z "${GL_GENERATE_STDDEF_H_TRUE}" && test -z "${GL_GENERATE_STDDEF_H_FAL
    as_fn_error $? "conditional \"GL_GENERATE_STDDEF_H\" was never defined.
  Usually this means the macro was only invoked conditionally." "$LINENO" 5
  fi
+if test -z "${LIBUNISTRING_COMPILE_UNISTR_U8_CHECK_TRUE}" && test -z "${LIBUNISTRING_COMPILE_UNISTR_U8_CHECK_FALSE}"; then
+  as_fn_error $? "conditional \"LIBUNISTRING_COMPILE_UNISTR_U8_CHECK\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
  if test -z "${LIBUNISTRING_COMPILE_UNISTR_U8_MBTOUCR_TRUE}" && test -z "${LIBUNISTRING_COMPILE_UNISTR_U8_MBTOUCR_FALSE}"; then
    as_fn_error $? "conditional \"LIBUNISTRING_COMPILE_UNISTR_U8_MBTOUCR\" was never defined.
  Usually this means the macro was only invoked conditionally." "$LINENO" 5
diff --git a/lib/gl/Makefile.in b/lib/gl/Makefile.in

index 67c55ebfd4c7255807bfc20fdfbe6fd4d0804ac7..ddd6ddb2520d05e2bd06ea0e11757c397d1cabae 100644 (file)
--- a/lib/gl/Makefile.in
+++ b/lib/gl/Makefile.in
@@ -80,8 +80,9 @@ host_triplet = @host@
  DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \
         $(srcdir)/Makefile.in $(srcdir)/gnulib.mk
  @GL_COND_LIBTOOL_TRUE@am__append_1 = $(LTLIBICONV)
-@LIBUNISTRING_COMPILE_UNISTR_U8_MBTOUCR_TRUE@am__append_2 = unistr/u8-mbtoucr.c
-@LIBUNISTRING_COMPILE_UNISTR_U8_UCTOMB_TRUE@am__append_3 = unistr/u8-uctomb.c unistr/u8-uctomb-aux.c
+@LIBUNISTRING_COMPILE_UNISTR_U8_CHECK_TRUE@am__append_2 = unistr/u8-check.c
+@LIBUNISTRING_COMPILE_UNISTR_U8_MBTOUCR_TRUE@am__append_3 = unistr/u8-mbtoucr.c
+@LIBUNISTRING_COMPILE_UNISTR_U8_UCTOMB_TRUE@am__append_4 = unistr/u8-uctomb.c unistr/u8-uctomb-aux.c
  subdir = lib/gl
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/lib/gl/m4/gnulib-comp.m4 \
@@ -168,11 +169,13 @@ am__libgnu_la_SOURCES_DIST = c-ctype.h c-ctype.c c-strcase.h \
         c-strcasecmp.c c-strncasecmp.c gettext.h striconv.h striconv.c \
         unistr/u8-mbtoucr.c unistr/u8-uctomb.c unistr/u8-uctomb-aux.c
  am__dirstamp = $(am__leading_dot)dirstamp
-@LIBUNISTRING_COMPILE_UNISTR_U8_MBTOUCR_TRUE@am__objects_1 = unistr/u8-mbtoucr.lo
-@LIBUNISTRING_COMPILE_UNISTR_U8_UCTOMB_TRUE@am__objects_2 = unistr/u8-uctomb.lo \
+@LIBUNISTRING_COMPILE_UNISTR_U8_CHECK_TRUE@am__objects_1 =  \
+@LIBUNISTRING_COMPILE_UNISTR_U8_CHECK_TRUE@ unistr/u8-check.lo
+@LIBUNISTRING_COMPILE_UNISTR_U8_MBTOUCR_TRUE@am__objects_2 = unistr/u8-mbtoucr.lo
+@LIBUNISTRING_COMPILE_UNISTR_U8_UCTOMB_TRUE@am__objects_3 = unistr/u8-uctomb.lo \
  @LIBUNISTRING_COMPILE_UNISTR_U8_UCTOMB_TRUE@   unistr/u8-uctomb-aux.lo
  am_libgnu_la_OBJECTS = c-ctype.lo c-strcasecmp.lo c-strncasecmp.lo \
-       striconv.lo $(am__objects_1) $(am__objects_2)
+       striconv.lo $(am__objects_1) $(am__objects_2) $(am__objects_3)
  libgnu_la_OBJECTS = $(am_libgnu_la_OBJECTS)
  AM_V_lt = $(am__v_lt_$(V))
  am__v_lt_ = $(am__v_lt_$(AM_DEFAULT_VERBOSITY))
diff --git a/lib/gl/gnulib.mk b/lib/gl/gnulib.mk

index 3722aaca6b652bb238832a57a3749bdcf73c1cb6..da4333f2bf41528f040656defd20a1b94acdc2e3 100644 (file)
--- a/lib/gl/gnulib.mk
+++ b/lib/gl/gnulib.mk
@@ -473,6 +473,14 @@ EXTRA_DIST += unistr.in.h
  
  ## end   gnulib module unistr/base
  
+## begin gnulib module unistr/u8-check
+
+if LIBUNISTRING_COMPILE_UNISTR_U8_CHECK
+libgnu_la_SOURCES += unistr/u8-check.c
+endif
+
+## end   gnulib module unistr/u8-check
+
  ## begin gnulib module unistr/u8-mbtoucr
  
  if LIBUNISTRING_COMPILE_UNISTR_U8_MBTOUCR
diff --git a/lib/gl/m4/gnulib-cache.m4 b/lib/gl/m4/gnulib-cache.m4

index 4b88a607f1b99de51a0dd1d18d8e0e149890cf54..9f45b5432566bc2cea737e332232c47372242e52 100644 (file)
--- a/lib/gl/m4/gnulib-cache.m4
+++ b/lib/gl/m4/gnulib-cache.m4
@@ -39,6 +39,7 @@ gl_MODULES([
    stdint
    striconv
    strverscmp
+  unistr/u8-check
  ])
  gl_AVOID([iconv-h-tests string-tests wchar-tests])
  gl_SOURCE_BASE([lib/gl])
diff --git a/lib/gl/m4/gnulib-comp.m4 b/lib/gl/m4/gnulib-comp.m4

index 62cb1b2c171b05d1a9ce14cb8c97b9958d3591d8..c4832ac323924cc25c49ac598dfa1020fa1762ac 100644 (file)
--- a/lib/gl/m4/gnulib-comp.m4
+++ b/lib/gl/m4/gnulib-comp.m4
@@ -168,6 +168,7 @@ if test $HAVE_STRVERSCMP = 0; then
  fi
  gl_STRING_MODULE_INDICATOR([strverscmp])
  gl_LIBUNISTRING_LIBHEADER([0.9.2], [unistr.h])
+gl_LIBUNISTRING_MODULE([0.9], [unistr/u8-check])
  gl_MODULE_INDICATOR([unistr/u8-mbtoucr])
  gl_LIBUNISTRING_MODULE([0.9], [unistr/u8-mbtoucr])
  gl_MODULE_INDICATOR([unistr/u8-uctomb])
@@ -393,6 +394,7 @@ AC_DEFUN([lgl_FILE_LIST], [
    lib/string.in.h
    lib/strverscmp.c
    lib/unistr.in.h
+  lib/unistr/u8-check.c
    lib/unistr/u8-mbtoucr.c
    lib/unistr/u8-uctomb-aux.c
    lib/unistr/u8-uctomb.c
diff --git a/lib/gl/unistr/u8-check.c b/lib/gl/unistr/u8-check.c

new file mode 100644 (file)

index 0000000..0e87f11
--- /dev/null
+++ b/lib/gl/unistr/u8-check.c
@@ -0,0 +1,105 @@
+/* Check UTF-8 string.
+   Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 2.1 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "unistr.h"
+
+const uint8_t *
+u8_check (const uint8_t *s, size_t n)
+{
+  const uint8_t *s_end = s + n;
+
+  while (s < s_end)
+    {
+      /* Keep in sync with unistr.h and u8-mbtouc-aux.c.  */
+      uint8_t c = *s;
+
+      if (c < 0x80)
+        {
+          s++;
+          continue;
+        }
+      if (c >= 0xc2)
+        {
+          if (c < 0xe0)
+            {
+              if (s + 2 <= s_end
+                  && (s[1] ^ 0x80) < 0x40)
+                {
+                  s += 2;
+                  continue;
+                }
+            }
+          else if (c < 0xf0)
+            {
+              if (s + 3 <= s_end
+                  && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+                  && (c >= 0xe1 || s[1] >= 0xa0)
+                  && (c != 0xed || s[1] < 0xa0))
+                {
+                  s += 3;
+                  continue;
+                }
+            }
+          else if (c < 0xf8)
+            {
+              if (s + 4 <= s_end
+                  && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+                  && (s[3] ^ 0x80) < 0x40
+                  && (c >= 0xf1 || s[1] >= 0x90)
+#if 1
+                  && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+#endif
+                 )
+                {
+                  s += 4;
+                  continue;
+                }
+            }
+#if 0
+          else if (c < 0xfc)
+            {
+              if (s + 5 <= s_end
+                  && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+                  && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+                  && (c >= 0xf9 || s[1] >= 0x88))
+                {
+                  s += 5;
+                  continue;
+                }
+            }
+          else if (c < 0xfe)
+            {
+              if (s + 6 <= s_end
+                  && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+                  && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+                  && (s[5] ^ 0x80) < 0x40
+                  && (c >= 0xfd || s[1] >= 0x84))
+                {
+                  s += 6;
+                  continue;
+                }
+            }
+#endif
+        }
+      /* invalid or incomplete multibyte character */
+      return s;
+    }
+  return NULL;
+}
diff --git a/lib/nfkc.c b/lib/nfkc.c

index 9004bdaff36f3b759e7b9903de04406adc8b975d..653ef75b78e001dd97373c8af7d1f94f4d6ddadc 100644 (file)
--- a/lib/nfkc.c
+++ b/lib/nfkc.c
@@ -944,6 +944,8 @@ stringprep_unichar_to_utf8 (uint32_t c, char *outbuf)
    return g_unichar_to_utf8 (c, outbuf);
  }
  
+#include <unistr.h>
+
  /**
   * stringprep_utf8_to_ucs4:
   * @str: a UTF-8 encoded string
@@ -952,16 +954,25 @@ stringprep_unichar_to_utf8 (uint32_t c, char *outbuf)
   * @items_written: location to store the number of characters in the
   *                 result, or %NULL.
   *
- * Convert a string from UTF-8 to a 32-bit fixed width
- * representation as UCS-4, assuming valid UTF-8 input.
- * This function does no error checking on the input.
- *
+ * Convert a string from UTF-8 to a 32-bit fixed width representation
+ * as UCS-4.  The function now performs error checking to verify that
+ * the input is valid UTF-8 (before it was documented to not do error
+ * checking).  Returns %NULL if the input is not valid UTF-8.
   * Return value: a pointer to a newly allocated UCS-4 string.
   *               This value must be deallocated by the caller.
   **/
  uint32_t *
  stringprep_utf8_to_ucs4 (const char *str, ssize_t len, size_t * items_written)
  {
+  size_t n;
+
+  if (len < 0)
+    n = strlen (str);
+  else
+    n = len;
+
+  if (u8_check ((const uint8_t *) str, n))
+    return NULL;
    return g_utf8_to_ucs4_fast (str, (glong) len, (glong *) items_written);
  }
  
diff --git a/lib/strerror-idna.c b/lib/strerror-idna.c

index 9e53d4f3fa25413cb79c592fec2aa65814fae339..bc86a10c1ed522fda97b4b2275a433d0c3c83f68 100644 (file)
--- a/lib/strerror-idna.c
+++ b/lib/strerror-idna.c
@@ -108,7 +108,7 @@ idna_strerror (Idna_rc rc)
        break;
  
      case IDNA_ICONV_ERROR:
-      p = _("System iconv failed");
+      p = _("Could not convert string in locale encoding");
        break;
  
      case IDNA_MALLOC_ERROR:
diff --git a/lib/strerror-stringprep.c b/lib/strerror-stringprep.c

index 3e58245d00c0181a4f7c3b413b1c690498f8924c..3d736e155815844b2aaf4dd55beb2dac4766cc0b 100644 (file)
--- a/lib/strerror-stringprep.c
+++ b/lib/strerror-stringprep.c
@@ -58,6 +58,7 @@
   *   This usually indicate a problem in the calling application.
   * STRINGPREP_UNKNOWN_PROFILE: The supplied profile name was not
   *   known to the library.
+ * STRINGPREP_ICONV_ERROR: Could not convert string in locale encoding.
   * STRINGPREP_NFKC_FAILED: The Unicode NFKC operation failed.  This
   *   usually indicate an internal error in the library.
   * STRINGPREP_MALLOC_ERROR: The malloc() was out of memory.  This is
@@ -115,6 +116,10 @@ stringprep_strerror (Stringprep_rc rc)
        p = _("Unknown profile");
        break;
  
+    case STRINGPREP_ICONV_ERROR:
+      p = _("Could not convert string in locale encoding.");
+      break;
+
      case STRINGPREP_NFKC_FAILED:
        p = _("Unicode normalization failed (internal error)");
        break;
diff --git a/lib/stringprep.c b/lib/stringprep.c

index 15be036f28a442c6835a258886aa97f61a6e395a..4d23a8aa40c6dd6f7861cfc8195e2e83705c568e 100644 (file)
--- a/lib/stringprep.c
+++ b/lib/stringprep.c
@@ -373,6 +373,8 @@ stringprep (char *in,
  
        free (ucs4);
        ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
+         if (ucs4 == NULL)
+       return STRINGPREP_ICONV_ERROR;
        maxucs4len = ucs4len + adducs4len;
        newp = realloc (ucs4, maxucs4len * sizeof (uint32_t));
        if (!newp)
@@ -395,7 +397,7 @@ stringprep (char *in,
    utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
    free (ucs4);
    if (!utf8)
-    return STRINGPREP_MALLOC_ERROR;
+    return STRINGPREP_ICONV_ERROR;
  
    if (strlen (utf8) >= maxlen)
      {
@@ -583,6 +585,7 @@ stringprep_profile (const char *in,
   *   This usually indicate a problem in the calling application.
   * @STRINGPREP_UNKNOWN_PROFILE: The supplied profile name was not
   *   known to the library.
+ * @STRINGPREP_ICONV_ERROR: Could not convert string in locale encoding.
   * @STRINGPREP_NFKC_FAILED: The Unicode NFKC operation failed.  This
   *   usually indicate an internal error in the library.
   * @STRINGPREP_MALLOC_ERROR: The malloc() was out of memory.  This is
diff --git a/lib/stringprep.h b/lib/stringprep.h

index 34835e4fe690c6489bff7954d9dad4bb156c623c..a904506ebb964bb9f44a4128f18cfcc82d8d8a2e 100644 (file)
--- a/lib/stringprep.h
+++ b/lib/stringprep.h
@@ -61,6 +61,7 @@ extern "C"
      STRINGPREP_PROFILE_ERROR = 101,
      STRINGPREP_FLAG_ERROR = 102,
      STRINGPREP_UNKNOWN_PROFILE = 103,
+    STRINGPREP_ICONV_ERROR = 104,
      /* Internal errors. */
      STRINGPREP_NFKC_FAILED = 200,
      STRINGPREP_MALLOC_ERROR = 201
author	Seonah Moon <seonah1.moon@samsung.com>
	Thu, 4 Feb 2021 02:22:46 +0000 (11:22 +0900)
committer	Seonah Moon <seonah1.moon@samsung.com>
	Thu, 4 Feb 2021 02:30:39 +0000 (11:30 +0900)
NEWS		patch \| blob \| history
configure		patch \| blob \| history
lib/gl/Makefile.in		patch \| blob \| history
lib/gl/gnulib.mk		patch \| blob \| history
lib/gl/m4/gnulib-cache.m4		patch \| blob \| history
lib/gl/m4/gnulib-comp.m4		patch \| blob \| history
lib/gl/unistr/u8-check.c	[new file with mode: 0644]	patch \| blob
lib/nfkc.c		patch \| blob \| history
lib/strerror-idna.c		patch \| blob \| history
lib/strerror-stringprep.c		patch \| blob \| history
lib/stringprep.c		patch \| blob \| history
lib/stringprep.h		patch \| blob \| history