src/net/android/java/src/org/chromium/net/NetStringUtil.java

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 package org.chromium.net;
   6
   7 import org.chromium.base.CalledByNative;
   8 import org.chromium.base.JNINamespace;
   9
  10 import java.nio.ByteBuffer;
  11 import java.nio.charset.Charset;
  12 import java.nio.charset.CharsetDecoder;
  13 import java.nio.charset.CodingErrorAction;
  14 import java.text.Normalizer;
  15
  16 /**
  17  * Utility functions for converting strings between formats when not built with
  18  * icu.
  19  */
  20 @JNINamespace("net::android")
  21 public class NetStringUtil {
  22     /**
  23      * Attempts to convert text in a given character set to a Unicode string.
  24      * Returns null on failure.
  25      * @param text ByteBuffer containing the character array to convert.
  26      * @param charsetName Character set it's in encoded in.
  27      * @return: Unicode string on success, null on failure.
  28      */
  29     @CalledByNative
  30     private static String convertToUnicode(
  31             ByteBuffer text,
  32             String charsetName) {
  33         try {
  34             Charset charset = Charset.forName(charsetName);
  35             CharsetDecoder decoder = charset.newDecoder();
  36             // On invalid characters, this will throw an exception.
  37             return decoder.decode(text).toString();
  38         } catch (Exception e) {
  39             return null;
  40         }
  41     }
  42
  43     /**
  44      * Attempts to convert text in a given character set to a Unicode string,
  45      * and normalize it.  Returns null on failure.
  46      * @param text ByteBuffer containing the character array to convert.
  47      * @param charsetName Character set it's in encoded in.
  48      * @return: Unicode string on success, null on failure.
  49      */
  50     @CalledByNative
  51     private static String convertToUnicodeAndNormalize(
  52             ByteBuffer text,
  53             String charsetName) {
  54         String unicodeString = convertToUnicode(text, charsetName);
  55         if (unicodeString == null) return null;
  56         return Normalizer.normalize(unicodeString, Normalizer.Form.NFC);
  57     }
  58
  59     /**
  60      * Convert text in a given character set to a Unicode string.  Any invalid
  61      * characters are replaced with U+FFFD.  Returns null if the character set
  62      * is not recognized.
  63      * @param text ByteBuffer containing the character array to convert.
  64      * @param charsetName Character set it's in encoded in.
  65      * @return: Unicode string on success, null on failure.
  66      */
  67     @CalledByNative
  68     private static String convertToUnicodeWithSubstitutions(
  69             ByteBuffer text,
  70             String charsetName) {
  71         try {
  72             Charset charset = Charset.forName(charsetName);
  73
  74             // TODO(mmenke):  Investigate if Charset.decode() can be used
  75             // instead.  The question is whether it uses the proper replace
  76             // character.  JDK CharsetDecoder docs say U+FFFD is the default,
  77             // but Charset.decode() docs say it uses the "charset's default
  78             // replacement byte array".
  79             CharsetDecoder decoder = charset.newDecoder();
  80             decoder.onMalformedInput(CodingErrorAction.REPLACE);
  81             decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  82             decoder.replaceWith("\uFFFD");
  83             return decoder.decode(text).toString();
  84         } catch (Exception e) {
  85             return null;
  86         }
  87     }
  88 }