1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 package org.chromium.net;
7 import org.chromium.base.CalledByNative;
8 import org.chromium.base.JNINamespace;
10 import java.nio.ByteBuffer;
11 import java.nio.charset.Charset;
12 import java.nio.charset.CharsetDecoder;
13 import java.nio.charset.CodingErrorAction;
14 import java.text.Normalizer;
17 * Utility functions for converting strings between formats when not built with
20 @JNINamespace("net::android")
21 public class NetStringUtil {
23 * Attempts to convert text in a given character set to a Unicode string.
24 * Returns null on failure.
25 * @param text ByteBuffer containing the character array to convert.
26 * @param charsetName Name of the character set the text is encoded in.
27 * @return Unicode string on success, null on failure.
30 private static String convertToUnicode(
34 Charset charset = Charset.forName(charsetName);
35 CharsetDecoder decoder = charset.newDecoder();
36 // On invalid characters, this will throw an exception.
37 return decoder.decode(text).toString();
38 } catch (Exception e) {
44 * Attempts to convert text in a given character set to a Unicode string,
45 * and normalize it. Returns null on failure.
46 * @param text ByteBuffer containing the character array to convert.
47 * @param charsetName Name of the character set the text is encoded in.
48 * @return Normalized Unicode string on success, null on failure.
51 private static String convertToUnicodeAndNormalize(
54 String unicodeString = convertToUnicode(text, charsetName);
55 if (unicodeString == null) return null;
56 return Normalizer.normalize(unicodeString, Normalizer.Form.NFC);
60 * Convert text in a given character set to a Unicode string. Any invalid
61 * characters are replaced with U+FFFD. Returns null if the character set
63 * @param text ByteBuffer containing the character array to convert.
64 * @param charsetName Name of the character set the text is encoded in.
65 * @return Unicode string on success, null on failure.
68 private static String convertToUnicodeWithSubstitutions(
72 Charset charset = Charset.forName(charsetName);
74 // TODO(mmenke): Investigate if Charset.decode() can be used
75 // instead. The question is whether it uses the proper replace
76 // character. JDK CharsetDecoder docs say U+FFFD is the default,
77 // but Charset.decode() docs say it uses the "charset's default
78 // replacement byte array".
79 CharsetDecoder decoder = charset.newDecoder();
80 decoder.onMalformedInput(CodingErrorAction.REPLACE);
81 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
82 decoder.replaceWith("\uFFFD");
83 return decoder.decode(text).toString();
84 } catch (Exception e) {