From: tromey Date: Sat, 7 Jan 2006 00:46:28 +0000 (+0000) Subject: * java/lang/Character.java (SIZE, MAX_CACHE, charCache, X-Git-Tag: upstream/4.9.2~56650 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0b6d4796137e0c714e623c299af6324ec362bb3a;p=platform%2Fupstream%2Flinaro-gcc.git * java/lang/Character.java (SIZE, MAX_CACHE, charCache, MIN_SURROGATE, MAX_SURROGATE): New fields from Classpath. (MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE, MIN_LOW_SURROGATE, MAX_LOW_SURROGATE): Javadoc fixes. (valueOf, reverseBytes, isHighSurrogate, isLowSurrogate, isSurrogatePair, toCodePoint, codePointAt, codePointBefore): New methods from Classpath. * java/lang/String.java (codePointAt, codePointBefore, codePointCount, contains, replace): New methods from Classpath. (contentEquals): Declare. * java/lang/natString.cc (contentEquals): New method. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@109445 138bc75d-0d04-0410-961f-82ee72b054a4 --- diff --git a/libjava/ChangeLog b/libjava/ChangeLog index 76142c5..a5c5c40 100644 --- a/libjava/ChangeLog +++ b/libjava/ChangeLog @@ -1,3 +1,17 @@ +2006-01-06 Tom Tromey + + * java/lang/Character.java (SIZE, MAX_CACHE, charCache, + MIN_SURROGATE, MAX_SURROGATE): New fields from Classpath. + (MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE, MIN_LOW_SURROGATE, + MAX_LOW_SURROGATE): Javadoc fixes. + (valueOf, reverseBytes, isHighSurrogate, isLowSurrogate, + isSurrogatePair, toCodePoint, codePointAt, codePointBefore): New + methods from Classpath. + * java/lang/String.java (codePointAt, codePointBefore, + codePointCount, contains, replace): New methods from Classpath. + (contentEquals): Declare. + * java/lang/natString.cc (contentEquals): New method. + 2005-12-26 Anthony Green * gnu/java/nio/SocketChannelImpl.java (read): Compute the right amount diff --git a/libjava/java/lang/Character.java b/libjava/java/lang/Character.java index aa29e0b..3cb73d0 100644 --- a/libjava/java/lang/Character.java +++ b/libjava/java/lang/Character.java @@ -1,5 +1,5 @@ /* java.lang.Character -- Wrapper class for char, and Unicode subsets - Copyright (C) 1998, 1999, 2001, 2002, 2005 Free Software Foundation, Inc. + Copyright (C) 1998, 1999, 2001, 2002, 2005, 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -1040,6 +1040,18 @@ public final class Character implements Serializable, Comparable public static final Class TYPE = VMClassLoader.getPrimitiveClass('C'); /** + * The number of bits needed to represent a char. + * @since 1.5 + */ + public static final int SIZE = 16; + + // This caches some Character values, and is used by boxing + // conversions via valueOf(). We must cache at least 0..127; + // this constant controls how much we actually cache. + private static final int MAX_CACHE = 127; + private static Character[] charCache = new Character[MAX_CACHE + 1]; + + /** * Lu = Letter, Uppercase (Informative). * * @since 1.1 @@ -1434,34 +1446,48 @@ public final class Character implements Serializable, Comparable /** - * Minimum high surrrogate code in UTF-16 encoding. + * Minimum high surrogate code in UTF-16 encoding. * * @since 1.5 */ public static final char MIN_HIGH_SURROGATE = '\ud800'; /** - * Maximum high surrrogate code in UTF-16 encoding. + * Maximum high surrogate code in UTF-16 encoding. * * @since 1.5 */ public static final char MAX_HIGH_SURROGATE = '\udbff'; /** - * Minimum low surrrogate code in UTF-16 encoding. + * Minimum low surrogate code in UTF-16 encoding. * * @since 1.5 */ public static final char MIN_LOW_SURROGATE = '\udc00'; /** - * Maximum low surrrogate code in UTF-16 encoding. + * Maximum low surrogate code in UTF-16 encoding. * * @since 1.5 */ public static final char MAX_LOW_SURROGATE = '\udfff'; /** + * Minimum surrogate code in UTF-16 encoding. + * + * @since 1.5 + */ + public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; + + /** + * Maximum low surrogate code in UTF-16 encoding. + * + * @since 1.5 + */ + public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; + + /** * Grabs an attribute offset from the Unicode attribute database. The lower * 5 bits are the character type, the next 2 bits are flags, and the top * 9 bits are the offset into the attribute tables. Note that the top 9 @@ -2213,6 +2239,37 @@ public final class Character implements Serializable, Comparable } /** + * Returns an Character object wrapping the value. + * In contrast to the Character constructor, this method + * will cache some values. It is used by boxing conversion. + * + * @param val the value to wrap + * @return the Character + * + * @since 1.5 + */ + public static Character valueOf(char val) + { + if (val > MAX_CACHE) + return new Character(val); + synchronized (charCache) + { + if (charCache[val - MIN_VALUE] == null) + charCache[val - MIN_VALUE] = new Character(val); + return charCache[val - MIN_VALUE]; + } + } + + /** + * Reverse the bytes in val. + * @since 1.5 + */ + public static char reverseBytes(char val) + { + return (char) (((val >> 8) & 0xff) | ((val << 8) & 0xff00)); + } + + /** * Converts a unicode code point to a UTF-16 representation of that * code point. * @@ -2280,7 +2337,7 @@ public final class Character implements Serializable, Comparable * Return number of 16-bit characters required to represent the given * code point. * - * @param codePoint a uncode code point + * @param codePoint a unicode code point * * @return 2 if codePoint >= 0x10000, 1 otherwise. * @@ -2325,4 +2382,210 @@ public final class Character implements Serializable, Comparable { return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT; } + + /** + * Return true if the given character is a high surrogate. + * @param ch the character + * @return true if the character is a high surrogate character + * + * @since 1.5 + */ + public static boolean isHighSurrogate(char ch) + { + return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE; + } + + /** + * Return true if the given character is a low surrogate. + * @param ch the character + * @return true if the character is a low surrogate character + * + * @since 1.5 + */ + public static boolean isLowSurrogate(char ch) + { + return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE; + } + + /** + * Return true if the given characters compose a surrogate pair. + * This is true if the first character is a high surrogate and the + * second character is a low surrogate. + * @param ch1 the first character + * @param ch2 the first character + * @return true if the characters compose a surrogate pair + * + * @since 1.5 + */ + public static boolean isSurrogatePair(char ch1, char ch2) + { + return isHighSurrogate(ch1) && isLowSurrogate(ch2); + } + + /** + * Given a valid surrogate pair, this returns the corresponding + * code point. + * @param high the high character of the pair + * @param low the low character of the pair + * @return the corresponding code point + * + * @since 1.5 + */ + public static int toCodePoint(char high, char low) + { + return ((high - MIN_HIGH_SURROGATE) << 10) + (low - MIN_LOW_SURROGATE); + } + + /** + * Get the code point at the specified index in the CharSequence. + * This is like CharSequence#charAt(int), but if the character is + * the start of a surrogate pair, and there is a following + * character, and this character completes the pair, then the + * corresponding supplementary code point is returned. Otherwise, + * the character at the index is returned. + * + * @param sequence the CharSequence + * @param index the index of the codepoint to get, starting at 0 + * @return the codepoint at the specified index + * @throws IndexOutOfBoundsException if index is negative or >= length() + * @since 1.5 + */ + public static int codePointAt(CharSequence sequence, int index) + { + int len = sequence.length(); + if (index < 0 || index >= len) + throw new IndexOutOfBoundsException(); + char high = sequence.charAt(index); + if (! isHighSurrogate(high) || ++index >= len) + return high; + char low = sequence.charAt(index); + if (! isLowSurrogate(low)) + return high; + return toCodePoint(high, low); + } + + /** + * Get the code point at the specified index in the CharSequence. + * If the character is the start of a surrogate pair, and there is a + * following character, and this character completes the pair, then + * the corresponding supplementary code point is returned. + * Otherwise, the character at the index is returned. + * + * @param chars the character array in which to look + * @param index the index of the codepoint to get, starting at 0 + * @return the codepoint at the specified index + * @throws IndexOutOfBoundsException if index is negative or >= length() + * @since 1.5 + */ + public static int codePointAt(char[] chars, int index) + { + return codePointAt(chars, index, chars.length); + } + + /** + * Get the code point at the specified index in the CharSequence. + * If the character is the start of a surrogate pair, and there is a + * following character within the specified range, and this + * character completes the pair, then the corresponding + * supplementary code point is returned. Otherwise, the character + * at the index is returned. + * + * @param chars the character array in which to look + * @param index the index of the codepoint to get, starting at 0 + * @param limit the limit past which characters should not be examined + * @return the codepoint at the specified index + * @throws IndexOutOfBoundsException if index is negative or >= + * limit, or if limit is negative or >= the length of the array + * @since 1.5 + */ + public static int codePointAt(char[] chars, int index, int limit) + { + if (index < 0 || index >= limit || limit < 0 || limit >= chars.length) + throw new IndexOutOfBoundsException(); + char high = chars[index]; + if (! isHighSurrogate(high) || ++index >= limit) + return high; + char low = chars[index]; + if (! isLowSurrogate(low)) + return high; + return toCodePoint(high, low); + } + + /** + * Get the code point before the specified index. This is like + * #codePointAt(char[], int), but checks the characters at + * index-1 and index-2 to see if they form + * a supplementary code point. If they do not, the character at + * index-1 is returned. + * + * @param chars the character array + * @param index the index just past the codepoint to get, starting at 0 + * @return the codepoint at the specified index + * @throws IndexOutOfBoundsException if index is negative or >= length() + * @since 1.5 + */ + public static int codePointBefore(char[] chars, int index) + { + return codePointBefore(chars, index, 1); + } + + /** + * Get the code point before the specified index. This is like + * #codePointAt(char[], int), but checks the characters at + * index-1 and index-2 to see if they form + * a supplementary code point. If they do not, the character at + * index-1 is returned. The start parameter is used to + * limit the range of the array which may be examined. + * + * @param chars the character array + * @param index the index just past the codepoint to get, starting at 0 + * @param start the index before which characters should not be examined + * @return the codepoint at the specified index + * @throws IndexOutOfBoundsException if index is > start or > + * the length of the array, or if limit is negative or >= the + * length of the array + * @since 1.5 + */ + public static int codePointBefore(char[] chars, int index, int start) + { + if (index < start || index > chars.length + || start < 0 || start >= chars.length) + throw new IndexOutOfBoundsException(); + --index; + char low = chars[index]; + if (! isLowSurrogate(low) || --index < start) + return low; + char high = chars[index]; + if (! isHighSurrogate(high)) + return low; + return toCodePoint(high, low); + } + + /** + * Get the code point before the specified index. This is like + * #codePointAt(CharSequence, int), but checks the characters at + * index-1 and index-2 to see if they form + * a supplementary code point. If they do not, the character at + * index-1 is returned. + * + * @param sequence the CharSequence + * @param index the index just past the codepoint to get, starting at 0 + * @return the codepoint at the specified index + * @throws IndexOutOfBoundsException if index is negative or >= length() + * @since 1.5 + */ + public static int codePointBefore(CharSequence sequence, int index) + { + int len = sequence.length(); + if (index < 1 || index > len) + throw new IndexOutOfBoundsException(); + --index; + char low = sequence.charAt(index); + if (! isLowSurrogate(low) || --index < 0) + return low; + char high = sequence.charAt(index); + if (! isHighSurrogate(high)) + return low; + return toCodePoint(high, low); + } } // class Character diff --git a/libjava/java/lang/String.java b/libjava/java/lang/String.java index 95ad1fe..3e0bfbe 100644 --- a/libjava/java/lang/String.java +++ b/libjava/java/lang/String.java @@ -1,5 +1,5 @@ /* String.java -- immutable character sequences; the object of string literals - Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 + Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -455,6 +455,40 @@ public final class String implements Serializable, Comparable, CharSequence public native char charAt(int index); /** + * Get the code point at the specified index. This is like #charAt(int), + * but if the character is the start of a surrogate pair, and the + * following character completes the pair, then the corresponding + * supplementary code point is returned. + * @param index the index of the codepoint to get, starting at 0 + * @return the codepoint at the specified index + * @throws IndexOutOfBoundsException if index is negative or >= length() + * @since 1.5 + */ + public synchronized int codePointAt(int index) + { + // Use the CharSequence overload as we get better range checking + // this way. + return Character.codePointAt(this, index); + } + + /** + * Get the code point before the specified index. This is like + * #codePointAt(int), but checks the characters at index-1 and + * index-2 to see if they form a supplementary code point. + * @param index the index just past the codepoint to get, starting at 0 + * @return the codepoint at the specified index + * @throws IndexOutOfBoundsException if index is negative or >= length() + * (while unspecified, this is a StringIndexOutOfBoundsException) + * @since 1.5 + */ + public synchronized int codePointBefore(int index) + { + // Use the CharSequence overload as we get better range checking + // this way. + return Character.codePointBefore(this, index); + } + + /** * Copies characters from this String starting at a specified start index, * ending at a specified stop index, to a character array starting at * a specified destination begin index. @@ -566,6 +600,18 @@ public final class String implements Serializable, Comparable, CharSequence public native boolean contentEquals(StringBuffer buffer); /** + * Compares the given CharSequence to this String. This is true if + * the CharSequence has the same content as this String at this + * moment. + * + * @param seq the CharSequence to compare to + * @return true if CharSequence has the same character sequence + * @throws NullPointerException if the given CharSequence is null + * @since 1.5 + */ + public native boolean contentEquals(CharSequence seq); + + /** * Compares a String to this String, ignoring case. This does not handle * multi-character capitalization exceptions; instead the comparison is * made on a character-by-character basis, and is true if:
    @@ -1259,6 +1305,88 @@ public final class String implements Serializable, Comparable, CharSequence */ public native String intern(); + /** + * Return the number of code points between two indices in the + * String. An unpaired surrogate counts as a + * code point for this purpose. Characters outside the indicated + * range are not examined, even if the range ends in the middle of a + * surrogate pair. + * + * @param start the starting index + * @param end one past the ending index + * @return the number of code points + * @since 1.5 + */ + public synchronized int codePointCount(int start, int end) + { + if (start < 0 || end >= count || start > end) + throw new StringIndexOutOfBoundsException(); + + int count = 0; + while (start < end) + { + char base = charAt(start); + if (base < Character.MIN_HIGH_SURROGATE + || base > Character.MAX_HIGH_SURROGATE + || start == end + || start == count + || charAt(start + 1) < Character.MIN_LOW_SURROGATE + || charAt(start + 1) > Character.MAX_LOW_SURROGATE) + { + // Nothing. + } + else + { + // Surrogate pair. + ++start; + } + ++start; + ++count; + } + return count; + } + + /** + * Returns true iff this String contains the sequence of Characters + * described in s. + * @param s the CharSequence + * @return true iff this String contains s + * + * @since 1.5 + */ + public boolean contains (CharSequence s) + { + return this.indexOf(s.toString()) != -1; + } + + /** + * Returns a string that is this string with all instances of the sequence + * represented by target replaced by the sequence in + * replacement. + * @param target the sequence to be replaced + * @param replacement the sequence used as the replacement + * @return the string constructed as above + */ + public String replace (CharSequence target, CharSequence replacement) + { + String targetString = target.toString(); + String replaceString = replacement.toString(); + int targetLength = target.length(); + int replaceLength = replacement.length(); + + int startPos = this.indexOf(targetString); + StringBuilder result = new StringBuilder(this); + while (startPos != -1) + { + // Replace the target with the replacement + result.replace(startPos, startPos + targetLength, replaceString); + + // Search for a new occurrence of the target + startPos = result.indexOf(targetString, startPos + replaceLength); + } + return result.toString(); + } + private native void init(char[] chars, int offset, int count, boolean dont_copy); diff --git a/libjava/java/lang/natString.cc b/libjava/java/lang/natString.cc index c8f3129..3f63081 100644 --- a/libjava/java/lang/natString.cc +++ b/libjava/java/lang/natString.cc @@ -1,6 +1,6 @@ // natString.cc - Implementation of java.lang.String native methods. -/* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation +/* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation This file is part of libgcj. @@ -15,6 +15,7 @@ details. */ #include #include +#include #include #include #include @@ -564,6 +565,18 @@ java::lang::String::contentEquals(java::lang::StringBuffer* buffer) return true; } +jboolean +java::lang::String::contentEquals(java::lang::CharSequence *seq) +{ + if (seq->length() != count) + return false; + jchar *value = JvGetStringChars(this); + for (int i = 0; i < count; ++i) + if (value[i] != seq->charAt(i)) + return false; + return true; +} + jchar java::lang::String::charAt(jint i) {