* java/net/URLDecoder.java: Remerge with Classpath

author mark <mark@138bc75d-0d04-0410-961f-82ee72b054a4>

Mon, 8 Oct 2001 21:03:34 +0000 (21:03 +0000)

committer mark <mark@138bc75d-0d04-0410-961f-82ee72b054a4>

Mon, 8 Oct 2001 21:03:34 +0000 (21:03 +0000)
author mark <mark@138bc75d-0d04-0410-961f-82ee72b054a4>
Mon, 8 Oct 2001 21:03:34 +0000 (21:03 +0000)
committer mark <mark@138bc75d-0d04-0410-961f-82ee72b054a4>
Mon, 8 Oct 2001 21:03:34 +0000 (21:03 +0000)
diff --git a/libjava/ChangeLog b/libjava/ChangeLog

index af67c16..ff7bbf6 100644 (file)
--- a/libjava/ChangeLog
+++ b/libjava/ChangeLog
@@ -1,3 +1,8 @@
+2001-10-07  Mark Wielaard  <mark@klomp.org>
+
+       * java/net/URLDecoder.java: Remerge with Classpath
+       * java/net/URLEncoder.java: Merge with Classpath
+
  2001-10-08  Tom Tromey  <tromey@redhat.com>
  
         Fix for PR libgcj/4481:
diff --git a/libjava/java/net/URLDecoder.java b/libjava/java/net/URLDecoder.java

index ae7da78..9c51bc0 100644 (file)
--- a/libjava/java/net/URLDecoder.java
+++ b/libjava/java/net/URLDecoder.java
@@ -28,56 +28,131 @@ package java.net;
  
  import java.io.UnsupportedEncodingException;
  
-/**
-  * This utility class contains one static method that converts a 
+ /**
+  * This utility class contains static methods that convert a
    * string encoded in the x-www-form-urlencoded format to the original
-  * text.  The x-www-form-urlencoded format 
-  * replaces certain disallowed characters with
-  * encoded equivalents.  All upper case and lower case letters in the
-  * US alphabet remain as is, the space character (' ') is replaced with
-  * '+' sign, and all other characters are converted to a "%XX" format
-  * where XX is the hexadecimal representation of that character.  Note
-  * that since unicode characters are 16 bits, and this method encodes only
-  * 8 bits of information, the lower 8 bits of the character are used.
+  * text.  The x-www-form-urlencoded format replaces certain disallowed
+  * characters with encoded equivalents.  All upper case and lower case
+  * letters in the US alphabet remain as is, the space character (' ')
+  * is replaced with '+' sign, and all other characters are converted to a
+  * "%XX" format where XX is the hexadecimal representation of that character
+  * in a given character encoding (default is "UTF-8").
    * <p>
    * This method is very useful for decoding strings sent to CGI scripts
    *
-  * Written using on-line Java Platform 1.2 API Specification.
+  * Written using on-line Java Platform 1.2/1.4 API Specification.
    * Status:  Believed complete and correct.
    *
    * @since 1.2
    *
    * @author Warren Levy <warrenl@cygnus.com>
    * @author Aaron M. Renn (arenn@urbanophile.com) (documentation comments)
-  * @date April 22, 1999.
+  * @author Mark Wielaard (mark@klomp.org)
    */
  public class URLDecoder
  {
-/**
+ /**
    * This method translates the passed in string from x-www-form-urlencoded
-  * format and returns it.
+  * format using the default encoding "UTF-8" to decode the hex encoded
+  * unsafe characters.
    *
-  * @param source The String to convert
+  * @param s the String to convert
    *
-  * @return The converted String
+  * @return the converted String
    */
    public static String decode(String s)
    {
+    try
+      {
+       return decode(s, "UTF-8");
+      }
+    catch (UnsupportedEncodingException uee)
+      {
+       // Should never happen since UTF-8 encoding should always be supported
+       return s;
+      }
+  }
+
+ /**
+  * This method translates the passed in string from x-www-form-urlencoded
+  * format using the given character encoding to decode the hex encoded
+  * unsafe characters.
+  * <p>
+  * This implementation will decode the string even if it contains
+  * unsafe characters (characters that should have been encoded) or if the
+  * two characters following a % do not represent a hex encoded byte.
+  * In those cases the unsafe character or the % character will be added
+  * verbatim to the decoded result.
+  *
+  * @param s the String to convert
+  * @param encoding the character encoding to use to decode the hex encoded
+  *        unsafe characters
+  *
+  * @return the converted String
+  *
+  * @since 1.4
+  */
+  public static String decode(String s, String encoding)
+    throws UnsupportedEncodingException
+  {
+    StringBuffer result = new StringBuffer();
+
+    // First convert all '+' characters to spaces.
      String str = s.replace('+', ' ');
-    String result = "";
+    
+    // Then go through the whole string looking for byte encoded characters
      int i;
      int start = 0;
+    byte[] bytes = null;
+    int length = str.length();
      while ((i = str.indexOf('%', start)) >= 0)
        {
-       result = result + str.substring(start, i) +
-                (char) Integer.parseInt(str.substring(i + 1, i + 3), 16);
-       start = i + 3;
+       // Add all non-encoded characters to the result buffer
+       result.append(str.substring(start, i));
+       start = i;
+
+       // Get all consecutive encoded bytes
+       while ((i+2 < length) && (str.charAt(i) == '%'))
+         i += 3;
+
+       // Decode all these bytes
+       if ((bytes == null) || (bytes.length < ((i-start)/3)))
+         bytes = new byte[((i-start)/3)];
+
+       int index = 0;
+       try
+         {
+           while (start < i)
+             {
+               String sub = str.substring(start + 1, start + 3);
+               bytes[index] = (byte)Integer.parseInt(sub, 16);
+               index++;
+               start += 3;
+             }
+         }
+       catch (NumberFormatException nfe)
+         {
+           // One of the hex encoded strings was bad
+         }
+
+       // Add the bytes as characters according to the given encoding
+       result.append(new String(bytes, 0, index, encoding));
+
+       // Make sure we skip to just after a % sign
+       // There might not have been enough encoded characters after the %
+       // or the hex chars were not actually hex chars (NumberFormatException)
+       if (start < length && s.charAt(start) == '%')
+         {
+           result.append('%');
+           start++;
+         }
        }
  
+    // Add any characters left
      if (start < str.length())
-      result = result + str.substring(start);
+      result.append(str.substring(start));
  
-    return result;
+    return result.toString();
    }
-} // class URLDecoder
  
+} // class URLDecoder
diff --git a/libjava/java/net/URLEncoder.java b/libjava/java/net/URLEncoder.java

index 6590dcf..f39b300 100644 (file)
--- a/libjava/java/net/URLEncoder.java
+++ b/libjava/java/net/URLEncoder.java
@@ -1,71 +1,153 @@
-// URLEncoder.java - Provides a method for encoding strings according to
-//                  application/x-www-form-urlencoded MIME type.
+/* URLEncoder.java -- Class to convert strings to a properly encoded URL
+   Copyright (C) 1998, 1999, 2001 Free Software Foundation, Inc.
  
-/* Copyright (C) 1999  Free Software Foundation
+This file is part of GNU Classpath.
  
-   This file is part of libgcj.
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+ 
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
  
-This software is copyrighted work licensed under the terms of the
-Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
-details.  */
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+As a special exception, if you link this library with other files to
+produce an executable, this library does not by itself cause the
+resulting executable to be covered by the GNU General Public License.
+This exception does not however invalidate any other reasons why the
+executable file might be covered by the GNU General Public License. */
  
  package java.net;
-import java.io.UnsupportedEncodingException;
  
-/**
- * @author Warren Levy <warrenl@cygnus.com>
- * @date April 22, 1999.
- */
+import java.io.UnsupportedEncodingException;
  
  /**
- * Written using on-line Java Platform 1.2 API Specification, as well
+ * Written using on-line Java Platform 1.2/1.4 API Specification, as well
   * as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998).
   * Status:  Believed complete and correct.
   */
  
+ /**
+  * This utility class contains static methods that convert a
+  * string into a fully encoded URL string in x-www-form-urlencoded
+  * format.  This format replaces certain disallowed characters with
+  * encoded equivalents.  All upper case and lower case letters in the
+  * US alphabet remain as is, the space character (' ') is replaced with
+  * '+' sign, and all other characters are converted to a "%XX" format
+  * where XX is the hexadecimal representation of that character in a
+  * certain encoding (by default "UTF-8").
+  * <p>
+  * This class is very useful for encoding strings to be sent to CGI scripts.
+  *
+  * @author Aaron M. Renn (arenn@urbanophile.com)
+  * @author Warren Levy <warrenl@cygnus.com>
+  * @author Mark Wielaard (mark@klomp.org)
+  */
  public class URLEncoder
  {
-  // This method, per the JCL, is conservative in that it encodes
-  // some "allowable" characters as % triplets.
+  /**
+   * This method translates the passed in string into x-www-form-urlencoded
+   * format using the standard "UTF-8" character encoding to hex-encode the
+   * unsafe characters.
+   *
+   * @param s The String to convert
+   *
+   * @return The converted String
+   */
    public static String encode(String s)
    {
-    // Get the bytes in ISO-Latin-1 (i.e. 8859_1) per the JCL.
-    // Even though it is the default in most cases, it's specified here
-    // just in case System.getProperty("file.encoding") is not "8859_1".
-    String result = "";
      try
        {
-       byte[] buf = s.getBytes("8859_1");
-       int start = 0;
-       for (int i = 0; i < buf.length; i++)
-         // For efficiency, check the byte in order of most likely
-         // possibility so as to minimize the number of comparisons.
-         // Hence, exclude all the alphanumeric & allowed special chars first.
-         if ((buf[i] >= 'a' && buf[i] <= 'z') ||
-             (buf[i] >= 'A' && buf[i] <= 'Z') ||
-             (buf[i] >= '0' && buf[i] <= '9') ||
-             buf[i] == '-' || buf[i] == '_' || buf[i] == '.' || buf[i] == '*')
-           ; // This is the most likely case so exclude first for efficiency.
-         else if (buf[i] == ' ')
-           buf[i] = (byte) '+';  // Replace space char with plus symbol.
-         else
-           {
-             result = result + new String(buf, start, i - start, "8859_1") +
-                       "%" + Integer.toHexString(((int) buf[i]) & 0xFF);
-             start = i + 1;
-           }
-
-       // Append remainder of allowable chars from the string, if any.
-       if (start < buf.length)
-         result = result +
-                  new String(buf, start, buf.length - start, "8859_1");
+        return encode(s, "UTF-8");
        }
-    catch (UnsupportedEncodingException ex)
+    catch (UnsupportedEncodingException uee)
        {
-       // This should never happen as "8859_1" is the default encoding.
+        // Should never happen since UTF-8 should always be supported
         return s;
        }
+  }
+
+  /**
+   * This method translates the passed in string into x-www-form-urlencoded
+   * format using the character encoding to hex-encode the unsafe characters.
+   *
+   * @param s The String to convert
+   * @param encoding The encoding to use for unsafe characters
+   *
+   * @return The converted String
+   *
+   * @since 1.4
+   */
+  public static String encode(String s, String encoding)
+    throws UnsupportedEncodingException
+  {
+    StringBuffer result = new StringBuffer();
+    int length = s.length();
+    int start = 0;
+    int i = 0;
  
-    return result;
+    while (true)
+    {
+      while ( i < length && isSafe(s.charAt(i)) )
+       i++;
+
+      // Safe characters can just be added
+      result.append(s.substring(start, i));
+
+      // Are we done?
+      if (i >= length)
+       return result.toString();
+      else if (s.charAt(i) == ' ')
+        {
+         result.append('+');  // Replace space char with plus symbol.
+         i++;
+       }
+      else
+       {
+         // Get all unsafe characters
+         start = i;
+         char c;
+         while ( i < length && (c = s.charAt(i)) != ' ' && !isSafe(c) )
+           i++;
+
+         // Convert them to %XY encoded strings
+         String unsafe = s.substring(start,i);
+         byte bytes[] = unsafe.getBytes(encoding);
+         for (int j = 0; j < bytes.length; j++)
+           {
+             result.append('%');
+             result.append(Integer.toHexString(((int) bytes[j]) & 0xFF));
+           }
+       }
+      start = i;
+    }
    }
-}
+
+  /**
+   * Private static method that returns true if the given char is either
+   * an uppercase or lowercase letter from 'a' through 'z', or a digit from
+   * '0' through '9', or one of the characters '-', '_', '.' or '*'. Such
+   * 'safe' characters don't have to be URL encoded.
+   */
+  private static boolean isSafe(char c)
+  {
+    return  ((c >= 'a' && c <= 'z') ||
+            (c >= 'A' && c <= 'Z') ||
+            (c >= '0' && c <= '9') ||
+            c == '-' || c == '_' || c == '.' || c == '*');
+  }
+
+  /**
+   * Private constructor that does nothing. Included to avoid a default
+   * public constructor being created by the compiler.
+   */
+  private URLEncoder() { }
+
+} // class URLEncoder
author	mark <mark@138bc75d-0d04-0410-961f-82ee72b054a4>
	Mon, 8 Oct 2001 21:03:34 +0000 (21:03 +0000)
committer	mark <mark@138bc75d-0d04-0410-961f-82ee72b054a4>
	Mon, 8 Oct 2001 21:03:34 +0000 (21:03 +0000)
libjava/ChangeLog		patch \| blob \| history
libjava/java/net/URLDecoder.java		patch \| blob \| history
libjava/java/net/URLEncoder.java		patch \| blob \| history