1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // This file relies on the fact that the following declaration has been made
7 // var $Array = global.Array;
9 // -------------------------------------------------------------------
11 // This file contains support for URI manipulations written in JavaScript.
14 // Lazily initialized.
// Lookup table indexed by a nibble value (0..15) that yields the char code of
// the matching upper-case hex digit. It holds the sentinel 0 until
// URIEncodeOctets fills it in on first use.
16 var hexCharCodeArray = 0;
// Appends the percent-encoded form of a single octet ("%XY", upper-case hex)
// to a buffer of char codes.
//
// The caller must have populated hexCharCodeArray beforehand; this function
// does not perform the lazy initialization itself.
//
//   octet  - byte value, 0..255.
//   result - array-like buffer of char codes that is written in place.
//   index  - position in result where the '%' is stored.
//
// Returns the index immediately after the three char codes written.
function URIAddEncodedOctetToBuffer(octet, result, index) {
  result[index++] = 37; // Char code of '%'.
  result[index++] = hexCharCodeArray[octet >> 4];    // High nibble.
  result[index++] = hexCharCodeArray[octet & 0x0F];  // Low nibble.
  return index;
}
// Percent-encodes a group of one to four octets into result, starting at
// index, and returns the index just past the last char code written.
//
// The first octet is always emitted. Octets 1..3 are emitted only when the
// corresponding slot holds a truthy value, so unused trailing slots of the
// octets array can simply be left unset.
//
// The hex digit table is created here on first use.
function URIEncodeOctets(octets, result, index) {
  if (hexCharCodeArray === 0) {
    // Char codes of '0'..'9' followed by 'A'..'F'.
    hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
                        65, 66, 67, 68, 69, 70];
  }
  index = URIAddEncodedOctetToBuffer(octets[0], result, index);
  if (octets[1]) index = URIAddEncodedOctetToBuffer(octets[1], result, index);
  if (octets[2]) index = URIAddEncodedOctetToBuffer(octets[2], result, index);
  if (octets[3]) index = URIAddEncodedOctetToBuffer(octets[3], result, index);
  return index;
}
// Encodes one UTF-16 code unit that is not part of a surrogate pair as UTF-8
// and appends the percent-encoded octets to result.
//
// The code unit is viewed as the bit groups xxxx yyyyyy zzzzzz:
//   cc <= 0x007F  ->  0zzzzzzz
//   cc <= 0x07FF  ->  110yyyyy 10zzzzzz
//   otherwise     ->  1110xxxx 10yyyyyy 10zzzzzz
//
// Returns the index just past the last char code written into result.
function URIEncodeSingle(cc, result, index) {
  var x = (cc >> 12) & 0xF;
  var y = (cc >> 6) & 63;
  var z = cc & 63;
  // Unused trailing slots stay unset; URIEncodeOctets skips them.
  var octets = new $Array(3);
  if (cc <= 0x007F) {
    octets[0] = cc;
  } else if (cc <= 0x07FF) {
    octets[0] = y + 192;
    octets[1] = z + 128;
  } else {
    octets[0] = x + 224;
    octets[1] = y + 128;
    octets[2] = z + 128;
  }
  return URIEncodeOctets(octets, result, index);
}
// Encodes a UTF-16 surrogate pair as the four UTF-8 octets of the
// supplementary code point it denotes and appends them, percent-encoded, to
// result.
//
//   cc1 - high surrogate, bit layout 110110vv vvwwwwxx (u = vvvv + 1).
//   cc2 - low surrogate,  bit layout 110111yy yyzzzzzz.
//
// The resulting octets are
//   11110uuu 10uuwwww 10xxyyyy 10zzzzzz
// where u is the five-bit plane number.
//
// Returns the index just past the last char code written into result.
function URIEncodePair(cc1 , cc2, result, index) {
  var u = ((cc1 >> 6) & 0xF) + 1;
  var w = (cc1 >> 2) & 0xF;
  var x = cc1 & 3;
  var y = (cc2 >> 6) & 0xF;
  var z = cc2 & 63;
  var octets = new $Array(4);
  octets[0] = (u >> 2) + 240;
  octets[1] = (((u & 3) << 4) | w) + 128;
  octets[2] = ((x << 4) | y) + 128;
  octets[3] = z + 128;
  return URIEncodeOctets(octets, result, index);
}
// Combines the char codes of two hex digits into the byte value they spell.
//
//   highChar - char code of the most significant hex digit.
//   lowChar  - char code of the least significant hex digit.
//
// Returns a value in 0..255. Throws $URIError("URI malformed") when either
// argument is not the char code of a hex digit (HexValueOf reports -1).
function URIHexCharsToCharCode(highChar, lowChar) {
  var highCode = HexValueOf(highChar);
  var lowCode = HexValueOf(lowChar);
  if (highCode === -1 || lowCode === -1) {
    throw new $URIError("URI malformed");
  }
  return (highCode << 4) | lowCode;
}
// Decodes one UTF-8 sequence held in octets into a code point and stores it
// in the two-byte string result at index, as one code unit or as a surrogate
// pair. Every validation failure throws $URIError("URI malformed").
// NOTE(review): several lines of this function (the declarations of value,
// o0..o3 and a..d, some branch headers and the return) are not visible in
// this excerpt. The caller in Decode assigns the result to index, so it
// presumably returns the updated index - confirm against the full source.
84 function URIDecodeOctets(octets, result, index) {
// A lead octet below 0xc2 that did not take the preceding branch is rejected.
89 } else if (o0 < 0xc2) {
90 throw new $URIError("URI malformed");
// Two-octet form: the second octet must lie in 0x80..0xbf.
95 if ((o1 < 0x80) || (o1 > 0xbf)) {
96 throw new $URIError("URI malformed");
// The decoded value must fall in 0x80..0x7ff for this form.
100 if (value < 0x80 || value > 0x7ff) {
101 throw new $URIError("URI malformed");
// Three-octet form: both trailing octets must lie in 0x80..0xbf.
107 if ((o1 < 0x80) || (o1 > 0xbf)) {
108 throw new $URIError("URI malformed");
111 if ((o2 < 0x80) || (o2 > 0xbf)) {
112 throw new $URIError("URI malformed");
// The decoded value must fall in 0x800..0xffff for this form.
115 value = (a << 12) + (b << 6) + c;
116 if ((value < 0x800) || (value > 0xffff)) {
117 throw new $URIError("URI malformed");
// Four-octet form: all three trailing octets must lie in 0x80..0xbf.
123 if ((o1 < 0x80) || (o1 > 0xbf)) {
124 throw new $URIError("URI malformed");
127 if ((o2 < 0x80) || (o2 > 0xbf)) {
128 throw new $URIError("URI malformed");
131 if ((o3 < 0x80) || (o3 > 0xbf)) {
132 throw new $URIError("URI malformed");
// The decoded value must fall in 0x10000..0x10ffff for this form.
135 value = (a << 18) + (b << 12) + (c << 6) + d;
136 if ((value < 0x10000) || (value > 0x10ffff)) {
137 throw new $URIError("URI malformed");
140 throw new $URIError("URI malformed");
// A decoded value inside the surrogate range 0xD800..0xDFFF is rejected.
145 if (0xD800 <= value && value <= 0xDFFF) {
146 throw new $URIError("URI malformed");
// Values below 0x10000 are stored as a single code unit.
148 if (value < 0x10000) {
149 %_TwoByteSeqStringSetChar(result, index++, value);
// Otherwise two code units are stored. (value >> 10) + 0xd7c0 equals
// 0xd800 + ((value - 0x10000) >> 10), i.e. the high surrogate; the low
// surrogate is 0xdc00 plus the low ten bits.
152 %_TwoByteSeqStringSetChar(result, index++, (value >> 10) + 0xd7c0);
153 %_TwoByteSeqStringSetChar(result, index++, (value & 0x3ff) + 0xdc00);
159 // ECMA-262, section 15.1.3
// Shared implementation behind URIEncode and URIEncodeComponent: walks the
// UTF-16 code units of uri, collects output char codes in an InternalArray
// and finally copies them into a freshly allocated one-byte string.
// Throws $URIError("URI malformed") for an unpaired surrogate.
// NOTE(review): the declaration of index, the test that uses the unescape
// predicate, the increment of k before the second code unit is read, and the
// final return are not visible in this excerpt.
160 function Encode(uri, unescape) {
161 var uriLength = uri.length;
162 var array = new InternalArray(uriLength);
164 for (var k = 0; k < uriLength; k++) {
165 var cc1 = uri.charCodeAt(k);
// Code unit copied through unchanged (presumably when unescape(cc1) holds -
// the guarding condition is not visible here).
167 array[index++] = cc1;
// A low surrogate (0xDC00..0xDFFF) in leading position is malformed.
169 if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed");
// Anything outside the high-surrogate range 0xD800..0xDBFF is encoded alone.
170 if (cc1 < 0xD800 || cc1 > 0xDBFF) {
171 index = URIEncodeSingle(cc1, array, index);
// High surrogate: the input must not end here, and the following code unit
// must be a low surrogate.
174 if (k == uriLength) throw new $URIError("URI malformed");
175 var cc2 = uri.charCodeAt(k);
176 if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed");
177 index = URIEncodePair(cc1, cc2, array, index);
// Copy the collected char codes into a one-byte string of matching length.
182 var result = %NewString(array.length, NEW_ONE_BYTE_STRING);
183 for (var i = 0; i < array.length; i++) {
184 %_OneByteSeqStringSetChar(result, i, array[i]);
190 // ECMA-262, section 15.1.3
// Shared implementation behind URIDecode and URIDecodeComponent. Decoding
// runs in two phases: output is first written into a one-byte string for as
// long as every produced char code fits in 7 bits; at the first code that
// does not, the remainder is decoded into a two-byte string. The result is
// the concatenation of both (truncated) strings.
// reserved is a predicate on a decoded char code; where it is consulted, the
// original "%XY" text is copied instead of the decoded character.
// Throws $URIError("URI malformed") on truncated or invalid escapes.
// NOTE(review): the declarations of index, k and n, several branch headers
// and some index adjustments are not visible in this excerpt.
191 function Decode(uri, reserved) {
192 var uriLength = uri.length;
193 var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING);
197 // Optimistically assume ascii string.
198 for ( ; k < uriLength; k++) {
199 var code = uri.charCodeAt(k);
200 if (code == 37) { // '%'
// A '%' must be followed by two more characters.
201 if (k + 2 >= uriLength) throw new $URIError("URI malformed");
202 var cc = URIHexCharsToCharCode(uri.charCodeAt(k+1), uri.charCodeAt(k+2));
// cc >> 7 is non-zero exactly when cc >= 0x80.
203 if (cc >> 7) break; // Assumption wrong, two byte string.
// Escape kept verbatim: '%' followed by the two original hex digits.
205 %_OneByteSeqStringSetChar(one_byte, index++, 37); // '%'.
206 %_OneByteSeqStringSetChar(one_byte, index++, uri.charCodeAt(k+1));
207 %_OneByteSeqStringSetChar(one_byte, index++, uri.charCodeAt(k+2));
// Escape replaced by the decoded character.
209 %_OneByteSeqStringSetChar(one_byte, index++, cc);
213 if (code > 0x7f) break; // Assumption wrong, two byte string.
214 %_OneByteSeqStringSetChar(one_byte, index++, code);
// Trim the one-byte string to what was written; if the whole input was
// consumed it is the final result.
218 one_byte = %TruncateString(one_byte, index);
219 if (k == uriLength) return one_byte;
221 // Write into two byte string.
// Sized for the remaining input, starting at the position where the first
// loop stopped.
222 var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING);
225 for ( ; k < uriLength; k++) {
226 var code = uri.charCodeAt(k);
227 if (code == 37) { // '%'
228 if (k + 2 >= uriLength) throw new $URIError("URI malformed");
// Both arguments advance k; this relies on left-to-right argument
// evaluation and leaves k on the second hex digit.
229 var cc = URIHexCharsToCharCode(uri.charCodeAt(++k), uri.charCodeAt(++k));
// Shifts cc left until bit 7 of the shifted value is clear, counting the
// shifts in n (presumably starting from 0 - the declaration is not visible).
232 while (((cc << ++n) & 0x80) != 0) { }
// A count of 1 or a count above 4 is rejected.
233 if (n == 1 || n > 4) throw new $URIError("URI malformed");
234 var octets = new $Array(n);
// Each of the n - 1 remaining octets needs three more input characters.
236 if (k + 3 * (n - 1) >= uriLength) throw new $URIError("URI malformed");
237 for (var i = 1; i < n; i++) {
238 if (uri.charAt(++k) != '%') throw new $URIError("URI malformed");
239 octets[i] = URIHexCharsToCharCode(uri.charCodeAt(++k),
240 uri.charCodeAt(++k));
242 index = URIDecodeOctets(octets, two_byte, index);
243 } else if (reserved(cc)) {
// Reserved character: copy the original "%XY"; k currently points at Y.
244 %_TwoByteSeqStringSetChar(two_byte, index++, 37); // '%'.
245 %_TwoByteSeqStringSetChar(two_byte, index++, uri.charCodeAt(k - 1));
246 %_TwoByteSeqStringSetChar(two_byte, index++, uri.charCodeAt(k));
248 %_TwoByteSeqStringSetChar(two_byte, index++, cc);
// Not an escape: copy the code unit as is.
251 %_TwoByteSeqStringSetChar(two_byte, index++, code);
255 two_byte = %TruncateString(two_byte, index);
256 return one_byte + two_byte;
// ECMA-262 - 15.1.3.1.
// Implementation of the global decodeURI function: converts uri to a string
// and decodes its percent-escapes via Decode. Escapes that denote one of the
// characters accepted by reservedPredicate ("#$&+,/:;=?@") are passed to
// Decode as reserved.
function URIDecode(uri) {
  var reservedPredicate = function(cc) {
    // #$
    if (35 <= cc && cc <= 36) return true;
    // &
    if (cc == 38) return true;
    // +,
    if (43 <= cc && cc <= 44) return true;
    // /
    if (cc == 47) return true;
    // :;
    if (58 <= cc && cc <= 59) return true;
    // =
    if (cc == 61) return true;
    // ?@
    if (63 <= cc && cc <= 64) return true;

    return false;
  };
  var string = ToString(uri);
  return Decode(string, reservedPredicate);
}
// ECMA-262 - 15.1.3.2.
// Implementation of the global decodeURIComponent function: converts
// component to a string and decodes its percent-escapes via Decode. No
// character is treated as reserved.
function URIDecodeComponent(component) {
  var reservedPredicate = function(cc) { return false; };
  var string = ToString(component);
  return Decode(string, reservedPredicate);
}
// Does the char code correspond to an alpha-numeric char.
// Returns true for the char codes of 'a'..'z', 'A'..'Z' and '0'..'9', and
// false for everything else.
function isAlphaNumeric(cc) {
  // a - z
  if (97 <= cc && cc <= 122) return true;
  // A - Z
  if (65 <= cc && cc <= 90) return true;
  // 0 - 9
  if (48 <= cc && cc <= 57) return true;

  return false;
}
// ECMA-262 - 15.1.3.3.
// Implementation of the global encodeURI function: converts uri to a string
// and percent-encodes it via Encode. Characters accepted by
// unescapePredicate (alpha-numerics plus "!#$&'()*+,-./:;=?@_~") are
// passed to Encode as not needing an escape.
function URIEncode(uri) {
  var unescapePredicate = function(cc) {
    if (isAlphaNumeric(cc)) return true;
    // !
    if (cc == 33) return true;
    // #$
    if (35 <= cc && cc <= 36) return true;
    // &'()*+,-./
    if (38 <= cc && cc <= 47) return true;
    // :;
    if (58 <= cc && cc <= 59) return true;
    // =
    if (cc == 61) return true;
    // ?@
    if (63 <= cc && cc <= 64) return true;
    // _
    if (cc == 95) return true;
    // ~
    if (cc == 126) return true;

    return false;
  };

  var string = ToString(uri);
  return Encode(string, unescapePredicate);
}
// ECMA-262 - 15.1.3.4
// Implementation of the global encodeURIComponent function: converts
// component to a string and percent-encodes it via Encode. Characters
// accepted by unescapePredicate (alpha-numerics plus "!'()*-._~") are
// passed to Encode as not needing an escape.
function URIEncodeComponent(component) {
  var unescapePredicate = function(cc) {
    if (isAlphaNumeric(cc)) return true;
    // !
    if (cc == 33) return true;
    // '()*
    if (39 <= cc && cc <= 42) return true;
    // -.
    if (45 <= cc && cc <= 46) return true;
    // _
    if (cc == 95) return true;
    // ~
    if (cc == 126) return true;

    return false;
  };

  var string = ToString(component);
  return Encode(string, unescapePredicate);
}
// Maps the char code of a hex digit to its numeric value (0..15). Both
// upper- and lower-case letters are accepted. Returns -1 for any other char
// code; URIHexCharsToCharCode relies on that sentinel.
function HexValueOf(code) {
  // 0-9
  if (code >= 48 && code <= 57) return code - 48;
  // A-F
  if (code >= 65 && code <= 70) return code - 55;
  // a-f
  if (code >= 97 && code <= 102) return code - 87;

  return -1;
}
// Convert a character code to 4-digit hex string representation
// 64 -> 0040, 62234 -> F31A.
//
// Only the low 16 bits of cc contribute to the result. Digits are upper-case.
// Relies on a file-level hexCharArray variable that holds 0 until the digit
// table is built here on first use (its declaration lives outside this
// function).
function CharCodeToHex4Str(cc) {
  var r = "";
  if (hexCharArray === 0) {
    hexCharArray = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
                    "A", "B", "C", "D", "E", "F"];
  }
  // Peel off one nibble per iteration, least significant first, and prepend
  // its digit so the most significant digit ends up leftmost.
  for (var i = 0; i < 4; ++i) {
    var c = hexCharArray[cc & 0x0F];
    r = c + r;
    cc = cc >>> 4;
  }
  return r;
}
// Returns true if all digits in string s are valid hex numbers
// ('0'..'9', 'A'..'F' and 'a'..'f'). The empty string yields true because it
// contains no offending character.
function IsValidHex(s) {
  for (var i = 0; i < s.length; ++i) {
    var cc = s.charCodeAt(i);
    if ((48 <= cc && cc <= 57) ||
        (65 <= cc && cc <= 70) ||
        (97 <= cc && cc <= 102)) {
      // '0'..'9', 'A'..'F' and 'a' .. 'f'.
    } else {
      return false;
    }
  }
  return true;
}
// Converts str to a string with ToString and returns the result of the
// %URIEscape runtime function applied to that string.
404 function URIEscape(str) {
405 var s = ToString(str);
406 return %URIEscape(s);
// Converts str to a string with ToString and returns the result of the
// %URIUnescape runtime function applied to that string. SetUpUri installs
// this function on the global object under the name "unescape".
411 function URIUnescape(str) {
412 var s = ToString(str);
413 return %URIUnescape(s);
417 // -------------------------------------------------------------------
// Installs the URI built-ins defined in this file on the global object as
// DONT_ENUM (non-enumerable) properties. The argument list alternates
// between the property name and the function that implements it.
// NOTE(review): the end of the comment below, any entries before "unescape",
// and the closing of the call and of this function are not visible in this
// excerpt.
419 function SetUpUri() {
// Runtime check invoked before anything is installed; presumably it guards
// against running outside of bootstrapping - confirm against the runtime.
420 %CheckIsBootstrapping();
422 // Set up non-enumerable URI functions on the global object and set
424 InstallFunctions(global, DONT_ENUM, $Array(
426 "unescape", URIUnescape,
427 "decodeURI", URIDecode,
428 "decodeURIComponent", URIDecodeComponent,
429 "encodeURI", URIEncode,
430 "encodeURIComponent", URIEncodeComponent