1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // This file contains support for URI manipulations written in JavaScript.
8 (function(global, utils) {
12 %CheckIsBootstrapping();
14 // -------------------------------------------------------------------
17 var GlobalObject = global.Object;
18 var GlobalArray = global.Array;
19 var InternalArray = utils.InternalArray;
22 utils.Import(function(from) {
23 ToString = from.ToString;
26 // -------------------------------------------------------------------
27 // Define internal helper functions.
// Converts the char code of a hexadecimal digit into its numeric value.
// NOTE(review): the fall-through result for non-hex codes is not visible in
// this excerpt; URIHexCharsToCharCode tests the result against -1.
29 function HexValueOf(code) {
// '0'..'9' (48..57) -> 0..9
31 if (code >= 48 && code <= 57) return code - 48;
// 'A'..'F' (65..70) -> 10..15
33 if (code >= 65 && code <= 70) return code - 55;
// 'a'..'f' (97..102) -> 10..15
35 if (code >= 97 && code <= 102) return code - 87;
40 // Does the char code correspond to an alpha-numeric char.
// Returns true when |cc| is the char code of an ASCII letter or digit.
// NOTE(review): the result for all other codes is not visible in this
// excerpt (presumably false).
41 function isAlphaNumeric(cc) {
// 'a'..'z'
43 if (97 <= cc && cc <= 122) return true;
// 'A'..'Z'
45 if (65 <= cc && cc <= 90) return true;
// '0'..'9'
47 if (48 <= cc && cc <= 57) return true;
52 // Lazily initialized.
53 var hexCharCodeArray = 0;
// Writes the three char codes of the escape sequence "%XY" for |octet| into
// |result|, starting at |index|. X is looked up from the high nibble and Y
// from the low nibble in hexCharCodeArray, which must already hold the hex
// digit table (URIEncodeOctets fills it before calling this function).
// NOTE(review): the return statement is not visible in this excerpt;
// URIEncodeOctets stores the return value back into its own |index|, so it
// is presumably the advanced index.
55 function URIAddEncodedOctetToBuffer(octet, result, index) {
56 result[index++] = 37; // Char code of '%'.
// High nibble of the octet.
57 result[index++] = hexCharCodeArray[octet >> 4];
// Low nibble of the octet.
58 result[index++] = hexCharCodeArray[octet & 0x0F];
// Appends the percent-escaped form of up to four octets to |result|,
// starting at |index|.
// NOTE(review): the end of the lazy-initialisation block and the return
// statement are not visible in this excerpt; callers return this function's
// result, so it presumably returns the updated index.
62 function URIEncodeOctets(octets, result, index) {
// First use: replace the 0 placeholder with the char codes of '0'..'9' and
// 'A'..'F', so escapes are written with upper-case hex digits.
63 if (hexCharCodeArray === 0) {
64 hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
65 65, 66, 67, 68, 69, 70];
// The first octet is always written.
67 index = URIAddEncodedOctetToBuffer(octets[0], result, index);
// Remaining octets are written only when truthy, so unset (undefined) or
// zero entries are skipped.
68 if (octets[1]) index = URIAddEncodedOctetToBuffer(octets[1], result, index);
69 if (octets[2]) index = URIAddEncodedOctetToBuffer(octets[2], result, index);
70 if (octets[3]) index = URIAddEncodedOctetToBuffer(octets[3], result, index);
// Percent-encodes the single code unit |cc| into |result| at |index| by
// filling an octet array and passing it to URIEncodeOctets, whose result is
// returned.
// NOTE(review): the remaining bit-field extraction and the per-range octet
// assignments are not visible in this excerpt, so only the visible steps
// are described here.
74 function URIEncodeSingle(cc, result, index) {
// Bits 12..15 of the code unit.
75 var x = (cc >> 12) & 0xF;
// Bits 6..11 of the code unit.
76 var y = (cc >> 6) & 63;
// Up to three octets; slots left unset are skipped by the truthiness checks
// in URIEncodeOctets.
78 var octets = new GlobalArray(3);
// Branch taken for code units no greater than 0x07FF that did not match the
// preceding (not visible) condition.
81 } else if (cc <= 0x07FF) {
89 return URIEncodeOctets(octets, result, index);
// Percent-encodes the code-unit pair |cc1|, |cc2| (Encode calls this with a
// high surrogate followed by a low surrogate) as a four-octet sequence
// written to |result| at |index|; returns the result of URIEncodeOctets.
// NOTE(review): the definitions of |x| (and any further bit fields) and the
// assignment of octets[3] are not visible in this excerpt.
92 function URIEncodePair(cc1 , cc2, result, index) {
// Bits 6..9 of cc1, plus one.
93 var u = ((cc1 >> 6) & 0xF) + 1;
// Bits 2..5 of cc1.
94 var w = (cc1 >> 2) & 0xF;
// Bits 6..9 of cc2.
96 var y = (cc2 >> 6) & 0xF;
98 var octets = new GlobalArray(4);
// Lead octet: 240 (0xF0) plus the top bits of u.
99 octets[0] = (u >> 2) + 240;
// Following octets: 128 (0x80) plus six payload bits each.
100 octets[1] = (((u & 3) << 4) | w) + 128;
101 octets[2] = ((x << 4) | y) + 128;
103 return URIEncodeOctets(octets, result, index);
// Combines the char codes of two hexadecimal digits into one value in the
// range 0..255: |highChar| supplies the upper four bits and |lowChar| the
// lower four. Throws the error produced by MakeURIError() when HexValueOf
// reports -1 for either character.
106 function URIHexCharsToCharCode(highChar, lowChar) {
107 var highCode = HexValueOf(highChar);
108 var lowCode = HexValueOf(lowChar);
// Either character is not a hex digit.
109 if (highCode == -1 || lowCode == -1) throw MakeURIError();
110 return (highCode << 4) | lowCode;
113 // Callers must ensure that |result| is a sufficiently long sequential two-byte string.
// Validates a decoded multi-octet sequence in |octets|, combines it into a
// single value and writes that value into the two-byte string |result| at
// |index| as one or two code units. Every validation failure throws the
// error produced by MakeURIError().
// NOTE(review): the branch headers that select the sequence length, the
// extraction of o0..o3 and a..d, and the return statement are not visible in
// this excerpt; Decode stores the return value back into its |index|.
115 function URIDecodeOctets(octets, result, index) {
// Lead octets below 0xc2 are rejected on this branch.
120 } else if (o0 < 0xc2) {
121 throw MakeURIError();
// Two-octet form: the second octet must lie in 0x80..0xbf.
126 if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
128 value = (a << 6) + b;
// The combined value must lie in 0x80..0x7ff.
129 if (value < 0x80 || value > 0x7ff) throw MakeURIError();
// Three-octet form: both trailing octets must lie in 0x80..0xbf.
134 if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
136 if ((o2 < 0x80) || (o2 > 0xbf)) throw MakeURIError();
138 value = (a << 12) + (b << 6) + c;
// The combined value must lie in 0x800..0xffff.
139 if ((value < 0x800) || (value > 0xffff)) throw MakeURIError();
// Four-octet form: all three trailing octets must lie in 0x80..0xbf.
144 if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
146 if ((o2 < 0x80) || (o2 > 0xbf)) {
147 throw MakeURIError();
150 if ((o3 < 0x80) || (o3 > 0xbf)) throw MakeURIError();
152 value = (a << 18) + (b << 12) + (c << 6) + d;
// The combined value must lie in 0x10000..0x10ffff.
153 if ((value < 0x10000) || (value > 0x10ffff)) throw MakeURIError();
155 throw MakeURIError();
// Values in the surrogate range 0xD800..0xDFFF are rejected.
160 if (0xD800 <= value && value <= 0xDFFF) throw MakeURIError();
// Values below 0x10000 are stored as a single code unit.
161 if (value < 0x10000) {
162 %_TwoByteSeqStringSetChar(index++, value, result);
// Larger values are stored as two code units. 0xd7c0 equals
// 0xd800 - (0x10000 >> 10), so the first unit is
// 0xd800 + ((value - 0x10000) >> 10); the second carries the low ten bits.
164 %_TwoByteSeqStringSetChar(index++, (value >> 10) + 0xd7c0, result);
165 %_TwoByteSeqStringSetChar(index++, (value & 0x3ff) + 0xdc00, result);
170 // ECMA-262, section 15.1.3
// Shared implementation behind URIEncode and URIEncodeComponent. Walks the
// code units of |uri|, collects output char codes in an internal array and
// finally copies them into a newly allocated one-byte string.
// |unescape| is the predicate supplied by the callers.
// NOTE(review): the declaration of |index|, the test that applies
// |unescape|, the increment of |k| before the second surrogate is read and
// the return statement are not visible in this excerpt.
171 function Encode(uri, unescape) {
172 uri = TO_STRING_INLINE(uri);
173 var uriLength = uri.length;
// Output buffer of char codes, written through |index|.
174 var array = new InternalArray(uriLength);
176 for (var k = 0; k < uriLength; k++) {
177 var cc1 = %_StringCharCodeAt(uri, k);
// The code unit is copied through unchanged (presumably when |unescape|
// accepts it; the condition is not visible here).
179 array[index++] = cc1;
// A low surrogate (0xDC00..0xDFFF) in first position is an error.
181 if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw MakeURIError();
// Anything that is not a high surrogate (0xD800..0xDBFF) is encoded alone.
182 if (cc1 < 0xD800 || cc1 > 0xDBFF) {
183 index = URIEncodeSingle(cc1, array, index);
// A high surrogate with no code unit after it is an error.
186 if (k == uriLength) throw MakeURIError();
187 var cc2 = %_StringCharCodeAt(uri, k);
// The code unit after a high surrogate must be a low surrogate.
188 if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw MakeURIError();
189 index = URIEncodePair(cc1, cc2, array, index);
// Copy the collected char codes into a one-byte string of the same length.
194 var result = %NewString(array.length, NEW_ONE_BYTE_STRING);
195 for (var i = 0; i < array.length; i++) {
196 %_OneByteSeqStringSetChar(i, array[i], result);
201 // ECMA-262, section 15.1.3
// Shared implementation behind URIDecode and URIDecodeComponent. Decoding
// runs in two phases: the prefix of |uri| that decodes to values below 0x80
// is written into a one-byte string, and as soon as a larger value appears
// the remainder is written into a two-byte string. The two parts are
// concatenated at the end. |reserved| is a predicate on a decoded char code;
// in the two-byte phase a true result keeps the original "%XY" text.
// Malformed input throws the error produced by MakeURIError().
// NOTE(review): the declarations of |k|, |index| and |n|, some branch
// headers and some continuation lines are not visible in this excerpt.
202 function Decode(uri, reserved) {
203 uri = TO_STRING_INLINE(uri);
204 var uriLength = uri.length;
// The one-byte part can never be longer than the input.
205 var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING);
209 // Optimistically assume one-byte string.
210 for ( ; k < uriLength; k++) {
211 var code = %_StringCharCodeAt(uri, k);
212 if (code == 37) { // '%'
// A '%' must be followed by two more characters.
213 if (k + 2 >= uriLength) throw MakeURIError();
214 var cc = URIHexCharsToCharCode(%_StringCharCodeAt(uri, k+1),
215 %_StringCharCodeAt(uri, k+2));
// Any bit at position 7 or above means the value does not fit this phase.
216 if (cc >> 7) break; // Assumption wrong, two-byte string.
// Copy the escape sequence through unchanged: '%' and its two hex digits
// (presumably guarded by reserved(cc); the condition is not visible here).
218 %_OneByteSeqStringSetChar(index++, 37, one_byte); // '%'.
219 %_OneByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k+1),
221 %_OneByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k+2),
// Store the decoded character itself.
224 %_OneByteSeqStringSetChar(index++, cc, one_byte);
228 if (code > 0x7f) break; // Assumption wrong, two-byte string.
229 %_OneByteSeqStringSetChar(index++, code, one_byte);
// Cut the one-byte string down to the part that was written.
233 one_byte = %TruncateString(one_byte, index);
// The whole input was consumed in the first phase.
234 if (k == uriLength) return one_byte;
236 // Write into two byte string.
// Sized by the number of input characters that are still unread.
237 var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING);
240 for ( ; k < uriLength; k++) {
241 var code = %_StringCharCodeAt(uri, k);
242 if (code == 37) { // '%'
243 if (k + 2 >= uriLength) throw MakeURIError();
// Unlike the first phase, |k| is advanced past both hex digits here.
244 var cc = URIHexCharsToCharCode(%_StringCharCodeAt(uri, ++k),
245 %_StringCharCodeAt(uri, ++k));
// Shift cc left until bit 7 of the shifted value is clear; |n| then gives
// the length of the octet sequence (its initial value is not visible here).
248 while (((cc << ++n) & 0x80) != 0) { }
// Sequence lengths of 1 or more than 4 are rejected.
249 if (n == 1 || n > 4) throw MakeURIError();
250 var octets = new GlobalArray(n);
// Each of the remaining n - 1 octets needs three more input characters.
252 if (k + 3 * (n - 1) >= uriLength) throw MakeURIError();
253 for (var i = 1; i < n; i++) {
// Every following octet must itself be written as a "%XY" escape.
254 if (uri[++k] != '%') throw MakeURIError();
255 octets[i] = URIHexCharsToCharCode(%_StringCharCodeAt(uri, ++k),
256 %_StringCharCodeAt(uri, ++k));
258 index = URIDecodeOctets(octets, two_byte, index);
259 } else if (reserved(cc)) {
// Keep the escape as written; |k| now points at the second hex digit, so
// the two digits are at k - 1 and k.
260 %_TwoByteSeqStringSetChar(index++, 37, two_byte); // '%'.
261 %_TwoByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k - 1),
263 %_TwoByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k),
// Store the decoded character itself.
266 %_TwoByteSeqStringSetChar(index++, cc, two_byte);
// Characters other than '%' are copied through unchanged.
269 %_TwoByteSeqStringSetChar(index++, code, two_byte);
273 two_byte = %TruncateString(two_byte, index);
274 return one_byte + two_byte;
277 // -------------------------------------------------------------------
278 // Define exported functions.
// Installed as the global "escape" function: converts |str| with ToString
// and returns the result of the %URIEscape runtime function.
281 function URIEscapeJS(str) {
282 var s = ToString(str);
283 return %URIEscape(s);
// Installed as the global "unescape" function: converts |str| with ToString
// and returns the result of the %URIUnescape runtime function.
287 function URIUnescapeJS(str) {
288 var s = ToString(str);
289 return %URIUnescape(s);
292 // ECMA-262 - 15.1.3.1.
// Installed as the global "decodeURI" function: converts |uri| with ToString
// and decodes it with Decode, passing a predicate that returns true for the
// char codes listed below.
// NOTE(review): the predicate's fall-through result is not visible in this
// excerpt (presumably false).
293 function URIDecode(uri) {
294 var reservedPredicate = function(cc) {
// '#' and '$'
296 if (35 <= cc && cc <= 36) return true;
// '&'
298 if (cc == 38) return true;
// '+' and ','
300 if (43 <= cc && cc <= 44) return true;
// '/'
302 if (cc == 47) return true;
// ':' and ';'
304 if (58 <= cc && cc <= 59) return true;
// '='
306 if (cc == 61) return true;
// '?' and '@'
308 if (63 <= cc && cc <= 64) return true;
312 var string = ToString(uri);
313 return Decode(string, reservedPredicate);
316 // ECMA-262 - 15.1.3.2.
// Installed as the global "decodeURIComponent" function: converts
// |component| with ToString and decodes it with Decode. The predicate always
// returns false, so no decoded character is treated as reserved.
317 function URIDecodeComponent(component) {
318 var reservedPredicate = function(cc) { return false; };
319 var string = ToString(component);
320 return Decode(string, reservedPredicate);
323 // ECMA-262 - 15.1.3.3.
// Installed as the global "encodeURI" function: converts |uri| with ToString
// and encodes it with Encode, passing a predicate that returns true for
// ASCII letters and digits and for the char codes listed below.
// NOTE(review): the predicate's fall-through result is not visible in this
// excerpt (presumably false).
324 function URIEncode(uri) {
325 var unescapePredicate = function(cc) {
326 if (isAlphaNumeric(cc)) return true;
// '!'
328 if (cc == 33) return true;
// '#' and '$'
330 if (35 <= cc && cc <= 36) return true;
// '&' through '/': & ' ( ) * + , - . /
332 if (38 <= cc && cc <= 47) return true;
// ':' and ';'
334 if (58 <= cc && cc <= 59) return true;
// '='
336 if (cc == 61) return true;
// '?' and '@'
338 if (63 <= cc && cc <= 64) return true;
// '_'
340 if (cc == 95) return true;
// '~'
342 if (cc == 126) return true;
346 var string = ToString(uri);
347 return Encode(string, unescapePredicate);
350 // ECMA-262 - 15.1.3.4
// Installed as the global "encodeURIComponent" function: converts
// |component| with ToString and encodes it with Encode, passing a predicate
// that returns true for ASCII letters and digits and for the char codes
// listed below.
// NOTE(review): the predicate's fall-through result is not visible in this
// excerpt (presumably false).
351 function URIEncodeComponent(component) {
352 var unescapePredicate = function(cc) {
353 if (isAlphaNumeric(cc)) return true;
// '!'
355 if (cc == 33) return true;
// ''' through '*': ' ( ) *
357 if (39 <= cc && cc <= 42) return true;
// '-' and '.'
359 if (45 <= cc && cc <= 46) return true;
// '_'
361 if (cc == 95) return true;
// '~'
363 if (cc == 126) return true;
367 var string = ToString(component);
368 return Encode(string, unescapePredicate);
371 // -------------------------------------------------------------------
372 // Install exported functions.
374 // Set up non-enumerable URI functions on the global object and set their names.
// The array alternates between the property name to install on |global| and
// the function defined above that implements it; DONT_ENUM is passed as the
// property attribute.
// NOTE(review): the end of the array and of the call is not visible in this
// excerpt.
376 utils.InstallFunctions(global, DONT_ENUM, [
377 "escape", URIEscapeJS,
378 "unescape", URIUnescapeJS,
379 "decodeURI", URIDecode,
380 "decodeURIComponent", URIDecodeComponent,
381 "encodeURI", URIEncode,
382 "encodeURIComponent", URIEncodeComponent