1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // This file contains support for URI manipulations written in JavaScript.
8 (function(global, utils) {
12 %CheckIsBootstrapping();
14 // -------------------------------------------------------------------
17 var GlobalObject = global.Object;
18 var GlobalArray = global.Array;
19 var InternalArray = utils.InternalArray;
21 // -------------------------------------------------------------------
22 // Define internal helper functions.
// Maps the char code of a hexadecimal digit to its numeric value (0-15).
// NOTE(review): the result for a non-hex code is not visible in this span;
// URIHexCharsToCharCode compares the result against -1 - confirm.
24 function HexValueOf(code) {
// '0'..'9' (48..57) -> 0..9
26 if (code >= 48 && code <= 57) return code - 48;
// 'A'..'F' (65..70) -> 10..15
28 if (code >= 65 && code <= 70) return code - 55;
// 'a'..'f' (97..102) -> 10..15
30 if (code >= 97 && code <= 102) return code - 87;
35 // Does the char code correspond to an alpha-numeric char.
// Tests cc against three ASCII ranges only: lower-case letters, upper-case
// letters and decimal digits. Used by the unescape predicates of URIEncode
// and URIEncodeComponent.
36 function isAlphaNumeric(cc) {
// 'a'..'z'
38 if (97 <= cc && cc <= 122) return true;
// 'A'..'Z'
40 if (65 <= cc && cc <= 90) return true;
// '0'..'9'
42 if (48 <= cc && cc <= 57) return true;
47 // Lazily initialized.
// Lookup table from a 4-bit value to the char code of its hex digit
// ('0'-'9', 'A'-'F'). Holds the sentinel 0 until URIEncodeOctets replaces it
// with the 16-entry array on first use.
48 var hexCharCodeArray = 0;
// Writes the percent-encoding of one octet into |result| as three char
// codes starting at |index|: '%', then the hex digit for the high nibble,
// then the hex digit for the low nibble.
// Indexes hexCharCodeArray, which URIEncodeOctets initializes before
// calling this function.
// NOTE(review): callers assign this function's result back to their index,
// so it presumably returns the advanced index - the return statement is not
// visible in this span.
50 function URIAddEncodedOctetToBuffer(octet, result, index) {
51 result[index++] = 37; // Char code of '%'.
// High nibble (bits 4-7) first, then low nibble (bits 0-3).
52 result[index++] = hexCharCodeArray[octet >> 4];
53 result[index++] = hexCharCodeArray[octet & 0x0F];
// Writes the percent-encoded form of up to four octets into |result|,
// starting at |index|, by calling URIAddEncodedOctetToBuffer per octet.
// octets[0] is always written; octets[1] to octets[3] are written only when
// truthy, so unset array slots (and a value of 0) are skipped.
57 function URIEncodeOctets(octets, result, index) {
// First use: build the table of char codes for '0'-'9' and 'A'-'F'.
58 if (hexCharCodeArray === 0) {
59 hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
60 65, 66, 67, 68, 69, 70];
// Each call hands back the position for the next write.
62 index = URIAddEncodedOctetToBuffer(octets[0], result, index);
63 if (octets[1]) index = URIAddEncodedOctetToBuffer(octets[1], result, index);
64 if (octets[2]) index = URIAddEncodedOctetToBuffer(octets[2], result, index);
65 if (octets[3]) index = URIAddEncodedOctetToBuffer(octets[3], result, index);
// Encodes a single char code |cc| as percent-escaped octets written to
// |result| at |index|. The octets are collected in a three-slot array and
// handed to URIEncodeOctets, whose result is returned.
69 function URIEncodeSingle(cc, result, index) {
// x: bits 12-15 of cc; y: bits 6-11 of cc.
70 var x = (cc >> 12) & 0xF;
71 var y = (cc >> 6) & 63;
// Slots not assigned stay unset and are skipped by URIEncodeOctets.
73 var octets = new GlobalArray(3);
// NOTE(review): the octet layout is chosen by the magnitude of cc; 0x07FF
// looks like the upper bound of the two-octet UTF-8 form - the branch bodies
// are not visible in this span, confirm.
76 } else if (cc <= 0x07FF) {
84 return URIEncodeOctets(octets, result, index);
// Encodes a pair of char codes as four percent-escaped octets written to
// |result| at |index|, returning the result of URIEncodeOctets.
// As called from Encode, cc2 has been checked to lie in 0xDC00-0xDFFF and
// cc1 has passed Encode's 0xDC00-0xDFFF rejection.
87 function URIEncodePair(cc1 , cc2, result, index) {
// u: bits 6-9 of cc1, plus one. w: bits 2-5 of cc1.
88 var u = ((cc1 >> 6) & 0xF) + 1;
89 var w = (cc1 >> 2) & 0xF;
// y: bits 6-9 of cc2.
91 var y = (cc2 >> 6) & 0xF;
93 var octets = new GlobalArray(4);
// First octet carries the top bits of u on a base of 240 (0xF0); the
// following octets are built on a base of 128 (0x80).
94 octets[0] = (u >> 2) + 240;
95 octets[1] = (((u & 3) << 4) | w) + 128;
// NOTE(review): x is not defined in the visible lines of this function.
96 octets[2] = ((x << 4) | y) + 128;
98 return URIEncodeOctets(octets, result, index);
// Combines the char codes of two hex digits into one byte value: |highChar|
// supplies bits 4-7 and |lowChar| bits 0-3.
// Throws the error created by MakeURIError() when HexValueOf yields -1 for
// either argument.
101 function URIHexCharsToCharCode(highChar, lowChar) {
102 var highCode = HexValueOf(highChar);
103 var lowCode = HexValueOf(lowChar);
104 if (highCode == -1 || lowCode == -1) throw MakeURIError();
105 return (highCode << 4) | lowCode;
108 // Callers must ensure that |result| is a sufficiently long sequential two-byte string.
// Combines the octets of one multi-octet sequence into a single value and
// stores it in |result| at |index| using %_TwoByteSeqStringSetChar.
// Throws the error created by MakeURIError() when an octet is out of range,
// when the combined value is outside the range checked for its sequence
// length, or when the value lies in 0xD800-0xDFFF.
110 function URIDecodeOctets(octets, result, index) {
// A first octet that reaches this branch and is below 0xc2 is rejected.
115 } else if (o0 < 0xc2) {
116 throw MakeURIError();
// Two-octet form: the second octet must lie in 0x80-0xbf and the combined
// value in 0x80-0x7ff.
121 if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
123 value = (a << 6) + b;
124 if (value < 0x80 || value > 0x7ff) throw MakeURIError();
// Three-octet form: both following octets must lie in 0x80-0xbf and the
// combined value in 0x800-0xffff.
129 if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
131 if ((o2 < 0x80) || (o2 > 0xbf)) throw MakeURIError();
133 value = (a << 12) + (b << 6) + c;
134 if ((value < 0x800) || (value > 0xffff)) throw MakeURIError();
// Four-octet form: all three following octets must lie in 0x80-0xbf and the
// combined value in 0x10000-0x10ffff.
139 if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
141 if ((o2 < 0x80) || (o2 > 0xbf)) {
142 throw MakeURIError();
145 if ((o3 < 0x80) || (o3 > 0xbf)) throw MakeURIError();
147 value = (a << 18) + (b << 12) + (c << 6) + d;
148 if ((value < 0x10000) || (value > 0x10ffff)) throw MakeURIError();
// No accepted form matched.
150 throw MakeURIError();
// Values in the surrogate range may not be produced by decoding.
155 if (0xD800 <= value && value <= 0xDFFF) throw MakeURIError();
// Values below 0x10000 are stored as one char.
156 if (value < 0x10000) {
157 %_TwoByteSeqStringSetChar(index++, value, result);
// Larger values are stored as two chars. 0xd7c0 equals
// 0xd800 - (0x10000 >> 10), so the first char starts at 0xd800 for
// value 0x10000; the second is 0xdc00 plus the low ten bits.
159 %_TwoByteSeqStringSetChar(index++, (value >> 10) + 0xd7c0, result);
160 %_TwoByteSeqStringSetChar(index++, (value & 0x3ff) + 0xdc00, result);
165 // ECMA-262, section 15.1.3
// Shared implementation behind URIEncode and URIEncodeComponent.
// Converts |uri| to a string, collects the output char codes in an
// InternalArray, then copies them into a newly allocated one-byte string.
// |unescape| is supplied by the callers as a predicate over a char code.
// NOTE(review): the call to |unescape| is not visible in this span;
// presumably codes it accepts are copied through unchanged - confirm.
// Throws the error created by MakeURIError() for a char code in
// 0xDC00-0xDFFF that does not follow one in 0xD800-0xDBFF, and for a char
// code in 0xD800-0xDBFF that is not followed by one in 0xDC00-0xDFFF.
166 function Encode(uri, unescape) {
167 uri = TO_STRING(uri);
168 var uriLength = uri.length;
// Starts at the input length; writes through index++ may go past that.
169 var array = new InternalArray(uriLength);
171 for (var k = 0; k < uriLength; k++) {
172 var cc1 = %_StringCharCodeAt(uri, k);
// Char code stored as is, without escaping.
174 array[index++] = cc1;
// A char code in 0xDC00-0xDFFF is invalid in first position.
176 if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw MakeURIError();
// Anything outside 0xD800-0xDBFF is encoded on its own.
177 if (cc1 < 0xD800 || cc1 > 0xDBFF) {
178 index = URIEncodeSingle(cc1, array, index);
// Throws when k has reached the end of the input, i.e. there is no second
// char code to read. NOTE(review): the step that advances k past cc1 is not
// visible in this span.
181 if (k == uriLength) throw MakeURIError();
182 var cc2 = %_StringCharCodeAt(uri, k);
// The second char code must lie in 0xDC00-0xDFFF.
183 if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw MakeURIError();
184 index = URIEncodePair(cc1, cc2, array, index);
// Copy the collected char codes into a one-byte string of the same length.
189 var result = %NewString(array.length, NEW_ONE_BYTE_STRING);
190 for (var i = 0; i < array.length; i++) {
191 %_OneByteSeqStringSetChar(i, array[i], result);
196 // ECMA-262, section 15.1.3
// Shared implementation behind URIDecode and URIDecodeComponent.
// Converts |uri| to a string and decodes its '%XX' escapes in two phases:
// output goes to a one-byte string until an escape decodes to a value of 128
// or more, or a char code above 0x7f is seen; the rest of the input is then
// written to a two-byte string. The result is the concatenation of the two.
// |reserved| is a predicate over a decoded char code; in the two-byte phase
// an escape whose decoded value it accepts is copied through as its original
// three characters instead of being decoded.
// NOTE(review): the one-byte phase writes both the original escape and the
// decoded char below; the condition choosing between them is not visible in
// this span - presumably it is the same |reserved| test.
// Throws the error created by MakeURIError() for truncated or malformed
// escapes.
197 function Decode(uri, reserved) {
198 uri = TO_STRING(uri);
199 var uriLength = uri.length;
// Output never needs more chars than the input has.
200 var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING);
204 // Optimistically assume one-byte string.
205 for ( ; k < uriLength; k++) {
206 var code = %_StringCharCodeAt(uri, k);
207 if (code == 37) { // '%'
// An escape needs two more characters after the '%'.
208 if (k + 2 >= uriLength) throw MakeURIError();
209 var cc = URIHexCharsToCharCode(%_StringCharCodeAt(uri, k+1),
210 %_StringCharCodeAt(uri, k+2));
// cc >> 7 is non-zero for any decoded value of 128 or more. k still points
// at the '%', so the second loop re-reads this escape.
211 if (cc >> 7) break; // Assumption wrong, two-byte string.
// Escape copied through unchanged: '%' and the two hex digits.
213 %_OneByteSeqStringSetChar(index++, 37, one_byte); // '%'.
214 %_OneByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k+1),
216 %_OneByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k+2),
// Decoded char written in place of the escape.
219 %_OneByteSeqStringSetChar(index++, cc, one_byte);
223 if (code > 0x7f) break; // Assumption wrong, two-byte string.
224 %_OneByteSeqStringSetChar(index++, code, one_byte);
// Cut the one-byte string down to the chars actually written.
228 one_byte = %TruncateString(one_byte, index);
// The whole input was consumed in the one-byte phase.
229 if (k == uriLength) return one_byte;
231 // Write into two byte string.
// Sized for the input chars that remain from position k.
232 var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING);
235 for ( ; k < uriLength; k++) {
236 var code = %_StringCharCodeAt(uri, k);
237 if (code == 37) { // '%'
238 if (k + 2 >= uriLength) throw MakeURIError();
// The two ++k leave k on the second hex digit of this escape.
239 var cc = URIHexCharsToCharCode(%_StringCharCodeAt(uri, ++k),
240 %_StringCharCodeAt(uri, ++k));
// Advance n while the bit of cc selected by (cc << n) & 0x80 is set; n is
// then used as the number of octets in the sequence.
// NOTE(review): n's initial value is not visible in this span.
243 while (((cc << ++n) & 0x80) != 0) { }
// A count of 1, or of more than 4, is rejected.
244 if (n == 1 || n > 4) throw MakeURIError();
245 var octets = new GlobalArray(n);
// Each of the remaining n - 1 octets takes three more input characters.
247 if (k + 3 * (n - 1) >= uriLength) throw MakeURIError();
248 for (var i = 1; i < n; i++) {
// Every following octet must itself be written as a '%XX' escape.
249 if (uri[++k] != '%') throw MakeURIError();
250 octets[i] = URIHexCharsToCharCode(%_StringCharCodeAt(uri, ++k),
251 %_StringCharCodeAt(uri, ++k));
253 index = URIDecodeOctets(octets, two_byte, index);
// Reserved char: emit the original escape - '%' plus the hex digits found at
// k - 1 and k.
254 } else if (reserved(cc)) {
255 %_TwoByteSeqStringSetChar(index++, 37, two_byte); // '%'.
256 %_TwoByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k - 1),
258 %_TwoByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k),
// Otherwise emit the decoded char.
261 %_TwoByteSeqStringSetChar(index++, cc, two_byte);
// Char code that is not part of an escape is copied as is.
264 %_TwoByteSeqStringSetChar(index++, code, two_byte);
// Cut the two-byte string down to the chars actually written.
268 two_byte = %TruncateString(two_byte, index);
269 return one_byte + two_byte;
272 // -------------------------------------------------------------------
273 // Define exported functions.
// Installed on the global object under the name "escape". Forwards |s|
// unchanged to the %URIEscape runtime function and returns its result.
276 function URIEscapeJS(s) {
277 return %URIEscape(s);
// Installed on the global object under the name "unescape". Forwards |s|
// unchanged to the %URIUnescape runtime function and returns its result.
281 function URIUnescapeJS(s) {
282 return %URIUnescape(s);
285 // ECMA-262 - 15.1.3.1.
// Installed on the global object under the name "decodeURI". Calls Decode
// with a predicate that accepts the char codes listed below; Decode leaves
// escapes for accepted chars in their escaped form.
286 function URIDecode(uri) {
287 var reservedPredicate = function(cc) {
// '#' and '$'
289 if (35 <= cc && cc <= 36) return true;
// '&'
291 if (cc == 38) return true;
// '+' and ','
293 if (43 <= cc && cc <= 44) return true;
// '/'
295 if (cc == 47) return true;
// ':' and ';'
297 if (58 <= cc && cc <= 59) return true;
// '='
299 if (cc == 61) return true;
// '?' and '@'
301 if (63 <= cc && cc <= 64) return true;
305 return Decode(uri, reservedPredicate);
308 // ECMA-262 - 15.1.3.2.
// Installed on the global object under the name "decodeURIComponent". Calls
// Decode with a predicate that rejects every char code, so no decoded char
// is treated as reserved.
309 function URIDecodeComponent(component) {
310 var reservedPredicate = function(cc) { return false; };
311 return Decode(component, reservedPredicate);
314 // ECMA-262 - 15.1.3.3.
// Installed on the global object under the name "encodeURI". Calls Encode,
// passing as its |unescape| argument a predicate that accepts ASCII letters
// and digits plus the punctuation char codes listed below.
315 function URIEncode(uri) {
316 var unescapePredicate = function(cc) {
317 if (isAlphaNumeric(cc)) return true;
// '!'
319 if (cc == 33) return true;
// '#' and '$'
321 if (35 <= cc && cc <= 36) return true;
// '&' through '/': & ' ( ) * + , - . /
323 if (38 <= cc && cc <= 47) return true;
// ':' and ';'
325 if (58 <= cc && cc <= 59) return true;
// '='
327 if (cc == 61) return true;
// '?' and '@'
329 if (63 <= cc && cc <= 64) return true;
// '_'
331 if (cc == 95) return true;
// '~'
333 if (cc == 126) return true;
337 return Encode(uri, unescapePredicate);
340 // ECMA-262 - 15.1.3.4
// Installed on the global object under the name "encodeURIComponent". Calls
// Encode, passing as its |unescape| argument a predicate that accepts ASCII
// letters and digits plus the punctuation char codes listed below. The set
// is narrower than the one used by URIEncode.
341 function URIEncodeComponent(component) {
342 var unescapePredicate = function(cc) {
343 if (isAlphaNumeric(cc)) return true;
// '!'
345 if (cc == 33) return true;
// ''' through '*': ' ( ) *
347 if (39 <= cc && cc <= 42) return true;
// '-' and '.'
349 if (45 <= cc && cc <= 46) return true;
// '_'
351 if (cc == 95) return true;
// '~'
353 if (cc == 126) return true;
357 return Encode(component, unescapePredicate);
360 // -------------------------------------------------------------------
361 // Install exported functions.
363 // Set up the URI functions on the global object as non-enumerable (DONT_ENUM) properties.
365 utils.InstallFunctions(global, DONT_ENUM, [
// The array alternates a property name with the function installed under it.
366 "escape", URIEscapeJS,
367 "unescape", URIUnescapeJS,
368 "decodeURI", URIDecode,
369 "decodeURIComponent", URIDecodeComponent,
370 "encodeURI", URIEncode,
371 "encodeURIComponent", URIEncodeComponent