1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "src/arguments.h"
8 #include "src/conversions.h"
9 #include "src/runtime/runtime-utils.h"
10 #include "src/string-search.h"
11 #include "src/utils.h"
// Declares GetCharVector, which takes a string handle and returns a vector of
// const Char over it. Only the declaration is given here; definitions for
// uint8_t and uc16 follow.
17 template <typename Char>
18 static INLINE(Vector<const Char> GetCharVector(Handle<String> string));
// One-byte variant of GetCharVector: fetches the string's flat content and
// returns it as a vector of uint8_t.
22 Vector<const uint8_t> GetCharVector(Handle<String> string) {
23 String::FlatContent flat = string->GetFlatContent();
// Debug-only check that the flat content really is one-byte.
24 DCHECK(flat.IsOneByte());
25 return flat.ToOneByteVector();
// Two-byte variant of GetCharVector: fetches the string's flat content and
// returns it as a vector of uc16.
30 Vector<const uc16> GetCharVector(Handle<String> string) {
31 String::FlatContent flat = string->GetFlatContent();
// Debug-only check that the flat content really is two-byte.
32 DCHECK(flat.IsTwoByte());
33 return flat.ToUC16Vector();
// Groups the static helpers that implement unescaping of "%XX" and "%uXXXX"
// sequences in a string.
37 class URIUnescape : public AllStatic {
// Entry point: returns the unescaped form of |source|, templated on the
// character width of the string's flat content.
39 template <typename Char>
40 MUST_USE_RESULT static MaybeHandle<String> Unescape(Isolate* isolate,
41 Handle<String> source);
// Lookup table from character code to hex digit value. Its size is 'g', so
// the valid indices are exactly 0..'f'.
44 static const signed char kHexValue['g'];
// Unescapes |string| once an escape candidate has been located.
// NOTE(review): the remaining parameter(s) of this declaration are not
// visible in this view.
46 template <typename Char>
47 MUST_USE_RESULT static MaybeHandle<String> UnescapeSlow(Isolate* isolate,
48 Handle<String> string,
// Combines two hex digit characters into one value, or yields -1 if either
// character is not a hex digit.
51 static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2));
// Decodes the (possibly escaped) character at index |i| of |vector|.
// NOTE(review): the remaining parameter(s) of this declaration are not
// visible in this view.
53 template <typename Char>
54 static INLINE(int UnescapeChar(Vector<const Char> vector, int i, int length,
// Hex digit value for each character code from 0 through 'f' (103 entries).
// '0'..'9' map to 0..9, 'A'..'F' and 'a'..'f' map to 10..15, and every other
// code maps to -1. The entry written as -0 (for '0') evaluates to 0.
59 const signed char URIUnescape::kHexValue[] = {
60 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
61 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
62 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -0, 1, 2, 3, 4, 5,
63 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1,
64 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
65 -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15};
// Unescapes |source|. Searches for the first '%' character; if there is none
// the input handle is returned unchanged, otherwise the work is handed to
// UnescapeSlow together with the index of that '%'.
68 template <typename Char>
69 MaybeHandle<String> URIUnescape::Unescape(Isolate* isolate,
70 Handle<String> source) {
// The raw character vector is used during the search, so heap allocation is
// disallowed while |no_allocation| is alive.
73 DisallowHeapAllocation no_allocation;
74 StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%"));
75 index = search.Search(GetCharVector<Char>(source), 0);
// A negative index means no '%' was found: nothing to unescape.
76 if (index < 0) return source;
78 return UnescapeSlow<Char>(isolate, source, index);
// Builds the unescaped string in two passes over the characters starting at
// |start_index|: the first pass counts the output characters, the second
// writes them into a freshly allocated sequential string. The prefix
// [0, start_index) is taken as a substring and concatenated with the new
// part via a cons string.
82 template <typename Char>
83 MaybeHandle<String> URIUnescape::UnescapeSlow(Isolate* isolate,
84 Handle<String> string,
87 int length = string->length();
89 int unescaped_length = 0;
// Pass 1: count one output character per decoded input character and compare
// each decoded value against String::kMaxOneByteCharCode.
// NOTE(review): the statements executed inside this loop (step handling and
// what happens when the comparison is true) are not visible in this view.
91 DisallowHeapAllocation no_allocation;
92 Vector<const Char> vector = GetCharVector<Char>(string);
93 for (int i = start_index; i < length; unescaped_length++) {
95 if (UnescapeChar(vector, i, length, &step) >
96 String::kMaxOneByteCharCode) {
103 DCHECK(start_index < length);
// The part before the first escape candidate is reused as a substring.
104 Handle<String> first_part =
105 isolate->factory()->NewProperSubString(string, 0, start_index);
107 int dest_position = 0;
108 Handle<String> second_part;
109 DCHECK(unescaped_length <= String::kMaxLength);
// Pass 2, one-byte destination: decode each character into |dest|.
// NOTE(review): the condition that selects between the one-byte and the
// two-byte destination is not visible in this view.
111 Handle<SeqOneByteString> dest = isolate->factory()
112 ->NewRawOneByteString(unescaped_length)
114 DisallowHeapAllocation no_allocation;
115 Vector<const Char> vector = GetCharVector<Char>(string);
116 for (int i = start_index; i < length; dest_position++) {
118 dest->SeqOneByteStringSet(dest_position,
119 UnescapeChar(vector, i, length, &step));
// Pass 2, two-byte destination: same decoding loop, writing 16-bit units.
124 Handle<SeqTwoByteString> dest = isolate->factory()
125 ->NewRawTwoByteString(unescaped_length)
127 DisallowHeapAllocation no_allocation;
128 Vector<const Char> vector = GetCharVector<Char>(string);
129 for (int i = start_index; i < length; dest_position++) {
131 dest->SeqTwoByteStringSet(dest_position,
132 UnescapeChar(vector, i, length, &step));
// Result is the untouched prefix followed by the decoded remainder.
137 return isolate->factory()->NewConsString(first_part, second_part);
// Interprets |character1| and |character2| as the high and low hex digits of
// a byte. Returns the combined value (0..255), or -1 if either character is
// not a hex digit.
141 int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) {
// kHexValue only has entries up to 'f', so larger codes are rejected before
// indexing the table.
142 if (character1 > 'f') return -1;
143 int hi = kHexValue[character1];
144 if (hi == -1) return -1;
145 if (character2 > 'f') return -1;
146 int lo = kHexValue[character2];
147 if (lo == -1) return -1;
148 return (hi << 4) + lo;
// Decodes the character at index |i| of |vector|. Two escape forms are
// recognized: "%uXXXX" (four hex digits, needs at least 6 characters left)
// and "%XX" (two hex digits, needs at least 3 characters left).
// NOTE(review): the trailing parameter, the step bookkeeping and the
// fall-through return for unescaped characters are not visible in this view.
152 template <typename Char>
153 int URIUnescape::UnescapeChar(Vector<const Char> vector, int i, int length,
155 uint16_t character = vector[i];
// "%uXXXX": the two digit pairs form the high and low byte of the result.
158 if (character == '%' && i <= length - 6 && vector[i + 1] == 'u' &&
159 (hi = TwoDigitHex(vector[i + 2], vector[i + 3])) != -1 &&
160 (lo = TwoDigitHex(vector[i + 4], vector[i + 5])) != -1) {
162 return (hi << 8) + lo;
// "%XX": a single digit pair.
163 } else if (character == '%' && i <= length - 3 &&
164 (lo = TwoDigitHex(vector[i + 1], vector[i + 2])) != -1) {
// Groups the static helpers and tables that implement escaping of a string
// into "%XX" / "%uXXXX" form.
174 class URIEscape : public AllStatic {
// Entry point: returns the escaped form of |string|, templated on the
// character width of the string's flat content.
176 template <typename Char>
177 MUST_USE_RESULT static MaybeHandle<String> Escape(Isolate* isolate,
178 Handle<String> string);
// Hex digit characters (16 digits plus the terminating NUL).
181 static const char kHexChars[17];
// One flag per code 0..255: non-zero means the character is emitted as-is.
182 static const char kNotEscaped[256];
// Table lookup for |c|. The table has 256 entries while |c| is 16 bits wide.
// NOTE(review): callers presumably guarantee c < 256 before calling; confirm.
184 static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; }
// Upper-case hex digits, indexed by nibble value (0..15).
188 const char URIEscape::kHexChars[] = "0123456789ABCDEF";
// Table of 256 flags indexed by character code. An entry of 1 marks a
// character that is copied through unchanged (letters, digits and @*_+./-);
// an entry of 0 marks a character that must be escaped.
191 // kNotEscaped is generated by the following:
194 // for (my $i = 0; $i < 256; $i++) {
195 // print "\n" if $i % 16 == 0;
198 // $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
199 // print $escaped ? "0, " : "1, ";
202 const char URIEscape::kNotEscaped[] = {
203 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
204 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
205 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
206 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
207 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
208 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
212 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
213 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// Escapes |string| in two passes: the first computes the length of the
// escaped output, the second writes it into a newly allocated one-byte
// string. If escaping would not change the length, the input handle is
// returned as-is.
216 template <typename Char>
217 MaybeHandle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) {
218 DCHECK(string->IsFlat());
219 int escaped_length = 0;
220 int length = string->length();
// Pass 1: accumulate the output length per input character.
// NOTE(review): the first branch condition and the per-branch length
// increments are not visible in this view.
223 DisallowHeapAllocation no_allocation;
224 Vector<const Char> vector = GetCharVector<Char>(string);
225 for (int i = 0; i < length; i++) {
226 uint16_t c = vector[i];
229 } else if (IsNotEscaped(c)) {
235 // We don't allow strings that are longer than a maximal length.
236 DCHECK(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow.
237 if (escaped_length > String::kMaxLength) break; // Provoke exception.
241 // No length change implies no change. Return original string if no change.
242 if (escaped_length == length) return string;
244 Handle<SeqOneByteString> dest;
245 ASSIGN_RETURN_ON_EXCEPTION(
246 isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length),
248 int dest_position = 0;
// Pass 2: emit the escaped characters into |dest|.
251 DisallowHeapAllocation no_allocation;
252 Vector<const Char> vector = GetCharVector<Char>(string);
253 for (int i = 0; i < length; i++) {
254 uint16_t c = vector[i];
// Six-character form: '%', 'u', then the four hex nibbles of |c| from most
// to least significant.
// NOTE(review): the condition guarding this branch is not visible here.
256 dest->SeqOneByteStringSet(dest_position, '%');
257 dest->SeqOneByteStringSet(dest_position + 1, 'u');
258 dest->SeqOneByteStringSet(dest_position + 2, kHexChars[c >> 12]);
259 dest->SeqOneByteStringSet(dest_position + 3, kHexChars[(c >> 8) & 0xf]);
260 dest->SeqOneByteStringSet(dest_position + 4, kHexChars[(c >> 4) & 0xf]);
261 dest->SeqOneByteStringSet(dest_position + 5, kHexChars[c & 0xf]);
// Characters flagged in kNotEscaped are copied through unchanged.
263 } else if (IsNotEscaped(c)) {
264 dest->SeqOneByteStringSet(dest_position, c);
// Three-character form: '%' followed by the two hex nibbles of |c|.
267 dest->SeqOneByteStringSet(dest_position, '%');
268 dest->SeqOneByteStringSet(dest_position + 1, kHexChars[c >> 4]);
269 dest->SeqOneByteStringSet(dest_position + 2, kHexChars[c & 0xf]);
// Runtime entry for escaping. Takes exactly one String argument, flattens
// it, and dispatches to URIEscape::Escape for the one-byte or two-byte
// representation.
279 RUNTIME_FUNCTION(Runtime_URIEscape) {
280 HandleScope scope(isolate);
281 DCHECK(args.length() == 1);
282 CONVERT_ARG_HANDLE_CHECKED(String, source, 0);
283 Handle<String> string = String::Flatten(source);
284 DCHECK(string->IsFlat());
285 Handle<String> result;
// NOTE(review): the representation check uses the flattened |string|, but
// the call passes the original |source| handle. Confirm this is intended.
286 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
287 isolate, result, string->IsOneByteRepresentationUnderneath()
288 ? URIEscape::Escape<uint8_t>(isolate, source)
289 : URIEscape::Escape<uc16>(isolate, source));
// Runtime entry for unescaping. Takes exactly one String argument, flattens
// it, and dispatches to URIUnescape::Unescape for the one-byte or two-byte
// representation.
294 RUNTIME_FUNCTION(Runtime_URIUnescape) {
295 HandleScope scope(isolate);
296 DCHECK(args.length() == 1);
297 CONVERT_ARG_HANDLE_CHECKED(String, source, 0);
298 Handle<String> string = String::Flatten(source);
299 DCHECK(string->IsFlat());
300 Handle<String> result;
// NOTE(review): the representation check uses the flattened |string|, but
// the call passes the original |source| handle. Confirm this is intended.
301 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
302 isolate, result, string->IsOneByteRepresentationUnderneath()
303 ? URIUnescape::Unescape<uint8_t>(isolate, source)
304 : URIUnescape::Unescape<uc16>(isolate, source));
308 } // namespace v8::internal