1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
10 #include "conversions.h"
11 #include "string-search.h"
// Returns the characters of |string| as a Vector<const Char>. Only declared
// here; definitions returning Vector<const uint8_t> and Vector<const uc16>
// follow below.
18 template <typename Char>
19 static INLINE(Vector<const Char> GetCharVector(Handle<String> string));
// uint8_t variant: fetches the string's flat content, ASSERTs that it reports
// IsAscii(), and returns it as a one-byte vector.
23 Vector<const uint8_t> GetCharVector(Handle<String> string) {
24 String::FlatContent flat = string->GetFlatContent();
25 ASSERT(flat.IsAscii());
26 return flat.ToOneByteVector();
// uc16 variant: fetches the string's flat content, ASSERTs that it reports
// IsTwoByte(), and returns it as a vector of uc16.
31 Vector<const uc16> GetCharVector(Handle<String> string) {
32 String::FlatContent flat = string->GetFlatContent();
33 ASSERT(flat.IsTwoByte());
34 return flat.ToUC16Vector();
// Decodes "%XX" and "%uXXXX" escape sequences in a string. Derives from
// AllStatic; every member visible here is static.
38 class URIUnescape : public AllStatic {
// Entry point: unescapes |source|, reading its characters as Char.
40 template<typename Char>
41 MUST_USE_RESULT static MaybeHandle<String> Unescape(Isolate* isolate,
42 Handle<String> source);
// Hex digit lookup table indexed by character code. It has 'g' entries, so
// every code up to and including 'f' is a valid index.
45 static const signed char kHexValue['g'];
// Performs the actual decoding; |start_index| is the position at which
// decoding begins (Unescape passes the index of the first '%').
47 template<typename Char>
48 MUST_USE_RESULT static MaybeHandle<String> UnescapeSlow(
49 Isolate* isolate, Handle<String> string, int start_index);
// Combines two hex digit characters into a single value; -1 signals that one
// of them is not a hex digit.
51 static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2));
// Decodes the (possibly escaped) character found at a position in |vector|.
53 template <typename Char>
54 static INLINE(int UnescapeChar(Vector<const Char> vector,
// Maps a character code in the range 0..'f' to the value of the hex digit it
// represents, or to -1 when the character is not a hex digit. Each row covers
// 16 character codes: '0'..'9' start at index 0x30, 'A'..'F' at 0x41 and
// 'a'..'f' at 0x61. The "-0" entry for '0' is simply 0.
61 const signed char URIUnescape::kHexValue[] = {
62 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
63 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
64 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
65 -0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
66 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
67 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
68 -1, 10, 11, 12, 13, 14, 15 };
// Unescapes |source|. Searches for the first '%' character: if there is none
// the original handle is returned as-is; otherwise UnescapeSlow takes over,
// starting at the position of that first '%'.
71 template<typename Char>
72 MaybeHandle<String> URIUnescape::Unescape(Isolate* isolate,
73 Handle<String> source) {
// The search runs inside a DisallowHeapAllocation scope.
// NOTE(review): presumably because the char vector points into the string's
// storage and must stay valid while it is being read -- confirm.
75 { DisallowHeapAllocation no_allocation;
76 StringSearch<uint8_t, Char> search(isolate, STATIC_ASCII_VECTOR("%"));
77 index = search.Search(GetCharVector<Char>(source), 0);
// A negative index means no '%' was found, so there is nothing to unescape.
78 if (index < 0) return source;
80 return UnescapeSlow<Char>(isolate, source, index);
// Decodes |string| from |start_index| onwards and returns the concatenation
// of the untouched prefix with the decoded remainder. Works in two passes:
// the first measures the decoded length, the second writes the decoded
// characters into a freshly allocated sequential string.
84 template <typename Char>
85 MaybeHandle<String> URIUnescape::UnescapeSlow(
86 Isolate* isolate, Handle<String> string, int start_index) {
88 int length = string->length();
// Number of characters the part starting at |start_index| decodes to.
90 int unescaped_length = 0;
// Pass 1: count decoded characters (one per loop iteration) and test whether
// any of them exceeds String::kMaxOneByteCharCode.
// NOTE(review): presumably that test decides between the one-byte and the
// two-byte destination used below -- confirm.
91 { DisallowHeapAllocation no_allocation;
92 Vector<const Char> vector = GetCharVector<Char>(string);
93 for (int i = start_index; i < length; unescaped_length++) {
95 if (UnescapeChar(vector, i, length, &step) >
96 String::kMaxOneByteCharCode) {
103 ASSERT(start_index < length);
// The prefix before |start_index| is taken over as a substring of the input.
104 Handle<String> first_part =
105 isolate->factory()->NewProperSubString(string, 0, start_index);
107 int dest_position = 0;
108 Handle<String> second_part;
109 ASSERT(unescaped_length <= String::kMaxLength);
// Pass 2, one-byte destination: store each decoded character in turn.
111 Handle<SeqOneByteString> dest = isolate->factory()->NewRawOneByteString(
112 unescaped_length).ToHandleChecked();
113 DisallowHeapAllocation no_allocation;
114 Vector<const Char> vector = GetCharVector<Char>(string);
115 for (int i = start_index; i < length; dest_position++) {
117 dest->SeqOneByteStringSet(dest_position,
118 UnescapeChar(vector, i, length, &step));
// Pass 2, two-byte destination: same loop, writing 16-bit characters.
123 Handle<SeqTwoByteString> dest = isolate->factory()->NewRawTwoByteString(
124 unescaped_length).ToHandleChecked();
125 DisallowHeapAllocation no_allocation;
126 Vector<const Char> vector = GetCharVector<Char>(string);
127 for (int i = start_index; i < length; dest_position++) {
129 dest->SeqTwoByteStringSet(dest_position,
130 UnescapeChar(vector, i, length, &step));
// Result: untouched prefix followed by the decoded remainder.
135 return isolate->factory()->NewConsString(first_part, second_part);
// Interprets |character1| and |character2| as the high and low hex digit of
// a byte and returns (hi << 4) + lo. Returns -1 if either character is not a
// hex digit.
139 int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) {
// kHexValue only has entries for codes up to 'f', so anything larger is
// rejected before the table lookup.
140 if (character1 > 'f') return -1;
141 int hi = kHexValue[character1];
142 if (hi == -1) return -1;
143 if (character2 > 'f') return -1;
144 int lo = kHexValue[character2];
145 if (lo == -1) return -1;
146 return (hi << 4) + lo;
// Decodes the character at position i of |vector|. Two escape forms are
// recognised: '%' 'u' followed by four hex digits, and '%' followed by two
// hex digits.
// NOTE(review): callers pass (vector, i, length, &step); presumably |step|
// receives the number of input characters consumed -- confirm.
150 template <typename Char>
151 int URIUnescape::UnescapeChar(Vector<const Char> vector,
155 uint16_t character = vector[i];
// "%uXXXX": the first two hex digits form the high byte, the last two the low
// byte of the result.
158 if (character == '%' &&
160 vector[i + 1] == 'u' &&
161 (hi = TwoDigitHex(vector[i + 2],
162 vector[i + 3])) != -1 &&
163 (lo = TwoDigitHex(vector[i + 4],
164 vector[i + 5])) != -1) {
166 return (hi << 8) + lo;
// "%XX": two hex digits decoded via TwoDigitHex.
167 } else if (character == '%' &&
169 (lo = TwoDigitHex(vector[i + 1],
170 vector[i + 2])) != -1) {
// Encodes characters of a string as "%XX" / "%uXXXX" escape sequences.
// Derives from AllStatic; every member visible here is static.
180 class URIEscape : public AllStatic {
// Entry point: escapes |string|, reading its characters as Char.
182 template<typename Char>
183 MUST_USE_RESULT static MaybeHandle<String> Escape(Isolate* isolate,
184 Handle<String> string);
// The 16 hex digits used for output, plus the terminating NUL.
187 static const char kHexChars[17];
// Per-character flag table: non-zero means the character is emitted as-is.
188 static const char kNotEscaped[256];
// Looks |c| up in kNotEscaped. The table has 256 entries while |c| is a
// uint16_t, so |c| must be below 256 for the lookup to stay in bounds.
190 static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; }
// Upper-case hex digits, indexed by a 4-bit value.
194 const char URIEscape::kHexChars[] = "0123456789ABCDEF";
197 // kNotEscaped is generated by the following:
200 // for (my $i = 0; $i < 256; $i++) {
201 // print "\n" if $i % 16 == 0;
204 // $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
205 // print $escaped ? "0, " : "1, ";
// One flag per character code 0..255, 16 codes per row. A 1 marks a character
// that is copied through unchanged: letters, digits and @ * _ + . / -
// Every other code, including all codes from 128 upwards, is 0.
208 const char URIEscape::kNotEscaped[] = {
209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
212 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
213 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
214 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
215 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
216 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
217 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
218 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
221 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
222 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
223 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
// Escapes |string|, which must be flat. Works in two passes: the first
// computes the length of the escaped result, the second writes the result
// into a newly allocated one-byte string. If escaping would not change the
// length, the original handle is returned instead.
227 template<typename Char>
228 MaybeHandle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) {
229 ASSERT(string->IsFlat());
230 int escaped_length = 0;
231 int length = string->length();
// Pass 1: accumulate the escaped length character by character.
233 { DisallowHeapAllocation no_allocation;
234 Vector<const Char> vector = GetCharVector<Char>(string);
235 for (int i = 0; i < length; i++) {
236 uint16_t c = vector[i];
239 } else if (IsNotEscaped(c)) {
245 // We don't allow strings that are longer than a maximal length.
246 ASSERT(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow.
247 if (escaped_length > String::kMaxLength) break; // Provoke exception.
251 // No length change implies no change. Return original string if no change.
252 if (escaped_length == length) return string;
254 Handle<SeqOneByteString> dest;
255 ASSIGN_RETURN_ON_EXCEPTION(
257 isolate->factory()->NewRawOneByteString(escaped_length),
// Next write position in |dest|.
259 int dest_position = 0;
// Pass 2: emit each character in one of three forms.
261 { DisallowHeapAllocation no_allocation;
262 Vector<const Char> vector = GetCharVector<Char>(string);
263 for (int i = 0; i < length; i++) {
264 uint16_t c = vector[i];
// Six-character form: '%', 'u', then the four hex digits of |c|, most
// significant nibble first.
// NOTE(review): presumably chosen for characters above the one-byte range --
// confirm.
266 dest->SeqOneByteStringSet(dest_position, '%');
267 dest->SeqOneByteStringSet(dest_position+1, 'u');
268 dest->SeqOneByteStringSet(dest_position+2, kHexChars[c >> 12]);
269 dest->SeqOneByteStringSet(dest_position+3, kHexChars[(c >> 8) & 0xf]);
270 dest->SeqOneByteStringSet(dest_position+4, kHexChars[(c >> 4) & 0xf]);
271 dest->SeqOneByteStringSet(dest_position+5, kHexChars[c & 0xf]);
// Characters flagged in kNotEscaped are copied through unchanged.
273 } else if (IsNotEscaped(c)) {
274 dest->SeqOneByteStringSet(dest_position, c);
// Three-character form: '%' followed by the two hex digits of |c|. The
// kHexChars[c >> 4] lookup is only in bounds when |c| is below 256.
277 dest->SeqOneByteStringSet(dest_position, '%');
278 dest->SeqOneByteStringSet(dest_position+1, kHexChars[c >> 4]);
279 dest->SeqOneByteStringSet(dest_position+2, kHexChars[c & 0xf]);
288 } } // namespace v8::internal