2 * libiri: An IRI/URI/URL parsing library
7 * Copyright (c) 2005, 2008 Mo McRoberts.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. The names of the author(s) of this software may not be used to endorse
18 * or promote products derived from this software without specific prior
21 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
22 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
23 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
24 * AUTHORS OF THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 The following code was added by Samsung Electronics.
39 @@ -59,6 +59,7 @@ iri__hexnibble(char c)
46 static inline const char *
47 @@ -130,6 +131,7 @@ iri__allocbuf(const char *src, size_t *len)
48 *len = (src - c) + 1 + sc + ((sc + 1) * (sizeof(char *) + 7));
51 + *len = 9999; // FIXME: DIRTY HACK THAT USUALLY WORKS... UNTIL IRI IS NOT TO
52 return (char *) calloc(1, *len);
57 License of the above code is Apache License
60 Copyright (c) 2000 - 2011 Samsung Electronics Co., Ltd. All rights reserved.
63 Version 2.0, January 2004
64 http://www.apache.org/licenses/
66 TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
70 "License" shall mean the terms and conditions for use, reproduction,
71 and distribution as defined by Sections 1 through 9 of this document.
73 "Licensor" shall mean the copyright owner or entity authorized by
74 the copyright owner that is granting the License.
76 "Legal Entity" shall mean the union of the acting entity and all
77 other entities that control, are controlled by, or are under common
78 control with that entity. For the purposes of this definition,
79 "control" means (i) the power, direct or indirect, to cause the
80 direction or management of such entity, whether by contract or
81 otherwise, or (ii) ownership of fifty percent (50%) or more of the
82 outstanding shares, or (iii) beneficial ownership of such entity.
84 "You" (or "Your") shall mean an individual or Legal Entity
85 exercising permissions granted by this License.
87 "Source" form shall mean the preferred form for making modifications,
88 including but not limited to software source code, documentation
89 source, and configuration files.
91 "Object" form shall mean any form resulting from mechanical
92 transformation or translation of a Source form, including but
93 not limited to compiled object code, generated documentation,
94 and conversions to other media types.
96 "Work" shall mean the work of authorship, whether in Source or
97 Object form, made available under the License, as indicated by a
98 copyright notice that is included in or attached to the work
99 (an example is provided in the Appendix below).
101 "Derivative Works" shall mean any work, whether in Source or Object
102 form, that is based on (or derived from) the Work and for which the
103 editorial revisions, annotations, elaborations, or other modifications
104 represent, as a whole, an original work of authorship. For the purposes
105 of this License, Derivative Works shall not include works that remain
106 separable from, or merely link (or bind by name) to the interfaces of,
107 the Work and Derivative Works thereof.
109 "Contribution" shall mean any work of authorship, including
110 the original version of the Work and any modifications or additions
111 to that Work or Derivative Works thereof, that is intentionally
112 submitted to Licensor for inclusion in the Work by the copyright owner
113 or by an individual or Legal Entity authorized to submit on behalf of
114 the copyright owner. For the purposes of this definition, "submitted"
115 means any form of electronic, verbal, or written communication sent
116 to the Licensor or its representatives, including but not limited to
117 communication on electronic mailing lists, source code control systems,
118 and issue tracking systems that are managed by, or on behalf of, the
119 Licensor for the purpose of discussing and improving the Work, but
120 excluding communication that is conspicuously marked or otherwise
121 designated in writing by the copyright owner as "Not a Contribution."
123 "Contributor" shall mean Licensor and any individual or Legal Entity
124 on behalf of whom a Contribution has been received by Licensor and
125 subsequently incorporated within the Work.
127 2. Grant of Copyright License. Subject to the terms and conditions of
128 this License, each Contributor hereby grants to You a perpetual,
129 worldwide, non-exclusive, no-charge, royalty-free, irrevocable
130 copyright license to reproduce, prepare Derivative Works of,
131 publicly display, publicly perform, sublicense, and distribute the
132 Work and such Derivative Works in Source or Object form.
134 3. Grant of Patent License. Subject to the terms and conditions of
135 this License, each Contributor hereby grants to You a perpetual,
136 worldwide, non-exclusive, no-charge, royalty-free, irrevocable
137 (except as stated in this section) patent license to make, have made,
138 use, offer to sell, sell, import, and otherwise transfer the Work,
139 where such license applies only to those patent claims licensable
140 by such Contributor that are necessarily infringed by their
141 Contribution(s) alone or by combination of their Contribution(s)
142 with the Work to which such Contribution(s) was submitted. If You
143 institute patent litigation against any entity (including a
144 cross-claim or counterclaim in a lawsuit) alleging that the Work
145 or a Contribution incorporated within the Work constitutes direct
146 or contributory patent infringement, then any patent licenses
147 granted to You under this License for that Work shall terminate
148 as of the date such litigation is filed.
150 4. Redistribution. You may reproduce and distribute copies of the
151 Work or Derivative Works thereof in any medium, with or without
152 modifications, and in Source or Object form, provided that You
153 meet the following conditions:
155 (a) You must give any other recipients of the Work or
156 Derivative Works a copy of this License; and
158 (b) You must cause any modified files to carry prominent notices
159 stating that You changed the files; and
161 (c) You must retain, in the Source form of any Derivative Works
162 that You distribute, all copyright, patent, trademark, and
163 attribution notices from the Source form of the Work,
164 excluding those notices that do not pertain to any part of
165 the Derivative Works; and
167 (d) If the Work includes a "NOTICE" text file as part of its
168 distribution, then any Derivative Works that You distribute must
169 include a readable copy of the attribution notices contained
170 within such NOTICE file, excluding those notices that do not
171 pertain to any part of the Derivative Works, in at least one
172 of the following places: within a NOTICE text file distributed
173 as part of the Derivative Works; within the Source form or
174 documentation, if provided along with the Derivative Works; or,
175 within a display generated by the Derivative Works, if and
176 wherever such third-party notices normally appear. The contents
177 of the NOTICE file are for informational purposes only and
178 do not modify the License. You may add Your own attribution
179 notices within Derivative Works that You distribute, alongside
180 or as an addendum to the NOTICE text from the Work, provided
181 that such additional attribution notices cannot be construed
182 as modifying the License.
184 You may add Your own copyright statement to Your modifications and
185 may provide additional or different license terms and conditions
186 for use, reproduction, or distribution of Your modifications, or
187 for any such Derivative Works as a whole, provided Your use,
188 reproduction, and distribution of the Work otherwise complies with
189 the conditions stated in this License.
191 5. Submission of Contributions. Unless You explicitly state otherwise,
192 any Contribution intentionally submitted for inclusion in the Work
193 by You to the Licensor shall be under the terms and conditions of
194 this License, without any additional terms or conditions.
195 Notwithstanding the above, nothing herein shall supersede or modify
196 the terms of any separate license agreement you may have executed
197 with Licensor regarding such Contributions.
199 6. Trademarks. This License does not grant permission to use the trade
200 names, trademarks, service marks, or product names of the Licensor,
201 except as required for reasonable and customary use in describing the
202 origin of the Work and reproducing the content of the NOTICE file.
204 7. Disclaimer of Warranty. Unless required by applicable law or
205 agreed to in writing, Licensor provides the Work (and each
206 Contributor provides its Contributions) on an "AS IS" BASIS,
207 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
208 implied, including, without limitation, any warranties or conditions
209 of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
210 PARTICULAR PURPOSE. You are solely responsible for determining the
211 appropriateness of using or redistributing the Work and assume any
212 risks associated with Your exercise of permissions under this License.
214 8. Limitation of Liability. In no event and under no legal theory,
215 whether in tort (including negligence), contract, or otherwise,
216 unless required by applicable law (such as deliberate and grossly
217 negligent acts) or agreed to in writing, shall any Contributor be
218 liable to You for damages, including any direct, indirect, special,
219 incidental, or consequential damages of any character arising as a
220 result of this License or out of the use or inability to use the
221 Work (including but not limited to damages for loss of goodwill,
222 work stoppage, computer failure or malfunction, or any and all
223 other commercial damages or losses), even if such Contributor
224 has been advised of the possibility of such damages.
226 9. Accepting Warranty or Additional Liability. While redistributing
227 the Work or Derivative Works thereof, You may choose to offer,
228 and charge a fee for, acceptance of support, warranty, indemnity,
229 or other liability obligations and/or rights consistent with this
230 License. However, in accepting such obligations, You may act only
231 on Your own behalf and on Your sole responsibility, not on behalf
232 of any other Contributor, and only if You agree to indemnify,
233 defend, and hold each Contributor harmless for any liability
234 incurred by, or claims asserted against, such Contributor by reason
235 of your accepting any such warranty or additional liability.
237 END OF TERMS AND CONDITIONS
239 APPENDIX: How to apply the Apache License to your work.
241 To apply the Apache License to your work, attach the following
242 boilerplate notice, with the fields enclosed by brackets "[]"
243 replaced with your own identifying information. (Don't include
244 the brackets!) The text should be enclosed in the appropriate
245 comment syntax for the file format. We also recommend that a
246 file or class name and description of purpose be included on the
247 same "printed page" as the copyright notice for easier
248 identification within third-party archives.
250 Copyright [yyyy] [name of copyright owner]
252 Licensed under the Apache License, Version 2.0 (the "License");
253 you may not use this file except in compliance with the License.
254 You may obtain a copy of the License at
256 http://www.apache.org/licenses/LICENSE-2.0
258 Unless required by applicable law or agreed to in writing, software
259 distributed under the License is distributed on an "AS IS" BASIS,
260 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
261 See the License for the specific language governing permissions and
262 limitations under the License.
273 #include "p_libiri.h"
278 #define _ALIGN(x) ((((x)+(ALIGNMENT-1))&~(ALIGNMENT-1)))
279 #define ALIGN(x) (char *) _ALIGN((size_t) x)
282 iri__hexnibble(char c)
284 if(c >= '0' && c <= '9')
288 if(c >= 'A' && c <= 'F')
292 if(c >= 'a' && c <= 'f')
299 static inline const char *
300 iri__copychar(char **dest, const char *src)
308 /* TODO: Punycode decoding for the host part */
309 static inline const char *
310 iri__copychar_decode(char **dest, const char *src, int convert_space)
312 unsigned char *p = (unsigned char *) (*dest);
314 if(1 == convert_space && '+' == *src)
320 if(0 == isxdigit(src[1]) || 0 == isxdigit(src[2]))
322 /* TODO: Deal with %u<nnnn> non-standard encoding - be liberal in
323 * what you accept, etc.
329 *p = (iri__hexnibble(src[1]) << 4) | iri__hexnibble(src[2]);
343 iri__allocbuf(const char *src, size_t *len)
348 Internal format of IRI structure is very hard to understand at first.
349 The buffer is used to store character strings with every parsed part of
350 IRI, like host, user, auth, path etc. Start of every character string is
351 ALIGNED to ALIGNMENT value and finished with NULL byte.
352 Above that, the buffer is used to keep variable size array of parsed
353 scheme parts. It consists of an array of addresses pointing to the starts
354 of the scheme parts, which are kept like all other character strings, so are
355 aligned to ALIGNMENT and ended with a NULL byte.
356 This function calculates approximation of buffer size to store all the
359 Fully filled buffer with scheme parts looks as follows:
360 0. start of the buffer
361 1. aligned start of the scheme part with added NULL byte
362 2. aligned start of the user part with added NULL byte
363 3. aligned start of the password part with added NULL byte
364 4. aligned start of the array of size schemes_number+1 of pointers that point
365 to consecutive scheme part character strings (last one is NULL)
366 schemes_number is a number of scheme tokens delimited with + sign in
368 5. schemes_number character strings of scheme parts, each of which is
369 aligned and finished with NULL byte.
370 6. aligned start of the host part with added NULL byte
371 7. aligned start of the path part with added NULL byte
372 8. aligned start of the query part with added NULL byte
373 9. aligned start of the anchor part with added NULL byte
375 Four kinds of characters can be identified in an IRI:
376 - characters which are copied one to one (e.g. letters)
377 - characters which are removed (special characters like comma in scheme)
378 - characters which are replaced with other characters where the buffer grows
379 this only happens with the scheme part
380 - characters which are replaced with other characters where the buffer shrinks
382 Aligning a pointer will, in the worst case, advance the buffer pointer
385 Knowing all that we can count an approximation of buffer size which can
386 be trusted that whole parsed IRI content will fit in.
389 /* first approximation - all characters will have to be stored in buffer */
392 /* second approximation - IRI has all possible parts which have to be
393 * aligned to ALIGNMENT and have NULL byte at the end. There are 7 different
395 *len += 7 * (ALIGNMENT-1 + 1);
397 /* third approximation - we have to make room for the scheme parts array,
398 * which consists of an aligned array of n + 1 pointers and n
399 * character strings, each aligned and NULL byte terminated.
401 if(NULL != (c = strchr(src, ':')))
404 for(p = src; p < c; p++)
411 /* fourth approximation - all characters of scheme part will be stored
412 * in scheme parts tokens */
415 /* fifth approximation - Ensure we can align each element on an
416 * ALIGNMENT byte boundary and append NULL byte */
417 *len += sc * (ALIGNMENT-1 + 1);
419 /* sixth approximation - Ensure we have room for the aligned array
421 *len += ALIGNMENT-1 + (sc + 1) * (sizeof(char*)/sizeof(char));
423 return (char *) calloc(1, *len);
427 iri_parse(const char *src)
430 char *bufstart, *endp, *bufp, **sl;
431 const char *at, *colon, *slash, *t;
432 size_t buflen, sc, cp;
434 if(NULL == (p = (iri_t *) calloc(1, sizeof(iri_t))))
438 if(NULL == (bufstart = iri__allocbuf(src, &buflen)))
443 p->base = bufp = bufstart;
445 at = strchr(src, '@');
446 slash = strchr(src, '/');
447 colon = strchr(src, ':');
448 if(slash && colon && slash < colon)
450 /* We can disregard the colon if a slash appears before it */
455 /* Definitely a scheme */
457 p->iri.scheme = bufp;
458 while(*src && *src != ':')
460 src = iri__copychar_decode(&bufp, src, 0);
465 /* src[0-1] SHOULD == '/' */
466 if(src[0] == '/') src++;
467 if(src[0] == '/') src++;
469 else if(colon && at && colon < at)
471 fprintf(stderr, "Colon occurs before at\n");
472 /* This could be scheme://user[;auth][:password]@host or [scheme:]user[;auth][:password]@host (urgh) */
473 if(colon[1] == '/' && colon[2] == '/' && colon[3] != '/')
476 p->iri.scheme = bufp;
477 while(*src && *src != ':')
479 src = iri__copychar_decode(&bufp, src, 0);
484 /* src[0-1] SHOULD == '/' */
485 for(; *src == '/'; src++);
488 fprintf(stderr, "Found user\n");
492 fprintf(stderr, "Matched scheme\n");
494 p->iri.scheme = bufp;
496 while(*src && *src != ':' && *src != '@' && *src != ';')
498 src = iri__copychar_decode(&bufp, src, 0);
504 /* Following authentication parameters */
508 while(*src && *src != ':' && *src != '@')
510 /* Don't decode, so it can be extracted properly */
511 src = iri__copychar(&bufp, src);
518 /* Following password data */
521 p->iri.password = bufp;
522 while(*src && *src != ':' && *src != '@')
524 src = iri__copychar_decode(&bufp, src, 0);
531 /* It was actually scheme:user:auth@host */
532 p->iri.user = p->iri.auth;
534 p->iri.password = bufp;
535 while(*src && *src != '@')
537 src = iri__copychar_decode(&bufp, src, 0);
555 /* user[;auth]@host[/path...] */
558 while(*src != '@' && *src != ';')
560 src = iri__copychar_decode(&bufp, src, 0);
569 while(*src && *src != '@')
571 /* Don't decode, so it can be extracted properly */
572 src = iri__copychar(&bufp, src);
582 if(NULL != p->iri.scheme)
585 for(t = p->iri.scheme; *t; t++)
593 sl = (char **) (void *) bufp;
594 bufp += (sc + 1) * sizeof(char *);
599 for(t = p->iri.scheme; *t; t++)
627 p->iri.schemelist = (const char **) sl;
628 p->iri.nschemes = sc;
633 while(*src && *src != ':' && *src != '/' && *src != '?' && *src != '#')
635 src = iri__copychar_decode(&bufp, src, 0);
644 p->iri.port = strtol(src, &endp, 10);
651 while(*src && *src != '?' && *src != '#')
653 src = iri__copychar_decode(&bufp, src, 0);
663 while(*src && *src != '#')
665 /* Don't actually decode the query itself, otherwise it
666 * can't be reliably split */
667 src = iri__copychar(&bufp, src);
675 p->iri.anchor = bufp;
678 src = iri__copychar_decode(&bufp, src, 0);
685 /* Still stuff left? It must be a path... of sorts */
688 while(*src && *src != '?' && *src != '#')
690 src = iri__copychar_decode(&bufp, src, 0);