2 * uri.c: set of generic URI related routines
4 * Reference: RFCs 3986, 2732 and 2373
6 * See Copyright for the status of this software.
8 * TODO: this module behaves really badly in OOM situations
18 #include <libxml/xmlmemory.h>
19 #include <libxml/uri.h>
20 #include <libxml/globals.h>
21 #include <libxml/xmlerror.h>
23 static void xmlCleanURI(xmlURIPtr uri);
26 * Old rule from 2396 used in legacy handling code
27 * alpha = lowalpha | upalpha
29 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
33 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
34 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
35 * "u" | "v" | "w" | "x" | "y" | "z"
38 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
41 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
42 * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
43 * "U" | "V" | "W" | "X" | "Y" | "Z"
45 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
51 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
53 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
56 * alphanum = alpha | digit
59 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
62 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
65 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \
66 ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \
67 ((x) == '(') || ((x) == ')'))
70 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
73 #define IS_UNWISE(p) \
74 (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \
75 ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \
76 ((*(p) == ']')) || ((*(p) == '`')))
78 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
82 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
83 ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
84 ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
88 * unreserved = alphanum | mark
91 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
94 * Skip to next pointer char, handle escaped sequences
/*
 * NEXT(p): advance the pointer p past one URI character. When p sits on
 * a '%' it is moved three bytes (the '%' and two following bytes),
 * otherwise one byte. The macro does not verify that two bytes actually
 * follow the '%'; callers are expected to have validated the escape.
 * The argument is parenthesized everywhere so that an expression such as
 * *pp advances the pointer it designates rather than pp itself; it is
 * still expanded more than once and must be free of side effects.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)
100 * Productions from the spec.
102 * authority = server | reg_name
103 * reg_name = 1*( unreserved | escaped | "$" | "," |
104 * ";" | ":" | "@" | "&" | "=" | "+" )
106 * path = [ abs_path | opaque_part ]
109 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
111 /************************************************************************
115 ************************************************************************/
/*
 * Basic character-class tests used by the RFC 3986 parser. Unlike most of
 * the legacy IS_* macros, which take a character value, these take a
 * pointer and test the byte it points at. The argument is expanded
 * several times, so it must not have side effects.
 */
/* DIGIT: '0'-'9' */
117 #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
/* ALPHA: an ASCII letter of either case */
118 #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \
119 ((*(p) >= 'A') && (*(p) <= 'Z')))
/* HEXDIG: a DIGIT or one of a-f / A-F */
120 #define ISA_HEXDIG(p) \
121 (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \
122 ((*(p) >= 'A') && (*(p) <= 'F')))
125 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
126 * / "*" / "+" / "," / ";" / "="
128 #define ISA_SUB_DELIM(p) \
129 (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \
130 ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \
131 ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \
132 ((*(p) == '=')) || ((*(p) == '\'')))
135 * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
137 #define ISA_GEN_DELIM(p) \
138 (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \
139 ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \
143 * reserved = gen-delims / sub-delims
145 #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
148 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
150 #define ISA_UNRESERVED(p) \
151 ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \
152 ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
155 * pct-encoded = "%" HEXDIG HEXDIG
157 #define ISA_PCT_ENCODED(p) \
158 ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
161 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
163 #define ISA_PCHAR(p) \
164 (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \
165 ((*(p) == ':')) || ((*(p) == '@')))
168 * xmlParse3986Scheme:
169 * @uri: pointer to an URI structure
170 * @str: pointer to the string to analyze
172 * Parse an URI scheme
174 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
176 * Returns 0 or the error code
179 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
189 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
190 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
192 if (uri->scheme != NULL) xmlFree(uri->scheme);
193 uri->scheme = STRNDUP(*str, cur - *str);
200 * xmlParse3986Fragment:
201 * @uri: pointer to an URI structure
202 * @str: pointer to the string to analyze
204 * Parse the fragment part of a URI
206 * fragment = *( pchar / "/" / "?" )
207 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
208 * in the fragment identifier but this is used very broadly for
209 * xpointer scheme selection, so we are allowing it here to not break
210 * for example all the DocBook processing chains.
212 * Returns 0 or the error code
215 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
224 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
225 (*cur == '[') || (*cur == ']') ||
226 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
229 if (uri->fragment != NULL)
230 xmlFree(uri->fragment);
231 if (uri->cleanup & 2)
232 uri->fragment = STRNDUP(*str, cur - *str);
234 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
242 * @uri: pointer to an URI structure
243 * @str: pointer to the string to analyze
245 * Parse the query part of an URI
249 * Returns 0 or the error code
252 xmlParse3986Query(xmlURIPtr uri, const char **str)
261 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
262 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
265 if (uri->query != NULL)
267 if (uri->cleanup & 2)
268 uri->query = STRNDUP(*str, cur - *str);
270 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
272 /* Save the raw bytes of the query as well.
273 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
275 if (uri->query_raw != NULL)
276 xmlFree (uri->query_raw);
277 uri->query_raw = STRNDUP (*str, cur - *str);
285 * @uri: pointer to an URI structure
286 * @str: the string to analyze
288 * Parse a port part and fills in the appropriate fields
289 * of the @uri structure
293 * Returns 0 or the error code
/*
 * Reads the run of decimal digits found at *str and accumulates their
 * value into uri->port.
 */
296 xmlParse3986Port(xmlURIPtr uri, const char **str)
298 const char *cur = *str;
300 if (ISA_DIGIT(cur)) {
/*
 * NOTE(review): the accumulation below has no visible upper bound. The
 * port is printed with "%d" in xmlSaveUri, so it is presumably an int;
 * a long enough digit run would then overflow it (undefined behaviour
 * for a signed type). Confirm against the full function and consider
 * rejecting or clamping oversized values.
 */
303 while (ISA_DIGIT(cur)) {
305 uri->port = uri->port * 10 + (*cur - '0');
315 * xmlParse3986Userinfo:
316 * @uri: pointer to an URI structure
317 * @str: the string to analyze
319 * Parse the user information part and fill in the appropriate fields
320 * of the @uri structure
322 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
324 * Returns 0 or the error code
327 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
332 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
333 ISA_SUB_DELIM(cur) || (*cur == ':'))
337 if (uri->user != NULL) xmlFree(uri->user);
338 if (uri->cleanup & 2)
339 uri->user = STRNDUP(*str, cur - *str);
341 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
350 * xmlParse3986DecOctet:
351 * @str: the string to analyze
353 * dec-octet = DIGIT ; 0-9
354 * / %x31-39 DIGIT ; 10-99
355 * / "1" 2DIGIT ; 100-199
356 * / "2" %x30-34 DIGIT ; 200-249
357 * / "25" %x30-35 ; 250-255
361 * Returns 0 if found and skipped, 1 otherwise
/*
 * Recognizes one dec-octet starting at *str. The alternatives are tried
 * from the shortest form to the longest, each one guarded by a look-ahead
 * on the next one or two characters.
 */
364 xmlParse3986DecOctet(const char **str) {
365 const char *cur = *str;
367 if (!(ISA_DIGIT(cur)))
/* a single digit: the following character is not a digit */
369 if (!ISA_DIGIT(cur+1))
/* two digits: the first is not '0' and no third digit follows */
371 else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
/* '1' followed by two digits: 100-199 */
373 else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
/* '2', then '0'-'4', then a digit: 200-249 */
375 else if ((*cur == '2') && (*(cur + 1) >= '0') &&
376 (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
/*
 * NOTE(review): this branch is meant to match 250-255, but its last
 * comparison re-tests *(cur + 1), which is already known to equal '5',
 * so it is always true and *(cur + 2) has no upper bound. It should
 * presumably read (*(cur + 2) <= '5'); as written "256".."259" (and any
 * byte >= '0' in third position) pass this test.
 */
378 else if ((*cur == '2') && (*(cur + 1) == '5') &&
379 (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
388 * @uri: pointer to an URI structure
389 * @str: the string to analyze
391 * Parse an host part and fills in the appropriate fields
392 * of the @uri structure
394 * host = IP-literal / IPv4address / reg-name
395 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
396 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
397 * reg-name = *( unreserved / pct-encoded / sub-delims )
399 * Returns 0 or the error code
402 xmlParse3986Host(xmlURIPtr uri, const char **str)
404 const char *cur = *str;
409 * IPv6 and future addressing schemes are enclosed between brackets
413 while ((*cur != ']') && (*cur != 0))
421 * try to parse an IPv4
423 if (ISA_DIGIT(cur)) {
424 if (xmlParse3986DecOctet(&cur) != 0)
429 if (xmlParse3986DecOctet(&cur) != 0)
433 if (xmlParse3986DecOctet(&cur) != 0)
437 if (xmlParse3986DecOctet(&cur) != 0)
444 * then this should be a hostname which can be empty
446 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
450 if (uri->authority != NULL) xmlFree(uri->authority);
451 uri->authority = NULL;
452 if (uri->server != NULL) xmlFree(uri->server);
454 if (uri->cleanup & 2)
455 uri->server = STRNDUP(host, cur - host);
457 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
466 * xmlParse3986Authority:
467 * @uri: pointer to an URI structure
468 * @str: the string to analyze
470 * Parse an authority part and fills in the appropriate fields
471 * of the @uri structure
473 * authority = [ userinfo "@" ] host [ ":" port ]
475 * Returns 0 or the error code
478 xmlParse3986Authority(xmlURIPtr uri, const char **str)
485 * try to parse an userinfo and check for the trailing @
487 ret = xmlParse3986Userinfo(uri, &cur);
488 if ((ret != 0) || (*cur != '@'))
492 ret = xmlParse3986Host(uri, &cur);
493 if (ret != 0) return(ret);
496 ret = xmlParse3986Port(uri, &cur);
497 if (ret != 0) return(ret);
504 * xmlParse3986Segment:
505 * @str: the string to analyze
506 * @forbid: an optional forbidden character
507 * @empty: allow an empty segment
509 * Scan a single path segment; this routine takes no @uri argument,
510 * so nothing is stored in a URI structure here
513 * segment-nz = 1*pchar
514 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
515 * ; non-zero-length segment without any colon ":"
517 * Returns 0 or the error code
520 xmlParse3986Segment(const char **str, char forbid, int empty)
525 if (!ISA_PCHAR(cur)) {
530 while (ISA_PCHAR(cur) && (*cur != forbid))
537 * xmlParse3986PathAbEmpty:
538 * @uri: pointer to an URI structure
539 * @str: the string to analyze
541 * Parse an absolute or empty path and fill in the appropriate fields
542 * of the @uri structure
544 * path-abempty = *( "/" segment )
546 * Returns 0 or the error code
549 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
556 while (*cur == '/') {
558 ret = xmlParse3986Segment(&cur, 0, 1);
559 if (ret != 0) return(ret);
562 if (uri->path != NULL) xmlFree(uri->path);
564 if (uri->cleanup & 2)
565 uri->path = STRNDUP(*str, cur - *str);
567 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
577 * xmlParse3986PathAbsolute:
578 * @uri: pointer to an URI structure
579 * @str: the string to analyze
581 * Parse an absolute path and fill in the appropriate fields
582 * of the @uri structure
584 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
586 * Returns 0 or the error code
589 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
599 ret = xmlParse3986Segment(&cur, 0, 0);
601 while (*cur == '/') {
603 ret = xmlParse3986Segment(&cur, 0, 1);
604 if (ret != 0) return(ret);
608 if (uri->path != NULL) xmlFree(uri->path);
610 if (uri->cleanup & 2)
611 uri->path = STRNDUP(*str, cur - *str);
613 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
623 * xmlParse3986PathRootless:
624 * @uri: pointer to an URI structure
625 * @str: the string to analyze
627 * Parse a rootless path and fill in the appropriate fields
628 * of the @uri structure
630 * path-rootless = segment-nz *( "/" segment )
632 * Returns 0 or the error code
635 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
642 ret = xmlParse3986Segment(&cur, 0, 0);
643 if (ret != 0) return(ret);
644 while (*cur == '/') {
646 ret = xmlParse3986Segment(&cur, 0, 1);
647 if (ret != 0) return(ret);
650 if (uri->path != NULL) xmlFree(uri->path);
652 if (uri->cleanup & 2)
653 uri->path = STRNDUP(*str, cur - *str);
655 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
665 * xmlParse3986PathNoScheme:
666 * @uri: pointer to an URI structure
667 * @str: the string to analyze
669 * Parse a path whose first segment contains no ':' and fill in the appropriate fields
670 * of the @uri structure
672 * path-noscheme = segment-nz-nc *( "/" segment )
674 * Returns 0 or the error code
677 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
684 ret = xmlParse3986Segment(&cur, ':', 0);
685 if (ret != 0) return(ret);
686 while (*cur == '/') {
688 ret = xmlParse3986Segment(&cur, 0, 1);
689 if (ret != 0) return(ret);
692 if (uri->path != NULL) xmlFree(uri->path);
694 if (uri->cleanup & 2)
695 uri->path = STRNDUP(*str, cur - *str);
697 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
707 * xmlParse3986HierPart:
708 * @uri: pointer to an URI structure
709 * @str: the string to analyze
711 * Parse an hierarchical part and fills in the appropriate fields
712 * of the @uri structure
714 * hier-part = "//" authority path-abempty
719 * Returns 0 or the error code
722 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
729 if ((*cur == '/') && (*(cur + 1) == '/')) {
731 ret = xmlParse3986Authority(uri, &cur);
732 if (ret != 0) return(ret);
733 ret = xmlParse3986PathAbEmpty(uri, &cur);
734 if (ret != 0) return(ret);
737 } else if (*cur == '/') {
738 ret = xmlParse3986PathAbsolute(uri, &cur);
739 if (ret != 0) return(ret);
740 } else if (ISA_PCHAR(cur)) {
741 ret = xmlParse3986PathRootless(uri, &cur);
742 if (ret != 0) return(ret);
744 /* path-empty is effectively empty */
746 if (uri->path != NULL) xmlFree(uri->path);
755 * xmlParse3986RelativeRef:
756 * @uri: pointer to an URI structure
757 * @str: the string to analyze
759 * Parse an URI string and fills in the appropriate fields
760 * of the @uri structure
762 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
763 * relative-part = "//" authority path-abempty
768 * Returns 0 or the error code
771 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
774 if ((*str == '/') && (*(str + 1) == '/')) {
776 ret = xmlParse3986Authority(uri, &str);
777 if (ret != 0) return(ret);
778 ret = xmlParse3986PathAbEmpty(uri, &str);
779 if (ret != 0) return(ret);
780 } else if (*str == '/') {
781 ret = xmlParse3986PathAbsolute(uri, &str);
782 if (ret != 0) return(ret);
783 } else if (ISA_PCHAR(str)) {
784 ret = xmlParse3986PathNoScheme(uri, &str);
785 if (ret != 0) return(ret);
787 /* path-empty is effectively empty */
789 if (uri->path != NULL) xmlFree(uri->path);
796 ret = xmlParse3986Query(uri, &str);
797 if (ret != 0) return(ret);
801 ret = xmlParse3986Fragment(uri, &str);
802 if (ret != 0) return(ret);
814 * @uri: pointer to an URI structure
815 * @str: the string to analyze
817 * Parse an URI string and fills in the appropriate fields
818 * of the @uri structure
820 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
822 * Returns 0 or the error code
825 xmlParse3986URI(xmlURIPtr uri, const char *str) {
828 ret = xmlParse3986Scheme(uri, &str);
829 if (ret != 0) return(ret);
834 ret = xmlParse3986HierPart(uri, &str);
835 if (ret != 0) return(ret);
838 ret = xmlParse3986Query(uri, &str);
839 if (ret != 0) return(ret);
843 ret = xmlParse3986Fragment(uri, &str);
844 if (ret != 0) return(ret);
854 * xmlParse3986URIReference:
855 * @uri: pointer to an URI structure
856 * @str: the string to analyze
858 * Parse an URI reference string and fills in the appropriate fields
859 * of the @uri structure
861 * URI-reference = URI / relative-ref
863 * Returns 0 or the error code
866 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
874 * Try first to parse absolute refs, then fallback to relative if
877 ret = xmlParse3986URI(uri, str);
880 ret = xmlParse3986RelativeRef(uri, str);
891 * @str: the URI string to analyze
893 * Parse an URI based on RFC 3986
895 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
897 * Returns a newly built xmlURIPtr or NULL in case of error
900 xmlParseURI(const char *str) {
906 uri = xmlCreateURI();
908 ret = xmlParse3986URIReference(uri, str);
918 * xmlParseURIReference:
919 * @uri: pointer to an URI structure
920 * @str: the string to analyze
922 * Parse an URI reference string based on RFC 3986 and fills in the
923 * appropriate fields of the @uri structure
925 * URI-reference = URI / relative-ref
927 * Returns 0 or the error code
930 xmlParseURIReference(xmlURIPtr uri, const char *str) {
931 return(xmlParse3986URIReference(uri, str));
936 * @str: the URI string to analyze
937 * @raw: if 1, unescaping of URI pieces is disabled
939 * Parse a URI, optionally keeping the original (still escaped) pieces intact.
941 * URI-reference = URI / relative-ref
943 * Returns a newly built xmlURIPtr or NULL in case of error
946 xmlParseURIRaw(const char *str, int raw) {
952 uri = xmlCreateURI();
957 ret = xmlParseURIReference(uri, str);
966 /************************************************************************
968 * Generic URI structure functions *
970 ************************************************************************/
975 * Simply creates an empty xmlURI
977 * Returns the new structure or NULL in case of error
983 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
985 xmlGenericError(xmlGenericErrorContext,
986 "xmlCreateURI: out of memory\n");
989 memset(ret, 0, sizeof(xmlURI));
995 * @uri: pointer to an xmlURI
997 * Save the URI as an escaped string
999 * Returns a new string (to be deallocated by caller)
/*
 * Serializes @uri into a heap buffer (ret) that is allocated with
 * xmlMallocAtomic and enlarged with xmlRealloc as the output grows.
 * Components are visited in this order: scheme, then either the opaque
 * part or server/user/port (or authority) followed by path and query,
 * and finally the fragment.
 */
1002 xmlSaveUri(xmlURIPtr uri) {
1003 xmlChar *ret = NULL;
1009 if (uri == NULL) return(NULL);
1013 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1015 xmlGenericError(xmlGenericErrorContext,
1016 "xmlSaveUri: out of memory\n");
1021 if (uri->scheme != NULL) {
1026 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1028 xmlGenericError(xmlGenericErrorContext,
1029 "xmlSaveUri: out of memory\n");
1039 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1041 xmlGenericError(xmlGenericErrorContext,
1042 "xmlSaveUri: out of memory\n");
1050 if (uri->opaque != NULL) {
1053 if (len + 3 >= max) {
1055 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1057 xmlGenericError(xmlGenericErrorContext,
1058 "xmlSaveUri: out of memory\n");
1064 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
/*
 * hi and lo are the two nibbles of the current byte; each is rendered
 * below as an uppercase hexadecimal digit. The same idiom recurs for
 * every component further down.
 */
1067 int val = *(unsigned char *)p++;
1068 int hi = val / 0x10, lo = val % 0x10;
1070 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1071 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1075 if (uri->server != NULL) {
1076 if (len + 3 >= max) {
1078 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1080 xmlGenericError(xmlGenericErrorContext,
1081 "xmlSaveUri: out of memory\n");
1089 if (uri->user != NULL) {
1092 if (len + 3 >= max) {
1094 temp = (xmlChar *) xmlRealloc(ret,
1095 (max + 1) * sizeof(xmlChar));
1097 xmlGenericError(xmlGenericErrorContext,
1098 "xmlSaveUri: out of memory\n");
1104 if ((IS_UNRESERVED(*(p))) ||
1105 ((*(p) == ';')) || ((*(p) == ':')) ||
1106 ((*(p) == '&')) || ((*(p) == '=')) ||
1107 ((*(p) == '+')) || ((*(p) == '$')) ||
1111 int val = *(unsigned char *)p++;
1112 int hi = val / 0x10, lo = val % 0x10;
1114 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1115 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1118 if (len + 3 >= max) {
1120 temp = (xmlChar *) xmlRealloc(ret,
1121 (max + 1) * sizeof(xmlChar));
1123 xmlGenericError(xmlGenericErrorContext,
1124 "xmlSaveUri: out of memory\n");
1136 temp = (xmlChar *) xmlRealloc(ret,
1137 (max + 1) * sizeof(xmlChar));
1139 xmlGenericError(xmlGenericErrorContext,
1140 "xmlSaveUri: out of memory\n");
/* the port is only emitted when it is strictly positive */
1148 if (uri->port > 0) {
1149 if (len + 10 >= max) {
1151 temp = (xmlChar *) xmlRealloc(ret,
1152 (max + 1) * sizeof(xmlChar));
1154 xmlGenericError(xmlGenericErrorContext,
1155 "xmlSaveUri: out of memory\n");
/* the write is bounded by the space remaining in the buffer */
1161 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1163 } else if (uri->authority != NULL) {
1164 if (len + 3 >= max) {
1166 temp = (xmlChar *) xmlRealloc(ret,
1167 (max + 1) * sizeof(xmlChar));
1169 xmlGenericError(xmlGenericErrorContext,
1170 "xmlSaveUri: out of memory\n");
1180 if (len + 3 >= max) {
1182 temp = (xmlChar *) xmlRealloc(ret,
1183 (max + 1) * sizeof(xmlChar));
1185 xmlGenericError(xmlGenericErrorContext,
1186 "xmlSaveUri: out of memory\n");
1192 if ((IS_UNRESERVED(*(p))) ||
1193 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1194 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1195 ((*(p) == '=')) || ((*(p) == '+')))
1198 int val = *(unsigned char *)p++;
1199 int hi = val / 0x10, lo = val % 0x10;
1201 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1202 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1205 } else if (uri->scheme != NULL) {
1206 if (len + 3 >= max) {
1208 temp = (xmlChar *) xmlRealloc(ret,
1209 (max + 1) * sizeof(xmlChar));
1211 xmlGenericError(xmlGenericErrorContext,
1212 "xmlSaveUri: out of memory\n");
1221 if (uri->path != NULL) {
1224 * the colon in file:///d: should not be escaped or
1225 * Windows accesses fail later.
1227 if ((uri->scheme != NULL) &&
1229 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1230 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1232 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1233 if (len + 3 >= max) {
/*
 * NOTE(review): this is the only growth site that assigns the result of
 * xmlRealloc directly to ret; every other site goes through temp. If the
 * reallocation fails, the original buffer pointer is overwritten and can
 * no longer be freed. It should presumably use temp like the others.
 */
1235 ret = (xmlChar *) xmlRealloc(ret,
1236 (max + 1) * sizeof(xmlChar));
1238 xmlGenericError(xmlGenericErrorContext,
1239 "xmlSaveUri: out of memory\n");
1248 if (len + 3 >= max) {
1250 temp = (xmlChar *) xmlRealloc(ret,
1251 (max + 1) * sizeof(xmlChar));
1253 xmlGenericError(xmlGenericErrorContext,
1254 "xmlSaveUri: out of memory\n");
1260 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1261 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1262 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1266 int val = *(unsigned char *)p++;
1267 int hi = val / 0x10, lo = val % 0x10;
1269 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1270 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
/* the raw query, when present, takes precedence over the parsed query */
1274 if (uri->query_raw != NULL) {
1275 if (len + 1 >= max) {
1277 temp = (xmlChar *) xmlRealloc(ret,
1278 (max + 1) * sizeof(xmlChar));
1280 xmlGenericError(xmlGenericErrorContext,
1281 "xmlSaveUri: out of memory\n");
1290 if (len + 1 >= max) {
1292 temp = (xmlChar *) xmlRealloc(ret,
1293 (max + 1) * sizeof(xmlChar));
1295 xmlGenericError(xmlGenericErrorContext,
1296 "xmlSaveUri: out of memory\n");
1304 } else if (uri->query != NULL) {
1305 if (len + 3 >= max) {
1307 temp = (xmlChar *) xmlRealloc(ret,
1308 (max + 1) * sizeof(xmlChar));
1310 xmlGenericError(xmlGenericErrorContext,
1311 "xmlSaveUri: out of memory\n");
1320 if (len + 3 >= max) {
1322 temp = (xmlChar *) xmlRealloc(ret,
1323 (max + 1) * sizeof(xmlChar));
1325 xmlGenericError(xmlGenericErrorContext,
1326 "xmlSaveUri: out of memory\n");
1332 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1335 int val = *(unsigned char *)p++;
1336 int hi = val / 0x10, lo = val % 0x10;
1338 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1339 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1344 if (uri->fragment != NULL) {
1345 if (len + 3 >= max) {
1347 temp = (xmlChar *) xmlRealloc(ret,
1348 (max + 1) * sizeof(xmlChar));
1350 xmlGenericError(xmlGenericErrorContext,
1351 "xmlSaveUri: out of memory\n");
1360 if (len + 3 >= max) {
1362 temp = (xmlChar *) xmlRealloc(ret,
1363 (max + 1) * sizeof(xmlChar));
1365 xmlGenericError(xmlGenericErrorContext,
1366 "xmlSaveUri: out of memory\n");
1372 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1375 int val = *(unsigned char *)p++;
1376 int hi = val / 0x10, lo = val % 0x10;
1378 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1379 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1385 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1387 xmlGenericError(xmlGenericErrorContext,
1388 "xmlSaveUri: out of memory\n");
1400 * @stream: a FILE* for the output
1401 * @uri: pointer to an xmlURI
1403 * Prints the URI in the stream @stream.
1406 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1409 out = xmlSaveUri(uri);
1411 fprintf(stream, "%s", (char *) out);
1418 * @uri: pointer to an xmlURI
1420 * Make sure the xmlURI struct is free of content
1423 xmlCleanURI(xmlURIPtr uri) {
1424 if (uri == NULL) return;
1426 if (uri->scheme != NULL) xmlFree(uri->scheme);
1428 if (uri->server != NULL) xmlFree(uri->server);
1430 if (uri->user != NULL) xmlFree(uri->user);
1432 if (uri->path != NULL) xmlFree(uri->path);
1434 if (uri->fragment != NULL) xmlFree(uri->fragment);
1435 uri->fragment = NULL;
1436 if (uri->opaque != NULL) xmlFree(uri->opaque);
1438 if (uri->authority != NULL) xmlFree(uri->authority);
1439 uri->authority = NULL;
1440 if (uri->query != NULL) xmlFree(uri->query);
1442 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1443 uri->query_raw = NULL;
1448 * @uri: pointer to an xmlURI
1450 * Free up the xmlURI struct
1453 xmlFreeURI(xmlURIPtr uri) {
1454 if (uri == NULL) return;
1456 if (uri->scheme != NULL) xmlFree(uri->scheme);
1457 if (uri->server != NULL) xmlFree(uri->server);
1458 if (uri->user != NULL) xmlFree(uri->user);
1459 if (uri->path != NULL) xmlFree(uri->path);
1460 if (uri->fragment != NULL) xmlFree(uri->fragment);
1461 if (uri->opaque != NULL) xmlFree(uri->opaque);
1462 if (uri->authority != NULL) xmlFree(uri->authority);
1463 if (uri->query != NULL) xmlFree(uri->query);
1464 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1468 /************************************************************************
1470 * Helper functions *
1472 ************************************************************************/
1475 * xmlNormalizeURIPath:
1476 * @path: pointer to the path string
1478 * Applies the 5 normalization steps to a path string--that is, RFC 2396
1479 * Section 5.2, steps 6.c through 6.g.
1481 * Normalization occurs directly on the string, no new allocation is done
1483 * Returns 0 or an error code
1486 xmlNormalizeURIPath(char *path) {
1492 /* Skip all initial "/" chars. We want to get to the beginning of the
1493 * first non-empty segment.
1496 while (cur[0] == '/')
1501 /* Keep everything we've seen so far. */
1505 * Analyze each segment in sequence for cases (c) and (d).
1507 while (cur[0] != '\0') {
1509 * c) All occurrences of "./", where "." is a complete path segment,
1510 * are removed from the buffer string.
1512 if ((cur[0] == '.') && (cur[1] == '/')) {
1514 /* '//' normalization should be done at this point too */
1515 while (cur[0] == '/')
1521 * d) If the buffer string ends with "." as a complete path segment,
1522 * that "." is removed.
1524 if ((cur[0] == '.') && (cur[1] == '\0'))
1527 /* Otherwise keep the segment. */
1528 while (cur[0] != '/') {
1531 (out++)[0] = (cur++)[0];
1534 while ((cur[0] == '/') && (cur[1] == '/'))
1537 (out++)[0] = (cur++)[0];
1542 /* Reset to the beginning of the first segment for the next sequence. */
1544 while (cur[0] == '/')
1550 * Analyze each segment in sequence for cases (e) and (f).
1552 * e) All occurrences of "<segment>/../", where <segment> is a
1553 * complete path segment not equal to "..", are removed from the
1554 * buffer string. Removal of these path segments is performed
1555 * iteratively, removing the leftmost matching pattern on each
1556 * iteration, until no matching pattern remains.
1558 * f) If the buffer string ends with "<segment>/..", where <segment>
1559 * is a complete path segment not equal to "..", that
1560 * "<segment>/.." is removed.
1562 * To satisfy the "iterative" clause in (e), we need to collapse the
1563 * string every time we find something that needs to be removed. Thus,
1564 * we don't need to keep two pointers into the string: we only need a
1565 * "current position" pointer.
1570 /* At the beginning of each iteration of this loop, "cur" points to
1571 * the first character of the segment we want to examine.
1574 /* Find the end of the current segment. */
1576 while ((segp[0] != '/') && (segp[0] != '\0'))
1579 /* If this is the last segment, we're done (we need at least two
1580 * segments to meet the criteria for the (e) and (f) cases).
1582 if (segp[0] == '\0')
1585 /* If the first segment is "..", or if the next segment _isn't_ "..",
1586 * keep this segment and try the next one.
1589 if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
1590 || ((segp[0] != '.') || (segp[1] != '.')
1591 || ((segp[2] != '/') && (segp[2] != '\0')))) {
1596 /* If we get here, remove this segment and the next one and back up
1597 * to the previous segment (if there is one), to implement the
1598 * "iteratively" clause. It's pretty much impossible to back up
1599 * while maintaining two pointers into the buffer, so just compact
1600 * the whole buffer now.
1603 /* If this is the end of the buffer, we're done. */
1604 if (segp[2] == '\0') {
1608 /* Valgrind complained, strcpy(cur, segp + 3); */
1609 /* string will overlap, do not use strcpy */
1612 while ((*tmp++ = *segp++) != 0)
1615 /* If there are no previous segments, then keep going from here. */
1617 while ((segp > path) && ((--segp)[0] == '/'))
1622 /* "segp" is pointing to the end of a previous segment; find its
1623 * start. We need to back up to the previous segment and start
1624 * over with that to handle things like "foo/bar/../..". If we
1625 * don't do this, then on the first pass we'll remove the "bar/..",
1626 * but be pointing at the second ".." so we won't realize we can also
1627 * remove the "foo/..".
1630 while ((cur > path) && (cur[-1] != '/'))
1636 * g) If the resulting buffer string still begins with one or more
1637 * complete path segments of "..", then the reference is
1638 * considered to be in error. Implementations may handle this
1639 * error by retaining these components in the resolved path (i.e.,
1640 * treating them as part of the final URI), by removing them from
1641 * the resolved path (i.e., discarding relative levels above the
1642 * root), or by avoiding traversal of the reference.
1644 * We discard them from the final path.
1646 if (path[0] == '/') {
1648 while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
1649 && ((cur[3] == '/') || (cur[3] == '\0')))
1654 while (cur[0] != '\0')
1655 (out++)[0] = (cur++)[0];
1663 static int is_hex(char c) {
1664 if (((c >= '0') && (c <= '9')) ||
1665 ((c >= 'a') && (c <= 'f')) ||
1666 ((c >= 'A') && (c <= 'F')))
1672 * xmlURIUnescapeString:
1673 * @str: the string to unescape
1674 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
1675 * @target: optional destination buffer
1677 * Unescaping routine, but does not check that the string is an URI. The
1678 * output is a direct unsigned char translation of %XX values (no encoding)
1679 * Note that the length of the result can only be smaller or same size as
1682 * Returns a copy of the string, but unescaped, will return NULL only in case
1686 xmlURIUnescapeString(const char *str, int len, char *target) {
1692 if (len <= 0) len = strlen(str);
1693 if (len < 0) return(NULL);
1695 if (target == NULL) {
1696 ret = (char *) xmlMallocAtomic(len + 1);
1698 xmlGenericError(xmlGenericErrorContext,
1699 "xmlURIUnescapeString: out of memory\n");
1707 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
1709 if ((*in >= '0') && (*in <= '9'))
1711 else if ((*in >= 'a') && (*in <= 'f'))
1712 *out = (*in - 'a') + 10;
1713 else if ((*in >= 'A') && (*in <= 'F'))
1714 *out = (*in - 'A') + 10;
1716 if ((*in >= '0') && (*in <= '9'))
1717 *out = *out * 16 + (*in - '0');
1718 else if ((*in >= 'a') && (*in <= 'f'))
1719 *out = *out * 16 + (*in - 'a') + 10;
1720 else if ((*in >= 'A') && (*in <= 'F'))
1721 *out = *out * 16 + (*in - 'A') + 10;
1736 * @str: string to escape
1737 * @list: exception list string of chars not to escape
1739 * This routine escapes a string to hex, leaving unreserved characters, '@'
1740 * and the characters in the exception list untouched.
1742 * Returns a new escaped string or NULL in case of error.
1745 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1750 unsigned int len, out;
1755 return(xmlStrdup(str));
1756 len = xmlStrlen(str);
1757 if (!(len > 0)) return(NULL);
1760 ret = (xmlChar *) xmlMallocAtomic(len);
1762 xmlGenericError(xmlGenericErrorContext,
1763 "xmlURIEscapeStr: out of memory\n");
1766 in = (const xmlChar *) str;
1769 if (len - out <= 3) {
1771 temp = (xmlChar *) xmlRealloc(ret, len);
1773 xmlGenericError(xmlGenericErrorContext,
1774 "xmlURIEscapeStr: out of memory\n");
1783 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1788 ret[out++] = '0' + val;
1790 ret[out++] = 'A' + val - 0xA;
1793 ret[out++] = '0' + val;
1795 ret[out++] = 'A' + val - 0xA;
1808 * @str: the string of the URI to escape
1810 * Escaping routine, does not do validity checks !
1811 * It will try to escape the chars needing this, but this is
1812 * heuristic-based, so it's impossible to be sure.
1814 * Returns a copy of the string, but escaped
1817 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1818 * according to RFC2396.
1822 xmlURIEscape(const xmlChar * str)
1824 xmlChar *ret, *segment = NULL;
1828 #define NULLCHK(p) if(!p) { \
1829 xmlGenericError(xmlGenericErrorContext, \
1830 "xmlURIEscape: out of memory\n"); \
1837 uri = xmlCreateURI();
1840 * Allow escaping errors in the unescaped form
1843 ret2 = xmlParseURIReference(uri, (const char *)str);
1856 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1858 ret = xmlStrcat(ret, segment);
1859 ret = xmlStrcat(ret, BAD_CAST ":");
1863 if (uri->authority) {
1865 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1867 ret = xmlStrcat(ret, BAD_CAST "//");
1868 ret = xmlStrcat(ret, segment);
1873 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1875 ret = xmlStrcat(ret,BAD_CAST "//");
1876 ret = xmlStrcat(ret, segment);
1877 ret = xmlStrcat(ret, BAD_CAST "@");
1882 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1884 if (uri->user == NULL)
1885 ret = xmlStrcat(ret, BAD_CAST "//");
1886 ret = xmlStrcat(ret, segment);
1893 snprintf((char *) port, 10, "%d", uri->port);
1894 ret = xmlStrcat(ret, BAD_CAST ":");
1895 ret = xmlStrcat(ret, port);
1900 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1902 ret = xmlStrcat(ret, segment);
1906 if (uri->query_raw) {
1907 ret = xmlStrcat(ret, BAD_CAST "?");
1908 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1910 else if (uri->query) {
1912 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1914 ret = xmlStrcat(ret, BAD_CAST "?");
1915 ret = xmlStrcat(ret, segment);
1920 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1922 ret = xmlStrcat(ret, segment);
1926 if (uri->fragment) {
1927 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1929 ret = xmlStrcat(ret, BAD_CAST "#");
1930 ret = xmlStrcat(ret, segment);
1940 /************************************************************************
1942 * Public functions *
1944 ************************************************************************/
1948 * @URI: the URI instance found in the document
1949 * @base: the base value
1951 * Computes the final URI of the reference done by checking that
1952 * the given URI is valid, and building the final URI using the
1953 * base URI. This is processed according to section 5.2 of the
1956 * 5.2. Resolving Relative References to Absolute Form
1958 * Returns a new URI string (to be freed by the caller) or NULL in case
1962 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1963 xmlChar *val = NULL;
1964 int ret, len, indx, cur, out;
1965 xmlURIPtr ref = NULL;
1966 xmlURIPtr bas = NULL;
1967 xmlURIPtr res = NULL;
1970 * 1) The URI reference is parsed into the potential four components and
1971 * fragment identifier, as described in Section 4.3.
1973 * NOTE that a completely empty URI is treated by modern browsers
1974 * as a reference to "." rather than as a synonym for the current
1975 * URI. Should we do that here?
1981 ref = xmlCreateURI();
1984 ret = xmlParseURIReference(ref, (const char *) URI);
1991 if ((ref != NULL) && (ref->scheme != NULL)) {
1993 * The URI is absolute, don't modify it.
1995 val = xmlStrdup(URI);
2001 bas = xmlCreateURI();
2004 ret = xmlParseURIReference(bas, (const char *) base);
2008 val = xmlSaveUri(ref);
2013 * the base fragment must be ignored
2015 if (bas->fragment != NULL) {
2016 xmlFree(bas->fragment);
2017 bas->fragment = NULL;
2019 val = xmlSaveUri(bas);
2024 * 2) If the path component is empty and the scheme, authority, and
2025 * query components are undefined, then it is a reference to the
2026 * current document and we are done. Otherwise, the reference URI's
2027 * query and fragment components are defined as found (or not found)
2028 * within the URI reference and not inherited from the base URI.
2030 * NOTE that in modern browsers, the parsing differs from the above
2031 * in the following aspect: the query component is allowed to be
2032 * defined while still treating this as a reference to the current
2035 res = xmlCreateURI();
2038 if ((ref->scheme == NULL) && (ref->path == NULL) &&
2039 ((ref->authority == NULL) && (ref->server == NULL))) {
2040 if (bas->scheme != NULL)
2041 res->scheme = xmlMemStrdup(bas->scheme);
2042 if (bas->authority != NULL)
2043 res->authority = xmlMemStrdup(bas->authority);
2044 else if (bas->server != NULL) {
2045 res->server = xmlMemStrdup(bas->server);
2046 if (bas->user != NULL)
2047 res->user = xmlMemStrdup(bas->user);
2048 res->port = bas->port;
2050 if (bas->path != NULL)
2051 res->path = xmlMemStrdup(bas->path);
2052 if (ref->query_raw != NULL)
2053 res->query_raw = xmlMemStrdup (ref->query_raw);
2054 else if (ref->query != NULL)
2055 res->query = xmlMemStrdup(ref->query);
2056 else if (bas->query_raw != NULL)
2057 res->query_raw = xmlMemStrdup(bas->query_raw);
2058 else if (bas->query != NULL)
2059 res->query = xmlMemStrdup(bas->query);
2060 if (ref->fragment != NULL)
2061 res->fragment = xmlMemStrdup(ref->fragment);
2066 * 3) If the scheme component is defined, indicating that the reference
2067 * starts with a scheme name, then the reference is interpreted as an
2068 * absolute URI and we are done. Otherwise, the reference URI's
2069 * scheme is inherited from the base URI's scheme component.
2071 if (ref->scheme != NULL) {
2072 val = xmlSaveUri(ref);
2075 if (bas->scheme != NULL)
2076 res->scheme = xmlMemStrdup(bas->scheme);
2078 if (ref->query_raw != NULL)
2079 res->query_raw = xmlMemStrdup(ref->query_raw);
2080 else if (ref->query != NULL)
2081 res->query = xmlMemStrdup(ref->query);
2082 if (ref->fragment != NULL)
2083 res->fragment = xmlMemStrdup(ref->fragment);
2086 * 4) If the authority component is defined, then the reference is a
2087 * network-path and we skip to step 7. Otherwise, the reference
2088 * URI's authority is inherited from the base URI's authority
2089 * component, which will also be undefined if the URI scheme does not
2090 * use an authority component.
2092 if ((ref->authority != NULL) || (ref->server != NULL)) {
2093 if (ref->authority != NULL)
2094 res->authority = xmlMemStrdup(ref->authority);
2096 res->server = xmlMemStrdup(ref->server);
2097 if (ref->user != NULL)
2098 res->user = xmlMemStrdup(ref->user);
2099 res->port = ref->port;
2101 if (ref->path != NULL)
2102 res->path = xmlMemStrdup(ref->path);
2105 if (bas->authority != NULL)
2106 res->authority = xmlMemStrdup(bas->authority);
2107 else if (bas->server != NULL) {
2108 res->server = xmlMemStrdup(bas->server);
2109 if (bas->user != NULL)
2110 res->user = xmlMemStrdup(bas->user);
2111 res->port = bas->port;
2115 * 5) If the path component begins with a slash character ("/"), then
2116 * the reference is an absolute-path and we skip to step 7.
2118 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2119 res->path = xmlMemStrdup(ref->path);
2125 * 6) If this step is reached, then we are resolving a relative-path
2126 * reference. The relative path needs to be merged with the base
2127 * URI's path. Although there are many ways to do this, we will
2128 * describe a simple method using a separate string buffer.
2130 * Allocate a buffer large enough for the result string.
2132 len = 2; /* extra / and 0 */
2133 if (ref->path != NULL)
2134 len += strlen(ref->path);
2135 if (bas->path != NULL)
2136 len += strlen(bas->path);
2137 res->path = (char *) xmlMallocAtomic(len);
2138 if (res->path == NULL) {
2139 xmlGenericError(xmlGenericErrorContext,
2140 "xmlBuildURI: out of memory\n");
2146 * a) All but the last segment of the base URI's path component is
2147 * copied to the buffer. In other words, any characters after the
2148 * last (right-most) slash character, if any, are excluded.
2152 if (bas->path != NULL) {
2153 while (bas->path[cur] != 0) {
2154 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2156 if (bas->path[cur] == 0)
2161 res->path[out] = bas->path[out];
2169 * b) The reference's path component is appended to the buffer
2172 if (ref->path != NULL && ref->path[0] != 0) {
2175 * Ensure the path includes a '/'
2177 if ((out == 0) && (bas->server != NULL))
2178 res->path[out++] = '/';
2179 while (ref->path[indx] != 0) {
2180 res->path[out++] = ref->path[indx++];
2186 * Steps c) to h) are really path normalization steps
2188 xmlNormalizeURIPath(res->path);
2193 * 7) The resulting URI components, including any inherited from the
2194 * base URI, are recombined to give the absolute form of the URI
2197 val = xmlSaveUri(res);
2210 * xmlBuildRelativeURI:
2211 * @URI: the URI reference under consideration
2212 * @base: the base value
2214 * Expresses the URI of the reference in terms relative to the
2215 * base. Some examples of this operation include:
2216 * base = "http://site1.com/docs/book1.html"
2217 * URI input URI returned
2218 * docs/pic1.gif pic1.gif
2219 * docs/img/pic1.gif img/pic1.gif
2220 * img/pic1.gif ../img/pic1.gif
2221 * http://site1.com/docs/pic1.gif pic1.gif
2222 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2224 * base = "docs/book1.html"
2225 * URI input URI returned
2226 * docs/pic1.gif pic1.gif
2227 * docs/img/pic1.gif img/pic1.gif
2228 * img/pic1.gif ../img/pic1.gif
2229 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2232 * Note: if the URI reference is really weird or complicated, it may be
2233 * worthwhile to first convert it into a "nice" one by calling
2234 * xmlBuildURI (using 'base') before calling this routine,
2235 * since this routine (for reasonable efficiency) assumes URI has
2236 * already been through some validation.
2238 * Returns a new URI string (to be freed by the caller) or NULL in case
2242 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2244 xmlChar *val = NULL;
2250 xmlURIPtr ref = NULL;
2251 xmlURIPtr bas = NULL;
2252 xmlChar *bptr, *uptr, *vptr;
2253 int remove_path = 0;
2255 if ((URI == NULL) || (*URI == 0))
2259 * First parse URI into a standard form
2261 ref = xmlCreateURI ();
2264 /* If URI not already in "relative" form */
2265 if (URI[0] != '.') {
2266 ret = xmlParseURIReference (ref, (const char *) URI);
2268 goto done; /* Error in URI, return NULL */
2270 ref->path = (char *)xmlStrdup(URI);
2273 * Next parse base into the same standard form
2275 if ((base == NULL) || (*base == 0)) {
2276 val = xmlStrdup (URI);
2279 bas = xmlCreateURI ();
2282 if (base[0] != '.') {
2283 ret = xmlParseURIReference (bas, (const char *) base);
2285 goto done; /* Error in base, return NULL */
2287 bas->path = (char *)xmlStrdup(base);
2290 * If the scheme / server on the URI differs from the base,
2291 * just return the URI
2293 if ((ref->scheme != NULL) &&
2294 ((bas->scheme == NULL) ||
2295 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2296 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2297 val = xmlStrdup (URI);
2300 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2301 val = xmlStrdup(BAD_CAST "");
2304 if (bas->path == NULL) {
2305 val = xmlStrdup((xmlChar *)ref->path);
2308 if (ref->path == NULL) {
2309 ref->path = (char *) "/";
2314 * At this point (at last!) we can compare the two paths
2316 * First we take care of the special case where either of the
2317 * two path components may be missing (bug 316224)
2319 if (bas->path == NULL) {
2320 if (ref->path != NULL) {
2321 uptr = (xmlChar *) ref->path;
2324 /* exception characters from xmlSaveUri */
2325 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2329 bptr = (xmlChar *)bas->path;
2330 if (ref->path == NULL) {
2331 for (ix = 0; bptr[ix] != 0; ix++) {
2332 if (bptr[ix] == '/')
2336 len = 1; /* this is for a string terminator only */
2339 * Next we compare the two strings and find where they first differ
2341 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2343 if ((*bptr == '.') && (bptr[1] == '/'))
2345 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2347 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2350 if (bptr[pos] == ref->path[pos]) {
2351 val = xmlStrdup(BAD_CAST "");
2352 goto done; /* (I can't imagine why anyone would do this) */
2356 * In URI, "back up" to the last '/' encountered. This will be the
2357 * beginning of the "unique" suffix of URI
2360 if ((ref->path[ix] == '/') && (ix > 0))
2362 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2364 for (; ix > 0; ix--) {
2365 if (ref->path[ix] == '/')
2369 uptr = (xmlChar *)ref->path;
2372 uptr = (xmlChar *)&ref->path[ix];
2376 * In base, count the number of '/' from the differing point
2378 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2379 for (; bptr[ix] != 0; ix++) {
2380 if (bptr[ix] == '/')
2384 len = xmlStrlen (uptr) + 1;
2389 /* exception characters from xmlSaveUri */
2390 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2395 * Allocate just enough space for the returned string -
2396 * length of the remainder of the URI, plus enough space
2397 * for the "../" groups, plus one for the terminator
2399 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2401 xmlGenericError(xmlGenericErrorContext,
2402 "xmlBuildRelativeURI: out of memory\n");
2407 * Put in as many "../" as needed
2409 for (; nbslash>0; nbslash--) {
2415 * Finish up with the end of the URI
2418 if ((vptr > val) && (len > 0) &&
2419 (uptr[0] == '/') && (vptr[-1] == '/')) {
2420 memcpy (vptr, uptr + 1, len - 1);
2423 memcpy (vptr, uptr, len);
2430 /* escape the freshly-built path */
2432 /* exception characters from xmlSaveUri */
2433 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2438 * Free the working variables
2440 if (remove_path != 0)
2452 * @path: the resource locator in a filesystem notation
2454 * Constructs a canonic path from the specified path.
2456 * Returns a new canonic path, or a duplicate of the path parameter if the
2457 * construction fails. The caller is responsible for freeing the memory occupied
2458 * by the returned string. If there is insufficient memory available, or the
2459 * argument is NULL, the function returns NULL.
2461 #define IS_WINDOWS_PATH(p) \
2463 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2464 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2465 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2467 xmlCanonicPath(const xmlChar *path)
2470 * For Windows implementations, additional work needs to be done to
2471 * replace backslashes in pathnames with "forward slashes"
2473 #if defined(_WIN32) && !defined(__CYGWIN__)
2480 const xmlChar *absuri;
2485 /* sanitize filename starting with // so it can be used as URI */
2486 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2489 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2491 return xmlStrdup(path);
2494 /* Check if this is an "absolute uri" */
2495 absuri = xmlStrstr(path, BAD_CAST "://");
2496 if (absuri != NULL) {
2502 * this looks like an URI where some parts have not been
2503 * escaped leading to a parsing problem. Check that the first
2504 * part matches a protocol.
2507 /* Bypass if first part (part before the '://') is > 20 chars */
2508 if ((l <= 0) || (l > 20))
2509 goto path_processing;
2510 /* Bypass if any non-alpha characters are present in first part */
2511 for (j = 0;j < l;j++) {
2513 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2514 goto path_processing;
2517 /* Escape all except the characters specified in the supplied path */
2518 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2519 if (escURI != NULL) {
2520 /* Try parsing the escaped path */
2521 uri = xmlParseURI((const char *) escURI);
2522 /* If successful, return the escaped string */
2531 /* For Windows implementations, replace backslashes with 'forward slashes' */
2532 #if defined(_WIN32) && !defined(__CYGWIN__)
2534 * Create a URI structure
2536 uri = xmlCreateURI();
2537 if (uri == NULL) { /* Guard against 'out of memory' */
2541 len = xmlStrlen(path);
2542 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2543 /* make the scheme 'file' */
2544 uri->scheme = xmlStrdup(BAD_CAST "file");
2545 /* allocate space for leading '/' + path + string terminator */
2546 uri->path = xmlMallocAtomic(len + 2);
2547 if (uri->path == NULL) {
2548 xmlFreeURI(uri); /* Guard agains 'out of memory' */
2551 /* Put in leading '/' plus path */
2554 strncpy(p, path, len + 1);
2556 uri->path = xmlStrdup(path);
2557 if (uri->path == NULL) {
2563 /* Now change all occurrences of '\' to '/' */
2564 while (*p != '\0') {
2570 if (uri->scheme == NULL) {
2571 ret = xmlStrdup((const xmlChar *) uri->path);
2573 ret = xmlSaveUri(uri);
2578 ret = xmlStrdup((const xmlChar *) path);
2585 * @path: the resource locator in a filesystem notation
2587 * Constructs a URI expressing the existing path
2589 * Returns a new URI, or a duplicate of the path parameter if the
2590 * construction fails. The caller is responsible for freeing the memory
2591 * occupied by the returned string. If there is insufficient memory available,
2592 * or the argument is NULL, the function returns NULL.
2595 xmlPathToURI(const xmlChar *path)
2604 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2606 return xmlStrdup(path);
2608 cal = xmlCanonicPath(path);
2611 #if defined(_WIN32) && !defined(__CYGWIN__)
2612 /* xmlCanonicPath can return a URI on Windows (is that the intended behaviour?)
2613 If 'cal' is a valid URI already then we are done here, as continuing would make
2615 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2619 /* 'cal' can contain a relative path with backslashes. If that is processed
2620 by xmlSaveURI, they will be escaped and the external entity loader machinery
2621 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2623 while (*ret != '\0') {
2629 memset(&temp, 0, sizeof(temp));
2630 temp.path = (char *) cal;
2631 ret = xmlSaveUri(&temp);
2636 #include "elfgcchack.h"