2 * uri.c: set of generic URI related routines
4 * Reference: RFCs 3986, 2732 and 2373
6 * See Copyright for the status of this software.
16 #include <libxml/xmlmemory.h>
17 #include <libxml/uri.h>
18 #include <libxml/globals.h>
19 #include <libxml/xmlerror.h>
24 * The definition of the URI regexp in the above RFC has no size limit
25 * In practice they are usually relatively short except for the
26 * data URI scheme as defined in RFC 2397. Even for data URI the usual
27 * maximum size before hitting random practical limits is around 64 KB
28 * and 4KB is usually a maximum admitted limit for proper operations.
29 * The value below is more a security limit than anything else and
30 * really should never be hit by 'normal' operations
31 * Set to 1 MByte in 2012, this is only enforced on output
/*
 * Upper bound (1 MByte) on the length of a URI string. The expansion is
 * parenthesized so that the macro acts as a single value inside any
 * larger expression (e.g. next to '/' or '%').
 */
#define MAX_URI_LENGTH (1024 * 1024)
36 xmlURIErrMemory(const char *extra)
39 __xmlRaiseError(NULL, NULL, NULL,
40 NULL, NULL, XML_FROM_URI,
41 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
42 extra, NULL, NULL, 0, 0,
43 "Memory allocation failed : %s\n", extra);
45 __xmlRaiseError(NULL, NULL, NULL,
46 NULL, NULL, XML_FROM_URI,
47 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48 NULL, NULL, NULL, 0, 0,
49 "Memory allocation failed\n");
52 static void xmlCleanURI(xmlURIPtr uri);
55 * Old rule from 2396 used in legacy handling code
56 * alpha = lowalpha | upalpha
/* True when x is a letter, i.e. satisfies IS_UPALPHA or IS_LOWALPHA. */
#define IS_ALPHA(x) (IS_UPALPHA(x) || IS_LOWALPHA(x))
62 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
63 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
64 * "u" | "v" | "w" | "x" | "y" | "z"
/* True when x lies in the range 'a'..'z'. */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))
70 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
71 * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
72 * "U" | "V" | "W" | "X" | "Y" | "Z"
/* True when x lies in the range 'A'..'Z'. */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))
80 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
/* True when x lies in the range '0'..'9'. */
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))
85 * alphanum = alpha | digit
/* True when x is a digit or a letter (IS_DIGIT or IS_ALPHA). */
#define IS_ALPHANUM(x) (IS_DIGIT(x) || IS_ALPHA(x))
91 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
/*
 * True when x is one of the nine "mark" characters:
 *     - _ . ! ~ * ' ( )
 */
#define IS_MARK(x)                                              \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||            \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||            \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))
99 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
/*
 * True when the character that p points at is "unwise", i.e. one of
 *     { } | \ ^ [ ] `
 * Unlike the IS_xxx(x) macros above, the argument is a pointer.
 */
#define IS_UNWISE(p)                                            \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||         \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||        \
     (*(p) == ']') || (*(p) == '`'))
107 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
111 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
112 ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
113 ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
117 * unreserved = alphanum | mark
/* True when x is a mark character or an alphanumeric one. */
#define IS_UNRESERVED(x) (IS_MARK(x) || IS_ALPHANUM(x))
123 * Skip to next pointer char, handle escaped sequences
/*
 * Advance p past one URI character: by three bytes when p points at a
 * '%' (an escaped sequence), by one byte otherwise.
 *
 * p must be a modifiable pointer lvalue and is evaluated more than once.
 * Every use of the argument is parenthesized so that an expression such
 * as *pp is advanced itself (a bare "*pp++" would step pp instead).
 *
 * NOTE(review): the '%' case does not inspect the two following bytes,
 * so a truncated escape at the very end of a string steps past the
 * terminator; callers must validate escapes beforehand.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
129 * Productions from the spec.
131 * authority = server | reg_name
132 * reg_name = 1*( unreserved | escaped | "$" | "," |
133 * ";" | ":" | "@" | "&" | "=" | "+" )
135 * path = [ abs_path | opaque_part ]
/*
 * Duplicate the first n bytes of s through xmlStrndup(), casting between
 * the char and xmlChar views of the string. The whole expansion is
 * parenthesized so the cast applies to the call result even when the
 * macro is followed by a postfix operator.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
140 /************************************************************************
144 ************************************************************************/
/*
 * Character-class tests on the byte that p points at.
 */

/* DIGIT: '0'..'9' */
#define ISA_DIGIT(p) (('0' <= *(p)) && (*(p) <= '9'))

/* ALPHA: 'a'..'z' or 'A'..'Z' */
#define ISA_ALPHA(p)                                    \
    ((('a' <= *(p)) && (*(p) <= 'z')) ||                \
     (('A' <= *(p)) && (*(p) <= 'Z')))

/* HEXDIG: a DIGIT, or a letter in 'a'..'f' or 'A'..'F' */
#define ISA_HEXDIG(p)                                   \
    (ISA_DIGIT(p) ||                                    \
     (('a' <= *(p)) && (*(p) <= 'f')) ||                \
     (('A' <= *(p)) && (*(p) <= 'F')))
154 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
155 * / "*" / "+" / "," / ";" / "="
/*
 * True when the character that p points at is a sub-delimiter:
 *     ! $ & ' ( ) * + , ; =
 */
#define ISA_SUB_DELIM(p)                                        \
    ((*(p) == '!') || (*(p) == '$') || (*(p) == '&') ||         \
     (*(p) == '\'') || (*(p) == '(') || (*(p) == ')') ||        \
     (*(p) == '*') || (*(p) == '+') || (*(p) == ',') ||         \
     (*(p) == ';') || (*(p) == '='))
164 * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
166 #define ISA_GEN_DELIM(p) \
167 (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \
168 ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \
172 * reserved = gen-delims / sub-delims
/* True when p points at a sub-delimiter or a general delimiter. */
#define ISA_RESERVED(p) ((ISA_SUB_DELIM(p)) || ISA_GEN_DELIM(p))
177 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
/*
 * True when p points at an unreserved character: a letter, a digit, or
 * one of  - . _ ~
 */
#define ISA_UNRESERVED(p)                                       \
    ((ISA_DIGIT(p)) || (ISA_ALPHA(p)) ||                        \
     (*(p) == '~') || (*(p) == '_') ||                          \
     (*(p) == '.') || (*(p) == '-'))
184 * pct-encoded = "%" HEXDIG HEXDIG
/*
 * True when p points at a complete percent-escape: '%' followed by two
 * hexadecimal digits. The tests short-circuit left to right, so p[1] is
 * read only after a '%' and p[2] only after a valid first hex digit.
 *
 * p is parenthesized before the offsets are added so that any pointer
 * expression (e.g. a conditional) can be passed as argument.
 */
#define ISA_PCT_ENCODED(p)                                      \
    ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))
190 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
/*
 * True when p points at the start of a pchar: ':' or '@', a
 * sub-delimiter, a percent-escape, or an unreserved character.
 */
#define ISA_PCHAR(p)                                            \
    ((*(p) == ':') || (*(p) == '@') ||                          \
     ISA_SUB_DELIM(p) || ISA_PCT_ENCODED(p) || ISA_UNRESERVED(p))
197 * xmlParse3986Scheme:
198 * @uri: pointer to an URI structure
199 * @str: pointer to the string to analyze
201 * Parse an URI scheme
203 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
205 * Returns 0 or the error code
208 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
218 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
219 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
221 if (uri->scheme != NULL) xmlFree(uri->scheme);
222 uri->scheme = STRNDUP(*str, cur - *str);
229 * xmlParse3986Fragment:
230 * @uri: pointer to an URI structure
231 * @str: pointer to the string to analyze
233 * Parse the fragment part of an URI
235 * fragment = *( pchar / "/" / "?" )
236 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237 * in the fragment identifier but this is used very broadly for
238 * xpointer scheme selection, so we are allowing it here to not break
239 * for example all the DocBook processing chains.
241 * Returns 0 or the error code
244 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
253 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
254 (*cur == '[') || (*cur == ']') ||
255 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
258 if (uri->fragment != NULL)
259 xmlFree(uri->fragment);
260 if (uri->cleanup & 2)
261 uri->fragment = STRNDUP(*str, cur - *str);
263 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
271 * @uri: pointer to an URI structure
272 * @str: pointer to the string to analyze
274 * Parse the query part of an URI
278 * Returns 0 or the error code
281 xmlParse3986Query(xmlURIPtr uri, const char **str)
290 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
291 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
294 if (uri->query != NULL)
296 if (uri->cleanup & 2)
297 uri->query = STRNDUP(*str, cur - *str);
299 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
301 /* Save the raw bytes of the query as well.
302 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
304 if (uri->query_raw != NULL)
305 xmlFree (uri->query_raw);
306 uri->query_raw = STRNDUP (*str, cur - *str);
314 * @uri: pointer to an URI structure
315 * @str: the string to analyze
317 * Parse a port part and fills in the appropriate fields
318 * of the @uri structure
322 * Returns 0 or the error code
325 xmlParse3986Port(xmlURIPtr uri, const char **str)
327 const char *cur = *str;
328 unsigned port = 0; /* unsigned for defined overflow behavior */
330 if (ISA_DIGIT(cur)) {
331 while (ISA_DIGIT(cur)) {
332 port = port * 10 + (*cur - '0');
337 uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */
345 * xmlParse3986Userinfo:
346 * @uri: pointer to an URI structure
347 * @str: the string to analyze
349 * Parse a user information part and fills in the appropriate fields
350 * of the @uri structure
352 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
354 * Returns 0 or the error code
357 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
362 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
363 ISA_SUB_DELIM(cur) || (*cur == ':'))
367 if (uri->user != NULL) xmlFree(uri->user);
368 if (uri->cleanup & 2)
369 uri->user = STRNDUP(*str, cur - *str);
371 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
380 * xmlParse3986DecOctet:
381 * @str: the string to analyze
383 * dec-octet = DIGIT ; 0-9
384 * / %x31-39 DIGIT ; 10-99
385 * / "1" 2DIGIT ; 100-199
386 * / "2" %x30-34 DIGIT ; 200-249
387 * / "25" %x30-35 ; 250-255
391 * Returns 0 if found and skipped, 1 otherwise
/*
 * Recognize one dec-octet (a decimal number in 0-255 written without a
 * leading zero) at *str and, on success, move *str just past it.
 * On failure *str is left unchanged.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if (!(ISA_DIGIT(cur)))
        return(1);
    if (!ISA_DIGIT(cur+1))
        cur++;                          /* DIGIT: 0-9 */
    else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
        cur += 2;                       /* %x31-39 DIGIT: 10-99 */
    else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
        cur += 3;                       /* "1" 2DIGIT: 100-199 */
    else if ((*cur == '2') && (*(cur + 1) >= '0') &&
             (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
        cur += 3;                       /* "2" %x30-34 DIGIT: 200-249 */
    else if ((*cur == '2') && (*(cur + 1) == '5') &&
             (*(cur + 2) >= '0') && (*(cur + 2) <= '5'))
        /*
         * "25" %x30-35: 250-255. The upper bound must be checked on the
         * third character; checking the second one again would let
         * 256-259 through.
         */
        cur += 3;
    else
        return(1);
    *str = cur;
    return(0);
}
418 * @uri: pointer to an URI structure
419 * @str: the string to analyze
421 * Parse an host part and fills in the appropriate fields
422 * of the @uri structure
424 * host = IP-literal / IPv4address / reg-name
425 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
426 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
427 * reg-name = *( unreserved / pct-encoded / sub-delims )
429 * Returns 0 or the error code
432 xmlParse3986Host(xmlURIPtr uri, const char **str)
434 const char *cur = *str;
439 * IPv6 and future addressing scheme are enclosed between brackets
443 while ((*cur != ']') && (*cur != 0))
451 * try to parse an IPv4
453 if (ISA_DIGIT(cur)) {
454 if (xmlParse3986DecOctet(&cur) != 0)
459 if (xmlParse3986DecOctet(&cur) != 0)
463 if (xmlParse3986DecOctet(&cur) != 0)
467 if (xmlParse3986DecOctet(&cur) != 0)
474 * then this should be a hostname which can be empty
476 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
480 if (uri->authority != NULL) xmlFree(uri->authority);
481 uri->authority = NULL;
482 if (uri->server != NULL) xmlFree(uri->server);
484 if (uri->cleanup & 2)
485 uri->server = STRNDUP(host, cur - host);
487 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
496 * xmlParse3986Authority:
497 * @uri: pointer to an URI structure
498 * @str: the string to analyze
500 * Parse an authority part and fills in the appropriate fields
501 * of the @uri structure
503 * authority = [ userinfo "@" ] host [ ":" port ]
505 * Returns 0 or the error code
508 xmlParse3986Authority(xmlURIPtr uri, const char **str)
515 * try to parse an userinfo and check for the trailing @
517 ret = xmlParse3986Userinfo(uri, &cur);
518 if ((ret != 0) || (*cur != '@'))
522 ret = xmlParse3986Host(uri, &cur);
523 if (ret != 0) return(ret);
526 ret = xmlParse3986Port(uri, &cur);
527 if (ret != 0) return(ret);
534 * xmlParse3986Segment:
535 * @str: the string to analyze
536 * @forbid: an optional forbidden character
537 * @empty: allow an empty segment
539 * Parse a path segment. This helper takes no @uri argument, so it
540 * does not fill in any field of a URI structure
543 * segment-nz = 1*pchar
544 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
545 * ; non-zero-length segment without any colon ":"
547 * Returns 0 or the error code
550 xmlParse3986Segment(const char **str, char forbid, int empty)
555 if (!ISA_PCHAR(cur)) {
560 while (ISA_PCHAR(cur) && (*cur != forbid))
567 * xmlParse3986PathAbEmpty:
568 * @uri: pointer to an URI structure
569 * @str: the string to analyze
571 * Parse an absolute or empty path and fills in the appropriate fields
572 * of the @uri structure
574 * path-abempty = *( "/" segment )
576 * Returns 0 or the error code
579 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
586 while (*cur == '/') {
588 ret = xmlParse3986Segment(&cur, 0, 1);
589 if (ret != 0) return(ret);
592 if (uri->path != NULL) xmlFree(uri->path);
594 if (uri->cleanup & 2)
595 uri->path = STRNDUP(*str, cur - *str);
597 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
607 * xmlParse3986PathAbsolute:
608 * @uri: pointer to an URI structure
609 * @str: the string to analyze
611 * Parse an absolute path and fills in the appropriate fields
612 * of the @uri structure
614 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
616 * Returns 0 or the error code
619 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
629 ret = xmlParse3986Segment(&cur, 0, 0);
631 while (*cur == '/') {
633 ret = xmlParse3986Segment(&cur, 0, 1);
634 if (ret != 0) return(ret);
638 if (uri->path != NULL) xmlFree(uri->path);
640 if (uri->cleanup & 2)
641 uri->path = STRNDUP(*str, cur - *str);
643 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
653 * xmlParse3986PathRootless:
654 * @uri: pointer to an URI structure
655 * @str: the string to analyze
657 * Parse a path without root and fills in the appropriate fields
658 * of the @uri structure
660 * path-rootless = segment-nz *( "/" segment )
662 * Returns 0 or the error code
665 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
672 ret = xmlParse3986Segment(&cur, 0, 0);
673 if (ret != 0) return(ret);
674 while (*cur == '/') {
676 ret = xmlParse3986Segment(&cur, 0, 1);
677 if (ret != 0) return(ret);
680 if (uri->path != NULL) xmlFree(uri->path);
682 if (uri->cleanup & 2)
683 uri->path = STRNDUP(*str, cur - *str);
685 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
695 * xmlParse3986PathNoScheme:
696 * @uri: pointer to an URI structure
697 * @str: the string to analyze
699 * Parse a no-scheme path (first segment without ':') and fills in the appropriate fields
700 * of the @uri structure
702 * path-noscheme = segment-nz-nc *( "/" segment )
704 * Returns 0 or the error code
707 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
714 ret = xmlParse3986Segment(&cur, ':', 0);
715 if (ret != 0) return(ret);
716 while (*cur == '/') {
718 ret = xmlParse3986Segment(&cur, 0, 1);
719 if (ret != 0) return(ret);
722 if (uri->path != NULL) xmlFree(uri->path);
724 if (uri->cleanup & 2)
725 uri->path = STRNDUP(*str, cur - *str);
727 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
737 * xmlParse3986HierPart:
738 * @uri: pointer to an URI structure
739 * @str: the string to analyze
741 * Parse an hierarchical part and fills in the appropriate fields
742 * of the @uri structure
744 * hier-part = "//" authority path-abempty
749 * Returns 0 or the error code
752 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
759 if ((*cur == '/') && (*(cur + 1) == '/')) {
761 ret = xmlParse3986Authority(uri, &cur);
762 if (ret != 0) return(ret);
763 if (uri->server == NULL)
765 ret = xmlParse3986PathAbEmpty(uri, &cur);
766 if (ret != 0) return(ret);
769 } else if (*cur == '/') {
770 ret = xmlParse3986PathAbsolute(uri, &cur);
771 if (ret != 0) return(ret);
772 } else if (ISA_PCHAR(cur)) {
773 ret = xmlParse3986PathRootless(uri, &cur);
774 if (ret != 0) return(ret);
776 /* path-empty is effectively empty */
778 if (uri->path != NULL) xmlFree(uri->path);
787 * xmlParse3986RelativeRef:
788 * @uri: pointer to an URI structure
789 * @str: the string to analyze
791 * Parse an URI string and fills in the appropriate fields
792 * of the @uri structure
794 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
795 * relative-part = "//" authority path-abempty
800 * Returns 0 or the error code
803 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
806 if ((*str == '/') && (*(str + 1) == '/')) {
808 ret = xmlParse3986Authority(uri, &str);
809 if (ret != 0) return(ret);
810 ret = xmlParse3986PathAbEmpty(uri, &str);
811 if (ret != 0) return(ret);
812 } else if (*str == '/') {
813 ret = xmlParse3986PathAbsolute(uri, &str);
814 if (ret != 0) return(ret);
815 } else if (ISA_PCHAR(str)) {
816 ret = xmlParse3986PathNoScheme(uri, &str);
817 if (ret != 0) return(ret);
819 /* path-empty is effectively empty */
821 if (uri->path != NULL) xmlFree(uri->path);
828 ret = xmlParse3986Query(uri, &str);
829 if (ret != 0) return(ret);
833 ret = xmlParse3986Fragment(uri, &str);
834 if (ret != 0) return(ret);
846 * @uri: pointer to an URI structure
847 * @str: the string to analyze
849 * Parse an URI string and fills in the appropriate fields
850 * of the @uri structure
852 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
854 * Returns 0 or the error code
857 xmlParse3986URI(xmlURIPtr uri, const char *str) {
860 ret = xmlParse3986Scheme(uri, &str);
861 if (ret != 0) return(ret);
866 ret = xmlParse3986HierPart(uri, &str);
867 if (ret != 0) return(ret);
870 ret = xmlParse3986Query(uri, &str);
871 if (ret != 0) return(ret);
875 ret = xmlParse3986Fragment(uri, &str);
876 if (ret != 0) return(ret);
886 * xmlParse3986URIReference:
887 * @uri: pointer to an URI structure
888 * @str: the string to analyze
890 * Parse an URI reference string and fills in the appropriate fields
891 * of the @uri structure
893 * URI-reference = URI / relative-ref
895 * Returns 0 or the error code
898 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
906 * Try first to parse absolute refs, then fallback to relative if
909 ret = xmlParse3986URI(uri, str);
912 ret = xmlParse3986RelativeRef(uri, str);
923 * @str: the URI string to analyze
925 * Parse an URI based on RFC 3986
927 * URI-reference = URI / relative-ref
929 * Returns a newly built xmlURIPtr or NULL in case of error
932 xmlParseURI(const char *str) {
938 uri = xmlCreateURI();
940 ret = xmlParse3986URIReference(uri, str);
950 * xmlParseURIReference:
951 * @uri: pointer to an URI structure
952 * @str: the string to analyze
954 * Parse an URI reference string based on RFC 3986 and fills in the
955 * appropriate fields of the @uri structure
957 * URI-reference = URI / relative-ref
959 * Returns 0 or the error code
962 xmlParseURIReference(xmlURIPtr uri, const char *str) {
963 return(xmlParse3986URIReference(uri, str));
968 * @str: the URI string to analyze
969 * @raw: if 1 unescaping of URI pieces is disabled
971 * Parse an URI but allows to keep intact the original fragments.
973 * URI-reference = URI / relative-ref
975 * Returns a newly built xmlURIPtr or NULL in case of error
978 xmlParseURIRaw(const char *str, int raw) {
984 uri = xmlCreateURI();
989 ret = xmlParseURIReference(uri, str);
998 /************************************************************************
1000 * Generic URI structure functions *
1002 ************************************************************************/
1007 * Simply creates an empty xmlURI
1009 * Returns the new structure or NULL in case of error
1012 xmlCreateURI(void) {
1015 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1017 xmlURIErrMemory("creating URI structure\n");
1020 memset(ret, 0, sizeof(xmlURI));
1025 * xmlSaveUriRealloc:
1027 * Function to handle properly a reallocation when saving an URI
1028 * Also imposes some limit on the length of an URI string output
1031 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1035 if (*max > MAX_URI_LENGTH) {
1036 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1040 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1042 xmlURIErrMemory("saving URI\n");
1051 * @uri: pointer to an xmlURI
1053 * Save the URI as an escaped string
1055 * Returns a new string (to be deallocated by caller)
1058 xmlSaveUri(xmlURIPtr uri) {
1059 xmlChar *ret = NULL;
1065 if (uri == NULL) return(NULL);
1069 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1071 xmlURIErrMemory("saving URI\n");
1076 if (uri->scheme != NULL) {
1080 temp = xmlSaveUriRealloc(ret, &max);
1081 if (temp == NULL) goto mem_error;
1087 temp = xmlSaveUriRealloc(ret, &max);
1088 if (temp == NULL) goto mem_error;
1093 if (uri->opaque != NULL) {
1096 if (len + 3 >= max) {
1097 temp = xmlSaveUriRealloc(ret, &max);
1098 if (temp == NULL) goto mem_error;
1101 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1104 int val = *(unsigned char *)p++;
1105 int hi = val / 0x10, lo = val % 0x10;
1107 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1108 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1112 if ((uri->server != NULL) || (uri->port == -1)) {
1113 if (len + 3 >= max) {
1114 temp = xmlSaveUriRealloc(ret, &max);
1115 if (temp == NULL) goto mem_error;
1120 if (uri->user != NULL) {
1123 if (len + 3 >= max) {
1124 temp = xmlSaveUriRealloc(ret, &max);
1125 if (temp == NULL) goto mem_error;
1128 if ((IS_UNRESERVED(*(p))) ||
1129 ((*(p) == ';')) || ((*(p) == ':')) ||
1130 ((*(p) == '&')) || ((*(p) == '=')) ||
1131 ((*(p) == '+')) || ((*(p) == '$')) ||
1135 int val = *(unsigned char *)p++;
1136 int hi = val / 0x10, lo = val % 0x10;
1138 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1139 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1142 if (len + 3 >= max) {
1143 temp = xmlSaveUriRealloc(ret, &max);
1144 if (temp == NULL) goto mem_error;
1149 if (uri->server != NULL) {
1153 temp = xmlSaveUriRealloc(ret, &max);
1154 if (temp == NULL) goto mem_error;
1159 if (uri->port > 0) {
1160 if (len + 10 >= max) {
1161 temp = xmlSaveUriRealloc(ret, &max);
1162 if (temp == NULL) goto mem_error;
1165 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1168 } else if (uri->authority != NULL) {
1169 if (len + 3 >= max) {
1170 temp = xmlSaveUriRealloc(ret, &max);
1171 if (temp == NULL) goto mem_error;
1178 if (len + 3 >= max) {
1179 temp = xmlSaveUriRealloc(ret, &max);
1180 if (temp == NULL) goto mem_error;
1183 if ((IS_UNRESERVED(*(p))) ||
1184 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1185 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1186 ((*(p) == '=')) || ((*(p) == '+')))
1189 int val = *(unsigned char *)p++;
1190 int hi = val / 0x10, lo = val % 0x10;
1192 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1193 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1196 } else if (uri->scheme != NULL) {
1197 if (len + 3 >= max) {
1198 temp = xmlSaveUriRealloc(ret, &max);
1199 if (temp == NULL) goto mem_error;
1203 if (uri->path != NULL) {
1206 * the colon in file:///d: should not be escaped or
1207 * Windows accesses fail later.
1209 if ((uri->scheme != NULL) &&
1211 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1212 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1214 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1215 if (len + 3 >= max) {
1216 temp = xmlSaveUriRealloc(ret, &max);
1217 if (temp == NULL) goto mem_error;
1225 if (len + 3 >= max) {
1226 temp = xmlSaveUriRealloc(ret, &max);
1227 if (temp == NULL) goto mem_error;
1230 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1231 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1232 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1236 int val = *(unsigned char *)p++;
1237 int hi = val / 0x10, lo = val % 0x10;
1239 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1240 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1244 if (uri->query_raw != NULL) {
1245 if (len + 1 >= max) {
1246 temp = xmlSaveUriRealloc(ret, &max);
1247 if (temp == NULL) goto mem_error;
1253 if (len + 1 >= max) {
1254 temp = xmlSaveUriRealloc(ret, &max);
1255 if (temp == NULL) goto mem_error;
1260 } else if (uri->query != NULL) {
1261 if (len + 3 >= max) {
1262 temp = xmlSaveUriRealloc(ret, &max);
1263 if (temp == NULL) goto mem_error;
1269 if (len + 3 >= max) {
1270 temp = xmlSaveUriRealloc(ret, &max);
1271 if (temp == NULL) goto mem_error;
1274 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1277 int val = *(unsigned char *)p++;
1278 int hi = val / 0x10, lo = val % 0x10;
1280 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1281 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1286 if (uri->fragment != NULL) {
1287 if (len + 3 >= max) {
1288 temp = xmlSaveUriRealloc(ret, &max);
1289 if (temp == NULL) goto mem_error;
1295 if (len + 3 >= max) {
1296 temp = xmlSaveUriRealloc(ret, &max);
1297 if (temp == NULL) goto mem_error;
1300 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1303 int val = *(unsigned char *)p++;
1304 int hi = val / 0x10, lo = val % 0x10;
1306 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1307 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1312 temp = xmlSaveUriRealloc(ret, &max);
1313 if (temp == NULL) goto mem_error;
1326 * @stream: a FILE* for the output
1327 * @uri: pointer to an xmlURI
1329 * Prints the URI in the stream @stream.
1332 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1335 out = xmlSaveUri(uri);
1337 fprintf(stream, "%s", (char *) out);
1344 * @uri: pointer to an xmlURI
1346 * Make sure the xmlURI struct is free of content
1349 xmlCleanURI(xmlURIPtr uri) {
1350 if (uri == NULL) return;
1352 if (uri->scheme != NULL) xmlFree(uri->scheme);
1354 if (uri->server != NULL) xmlFree(uri->server);
1356 if (uri->user != NULL) xmlFree(uri->user);
1358 if (uri->path != NULL) xmlFree(uri->path);
1360 if (uri->fragment != NULL) xmlFree(uri->fragment);
1361 uri->fragment = NULL;
1362 if (uri->opaque != NULL) xmlFree(uri->opaque);
1364 if (uri->authority != NULL) xmlFree(uri->authority);
1365 uri->authority = NULL;
1366 if (uri->query != NULL) xmlFree(uri->query);
1368 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1369 uri->query_raw = NULL;
1374 * @uri: pointer to an xmlURI
1376 * Free up the xmlURI struct
1379 xmlFreeURI(xmlURIPtr uri) {
1380 if (uri == NULL) return;
1382 if (uri->scheme != NULL) xmlFree(uri->scheme);
1383 if (uri->server != NULL) xmlFree(uri->server);
1384 if (uri->user != NULL) xmlFree(uri->user);
1385 if (uri->path != NULL) xmlFree(uri->path);
1386 if (uri->fragment != NULL) xmlFree(uri->fragment);
1387 if (uri->opaque != NULL) xmlFree(uri->opaque);
1388 if (uri->authority != NULL) xmlFree(uri->authority);
1389 if (uri->query != NULL) xmlFree(uri->query);
1390 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1394 /************************************************************************
1396 * Helper functions *
1398 ************************************************************************/
1401 * xmlNormalizeURIPath:
1402 * @path: pointer to the path string
1404 * Applies the 5 normalization steps to a path string--that is, RFC 2396
1405 * Section 5.2, steps 6.c through 6.g.
1407 * Normalization occurs directly on the string, no new allocation is done
1409 * Returns 0 or an error code
1412 xmlNormalizeURIPath(char *path) {
1418 /* Skip all initial "/" chars. We want to get to the beginning of the
1419 * first non-empty segment.
1422 while (cur[0] == '/')
1427 /* Keep everything we've seen so far. */
1431 * Analyze each segment in sequence for cases (c) and (d).
1433 while (cur[0] != '\0') {
1435 * c) All occurrences of "./", where "." is a complete path segment,
1436 * are removed from the buffer string.
1438 if ((cur[0] == '.') && (cur[1] == '/')) {
1440 /* '//' normalization should be done at this point too */
1441 while (cur[0] == '/')
1447 * d) If the buffer string ends with "." as a complete path segment,
1448 * that "." is removed.
1450 if ((cur[0] == '.') && (cur[1] == '\0'))
1453 /* Otherwise keep the segment. */
1454 while (cur[0] != '/') {
1457 (out++)[0] = (cur++)[0];
1460 while ((cur[0] == '/') && (cur[1] == '/'))
1463 (out++)[0] = (cur++)[0];
1468 /* Reset to the beginning of the first segment for the next sequence. */
1470 while (cur[0] == '/')
1476 * Analyze each segment in sequence for cases (e) and (f).
1478 * e) All occurrences of "<segment>/../", where <segment> is a
1479 * complete path segment not equal to "..", are removed from the
1480 * buffer string. Removal of these path segments is performed
1481 * iteratively, removing the leftmost matching pattern on each
1482 * iteration, until no matching pattern remains.
1484 * f) If the buffer string ends with "<segment>/..", where <segment>
1485 * is a complete path segment not equal to "..", that
1486 * "<segment>/.." is removed.
1488 * To satisfy the "iterative" clause in (e), we need to collapse the
1489 * string every time we find something that needs to be removed. Thus,
1490 * we don't need to keep two pointers into the string: we only need a
1491 * "current position" pointer.
1496 /* At the beginning of each iteration of this loop, "cur" points to
1497 * the first character of the segment we want to examine.
1500 /* Find the end of the current segment. */
1502 while ((segp[0] != '/') && (segp[0] != '\0'))
1505 /* If this is the last segment, we're done (we need at least two
1506 * segments to meet the criteria for the (e) and (f) cases).
1508 if (segp[0] == '\0')
1511 /* If the first segment is "..", or if the next segment _isn't_ "..",
1512 * keep this segment and try the next one.
1515 if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
1516 || ((segp[0] != '.') || (segp[1] != '.')
1517 || ((segp[2] != '/') && (segp[2] != '\0')))) {
1522 /* If we get here, remove this segment and the next one and back up
1523 * to the previous segment (if there is one), to implement the
1524 * "iteratively" clause. It's pretty much impossible to back up
1525 * while maintaining two pointers into the buffer, so just compact
1526 * the whole buffer now.
1529 /* If this is the end of the buffer, we're done. */
1530 if (segp[2] == '\0') {
1534 /* Valgrind complained, strcpy(cur, segp + 3); */
1535 /* string will overlap, do not use strcpy */
1538 while ((*tmp++ = *segp++) != 0)
1541 /* If there are no previous segments, then keep going from here. */
1543 while ((segp > path) && ((--segp)[0] == '/'))
1548 /* "segp" is pointing to the end of a previous segment; find its
1549 * start. We need to back up to the previous segment and start
1550 * over with that to handle things like "foo/bar/../..". If we
1551 * don't do this, then on the first pass we'll remove the "bar/..",
1552 * but be pointing at the second ".." so we won't realize we can also
1553 * remove the "foo/..".
1556 while ((cur > path) && (cur[-1] != '/'))
1562 * g) If the resulting buffer string still begins with one or more
1563 * complete path segments of "..", then the reference is
1564 * considered to be in error. Implementations may handle this
1565 * error by retaining these components in the resolved path (i.e.,
1566 * treating them as part of the final URI), by removing them from
1567 * the resolved path (i.e., discarding relative levels above the
1568 * root), or by avoiding traversal of the reference.
1570 * We discard them from the final path.
1572 if (path[0] == '/') {
1574 while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
1575 && ((cur[3] == '/') || (cur[3] == '\0')))
1580 while (cur[0] != '\0')
1581 (out++)[0] = (cur++)[0];
1589 static int is_hex(char c) {
1590 if (((c >= '0') && (c <= '9')) ||
1591 ((c >= 'a') && (c <= 'f')) ||
1592 ((c >= 'A') && (c <= 'F')))
1598 * xmlURIUnescapeString:
1599 * @str: the string to unescape
1600 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
1601 * @target: optional destination buffer
1603 * Unescaping routine, but does not check that the string is an URI. The
1604 * output is a direct unsigned char translation of %XX values (no encoding)
1605 * Note that the length of the result can only be smaller or same size as
1608 * Returns a copy of the string, but unescaped, will return NULL only in case
1612 xmlURIUnescapeString(const char *str, int len, char *target) {
1618 if (len <= 0) len = strlen(str);
1619 if (len < 0) return(NULL);
1621 if (target == NULL) {
1622 ret = (char *) xmlMallocAtomic(len + 1);
1624 xmlURIErrMemory("unescaping URI value\n");
1632 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
1634 if ((*in >= '0') && (*in <= '9'))
1636 else if ((*in >= 'a') && (*in <= 'f'))
1637 *out = (*in - 'a') + 10;
1638 else if ((*in >= 'A') && (*in <= 'F'))
1639 *out = (*in - 'A') + 10;
1641 if ((*in >= '0') && (*in <= '9'))
1642 *out = *out * 16 + (*in - '0');
1643 else if ((*in >= 'a') && (*in <= 'f'))
1644 *out = *out * 16 + (*in - 'a') + 10;
1645 else if ((*in >= 'A') && (*in <= 'F'))
1646 *out = *out * 16 + (*in - 'A') + 10;
1661 * @str: string to escape
1662 * @list: exception list string of chars not to escape
1664 * This routine escapes a string to hex, leaving untouched '@', the unreserved
1665 * characters (alphanumerics and marks) and those in the exception list.
1667 * Returns a new escaped string or NULL in case of error.
1670 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1679 return(xmlStrdup(str));
1680 len = xmlStrlen(str);
1681 if (!(len > 0)) return(NULL);
1684 ret = (xmlChar *) xmlMallocAtomic(len);
1686 xmlURIErrMemory("escaping URI value\n");
1689 in = (const xmlChar *) str;
1692 if (len - out <= 3) {
1693 temp = xmlSaveUriRealloc(ret, &len);
1695 xmlURIErrMemory("escaping URI value\n");
1704 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1709 ret[out++] = '0' + val;
1711 ret[out++] = 'A' + val - 0xA;
1714 ret[out++] = '0' + val;
1716 ret[out++] = 'A' + val - 0xA;
1729 * @str: the string of the URI to escape
1731 * Escaping routine, does not do validity checks !
1732 * It will try to escape the chars needing this, but this is heuristic
1733 * based, so it's impossible to be sure.
1735 * Returns a copy of the string, but escaped
1738 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1739 * according to RFC2396.
1743 xmlURIEscape(const xmlChar * str)
1745 xmlChar *ret, *segment = NULL;
1749 #define NULLCHK(p) if(!p) { \
1750 xmlURIErrMemory("escaping URI value\n"); \
1757 uri = xmlCreateURI();
1760 * Allow escaping errors in the unescaped form
1763 ret2 = xmlParseURIReference(uri, (const char *)str);
1776 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1778 ret = xmlStrcat(ret, segment);
1779 ret = xmlStrcat(ret, BAD_CAST ":");
1783 if (uri->authority) {
1785 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1787 ret = xmlStrcat(ret, BAD_CAST "//");
1788 ret = xmlStrcat(ret, segment);
1793 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1795 ret = xmlStrcat(ret,BAD_CAST "//");
1796 ret = xmlStrcat(ret, segment);
1797 ret = xmlStrcat(ret, BAD_CAST "@");
1802 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1804 if (uri->user == NULL)
1805 ret = xmlStrcat(ret, BAD_CAST "//");
1806 ret = xmlStrcat(ret, segment);
1813 snprintf((char *) port, 10, "%d", uri->port);
1814 ret = xmlStrcat(ret, BAD_CAST ":");
1815 ret = xmlStrcat(ret, port);
1820 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1822 ret = xmlStrcat(ret, segment);
1826 if (uri->query_raw) {
1827 ret = xmlStrcat(ret, BAD_CAST "?");
1828 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1830 else if (uri->query) {
1832 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1834 ret = xmlStrcat(ret, BAD_CAST "?");
1835 ret = xmlStrcat(ret, segment);
1840 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1842 ret = xmlStrcat(ret, segment);
1846 if (uri->fragment) {
1847 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1849 ret = xmlStrcat(ret, BAD_CAST "#");
1850 ret = xmlStrcat(ret, segment);
1860 /************************************************************************
1862 * Public functions *
1864 ************************************************************************/
1868 * @URI: the URI instance found in the document
1869 * @base: the base value
1871 * Computes the final URI of the reference done by checking that
1872 * the given URI is valid, and building the final URI using the
1873 * base URI. This is processed according to section 5.2 of the
1876 * 5.2. Resolving Relative References to Absolute Form
1878 * Returns a new URI string (to be freed by the caller) or NULL in case
1882 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1883 xmlChar *val = NULL;
1884 int ret, len, indx, cur, out;
1885 xmlURIPtr ref = NULL;
1886 xmlURIPtr bas = NULL;
1887 xmlURIPtr res = NULL;
1890 * 1) The URI reference is parsed into the potential four components and
1891 * fragment identifier, as described in Section 4.3.
1893 * NOTE that a completely empty URI is treated by modern browsers
1894 * as a reference to "." rather than as a synonym for the current
1895 * URI. Should we do that here?
1901 ref = xmlCreateURI();
1904 ret = xmlParseURIReference(ref, (const char *) URI);
1911 if ((ref != NULL) && (ref->scheme != NULL)) {
1913 * The URI is absolute don't modify.
1915 val = xmlStrdup(URI);
1921 bas = xmlCreateURI();
1924 ret = xmlParseURIReference(bas, (const char *) base);
1928 val = xmlSaveUri(ref);
1933 * the base fragment must be ignored
1935 if (bas->fragment != NULL) {
1936 xmlFree(bas->fragment);
1937 bas->fragment = NULL;
1939 val = xmlSaveUri(bas);
1944 * 2) If the path component is empty and the scheme, authority, and
1945 * query components are undefined, then it is a reference to the
1946 * current document and we are done. Otherwise, the reference URI's
1947 * query and fragment components are defined as found (or not found)
1948 * within the URI reference and not inherited from the base URI.
1950 * NOTE that in modern browsers, the parsing differs from the above
1951 * in the following aspect: the query component is allowed to be
1952 * defined while still treating this as a reference to the current
1955 res = xmlCreateURI();
1958 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1959 ((ref->authority == NULL) && (ref->server == NULL))) {
1960 if (bas->scheme != NULL)
1961 res->scheme = xmlMemStrdup(bas->scheme);
1962 if (bas->authority != NULL)
1963 res->authority = xmlMemStrdup(bas->authority);
1964 else if (bas->server != NULL) {
1965 res->server = xmlMemStrdup(bas->server);
1966 if (bas->user != NULL)
1967 res->user = xmlMemStrdup(bas->user);
1968 res->port = bas->port;
1970 if (bas->path != NULL)
1971 res->path = xmlMemStrdup(bas->path);
1972 if (ref->query_raw != NULL)
1973 res->query_raw = xmlMemStrdup (ref->query_raw);
1974 else if (ref->query != NULL)
1975 res->query = xmlMemStrdup(ref->query);
1976 else if (bas->query_raw != NULL)
1977 res->query_raw = xmlMemStrdup(bas->query_raw);
1978 else if (bas->query != NULL)
1979 res->query = xmlMemStrdup(bas->query);
1980 if (ref->fragment != NULL)
1981 res->fragment = xmlMemStrdup(ref->fragment);
1986 * 3) If the scheme component is defined, indicating that the reference
1987 * starts with a scheme name, then the reference is interpreted as an
1988 * absolute URI and we are done. Otherwise, the reference URI's
1989 * scheme is inherited from the base URI's scheme component.
1991 if (ref->scheme != NULL) {
1992 val = xmlSaveUri(ref);
1995 if (bas->scheme != NULL)
1996 res->scheme = xmlMemStrdup(bas->scheme);
1998 if (ref->query_raw != NULL)
1999 res->query_raw = xmlMemStrdup(ref->query_raw);
2000 else if (ref->query != NULL)
2001 res->query = xmlMemStrdup(ref->query);
2002 if (ref->fragment != NULL)
2003 res->fragment = xmlMemStrdup(ref->fragment);
2006 * 4) If the authority component is defined, then the reference is a
2007 * network-path and we skip to step 7. Otherwise, the reference
2008 * URI's authority is inherited from the base URI's authority
2009 * component, which will also be undefined if the URI scheme does not
2010 * use an authority component.
2012 if ((ref->authority != NULL) || (ref->server != NULL)) {
2013 if (ref->authority != NULL)
2014 res->authority = xmlMemStrdup(ref->authority);
2016 res->server = xmlMemStrdup(ref->server);
2017 if (ref->user != NULL)
2018 res->user = xmlMemStrdup(ref->user);
2019 res->port = ref->port;
2021 if (ref->path != NULL)
2022 res->path = xmlMemStrdup(ref->path);
2025 if (bas->authority != NULL)
2026 res->authority = xmlMemStrdup(bas->authority);
2027 else if (bas->server != NULL) {
2028 res->server = xmlMemStrdup(bas->server);
2029 if (bas->user != NULL)
2030 res->user = xmlMemStrdup(bas->user);
2031 res->port = bas->port;
2035 * 5) If the path component begins with a slash character ("/"), then
2036 * the reference is an absolute-path and we skip to step 7.
2038 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2039 res->path = xmlMemStrdup(ref->path);
2045 * 6) If this step is reached, then we are resolving a relative-path
2046 * reference. The relative path needs to be merged with the base
2047 * URI's path. Although there are many ways to do this, we will
2048 * describe a simple method using a separate string buffer.
2050 * Allocate a buffer large enough for the result string.
2052 len = 2; /* extra / and 0 */
2053 if (ref->path != NULL)
2054 len += strlen(ref->path);
2055 if (bas->path != NULL)
2056 len += strlen(bas->path);
2057 res->path = (char *) xmlMallocAtomic(len);
2058 if (res->path == NULL) {
2059 xmlURIErrMemory("resolving URI against base\n");
2065 * a) All but the last segment of the base URI's path component is
2066 * copied to the buffer. In other words, any characters after the
2067 * last (right-most) slash character, if any, are excluded.
2071 if (bas->path != NULL) {
2072 while (bas->path[cur] != 0) {
2073 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2075 if (bas->path[cur] == 0)
2080 res->path[out] = bas->path[out];
2088 * b) The reference's path component is appended to the buffer
2091 if (ref->path != NULL && ref->path[0] != 0) {
2094 * Ensure the path includes a '/'
2096 if ((out == 0) && (bas->server != NULL))
2097 res->path[out++] = '/';
2098 while (ref->path[indx] != 0) {
2099 res->path[out++] = ref->path[indx++];
2105 * Steps c) to h) are really path normalization steps
2107 xmlNormalizeURIPath(res->path);
2112 * 7) The resulting URI components, including any inherited from the
2113 * base URI, are recombined to give the absolute form of the URI
2116 val = xmlSaveUri(res);
2129 * xmlBuildRelativeURI:
2130 * @URI: the URI reference under consideration
2131 * @base: the base value
2133 * Expresses the URI of the reference in terms relative to the
2134 * base. Some examples of this operation include:
2135 * base = "http://site1.com/docs/book1.html"
2136 * URI input URI returned
2137 * docs/pic1.gif pic1.gif
2138 * docs/img/pic1.gif img/pic1.gif
2139 * img/pic1.gif ../img/pic1.gif
2140 * http://site1.com/docs/pic1.gif pic1.gif
2141 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2143 * base = "docs/book1.html"
2144 * URI input URI returned
2145 * docs/pic1.gif pic1.gif
2146 * docs/img/pic1.gif img/pic1.gif
2147 * img/pic1.gif ../img/pic1.gif
2148 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2151 * Note: if the URI reference is really weird or complicated, it may be
2152 * worthwhile to first convert it into a "nice" one by calling
2153 * xmlBuildURI (using 'base') before calling this routine,
2154 * since this routine (for reasonable efficiency) assumes URI has
2155 * already been through some validation.
2157 * Returns a new URI string (to be freed by the caller) or NULL in case
2161 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2163 xmlChar *val = NULL;
2169 xmlURIPtr ref = NULL;
2170 xmlURIPtr bas = NULL;
2171 xmlChar *bptr, *uptr, *vptr;
2172 int remove_path = 0;
2174 if ((URI == NULL) || (*URI == 0))
2178 * First parse URI into a standard form
2180 ref = xmlCreateURI ();
2183 /* If URI not already in "relative" form */
2184 if (URI[0] != '.') {
2185 ret = xmlParseURIReference (ref, (const char *) URI);
2187 goto done; /* Error in URI, return NULL */
2189 ref->path = (char *)xmlStrdup(URI);
2192 * Next parse base into the same standard form
2194 if ((base == NULL) || (*base == 0)) {
2195 val = xmlStrdup (URI);
2198 bas = xmlCreateURI ();
2201 if (base[0] != '.') {
2202 ret = xmlParseURIReference (bas, (const char *) base);
2204 goto done; /* Error in base, return NULL */
2206 bas->path = (char *)xmlStrdup(base);
2209 * If the scheme / server on the URI differs from the base,
2210 * just return the URI
2212 if ((ref->scheme != NULL) &&
2213 ((bas->scheme == NULL) ||
2214 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2215 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2216 val = xmlStrdup (URI);
2219 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2220 val = xmlStrdup(BAD_CAST "");
2223 if (bas->path == NULL) {
2224 val = xmlStrdup((xmlChar *)ref->path);
2227 if (ref->path == NULL) {
2228 ref->path = (char *) "/";
2233 * At this point (at last!) we can compare the two paths
2235 * First we take care of the special case where either of the
2236 * two path components may be missing (bug 316224)
2238 if (bas->path == NULL) {
2239 if (ref->path != NULL) {
2240 uptr = (xmlChar *) ref->path;
2243 /* exception characters from xmlSaveUri */
2244 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2248 bptr = (xmlChar *)bas->path;
2249 if (ref->path == NULL) {
2250 for (ix = 0; bptr[ix] != 0; ix++) {
2251 if (bptr[ix] == '/')
2255 len = 1; /* this is for a string terminator only */
2258 * Next we compare the two strings and find where they first differ
2260 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2262 if ((*bptr == '.') && (bptr[1] == '/'))
2264 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2266 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2269 if (bptr[pos] == ref->path[pos]) {
2270 val = xmlStrdup(BAD_CAST "");
2271 goto done; /* (I can't imagine why anyone would do this) */
2275 * In URI, "back up" to the last '/' encountered. This will be the
2276 * beginning of the "unique" suffix of URI
2279 if ((ref->path[ix] == '/') && (ix > 0))
2281 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2283 for (; ix > 0; ix--) {
2284 if (ref->path[ix] == '/')
2288 uptr = (xmlChar *)ref->path;
2291 uptr = (xmlChar *)&ref->path[ix];
2295 * In base, count the number of '/' from the differing point
2297 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2298 for (; bptr[ix] != 0; ix++) {
2299 if (bptr[ix] == '/')
2303 len = xmlStrlen (uptr) + 1;
2308 /* exception characters from xmlSaveUri */
2309 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2314 * Allocate just enough space for the returned string -
2315 * length of the remainder of the URI, plus enough space
2316 * for the "../" groups, plus one for the terminator
2318 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2320 xmlURIErrMemory("building relative URI\n");
2325 * Put in as many "../" as needed
2327 for (; nbslash>0; nbslash--) {
2333 * Finish up with the end of the URI
2336 if ((vptr > val) && (len > 0) &&
2337 (uptr[0] == '/') && (vptr[-1] == '/')) {
2338 memcpy (vptr, uptr + 1, len - 1);
2341 memcpy (vptr, uptr, len);
2348 /* escape the freshly-built path */
2350 /* exception characters from xmlSaveUri */
2351 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2356 * Free the working variables
2358 if (remove_path != 0)
2370 * @path: the resource locator in a filesystem notation
2372 * Constructs a canonic path from the specified path.
2374 * Returns a new canonic path, or a duplicate of the path parameter if the
2375 * construction fails. The caller is responsible for freeing the memory occupied
2376 * by the returned string. If there is insufficient memory available, or the
2377 * argument is NULL, the function returns NULL.
2379 #define IS_WINDOWS_PATH(p) \
2381 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2382 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2383 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2385 xmlCanonicPath(const xmlChar *path)
2388 * For Windows implementations, additional work needs to be done to
2389 * replace backslashes in pathnames with "forward slashes"
2391 #if defined(_WIN32) && !defined(__CYGWIN__)
2398 const xmlChar *absuri;
2405 * We must not change the backslashes to slashes if the path
2407 * Those paths can be up to 32k characters long.
2408 * Was added specifically for OpenOffice, those paths can't be converted
2411 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2413 return xmlStrdup((const xmlChar *) path);
2416 /* sanitize filename starting with // so it can be used as URI */
2417 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2420 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2422 return xmlStrdup(path);
2425 /* Check if this is an "absolute uri" */
2426 absuri = xmlStrstr(path, BAD_CAST "://");
2427 if (absuri != NULL) {
2433 * this looks like an URI where some parts have not been
2434 * escaped leading to a parsing problem. Check that the first
2435 * part matches a protocol.
2438 /* Bypass if first part (part before the '://') is > 20 chars */
2439 if ((l <= 0) || (l > 20))
2440 goto path_processing;
2441 /* Bypass if any non-alpha characters are present in first part */
2442 for (j = 0;j < l;j++) {
2444 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2445 goto path_processing;
2448 /* Escape all except the characters specified in the supplied path */
2449 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2450 if (escURI != NULL) {
2451 /* Try parsing the escaped path */
2452 uri = xmlParseURI((const char *) escURI);
2453 /* If successful, return the escaped string */
2462 /* For Windows implementations, replace backslashes with 'forward slashes' */
2463 #if defined(_WIN32) && !defined(__CYGWIN__)
2465 * Create a URI structure
2467 uri = xmlCreateURI();
2468 if (uri == NULL) { /* Guard against 'out of memory' */
2472 len = xmlStrlen(path);
2473 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2474 /* make the scheme 'file' */
2475 uri->scheme = xmlStrdup(BAD_CAST "file");
2476 /* allocate space for leading '/' + path + string terminator */
2477 uri->path = xmlMallocAtomic(len + 2);
2478 if (uri->path == NULL) {
2479 xmlFreeURI(uri); /* Guard agains 'out of memory' */
2482 /* Put in leading '/' plus path */
2485 strncpy(p, path, len + 1);
2487 uri->path = xmlStrdup(path);
2488 if (uri->path == NULL) {
2494 /* Now change all occurrences of '\' to '/' */
2495 while (*p != '\0') {
2501 if (uri->scheme == NULL) {
2502 ret = xmlStrdup((const xmlChar *) uri->path);
2504 ret = xmlSaveUri(uri);
2509 ret = xmlStrdup((const xmlChar *) path);
2516 * @path: the resource locator in a filesystem notation
2518 * Constructs an URI expressing the existing path
2520 * Returns a new URI, or a duplicate of the path parameter if the
2521 * construction fails. The caller is responsible for freeing the memory
2522 * occupied by the returned string. If there is insufficient memory available,
2523 * or the argument is NULL, the function returns NULL.
2526 xmlPathToURI(const xmlChar *path)
2535 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2537 return xmlStrdup(path);
2539 cal = xmlCanonicPath(path);
2542 #if defined(_WIN32) && !defined(__CYGWIN__)
2543 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2544 If 'cal' is a valid URI already then we are done here, as continuing would make
2546 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2550 /* 'cal' can contain a relative path with backslashes. If that is processed
2551 by xmlSaveURI, they will be escaped and the external entity loader machinery
2552 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2554 while (*ret != '\0') {
2560 memset(&temp, 0, sizeof(temp));
2561 temp.path = (char *) cal;
2562 ret = xmlSaveUri(&temp);
2567 #include "elfgcchack.h"