2 * uri.c: set of generic URI related routines
4 * Reference: RFCs 2396, 2732 and 2373
6 * See Copyright for the status of this software.
16 #include <libxml/xmlmemory.h>
17 #include <libxml/uri.h>
18 #include <libxml/globals.h>
19 #include <libxml/xmlerror.h>
21 /************************************************************************
23 * Macros to differentiate various character type *
24 * directly extracted from RFC 2396 *
26 ************************************************************************/
29 * alpha = lowalpha | upalpha
31 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
35 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
36 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
37 * "u" | "v" | "w" | "x" | "y" | "z"
40 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
43 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
44 * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
45 * "U" | "V" | "W" | "X" | "Y" | "Z"
47 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
53 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
55 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
58 * alphanum = alpha | digit
61 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
64 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
65 * "a" | "b" | "c" | "d" | "e" | "f"
68 #define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
69 (((x) >= 'A') && ((x) <= 'F')))
72 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
75 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \
76 ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \
77 ((x) == '(') || ((x) == ')'))
81 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
85 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
86 ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
87 ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
91 * unreserved = alphanum | mark
94 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
97 * escaped = "%" hex hex
100 #define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) && \
104 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
105 * "&" | "=" | "+" | "$" | ","
107 #define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\
108 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\
109 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\
110 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
113 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
115 #define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
116 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\
117 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\
121 * rel_segment = 1*( unreserved | escaped |
122 * ";" | "@" | "&" | "=" | "+" | "$" | "," )
125 #define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
126 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || \
127 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
131 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
134 #define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) || \
135 ((x) == '+') || ((x) == '-') || ((x) == '.'))
138 * reg_name = 1*( unreserved | escaped | "$" | "," |
139 * ";" | ":" | "@" | "&" | "=" | "+" )
142 #define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
143 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || \
144 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || \
145 ((*(p) == '=')) || ((*(p) == '+')))
148 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
151 #define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
152 ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) || \
153 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
157 * uric = reserved | unreserved | escaped
160 #define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
164 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
167 #define IS_UNWISE(p) \
168 (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \
169 ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \
170 ((*(p) == ']')) || ((*(p) == '`')))
173 * Skip to next pointer char, handle escaped sequences
176 #define NEXT(p) ((*p == '%')? p += 3 : p++)
179 * Productions from the spec.
181 * authority = server | reg_name
182 * reg_name = 1*( unreserved | escaped | "$" | "," |
183 * ";" | ":" | "@" | "&" | "=" | "+" )
185 * path = [ abs_path | opaque_part ]
188 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
190 /************************************************************************
192 * Generic URI structure functions *
194 ************************************************************************/
199 * Simply creates an empty xmlURI
201 * Returns the new structure or NULL in case of error
207 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
209 xmlGenericError(xmlGenericErrorContext,
210 "xmlCreateURI: out of memory\n");
213 memset(ret, 0, sizeof(xmlURI));
219 * @uri: pointer to an xmlURI
221 * Save the URI as an escaped string
223 * Returns a new string (to be deallocated by caller)
226 xmlSaveUri(xmlURIPtr uri) {
232 if (uri == NULL) return(NULL);
236 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
238 xmlGenericError(xmlGenericErrorContext,
239 "xmlSaveUri: out of memory\n");
244 if (uri->scheme != NULL) {
249 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
251 xmlGenericError(xmlGenericErrorContext,
252 "xmlSaveUri: out of memory\n");
260 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
262 xmlGenericError(xmlGenericErrorContext,
263 "xmlSaveUri: out of memory\n");
269 if (uri->opaque != NULL) {
272 if (len + 3 >= max) {
274 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
276 xmlGenericError(xmlGenericErrorContext,
277 "xmlSaveUri: out of memory\n");
281 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
284 int val = *(unsigned char *)p++;
285 int hi = val / 0x10, lo = val % 0x10;
287 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
288 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
292 if (uri->server != NULL) {
293 if (len + 3 >= max) {
295 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
297 xmlGenericError(xmlGenericErrorContext,
298 "xmlSaveUri: out of memory\n");
304 if (uri->user != NULL) {
307 if (len + 3 >= max) {
309 ret = (xmlChar *) xmlRealloc(ret,
310 (max + 1) * sizeof(xmlChar));
312 xmlGenericError(xmlGenericErrorContext,
313 "xmlSaveUri: out of memory\n");
317 if ((IS_UNRESERVED(*(p))) ||
318 ((*(p) == ';')) || ((*(p) == ':')) ||
319 ((*(p) == '&')) || ((*(p) == '=')) ||
320 ((*(p) == '+')) || ((*(p) == '$')) ||
324 int val = *(unsigned char *)p++;
325 int hi = val / 0x10, lo = val % 0x10;
327 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
328 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
331 if (len + 3 >= max) {
333 ret = (xmlChar *) xmlRealloc(ret,
334 (max + 1) * sizeof(xmlChar));
336 xmlGenericError(xmlGenericErrorContext,
337 "xmlSaveUri: out of memory\n");
347 ret = (xmlChar *) xmlRealloc(ret,
348 (max + 1) * sizeof(xmlChar));
350 xmlGenericError(xmlGenericErrorContext,
351 "xmlSaveUri: out of memory\n");
358 if (len + 10 >= max) {
360 ret = (xmlChar *) xmlRealloc(ret,
361 (max + 1) * sizeof(xmlChar));
363 xmlGenericError(xmlGenericErrorContext,
364 "xmlSaveUri: out of memory\n");
368 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
370 } else if (uri->authority != NULL) {
371 if (len + 3 >= max) {
373 ret = (xmlChar *) xmlRealloc(ret,
374 (max + 1) * sizeof(xmlChar));
376 xmlGenericError(xmlGenericErrorContext,
377 "xmlSaveUri: out of memory\n");
385 if (len + 3 >= max) {
387 ret = (xmlChar *) xmlRealloc(ret,
388 (max + 1) * sizeof(xmlChar));
390 xmlGenericError(xmlGenericErrorContext,
391 "xmlSaveUri: out of memory\n");
395 if ((IS_UNRESERVED(*(p))) ||
396 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
397 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
398 ((*(p) == '=')) || ((*(p) == '+')))
401 int val = *(unsigned char *)p++;
402 int hi = val / 0x10, lo = val % 0x10;
404 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
405 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
408 } else if (uri->scheme != NULL) {
409 if (len + 3 >= max) {
411 ret = (xmlChar *) xmlRealloc(ret,
412 (max + 1) * sizeof(xmlChar));
414 xmlGenericError(xmlGenericErrorContext,
415 "xmlSaveUri: out of memory\n");
422 if (uri->path != NULL) {
425 if (len + 3 >= max) {
427 ret = (xmlChar *) xmlRealloc(ret,
428 (max + 1) * sizeof(xmlChar));
430 xmlGenericError(xmlGenericErrorContext,
431 "xmlSaveUri: out of memory\n");
435 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
436 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
437 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
441 int val = *(unsigned char *)p++;
442 int hi = val / 0x10, lo = val % 0x10;
444 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
445 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
449 if (uri->query != NULL) {
450 if (len + 3 >= max) {
452 ret = (xmlChar *) xmlRealloc(ret,
453 (max + 1) * sizeof(xmlChar));
455 xmlGenericError(xmlGenericErrorContext,
456 "xmlSaveUri: out of memory\n");
463 if (len + 3 >= max) {
465 ret = (xmlChar *) xmlRealloc(ret,
466 (max + 1) * sizeof(xmlChar));
468 xmlGenericError(xmlGenericErrorContext,
469 "xmlSaveUri: out of memory\n");
473 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
476 int val = *(unsigned char *)p++;
477 int hi = val / 0x10, lo = val % 0x10;
479 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
480 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
485 if (uri->fragment != NULL) {
486 if (len + 3 >= max) {
488 ret = (xmlChar *) xmlRealloc(ret,
489 (max + 1) * sizeof(xmlChar));
491 xmlGenericError(xmlGenericErrorContext,
492 "xmlSaveUri: out of memory\n");
499 if (len + 3 >= max) {
501 ret = (xmlChar *) xmlRealloc(ret,
502 (max + 1) * sizeof(xmlChar));
504 xmlGenericError(xmlGenericErrorContext,
505 "xmlSaveUri: out of memory\n");
509 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
512 int val = *(unsigned char *)p++;
513 int hi = val / 0x10, lo = val % 0x10;
515 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
516 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
522 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
524 xmlGenericError(xmlGenericErrorContext,
525 "xmlSaveUri: out of memory\n");
535 * @stream: a FILE* for the output
536 * @uri: pointer to an xmlURI
538 * Prints the URI in the stream @stream.
541 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
544 out = xmlSaveUri(uri);
546 fprintf(stream, "%s", (char *) out);
553 * @uri: pointer to an xmlURI
555 * Make sure the xmlURI struct is free of content
558 xmlCleanURI(xmlURIPtr uri) {
559 if (uri == NULL) return;
561 if (uri->scheme != NULL) xmlFree(uri->scheme);
563 if (uri->server != NULL) xmlFree(uri->server);
565 if (uri->user != NULL) xmlFree(uri->user);
567 if (uri->path != NULL) xmlFree(uri->path);
569 if (uri->fragment != NULL) xmlFree(uri->fragment);
570 uri->fragment = NULL;
571 if (uri->opaque != NULL) xmlFree(uri->opaque);
573 if (uri->authority != NULL) xmlFree(uri->authority);
574 uri->authority = NULL;
575 if (uri->query != NULL) xmlFree(uri->query);
581 * @uri: pointer to an xmlURI
583 * Free up the xmlURI struct
586 xmlFreeURI(xmlURIPtr uri) {
587 if (uri == NULL) return;
589 if (uri->scheme != NULL) xmlFree(uri->scheme);
590 if (uri->server != NULL) xmlFree(uri->server);
591 if (uri->user != NULL) xmlFree(uri->user);
592 if (uri->path != NULL) xmlFree(uri->path);
593 if (uri->fragment != NULL) xmlFree(uri->fragment);
594 if (uri->opaque != NULL) xmlFree(uri->opaque);
595 if (uri->authority != NULL) xmlFree(uri->authority);
596 if (uri->query != NULL) xmlFree(uri->query);
600 /************************************************************************
604 ************************************************************************/
607 * xmlNormalizeURIPath:
608 * @path: pointer to the path string
610 * Applies the 5 normalization steps to a path string--that is, RFC 2396
611 * Section 5.2, steps 6.c through 6.g.
613 * Normalization occurs directly on the string, no new allocation is done
615 * Returns 0 or an error code
618 xmlNormalizeURIPath(char *path) {
624 /* Skip all initial "/" chars. We want to get to the beginning of the
625 * first non-empty segment.
628 while (cur[0] == '/')
633 /* Keep everything we've seen so far. */
637 * Analyze each segment in sequence for cases (c) and (d).
639 while (cur[0] != '\0') {
641 * c) All occurrences of "./", where "." is a complete path segment,
642 * are removed from the buffer string.
644 if ((cur[0] == '.') && (cur[1] == '/')) {
646 /* '//' normalization should be done at this point too */
647 while (cur[0] == '/')
653 * d) If the buffer string ends with "." as a complete path segment,
654 * that "." is removed.
656 if ((cur[0] == '.') && (cur[1] == '\0'))
659 /* Otherwise keep the segment. */
660 while (cur[0] != '/') {
663 (out++)[0] = (cur++)[0];
666 while ((cur[0] == '/') && (cur[1] == '/'))
669 (out++)[0] = (cur++)[0];
674 /* Reset to the beginning of the first segment for the next sequence. */
676 while (cur[0] == '/')
682 * Analyze each segment in sequence for cases (e) and (f).
684 * e) All occurrences of "<segment>/../", where <segment> is a
685 * complete path segment not equal to "..", are removed from the
686 * buffer string. Removal of these path segments is performed
687 * iteratively, removing the leftmost matching pattern on each
688 * iteration, until no matching pattern remains.
690 * f) If the buffer string ends with "<segment>/..", where <segment>
691 * is a complete path segment not equal to "..", that
692 * "<segment>/.." is removed.
694 * To satisfy the "iterative" clause in (e), we need to collapse the
695 * string every time we find something that needs to be removed. Thus,
696 * we don't need to keep two pointers into the string: we only need a
697 * "current position" pointer.
702 /* At the beginning of each iteration of this loop, "cur" points to
703 * the first character of the segment we want to examine.
706 /* Find the end of the current segment. */
708 while ((segp[0] != '/') && (segp[0] != '\0'))
711 /* If this is the last segment, we're done (we need at least two
712 * segments to meet the criteria for the (e) and (f) cases).
717 /* If the first segment is "..", or if the next segment _isn't_ "..",
718 * keep this segment and try the next one.
721 if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
722 || ((segp[0] != '.') || (segp[1] != '.')
723 || ((segp[2] != '/') && (segp[2] != '\0')))) {
728 /* If we get here, remove this segment and the next one and back up
729 * to the previous segment (if there is one), to implement the
730 * "iteratively" clause. It's pretty much impossible to back up
731 * while maintaining two pointers into the buffer, so just compact
732 * the whole buffer now.
735 /* If this is the end of the buffer, we're done. */
736 if (segp[2] == '\0') {
740 /* Valgrind complained, strcpy(cur, segp + 3); */
741 /* string will overlap, do not use strcpy */
744 while ((*tmp++ = *segp++) != 0);
746 /* If there are no previous segments, then keep going from here. */
748 while ((segp > path) && ((--segp)[0] == '/'))
753 /* "segp" is pointing to the end of a previous segment; find its
754 * start. We need to back up to the previous segment and start
755 * over with that to handle things like "foo/bar/../..". If we
756 * don't do this, then on the first pass we'll remove the "bar/..",
757 * but be pointing at the second ".." so we won't realize we can also
758 * remove the "foo/..".
761 while ((cur > path) && (cur[-1] != '/'))
767 * g) If the resulting buffer string still begins with one or more
768 * complete path segments of "..", then the reference is
769 * considered to be in error. Implementations may handle this
770 * error by retaining these components in the resolved path (i.e.,
771 * treating them as part of the final URI), by removing them from
772 * the resolved path (i.e., discarding relative levels above the
773 * root), or by avoiding traversal of the reference.
775 * We discard them from the final path.
777 if (path[0] == '/') {
779 while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
780 && ((cur[3] == '/') || (cur[3] == '\0')))
785 while (cur[0] != '\0')
786 (out++)[0] = (cur++)[0];
794 static int is_hex(char c) {
795 if (((c >= '0') && (c <= '9')) ||
796 ((c >= 'a') && (c <= 'f')) ||
797 ((c >= 'A') && (c <= 'F')))
803 * xmlURIUnescapeString:
804 * @str: the string to unescape
805 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
806 * @target: optional destination buffer
808 * Unescaping routine, does not do validity checks !
809 * Output is direct unsigned char translation of %XX values (no encoding)
811 * Returns a copy of the string, but unescaped
814 xmlURIUnescapeString(const char *str, int len, char *target) {
820 if (len <= 0) len = strlen(str);
821 if (len < 0) return(NULL);
823 if (target == NULL) {
824 ret = (char *) xmlMallocAtomic(len + 1);
826 xmlGenericError(xmlGenericErrorContext,
827 "xmlURIUnescapeString: out of memory\n");
835 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
837 if ((*in >= '0') && (*in <= '9'))
839 else if ((*in >= 'a') && (*in <= 'f'))
840 *out = (*in - 'a') + 10;
841 else if ((*in >= 'A') && (*in <= 'F'))
842 *out = (*in - 'A') + 10;
844 if ((*in >= '0') && (*in <= '9'))
845 *out = *out * 16 + (*in - '0');
846 else if ((*in >= 'a') && (*in <= 'f'))
847 *out = *out * 16 + (*in - 'a') + 10;
848 else if ((*in >= 'A') && (*in <= 'F'))
849 *out = *out * 16 + (*in - 'A') + 10;
864 * @str: string to escape
865 * @list: exception list string of chars not to escape
867 * This routine escapes a string to hex, leaving unreserved characters
868 * (alphanumerics and marks), '@' and the characters in the exception list as is.
870 * Returns a new escaped string or NULL in case of error.
873 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
877 unsigned int len, out;
882 return(xmlStrdup(str));
883 len = xmlStrlen(str);
884 if (!(len > 0)) return(NULL);
887 ret = (xmlChar *) xmlMallocAtomic(len);
889 xmlGenericError(xmlGenericErrorContext,
890 "xmlURIEscapeStr: out of memory\n");
893 in = (const xmlChar *) str;
896 if (len - out <= 3) {
898 ret = (xmlChar *) xmlRealloc(ret, len);
900 xmlGenericError(xmlGenericErrorContext,
901 "xmlURIEscapeStr: out of memory\n");
908 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
913 ret[out++] = '0' + val;
915 ret[out++] = 'A' + val - 0xA;
918 ret[out++] = '0' + val;
920 ret[out++] = 'A' + val - 0xA;
933 * @str: the string of the URI to escape
935 * Escaping routine, does not do validity checks !
936 * It will try to escape the chars needing this, but this is heuristic-based:
937 * it's impossible to be sure.
939 * Returns a copy of the string, but escaped
942 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
943 * according to RFC2396.
947 xmlURIEscape(const xmlChar * str)
949 xmlChar *ret, *segment = NULL;
953 #define NULLCHK(p) if(!p) { \
954 xmlGenericError(xmlGenericErrorContext, \
955 "xmlURIEscape: out of memory\n"); \
961 uri = xmlCreateURI();
964 * Allow escaping errors in the unescaped form
967 ret2 = xmlParseURIReference(uri, (const char *)str);
980 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
982 ret = xmlStrcat(ret, segment);
983 ret = xmlStrcat(ret, BAD_CAST ":");
987 if (uri->authority) {
989 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
991 ret = xmlStrcat(ret, BAD_CAST "//");
992 ret = xmlStrcat(ret, segment);
997 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
999 ret = xmlStrcat(ret,BAD_CAST "//");
1000 ret = xmlStrcat(ret, segment);
1001 ret = xmlStrcat(ret, BAD_CAST "@");
1006 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1008 if (uri->user == NULL)
1009 ret = xmlStrcat(ret, BAD_CAST "//");
1010 ret = xmlStrcat(ret, segment);
1017 snprintf((char *) port, 10, "%d", uri->port);
1018 ret = xmlStrcat(ret, BAD_CAST ":");
1019 ret = xmlStrcat(ret, port);
1024 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1026 ret = xmlStrcat(ret, segment);
1032 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1034 ret = xmlStrcat(ret, BAD_CAST "?");
1035 ret = xmlStrcat(ret, segment);
1040 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1042 ret = xmlStrcat(ret, segment);
1046 if (uri->fragment) {
1047 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1049 ret = xmlStrcat(ret, BAD_CAST "#");
1050 ret = xmlStrcat(ret, segment);
1060 /************************************************************************
1062 * Escaped URI parsing *
1064 ************************************************************************/
1067 * xmlParseURIFragment:
1068 * @uri: pointer to an URI structure
1069 * @str: pointer to the string to analyze
1071 * Parse an URI fragment string and fills in the appropriate fields
1072 * of the @uri structure.
1076 * Returns 0 or the error code
1079 xmlParseURIFragment(xmlURIPtr uri, const char **str)
1088 while (IS_URIC(cur) || IS_UNWISE(cur))
1091 if (uri->fragment != NULL)
1092 xmlFree(uri->fragment);
1093 if (uri->cleanup & 2)
1094 uri->fragment = STRNDUP(*str, cur - *str);
1096 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
1104 * @uri: pointer to an URI structure
1105 * @str: pointer to the string to analyze
1107 * Parse the query part of an URI
1111 * Returns 0 or the error code
1114 xmlParseURIQuery(xmlURIPtr uri, const char **str)
1123 while ((IS_URIC(cur)) ||
1124 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1127 if (uri->query != NULL)
1128 xmlFree(uri->query);
1129 if (uri->cleanup & 2)
1130 uri->query = STRNDUP(*str, cur - *str);
1132 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
1139 * xmlParseURIScheme:
1140 * @uri: pointer to an URI structure
1141 * @str: pointer to the string to analyze
1143 * Parse an URI scheme
1145 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1147 * Returns 0 or the error code
1150 xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1157 if (!IS_ALPHA(*cur))
1160 while (IS_SCHEME(*cur)) cur++;
1162 if (uri->scheme != NULL) xmlFree(uri->scheme);
1163 uri->scheme = STRNDUP(*str, cur - *str);
1170 * xmlParseURIOpaquePart:
1171 * @uri: pointer to an URI structure
1172 * @str: pointer to the string to analyze
1174 * Parse an URI opaque part
1176 * opaque_part = uric_no_slash *uric
1178 * Returns 0 or the error code
1181 xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1189 if (!((IS_URIC_NO_SLASH(cur)) ||
1190 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
1194 while ((IS_URIC(cur)) ||
1195 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1198 if (uri->opaque != NULL)
1199 xmlFree(uri->opaque);
1200 if (uri->cleanup & 2)
1201 uri->opaque = STRNDUP(*str, cur - *str);
1203 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
1210 * xmlParseURIServer:
1211 * @uri: pointer to an URI structure
1212 * @str: pointer to the string to analyze
1214 * Parse a server subpart of an URI, it's a finer grain analysis
1215 * of the authority part.
1217 * server = [ [ userinfo "@" ] hostport ]
1218 * userinfo = *( unreserved | escaped |
1219 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1220 * hostport = host [ ":" port ]
1221 * host = hostname | IPv4address | IPv6reference
1222 * hostname = *( domainlabel "." ) toplabel [ "." ]
1223 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1224 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1225 * IPv6reference = "[" IPv6address "]"
1226 * IPv6address = hexpart [ ":" IPv4address ]
1227 * IPv4address = 1*3digit "." 1*3digit "." 1*3digit "." 1*3digit
1228 * hexpart = hexseq | hexseq "::" [ hexseq ]| "::" [ hexseq ]
1229 * hexseq = hex4 *( ":" hex4)
1233 * Returns 0 or the error code
1236 xmlParseURIServer(xmlURIPtr uri, const char **str) {
1238 const char *host, *tmp;
1239 const int IPV4max = 4;
1240 const int IPV6max = 8;
1249 * is there a userinfo ?
1251 while (IS_USERINFO(cur)) NEXT(cur);
1254 if (uri->user != NULL) xmlFree(uri->user); /* drop any previous userinfo before storing the new one */
1255 if (uri->cleanup & 2)
1256 uri->user = STRNDUP(*str, cur - *str); /* bit 2 of cleanup set: copy the userinfo verbatim, without unescaping */
1258 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); /* otherwise store the unescaped userinfo */
1263 if (uri->user != NULL) xmlFree(uri->user);
1269 * This can be empty in the case where there is no server
1274 if (uri->authority != NULL) xmlFree(uri->authority);
1275 uri->authority = NULL;
1276 if (uri->server != NULL) xmlFree(uri->server);
1283 * host part of hostport can denote an IPV4 address, an IPV6 address
1284 * or an unresolved name. Check the IP first, it's easier to detect
1285 * errors if wrong one.
1286 * An IPV6 address must start with a '[' and end with a ']'.
1291 for (oct = 0; oct < IPV6max; ++oct) {
1294 return(3); /* multiple compression attempted */
1295 if (!oct) { /* initial char is compression */
1299 compress = 1; /* set compression-encountered flag */
1300 cur++; /* skip over the second ':' */
1303 while(IS_HEX(*cur)) cur++;
1304 if (oct == (IPV6max-1))
1310 if ((!compress) && (oct != IPV6max))
1315 if (uri->server != NULL) xmlFree(uri->server);
1316 uri->server = (char *)xmlStrndup((xmlChar *)host+1,
1322 * Not IPV6, maybe IPV4
1324 for (oct = 0; oct < IPV4max; ++oct) {
1326 return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1327 while(IS_DIGIT(*cur)) cur++;
1328 if (oct == (IPV4max-1))
1335 if ((host[0] != '[') && (oct < IPV4max || (*cur == '.' && cur++) ||
1337 /* maybe host_name */
1338 if (!IS_ALPHANUM(*cur))
1339 return(4); /* e.g. http://xml.$oft */
1341 do ++cur; while (IS_ALPHANUM(*cur));
1345 return(5); /* e.g. http://xml.-soft */
1352 return(6); /* e.g. http://xml-.soft */
1354 return(7); /* e.g. http://xml..soft */
1362 --tmp; /* e.g. http://xml.$Oft/ */
1363 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1364 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1365 return(8); /* e.g. http://xmlsOft.0rg/ */
1368 if (uri->authority != NULL) xmlFree(uri->authority);
1369 uri->authority = NULL;
1370 if (host[0] != '[') { /* it's not an IPV6 addr */
1371 if (uri->server != NULL) xmlFree(uri->server);
1372 if (uri->cleanup & 2)
1373 uri->server = STRNDUP(host, cur - host);
1375 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1379 * finish by checking for a port presence.
1383 if (IS_DIGIT(*cur)) {
1386 while (IS_DIGIT(*cur)) {
1388 uri->port = uri->port * 10 + (*cur - '0');
1398 * xmlParseURIRelSegment:
1399 * @uri: pointer to an URI structure
1400 * @str: pointer to the string to analyze
1402 * Parse an URI relative segment
1404 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1407 * Returns 0 or the error code
1410 xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1418 if (!((IS_SEGMENT(cur)) ||
1419 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
1423 while ((IS_SEGMENT(cur)) ||
1424 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1427 if (uri->path != NULL)
1429 if (uri->cleanup & 2)
1430 uri->path = STRNDUP(*str, cur - *str);
1432 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
1439 * xmlParseURIPathSegments:
1440 * @uri: pointer to an URI structure
1441 * @str: pointer to the string to analyze
1442 * @slash: should we add a leading slash
1444 * Parse an URI set of path segments
1446 * path_segments = segment *( "/" segment )
1447 * segment = *pchar *( ";" param )
1450 * Returns 0 or the error code
1453 xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1463 while ((IS_PCHAR(cur)) ||
1464 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1466 while (*cur == ';') {
1468 while ((IS_PCHAR(cur)) ||
1469 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1481 * Concat the set of path segments to the current path
1487 if (uri->path != NULL) {
1488 len2 = strlen(uri->path);
1491 path = (char *) xmlMallocAtomic(len + 1);
1493 xmlGenericError(xmlGenericErrorContext,
1494 "xmlParseURIPathSegments: out of memory\n");
1498 if (uri->path != NULL)
1499 memcpy(path, uri->path, len2);
1505 if (cur - *str > 0) {
1506 if (uri->cleanup & 2) {
1507 memcpy(&path[len2], *str, cur - *str);
1508 path[len2 + (cur - *str)] = 0;
1510 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1512 if (uri->path != NULL)
1521 * xmlParseURIAuthority:
1522 * @uri: pointer to an URI structure
1523 * @str: pointer to the string to analyze
1525 * Parse the authority part of an URI.
1527 * authority = server | reg_name
1528 * server = [ [ userinfo "@" ] hostport ]
1529 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1530 * "@" | "&" | "=" | "+" )
1532 * Note : this is completely ambiguous since reg_name is allowed to
1533 * use the full set of chars in use by server:
1535 * 3.2.1. Registry-based Naming Authority
1537 * The structure of a registry-based naming authority is specific
1538 * to the URI scheme, but constrained to the allowed characters
1539 * for an authority component.
1541 * Returns 0 or the error code
1544 xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1554 * try first to parse it as a server string.
1556 ret = xmlParseURIServer(uri, str);
1557 if ((ret == 0) && (*str != NULL) &&
1558 ((**str == 0) || (**str == '/') || (**str == '?')))
1563 * failed, fallback to reg_name
1565 if (!IS_REG_NAME(cur)) {
1569 while (IS_REG_NAME(cur)) NEXT(cur);
1571 if (uri->server != NULL) xmlFree(uri->server);
1573 if (uri->user != NULL) xmlFree(uri->user);
1575 if (uri->authority != NULL) xmlFree(uri->authority);
1576 if (uri->cleanup & 2)
1577 uri->authority = STRNDUP(*str, cur - *str);
1579 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1586 * xmlParseURIHierPart:
1587 * @uri: pointer to an URI structure
1588 * @str: pointer to the string to analyze
1590 * Parse an URI hierarchical part
1592 * hier_part = ( net_path | abs_path ) [ "?" query ]
1593 * abs_path = "/" path_segments
1594 * net_path = "//" authority [ abs_path ]
1596 * Returns 0 or the error code
1599 xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1608 if ((cur[0] == '/') && (cur[1] == '/')) {
1610 ret = xmlParseURIAuthority(uri, &cur);
1613 if (cur[0] == '/') {
1615 ret = xmlParseURIPathSegments(uri, &cur, 1);
1617 } else if (cur[0] == '/') {
1619 ret = xmlParseURIPathSegments(uri, &cur, 1);
1627 ret = xmlParseURIQuery(uri, &cur);
1636 * xmlParseAbsoluteURI:
1637 * @uri: pointer to an URI structure
1638 * @str: pointer to the string to analyze
1640 * Parse an URI reference string and fills in the appropriate fields
1641 * of the @uri structure
1643 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1645 * Returns 0 or the error code
1648 xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1657 ret = xmlParseURIScheme(uri, str);
1658 if (ret != 0) return(ret);
1665 return(xmlParseURIHierPart(uri, str));
1666 return(xmlParseURIOpaquePart(uri, str));
1670 * xmlParseRelativeURI:
1671 * @uri: pointer to an URI structure
1672 * @str: pointer to the string to analyze
1674 * Parse a relative URI string and fill in the appropriate fields
1675 * of the @uri structure
1677 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1678 * abs_path = "/" path_segments
1679 * net_path = "//" authority [ abs_path ]
1680 * rel_path = rel_segment [ abs_path ]
1682 * Returns 0 or the error code
1685 xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1693 if ((cur[0] == '/') && (cur[1] == '/')) {
1695 ret = xmlParseURIAuthority(uri, &cur);
1698 if (cur[0] == '/') {
1700 ret = xmlParseURIPathSegments(uri, &cur, 1);
1702 } else if (cur[0] == '/') {
1704 ret = xmlParseURIPathSegments(uri, &cur, 1);
1705 } else if (cur[0] != '#' && cur[0] != '?') {
1706 ret = xmlParseURIRelSegment(uri, &cur);
1709 if (cur[0] == '/') {
1711 ret = xmlParseURIPathSegments(uri, &cur, 1);
1718 ret = xmlParseURIQuery(uri, &cur);
1727 * xmlParseURIReference:
1728 * @uri: pointer to an URI structure
1729 * @str: the string to analyze
1731 * Parse an URI reference string and fill in the appropriate fields
1732 * of the @uri structure
1734 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1736 * Returns 0 or the error code
1739 xmlParseURIReference(xmlURIPtr uri, const char *str) {
1741 const char *tmp = str;
1748 * Try first to parse absolute refs, then fall back to relative if
1751 ret = xmlParseAbsoluteURI(uri, &str);
1755 ret = xmlParseRelativeURI(uri, &str);
1764 ret = xmlParseURIFragment(uri, &str);
1765 if (ret != 0) return(ret);
1776 * @str: the URI string to analyze
1780 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1782 * Returns a newly built xmlURIPtr or NULL in case of error
1785 xmlParseURI(const char *str) {
1791 uri = xmlCreateURI();
1793 ret = xmlParseURIReference(uri, str);
1804 * @str: the URI string to analyze
1805 * @raw: if 1, unescaping of URI pieces is disabled
1807 * Parse an URI while allowing the original fragments to be kept intact.
1809 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1811 * Returns a newly built xmlURIPtr or NULL in case of error
1814 xmlParseURIRaw(const char *str, int raw) {
1820 uri = xmlCreateURI();
1825 ret = xmlParseURIReference(uri, str);
1834 /************************************************************************
1836 * Public functions *
1838 ************************************************************************/
1842 * @URI: the URI instance found in the document
1843 * @base: the base value
1845 * Computes the final URI of the reference done by checking that
1846 * the given URI is valid, and building the final URI using the
1847 * base URI. This is processed according to section 5.2 of the
1850 * 5.2. Resolving Relative References to Absolute Form
1852 * Returns a new URI string (to be freed by the caller) or NULL in case
1856 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1857 xmlChar *val = NULL;
1858 int ret, len, indx, cur, out;
1859 xmlURIPtr ref = NULL;
1860 xmlURIPtr bas = NULL;
1861 xmlURIPtr res = NULL;
1864 * 1) The URI reference is parsed into the potential four components and
1865 * fragment identifier, as described in Section 4.3.
1867 * NOTE that a completely empty URI is treated by modern browsers
1868 * as a reference to "." rather than as a synonym for the current
1869 * URI. Should we do that here?
1875 ref = xmlCreateURI();
1878 ret = xmlParseURIReference(ref, (const char *) URI);
1885 if ((ref != NULL) && (ref->scheme != NULL)) {
1887 * The URI is absolute; don't modify it.
1889 val = xmlStrdup(URI);
1895 bas = xmlCreateURI();
1898 ret = xmlParseURIReference(bas, (const char *) base);
1902 val = xmlSaveUri(ref);
1907 * the base fragment must be ignored
1909 if (bas->fragment != NULL) {
1910 xmlFree(bas->fragment);
1911 bas->fragment = NULL;
1913 val = xmlSaveUri(bas);
1918 * 2) If the path component is empty and the scheme, authority, and
1919 * query components are undefined, then it is a reference to the
1920 * current document and we are done. Otherwise, the reference URI's
1921 * query and fragment components are defined as found (or not found)
1922 * within the URI reference and not inherited from the base URI.
1924 * NOTE that in modern browsers, the parsing differs from the above
1925 * in the following aspect: the query component is allowed to be
1926 * defined while still treating this as a reference to the current
1929 res = xmlCreateURI();
1932 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1933 ((ref->authority == NULL) && (ref->server == NULL))) {
1934 if (bas->scheme != NULL)
1935 res->scheme = xmlMemStrdup(bas->scheme);
1936 if (bas->authority != NULL)
1937 res->authority = xmlMemStrdup(bas->authority);
1938 else if (bas->server != NULL) {
1939 res->server = xmlMemStrdup(bas->server);
1940 if (bas->user != NULL)
1941 res->user = xmlMemStrdup(bas->user);
1942 res->port = bas->port;
1944 if (bas->path != NULL)
1945 res->path = xmlMemStrdup(bas->path);
1946 if (ref->query != NULL)
1947 res->query = xmlMemStrdup(ref->query);
1948 else if (bas->query != NULL)
1949 res->query = xmlMemStrdup(bas->query);
1950 if (ref->fragment != NULL)
1951 res->fragment = xmlMemStrdup(ref->fragment);
1956 * 3) If the scheme component is defined, indicating that the reference
1957 * starts with a scheme name, then the reference is interpreted as an
1958 * absolute URI and we are done. Otherwise, the reference URI's
1959 * scheme is inherited from the base URI's scheme component.
1961 if (ref->scheme != NULL) {
1962 val = xmlSaveUri(ref);
1965 if (bas->scheme != NULL)
1966 res->scheme = xmlMemStrdup(bas->scheme);
1968 if (ref->query != NULL)
1969 res->query = xmlMemStrdup(ref->query);
1970 if (ref->fragment != NULL)
1971 res->fragment = xmlMemStrdup(ref->fragment);
1974 * 4) If the authority component is defined, then the reference is a
1975 * network-path and we skip to step 7. Otherwise, the reference
1976 * URI's authority is inherited from the base URI's authority
1977 * component, which will also be undefined if the URI scheme does not
1978 * use an authority component.
1980 if ((ref->authority != NULL) || (ref->server != NULL)) {
1981 if (ref->authority != NULL)
1982 res->authority = xmlMemStrdup(ref->authority);
1984 res->server = xmlMemStrdup(ref->server);
1985 if (ref->user != NULL)
1986 res->user = xmlMemStrdup(ref->user);
1987 res->port = ref->port;
1989 if (ref->path != NULL)
1990 res->path = xmlMemStrdup(ref->path);
1993 if (bas->authority != NULL)
1994 res->authority = xmlMemStrdup(bas->authority);
1995 else if (bas->server != NULL) {
1996 res->server = xmlMemStrdup(bas->server);
1997 if (bas->user != NULL)
1998 res->user = xmlMemStrdup(bas->user);
1999 res->port = bas->port;
2003 * 5) If the path component begins with a slash character ("/"), then
2004 * the reference is an absolute-path and we skip to step 7.
2006 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2007 res->path = xmlMemStrdup(ref->path);
2013 * 6) If this step is reached, then we are resolving a relative-path
2014 * reference. The relative path needs to be merged with the base
2015 * URI's path. Although there are many ways to do this, we will
2016 * describe a simple method using a separate string buffer.
2018 * Allocate a buffer large enough for the result string.
2020 len = 2; /* extra / and 0 */
2021 if (ref->path != NULL)
2022 len += strlen(ref->path);
2023 if (bas->path != NULL)
2024 len += strlen(bas->path);
2025 res->path = (char *) xmlMallocAtomic(len);
2026 if (res->path == NULL) {
2027 xmlGenericError(xmlGenericErrorContext,
2028 "xmlBuildURI: out of memory\n");
2034 * a) All but the last segment of the base URI's path component is
2035 * copied to the buffer. In other words, any characters after the
2036 * last (right-most) slash character, if any, are excluded.
2040 if (bas->path != NULL) {
2041 while (bas->path[cur] != 0) {
2042 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2044 if (bas->path[cur] == 0)
2049 res->path[out] = bas->path[out];
2057 * b) The reference's path component is appended to the buffer
2060 if (ref->path != NULL && ref->path[0] != 0) {
2063 * Ensure the path includes a '/'
2065 if ((out == 0) && (bas->server != NULL))
2066 res->path[out++] = '/';
2067 while (ref->path[indx] != 0) {
2068 res->path[out++] = ref->path[indx++];
2074 * Steps c) to h) are really path normalization steps
2076 xmlNormalizeURIPath(res->path);
2081 * 7) The resulting URI components, including any inherited from the
2082 * base URI, are recombined to give the absolute form of the URI
2085 val = xmlSaveUri(res);
2098 * xmlBuildRelativeURI:
2099 * @URI: the URI reference under consideration
2100 * @base: the base value
2102 * Expresses the URI of the reference in terms relative to the
2103 * base. Some examples of this operation include:
2104 * base = "http://site1.com/docs/book1.html"
2105 * URI input URI returned
2106 * docs/pic1.gif pic1.gif
2107 * docs/img/pic1.gif img/pic1.gif
2108 * img/pic1.gif ../img/pic1.gif
2109 * http://site1.com/docs/pic1.gif pic1.gif
2110 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2112 * base = "docs/book1.html"
2113 * URI input URI returned
2114 * docs/pic1.gif pic1.gif
2115 * docs/img/pic1.gif img/pic1.gif
2116 * img/pic1.gif ../img/pic1.gif
2117 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2120 * Note: if the URI reference is really weird or complicated, it may be
2121 * worthwhile to first convert it into a "nice" one by calling
2122 * xmlBuildURI (using 'base') before calling this routine,
2123 * since this routine (for reasonable efficiency) assumes URI has
2124 * already been through some validation.
2126 * Returns a new URI string (to be freed by the caller) or NULL in case
2130 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2132 xmlChar *val = NULL;
2138 xmlURIPtr ref = NULL;
2139 xmlURIPtr bas = NULL;
2140 xmlChar *bptr, *uptr, *vptr;
2141 int remove_path = 0;
2143 if ((URI == NULL) || (*URI == 0))
2147 * First parse URI into a standard form
2149 ref = xmlCreateURI ();
2152 /* If URI not already in "relative" form */
2153 if (URI[0] != '.') {
2154 ret = xmlParseURIReference (ref, (const char *) URI);
2156 goto done; /* Error in URI, return NULL */
2158 ref->path = (char *)xmlStrdup(URI);
2161 * Next parse base into the same standard form
2163 if ((base == NULL) || (*base == 0)) {
2164 val = xmlStrdup (URI);
2167 bas = xmlCreateURI ();
2170 if (base[0] != '.') {
2171 ret = xmlParseURIReference (bas, (const char *) base);
2173 goto done; /* Error in base, return NULL */
2175 bas->path = (char *)xmlStrdup(base);
2178 * If the scheme / server on the URI differs from the base,
2179 * just return the URI
2181 if ((ref->scheme != NULL) &&
2182 ((bas->scheme == NULL) ||
2183 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2184 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2185 val = xmlStrdup (URI);
2188 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2189 val = xmlStrdup(BAD_CAST "");
2192 if (bas->path == NULL) {
2193 val = xmlStrdup((xmlChar *)ref->path);
2196 if (ref->path == NULL) {
2197 ref->path = (char *) "/";
2202 * At this point (at last!) we can compare the two paths
2204 * First we take care of the special case where either of the
2205 * two path components may be missing (bug 316224)
2207 if (bas->path == NULL) {
2208 if (ref->path != NULL) {
2209 uptr = (xmlChar *) ref->path;
2212 val = xmlStrdup(uptr);
2216 bptr = (xmlChar *)bas->path;
2217 if (ref->path == NULL) {
2218 for (ix = 0; bptr[ix] != 0; ix++) {
2219 if (bptr[ix] == '/')
2223 len = 1; /* this is for a string terminator only */
2226 * Next we compare the two strings and find where they first differ
2228 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2230 if ((*bptr == '.') && (bptr[1] == '/'))
2232 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2234 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2237 if (bptr[pos] == ref->path[pos]) {
2238 val = xmlStrdup(BAD_CAST "");
2239 goto done; /* (I can't imagine why anyone would do this) */
2243 * In URI, "back up" to the last '/' encountered. This will be the
2244 * beginning of the "unique" suffix of URI
2247 if ((ref->path[ix] == '/') && (ix > 0))
2249 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2251 for (; ix > 0; ix--) {
2252 if (ref->path[ix] == '/')
2256 uptr = (xmlChar *)ref->path;
2259 uptr = (xmlChar *)&ref->path[ix];
2263 * In base, count the number of '/' from the differing point
2265 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2266 for (; bptr[ix] != 0; ix++) {
2267 if (bptr[ix] == '/')
2271 len = xmlStrlen (uptr) + 1;
2276 val = xmlStrdup (uptr);
2281 * Allocate just enough space for the returned string -
2282 * length of the remainder of the URI, plus enough space
2283 * for the "../" groups, plus one for the terminator
2285 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2287 xmlGenericError(xmlGenericErrorContext,
2288 "xmlBuildRelativeURI: out of memory\n");
2293 * Put in as many "../" as needed
2295 for (; nbslash>0; nbslash--) {
2301 * Finish up with the end of the URI
2304 if ((vptr > val) && (len > 0) &&
2305 (uptr[0] == '/') && (vptr[-1] == '/')) {
2306 memcpy (vptr, uptr + 1, len - 1);
2309 memcpy (vptr, uptr, len);
2318 * Free the working variables
2320 if (remove_path != 0)
2332 * @path: the resource locator in a filesystem notation
2334 * Constructs a canonic path from the specified path.
2336 * Returns a new canonic path, or a duplicate of the path parameter if the
2337 * construction fails. The caller is responsible for freeing the memory occupied
2338 * by the returned string. If there is insufficient memory available, or the
2339 * argument is NULL, the function returns NULL.
2341 #define IS_WINDOWS_PATH(p) \
2343 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2344 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2345 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2347 xmlCanonicPath(const xmlChar *path)
2349 #if defined(_WIN32) && !defined(__CYGWIN__)
2356 const xmlChar *absuri;
2360 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2362 return xmlStrdup(path);
2365 absuri = xmlStrstr(path, BAD_CAST "://");
2366 if (absuri != NULL) {
2372 * This looks like an URI where some parts have not been
2373 * escaped, leading to a parsing problem. Check that the first
2374 * part matches a protocol.
2377 if ((l <= 0) || (l > 20))
2378 goto path_processing;
2379 for (j = 0;j < l;j++) {
2381 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2382 goto path_processing;
2385 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2386 if (escURI != NULL) {
2387 uri = xmlParseURI((const char *) escURI);
2397 #if defined(_WIN32) && !defined(__CYGWIN__)
2399 * This really needs to be cleaned up by someone with a Windows box
2401 uri = xmlCreateURI();
2406 len = xmlStrlen(path);
2407 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2408 uri->scheme = xmlStrdup(BAD_CAST "file");
2409 uri->path = xmlMallocAtomic(len + 2);
2410 if (uri->path == NULL) {
2416 strncpy(p, path, len + 1);
2418 uri->path = xmlStrdup(path);
2419 if (uri->path == NULL) {
2425 while (*p != '\0') {
2430 if (uri->path == NULL) {
2435 if (uri->scheme == NULL) {
2436 ret = xmlStrdup((const xmlChar *) path);
2438 ret = xmlSaveUri(uri);
2443 ret = xmlStrdup((const xmlChar *) path);
2450 * @path: the resource locator in a filesystem notation
2452 * Constructs an URI expressing the existing path
2454 * Returns a new URI, or a duplicate of the path parameter if the
2455 * construction fails. The caller is responsible for freeing the memory
2456 * occupied by the returned string. If there is insufficient memory available,
2457 * or the argument is NULL, the function returns NULL.
2460 xmlPathToURI(const xmlChar *path)
2469 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2471 return xmlStrdup(path);
2473 cal = xmlCanonicPath(path);
2476 memset(&temp, 0, sizeof(temp));
2477 temp.path = (char *) cal;
2478 ret = xmlSaveUri(&temp);
2483 #include "elfgcchack.h"