1 /***************************************************************************
3 * Project ___| | | | _ \| |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
8 * Copyright (C) 1998 - 2012, Daniel Stenberg, <daniel@haxx.se>, et al.
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at http://curl.haxx.se/docs/copyright.html.
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 ***************************************************************************/
22 #include "tool_setup.h"
24 #define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */
25 #include <curl/mprintf.h>
27 #include "tool_urlglob.h"
30 #include "memdebug.h" /* keep this as LAST include */
41 * Input a full globbed string, set the fourth argument to the amount of
42 * strings we get out of this. Return GlobCode.
/* Forward declaration: glob_set() and glob_range() both call glob_word()
   before its definition appears further down in this file. */
44 static GlobCode glob_word(URLGlob *, /* object anchor */
45 char *, /* globbed string */
46 size_t, /* position */
47 int *); /* returned number of strings */
/*
 * glob_set() parses a "{a,b,c}" set expression. It is entered with
 * 'pattern' pointing just behind the opening '{'; 'pos' is the position
 * that gets reported in error messages.
 *
 * Characters of the current element are copied into glob->glob_buffer.
 * Each completed element is strdup()ed into the pattern's Set.elements
 * array, which is enlarged by one slot per element. Once the closing '}'
 * has been seen, the rest of the string is handed to glob_word() and
 * *amount is set to the number of set elements multiplied by the count
 * glob_word() reported.
 *
 * An error description is written to glob->errormsg when there are too
 * many globs, when the string ends inside the set, when a '[' or ']'
 * shows up inside the set, and when an allocation fails.
 *
 * NOTE(review): several guarding conditions and return statements of this
 * function are not visible in this excerpt; the comments below describe
 * only what the visible lines do.
 */
49 static GlobCode glob_set(URLGlob *glob, char *pattern,
50 size_t pos, int *amount)
52 /* processes a set expression with the point behind the opening '{'
53 ','-separated elements are collected until the next closing '}'
58 char* buf = glob->glob_buffer;
60 pat = &glob->pattern[glob->size / 2];
61 /* patterns 0,1,2,... correspond to size=1,3,5,... */
63 pat->content.Set.size = 0;
64 pat->content.Set.ptr_s = 0;
65 pat->content.Set.elements = NULL;
/* every pattern takes one slot in glob->size; report an error once the
   count exceeds GLOB_PATTERN_NUM*2 */
67 if(++glob->size > (GLOB_PATTERN_NUM*2)) {
68 snprintf(glob->errormsg, sizeof(glob->errormsg), "too many globs used\n");
74 case '\0': /* URL ended while set was still open */
75 snprintf(glob->errormsg, sizeof(glob->errormsg),
76 "unmatched brace at pos %zu\n", pos);
80 case '[': /* no nested expressions at this time */
81 snprintf(glob->errormsg, sizeof(glob->errormsg),
82 "nested braces not supported at pos %zu\n", pos);
86 case '}': /* set element completed */
88 if(pat->content.Set.elements) {
/* an element array already exists: ask for one more slot */
89 char **new_arr = realloc(pat->content.Set.elements,
90 (pat->content.Set.size + 1) * sizeof(char*));
/* release every stored element and the array itself, then reset the
   set bookkeeping */
93 for(elem = 0; elem < pat->content.Set.size; elem++)
94 Curl_safefree(pat->content.Set.elements[elem]);
95 Curl_safefree(pat->content.Set.elements);
96 pat->content.Set.ptr_s = 0;
97 pat->content.Set.size = 0;
99 pat->content.Set.elements = new_arr;
/* no array yet: allocate room for the first element */
102 pat->content.Set.elements = malloc(sizeof(char*));
103 if(!pat->content.Set.elements) {
104 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
/* keep a private copy of the element text collected in glob_buffer */
107 pat->content.Set.elements[pat->content.Set.size] =
108 strdup(glob->glob_buffer);
109 if(!pat->content.Set.elements[pat->content.Set.size]) {
111 for(elem = 0; elem < pat->content.Set.size; elem++)
112 Curl_safefree(pat->content.Set.elements[elem]);
113 Curl_safefree(pat->content.Set.elements);
114 pat->content.Set.ptr_s = 0;
115 pat->content.Set.size = 0;
116 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
119 ++pat->content.Set.size;
121 if(*pattern == '}') {
122 /* entire set pattern completed */
125 /* always check for a literal (may be "") between patterns */
126 res = glob_word(glob, ++pattern, ++pos, &wordamount);
/* cleanup of the whole set; the condition guarding it is not visible in
   this excerpt (presumably a failed glob_word() call - TODO confirm) */
129 for(elem = 0; elem < pat->content.Set.size; elem++)
130 Curl_safefree(pat->content.Set.elements[elem]);
131 Curl_safefree(pat->content.Set.elements);
132 pat->content.Set.ptr_s = 0;
133 pat->content.Set.size = 0;
137 *amount = pat->content.Set.size * wordamount;
/* rewind to the start of the scratch buffer for the next element */
143 buf = glob->glob_buffer;
148 case ']': /* illegal closing bracket */
149 snprintf(glob->errormsg, sizeof(glob->errormsg),
150 "illegal pattern at pos %zu\n", pos);
153 case '\\': /* escaped character, skip '\' */
158 /* intentional fallthrough */
160 *buf++ = *pattern++; /* copy character to set element */
/*
 * glob_range() parses a "[...]" range expression. It is entered with
 * 'pattern' pointing just behind the opening '['; 'pos' is the position
 * that gets reported in error messages.
 *
 * A leading letter selects a character range and a leading digit a
 * numeric range; anything else is reported as an illegal character. Both
 * forms are read with sscanf() as "min-max", optionally followed by a
 * separator and a step. The step is only used when the separator is ':'
 * and the character after the step is ']'; otherwise the step is 1. For
 * numeric ranges a leading zero makes the function record a padding
 * length in NumRange.padlength.
 *
 * Afterwards the next ']' is located and parsing continues in glob_word().
 * *amount becomes the count reported by glob_word() multiplied by
 * (max - min + 1). Error descriptions are stored in glob->errormsg.
 *
 * NOTE(review):
 *  - the (max - min + 1) factor does not take the step into account;
 *  - no check that the parsed step is positive is visible in this excerpt;
 *  - in the numeric branch rc == 2 is accepted, in which case sscanf() has
 *    not stored 'sep', yet 'sep' is compared right after; whether 'sep'
 *    has an initializer is not visible here.
 * Confirm all three against the complete function.
 */
167 static GlobCode glob_range(URLGlob *glob, char *pattern,
168 size_t pos, int *amount)
170 /* processes a range expression with the point behind the opening '['
171 - char range: e.g. "a-z]", "B-Q]"
172 - num range: e.g. "0-9]", "17-2000]"
173 - num range with leading zeros: e.g. "001-999]"
174 expression is checked for well-formedness and collected until the next ']'
185 pat = &glob->pattern[glob->size / 2];
186 /* patterns 0,1,2,... correspond to size=1,3,5,... */
187 if(++glob->size > (GLOB_PATTERN_NUM*2)) {
188 snprintf(glob->errormsg, sizeof(glob->errormsg), "too many globs used\n");
192 if(ISALPHA(*pattern)) {
193 /* character range detected */
197 pat->type = UPTCharRange;
/* reads: first char, '-', last char, a separator, and optionally a step
   plus the character following it; rc is the number of items stored */
199 rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2);
/* rejects incomplete input, empty or reversed ranges, and ranges wider
   than the distance from 'a' to 'z' */
201 if((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) {
202 /* the pattern is not well-formed */
203 snprintf(glob->errormsg, sizeof(glob->errormsg),
204 "error: bad range specification after pos %zu\n", pos);
208 /* check the (first) separating character */
209 if((sep != ']') && (sep != ':')) {
210 snprintf(glob->errormsg, sizeof(glob->errormsg),
211 "error: unsupported character (%c) after range at pos %zu\n",
216 /* if there was a ":[num]" thing, use that as step or else use 1 */
217 pat->content.CharRange.step =
218 ((sep == ':') && (rc == 5) && (sep2 == ']')) ? step : 1;
/* the running value starts at the lower bound */
220 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
221 pat->content.CharRange.max_c = max_c;
223 else if(ISDIGIT(*pattern)) {
224 /* numeric range detected */
228 pat->type = UPTNumRange;
229 pat->content.NumRange.padlength = 0;
231 rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2);
/* unlike the character branch, min == max is accepted here */
233 if((rc < 2) || (min_n > max_n)) {
234 /* the pattern is not well-formed */
235 snprintf(glob->errormsg, sizeof(glob->errormsg),
236 "error: bad range specification after pos %zu\n", pos);
/* the running value starts at the lower bound */
239 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
240 pat->content.NumRange.max_n = max_n;
242 /* if there was a ":[num]" thing, use that as step or else use 1 */
243 pat->content.NumRange.step =
244 ((sep == ':') && (rc == 5) && (sep2 == ']')) ? step : 1;
246 if(*pattern == '0') {
247 /* leading zero specified */
251 ++pat->content.NumRange.padlength; /* padding length is set for all
252 instances of this pattern */
257 snprintf(glob->errormsg, sizeof(glob->errormsg),
258 "illegal character in range specification at pos %zu\n", pos);
262 c = (char*)strchr(pattern, ']'); /* continue after next ']' */
266 snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
267 return GLOB_ERROR; /* missing ']' */
270 /* always check for a literal (may be "") between patterns */
/* the position passed on is advanced by the distance walked in pattern */
272 res = glob_word(glob, c, pos + (c - pattern), &wordamount);
273 if(res == GLOB_ERROR) {
/* number of values between min and max (inclusive) times the count that
   came back from the rest of the string */
279 if(pat->type == UPTCharRange)
280 *amount = wordamount * (pat->content.CharRange.max_c -
281 pat->content.CharRange.min_c + 1);
283 *amount = wordamount * (pat->content.NumRange.max_n -
284 pat->content.NumRange.min_n + 1);
287 return res; /* GLOB_OK or GLOB_NO_MEM */
/*
 * glob_word() handles a literal (non-glob) stretch of the URL.
 *
 * Characters are copied into glob->glob_buffer until the string ends or a
 * '{' or '[' is found. A backslash in front of one of { [ } ] is skipped so
 * that the following character is taken literally; an unescaped '}' or ']'
 * is reported as unmatched. The collected text is strdup()ed into
 * glob->literal[glob->size / 2].
 *
 * Depending on the character that stopped the scan, parsing either ends
 * or continues in glob_set() or glob_range(), which receive 'amount' and
 * fill it in. *amount starts out as 1 for a plain string.
 *
 * Error descriptions are stored in glob->errormsg.
 */
290 static GlobCode glob_word(URLGlob *glob, char *pattern,
291 size_t pos, int *amount)
293 /* processes a literal string component of a URL
294 special characters '{' and '[' branch to set/range processing functions
296 char* buf = glob->glob_buffer;
298 GlobCode res = GLOB_OK;
300 *amount = 1; /* default is one single string */
302 while(*pattern != '\0' && *pattern != '{' && *pattern != '[') {
303 if(*pattern == '}' || *pattern == ']') {
304 snprintf(glob->errormsg, sizeof(glob->errormsg),
305 "unmatched close brace/bracket at pos %zu\n", pos);
309 /* only allow \ to escape known "special letters" */
310 if(*pattern == '\\' &&
311 (*(pattern+1) == '{' || *(pattern+1) == '[' ||
312 *(pattern+1) == '}' || *(pattern+1) == ']') ) {
314 /* escape character, skip '\' */
318 *buf++ = *pattern++; /* copy character to literal */
/* the literal's slot is derived from the running element count */
322 litindex = glob->size / 2;
323 /* literals 0,1,2,... correspond to size=0,2,4,... */
324 glob->literal[litindex] = strdup(glob->glob_buffer);
325 if(!glob->literal[litindex]) {
326 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
333 /* singular URL processed */
337 /* process set pattern */
/* both the pattern pointer and the position step over the '{' */
338 res = glob_set(glob, ++pattern, ++pos, amount);
342 /* process range pattern */
/* both the pattern pointer and the position step over the '[' */
343 res = glob_range(glob, ++pattern, ++pos, amount);
/* releases the literal stored above; the condition guarding this call is
   not visible in this excerpt (presumably a non-OK result - TODO confirm) */
348 Curl_safefree(glob->literal[litindex]);
/*
 * glob_url() sets up a URLGlob for 'url' and parses the URL.
 *
 * A scratch buffer of strlen(url) + 1 bytes and a zero-filled URLGlob are
 * allocated, the struct is initialised, and glob_word() is started at
 * position 1. On the failure path shown below, the message left in
 * glob_expand->errormsg is printed to 'error' (when 'error' is non-NULL
 * and the message is non-empty), both allocations are released, and
 * CURLE_OUT_OF_MEMORY or CURLE_URL_MALFORMAT is returned depending on
 * whether the parser reported GLOB_NO_MEM.
 *
 * The success path (how *glob and *urlnum are filled in) is not part of
 * the lines visible in this excerpt.
 */
353 int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error)
356 * We can deal with any-size, just make a buffer with the same length
357 * as the specified URL!
359 URLGlob *glob_expand;
366 glob_buffer = malloc(strlen(url) + 1);
368 return CURLE_OUT_OF_MEMORY;
/* calloc() hands back zero-filled memory for the glob state */
370 glob_expand = calloc(1, sizeof(URLGlob));
/* the scratch buffer must not leak when the struct cannot be allocated */
372 Curl_safefree(glob_buffer);
373 return CURLE_OUT_OF_MEMORY;
375 glob_expand->size = 0;
376 glob_expand->urllen = strlen(url);
377 glob_expand->glob_buffer = glob_buffer;
378 glob_expand->beenhere = 0;
/* positions passed to the parser start at 1 */
380 res = glob_word(glob_expand, url, 1, &amount);
384 if(error && glob_expand->errormsg[0]) {
385 /* send error description to the error-stream */
386 fprintf(error, "curl: (%d) [globbing] %s",
387 (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT,
388 glob_expand->errormsg);
390 /* it failed, we cleanup */
391 Curl_safefree(glob_buffer);
392 Curl_safefree(glob_expand);
394 return (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
/*
 * glob_cleanup() releases the memory the parser attached to 'glob'.
 *
 * The glob->size slots are walked from the last one down to the first:
 * even slots are literals, odd slots are patterns. For set patterns each
 * element string and then the element array are freed. Finally the
 * scratch buffer glob->glob_buffer is freed.
 *
 * NOTE(review): the loop condition (i < glob->size) combined with --i only
 * terminates if 'i' is an unsigned type that wraps around below zero; the
 * declaration of 'i' is not visible in this excerpt - confirm.
 */
401 void glob_cleanup(URLGlob* glob)
406 for(i = glob->size - 1; i < glob->size; --i) {
407 if(!(i & 1)) { /* even indexes contain literals */
408 Curl_safefree(glob->literal[i/2]);
410 else { /* odd indexes contain sets or ranges */
/* only set patterns hold allocations in the visible code */
411 if((glob->pattern[i/2].type == UPTSet) &&
412 (glob->pattern[i/2].content.Set.elements)) {
413 for(elem = glob->pattern[i/2].content.Set.size - 1;
416 Curl_safefree(glob->pattern[i/2].content.Set.elements[elem]);
418 Curl_safefree(glob->pattern[i/2].content.Set.elements);
422 Curl_safefree(glob->glob_buffer);
/*
 * glob_next_url() builds the next URL of the expansion and stores a
 * strdup()ed copy of it in *globbed.
 *
 * The patterns are treated like the digits of a counter that is advanced
 * starting with the rightmost pattern: a set moves on to its next element,
 * a character or numeric range adds its step, and a pattern that runs
 * past its end is reset to its first value. If the carry survives past
 * the first pattern, CURLE_OK is returned at that point (see the TODO
 * below).
 *
 * The URL is then assembled in glob->glob_buffer by alternating literals
 * (even slots) with the current value of each pattern (odd slots).
 *
 * The visible error returns are CURLE_FAILED_INIT for an unknown pattern
 * type and CURLE_OUT_OF_MEMORY after the final strdup() (presumably when
 * it fails - the guarding test is not visible in this excerpt).
 */
426 int glob_next_url(char **globbed, URLGlob *glob)
/* room available in glob_buffer: the URL length plus one more byte */
433 size_t buflen = glob->urllen + 1;
434 char *buf = glob->glob_buffer;
443 /* implement a counter over the index ranges of all patterns,
444 starting with the rightmost pattern */
445 for(i = glob->size / 2 - 1; carry && (i < glob->size); --i) {
447 pat = &glob->pattern[i];
/* set: step to the next element, wrapping to the first after the last */
450 if((pat->content.Set.elements) &&
451 (++pat->content.Set.ptr_s == pat->content.Set.size)) {
452 pat->content.Set.ptr_s = 0;
/* character range: add the step, wrapping to min_c once past max_c */
457 pat->content.CharRange.ptr_c = (char)(pat->content.CharRange.step +
458 (int)((unsigned char)pat->content.CharRange.ptr_c));
459 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
460 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
/* numeric range: add the step, wrapping to min_n once past max_n */
465 pat->content.NumRange.ptr_n += pat->content.NumRange.step;
466 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
467 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
472 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
473 return CURLE_FAILED_INIT;
476 if(carry) { /* first pattern ptr has run into overflow, done! */
477 /* TODO: verify if this should actually return CURLE_OK. */
478 return CURLE_OK; /* CURLE_OK to match previous behavior */
482 for(j = 0; j < glob->size; ++j) {
483 if(!(j&1)) { /* every other term (j even) is a literal */
484 lit = glob->literal[j/2];
485 len = snprintf(buf, buflen, "%s", lit);
489 else { /* the rest (j odd) are patterns */
490 pat = &glob->pattern[j/2];
/* set: append the element currently selected by ptr_s */
493 if(pat->content.Set.elements) {
494 len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
495 snprintf(buf, buflen, "%s",
496 pat->content.Set.elements[pat->content.Set.ptr_s]);
/* character range: append the current character */
502 *buf++ = pat->content.CharRange.ptr_c;
/* numeric range: append the current number, zero-padded to padlength */
505 len = snprintf(buf, buflen, "%0*d",
506 pat->content.NumRange.padlength,
507 pat->content.NumRange.ptr_n);
512 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
513 return CURLE_FAILED_INIT;
/* the caller receives its own heap copy of the assembled URL */
519 *globbed = strdup(glob->glob_buffer);
521 return CURLE_OUT_OF_MEMORY;
/*
 * glob_match_url() expands "#<number>" references in 'filename' using the
 * current values of the patterns stored in 'glob'.
 *
 * The output is collected in a heap buffer that starts out at
 * strlen(filename) + 1 bytes and is enlarged with realloc() whenever the
 * next piece would not fit. A '#' followed by a digit is parsed with
 * strtoul(): when the number passes the range check, the referenced
 * pattern contributes its current set element, its current range
 * character, or its current number zero-padded to padlength; otherwise
 * the "#<number>" text is used in the output. Any other character is
 * appended as is. The buffer is NUL-terminated at the end.
 *
 * The visible error returns are CURLE_OUT_OF_MEMORY on the allocation
 * paths and CURLE_FAILED_INIT for an unknown pattern type; the output
 * buffer is freed before those returns where shown.
 *
 * NOTE(review): the range check uses (i <= glob->size / 2), whereas
 * glob_next_url() treats glob->size / 2 - 1 as the index of the last
 * pattern. That looks one slot too permissive; how 'i' is derived from
 * 'num' is not visible in this excerpt - confirm before changing.
 * NOTE(review): the tail of this function (handing the buffer to *result
 * and the final return) is not visible in this excerpt.
 */
526 int glob_match_url(char **result, char *filename, URLGlob *glob)
531 char *appendthis = NULL;
532 size_t appendlen = 0;
533 size_t stringlen = 0;
537 /* We cannot use the glob_buffer for storage here since the filename may
538 * be longer than the URL we use. We allocate a good start size, then
539 * we need to realloc in case of need.
541 allocsize = strlen(filename) + 1; /* make it at least one byte to store the
543 target = malloc(allocsize);
545 return CURLE_OUT_OF_MEMORY;
548 if(*filename == '#' && ISDIGIT(filename[1])) {
550 char *ptr = filename;
551 unsigned long num = strtoul(&filename[1], &filename, 10);
554 if(num && (i <= glob->size / 2)) {
555 URLPattern pat = glob->pattern[i];
558 if(pat.content.Set.elements) {
559 appendthis = pat.content.Set.elements[pat.content.Set.ptr_s];
561 strlen(pat.content.Set.elements[pat.content.Set.ptr_s]);
565 numbuf[0] = pat.content.CharRange.ptr_c;
571 snprintf(numbuf, sizeof(numbuf), "%0*d",
572 pat.content.NumRange.padlength,
573 pat.content.NumRange.ptr_n);
575 appendlen = strlen(numbuf);
578 printf("internal error: invalid pattern type (%d)\n",
580 Curl_safefree(target);
581 return CURLE_FAILED_INIT;
585 /* #[num] out of range, use the #[num] in the output */
587 appendthis = filename++;
592 appendthis = filename++;
/* grow the output buffer when the pending piece would not fit */
595 if(appendlen + stringlen >= allocsize) {
597 /* we append a single byte to allow for the trailing byte to be appended
598 at the end of this function outside the while() loop */
599 allocsize = (appendlen + stringlen) * 2;
600 newstr = realloc(target, allocsize + 1);
602 Curl_safefree(target);
603 return CURLE_OUT_OF_MEMORY;
/* append the piece and advance the write position */
607 memcpy(&target[stringlen], appendthis, appendlen);
608 stringlen += appendlen;
610 target[stringlen]= '\0';