1 /***************************************************************************
3 * Project ___| | | | _ \| |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
8 * Copyright (C) 1998 - 2012, Daniel Stenberg, <daniel@haxx.se>, et al.
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at http://curl.haxx.se/docs/copyright.html.
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 ***************************************************************************/
22 #include "tool_setup.h"
24 #include <curl/curl.h>
26 #define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */
27 #include <curl/mprintf.h>
29 #include "tool_urlglob.h"
32 #include "memdebug.h" /* keep this as LAST include */
43 * Input a full globbed string, set the fourth argument to the amount of
44 * strings we get out of this. Return GlobCode.
46 static GlobCode glob_word(URLGlob *, /* object anchor */
47 char *, /* globbed string */
48 size_t, /* position in the URL, reported in error messages */
49 int *); /* returned number of strings */
/*
 * glob_set() parses a set expression such as "{a,b,c}". It is entered with
 * 'pattern' pointing just behind the opening '{' (glob_word() passes
 * ++pattern and ++pos).
 *
 * Characters are collected in glob->glob_buffer. Each completed element is
 * strdup()ed into pat->content.Set.elements, an array that is grown by one
 * slot per element. When the closing '}' is reached, the rest of the URL is
 * handed to glob_word() and *amount becomes the number of set elements
 * multiplied by the amount reported for the rest.
 *
 * A '[' or ']' inside the set, or the end of the string before '}', is an
 * error. Error descriptions are written to glob->errormsg. The cleanup code
 * next to realloc(), strdup() and the glob_word() call frees the elements
 * collected so far and resets the set's size and ptr_s to 0.
 *
 * Returns a GlobCode.
 */
51 static GlobCode glob_set(URLGlob *glob, char *pattern,
52 size_t pos, int *amount)
54 /* processes a set expression with the point behind the opening '{'
55 ','-separated elements are collected until the next closing '}'
60 char* buf = glob->glob_buffer;
62 pat = &glob->pattern[glob->size / 2];
63 /* patterns 0,1,2,... correspond to size=1,3,5,... */
65 pat->content.Set.size = 0;
66 pat->content.Set.ptr_s = 0;
67 pat->content.Set.elements = NULL;
73 case '\0': /* URL ended while set was still open */
74 snprintf(glob->errormsg, sizeof(glob->errormsg),
75 "unmatched brace at pos %zu\n", pos);
79 case '[': /* no nested expressions at this time */
80 snprintf(glob->errormsg, sizeof(glob->errormsg),
81 "nested braces not supported at pos %zu\n", pos);
85 case '}': /* set element completed */
87 if(pat->content.Set.elements) {
88 char **new_arr = realloc(pat->content.Set.elements,
89 (pat->content.Set.size + 1) * sizeof(char*));
92 for(elem = 0; elem < pat->content.Set.size; elem++)
93 Curl_safefree(pat->content.Set.elements[elem]);
94 Curl_safefree(pat->content.Set.elements);
95 pat->content.Set.ptr_s = 0;
96 pat->content.Set.size = 0;
98 pat->content.Set.elements = new_arr;
101 pat->content.Set.elements = malloc(sizeof(char*));
102 if(!pat->content.Set.elements) {
103 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
106 pat->content.Set.elements[pat->content.Set.size] =
107 strdup(glob->glob_buffer);
108 if(!pat->content.Set.elements[pat->content.Set.size]) {
110 for(elem = 0; elem < pat->content.Set.size; elem++)
111 Curl_safefree(pat->content.Set.elements[elem]);
112 Curl_safefree(pat->content.Set.elements);
113 pat->content.Set.ptr_s = 0;
114 pat->content.Set.size = 0;
115 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
118 ++pat->content.Set.size;
120 if(*pattern == '}') {
121 /* entire set pattern completed */
124 /* always check for a literal (may be "") between patterns */
125 res = glob_word(glob, ++pattern, ++pos, &wordamount);
128 for(elem = 0; elem < pat->content.Set.size; elem++)
129 Curl_safefree(pat->content.Set.elements[elem]);
130 Curl_safefree(pat->content.Set.elements);
131 pat->content.Set.ptr_s = 0;
132 pat->content.Set.size = 0;
136 *amount = pat->content.Set.size * wordamount;
142 buf = glob->glob_buffer;
147 case ']': /* illegal closing bracket */
148 snprintf(glob->errormsg, sizeof(glob->errormsg),
149 "illegal pattern at pos %zu\n", pos);
152 case '\\': /* escaped character, skip '\' */
157 /* intentional fallthrough */
159 *buf++ = *pattern++; /* copy character to set element */
/*
 * glob_range() parses a range expression such as "[a-z]" or "[001-100:10]".
 * It is entered with 'pattern' pointing just behind the opening '['.
 *
 * A leading letter selects a character range (UPTCharRange), a leading digit
 * a numeric range (UPTNumRange); any other first character is rejected. Both
 * forms are read with sscanf() and accept an optional ":step" in front of
 * the closing ']'; without a well-formed step the step is 1.
 *
 * Character ranges need min < max and may span at most 'z' - 'a'. Numeric
 * ranges need min <= max. When the lower bound starts with '0', padlength is
 * increased; glob_next_url() uses it as the field width of "%0*d".
 *
 * After the range the next ']' is located with strchr(); when there is none,
 * "missing ']'" is stored in glob->errormsg and GLOB_ERROR is returned. The
 * text behind the ']' is passed to glob_word(), and *amount is set to the
 * amount for that rest multiplied by (max - min + 1).
 */
166 static GlobCode glob_range(URLGlob *glob, char *pattern,
167 size_t pos, int *amount)
169 /* processes a range expression with the point behind the opening '['
170 - char range: e.g. "a-z]", "B-Q]"
171 - num range: e.g. "0-9]", "17-2000]"
172 - num range with leading zeros: e.g. "001-999]"
173 expression is checked for well-formedness and collected until the next ']'
184 pat = &glob->pattern[glob->size / 2];
185 /* patterns 0,1,2,... correspond to size=1,3,5,... */
188 if(ISALPHA(*pattern)) {
189 /* character range detected */
193 pat->type = UPTCharRange;
195 rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2);
197 if((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) {
198 /* the pattern is not well-formed */
199 snprintf(glob->errormsg, sizeof(glob->errormsg),
200 "error: bad range specification after pos %zu\n", pos);
204 /* check the (first) separating character */
205 if((sep != ']') && (sep != ':')) {
206 snprintf(glob->errormsg, sizeof(glob->errormsg),
207 "error: unsupported character (%c) after range at pos %zu\n",
212 /* if there was a ":[num]" thing, use that as step or else use 1 */
213 pat->content.CharRange.step =
214 ((sep == ':') && (rc == 5) && (sep2 == ']')) ? step : 1;
216 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
217 pat->content.CharRange.max_c = max_c;
219 else if(ISDIGIT(*pattern)) {
220 /* numeric range detected */
224 pat->type = UPTNumRange;
225 pat->content.NumRange.padlength = 0;
227 rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2);
229 if((rc < 2) || (min_n > max_n)) {
230 /* the pattern is not well-formed */
231 snprintf(glob->errormsg, sizeof(glob->errormsg),
232 "error: bad range specification after pos %zu\n", pos);
235 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
236 pat->content.NumRange.max_n = max_n;
238 /* if there was a ":[num]" thing, use that as step or else use 1 */
239 pat->content.NumRange.step =
240 ((sep == ':') && (rc == 5) && (sep2 == ']')) ? step : 1;
242 if(*pattern == '0') {
243 /* leading zero specified */
247 ++pat->content.NumRange.padlength; /* padding length is set for all
248 instances of this pattern */
253 snprintf(glob->errormsg, sizeof(glob->errormsg),
254 "illegal character in range specification at pos %zu\n", pos);
258 c = (char*)strchr(pattern, ']'); /* continue after next ']' */
262 snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
263 return GLOB_ERROR; /* missing ']' */
266 /* always check for a literal (may be "") between patterns */
268 res = glob_word(glob, c, pos + (c - pattern), &wordamount);
269 if(res == GLOB_ERROR) {
/* NOTE(review): the count below is max - min + 1 and does not take the step
   into account, so it may overstate the number of URLs when step > 1;
   confirm how callers use *amount */
275 if(pat->type == UPTCharRange)
276 *amount = wordamount * (pat->content.CharRange.max_c -
277 pat->content.CharRange.min_c + 1);
279 *amount = wordamount * (pat->content.NumRange.max_n -
280 pat->content.NumRange.min_n + 1);
283 return res; /* GLOB_OK or GLOB_NO_MEM */
/*
 * glob_word() handles the literal text up to the next '{', '[' or the end of
 * the string and then hands over to glob_set() or glob_range().
 *
 * glob:    parser state; the literal is assembled in glob->glob_buffer and a
 *          strdup()ed copy is stored in glob->literal[glob->size / 2]
 * pattern: the part of the URL that is still to be parsed
 * pos:     position counter, reported in error messages and passed on
 *          (incremented) to the set/range parsers
 * amount:  out; preset to 1 and passed on to glob_set()/glob_range() when a
 *          pattern follows the literal
 *
 * A '}' or ']' found in literal text is reported as an unmatched close
 * brace/bracket. A backslash is treated as an escape only in front of '{',
 * '[', '}' or ']'; any other backslash is copied like a normal character.
 *
 * The Curl_safefree() of the stored literal at the end presumably runs only
 * when parsing failed - confirm against the surrounding condition.
 */
286 static GlobCode glob_word(URLGlob *glob, char *pattern,
287 size_t pos, int *amount)
289 /* processes a literal string component of a URL
290 special characters '{' and '[' branch to set/range processing functions
292 char* buf = glob->glob_buffer;
294 GlobCode res = GLOB_OK;
296 *amount = 1; /* default is one single string */
298 while(*pattern != '\0' && *pattern != '{' && *pattern != '[') {
299 if(*pattern == '}' || *pattern == ']') {
300 snprintf(glob->errormsg, sizeof(glob->errormsg),
301 "unmatched close brace/bracket at pos %zu\n", pos);
305 /* only allow \ to escape known "special letters" */
306 if(*pattern == '\\' &&
307 (*(pattern+1) == '{' || *(pattern+1) == '[' ||
308 *(pattern+1) == '}' || *(pattern+1) == ']') ) {
310 /* escape character, skip '\' */
314 *buf++ = *pattern++; /* copy character to literal */
318 litindex = glob->size / 2;
319 /* literals 0,1,2,... correspond to size=0,2,4,... */
320 glob->literal[litindex] = strdup(glob->glob_buffer);
321 if(!glob->literal[litindex]) {
322 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
329 /* singular URL processed */
333 /* process set pattern */
334 res = glob_set(glob, ++pattern, ++pos, amount);
338 /* process range pattern */
339 res = glob_range(glob, ++pattern, ++pos, amount);
344 Curl_safefree(glob->literal[litindex]);
/*
 * glob_url() sets up a URLGlob for 'url' and runs the glob parser over it.
 *
 * A scratch buffer of strlen(url) + 1 bytes and a zero-initialized URLGlob
 * are allocated; CURLE_OUT_OF_MEMORY is returned when an allocation fails
 * (the scratch buffer is released again if the second one fails). Parsing
 * starts with glob_word() at position 1.
 *
 * In the failure path, a non-empty glob_expand->errormsg is printed to
 * 'error' (when 'error' is non-NULL) with a "curl: (code) [globbing]"
 * prefix, both allocations are released, and the function returns
 * CURLE_OUT_OF_MEMORY for GLOB_NO_MEM or CURLE_URL_MALFORMAT otherwise.
 *
 * NOTE(review): presumably *glob receives the new object and *urlnum the
 * number of URLs on success - confirm against the success path.
 */
349 int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error)
352 * We can deal with any-size, just make a buffer with the same length
353 * as the specified URL!
355 URLGlob *glob_expand;
362 glob_buffer = malloc(strlen(url) + 1);
364 return CURLE_OUT_OF_MEMORY;
366 glob_expand = calloc(1, sizeof(URLGlob));
368 Curl_safefree(glob_buffer);
369 return CURLE_OUT_OF_MEMORY;
371 glob_expand->size = 0;
372 glob_expand->urllen = strlen(url);
373 glob_expand->glob_buffer = glob_buffer;
374 glob_expand->beenhere = 0;
376 res = glob_word(glob_expand, url, 1, &amount);
380 if(error && glob_expand->errormsg[0]) {
381 /* send error description to the error-stream */
382 fprintf(error, "curl: (%d) [globbing] %s",
383 (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT,
384 glob_expand->errormsg);
386 /* it failed, we cleanup */
387 Curl_safefree(glob_buffer);
388 Curl_safefree(glob_expand);
390 return (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
/*
 * glob_cleanup() releases the memory a URLGlob refers to: the literal
 * strings (even term indexes), the element strings and the element array of
 * set patterns (odd term indexes), and finally glob->glob_buffer.
 *
 * The outer loop counts down from glob->size - 1 with the condition
 * i < glob->size, so it presumably relies on an unsigned index wrapping
 * around after 0 to terminate - confirm the declared type of i.
 */
397 void glob_cleanup(URLGlob* glob)
402 for(i = glob->size - 1; i < glob->size; --i) {
403 if(!(i & 1)) { /* even indexes contain literals */
404 Curl_safefree(glob->literal[i/2]);
406 else { /* odd indexes contain sets or ranges */
407 if((glob->pattern[i/2].type == UPTSet) &&
408 (glob->pattern[i/2].content.Set.elements)) {
409 for(elem = glob->pattern[i/2].content.Set.size - 1;
412 Curl_safefree(glob->pattern[i/2].content.Set.elements[elem]);
414 Curl_safefree(glob->pattern[i/2].content.Set.elements);
418 Curl_safefree(glob->glob_buffer);
/*
 * glob_next_url() produces the next URL of the glob sequence.
 *
 * The patterns are advanced like a counter, starting with the rightmost one:
 * a set index, range character or range number that moves past its last
 * value is reset to its first value. When the carry is still set after the
 * leftmost pattern, CURLE_OK is returned at that point.
 *
 * Otherwise the URL is assembled in glob->glob_buffer from the literals
 * (even terms) and the current value of each pattern (odd terms), and a
 * strdup()ed copy is stored in *globbed.
 *
 * An unknown pattern type prints an internal error and returns
 * CURLE_FAILED_INIT; CURLE_OUT_OF_MEMORY is returned next to the strdup()
 * call, presumably when it fails.
 */
422 int glob_next_url(char **globbed, URLGlob *glob)
429 size_t buflen = glob->urllen + 1;
430 char *buf = glob->glob_buffer;
439 /* implement a counter over the index ranges of all patterns,
440 starting with the rightmost pattern */
441 for(i = glob->size / 2 - 1; carry && (i < glob->size); --i) {
443 pat = &glob->pattern[i];
446 if((pat->content.Set.elements) &&
447 (++pat->content.Set.ptr_s == pat->content.Set.size)) {
448 pat->content.Set.ptr_s = 0;
453 pat->content.CharRange.ptr_c = (char)(pat->content.CharRange.step +
454 (int)((unsigned char)pat->content.CharRange.ptr_c));
455 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
456 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
461 pat->content.NumRange.ptr_n += pat->content.NumRange.step;
462 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
463 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
468 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
469 return CURLE_FAILED_INIT;
472 if(carry) { /* first pattern ptr has run into overflow, done! */
473 /* TODO: verify if this should actually return CURLE_OK. */
474 return CURLE_OK; /* CURLE_OK to match previous behavior */
478 for(j = 0; j < glob->size; ++j) {
479 if(!(j&1)) { /* every other term (j even) is a literal */
480 lit = glob->literal[j/2];
481 len = snprintf(buf, buflen, "%s", lit);
485 else { /* the rest (j odd) are patterns */
486 pat = &glob->pattern[j/2];
489 if(pat->content.Set.elements) {
490 len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
491 snprintf(buf, buflen, "%s",
492 pat->content.Set.elements[pat->content.Set.ptr_s]);
498 *buf++ = pat->content.CharRange.ptr_c;
501 len = snprintf(buf, buflen, "%0*d",
502 pat->content.NumRange.padlength,
503 pat->content.NumRange.ptr_n);
508 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
509 return CURLE_FAILED_INIT;
515 *globbed = strdup(glob->glob_buffer);
517 return CURLE_OUT_OF_MEMORY;
/*
 * glob_match_url() builds an output name from 'filename' in a freshly
 * allocated buffer ('target').
 *
 * A '#' followed by a digit is read with strtoul() as a pattern reference.
 * For a reference that passes the range check, the current value of that
 * pattern is appended: the selected set element, the current range
 * character, or the current number formatted with "%0*d" and the pattern's
 * padlength. A reference that is out of range is kept in the output, and
 * all other characters are copied through.
 *
 * 'target' starts with strlen(filename) + 1 bytes and is enlarged with
 * realloc() to twice the needed length (plus one byte for the terminator)
 * whenever the next piece would not fit. The string is NUL-terminated at
 * the end.
 *
 * CURLE_OUT_OF_MEMORY is returned next to the malloc()/realloc() calls and
 * CURLE_FAILED_INIT for an unknown pattern type; in the realloc() and
 * unknown-type paths 'target' is released first.
 *
 * NOTE(review): presumably the finished string is handed back through
 * *result - confirm against the end of the function.
 * NOTE(review): the range check is "i <= glob->size / 2" before indexing
 * glob->pattern[i]. If i is num - 1 and the number of patterns is
 * glob->size / 2, this admits one index past the last pattern - confirm how
 * i is derived.
 */
522 int glob_match_url(char **result, char *filename, URLGlob *glob)
527 char *appendthis = NULL;
528 size_t appendlen = 0;
529 size_t stringlen = 0;
533 /* We cannot use the glob_buffer for storage here since the filename may
534 * be longer than the URL we use. We allocate a good start size, then
535 * we need to realloc in case of need.
537 allocsize = strlen(filename) + 1; /* make it at least one byte to store the
539 target = malloc(allocsize);
541 return CURLE_OUT_OF_MEMORY;
544 if(*filename == '#' && ISDIGIT(filename[1])) {
546 char *ptr = filename;
547 unsigned long num = strtoul(&filename[1], &filename, 10);
550 if(num && (i <= glob->size / 2)) {
551 URLPattern pat = glob->pattern[i];
554 if(pat.content.Set.elements) {
555 appendthis = pat.content.Set.elements[pat.content.Set.ptr_s];
557 strlen(pat.content.Set.elements[pat.content.Set.ptr_s]);
561 numbuf[0] = pat.content.CharRange.ptr_c;
567 snprintf(numbuf, sizeof(numbuf), "%0*d",
568 pat.content.NumRange.padlength,
569 pat.content.NumRange.ptr_n);
571 appendlen = strlen(numbuf);
574 printf("internal error: invalid pattern type (%d)\n",
576 Curl_safefree(target);
577 return CURLE_FAILED_INIT;
581 /* #[num] out of range, use the #[num] in the output */
583 appendthis = filename++;
588 appendthis = filename++;
591 if(appendlen + stringlen >= allocsize) {
593 /* we append a single byte to allow for the trailing byte to be appended
594 at the end of this function outside the while() loop */
595 allocsize = (appendlen + stringlen) * 2;
596 newstr = realloc(target, allocsize + 1);
598 Curl_safefree(target);
599 return CURLE_OUT_OF_MEMORY;
603 memcpy(&target[stringlen], appendthis, appendlen);
604 stringlen += appendlen;
606 target[stringlen]= '\0';