1 /***************************************************************************
3 * Project ___| | | | _ \| |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
8 * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al.
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at http://curl.haxx.se/docs/copyright.html.
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 ***************************************************************************/
24 #include <curl/curl.h>
26 #define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */
27 #include <curl/mprintf.h>
29 #include "tool_urlglob.h"
32 #include "memdebug.h" /* keep this as LAST include */
43 * Input a full globbed string, set the forth argument to the amount of
44 * strings we get out of this. Return GlobCode.
46 static GlobCode glob_word(URLGlob *, /* object anchor */
47 char *, /* globbed string */
48 size_t, /* position */
49 int *); /* returned number of strings */
51 static GlobCode glob_set(URLGlob *glob, char *pattern,
52 size_t pos, int *amount)
54 /* processes a set expression with the point behind the opening '{'
55 ','-separated elements are collected until the next closing '}'
60 char* buf = glob->glob_buffer;
62 pat = &glob->pattern[glob->size / 2];
63 /* patterns 0,1,2,... correspond to size=1,3,5,... */
65 pat->content.Set.size = 0;
66 pat->content.Set.ptr_s = 0;
67 pat->content.Set.elements = NULL;
73 case '\0': /* URL ended while set was still open */
74 snprintf(glob->errormsg, sizeof(glob->errormsg),
75 "unmatched brace at pos %zu\n", pos);
79 case '[': /* no nested expressions at this time */
80 snprintf(glob->errormsg, sizeof(glob->errormsg),
81 "nested braces not supported at pos %zu\n", pos);
85 case '}': /* set element completed */
87 if(pat->content.Set.elements) {
88 char **new_arr = realloc(pat->content.Set.elements,
89 (pat->content.Set.size + 1) * sizeof(char*));
92 for(elem = 0; elem < pat->content.Set.size; elem++)
93 Curl_safefree(pat->content.Set.elements[elem]);
94 Curl_safefree(pat->content.Set.elements);
95 pat->content.Set.ptr_s = 0;
96 pat->content.Set.size = 0;
98 pat->content.Set.elements = new_arr;
101 pat->content.Set.elements = malloc(sizeof(char*));
102 if(!pat->content.Set.elements) {
103 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
106 pat->content.Set.elements[pat->content.Set.size] =
107 strdup(glob->glob_buffer);
108 if(!pat->content.Set.elements[pat->content.Set.size]) {
110 for(elem = 0; elem < pat->content.Set.size; elem++)
111 Curl_safefree(pat->content.Set.elements[elem]);
112 Curl_safefree(pat->content.Set.elements);
113 pat->content.Set.ptr_s = 0;
114 pat->content.Set.size = 0;
115 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
118 ++pat->content.Set.size;
120 if(*pattern == '}') {
121 /* entire set pattern completed */
124 /* always check for a literal (may be "") between patterns */
125 res = glob_word(glob, ++pattern, ++pos, &wordamount);
128 for(elem = 0; elem < pat->content.Set.size; elem++)
129 Curl_safefree(pat->content.Set.elements[elem]);
130 Curl_safefree(pat->content.Set.elements);
131 pat->content.Set.ptr_s = 0;
132 pat->content.Set.size = 0;
136 *amount = pat->content.Set.size * wordamount;
142 buf = glob->glob_buffer;
147 case ']': /* illegal closing bracket */
148 snprintf(glob->errormsg, sizeof(glob->errormsg),
149 "illegal pattern at pos %zu\n", pos);
152 case '\\': /* escaped character, skip '\' */
157 /* intentional fallthrough */
159 *buf++ = *pattern++; /* copy character to set element */
166 static GlobCode glob_range(URLGlob *glob, char *pattern,
167 size_t pos, int *amount)
169 /* processes a range expression with the point behind the opening '['
170 - char range: e.g. "a-z]", "B-Q]"
171 - num range: e.g. "0-9]", "17-2000]"
172 - num range with leading zeros: e.g. "001-999]"
173 expression is checked for well-formedness and collected until the next ']'
184 pat = &glob->pattern[glob->size / 2];
185 /* patterns 0,1,2,... correspond to size=1,3,5,... */
188 if(ISALPHA(*pattern)) {
189 /* character range detected */
193 pat->type = UPTCharRange;
195 rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2);
197 if((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) {
198 /* the pattern is not well-formed */
199 snprintf(glob->errormsg, sizeof(glob->errormsg),
200 "error: bad range specification after pos %zu\n", pos);
204 /* check the (first) separating character */
205 if((sep != ']') && (sep != ':')) {
206 snprintf(glob->errormsg, sizeof(glob->errormsg),
207 "error: unsupported character (%c) after range at pos %zu\n",
212 /* if there was a ":[num]" thing, use that as step or else use 1 */
213 pat->content.CharRange.step =
214 ((sep == ':') && (rc == 5) && (sep2 == ']')) ? step : 1;
216 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
217 pat->content.CharRange.max_c = max_c;
219 else if(ISDIGIT(*pattern)) {
220 /* numeric range detected */
224 pat->type = UPTNumRange;
225 pat->content.NumRange.padlength = 0;
227 rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2);
229 if((rc < 2) || (min_n > max_n)) {
230 /* the pattern is not well-formed */
231 snprintf(glob->errormsg, sizeof(glob->errormsg),
232 "error: bad range specification after pos %zu\n", pos);
235 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
236 pat->content.NumRange.max_n = max_n;
238 /* if there was a ":[num]" thing, use that as step or else use 1 */
239 pat->content.NumRange.step =
240 ((sep == ':') && (rc == 5) && (sep2 == ']')) ? step : 1;
242 if(*pattern == '0') {
243 /* leading zero specified */
247 ++pat->content.NumRange.padlength; /* padding length is set for all
248 instances of this pattern */
253 snprintf(glob->errormsg, sizeof(glob->errormsg),
254 "illegal character in range specification at pos %zu\n", pos);
258 c = (char*)strchr(pattern, ']'); /* continue after next ']' */
262 snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
263 return GLOB_ERROR; /* missing ']' */
266 /* always check for a literal (may be "") between patterns */
268 res = glob_word(glob, c, pos + (c - pattern), &wordamount);
269 if(res == GLOB_ERROR) {
275 if(pat->type == UPTCharRange)
276 *amount = wordamount * (pat->content.CharRange.max_c -
277 pat->content.CharRange.min_c + 1);
279 *amount = wordamount * (pat->content.NumRange.max_n -
280 pat->content.NumRange.min_n + 1);
283 return res; /* GLOB_OK or GLOB_NO_MEM */
286 static GlobCode glob_word(URLGlob *glob, char *pattern,
287 size_t pos, int *amount)
289 /* processes a literal string component of a URL
290 special characters '{' and '[' branch to set/range processing functions
292 char* buf = glob->glob_buffer;
294 GlobCode res = GLOB_OK;
296 *amount = 1; /* default is one single string */
298 while(*pattern != '\0' && *pattern != '{' && *pattern != '[') {
299 if(*pattern == '}' || *pattern == ']') {
300 snprintf(glob->errormsg, sizeof(glob->errormsg),
301 "unmatched close brace/bracket at pos %zu\n", pos);
305 /* only allow \ to escape known "special letters" */
306 if(*pattern == '\\' &&
307 (*(pattern+1) == '{' || *(pattern+1) == '[' ||
308 *(pattern+1) == '}' || *(pattern+1) == ']') ) {
310 /* escape character, skip '\' */
314 *buf++ = *pattern++; /* copy character to literal */
318 litindex = glob->size / 2;
319 /* literals 0,1,2,... correspond to size=0,2,4,... */
320 glob->literal[litindex] = strdup(glob->glob_buffer);
321 if(!glob->literal[litindex]) {
322 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
329 /* singular URL processed */
333 /* process set pattern */
334 res = glob_set(glob, ++pattern, ++pos, amount);
338 /* process range pattern */
339 res = glob_range(glob, ++pattern, ++pos, amount);
344 Curl_safefree(glob->literal[litindex]);
349 int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error)
352 * We can deal with any-size, just make a buffer with the same length
353 * as the specified URL!
355 URLGlob *glob_expand;
362 glob_buffer = malloc(strlen(url) + 1);
364 return CURLE_OUT_OF_MEMORY;
366 glob_expand = calloc(1, sizeof(URLGlob));
368 Curl_safefree(glob_buffer);
369 return CURLE_OUT_OF_MEMORY;
371 glob_expand->size = 0;
372 glob_expand->urllen = strlen(url);
373 glob_expand->glob_buffer = glob_buffer;
374 glob_expand->beenhere = 0;
376 res = glob_word(glob_expand, url, 1, &amount);
380 if(error && glob_expand->errormsg[0]) {
381 /* send error description to the error-stream */
382 fprintf(error, "curl: (%d) [globbing] %s",
383 (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT,
384 glob_expand->errormsg);
386 /* it failed, we cleanup */
387 Curl_safefree(glob_buffer);
388 Curl_safefree(glob_expand);
390 return (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
397 void glob_cleanup(URLGlob* glob)
402 for(i = glob->size - 1; i < glob->size; --i) {
403 if(!(i & 1)) { /* even indexes contain literals */
404 Curl_safefree(glob->literal[i/2]);
406 else { /* odd indexes contain sets or ranges */
407 if((glob->pattern[i/2].type == UPTSet) &&
408 (glob->pattern[i/2].content.Set.elements)) {
409 for(elem = glob->pattern[i/2].content.Set.size - 1;
412 Curl_safefree(glob->pattern[i/2].content.Set.elements[elem]);
414 Curl_safefree(glob->pattern[i/2].content.Set.elements);
418 Curl_safefree(glob->glob_buffer);
422 char *glob_next_url(URLGlob *glob)
429 size_t buflen = glob->urllen + 1;
430 char *buf = glob->glob_buffer;
437 /* implement a counter over the index ranges of all patterns,
438 starting with the rightmost pattern */
439 for(i = glob->size / 2 - 1; carry && (i < glob->size); --i) {
441 pat = &glob->pattern[i];
444 if((pat->content.Set.elements) &&
445 (++pat->content.Set.ptr_s == pat->content.Set.size)) {
446 pat->content.Set.ptr_s = 0;
451 pat->content.CharRange.ptr_c = (char)(pat->content.CharRange.step +
452 (int)((unsigned char)pat->content.CharRange.ptr_c));
453 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
454 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
459 pat->content.NumRange.ptr_n += pat->content.NumRange.step;
460 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
461 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
466 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
467 exit (CURLE_FAILED_INIT);
470 if(carry) /* first pattern ptr has run into overflow, done! */
474 for(j = 0; j < glob->size; ++j) {
475 if(!(j&1)) { /* every other term (j even) is a literal */
476 lit = glob->literal[j/2];
477 len = snprintf(buf, buflen, "%s", lit);
481 else { /* the rest (i odd) are patterns */
482 pat = &glob->pattern[j/2];
485 if(pat->content.Set.elements) {
486 len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
487 snprintf(buf, buflen, "%s",
488 pat->content.Set.elements[pat->content.Set.ptr_s]);
494 *buf++ = pat->content.CharRange.ptr_c;
497 len = snprintf(buf, buflen, "%0*d",
498 pat->content.NumRange.padlength,
499 pat->content.NumRange.ptr_n);
504 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
505 exit (CURLE_FAILED_INIT);
510 return strdup(glob->glob_buffer);
513 char *glob_match_url(char *filename, URLGlob *glob)
518 char *appendthis = NULL;
519 size_t appendlen = 0;
520 size_t stringlen = 0;
522 /* We cannot use the glob_buffer for storage here since the filename may
523 * be longer than the URL we use. We allocate a good start size, then
524 * we need to realloc in case of need.
526 allocsize = strlen(filename) + 1; /* make it at least one byte to store the
528 target = malloc(allocsize);
530 return NULL; /* major failure */
533 if(*filename == '#' && ISDIGIT(filename[1])) {
535 char *ptr = filename;
536 unsigned long num = strtoul(&filename[1], &filename, 10);
539 if(num && (i <= glob->size / 2)) {
540 URLPattern pat = glob->pattern[i];
543 if(pat.content.Set.elements) {
544 appendthis = pat.content.Set.elements[pat.content.Set.ptr_s];
546 strlen(pat.content.Set.elements[pat.content.Set.ptr_s]);
550 numbuf[0] = pat.content.CharRange.ptr_c;
556 snprintf(numbuf, sizeof(numbuf), "%0*d",
557 pat.content.NumRange.padlength,
558 pat.content.NumRange.ptr_n);
560 appendlen = strlen(numbuf);
563 printf("internal error: invalid pattern type (%d)\n",
565 Curl_safefree(target);
570 /* #[num] out of range, use the #[num] in the output */
572 appendthis = filename++;
577 appendthis = filename++;
580 if(appendlen + stringlen >= allocsize) {
582 /* we append a single byte to allow for the trailing byte to be appended
583 at the end of this function outside the while() loop */
584 allocsize = (appendlen + stringlen) * 2;
585 newstr = realloc(target, allocsize + 1);
587 Curl_safefree(target);
592 memcpy(&target[stringlen], appendthis, appendlen);
593 stringlen += appendlen;
595 target[stringlen]= '\0';