1 /***************************************************************************
3 * Project ___| | | | _ \| |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
8 * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al.
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at http://curl.haxx.se/docs/copyright.html.
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 ***************************************************************************/
24 #include <curl/curl.h>
26 #define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */
27 #include <curl/mprintf.h>
32 #include "memdebug.h" /* keep this as LAST include */
42 * Input a full globbed string, set the fourth argument to the amount of
43 * strings we get out of this. Return GlobCode.
/*
 * Forward declaration: glob_set() and glob_range() call glob_word() on the
 * text that follows a pattern, and both appear before glob_word() is
 * defined.
 */
45 static GlobCode glob_word(URLGlob *, /* object anchor */
46 char *, /* globbed string */
47 size_t, /* position */
48 int *); /* returned number of strings */
/*
 * glob_set() - parse a set expression, entered just after the opening '{'.
 *
 * glob:    globbing state; the pattern slot used is
 *          glob->pattern[glob->size / 2] and glob->glob_buffer serves as
 *          scratch space for the element currently being collected.
 * pattern: text following the '{'.
 * pos:     position in the URL, used only in error messages here.
 * amount:  receives (number of set elements) * (count reported by
 *          glob_word() for the text after the closing '}').
 *
 * Visible behaviour:
 *  - the Set fields (size, ptr_s, elements) are reset before parsing;
 *  - each completed element is stored as a strdup() of glob_buffer in an
 *    array that is malloc()ed for the first element and realloc()ed to
 *    size + 1 entries afterwards;
 *  - when an allocation fails, the elements collected so far and the array
 *    are released, the counters are reset and "out of memory" is written
 *    to glob->errormsg (the first malloc() failure path only writes the
 *    message in the lines shown);
 *  - end of string, '[' and ']' inside the set each produce a specific
 *    error message in glob->errormsg;
 *  - a backslash is skipped and the following character copied literally;
 *  - once the closing '}' has been handled, glob_word() parses the rest of
 *    the URL.
 *
 * NOTE(review): the loop header, the switch header, the ',' handling and
 * every return statement fall on lines missing from this excerpt, so the
 * exact return values cannot be confirmed from here.
 */
50 static GlobCode glob_set(URLGlob *glob, char *pattern,
51 size_t pos, int *amount)
53 /* processes a set expression with the point behind the opening '{'
54 ','-separated elements are collected until the next closing '}'
57 char* buf = glob->glob_buffer;
60 pat = (URLPattern*)&glob->pattern[glob->size / 2];
61 /* patterns 0,1,2,... correspond to size=1,3,5,... */
63 pat->content.Set.size = 0;
64 pat->content.Set.ptr_s = 0;
65 pat->content.Set.elements = NULL;
71 case '\0': /* URL ended while set was still open */
72 snprintf(glob->errormsg, sizeof(glob->errormsg),
73 "unmatched brace at pos %zu\n", pos);
77 case '[': /* no nested expressions at this time */
78 snprintf(glob->errormsg, sizeof(glob->errormsg),
79 "nested braces not supported at pos %zu\n", pos);
83 case '}': /* set element completed */
85 if(pat->content.Set.elements) {
86 char **new_arr = realloc(pat->content.Set.elements,
87 (pat->content.Set.size + 1) * sizeof(char*));
90 for(elem = 0; elem < pat->content.Set.size; elem++)
91 Curl_safefree(pat->content.Set.elements[elem]);
92 Curl_safefree(pat->content.Set.elements);
93 pat->content.Set.ptr_s = 0;
94 pat->content.Set.size = 0;
96 pat->content.Set.elements = new_arr;
99 pat->content.Set.elements = malloc(sizeof(char*));
100 if(!pat->content.Set.elements) {
101 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory");
104 pat->content.Set.elements[pat->content.Set.size] =
105 strdup(glob->glob_buffer);
106 if(!pat->content.Set.elements[pat->content.Set.size]) {
108 for(elem = 0; elem < pat->content.Set.size; elem++)
109 Curl_safefree(pat->content.Set.elements[elem]);
110 Curl_safefree(pat->content.Set.elements);
111 pat->content.Set.ptr_s = 0;
112 pat->content.Set.size = 0;
113 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory");
116 ++pat->content.Set.size;
118 if(*pattern == '}') {
119 /* entire set pattern completed */
122 /* always check for a literal (may be "") between patterns */
123 if(GLOB_ERROR == glob_word(glob, ++pattern, ++pos, &wordamount))
125 *amount = pat->content.Set.size * wordamount;
131 buf = glob->glob_buffer;
136 case ']': /* illegal closing bracket */
137 snprintf(glob->errormsg, sizeof(glob->errormsg),
138 "illegal pattern at pos %zu\n", pos);
141 case '\\': /* escaped character, skip '\' */
146 /* intentional fallthrough */
148 *buf++ = *pattern++; /* copy character to set element */
/*
 * glob_range() - parse a range expression, entered just after the opening
 * '['.
 *
 * glob:    globbing state; the pattern slot used is
 *          glob->pattern[glob->size / 2].
 * pattern: text following the '['.
 * pos:     position in the URL, used in error messages and forwarded
 *          (offset by the consumed length) to glob_word().
 * amount:  receives (max - min + 1) * (count reported by glob_word() for
 *          the text after the closing ']').
 *
 * Visible behaviour:
 *  - a leading letter selects a character range, parsed with
 *    "%c-%c%c%d%c"; it is rejected when fewer than three fields match,
 *    when min >= max, or when the span exceeds 'z' - 'a'; the character
 *    after the range must be ']' or ':';
 *  - a leading digit selects a numeric range, parsed with "%d-%d%c%d%c";
 *    it is rejected when fewer than two fields match or when min > max;
 *    a leading '0' affects padlength (the full computation is on lines
 *    not shown);
 *  - for both kinds the step is the parsed value only when the separator
 *    is ':', all five fields matched and the fifth is ']'; otherwise 1;
 *  - any other leading character yields "illegal character in range
 *    specification";
 *  - the closing ']' is located with strchr(); if it is absent the
 *    function returns GLOB_ERROR with the message "missing ']'".
 *
 * NOTE(review): in the numeric branch rc == 2 passes validation, yet the
 * step expression then reads sep, which sscanf() would not have assigned.
 * The declaration of sep is not visible here - confirm it is initialised.
 * NOTE(review): no check that step is positive is visible, and *amount is
 * computed without taking step into account - confirm this is intended.
 */
155 static GlobCode glob_range(URLGlob *glob, char *pattern,
156 size_t pos, int *amount)
158 /* processes a range expression with the point behind the opening '['
159 - char range: e.g. "a-z]", "B-Q]"
160 - num range: e.g. "0-9]", "17-2000]"
161 - num range with leading zeros: e.g. "001-999]"
162 expression is checked for well-formedness and collected until the next ']'
172 pat = (URLPattern*)&glob->pattern[glob->size / 2];
173 /* patterns 0,1,2,... correspond to size=1,3,5,... */
176 if(ISALPHA(*pattern)) { /* character range detected */
180 pat->type = UPTCharRange;
181 rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2);
182 if((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) {
183 /* the pattern is not well-formed */
184 snprintf(glob->errormsg, sizeof(glob->errormsg),
185 "error: bad range specification after pos %zu\n", pos);
189 /* check the (first) separating character */
190 if((sep != ']') && (sep != ':')) {
191 snprintf(glob->errormsg, sizeof(glob->errormsg),
192 "error: unsupported character (%c) after range at pos %zu\n",
197 /* if there was a ":[num]" thing, use that as step or else use 1 */
198 pat->content.CharRange.step =
199 ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1;
201 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
202 pat->content.CharRange.max_c = max_c;
204 else if(ISDIGIT(*pattern)) { /* numeric range detected */
208 pat->type = UPTNumRange;
209 pat->content.NumRange.padlength = 0;
211 rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2);
213 if((rc < 2) || (min_n > max_n)) {
214 /* the pattern is not well-formed */
215 snprintf(glob->errormsg, sizeof(glob->errormsg),
216 "error: bad range specification after pos %zu\n", pos);
219 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
220 pat->content.NumRange.max_n = max_n;
222 /* if there was a ":[num]" thing, use that as step or else use 1 */
223 pat->content.NumRange.step =
224 ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1;
226 if(*pattern == '0') { /* leading zero specified */
230 ++pat->content.NumRange.padlength; /* padding length is set for all
231 instances of this pattern */
237 snprintf(glob->errormsg, sizeof(glob->errormsg),
238 "illegal character in range specification at pos %zu\n", pos);
242 c = (char*)strchr(pattern, ']'); /* continue after next ']' */
246 snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
247 return GLOB_ERROR; /* missing ']' */
250 /* always check for a literal (may be "") between patterns */
252 if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount))
255 if(pat->type == UPTCharRange)
256 *amount = (pat->content.CharRange.max_c -
257 pat->content.CharRange.min_c + 1) *
260 *amount = (pat->content.NumRange.max_n -
261 pat->content.NumRange.min_n + 1) * wordamount;
/*
 * glob_word() - parse a literal stretch of the URL and hand over to the
 * set/range parsers when a pattern starts.
 *
 * glob:    globbing state; glob->glob_buffer is used as scratch space and
 *          the finished literal is stored in glob->literal[glob->size / 2].
 * pattern: text to parse.
 * pos:     position in the URL, used in error messages and passed on
 *          (incremented) to glob_set() / glob_range().
 * amount:  set to 1 up front; glob_set() / glob_range() receive the same
 *          pointer and overwrite it when a pattern follows.
 *
 * Visible behaviour:
 *  - characters are copied until end of string, '{' or '[';
 *  - a bare '}' or ']' in the literal is reported as "unmatched close
 *    brace/bracket";
 *  - a backslash is dropped only when it precedes one of '{', '[', '}',
 *    ']'; any other backslash is copied like an ordinary character;
 *  - the literal is duplicated with strdup();
 *  - when res indicates failure the duplicated literal is released again
 *    and res is returned.
 *
 * NOTE(review): the branch headers around the set/range dispatch and the
 * handling of a failed strdup() are on lines missing from this excerpt.
 */
266 static GlobCode glob_word(URLGlob *glob, char *pattern,
267 size_t pos, int *amount)
269 /* processes a literal string component of a URL
270 special characters '{' and '[' branch to set/range processing functions
272 char* buf = glob->glob_buffer;
274 GlobCode res = GLOB_OK;
276 *amount = 1; /* default is one single string */
278 while(*pattern != '\0' && *pattern != '{' && *pattern != '[') {
279 if(*pattern == '}' || *pattern == ']') {
280 snprintf(glob->errormsg, sizeof(glob->errormsg),
281 "unmatched close brace/bracket at pos %zu\n", pos);
285 /* only allow \ to escape known "special letters" */
286 if(*pattern == '\\' &&
287 (*(pattern+1) == '{' || *(pattern+1) == '[' ||
288 *(pattern+1) == '}' || *(pattern+1) == ']') ) {
290 /* escape character, skip '\' */
294 *buf++ = *pattern++; /* copy character to literal */
298 litindex = glob->size / 2;
299 /* literals 0,1,2,... correspond to size=0,2,4,... */
300 glob->literal[litindex] = strdup(glob->glob_buffer);
301 if(!glob->literal[litindex])
307 break; /* singular URL processed */
310 /* process set pattern */
311 res = glob_set(glob, ++pattern, ++pos, amount);
315 /* process range pattern */
316 res= glob_range(glob, ++pattern, ++pos, amount);
321 /* free that strdup'ed string again */
322 Curl_safefree(glob->literal[litindex]);
324 return res; /* something got wrong */
/*
 * glob_url() - allocate a URLGlob and parse `url` into it.
 *
 * glob:   output pointer for the new URLGlob (the assignment itself is on
 *         a line not shown in this excerpt).
 * url:    URL string that may contain {set} and [range] expressions.
 * urlnum: output count of URLs (the assignment is likewise not shown;
 *         presumably the amount reported by glob_word() - confirm).
 * error:  stream for the diagnostic, may be NULL to suppress it.
 *
 * A scratch buffer of strlen(url) + 1 bytes is allocated and stored as
 * glob_buffer; the URLGlob itself comes from calloc(). Parsing starts
 * with glob_word() at position 1.
 *
 * Returns CURLE_OUT_OF_MEMORY when either allocation fails. When parsing
 * fails, the message in errormsg (if any, and if `error` is non-NULL) is
 * printed, both allocations are released and CURLE_URL_MALFORMAT is
 * returned.
 *
 * NOTE(review): the fprintf() format adds no newline; most messages set
 * by the parsers end in one, but "out of memory" and "missing ']'" do
 * not.
 */
327 int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error)
330 * We can deal with any-size, just make a buffer with the same length
331 * as the specified URL!
333 URLGlob *glob_expand;
335 char *glob_buffer = malloc(strlen(url)+1);
338 if(NULL == glob_buffer)
339 return CURLE_OUT_OF_MEMORY;
341 glob_expand = calloc(1, sizeof(URLGlob));
342 if(NULL == glob_expand) {
343 Curl_safefree(glob_buffer);
344 return CURLE_OUT_OF_MEMORY;
346 glob_expand->size = 0;
347 glob_expand->urllen = strlen(url);
348 glob_expand->glob_buffer = glob_buffer;
349 glob_expand->beenhere=0;
350 if(GLOB_OK == glob_word(glob_expand, url, 1, &amount))
353 if(error && glob_expand->errormsg[0]) {
354 /* send error description to the error-stream */
355 fprintf(error, "curl: (%d) [globbing] %s",
356 CURLE_URL_MALFORMAT, glob_expand->errormsg);
358 /* it failed, we cleanup */
359 Curl_safefree(glob_buffer);
360 Curl_safefree(glob_expand);
362 return CURLE_URL_MALFORMAT;
/*
 * glob_cleanup() - release the memory owned by a URLGlob.
 *
 * Walks the component indexes from glob->size - 1 downwards. Even indexes
 * are literals and glob->literal[i/2] is freed. Odd indexes are patterns;
 * for a pattern of type UPTSet with a non-NULL element array, every
 * non-NULL element is freed and then the array itself. Finally
 * glob->glob_buffer is freed.
 *
 * NOTE(review): the loop condition `i < glob->size` with `--i` only
 * terminates if i is unsigned and wraps below zero; the declaration of i
 * is not visible in this excerpt - confirm.
 * NOTE(review): whether the URLGlob object itself is freed is decided on
 * lines not shown here.
 */
369 void glob_cleanup(URLGlob* glob)
374 for(i = glob->size - 1; i < glob->size; --i) {
375 if(!(i & 1)) { /* even indexes contain literals */
376 Curl_safefree(glob->literal[i/2]);
378 else { /* odd indexes contain sets or ranges */
379 if((glob->pattern[i/2].type == UPTSet) &&
380 (glob->pattern[i/2].content.Set.elements)) {
381 for(elem = glob->pattern[i/2].content.Set.size - 1;
384 if(glob->pattern[i/2].content.Set.elements[elem])
385 Curl_safefree(glob->pattern[i/2].content.Set.elements[elem]);
387 Curl_safefree(glob->pattern[i/2].content.Set.elements);
391 Curl_safefree(glob->glob_buffer);
/*
 * glob_next_url() - produce the next URL of the expansion.
 *
 * The pattern cursors are advanced like a counter, starting with the
 * rightmost pattern and moving left while a carry is pending:
 *  - Set:       ptr_s is incremented and wraps to 0 on reaching size;
 *  - CharRange: ptr_c is advanced by step and wraps to min_c once it
 *               exceeds max_c;
 *  - NumRange:  ptr_n is advanced by step and wraps to min_n once it
 *               exceeds max_n.
 * A carry that survives the leftmost pattern means the expansion is
 * exhausted (the statement taken in that case is on a line not shown).
 *
 * The URL is then assembled in glob->glob_buffer, whose capacity is taken
 * as glob->urllen + 1: even components are literals, odd components are
 * the current value of the corresponding pattern. The caller receives a
 * strdup() copy of the buffer.
 *
 * An unknown pattern type prints an "internal error" message to stdout
 * and terminates the process with exit(CURLE_FAILED_INIT).
 *
 * NOTE(review): how the first call avoids advancing the cursors (see the
 * beenhere field set in glob_url) is handled on lines not shown here.
 * NOTE(review): the CharRange store through buf has no visible matching
 * adjustment of buflen - confirm against the full source.
 */
395 char *glob_next_url(URLGlob *glob)
397 char *buf = glob->glob_buffer;
402 size_t buflen = glob->urllen+1;
410 /* implement a counter over the index ranges of all patterns,
411 starting with the rightmost pattern */
412 for(i = glob->size / 2 - 1; carry && i < glob->size; --i) {
414 pat = &glob->pattern[i];
417 if((pat->content.Set.elements) &&
418 (++pat->content.Set.ptr_s == pat->content.Set.size)) {
419 pat->content.Set.ptr_s = 0;
424 pat->content.CharRange.ptr_c = (char)(pat->content.CharRange.step +
425 (int)((unsigned char)pat->content.CharRange.ptr_c));
426 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
427 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
432 pat->content.NumRange.ptr_n += pat->content.NumRange.step;
433 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
434 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
439 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
440 exit (CURLE_FAILED_INIT);
443 if(carry) /* first pattern ptr has run into overflow, done! */
447 for(j = 0; j < glob->size; ++j) {
448 if(!(j&1)) { /* every other term (j even) is a literal */
449 lit = glob->literal[j/2];
450 len = snprintf(buf, buflen, "%s", lit);
454 else { /* the rest (i odd) are patterns */
455 pat = &glob->pattern[j/2];
458 if(pat->content.Set.elements) {
459 len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
460 snprintf(buf, buflen, "%s",
461 pat->content.Set.elements[pat->content.Set.ptr_s]);
467 *buf++ = pat->content.CharRange.ptr_c;
470 len = snprintf(buf, buflen, "%0*d",
471 pat->content.NumRange.padlength,
472 pat->content.NumRange.ptr_n);
477 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
478 exit (CURLE_FAILED_INIT);
483 return strdup(glob->glob_buffer);
/*
 * glob_match_url() - expand "#<number>" references in `filename` using the
 * current values of the patterns in `glob`.
 *
 * The result is built in a heap buffer that starts at strlen(filename) + 1
 * bytes and is enlarged with realloc() to twice the required length (plus
 * one byte for the terminator) whenever the next piece would not fit.
 *
 * For each '#' followed by a digit, the number is read with strtoul():
 *  - Set pattern:       the current element is appended;
 *  - CharRange pattern: the current character is appended;
 *  - NumRange pattern:  the current number is appended, formatted with
 *                       "%0*d" and the pattern's padlength;
 *  - a number that is zero or out of range is kept literally in the
 *    output, as the existing comment states.
 * Every other character is copied through unchanged.
 *
 * Returns NULL when the initial malloc() fails ("major failure"); the
 * unknown-pattern-type and realloc-failure paths free the buffer first.
 * The final return of the buffer is on a line not shown in this excerpt.
 *
 * NOTE(review): the assignment to i is not visible here; if i is the
 * zero-based pattern index, the bound `i <= glob->size / 2` admits
 * i == glob->size / 2, whereas glob_next_url() treats
 * glob->size / 2 - 1 as the last pattern index. That looks like an
 * off-by-one that would read a pattern slot that was never filled in -
 * confirm against the full source.
 */
486 char *glob_match_url(char *filename, URLGlob *glob)
492 char *appendthis = NULL;
493 size_t appendlen = 0;
495 /* We cannot use the glob_buffer for storage here since the filename may
496 * be longer than the URL we use. We allocate a good start size, then
497 * we need to realloc in case of need.
499 allocsize=strlen(filename)+1; /* make it at least one byte to store the
501 target = malloc(allocsize);
503 return NULL; /* major failure */
506 if(*filename == '#' && ISDIGIT(filename[1])) {
508 char *ptr = filename;
509 unsigned long num = strtoul(&filename[1], &filename, 10);
512 if(num && (i <= glob->size / 2)) {
513 URLPattern pat = glob->pattern[i];
516 if(pat.content.Set.elements) {
517 appendthis = pat.content.Set.elements[pat.content.Set.ptr_s];
519 strlen(pat.content.Set.elements[pat.content.Set.ptr_s]);
523 numbuf[0]=pat.content.CharRange.ptr_c;
529 snprintf(numbuf, sizeof(numbuf), "%0*d",
530 pat.content.NumRange.padlength,
531 pat.content.NumRange.ptr_n);
533 appendlen = strlen(numbuf);
536 printf("internal error: invalid pattern type (%d)\n",
538 Curl_safefree(target);
543 /* #[num] out of range, use the #[num] in the output */
545 appendthis=filename++;
550 appendthis=filename++;
553 if(appendlen + stringlen >= allocsize) {
555 /* we append a single byte to allow for the trailing byte to be appended
556 at the end of this function outside the while() loop */
557 allocsize = (appendlen + stringlen)*2;
558 newstr=realloc(target, allocsize + 1);
560 Curl_safefree(target);
565 memcpy(&target[stringlen], appendthis, appendlen);
566 stringlen += appendlen;
568 target[stringlen]= '\0';