1 /***************************************************************************
3 * Project ___| | | | _ \| |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
8 * Copyright (C) 1998 - 2010, Daniel Stenberg, <daniel@haxx.se>, et al.
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at http://curl.haxx.se/docs/copyright.html.
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 ***************************************************************************/
23 /* client-local setup.h */
30 #include <curl/curl.h>
32 #define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */
33 #include <curl/mprintf.h>
36 #include "os-specific.h"
38 #if defined(CURLDEBUG) && defined(CURLTOOLDEBUG)
50 * Input a full globbed string, set the forth argument to the amount of
51 * strings we get out of this. Return GlobCode.
53 static GlobCode glob_word(URLGlob *, /* object anchor */
54 char *, /* globbed string */
55 size_t, /* position */
56 int *); /* returned number of strings */
58 static GlobCode glob_set(URLGlob *glob, char *pattern,
59 size_t pos, int *amount)
61 /* processes a set expression with the point behind the opening '{'
62 ','-separated elements are collected until the next closing '}'
65 char* buf = glob->glob_buffer;
68 pat = (URLPattern*)&glob->pattern[glob->size / 2];
69 /* patterns 0,1,2,... correspond to size=1,3,5,... */
71 pat->content.Set.size = 0;
72 pat->content.Set.ptr_s = 0;
73 /* FIXME: Here's a nasty zero size malloc */
74 pat->content.Set.elements = (char**)malloc(0);
79 case '\0': /* URL ended while set was still open */
80 snprintf(glob->errormsg, sizeof(glob->errormsg),
81 "unmatched brace at pos %zu\n", pos);
85 case '[': /* no nested expressions at this time */
86 snprintf(glob->errormsg, sizeof(glob->errormsg),
87 "nested braces not supported at pos %zu\n", pos);
91 case '}': /* set element completed */
93 pat->content.Set.elements =
94 realloc(pat->content.Set.elements,
95 (pat->content.Set.size + 1) * sizeof(char*));
96 if (!pat->content.Set.elements) {
97 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory");
100 pat->content.Set.elements[pat->content.Set.size] =
101 strdup(glob->glob_buffer);
102 ++pat->content.Set.size;
104 if (*pattern == '}') {
105 /* entire set pattern completed */
108 /* always check for a literal (may be "") between patterns */
109 if(GLOB_ERROR == glob_word(glob, ++pattern, ++pos, &wordamount))
111 *amount = pat->content.Set.size * wordamount;
117 buf = glob->glob_buffer;
122 case ']': /* illegal closing bracket */
123 snprintf(glob->errormsg, sizeof(glob->errormsg),
124 "illegal pattern at pos %zu\n", pos);
127 case '\\': /* escaped character, skip '\' */
132 /* intentional fallthrough */
134 *buf++ = *pattern++; /* copy character to set element */
141 static GlobCode glob_range(URLGlob *glob, char *pattern,
142 size_t pos, int *amount)
144 /* processes a range expression with the point behind the opening '['
145 - char range: e.g. "a-z]", "B-Q]"
146 - num range: e.g. "0-9]", "17-2000]"
147 - num range with leading zeros: e.g. "001-999]"
148 expression is checked for well-formedness and collected until the next ']'
158 pat = (URLPattern*)&glob->pattern[glob->size / 2];
159 /* patterns 0,1,2,... correspond to size=1,3,5,... */
162 if (ISALPHA(*pattern)) { /* character range detected */
166 pat->type = UPTCharRange;
167 rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2);
168 if ((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) {
169 /* the pattern is not well-formed */
170 snprintf(glob->errormsg, sizeof(glob->errormsg),
171 "error: bad range specification after pos %zu\n", pos);
175 /* check the (first) separating character */
176 if((sep != ']') && (sep != ':')) {
177 snprintf(glob->errormsg, sizeof(glob->errormsg),
178 "error: unsupported character (%c) after range at pos %zu\n",
183 /* if there was a ":[num]" thing, use that as step or else use 1 */
184 pat->content.CharRange.step =
185 ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1;
187 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
188 pat->content.CharRange.max_c = max_c;
190 else if (ISDIGIT(*pattern)) { /* numeric range detected */
194 pat->type = UPTNumRange;
195 pat->content.NumRange.padlength = 0;
197 rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2);
199 if ((rc < 2) || (min_n > max_n)) {
200 /* the pattern is not well-formed */
201 snprintf(glob->errormsg, sizeof(glob->errormsg),
202 "error: bad range specification after pos %zu\n", pos);
205 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
206 pat->content.NumRange.max_n = max_n;
208 /* if there was a ":[num]" thing, use that as step or else use 1 */
209 pat->content.NumRange.step =
210 ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1;
212 if (*pattern == '0') { /* leading zero specified */
214 while (ISDIGIT(*c)) {
216 ++pat->content.NumRange.padlength; /* padding length is set for all
217 instances of this pattern */
223 snprintf(glob->errormsg, sizeof(glob->errormsg),
224 "illegal character in range specification at pos %zu\n", pos);
228 c = (char*)strchr(pattern, ']'); /* continue after next ']' */
232 snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
233 return GLOB_ERROR; /* missing ']' */
236 /* always check for a literal (may be "") between patterns */
238 if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount))
241 if(pat->type == UPTCharRange)
242 *amount = (pat->content.CharRange.max_c -
243 pat->content.CharRange.min_c + 1) *
246 *amount = (pat->content.NumRange.max_n -
247 pat->content.NumRange.min_n + 1) * wordamount;
252 static GlobCode glob_word(URLGlob *glob, char *pattern,
253 size_t pos, int *amount)
255 /* processes a literal string component of a URL
256 special characters '{' and '[' branch to set/range processing functions
258 char* buf = glob->glob_buffer;
260 GlobCode res = GLOB_OK;
262 *amount = 1; /* default is one single string */
264 while (*pattern != '\0' && *pattern != '{' && *pattern != '[') {
265 if (*pattern == '}' || *pattern == ']') {
266 snprintf(glob->errormsg, sizeof(glob->errormsg),
267 "unmatched close brace/bracket at pos %zu\n", pos);
271 /* only allow \ to escape known "special letters" */
272 if (*pattern == '\\' &&
273 (*(pattern+1) == '{' || *(pattern+1) == '[' ||
274 *(pattern+1) == '}' || *(pattern+1) == ']') ) {
276 /* escape character, skip '\' */
280 *buf++ = *pattern++; /* copy character to literal */
284 litindex = glob->size / 2;
285 /* literals 0,1,2,... correspond to size=0,2,4,... */
286 glob->literal[litindex] = strdup(glob->glob_buffer);
287 if(!glob->literal[litindex])
293 break; /* singular URL processed */
296 /* process set pattern */
297 res = glob_set(glob, ++pattern, ++pos, amount);
301 /* process range pattern */
302 res= glob_range(glob, ++pattern, ++pos, amount);
307 /* free that strdup'ed string again */
308 free(glob->literal[litindex]);
310 return res; /* something got wrong */
313 int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error)
316 * We can deal with any-size, just make a buffer with the same length
317 * as the specified URL!
319 URLGlob *glob_expand;
321 char *glob_buffer = malloc(strlen(url)+1);
324 if(NULL == glob_buffer)
325 return CURLE_OUT_OF_MEMORY;
327 glob_expand = calloc(1, sizeof(URLGlob));
328 if(NULL == glob_expand) {
330 return CURLE_OUT_OF_MEMORY;
332 glob_expand->size = 0;
333 glob_expand->urllen = strlen(url);
334 glob_expand->glob_buffer = glob_buffer;
335 glob_expand->beenhere=0;
336 if(GLOB_OK == glob_word(glob_expand, url, 1, &amount))
339 if(error && glob_expand->errormsg[0]) {
340 /* send error description to the error-stream */
341 fprintf(error, "curl: (%d) [globbing] %s",
342 CURLE_URL_MALFORMAT, glob_expand->errormsg);
344 /* it failed, we cleanup */
349 return CURLE_URL_MALFORMAT;
356 void glob_cleanup(URLGlob* glob)
361 for (i = glob->size - 1; i < glob->size; --i) {
362 if (!(i & 1)) { /* even indexes contain literals */
363 free(glob->literal[i/2]);
365 else { /* odd indexes contain sets or ranges */
366 if (glob->pattern[i/2].type == UPTSet) {
367 for (elem = glob->pattern[i/2].content.Set.size - 1;
370 free(glob->pattern[i/2].content.Set.elements[elem]);
372 free(glob->pattern[i/2].content.Set.elements);
376 free(glob->glob_buffer);
380 char *glob_next_url(URLGlob *glob)
382 char *buf = glob->glob_buffer;
387 size_t buflen = glob->urllen+1;
395 /* implement a counter over the index ranges of all patterns,
396 starting with the rightmost pattern */
397 for (i = glob->size / 2 - 1; carry && i < glob->size; --i) {
399 pat = &glob->pattern[i];
402 if (++pat->content.Set.ptr_s == pat->content.Set.size) {
403 pat->content.Set.ptr_s = 0;
408 pat->content.CharRange.ptr_c = (char)(pat->content.CharRange.step +
409 (int)((unsigned char)pat->content.CharRange.ptr_c));
410 if (pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
411 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
416 pat->content.NumRange.ptr_n += pat->content.NumRange.step;
417 if (pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
418 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
423 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
424 exit (CURLE_FAILED_INIT);
427 if (carry) /* first pattern ptr has run into overflow, done! */
431 for (j = 0; j < glob->size; ++j) {
432 if (!(j&1)) { /* every other term (j even) is a literal */
433 lit = glob->literal[j/2];
434 len = snprintf(buf, buflen, "%s", lit);
438 else { /* the rest (i odd) are patterns */
439 pat = &glob->pattern[j/2];
442 len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
443 snprintf(buf, buflen, "%s",
444 pat->content.Set.elements[pat->content.Set.ptr_s]);
449 *buf++ = pat->content.CharRange.ptr_c;
452 len = snprintf(buf, buflen, "%0*d",
453 pat->content.NumRange.padlength,
454 pat->content.NumRange.ptr_n);
459 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
460 exit (CURLE_FAILED_INIT);
465 return strdup(glob->glob_buffer);
468 char *glob_match_url(char *filename, URLGlob *glob)
474 char *appendthis = NULL;
475 size_t appendlen = 0;
477 /* We cannot use the glob_buffer for storage here since the filename may
478 * be longer than the URL we use. We allocate a good start size, then
479 * we need to realloc in case of need.
481 allocsize=strlen(filename)+1; /* make it at least one byte to store the
483 target = malloc(allocsize);
485 return NULL; /* major failure */
488 if (*filename == '#' && ISDIGIT(filename[1])) {
490 char *ptr = filename;
491 unsigned long num = strtoul(&filename[1], &filename, 10);
494 if (num && (i <= glob->size / 2)) {
495 URLPattern pat = glob->pattern[i];
498 appendthis = pat.content.Set.elements[pat.content.Set.ptr_s];
499 appendlen = strlen(pat.content.Set.elements[pat.content.Set.ptr_s]);
502 numbuf[0]=pat.content.CharRange.ptr_c;
508 snprintf(numbuf, sizeof(numbuf), "%0*d",
509 pat.content.NumRange.padlength,
510 pat.content.NumRange.ptr_n);
512 appendlen = strlen(numbuf);
515 printf("internal error: invalid pattern type (%d)\n",
522 /* #[num] out of range, use the #[num] in the output */
524 appendthis=filename++;
529 appendthis=filename++;
532 if(appendlen + stringlen >= allocsize) {
534 /* we append a single byte to allow for the trailing byte to be appended
535 at the end of this function outside the while() loop */
536 allocsize = (appendlen + stringlen)*2;
537 newstr=realloc(target, allocsize + 1);
544 memcpy(&target[stringlen], appendthis, appendlen);
545 stringlen += appendlen;
547 target[stringlen]= '\0';