Introducing range stepping to the curl globbing support. Now you can specify a step counter by adding :[num] within the brackets when specifying a range.

author Daniel Stenberg <daniel@haxx.se>

Thu, 10 Nov 2005 22:11:01 +0000 (22:11 +0000)

committer Daniel Stenberg <daniel@haxx.se>

Thu, 10 Nov 2005 22:11:01 +0000 (22:11 +0000)
author Daniel Stenberg <daniel@haxx.se>
Thu, 10 Nov 2005 22:11:01 +0000 (22:11 +0000)
committer Daniel Stenberg <daniel@haxx.se>
Thu, 10 Nov 2005 22:11:01 +0000 (22:11 +0000)
diff --git a/CHANGES b/CHANGES

index 9bf8f0c..5ee6285 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -8,6 +8,18 @@
  
  
  
+Daniel (10 November 2005)
+- Introducing range stepping to the curl globbing support. Now you can specify
+  step counter by adding :[num] within the brackets when specifying a range:
+
+   [1-100:10]
+   [a-z:2]
+
+  If no step counter is set, it defaults to 1 as before:
+
+   [1-100]
+   [d-h]
+
  Daniel (8 November 2005)
  - Removed the use of AI_CANONNAME in the IPv6-enabled resolver functions since
    we really have no use for reverse lookups of the address.
diff --git a/RELEASE-NOTES b/RELEASE-NOTES

index 799784c..ac18e8f 100644 (file)
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -11,6 +11,7 @@ Curl and libcurl 7.15.1
  
  This release includes the following changes:
  
+ o URL globbing now offers "range steps": [1-100:10]
   o LDAPv3 is now the preferred LDAP protocol version
   o --max-redirs and CURLOPT_MAXREDIRS set to 0 limits redirects
   o improved MSVC makefile
diff --git a/docs/curl.1 b/docs/curl.1

index ccc22dd..688a14b 100644 (file)
--- a/docs/curl.1
+++ b/docs/curl.1
@@ -21,7 +21,7 @@
  .\" * $Id$
  .\" **************************************************************************
  .\"
-.TH curl 1 "4 Sep 2005" "Curl 7.14.2" "Curl Manual"
+.TH curl 1 "10 Nov 2005" "Curl 7.15.1" "Curl Manual"
  .SH NAME
  curl \- transfer a URL
  .SH SYNOPSIS
@@ -64,6 +64,12 @@ several ones next to each other:
  You can specify any amount of URLs on the command line. They will be fetched
  in a sequential manner in the specified order.
  
+Since curl 7.15.1 you can also specify step counter for the ranges, so that
+you can get every Nth number or letter:
+
+ http://www.numericals.com/file[1-100:10].txt
+ http://www.letters.com/file[a-z:2].txt
+
  If you specify URL without protocol:// prefix, curl will attempt to guess what
  protocol you might want. It will then default to HTTP but try other protocols
  based on often-used host name prefixes. For example, for host names starting
diff --git a/src/urlglob.c b/src/urlglob.c

index 2843a51..0d7b577 100644 (file)
--- a/src/urlglob.c
+++ b/src/urlglob.c
@@ -166,78 +166,101 @@ static GlobCode glob_range(URLGlob *glob, char *pattern,
    URLPattern *pat;
    char *c;
    int wordamount=1;
+  char sep;
+  char sep2;
+  int step;
+  int rc;
  
    pat = (URLPattern*)&glob->pattern[glob->size / 2];
    /* patterns 0,1,2,... correspond to size=1,3,5,... */
    ++glob->size;
  
    if (isalpha((int)*pattern)) {         /* character range detected */
+    char min_c;
+    char max_c;
+
      pat->type = UPTCharRange;
-    if (sscanf(pattern, "%c-%c]", &pat->content.CharRange.min_c,
-               &pat->content.CharRange.max_c) != 2 ||
-        pat->content.CharRange.min_c >= pat->content.CharRange.max_c ||
-        pat->content.CharRange.max_c - pat->content.CharRange.min_c > 'z' - 'a') {
+    rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2);
+    if ((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) {
        /* the pattern is not well-formed */
        snprintf(glob->errormsg, sizeof(glob->errormsg),
-               "illegal pattern or range specification after pos %d\n", pos);
+               "errpr: bad range specification after pos %d\n", pos);
        return GLOB_ERROR;
      }
-    pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
-    /* always check for a literal (may be "") between patterns */
  
-    if(GLOB_ERROR == glob_word(glob, pattern + 4, pos + 4, &wordamount))
-      wordamount=1;
+    /* check the (first) separating character */
+    if((sep != ']') && (sep != ':')) {
+      snprintf(glob->errormsg, sizeof(glob->errormsg),
+               "error: unsupported character (%c) after range at pos %d\n",
+               sep, pos);
+      return GLOB_ERROR;
+    }
  
-    *amount = (pat->content.CharRange.max_c -
-               pat->content.CharRange.min_c + 1) *
-      wordamount;
+    /* if there was a ":[num]" thing, use that as step or else use 1 */
+    pat->content.CharRange.step =
+      ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1;
  
-    return GLOB_OK;
+    pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
+    pat->content.CharRange.max_c = max_c;
    }
-
-  if (isdigit((int)*pattern)) { /* numeric range detected */
+  else if (isdigit((int)*pattern)) { /* numeric range detected */
+    int min_n;
+    int max_n;
  
      pat->type = UPTNumRange;
      pat->content.NumRange.padlength = 0;
-    if (sscanf(pattern, "%d-%d]",
-               &pat->content.NumRange.min_n,
-               &pat->content.NumRange.max_n) != 2 ||
-        pat->content.NumRange.min_n >= pat->content.NumRange.max_n) {
+
+    rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2);
+
+    if ((rc < 2) || (min_n >= max_n)) {
        /* the pattern is not well-formed */
        snprintf(glob->errormsg, sizeof(glob->errormsg),
-               "error: illegal pattern or range specification after pos %d\n",
-               pos);
+               "error: bad range specification after pos %d\n", pos);
        return GLOB_ERROR;
      }
+    pat->content.NumRange.ptr_n =  pat->content.NumRange.min_n = min_n;
+    pat->content.NumRange.max_n = max_n;
+
+    /* if there was a ":[num]" thing, use that as step or else use 1 */
+    pat->content.NumRange.step =
+      ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1;
+
      if (*pattern == '0') {              /* leading zero specified */
        c = pattern;
        while (isdigit((int)*c++))
          ++pat->content.NumRange.padlength; /* padding length is set for all
                                                instances of this pattern */
      }
-    pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
-    c = (char*)strchr(pattern, ']'); /* continue after next ']' */
-    if(c)
-      c++;
-    else {
-      snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
-      return GLOB_ERROR; /* missing ']' */
-    }
  
-    /* always check for a literal (may be "") between patterns */
+  }
+  else {
+    snprintf(glob->errormsg, sizeof(glob->errormsg),
+             "illegal character in range specification at pos %d\n", pos);
+    return GLOB_ERROR;
+  }
  
-    if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount))
-      wordamount = 1;
+  c = (char*)strchr(pattern, ']'); /* continue after next ']' */
+  if(c)
+    c++;
+  else {
+    snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
+    return GLOB_ERROR; /* missing ']' */
+  }
  
-    *amount = (pat->content.NumRange.max_n -
-               pat->content.NumRange.min_n + 1) *
+  /* always check for a literal (may be "") between patterns */
+
+  if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount))
+    wordamount = 1;
+
+  if(pat->type == UPTCharRange)
+    *amount = (pat->content.CharRange.max_c -
+               pat->content.CharRange.min_c + 1) *
        wordamount;
+  else
+    *amount = (pat->content.NumRange.max_n -
+               pat->content.NumRange.min_n + 1) * wordamount;
  
-    return GLOB_OK;
-  }
-  snprintf(glob->errormsg, sizeof(glob->errormsg),
-           "illegal character in range specification at pos %d\n", pos);
-  return GLOB_ERROR;
+  return GLOB_OK;
  }
  
  static GlobCode glob_word(URLGlob *glob, char *pattern,
@@ -374,35 +397,36 @@ char *glob_next_url(URLGlob *glob)
    char *lit;
    size_t i;
    size_t j;
-  int carry;
  
    if (!glob->beenhere)
      glob->beenhere = 1;
    else {
-    carry = 1;
+    bool carry = TRUE;
  
      /* implement a counter over the index ranges of all patterns,
         starting with the rightmost pattern */
      for (i = glob->size / 2 - 1; carry && i < glob->size; --i) {
-      carry = 0;
+      carry = FALSE;
        pat = &glob->pattern[i];
        switch (pat->type) {
        case UPTSet:
          if (++pat->content.Set.ptr_s == pat->content.Set.size) {
            pat->content.Set.ptr_s = 0;
-          carry = 1;
+          carry = TRUE;
          }
          break;
        case UPTCharRange:
-        if (++pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
+        pat->content.CharRange.ptr_c += pat->content.CharRange.step;
+        if (pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
            pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
-          carry = 1;
+          carry = TRUE;
          }
          break;
        case UPTNumRange:
-        if (++pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
+        pat->content.NumRange.ptr_n += pat->content.NumRange.step;
+        if (pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
            pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
-          carry = 1;
+          carry = TRUE;
          }
          break;
        default:
diff --git a/src/urlglob.h b/src/urlglob.h

index fdda41e..d081840 100644 (file)
--- a/src/urlglob.h
+++ b/src/urlglob.h
@@ -1,18 +1,18 @@
  #ifndef __URLGLOB_H
  #define __URLGLOB_H
  /***************************************************************************
- *                                  _   _ ____  _     
- *  Project                     ___| | | |  _ \| |    
- *                             / __| | | | |_) | |    
- *                            | (__| |_| |  _ <| |___ 
+ *                                  _   _ ____  _
+ *  Project                     ___| | | |  _ \| |
+ *                             / __| | | | |_) | |
+ *                            | (__| |_| |  _ <| |___
   *                             \___|\___/|_| \_\_____|
   *
- * Copyright (C) 1998 - 2004, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) 1998 - 2005, Daniel Stenberg, <daniel@haxx.se>, et al.
   *
   * This software is licensed as described in the file COPYING, which
   * you should have received as part of this distribution. The terms
   * are also available at http://curl.haxx.se/docs/copyright.html.
- * 
+ *
   * You may opt to use, copy, modify, merge, publish, distribute and/or sell
   * copies of the Software, and permit persons to whom the Software is
   * furnished to do so, under the terms of the COPYING file.
@@ -39,11 +39,13 @@ typedef struct {
      struct {
        char min_c, max_c;
        char ptr_c;
+      int step;
      } CharRange;
      struct {
        int min_n, max_n;
        short padlength;
        int ptr_n;
+      int step;
      } NumRange ;
    } content;
  } URLPattern;
@@ -60,7 +62,7 @@ typedef struct {
  
  int glob_url(URLGlob**, char*, int *, FILE *);
  char* glob_next_url(URLGlob*);
-char* glob_match_url(char*, URLGlob *); 
+char* glob_match_url(char*, URLGlob *);
  void glob_cleanup(URLGlob* glob);
  
  #endif
author	Daniel Stenberg <daniel@haxx.se>
	Thu, 10 Nov 2005 22:11:01 +0000 (22:11 +0000)
committer	Daniel Stenberg <daniel@haxx.se>
	Thu, 10 Nov 2005 22:11:01 +0000 (22:11 +0000)
CHANGES		patch | blob | history
RELEASE-NOTES		patch | blob | history
docs/curl.1		patch | blob | history
src/urlglob.c		patch | blob | history
src/urlglob.h		patch | blob | history