1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Copyright (c) 1997-2007 University of Cambridge
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
41 /* This module contains some convenience functions for extracting substrings
42 from the subject string after a regex match has succeeded. The original idea
43 for these functions came from Scott Wimer. */
46 #include "pcre_internal.h"
49 /*************************************************
50 * Find number for named string *
51 *************************************************/
53 /* This function is used by the get_first_set() function below, as well
54 as being generally available. It assumes that names are unique.
57 code the compiled regex
58 stringname the name whose number is required
60 Returns: the number of the named parentheses, or a negative number
61 (PCRE_ERROR_NOSUBSTRING) if not found
65 pcre_get_stringnumber(const pcre *code, const char *stringname)
72 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
74 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
76 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
78 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
84 int mid = (top + bot) / 2;
85 uschar *entry = nametable + entrysize*mid;
86 int c = strcmp(stringname, (char *)(entry + 2));
87 if (c == 0) return (entry[0] << 8) + entry[1];
88 if (c > 0) bot = mid + 1; else top = mid;
91 return PCRE_ERROR_NOSUBSTRING;
96 /*************************************************
97 * Find (multiple) entries for named string *
98 *************************************************/
100 /* This is used by the get_first_set() function below, as well as being
101 generally available. It is used when duplicated names are permitted.
104 code the compiled regex
105 stringname the name whose entries required
106 firstptr where to put the pointer to the first entry
107 lastptr where to put the pointer to the last entry
109 Returns: the length of each entry, or a negative number
110 (PCRE_ERROR_NOSUBSTRING) if not found
114 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
115 char **firstptr, char **lastptr)
120 uschar *nametable, *lastentry;
122 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
124 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
126 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
128 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
131 lastentry = nametable + entrysize * (top - 1);
135 int mid = (top + bot) / 2;
136 uschar *entry = nametable + entrysize*mid;
137 int c = strcmp(stringname, (char *)(entry + 2));
140 uschar *first = entry;
141 uschar *last = entry;
142 while (first > nametable)
144 if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
147 while (last < lastentry)
149 if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
152 *firstptr = (char *)first;
153 *lastptr = (char *)last;
156 if (c > 0) bot = mid + 1; else top = mid;
159 return PCRE_ERROR_NOSUBSTRING;
164 /*************************************************
165 * Find first set of multiple named strings *
166 *************************************************/
168 /* This function allows for duplicate names in the table of named substrings.
169 It returns the number of the first one that was set in a pattern match.
172 code the compiled regex
173 stringname the name of the capturing substring
174 ovector the vector of matched substrings
176 Returns: the number of the first that is set,
177 or the number of the last one if none are set,
178 or a negative number on error
182 get_first_set(const pcre *code, const char *stringname, int *ovector)
184 const real_pcre *re = (const real_pcre *)code;
188 if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0)
189 return pcre_get_stringnumber(code, stringname);
190 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
191 if (entrysize <= 0) return entrysize;
192 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
194 int n = (entry[0] << 8) + entry[1];
195 if (ovector[n*2] >= 0) return n;
197 return (first[0] << 8) + first[1];
203 /*************************************************
204 * Copy captured string to given buffer *
205 *************************************************/
207 /* This function copies a single captured substring into a given buffer.
208 Note that we use memcpy() rather than strncpy() in case there are binary zeros
212 subject the subject string that was matched
213 ovector pointer to the offsets table
214 stringcount the number of substrings that were captured
215 (i.e. the yield of the pcre_exec call, unless
216 that was zero, in which case it should be 1/3
217 of the offset table size)
218 stringnumber the number of the required substring
219 buffer where to put the substring
220 size the size of the buffer
222 Returns: if successful:
223 the length of the copied string, not including the zero
224 that is put on the end; can be zero
226 PCRE_ERROR_NOMEMORY (-6) buffer too small
227 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
231 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
232 int stringnumber, char *buffer, int size)
235 if (stringnumber < 0 || stringnumber >= stringcount)
236 return PCRE_ERROR_NOSUBSTRING;
238 yield = ovector[stringnumber+1] - ovector[stringnumber];
239 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
240 memcpy(buffer, subject + ovector[stringnumber], yield);
247 /*************************************************
248 * Copy named captured string to given buffer *
249 *************************************************/
251 /* This function copies a single captured substring into a given buffer,
252 identifying it by name. If the regex permits duplicate names, the first
253 substring that is set is chosen.
256 code the compiled regex
257 subject the subject string that was matched
258 ovector pointer to the offsets table
259 stringcount the number of substrings that were captured
260 (i.e. the yield of the pcre_exec call, unless
261 that was zero, in which case it should be 1/3
262 of the offset table size)
263 stringname the name of the required substring
264 buffer where to put the substring
265 size the size of the buffer
267 Returns: if successful:
268 the length of the copied string, not including the zero
269 that is put on the end; can be zero
271 PCRE_ERROR_NOMEMORY (-6) buffer too small
272 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
276 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
277 int stringcount, const char *stringname, char *buffer, int size)
279 int n = get_first_set(code, stringname, ovector);
280 if (n <= 0) return n;
281 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
286 /*************************************************
287 * Copy all captured strings to new store *
288 *************************************************/
290 /* This function gets one chunk of store and builds a list of pointers and all
291 of the captured substrings in it. A NULL pointer is put on the end of the list.
294 subject the subject string that was matched
295 ovector pointer to the offsets table
296 stringcount the number of substrings that were captured
297 (i.e. the yield of the pcre_exec call, unless
298 that was zero, in which case it should be 1/3
299 of the offset table size)
300 listptr set to point to the list of pointers
302 Returns: if successful: 0
304 PCRE_ERROR_NOMEMORY (-6) failed to get store
308 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
309 const char ***listptr)
312 int size = sizeof(char *);
313 int double_count = stringcount * 2;
317 for (i = 0; i < double_count; i += 2)
318 size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
320 stringlist = (char **)(pcre_malloc)(size);
321 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
323 *listptr = (const char **)stringlist;
324 p = (char *)(stringlist + stringcount + 1);
326 for (i = 0; i < double_count; i += 2)
328 int len = ovector[i+1] - ovector[i];
329 memcpy(p, subject + ovector[i], len);
341 /*************************************************
342 * Free store obtained by get_substring_list *
343 *************************************************/
345 /* This function exists for the benefit of people calling PCRE from non-C
346 programs that can call its functions, but not free() or (pcre_free)() directly.
348 Argument: the result of a previous pcre_get_substring_list()
353 pcre_free_substring_list(const char **pointer)
355 (pcre_free)((void *)pointer);
360 /*************************************************
361 * Copy captured string to new store *
362 *************************************************/
364 /* This function copies a single captured substring into a piece of new
368 subject the subject string that was matched
369 ovector pointer to the offsets table
370 stringcount the number of substrings that were captured
371 (i.e. the yield of the pcre_exec call, unless
372 that was zero, in which case it should be 1/3
373 of the offset table size)
374 stringnumber the number of the required substring
375 stringptr where to put a pointer to the substring
377 Returns: if successful:
378 the length of the string, not including the zero that
379 is put on the end; can be zero
381 PCRE_ERROR_NOMEMORY (-6) failed to get store
382 PCRE_ERROR_NOSUBSTRING (-7) substring not present
386 pcre_get_substring(const char *subject, int *ovector, int stringcount,
387 int stringnumber, const char **stringptr)
391 if (stringnumber < 0 || stringnumber >= stringcount)
392 return PCRE_ERROR_NOSUBSTRING;
394 yield = ovector[stringnumber+1] - ovector[stringnumber];
395 substring = (char *)(pcre_malloc)(yield + 1);
396 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
397 memcpy(substring, subject + ovector[stringnumber], yield);
398 substring[yield] = 0;
399 *stringptr = substring;
405 /*************************************************
406 * Copy named captured string to new store *
407 *************************************************/
409 /* This function copies a single captured substring, identified by name, into
410 new store. If the regex permits duplicate names, the first substring that is
414 code the compiled regex
415 subject the subject string that was matched
416 ovector pointer to the offsets table
417 stringcount the number of substrings that were captured
418 (i.e. the yield of the pcre_exec call, unless
419 that was zero, in which case it should be 1/3
420 of the offset table size)
421 stringname the name of the required substring
422 stringptr where to put the pointer
424 Returns: if successful:
425 the length of the copied string, not including the zero
426 that is put on the end; can be zero
428 PCRE_ERROR_NOMEMORY (-6) couldn't get memory
429 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
433 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
434 int stringcount, const char *stringname, const char **stringptr)
436 int n = get_first_set(code, stringname, ovector);
437 if (n <= 0) return n;
438 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
444 /*************************************************
445 * Free store obtained by get_substring *
446 *************************************************/
448 /* This function exists for the benefit of people calling PCRE from non-C
449 programs that can call its functions, but not free() or (pcre_free)() directly.
451 Argument: the result of a previous pcre_get_substring()
456 pcre_free_substring(const char *pointer)
458 (pcre_free)((void *)pointer);
461 /* End of pcre_get.c */