1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Copyright (c) 1997-2008 University of Cambridge
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
41 /* This module contains some convenience functions for extracting substrings
42 from the subject string after a regex match has succeeded. The original idea
43 for these functions came from Scott Wimer. */
50 #include "pcre_internal.h"
53 /*************************************************
54 * Find number for named string *
55 *************************************************/
57 /* This function is used by the get_first_set() function below, as well
58 as being generally available. It assumes that names are unique.
61 code the compiled regex
62 stringname the name whose number is required
64 Returns: the number of the named parentheses, or a negative number
65 (PCRE_ERROR_NOSUBSTRING) if not found
68 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
69 pcre_get_stringnumber(const pcre *code, const char *stringname)
76 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
78 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
80 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
82 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
88 int mid = (top + bot) / 2;
89 uschar *entry = nametable + entrysize*mid;
90 int c = strcmp(stringname, (char *)(entry + 2));
91 if (c == 0) return (entry[0] << 8) + entry[1];
92 if (c > 0) bot = mid + 1; else top = mid;
95 return PCRE_ERROR_NOSUBSTRING;
100 /*************************************************
101 * Find (multiple) entries for named string *
102 *************************************************/
104 /* This is used by the get_first_set() function below, as well as being
105 generally available. It is used when duplicated names are permitted.
108 code the compiled regex
109 stringname the name whose entries required
110 firstptr where to put the pointer to the first entry
111 lastptr where to put the pointer to the last entry
113 Returns: the length of each entry, or a negative number
114 (PCRE_ERROR_NOSUBSTRING) if not found
117 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
118 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
119 char **firstptr, char **lastptr)
124 uschar *nametable, *lastentry;
126 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
128 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
130 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
132 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
135 lastentry = nametable + entrysize * (top - 1);
139 int mid = (top + bot) / 2;
140 uschar *entry = nametable + entrysize*mid;
141 int c = strcmp(stringname, (char *)(entry + 2));
144 uschar *first = entry;
145 uschar *last = entry;
146 while (first > nametable)
148 if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
151 while (last < lastentry)
153 if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
156 *firstptr = (char *)first;
157 *lastptr = (char *)last;
160 if (c > 0) bot = mid + 1; else top = mid;
163 return PCRE_ERROR_NOSUBSTRING;
167 #ifdef NOT_USED_IN_GLIB
169 /*************************************************
170 * Find first set of multiple named strings *
171 *************************************************/
173 /* This function allows for duplicate names in the table of named substrings.
174 It returns the number of the first one that was set in a pattern match.
177 code the compiled regex
178 stringname the name of the capturing substring
179 ovector the vector of matched substrings
181 Returns: the number of the first that is set,
182 or the number of the last one if none are set,
183 or a negative number on error
187 get_first_set(const pcre *code, const char *stringname, int *ovector)
189 const real_pcre *re = (const real_pcre *)code;
193 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
194 return pcre_get_stringnumber(code, stringname);
195 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
196 if (entrysize <= 0) return entrysize;
197 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
199 int n = (entry[0] << 8) + entry[1];
200 if (ovector[n*2] >= 0) return n;
202 return (first[0] << 8) + first[1];
208 /*************************************************
209 * Copy captured string to given buffer *
210 *************************************************/
212 /* This function copies a single captured substring into a given buffer.
213 Note that we use memcpy() rather than strncpy() in case there are binary zeros
217 subject the subject string that was matched
218 ovector pointer to the offsets table
219 stringcount the number of substrings that were captured
220 (i.e. the yield of the pcre_exec call, unless
221 that was zero, in which case it should be 1/3
222 of the offset table size)
223 stringnumber the number of the required substring
224 buffer where to put the substring
225 size the size of the buffer
227 Returns: if successful:
228 the length of the copied string, not including the zero
229 that is put on the end; can be zero
231 PCRE_ERROR_NOMEMORY (-6) buffer too small
232 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
235 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
236 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
237 int stringnumber, char *buffer, int size)
240 if (stringnumber < 0 || stringnumber >= stringcount)
241 return PCRE_ERROR_NOSUBSTRING;
243 yield = ovector[stringnumber+1] - ovector[stringnumber];
244 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
245 memcpy(buffer, subject + ovector[stringnumber], yield);
252 /*************************************************
253 * Copy named captured string to given buffer *
254 *************************************************/
256 /* This function copies a single captured substring into a given buffer,
257 identifying it by name. If the regex permits duplicate names, the first
258 substring that is set is chosen.
261 code the compiled regex
262 subject the subject string that was matched
263 ovector pointer to the offsets table
264 stringcount the number of substrings that were captured
265 (i.e. the yield of the pcre_exec call, unless
266 that was zero, in which case it should be 1/3
267 of the offset table size)
268 stringname the name of the required substring
269 buffer where to put the substring
270 size the size of the buffer
272 Returns: if successful:
273 the length of the copied string, not including the zero
274 that is put on the end; can be zero
276 PCRE_ERROR_NOMEMORY (-6) buffer too small
277 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
280 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
281 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
282 int stringcount, const char *stringname, char *buffer, int size)
284 int n = get_first_set(code, stringname, ovector);
285 if (n <= 0) return n;
286 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
291 /*************************************************
292 * Copy all captured strings to new store *
293 *************************************************/
295 /* This function gets one chunk of store and builds a list of pointers and all
296 of the captured substrings in it. A NULL pointer is put on the end of the list.
299 subject the subject string that was matched
300 ovector pointer to the offsets table
301 stringcount the number of substrings that were captured
302 (i.e. the yield of the pcre_exec call, unless
303 that was zero, in which case it should be 1/3
304 of the offset table size)
305 listptr set to point to the list of pointers
307 Returns: if successful: 0
309 PCRE_ERROR_NOMEMORY (-6) failed to get store
312 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
313 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
314 const char ***listptr)
317 int size = sizeof(char *);
318 int double_count = stringcount * 2;
322 for (i = 0; i < double_count; i += 2)
323 size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
325 stringlist = (char **)(pcre_malloc)(size);
326 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
328 *listptr = (const char **)stringlist;
329 p = (char *)(stringlist + stringcount + 1);
331 for (i = 0; i < double_count; i += 2)
333 int len = ovector[i+1] - ovector[i];
334 memcpy(p, subject + ovector[i], len);
346 /*************************************************
347 * Free store obtained by get_substring_list *
348 *************************************************/
350 /* This function exists for the benefit of people calling PCRE from non-C
351 programs that can call its functions, but not free() or (pcre_free)() directly.
353 Argument: the result of a previous pcre_get_substring_list()
357 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
358 pcre_free_substring_list(const char **pointer)
360 (pcre_free)((void *)pointer);
365 /*************************************************
366 * Copy captured string to new store *
367 *************************************************/
369 /* This function copies a single captured substring into a piece of new
373 subject the subject string that was matched
374 ovector pointer to the offsets table
375 stringcount the number of substrings that were captured
376 (i.e. the yield of the pcre_exec call, unless
377 that was zero, in which case it should be 1/3
378 of the offset table size)
379 stringnumber the number of the required substring
380 stringptr where to put a pointer to the substring
382 Returns: if successful:
383 the length of the string, not including the zero that
384 is put on the end; can be zero
386 PCRE_ERROR_NOMEMORY (-6) failed to get store
387 PCRE_ERROR_NOSUBSTRING (-7) substring not present
390 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
391 pcre_get_substring(const char *subject, int *ovector, int stringcount,
392 int stringnumber, const char **stringptr)
396 if (stringnumber < 0 || stringnumber >= stringcount)
397 return PCRE_ERROR_NOSUBSTRING;
399 yield = ovector[stringnumber+1] - ovector[stringnumber];
400 substring = (char *)(pcre_malloc)(yield + 1);
401 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
402 memcpy(substring, subject + ovector[stringnumber], yield);
403 substring[yield] = 0;
404 *stringptr = substring;
410 /*************************************************
411 * Copy named captured string to new store *
412 *************************************************/
414 /* This function copies a single captured substring, identified by name, into
415 new store. If the regex permits duplicate names, the first substring that is
419 code the compiled regex
420 subject the subject string that was matched
421 ovector pointer to the offsets table
422 stringcount the number of substrings that were captured
423 (i.e. the yield of the pcre_exec call, unless
424 that was zero, in which case it should be 1/3
425 of the offset table size)
426 stringname the name of the required substring
427 stringptr where to put the pointer
429 Returns: if successful:
430 the length of the copied string, not including the zero
431 that is put on the end; can be zero
433 PCRE_ERROR_NOMEMORY (-6) couldn't get memory
434 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
437 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
438 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
439 int stringcount, const char *stringname, const char **stringptr)
441 int n = get_first_set(code, stringname, ovector);
442 if (n <= 0) return n;
443 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
449 /*************************************************
450 * Free store obtained by get_substring *
451 *************************************************/
453 /* This function exists for the benefit of people calling PCRE from non-C
454 programs that can call its functions, but not free() or (pcre_free)() directly.
456 Argument: the result of a previous pcre_get_substring()
460 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
461 pcre_free_substring(const char *pointer)
463 (pcre_free)((void *)pointer);
468 /* End of pcre_get.c */