1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package regexp implements regular expression search.
7 // The syntax of the regular expressions accepted is the same
8 // general syntax used by Perl, Python, and other languages.
9 // More precisely, it is the syntax accepted by RE2 and described at
10 // https://golang.org/s/re2syntax, except for \C.
11 // For an overview of the syntax, run
12 // godoc regexp/syntax
14 // All characters are UTF-8-encoded code points.
16 // There are 16 methods of Regexp that match a regular expression and identify
17 // the matched text. Their names are matched by this regular expression:
19 // Find(All)?(String)?(Submatch)?(Index)?
21 // If 'All' is present, the routine matches successive non-overlapping
22 // matches of the entire expression. Empty matches abutting a preceding
23 // match are ignored. The return value is a slice containing the successive
24 // return values of the corresponding non-'All' routine. These routines take
25 // an extra integer argument, n; if n >= 0, the function returns at most n
26 // matches/submatches.
28 // If 'String' is present, the argument is a string; otherwise it is a slice
29 // of bytes; return values are adjusted as appropriate.
31 // If 'Submatch' is present, the return value is a slice identifying the
32 // successive submatches of the expression. Submatches are matches of
33 // parenthesized subexpressions (also known as capturing groups) within the
34 // regular expression, numbered from left to right in order of opening
35 // parenthesis. Submatch 0 is the match of the entire expression, submatch 1
36 // the match of the first parenthesized subexpression, and so on.
38 // If 'Index' is present, matches and submatches are identified by byte index
39 // pairs within the input string: result[2*n:2*n+2] identifies the indexes of
40 // the nth submatch. The pair for n==0 identifies the match of the entire
41 // expression. If 'Index' is not present, the match is identified by the
42 // text of the match/submatch. If an index is negative, it means that
43 // subexpression did not match any string in the input.
45 // There is also a subset of the methods that can be applied to text read
46 // from a RuneReader:
48 // MatchReader, FindReaderIndex, FindReaderSubmatchIndex
50 // This set may grow. Note that regular expression matches may need to
51 // examine text beyond the text returned by a match, so the methods that
52 // match text from a RuneReader may read arbitrarily far into the input
53 // before returning.
55 // (There are a few other methods that do not match this pattern.)
72 // Regexp is the representation of a compiled regular expression.
73 // The public interface is entirely through methods.
74 // A Regexp is safe for concurrent use by multiple goroutines.
76 // read-only after Compile
77 expr string // as passed to Compile
78 prog *syntax.Prog // compiled program
79 prefix string // required prefix in unanchored matches
80 prefixBytes []byte // prefix, as a []byte
81 prefixComplete bool // prefix is the entire regexp
82 prefixRune rune // first rune in prefix
83 cond syntax.EmptyOp // empty-width conditions required at start of match
88 // cache of machines for running regexp
93 // String returns the source text used to compile the regular expression.
94 func (re *Regexp) String() string {
98 // Compile parses a regular expression and returns, if successful,
99 // a Regexp object that can be used to match against text.
101 // When matching against text, the regexp returns a match that
102 // begins as early as possible in the input (leftmost), and among those
103 // it chooses the one that a backtracking search would have found first.
104 // This so-called leftmost-first matching is the same semantics
105 // that Perl, Python, and other implementations use, although this
106 // package implements it without the expense of backtracking.
107 // For POSIX leftmost-longest matching, see CompilePOSIX.
108 func Compile(expr string) (*Regexp, error) {
109 return compile(expr, syntax.Perl, false)
112 // CompilePOSIX is like Compile but restricts the regular expression
113 // to POSIX ERE (egrep) syntax and changes the match semantics to
114 // leftmost-longest.
116 // That is, when matching against text, the regexp returns a match that
117 // begins as early as possible in the input (leftmost), and among those
118 // it chooses a match that is as long as possible.
119 // This so-called leftmost-longest matching is the same semantics
120 // that early regular expression implementations used and that POSIX
121 // specifies.
123 // However, there can be multiple leftmost-longest matches, with different
124 // submatch choices, and here this package diverges from POSIX.
125 // Among the possible leftmost-longest matches, this package chooses
126 // the one that a backtracking search would have found first, while POSIX
127 // specifies that the match be chosen to maximize the length of the first
128 // subexpression, then the second, and so on from left to right.
129 // The POSIX rule is computationally prohibitive and not even well-defined.
130 // See http://swtch.com/~rsc/regexp/regexp2.html#posix for details.
131 func CompilePOSIX(expr string) (*Regexp, error) {
132 return compile(expr, syntax.POSIX, true)
135 // Longest makes future searches prefer the leftmost-longest match.
136 // That is, when matching against text, the regexp returns a match that
137 // begins as early as possible in the input (leftmost), and among those
138 // it chooses a match that is as long as possible.
139 func (re *Regexp) Longest() {
143 func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
144 re, err := syntax.Parse(expr, mode)
148 maxCap := re.MaxCap()
149 capNames := re.CapNames()
152 prog, err := syntax.Compile(re)
160 subexpNames: capNames,
161 cond: prog.StartCond(),
164 regexp.prefix, regexp.prefixComplete = prog.Prefix()
165 if regexp.prefix != "" {
166 // TODO(rsc): Remove this allocation by adding
167 // IndexString to package bytes.
168 regexp.prefixBytes = []byte(regexp.prefix)
169 regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix)
174 // get returns a machine to use for matching re.
175 // It uses the re's machine cache if possible, to avoid
176 // unnecessary allocation.
177 func (re *Regexp) get() *machine {
179 if n := len(re.machine); n > 0 {
181 re.machine = re.machine[:n-1]
186 z := progMachine(re.prog)
191 // put returns a machine to the re's machine cache.
192 // There is no attempt to limit the size of the cache, so it will
193 // grow to the maximum number of simultaneous matches
194 // run using re. (The cache empties when re gets garbage collected.)
195 func (re *Regexp) put(z *machine) {
197 re.machine = append(re.machine, z)
201 // MustCompile is like Compile but panics if the expression cannot be parsed.
202 // It simplifies safe initialization of global variables holding compiled regular
203 // expressions.
204 func MustCompile(str string) *Regexp {
205 regexp, error := Compile(str)
207 panic(`regexp: Compile(` + quote(str) + `): ` + error.Error())
212 // MustCompilePOSIX is like CompilePOSIX but panics if the expression cannot be parsed.
213 // It simplifies safe initialization of global variables holding compiled regular
214 // expressions.
215 func MustCompilePOSIX(str string) *Regexp {
216 regexp, error := CompilePOSIX(str)
218 panic(`regexp: CompilePOSIX(` + quote(str) + `): ` + error.Error())
223 func quote(s string) string {
224 if strconv.CanBackquote(s) {
227 return strconv.Quote(s)
230 // NumSubexp returns the number of parenthesized subexpressions in this Regexp.
231 func (re *Regexp) NumSubexp() int {
235 // SubexpNames returns the names of the parenthesized subexpressions
236 // in this Regexp. The name for the first sub-expression is names[1],
237 // so that if m is a match slice, the name for m[i] is SubexpNames()[i].
238 // Since the Regexp as a whole cannot be named, names[0] is always
239 // the empty string. The slice should not be modified.
240 func (re *Regexp) SubexpNames() []string {
241 return re.subexpNames
244 const endOfText rune = -1
246 // input abstracts different representations of the input text. It provides
247 // one-character lookahead.
248 type input interface {
249 step(pos int) (r rune, width int) // advance one rune
250 canCheckPrefix() bool // can we look ahead without losing info?
251 hasPrefix(re *Regexp) bool
252 index(re *Regexp, pos int) int
253 context(pos int) syntax.EmptyOp
256 // inputString scans a string.
257 type inputString struct {
261 func (i *inputString) step(pos int) (rune, int) {
262 if pos < len(i.str) {
264 if c < utf8.RuneSelf {
267 return utf8.DecodeRuneInString(i.str[pos:])
272 func (i *inputString) canCheckPrefix() bool {
276 func (i *inputString) hasPrefix(re *Regexp) bool {
277 return strings.HasPrefix(i.str, re.prefix)
280 func (i *inputString) index(re *Regexp, pos int) int {
281 return strings.Index(i.str[pos:], re.prefix)
284 func (i *inputString) context(pos int) syntax.EmptyOp {
285 r1, r2 := endOfText, endOfText
286 if pos > 0 && pos <= len(i.str) {
287 r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
289 if pos < len(i.str) {
290 r2, _ = utf8.DecodeRuneInString(i.str[pos:])
292 return syntax.EmptyOpContext(r1, r2)
295 // inputBytes scans a byte slice.
296 type inputBytes struct {
300 func (i *inputBytes) step(pos int) (rune, int) {
301 if pos < len(i.str) {
303 if c < utf8.RuneSelf {
306 return utf8.DecodeRune(i.str[pos:])
311 func (i *inputBytes) canCheckPrefix() bool {
315 func (i *inputBytes) hasPrefix(re *Regexp) bool {
316 return bytes.HasPrefix(i.str, re.prefixBytes)
319 func (i *inputBytes) index(re *Regexp, pos int) int {
320 return bytes.Index(i.str[pos:], re.prefixBytes)
323 func (i *inputBytes) context(pos int) syntax.EmptyOp {
324 r1, r2 := endOfText, endOfText
325 if pos > 0 && pos <= len(i.str) {
326 r1, _ = utf8.DecodeLastRune(i.str[:pos])
328 if pos < len(i.str) {
329 r2, _ = utf8.DecodeRune(i.str[pos:])
331 return syntax.EmptyOpContext(r1, r2)
334 // inputReader scans a RuneReader.
335 type inputReader struct {
341 func (i *inputReader) step(pos int) (rune, int) {
342 if !i.atEOT && pos != i.pos {
346 r, w, err := i.r.ReadRune()
355 func (i *inputReader) canCheckPrefix() bool {
359 func (i *inputReader) hasPrefix(re *Regexp) bool {
363 func (i *inputReader) index(re *Regexp, pos int) int {
367 func (i *inputReader) context(pos int) syntax.EmptyOp {
371 // LiteralPrefix returns a literal string that must begin any match
372 // of the regular expression re. It returns the boolean true if the
373 // literal string comprises the entire regular expression.
374 func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
375 return re.prefix, re.prefixComplete
378 // MatchReader returns whether the Regexp matches the text read by the
379 // RuneReader. The return value is a boolean: true for match, false for no
380 // match.
381 func (re *Regexp) MatchReader(r io.RuneReader) bool {
382 return re.doExecute(r, nil, "", 0, 0) != nil
385 // MatchString returns whether the Regexp matches the string s.
386 // The return value is a boolean: true for match, false for no match.
387 func (re *Regexp) MatchString(s string) bool {
388 return re.doExecute(nil, nil, s, 0, 0) != nil
391 // Match returns whether the Regexp matches the byte slice b.
392 // The return value is a boolean: true for match, false for no match.
393 func (re *Regexp) Match(b []byte) bool {
394 return re.doExecute(nil, b, "", 0, 0) != nil
397 // MatchReader checks whether a textual regular expression matches the text
398 // read by the RuneReader. More complicated queries need to use Compile and
399 // the full Regexp interface.
400 func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) {
401 re, err := Compile(pattern)
405 return re.MatchReader(r), nil
408 // MatchString checks whether a textual regular expression
409 // matches a string. More complicated queries need
410 // to use Compile and the full Regexp interface.
411 func MatchString(pattern string, s string) (matched bool, err error) {
412 re, err := Compile(pattern)
416 return re.MatchString(s), nil
419 // Match checks whether a textual regular expression
420 // matches a byte slice. More complicated queries need
421 // to use Compile and the full Regexp interface.
422 func Match(pattern string, b []byte) (matched bool, err error) {
423 re, err := Compile(pattern)
427 return re.Match(b), nil
430 // ReplaceAllString returns a copy of src, replacing matches of the Regexp
431 // with the replacement string repl. Inside repl, $ signs are interpreted as
432 // in Expand, so for instance $1 represents the text of the first submatch.
433 func (re *Regexp) ReplaceAllString(src, repl string) string {
435 if strings.Index(repl, "$") >= 0 {
436 n = 2 * (re.numSubexp + 1)
438 b := re.replaceAll(nil, src, n, func(dst []byte, match []int) []byte {
439 return re.expand(dst, repl, nil, src, match)
444 // ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp
445 // with the replacement string repl. The replacement repl is substituted directly,
446 // without using Expand.
447 func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
448 return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
449 return append(dst, repl...)
453 // ReplaceAllStringFunc returns a copy of src in which all matches of the
454 // Regexp have been replaced by the return value of function repl applied
455 // to the matched substring. The replacement returned by repl is substituted
456 // directly, without using Expand.
457 func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
458 b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
459 return append(dst, repl(src[match[0]:match[1]])...)
464 func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst []byte, m []int) []byte) []byte {
465 lastMatchEnd := 0 // end position of the most recent match
466 searchPos := 0 // position where we next look for a match
474 for searchPos <= endPos {
475 a := re.doExecute(nil, bsrc, src, searchPos, nmatch)
477 break // no more matches
480 // Copy the unmatched characters before this match.
482 buf = append(buf, bsrc[lastMatchEnd:a[0]]...)
484 buf = append(buf, src[lastMatchEnd:a[0]]...)
487 // Now insert a copy of the replacement string, but not for a
488 // match of the empty string immediately after another match.
489 // (Otherwise, we get double replacement for patterns that
490 // match both empty and nonempty strings.)
491 if a[1] > lastMatchEnd || a[0] == 0 {
496 // Advance past this match; always advance at least one character.
499 _, width = utf8.DecodeRune(bsrc[searchPos:])
501 _, width = utf8.DecodeRuneInString(src[searchPos:])
503 if searchPos+width > a[1] {
505 } else if searchPos+1 > a[1] {
506 // This clause is only needed at the end of the input
507 // string. In that case, DecodeRuneInString returns width=0.
514 // Copy the unmatched characters after the last match.
516 buf = append(buf, bsrc[lastMatchEnd:]...)
518 buf = append(buf, src[lastMatchEnd:]...)
524 // ReplaceAll returns a copy of src, replacing matches of the Regexp
525 // with the replacement text repl. Inside repl, $ signs are interpreted as
526 // in Expand, so for instance $1 represents the text of the first submatch.
527 func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
529 if bytes.IndexByte(repl, '$') >= 0 {
530 n = 2 * (re.numSubexp + 1)
533 b := re.replaceAll(src, "", n, func(dst []byte, match []int) []byte {
534 if len(srepl) != len(repl) {
537 return re.expand(dst, srepl, src, "", match)
542 // ReplaceAllLiteral returns a copy of src, replacing matches of the Regexp
543 // with the replacement bytes repl. The replacement repl is substituted directly,
544 // without using Expand.
545 func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
546 return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
547 return append(dst, repl...)
551 // ReplaceAllFunc returns a copy of src in which all matches of the
552 // Regexp have been replaced by the return value of function repl applied
553 // to the matched byte slice. The replacement returned by repl is substituted
554 // directly, without using Expand.
555 func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
556 return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
557 return append(dst, repl(src[match[0]:match[1]])...)
561 var specialBytes = []byte(`\.+*?()|[]{}^$`)
563 func special(b byte) bool {
564 return bytes.IndexByte(specialBytes, b) >= 0
567 // QuoteMeta returns a string that quotes all regular expression metacharacters
568 // inside the argument text; the returned string is a regular expression matching
569 // the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
570 func QuoteMeta(s string) string {
571 b := make([]byte, 2*len(s))
573 // A byte loop is correct because all metacharacters are ASCII.
575 for i := 0; i < len(s); i++ {
583 return string(b[0:j])
586 // The number of capture values in the program may correspond
587 // to fewer capturing expressions than are in the regexp.
588 // For example, "(a){0}" turns into an empty program, so the
589 // maximum capture in the program is 0 but we need to return
590 // an expression for \1. Pad appends -1s to the slice a as needed.
591 func (re *Regexp) pad(a []int) []int {
596 n := (1 + re.numSubexp) * 2
603 // Find matches in slice b if b is non-nil, otherwise find matches in string s.
604 func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
612 for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
613 matches := re.doExecute(nil, b, s, pos, re.prog.NumCap)
614 if len(matches) == 0 {
619 if matches[1] == pos {
620 // We've found an empty match.
621 if matches[0] == prevMatchEnd {
622 // We don't allow an empty match right
623 // after a previous match, so ignore it.
629 _, width = utf8.DecodeRuneInString(s[pos:end])
631 _, width = utf8.DecodeRune(b[pos:end])
641 prevMatchEnd = matches[1]
644 deliver(re.pad(matches))
650 // Find returns a slice holding the text of the leftmost match in b of the regular expression.
651 // A return value of nil indicates no match.
652 func (re *Regexp) Find(b []byte) []byte {
653 a := re.doExecute(nil, b, "", 0, 2)
660 // FindIndex returns a two-element slice of integers defining the location of
661 // the leftmost match in b of the regular expression. The match itself is at
662 // b[loc[0]:loc[1]].
663 // A return value of nil indicates no match.
664 func (re *Regexp) FindIndex(b []byte) (loc []int) {
665 a := re.doExecute(nil, b, "", 0, 2)
672 // FindString returns a string holding the text of the leftmost match in s of the regular
673 // expression. If there is no match, the return value is an empty string,
674 // but it will also be empty if the regular expression successfully matches
675 // an empty string. Use FindStringIndex or FindStringSubmatch if it is
676 // necessary to distinguish these cases.
677 func (re *Regexp) FindString(s string) string {
678 a := re.doExecute(nil, nil, s, 0, 2)
685 // FindStringIndex returns a two-element slice of integers defining the
686 // location of the leftmost match in s of the regular expression. The match
687 // itself is at s[loc[0]:loc[1]].
688 // A return value of nil indicates no match.
689 func (re *Regexp) FindStringIndex(s string) (loc []int) {
690 a := re.doExecute(nil, nil, s, 0, 2)
697 // FindReaderIndex returns a two-element slice of integers defining the
698 // location of the leftmost match of the regular expression in text read from
699 // the RuneReader. The match text was found in the input stream at
700 // byte offset loc[0] through loc[1]-1.
701 // A return value of nil indicates no match.
702 func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
703 a := re.doExecute(r, nil, "", 0, 2)
710 // FindSubmatch returns a slice of slices holding the text of the leftmost
711 // match of the regular expression in b and the matches, if any, of its
712 // subexpressions, as defined by the 'Submatch' descriptions in the package
713 // comment.
714 // A return value of nil indicates no match.
715 func (re *Regexp) FindSubmatch(b []byte) [][]byte {
716 a := re.doExecute(nil, b, "", 0, re.prog.NumCap)
720 ret := make([][]byte, 1+re.numSubexp)
722 if 2*i < len(a) && a[2*i] >= 0 {
723 ret[i] = b[a[2*i]:a[2*i+1]]
729 // Expand appends template to dst and returns the result; during the
730 // append, Expand replaces variables in the template with corresponding
731 // matches drawn from src. The match slice should have been returned by
732 // FindSubmatchIndex.
734 // In the template, a variable is denoted by a substring of the form
735 // $name or ${name}, where name is a non-empty sequence of letters,
736 // digits, and underscores. A purely numeric name like $1 refers to
737 // the submatch with the corresponding index; other names refer to
738 // capturing parentheses named with the (?P<name>...) syntax. A
739 // reference to an out of range or unmatched index or a name that is not
740 // present in the regular expression is replaced with an empty slice.
742 // In the $name form, name is taken to be as long as possible: $1x is
743 // equivalent to ${1x}, not ${1}x, and $10 is equivalent to ${10}, not ${1}0.
745 // To insert a literal $ in the output, use $$ in the template.
746 func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte {
747 return re.expand(dst, string(template), src, "", match)
750 // ExpandString is like Expand but the template and source are strings.
751 // It appends to and returns a byte slice in order to give the calling
752 // code control over allocation.
753 func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte {
754 return re.expand(dst, template, nil, src, match)
757 func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte {
758 for len(template) > 0 {
759 i := strings.Index(template, "$")
763 dst = append(dst, template[:i]...)
764 template = template[i:]
765 if len(template) > 1 && template[1] == '$' {
767 dst = append(dst, '$')
768 template = template[2:]
771 name, num, rest, ok := extract(template)
773 // Malformed; treat $ as raw text.
774 dst = append(dst, '$')
775 template = template[1:]
780 if 2*num+1 < len(match) && match[2*num] >= 0 {
782 dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...)
784 dst = append(dst, src[match[2*num]:match[2*num+1]]...)
788 for i, namei := range re.subexpNames {
789 if name == namei && 2*i+1 < len(match) && match[2*i] >= 0 {
791 dst = append(dst, bsrc[match[2*i]:match[2*i+1]]...)
793 dst = append(dst, src[match[2*i]:match[2*i+1]]...)
800 dst = append(dst, template...)
804 // extract returns the name from a leading "$name" or "${name}" in str.
805 // If it is a number, extract returns num set to that number; otherwise num = -1.
806 func extract(str string) (name string, num int, rest string, ok bool) {
807 if len(str) < 2 || str[0] != '$' {
819 rune, size := utf8.DecodeRuneInString(str[i:])
820 if !unicode.IsLetter(rune) && !unicode.IsDigit(rune) && rune != '_' {
826 // empty name is not okay
831 if i >= len(str) || str[i] != '}' {
832 // missing closing brace
840 for i := 0; i < len(name); i++ {
841 if name[i] < '0' || '9' < name[i] || num >= 1e8 {
845 num = num*10 + int(name[i]) - '0'
847 // Disallow leading zeros.
848 if name[0] == '0' && len(name) > 1 {
857 // FindSubmatchIndex returns a slice holding the index pairs identifying the
858 // leftmost match of the regular expression in b and the matches, if any, of
859 // its subexpressions, as defined by the 'Submatch' and 'Index' descriptions
860 // in the package comment.
861 // A return value of nil indicates no match.
862 func (re *Regexp) FindSubmatchIndex(b []byte) []int {
863 return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap))
866 // FindStringSubmatch returns a slice of strings holding the text of the
867 // leftmost match of the regular expression in s and the matches, if any, of
868 // its subexpressions, as defined by the 'Submatch' description in the
869 // package comment.
870 // A return value of nil indicates no match.
871 func (re *Regexp) FindStringSubmatch(s string) []string {
872 a := re.doExecute(nil, nil, s, 0, re.prog.NumCap)
876 ret := make([]string, 1+re.numSubexp)
878 if 2*i < len(a) && a[2*i] >= 0 {
879 ret[i] = s[a[2*i]:a[2*i+1]]
885 // FindStringSubmatchIndex returns a slice holding the index pairs
886 // identifying the leftmost match of the regular expression in s and the
887 // matches, if any, of its subexpressions, as defined by the 'Submatch' and
888 // 'Index' descriptions in the package comment.
889 // A return value of nil indicates no match.
890 func (re *Regexp) FindStringSubmatchIndex(s string) []int {
891 return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap))
894 // FindReaderSubmatchIndex returns a slice holding the index pairs
895 // identifying the leftmost match of the regular expression of text read by
896 // the RuneReader, and the matches, if any, of its subexpressions, as defined
897 // by the 'Submatch' and 'Index' descriptions in the package comment. A
898 // return value of nil indicates no match.
899 func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
900 return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap))
903 const startSize = 10 // The size at which to start a slice in the 'All' routines.
905 // FindAll is the 'All' version of Find; it returns a slice of all successive
906 // matches of the expression, as defined by the 'All' description in the
907 // package comment.
908 // A return value of nil indicates no match.
909 func (re *Regexp) FindAll(b []byte, n int) [][]byte {
913 result := make([][]byte, 0, startSize)
914 re.allMatches("", b, n, func(match []int) {
915 result = append(result, b[match[0]:match[1]])
917 if len(result) == 0 {
923 // FindAllIndex is the 'All' version of FindIndex; it returns a slice of all
924 // successive matches of the expression, as defined by the 'All' description
925 // in the package comment.
926 // A return value of nil indicates no match.
927 func (re *Regexp) FindAllIndex(b []byte, n int) [][]int {
931 result := make([][]int, 0, startSize)
932 re.allMatches("", b, n, func(match []int) {
933 result = append(result, match[0:2])
935 if len(result) == 0 {
941 // FindAllString is the 'All' version of FindString; it returns a slice of all
942 // successive matches of the expression, as defined by the 'All' description
943 // in the package comment.
944 // A return value of nil indicates no match.
945 func (re *Regexp) FindAllString(s string, n int) []string {
949 result := make([]string, 0, startSize)
950 re.allMatches(s, nil, n, func(match []int) {
951 result = append(result, s[match[0]:match[1]])
953 if len(result) == 0 {
959 // FindAllStringIndex is the 'All' version of FindStringIndex; it returns a
960 // slice of all successive matches of the expression, as defined by the 'All'
961 // description in the package comment.
962 // A return value of nil indicates no match.
963 func (re *Regexp) FindAllStringIndex(s string, n int) [][]int {
967 result := make([][]int, 0, startSize)
968 re.allMatches(s, nil, n, func(match []int) {
969 result = append(result, match[0:2])
971 if len(result) == 0 {
977 // FindAllSubmatch is the 'All' version of FindSubmatch; it returns a slice
978 // of all successive matches of the expression, as defined by the 'All'
979 // description in the package comment.
980 // A return value of nil indicates no match.
981 func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
985 result := make([][][]byte, 0, startSize)
986 re.allMatches("", b, n, func(match []int) {
987 slice := make([][]byte, len(match)/2)
988 for j := range slice {
990 slice[j] = b[match[2*j]:match[2*j+1]]
993 result = append(result, slice)
995 if len(result) == 0 {
1001 // FindAllSubmatchIndex is the 'All' version of FindSubmatchIndex; it returns
1002 // a slice of all successive matches of the expression, as defined by the
1003 // 'All' description in the package comment.
1004 // A return value of nil indicates no match.
1005 func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
1009 result := make([][]int, 0, startSize)
1010 re.allMatches("", b, n, func(match []int) {
1011 result = append(result, match)
1013 if len(result) == 0 {
1019 // FindAllStringSubmatch is the 'All' version of FindStringSubmatch; it
1020 // returns a slice of all successive matches of the expression, as defined by
1021 // the 'All' description in the package comment.
1022 // A return value of nil indicates no match.
1023 func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
1027 result := make([][]string, 0, startSize)
1028 re.allMatches(s, nil, n, func(match []int) {
1029 slice := make([]string, len(match)/2)
1030 for j := range slice {
1031 if match[2*j] >= 0 {
1032 slice[j] = s[match[2*j]:match[2*j+1]]
1035 result = append(result, slice)
1037 if len(result) == 0 {
1043 // FindAllStringSubmatchIndex is the 'All' version of
1044 // FindStringSubmatchIndex; it returns a slice of all successive matches of
1045 // the expression, as defined by the 'All' description in the package
1046 // comment.
1047 // A return value of nil indicates no match.
1048 func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
1052 result := make([][]int, 0, startSize)
1053 re.allMatches(s, nil, n, func(match []int) {
1054 result = append(result, match)
1056 if len(result) == 0 {
1062 // Split slices s into substrings separated by the expression and returns a slice of
1063 // the substrings between those expression matches.
1065 // The slice returned by this method consists of all the substrings of s
1066 // not contained in the slice returned by FindAllString. When called on an expression
1067 // that contains no metacharacters, it is equivalent to strings.SplitN.
1069 // Example:
1070 // s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5)
1071 // // s: ["", "b", "b", "c", "cadaaae"]
1073 // The count determines the number of substrings to return:
1074 // n > 0: at most n substrings; the last substring will be the unsplit remainder.
1075 // n == 0: the result is nil (zero substrings)
1076 // n < 0: all substrings
1077 func (re *Regexp) Split(s string, n int) []string {
1083 if len(re.expr) > 0 && len(s) == 0 {
1087 matches := re.FindAllStringIndex(s, n)
1088 strings := make([]string, 0, len(matches))
1092 for _, match := range matches {
1093 if n > 0 && len(strings) >= n-1 {
1099 strings = append(strings, s[beg:end])
1105 strings = append(strings, s[beg:])