1 /* Matcher.java -- Instance of a regular expression applied to a char sequence.
2 Copyright (C) 2002, 2004, 2006 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
39 package java.util.regex;
41 import gnu.java.lang.CPStringBuilder;
43 import gnu.java.util.regex.CharIndexed;
44 import gnu.java.util.regex.RE;
45 import gnu.java.util.regex.REMatch;
48 * Instance of a regular expression applied to a char sequence.
// NOTE(review): this listing carries the original file's line numbers at the
// start of every line and omits short lines (braces, blank lines, comment
// delimiters and some declarations/statements). For example, `position` is
// read and written by find() but its declaration is not visible here (the
// embedded numbers jump 59 -> 61). Confirm against the complete file.
//
// Matcher is a final class implementing MatchResult. The fields below hold
// the pattern, the input, the append position used by appendReplacement and
// appendTail, the latest REMatch, and the region/bounds settings.
52 public final class Matcher implements MatchResult
54 private Pattern pattern;
55 private CharSequence input;
56 // We use CharIndexed as an input object to the getMatch method in order
57 // that /\G/ (the end of the previous match) may work. The information
58 // of the previous match is stored in the CharIndexed object.
59 private CharIndexed inputCharIndexed;
61 private int appendPosition;
// null until a match operation has produced a result; assertMatchOp() throws
// IllegalStateException while this is null.
62 private REMatch match;
65 * The start of the region of the input on which to match.
67 private int regionStart;
70 * The end of the region of the input on which to match.
72 private int regionEnd;
75 * True if the match process should look beyond the
76 * region marked by regionStart to regionEnd when
77 * performing lookAhead, lookBehind and boundary
80 private boolean transparentBounds;
83 * The flags that affect the anchoring bounds.
84 * If {@link #hasAnchoringBounds()} is {@code true},
85 * the match process will honour the
86 * anchoring bounds: ^, \A, \Z, \z and $. If
87 * {@link #hasAnchoringBounds()} is {@code false},
88 * the anchors are ignored and appropriate flags,
89 * stored in this variable, are used to provide this
// hasAnchoringBounds() reports true exactly when this value is 0.
92 private int anchoringBounds;
// Package-private constructor. The visible lines store the pattern, build
// inputCharIndexed via RE.makeCharIndexed(input, 0), set the region end to
// the input length and make the bounds opaque (transparentBounds = false).
// NOTE(review): numbers 95, 97, 99, 102 and 103 are absent from this listing;
// presumably the assignments of `input`, `regionStart` and `anchoringBounds`
// are among them -- confirm against the complete file.
94 Matcher(Pattern pattern, CharSequence input)
96 this.pattern = pattern;
98 this.inputCharIndexed = RE.makeCharIndexed(input, 0);
100 regionEnd = input.length();
101 transparentBounds = false;
106 * Changes the pattern used by the {@link Matcher} to
107 * the one specified. Existing match information is lost,
108 * but the input and the matcher's position within it are
111 * @param newPattern the new pattern to use.
112 * @return this matcher.
113 * @throws IllegalArgumentException if {@code newPattern} is
// Replaces the pattern used by this matcher. A null argument is rejected with
// IllegalArgumentException before the field is updated.
// NOTE(review): numbers 122-125 are absent from this listing, so the end of
// the method (discarding the current match, returning this matcher) is not
// visible here -- confirm against the complete file.
117 public Matcher usePattern(Pattern newPattern)
119 if (newPattern == null)
120 throw new IllegalArgumentException("The new pattern was null.");
121 pattern = newPattern;
128 * @param sb The target string buffer
129 * @param replacement The replacement string
131 * @exception IllegalStateException If no match has yet been attempted,
132 * or if the previous match operation failed
133 * @exception IndexOutOfBoundsException If the replacement string refers
134 * to a capturing group that does not exist in the pattern
// Appends to sb the input text between appendPosition and the start of the
// current match, then the replacement text produced by RE.getReplacement
// (called with RE.REG_REPLACE_USE_BACKSLASHESCAPE), and finally moves
// appendPosition to the end of the current match.
// NOTE(review): numbers 138, 139, 145 and 146 are absent from this listing;
// the check that a match exists and the return statement are not visible.
136 public Matcher appendReplacement (StringBuffer sb, String replacement)
137 throws IllegalStateException
// Text that lies between the previous append position and this match.
140 sb.append(input.subSequence(appendPosition,
141 match.getStartIndex()).toString());
142 sb.append(RE.getReplacement(replacement, match,
143 RE.REG_REPLACE_USE_BACKSLASHESCAPE));
// The next append starts after this match.
144 appendPosition = match.getEndIndex();
149 * @param sb The target string buffer
// Appends to sb the remainder of the input, from appendPosition to the end of
// the input.
// NOTE(review): the return statement (number 154) is not visible in this
// listing; the declared return type is StringBuffer.
151 public StringBuffer appendTail (StringBuffer sb)
153 sb.append(input.subSequence(appendPosition, input.length()).toString());
158 * @exception IllegalStateException If no match has yet been attempted,
159 * or if the previous match operation failed
// Returns the end index reported by the current match.
// NOTE(review): the signature line (number 161) and the lines numbered 163,
// 164 and 166 are absent from this listing; only the throws clause and the
// return statement are visible. Presumably this is the no-argument end().
162 throws IllegalStateException
165 return match.getEndIndex();
169 * @param group The index of a capturing group in this matcher's pattern
171 * @exception IllegalStateException If no match has yet been attempted,
172 * or if the previous match operation failed
173 * @exception IndexOutOfBoundsException If the given index refers
174 * to a capturing group that does not exist in the pattern
// Returns the end index that the current match reports for the given
// capturing group.
// NOTE(review): numbers 178, 179 and 181 are absent from this listing; the
// check that a match exists is not visible here.
176 public int end (int group)
177 throws IllegalStateException
180 return match.getEndIndex(group);
// Looks for the next match, starting the search at `position`.
// NOTE(review): this method is heavily truncated in this listing (numbers
// 184, 188, 191, 192, 196-199, 202, 203, 206, 207 and everything after 208
// are absent), so the null check on `match`, the `else` keyword, the
// resets of `match`, the updates of `position` and all return values other
// than the recursive call are not visible. Confirm against the complete file.
183 public boolean find ()
// Remember whether any match existed before this call; used by the
// "stuck at the same position" test further down.
185 boolean first = (match == null);
// Search the CharIndexed view of the whole input when the bounds are
// transparent or the region spans the entire input.
186 if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
187 match = pattern.getRE().getMatch(inputCharIndexed, position, anchoringBounds);
// Presumably the else branch: search only the region's subsequence.
// NOTE(review): `position` is passed unchanged as the start index into the
// subsequence here, but into the whole input above -- verify that the two
// uses agree when regionStart is not 0.
189 match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd),
190 position, anchoringBounds);
193 int endIndex = match.getEndIndex();
194 // Is the match within input limits?
195 if (endIndex > input.length())
200 // Are we stuck at the same position?
// Only applies when a match already existed on entry (`first` is false) and
// the new match ends exactly at the current position.
201 if (!first && endIndex == position)
204 // Not at the end of the input yet?
205 if (position < input.length() - 1)
// Retry through find(int) with the current value of `position`.
208 return find(position);
220 * @param start The index to start the new pattern matching
222 * @exception IndexOutOfBoundsException If the replacement string refers
223 * to a capturing group that does not exist in the pattern
// Looks for a match starting the search at index `start`, using the same
// choice between inputCharIndexed and the region subsequence as find().
// The visible tail stores the end index of the match in `position`.
// NOTE(review): numbers 226, 229, 232, 233 and 235-238 are absent from this
// listing; the null check on `match` and the return statements are not
// visible. No validation of `start` is visible either.
// NOTE(review): the preceding @exception text mentions a replacement string,
// which this method does not take -- it looks copy-pasted; confirm the
// intended condition.
225 public boolean find (int start)
227 if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
228 match = pattern.getRE().getMatch(inputCharIndexed, start, anchoringBounds);
// Presumably the else branch (the `else` line is not visible).
230 match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd),
231 start, anchoringBounds);
234 position = match.getEndIndex();
241 * @exception IllegalStateException If no match has yet been attempted,
242 * or if the previous match operation failed
// Returns the string form of the current match (match.toString()).
// NOTE(review): numbers 245, 246 and 248 are absent from this listing; the
// check that a match exists is not visible here.
244 public String group ()
247 return match.toString();
251 * @param group The index of a capturing group in this matcher's pattern
253 * @exception IllegalStateException If no match has yet been attempted,
254 * or if the previous match operation failed
255 * @exception IndexOutOfBoundsException If the given index refers
256 * to a capturing group that does not exist in the pattern
// Returns what the current match reports for the given capturing group
// (match.toString(group)).
// NOTE(review): numbers 260, 261 and 263 are absent from this listing; the
// check that a match exists is not visible here.
258 public String group (int group)
259 throws IllegalStateException
262 return match.toString(group);
266 * @param replacement The replacement string
// Delegates to RE.substitute on the whole input, passing the replacement,
// the current `position` and RE.REG_REPLACE_USE_BACKSLASHESCAPE, and returns
// the resulting string.
// NOTE(review): numbers 269 and 270 are absent from this listing, so any
// statement executed before the substitution is not visible here.
268 public String replaceFirst (String replacement)
271 // Semantics might not quite match
272 return pattern.getRE().substitute(input, replacement, position,
273 RE.REG_REPLACE_USE_BACKSLASHESCAPE);
277 * @param replacement The replacement string
// Delegates to RE.substituteAll on the whole input, passing the replacement,
// the current `position` and RE.REG_REPLACE_USE_BACKSLASHESCAPE, and returns
// the resulting string.
// NOTE(review): numbers 280 and 281 are absent from this listing, so any
// statement executed before the substitution is not visible here.
279 public String replaceAll (String replacement)
282 return pattern.getRE().substituteAll(input, replacement, position,
283 RE.REG_REPLACE_USE_BACKSLASHESCAPE);
// Returns the number of subexpressions reported by the pattern's RE
// (RE.getNumSubs()).
286 public int groupCount ()
288 return pattern.getRE().getNumSubs();
// Attempts a match with RE.REG_FIX_STARTING_POSITION set. When the bounds are
// transparent or the region spans the whole input, the search runs on
// inputCharIndexed from regionStart with RE.REG_ANCHORINDEX added; the other
// visible call runs on the region's subsequence from index 0.
// NOTE(review): numbers 292, 296, 299, 300, 302 and 304-309 are absent from
// this listing; the `else` keyword, the null check on `match` and the return
// statements are not visible. Confirm against the complete file.
291 public boolean lookingAt ()
293 if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
294 match = pattern.getRE().getMatch(inputCharIndexed, regionStart,
295 anchoringBounds|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX);
// Presumably the else branch: match against the region's subsequence only.
297 match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0,
298 anchoringBounds|RE.REG_FIX_STARTING_POSITION);
// A match whose reported start index is 0 updates `position` to its end.
301 if (match.getStartIndex() == 0)
303 position = match.getEndIndex();
312 * Attempts to match the entire input sequence against the pattern.
314 * If the match succeeds then more information can be obtained via the
315 * start, end, and group methods.
// Attempts a match with RE.REG_TRY_ENTIRE_MATCH and
// RE.REG_FIX_STARTING_POSITION set, choosing between inputCharIndexed and the
// region's subsequence in the same way as lookingAt().
// NOTE(review): numbers 322, 326, 329, 330, 332 and 335-340 are absent from
// this listing; the `else` keyword, the null check on `match` and the return
// statements are not visible. Confirm against the complete file.
321 public boolean matches ()
323 if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
324 match = pattern.getRE().getMatch(inputCharIndexed, regionStart,
325 anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX);
// Presumably the else branch: match against the region's subsequence only.
327 match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0,
328 anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION);
331 if (match.getStartIndex() == 0)
333 position = match.getEndIndex();
// NOTE(review): `position` is compared with the length of the whole input
// even when the match above ran on the region's subsequence -- verify the
// behaviour for a region that ends before the input does.
334 if (position == input.length())
343 * Returns the Pattern that is interpreted by this Matcher
// Accessor declared to return a Pattern.
// NOTE(review): only the signature is visible in this listing (numbers
// 346-348 are absent); the body is not shown.
345 public Pattern pattern ()
351 * Resets the internal state of the matcher, including
352 * resetting the region to its default state of encompassing
353 * the whole input. The states of {@link #hasTransparentBounds()}
354 * and {@link #hasAnchoringBounds()} are unaffected.
356 * @return a reference to this matcher.
357 * @see #regionStart()
359 * @see #hasTransparentBounds()
360 * @see #hasAnchoringBounds()
// Resets the matcher. The only visible statement restores the region end to
// the full input length.
// NOTE(review): numbers 363-366 and 368-370 are absent from this listing;
// the remaining state resets and the return statement are not visible.
362 public Matcher reset ()
367 regionEnd = input.length();
373 * Resets the internal state of the matcher, including
374 * resetting the region to its default state of encompassing
375 * the whole input. The states of {@link #hasTransparentBounds()}
376 * and {@link #hasAnchoringBounds()} are unaffected.
378 * @param input The new input character sequence.
379 * @return a reference to this matcher.
380 * @see #regionStart()
382 * @see #hasTransparentBounds()
383 * @see #hasAnchoringBounds()
// Resets the matcher for a new input sequence. The visible statement rebuilds
// inputCharIndexed from the new input via RE.makeCharIndexed(input, 0).
// NOTE(review): numbers 386, 387, 389 and 390 are absent from this listing;
// the assignment of the `input` field and the return statement are not
// visible here.
385 public Matcher reset (CharSequence input)
388 this.inputCharIndexed = RE.makeCharIndexed(input, 0);
393 * @return the start index of the previous match
395 * @exception IllegalStateException If no match has yet been attempted,
396 * or if the previous match operation failed
// Returns the start index reported by the current match.
// NOTE(review): the signature line (number 398) and the lines numbered 400,
// 401 and 403 are absent from this listing; only the throws clause and the
// return statement are visible. Presumably this is the no-argument start().
399 throws IllegalStateException
402 return match.getStartIndex();
406 * @param group The index of a capturing group in this matcher's pattern
408 * @exception IllegalStateException If no match has yet been attempted,
409 * or if the previous match operation failed
410 * @exception IndexOutOfBoundsException If the given index refers
411 * to a capturing group that does not exist in the pattern
// Returns the start index that the current match reports for the given
// capturing group.
// NOTE(review): numbers 415, 416 and 418 are absent from this listing; the
// check that a match exists is not visible here.
413 public int start (int group)
414 throws IllegalStateException
417 return match.getStartIndex(group);
421 * @return True if and only if the matcher hit the end of input.
// Reports whether the end of input was hit, as recorded by the CharIndexed
// wrapper of the input (inputCharIndexed.hitEnd()).
424 public boolean hitEnd()
426 return inputCharIndexed.hitEnd();
430 * @return A string expression of this matcher.
// Builds a description consisting of the runtime class name followed by the
// pattern text, the region start and end, whether anchoring bounds are in
// use (anchoringBounds == 0), the transparentBounds flag and the string form
// of the last match (empty when there is none).
// NOTE(review): number 441 is absent from this listing, so the end of the
// append chain (its statement terminator and any closing text) is not
// visible here.
432 public String toString()
434 CPStringBuilder sb = new CPStringBuilder();
435 sb.append(this.getClass().getName())
436 .append("[pattern=").append(pattern.pattern())
437 .append(" region=").append(regionStart).append(",").append(regionEnd)
438 .append(" anchoringBounds=").append(anchoringBounds == 0)
439 .append(" transparentBounds=").append(transparentBounds)
440 .append(" lastmatch=").append(match == null ? "" : match.toString())
442 return sb.toString();
// Guard used before reading match data: throws IllegalStateException (with no
// message) when there is no current match.
445 private void assertMatchOp()
447 if (match == null) throw new IllegalStateException();
452 * Defines the region of the input on which to match.
453 * By default, the {@link Matcher} attempts to match
454 * the whole string (from 0 to the length of the input),
455 * but a region between {@code start} (inclusive) and
456 * {@code end} (exclusive) on which to match may instead
457 * be defined using this method.
460 * The behaviour of region matching is further affected
461 * by the use of transparent or opaque bounds (see
462 * {@link #useTransparentBounds(boolean)}) and whether or not
463 * anchors ({@code ^} and {@code $}) are in use
464 * (see {@link #useAnchoringBounds(boolean)}). With transparent
465 * bounds, the matcher is aware of input outside the bounds
466 * set by this method, whereas, with opaque bounds (the default)
467 * only the input within the bounds is used. The use of
468 * anchors is affected by this setting; with transparent
469 * bounds, anchors will match the beginning of the real input,
470 * while with opaque bounds they match the beginning of the
471 * region. {@link #useAnchoringBounds(boolean)} can be used
472 * to turn on or off the matching of anchors.
475 * @param start the start of the region (inclusive).
476 * @param end the end of the region (exclusive).
477 * @return a reference to this matcher.
478 * @throws IndexOutOfBoundsException if either {@code start} or
479 * {@code end} are less than zero,
480 * if either {@code start} or
481 * {@code end} are greater than the
482 * length of the input, or if
483 * {@code start} is greater than
485 * @see #regionStart()
487 * @see #hasTransparentBounds()
488 * @see #useTransparentBounds(boolean)
489 * @see #hasAnchoringBounds()
490 * @see #useAnchoringBounds(boolean)
// Validates the requested region against the input length and rejects bad
// values with IndexOutOfBoundsException; each message below names the
// condition it reports.
// NOTE(review): numbers 494, 496, 498, 500, 502, 504 and 506-510 are absent
// from this listing, so the `if` conditions guarding each throw, the
// statements that store the new region and the return statement are not
// visible. Confirm against the complete file.
493 public Matcher region(int start, int end)
495 int length = input.length();
497 throw new IndexOutOfBoundsException("The start position was less than zero.");
499 throw new IndexOutOfBoundsException("The start position is after the end of the input.");
501 throw new IndexOutOfBoundsException("The end position was less than zero.");
503 throw new IndexOutOfBoundsException("The end position is after the end of the input.");
505 throw new IndexOutOfBoundsException("The start position is after the end position.");
513 * The start of the region on which to perform matches (inclusive).
515 * @return the start index of the region.
516 * @see #region(int,int)
// Accessor declared to return an int.
// NOTE(review): only the signature is visible in this listing (numbers
// 521-523 are absent); the body is not shown.
520 public int regionStart()
526 * The end of the region on which to perform matches (exclusive).
528 * @return the end index of the region.
529 * @see #region(int,int)
530 * @see #regionStart()
// Accessor declared to return an int.
// NOTE(review): only the signature is visible in this listing (numbers
// 534-536 are absent); the body is not shown.
533 public int regionEnd()
539 * Returns true if the bounds of the region marked by
540 * {@link #regionStart()} and {@link #regionEnd()} are
541 * transparent. When these bounds are transparent, the
542 * matching process can look beyond them to perform
543 * lookahead, lookbehind and boundary matching operations.
544 * By default, the bounds are opaque.
546 * @return true if the bounds of the matching region are
548 * @see #useTransparentBounds(boolean)
549 * @see #region(int,int)
550 * @see #regionStart()
// Returns the current value of the transparentBounds flag.
554 public boolean hasTransparentBounds()
556 return transparentBounds;
560 * Sets the transparency of the bounds of the region
561 * marked by {@link #regionStart()} and {@link #regionEnd()}.
562 * A value of {@code true} makes the bounds transparent,
563 * so the matcher can see beyond them to perform lookahead,
564 * lookbehind and boundary matching operations. A value
565 * of {@code false} (the default) makes the bounds opaque,
566 * restricting the match to the input region denoted
567 * by {@link #regionStart()} and {@link #regionEnd()}.
569 * @param transparent true if the bounds should be transparent.
570 * @return a reference to this matcher.
571 * @see #hasTransparentBounds()
572 * @see #region(int,int)
573 * @see #regionStart()
// Stores the requested transparency setting in transparentBounds.
// NOTE(review): numbers 578, 580 and 581 are absent from this listing; the
// return statement is not visible here.
577 public Matcher useTransparentBounds(boolean transparent)
579 transparentBounds = transparent;
584 * Returns true if the matcher will honour the use of
585 * the anchoring bounds: {@code ^}, {@code \A}, {@code \Z},
586 * {@code \z} and {@code $}. By default, the anchors
587 * are used. Note that the effect of the anchors is
588 * also affected by {@link #hasTransparentBounds()}.
590 * @return true if the matcher will attempt to match
591 * the anchoring bounds.
592 * @see #useAnchoringBounds(boolean)
593 * @see #hasTransparentBounds()
// Anchoring bounds are considered in use exactly when no anchoring flags are
// stored, i.e. when anchoringBounds is 0.
596 public boolean hasAnchoringBounds()
598 return anchoringBounds == 0;
602 * Enables or disables the use of the anchoring bounds:
603 * {@code ^}, {@code \A}, {@code \Z}, {@code \z} and
604 * {@code $}. By default, their use is enabled. When
605 * disabled, the matcher will not attempt to match
608 * @param useAnchors true if anchoring bounds should be used.
609 * @return a reference to this matcher.
611 * @see #hasAnchoringBounds()
// Enables or disables the anchoring bounds by updating anchoringBounds.
// NOTE(review): numbers 614-617, 619 and 620 are absent from this listing;
// the test on useAnchors, the other branch and the return statement are not
// visible. The visible assignment stores RE.REG_NOTBOL|RE.REG_NOTEOL, which
// presumably is the branch taken when useAnchors is false -- confirm.
613 public Matcher useAnchoringBounds(boolean useAnchors)
618 anchoringBounds = RE.REG_NOTBOL|RE.REG_NOTEOL;
623 * Returns a read-only snapshot of the current state of
624 * the {@link Matcher} as a {@link MatchResult}. Any
625 * subsequent changes to this instance are not reflected
626 * in the returned {@link MatchResult}.
628 * @return a {@link MatchResult} instance representing the
629 * current state of the {@link Matcher}.
// Creates a second Matcher over the same pattern and input and gives it a
// clone of the current REMatch, so the copy has its own match object.
// NOTE(review): numbers 632, 634, 636 and 637 are absent from this listing;
// any guard for a null `match` and the return statement are not visible.
631 public MatchResult toMatchResult()
633 Matcher snapshot = new Matcher(pattern, input);
635 snapshot.match = (REMatch) match.clone();
640 * Returns a literalized string of s where characters {@code $} and {@code
643 * @param s the string to literalize.
644 * @return the literalized string.
// Walks s character by character, testing each character for '$' or '\', and
// returns the text accumulated in a CPStringBuilder.
// NOTE(review): numbers 648, 649, 653 and 656-658 are absent from this
// listing; the condition guarding the NullPointerException and the append
// calls inside the loop are not visible. Confirm against the complete file.
647 public static String quoteReplacement(String s)
650 throw new NullPointerException();
651 CPStringBuilder sb = new CPStringBuilder();
652 for (int i = 0; i < s.length(); i++)
654 char ch = s.charAt(i);
// '$' and '\' are the two characters singled out by this test.
655 if (ch == '$' || ch == '\\')
659 return sb.toString();