1 /*******************************************************************************
2 * Copyright (c) 2000, 2008 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/epl-v10.html
9 * IBM Corporation - initial API and implementation
11 * Markus Schorn (Wind River Systems)
12 *******************************************************************************/
13 package org.eclipse.cdt.internal.ui.util;
15 import java.util.Vector;
17 public class StringMatcher {
18 protected String fPattern;
19 protected int fLength; // pattern length
20 protected boolean fIgnoreWildCards;
21 protected boolean fIgnoreCase;
22 protected boolean fHasLeadingStar;
23 protected boolean fHasTrailingStar;
24 protected String fSegments[]; //the given pattern is split into * separated segments
26 /* boundary value beyond which we don't need to search in the text */
27 protected int fBound= 0;
29 protected static final char fSingleWildCard= '\u0000';
31 public static class Position {
32 int start; //inclusive
34 public Position(int start, int end) {
38 public int getStart() {
47 * Find the first occurrence of the pattern between <code>start</code> (inclusive)
48 * and <code>end</code> (exclusive).
49 * @param text the String object to search in
50 * @param start the starting index of the search range, inclusive
51 * @param end the ending index of the search range, exclusive
52 * @return a <code>StringMatcher.Position</code> object that keeps the starting
53 * (inclusive) and ending (exclusive) positions of the first occurrence of the
54 * pattern in the specified range of the text; returns null if not found or subtext
55 * is empty (start==end). A pair of zeros is returned if pattern is empty string.
56 * Note that for a pattern like "*abc*" with leading and trailing stars, the position of "abc"
57 * is returned. For a pattern like "*??*" in text "abcdf", (1,3) is returned.
60 public StringMatcher.Position find(String text, int start, int end) {
61 if (fPattern == null || text == null)
62 throw new IllegalArgumentException();
64 int tlen= text.length();
69 if (end < 0 || start >= end)
72 return new Position(start, start);
73 if (fIgnoreWildCards) {
74 int x= posIn(text, start, end);
77 return new Position(x, x + fLength);
80 int segCount= fSegments.length;
81 if (segCount == 0) //pattern contains only '*'(s)
82 return new Position(start, end);
86 for (int i= 0; i < segCount && curPos < end; ++i) {
87 String current= fSegments[i];
88 int nextMatch= regExpPosIn(text, curPos, end, current);
92 matchStart= nextMatch;
93 curPos= nextMatch + current.length();
95 return new Position(matchStart, curPos);
98 * StringMatcher constructor takes in a String object that is a simple
99 * pattern which may contain '*' for zero or more characters and
100 * '?' for exactly one character. Also takes as parameter a boolean object
101 * specifying if case should be ignored
102 * @deprecated Use StringMatcher(pattern, ignoreCase, ignoreWildCards).
105 public StringMatcher(String aPattern, boolean ignoreCase) {
106 this(aPattern, ignoreCase, false);
109 * StringMatcher constructor takes in a String object that is a simple
110 * pattern which may contain '*' for zero or more characters and
111 * '?' for exactly one character.
113 * Literal '*' and '?' characters must be escaped in the pattern
114 * e.g., "\*" means literal "*", etc.
116 * Escaping any other character (including the escape character itself),
117 * just results in that character in the pattern.
118 * e.g., "\a" means "a" and "\\" means "\"
120 * If invoking the StringMatcher with string literals in Java, don't forget
121 * escape characters are represented by "\\".
123 * @param aPattern the pattern to match text against
124 * @param ignoreCase if true, case is ignored
125 * @param ignoreWildCards if true, wild cards and their escape sequences are ignored
126 * (everything is taken literally).
128 public StringMatcher(String aPattern, boolean ignoreCase, boolean ignoreWildCards) {
129 fIgnoreCase= ignoreCase;
130 fIgnoreWildCards= ignoreWildCards;
131 fLength= aPattern.length();
135 char[] chars= aPattern.toCharArray();
136 for (int i = 0; i < chars.length; i++) {
137 chars[i]= Character.toUpperCase(chars[i]);
139 fPattern= new String(chars);
144 if (fIgnoreWildCards) {
151 * Given the starting (inclusive) and the ending (exclusive) positions in the
152 * <code>text</code>, determine if the given substring matches the pattern.
153 * @return true if the specified portion of the text matches the pattern
154 * @param text a String object that contains the substring to match
155 * @param start marks the starting position (inclusive) of the substring
156 * @param end marks the ending index (exclusive) of the substring
158 public boolean match(String text, int start, int end) {
159 if (null == fPattern || null == text)
160 throw new IllegalArgumentException();
165 if (fIgnoreWildCards)
166 return fPattern.regionMatches(fIgnoreCase, 0, text, start, fLength);
167 int segCount= fSegments.length;
168 if (segCount == 0) //pattern contains only '*'(s) or empty pattern
175 int tlen= text.length();
182 int bound= end - fBound;
186 String current= fSegments[i];
187 int segLength= current.length();
189 /* process first segment */
190 if (!fHasLeadingStar) {
191 if (!regExpRegionMatches(text, start, current, 0, segLength)) {
195 tCurPos= tCurPos + segLength;
198 /* process middle segments */
199 for (; i < segCount && tCurPos <= bound; ++i) {
200 current= fSegments[i];
202 int k= current.indexOf(fSingleWildCard);
204 currentMatch= textPosIn(text, tCurPos, end, current);
205 if (currentMatch < 0)
208 currentMatch= regExpPosIn(text, tCurPos, end, current);
209 if (currentMatch < 0)
212 tCurPos= currentMatch + current.length();
215 /* process final segment */
216 if (!fHasTrailingStar && tCurPos != end) {
217 int clen= current.length();
218 return regExpRegionMatches(text, end - clen, current, 0, clen);
220 return i == segCount;
223 * Match the given <code>text</code> with the pattern.
224 * @return true if matched, otherwise false
225 * @param text a String object
227 public boolean match(String text) {
228 return match(text, 0, text.length());
231 * This method parses the given pattern into segments separated by wildcard '*' characters.
232 * Since wildcards are not being used in this case, the pattern consists of a single segment.
234 private void parseNoWildCards() {
235 fSegments= new String[1];
236 fSegments[0]= fPattern;
240 * This method parses the given pattern into segments separated by wildcard '*' characters.
242 private void parseWildCards() {
243 if (fPattern.startsWith("*")) //$NON-NLS-1$
244 fHasLeadingStar= true;
245 if (fPattern.endsWith("*")) { //$NON-NLS-1$
246 /* make sure it's not an escaped wildcard */
247 if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
248 fHasTrailingStar= true;
252 Vector<String> temp= new Vector<String>();
255 StringBuffer buf= new StringBuffer();
256 while (pos < fLength) {
257 char c= fPattern.charAt(pos++);
260 if (pos >= fLength) {
263 char next= fPattern.charAt(pos++);
264 /* if it's an escape sequence */
265 if (next == '*' || next == '?' || next == '\\') {
268 /* not an escape sequence, just insert literally */
275 if (buf.length() > 0) {
277 temp.addElement(buf.toString());
278 fBound += buf.length();
283 /* append special character representing single match wildcard */
284 buf.append(fSingleWildCard);
291 /* add last buffer to segment list */
292 if (buf.length() > 0) {
293 temp.addElement(buf.toString());
294 fBound += buf.length();
297 fSegments= new String[temp.size()];
298 temp.copyInto(fSegments);
301 * @param text the string to search in; the pattern is taken literally (no wildcards)
302 * @param start the starting index in the text for search, inclusive
303 * @param end the stopping point of search, exclusive
304 * @return the starting index in the text of the pattern, or -1 if not found
306 protected int posIn(String text, int start, int end) { //no wild card in pattern
307 int max= end - fLength;
310 int i= text.indexOf(fPattern, start);
311 if (i == -1 || i > max)
316 for (int i= start; i <= max; ++i) {
317 if (text.regionMatches(true, i, fPattern, 0, fLength))
324 * @param text the string to search in
325 * @param start the starting index in the text for search, inclusive
326 * @param end the stopping point of search, exclusive
327 * @param p a simple regular expression that may contain '?'
328 * @return the starting index in the text of the pattern, or -1 if not found
330 protected int regExpPosIn(String text, int start, int end, String p) {
331 int plen= p.length();
334 for (int i= start; i <= max; ++i) {
335 if (regExpRegionMatches(text, i, p, 0, plen))
341 protected boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) {
343 char tchar= text.charAt(tStart++);
344 char pchar= p.charAt(pStart++);
346 /* process wild cards */
347 if (!fIgnoreWildCards) {
348 /* skip single wild cards */
349 if (pchar == fSingleWildCard) {
356 char tc= Character.toUpperCase(tchar);
365 * @param text the string to match
366 * @param start the starting index in the text for search, inclusive
367 * @param end the stopping point of search, exclusive
368 * @param p a string that has no wildcard
369 * @return the starting index in the text of the pattern, or -1 if not found
371 protected int textPosIn(String text, int start, int end, String p) {
373 int plen= p.length();
377 int i= text.indexOf(p, start);
378 if (i == -1 || i > max)
383 for (int i= 0; i <= max; ++i) {
384 if (text.regionMatches(true, i, p, 0, plen))