2 /********************************************
4 copyright 1991, Michael D. Brennan
6 This is a source file for mawk, an implementation of
7 the AWK programming language.
9 Mawk is distributed without warranty under the terms of
10 the GNU General Public License, version 2, 1991.
11 ********************************************/
14 * Revision 1.3 1993/07/24 17:55:15 mike
17 * Revision 1.2 1993/07/23 13:21:48 mike
20 * Revision 1.1.1.1 1993/07/03 18:58:28 mike
23 * Revision 3.6 1992/12/24 00:44:53 mike
24 * fixed potential LMDOS bozo with M_STR+U_ON+END_ON
25 * fixed minor bug in M_CLASS+U_ON+END_ON
27 * Revision 3.5 1992/01/21 17:33:20 brennan
28 * added some casts so that character classes work with signed chars
30 * Revision 3.4 91/10/29 10:54:09 brennan
33 * Revision 3.3 91/08/13 09:10:18 brennan
36 * Revision 3.2 91/06/10 16:18:17 brennan
39 * Revision 3.1 91/06/07 10:33:28 brennan
42 * Revision 1.4 91/05/31 10:56:32 brennan
43 * stack_empty hack for DOS large model
47 /* match a string against a machine */
/* Run-time stack used by REmatch for saved match states.  The three
   pointers are only declared here (extern); they are defined in another
   translation unit.  REmatch starts with its stack pointer equal to
   RE_run_stack_empty and treats equality with it as "no saved states
   left".  Reaching RE_run_stack_limit while pushing triggers a call to
   RE_new_run_stack().  RE_run_stack_base is not referenced in the
   visible part of this file. */
53 extern RT_STATE *RE_run_stack_base ;
54 extern RT_STATE *RE_run_stack_limit ;
55 extern RT_STATE *RE_run_stack_empty ;
/* Called from push() when the stack pointer reaches RE_run_stack_limit;
   its return value becomes the new stack pointer.
   NOTE(review): presumably enlarges the stack -- confirm against its
   definition. */
57 RT_STATE *RE_new_run_stack() ;
/* push(mx,sx,ssx,ux): pre-increments the local variable stackp (which
   must be in scope at the point of use); if the slot reached is
   RE_run_stack_limit, stackp is replaced by the result of
   RE_new_run_stack().  It then stores mx, sx and ssx in the m, s and ss
   fields of the entry stackp points at.  The visible text ends with a
   line continuation, so the macro has more text than is shown here
   (NOTE(review): presumably the store of ux into the u field, which
   REmatch reads back via (stackp + 1)->u -- confirm).
   The expansion is an if statement followed by further statements, not
   a single statement, so it must not be used as the unbraced body of
   an if, else or loop.

   CASE_UANY(x): expands to the two labels
   "case x + U_OFF : case x + U_ON"; the caller writes the final colon.
   x is substituted without parentheses. */
60 #define push(mx,sx,ssx,ux) if (++stackp == RE_run_stack_limit)\
61 stackp = RE_new_run_stack() ;\
62 stackp->m=(mx);stackp->s=(sx);stackp->ss=(ssx);\
66 #define CASE_UANY(x) case x + U_OFF : case x + U_ON
68 /* Returns a pointer to the start of the first (leftmost) longest match
69 and stores its length through lenp. If there is no match, returns NULL and sets the length to zero. */
71 char *REmatch(str, machine, lenp)
76 register STATE *m = (STATE *) machine ;
77 register char *s = str ;
79 register RT_STATE *stackp ;
83 /* state of current best match stored here */
84 char *cb_ss ; /* the start */
85 char *cb_e ; /* the end , pts at first char not matched */
89 /* check for the easy case */
90 if ((m + 1)->type == M_ACCEPT && m->type == M_STR)
92 if ((ts = str_str(s, m->data.str, m->len))) *lenp = m->len ;
96 u_flag = U_ON ; cb_ss = ss = str_end = (char *) 0 ;
97 stackp = RE_run_stack_empty ;
101 if (stackp == RE_run_stack_empty)
103 if (cb_ss) *lenp = cb_e - cb_ss ;
108 if (cb_ss) /* does new state start too late ? */
112 if (cb_ss < ss) goto refill ;
114 else if (cb_ss < s) goto refill ;
117 m = (stackp + 1)->m ;
118 u_flag = (stackp + 1)->u ;
123 switch (m->type + u_flag)
125 case M_STR + U_OFF + END_OFF:
126 if (strncmp(s, m->data.str, m->len)) goto refill ;
129 if (cb_ss && s > cb_ss) goto refill ;
135 case M_STR + U_OFF + END_ON:
136 if (strcmp(s, m->data.str)) goto refill ;
139 if (cb_ss && s > cb_ss) goto refill ;
145 case M_STR + U_ON + END_OFF:
146 if (!(s = str_str(s, m->data.str, m->len))) goto refill ;
147 push(m, s + 1, ss, U_ON) ;
150 if (cb_ss && s > cb_ss) goto refill ;
153 s += m->len ; m++ ; u_flag = U_OFF ;
156 case M_STR + U_ON + END_ON:
157 if (!str_end) str_end = s + strlen(s) ;
158 t = (str_end - s) - m->len ;
159 if (t < 0 || memcmp(ts = s + t, m->data.str, m->len))
163 if (cb_ss && ts > cb_ss) goto refill ;
166 s = str_end ; m++ ; u_flag = U_OFF ;
169 case M_CLASS + U_OFF + END_OFF:
170 if (!ison(*m->data.bvp, s[0])) goto refill ;
173 if (cb_ss && s > cb_ss) goto refill ;
179 case M_CLASS + U_OFF + END_ON:
180 if (s[1] || !ison(*m->data.bvp, s[0])) goto refill ;
183 if (cb_ss && s > cb_ss) goto refill ;
189 case M_CLASS + U_ON + END_OFF:
190 while (!ison(*m->data.bvp, s[0]))
192 if (s[0] == 0) goto refill ;
196 push(m, s, ss, U_ON) ;
199 if (cb_ss && s - 1 > cb_ss) goto refill ;
202 m++ ; u_flag = U_OFF ;
205 case M_CLASS + U_ON + END_ON:
206 if (!str_end) str_end = s + strlen(s) ;
207 if (s[0] == 0 || !ison(*m->data.bvp, str_end[-1]))
211 if (cb_ss && str_end - 1 > cb_ss) goto refill ;
212 else ss = str_end - 1 ;
214 s = str_end ; m++ ; u_flag = U_OFF ;
217 case M_ANY + U_OFF + END_OFF:
218 if (s[0] == 0) goto refill ;
221 if (cb_ss && s > cb_ss) goto refill ;
227 case M_ANY + U_OFF + END_ON:
228 if (s[0] == 0 || s[1] != 0) goto refill ;
231 if (cb_ss && s > cb_ss) goto refill ;
237 case M_ANY + U_ON + END_OFF:
238 if (s[0] == 0) goto refill ;
240 push(m, s, ss, U_ON) ;
243 if (cb_ss && s - 1 > cb_ss) goto refill ;
246 m++ ; u_flag = U_OFF ;
249 case M_ANY + U_ON + END_ON:
250 if (s[0] == 0) goto refill ;
251 if (!str_end) str_end = s + strlen(s) ;
254 if (cb_ss && str_end - 1 > cb_ss) goto refill ;
255 else ss = str_end - 1 ;
257 s = str_end ; m++ ; u_flag = U_OFF ;
260 case M_START + U_OFF + END_OFF:
261 case M_START + U_ON + END_OFF:
262 if (s != str) goto refill ;
264 m++ ; u_flag = U_OFF ;
267 case M_START + U_OFF + END_ON:
268 case M_START + U_ON + END_ON:
269 if (s != str || s[0] != 0) goto refill ;
271 m++ ; u_flag = U_OFF ;
275 if (s[0] != 0) goto refill ;
278 if (cb_ss && s > cb_ss) goto refill ;
281 m++ ; goto reswitch ;
284 s = str_end ? str_end : (str_end = s + strlen(s)) ;
287 if (cb_ss && s > cb_ss) goto refill ;
290 m++ ; u_flag = U_OFF ;
296 if (cb_ss && s > cb_ss) goto refill ;
299 u_flag = U_ON ; m++ ;
306 CASE_UANY(M_2JA): /* take the non jump branch */
307 push(m + m->data.jump, s, ss, u_flag) ;
311 CASE_UANY(M_2JB): /* take the jump branch */
312 push(m + 1, s, ss, u_flag) ;
316 case M_ACCEPT + U_OFF:
318 if (!cb_ss || ss < cb_ss || (ss == cb_ss && s > cb_e))
320 /* we have a new current best */
321 cb_ss = ss ; cb_e = s ;
325 case M_ACCEPT + U_ON:
327 else s = str_end ? str_end : (str_end = s + strlen(s)) ;
329 if (!cb_ss || ss < cb_ss || (ss == cb_ss && s > cb_e))
331 /* we have a new current best */
332 cb_ss = ss ; cb_e = s ;
337 RE_panic("unexpected case in REmatch") ;