1/*
2 * The matching engine and friends.  This file is #included by regexec.c
3 * after suitable #defines of a variety of macros used herein, so that
4 * different state representations can be used without duplicating masses
5 * of code.
6 */
7
8#ifdef SNAMES
9#define matcher smatcher
10#define fast    sfast
11#define slow    sslow
12#define dissect sdissect
13#define backref sbackref
14#define step    sstep
15#define print   sprint
16#define at  sat
17#define match   smat
18#endif
19#ifdef LNAMES
20#define matcher lmatcher
21#define fast    lfast
22#define slow    lslow
23#define dissect ldissect
24#define backref lbackref
25#define step    lstep
26#define print   lprint
27#define at  lat
28#define match   lmat
29#endif
30
31/* another structure passed up and down to avoid zillions of parameters */
32struct match {
33    struct re_guts *g;
34    int eflags;
35    regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
36    unsigned char *offp;        /* offsets work from here */
37    unsigned char *beginp;      /* start of string -- virtual NUL precedes */
38    unsigned char *endp;        /* end of string -- virtual NUL here */
39    unsigned char *coldp;       /* can be no match starting before here */
40    unsigned char **lastpos;        /* [nplus+1] */
41    STATEVARS;
42    states st;      /* current states */
43    states fresh;       /* states for a fresh start */
44    states tmp;     /* temporary */
45    states empty;       /* empty set of states */
46};
47
48#include "engine.ih"
49
50#ifdef REDEBUG
51#define SP(t, s, c) print(m, t, s, c, stdout)
52#define AT(t, p1, p2, s1, s2)   at(m, t, p1, p2, s1, s2)
53#define NOTE(str)   { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
54#else
55#define SP(t, s, c) /* nothing */
56#define AT(t, p1, p2, s1, s2)   /* nothing */
57#define NOTE(s) /* nothing */
58#endif
59
60/*
61 - matcher - the actual matching engine
62 == static int matcher(register struct re_guts *g, char *string, \
63 == size_t nmatch, regmatch_t pmatch[], int eflags);
64 */
65static int          /* 0 success, REG_NOMATCH failure */
66matcher(g, string, nmatch, pmatch, eflags)
67register struct re_guts *g;
68unsigned char *string;
69size_t nmatch;
70regmatch_t pmatch[];
71int eflags;
72{
73    register unsigned char *endp;
74    register size_t i;
75    struct match mv;
76    register struct match *m = &mv;
77    register unsigned char *dp;
78    const register sopno gf = g->firststate+1;  /* +1 for OEND */
79    const register sopno gl = g->laststate;
80    unsigned char *start;
81    unsigned char *stop;
82
83    /* simplify the situation where possible */
84    if (g->cflags&REG_NOSUB)
85        nmatch = 0;
86    if (eflags&REG_STARTEND) {
87        start = string + pmatch[0].rm_so;
88        stop = string + pmatch[0].rm_eo;
89    } else {
90        start = string;
91        stop = start + strlen(start);
92    }
93    if (stop < start)
94        return(REG_INVARG);
95
96    /* prescreening; this does wonders for this rather slow code */
97    if (g->must != NULL) {
98        for (dp = start; dp < stop; dp++)
99            if (*dp == g->must[0] && stop - dp >= g->mlen &&
100                memcmp(dp, g->must, (size_t)g->mlen) == 0)
101                break;
102        if (dp == stop)     /* we didn't find g->must */
103            return(REG_NOMATCH);
104    }
105
106    /* match struct setup */
107    m->g = g;
108    m->eflags = eflags;
109    m->pmatch = NULL;
110    m->lastpos = NULL;
111    m->offp = string;
112    m->beginp = start;
113    m->endp = stop;
114    STATESETUP(m, 4);
115    SETUP(m->st);
116    SETUP(m->fresh);
117    SETUP(m->tmp);
118    SETUP(m->empty);
119    CLEAR(m->empty);
120
121    /* this loop does only one repetition except for backrefs */
122    for (;;) {
123        endp = fast(m, start, stop, gf, gl);
124        if (endp == NULL) {     /* a miss */
125            STATETEARDOWN(m);
126            return(REG_NOMATCH);
127        }
128        if (nmatch == 0 && !g->backrefs)
129            break;      /* no further info needed */
130
131        /* where? */
132        assert(m->coldp != NULL);
133        for (;;) {
134            NOTE("finding start");
135            endp = slow(m, m->coldp, stop, gf, gl);
136            if (endp != NULL)
137                break;
138            assert(m->coldp < m->endp);
139            m->coldp++;
140        }
141        if (nmatch == 1 && !g->backrefs)
142            break;      /* no further info needed */
143
144        /* oh my, he wants the subexpressions... */
145        if (m->pmatch == NULL)
146            m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
147                            sizeof(regmatch_t));
148        if (m->pmatch == NULL) {
149            STATETEARDOWN(m);
150            return(REG_ESPACE);
151        }
152        for (i = 1; i <= m->g->nsub; i++)
153            m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
154        if (!g->backrefs && !(m->eflags&REG_BACKR)) {
155            NOTE("dissecting");
156            dp = dissect(m, m->coldp, endp, gf, gl);
157        } else {
158            if (g->nplus > 0 && m->lastpos == NULL)
159                m->lastpos = (unsigned char **)malloc((g->nplus+1) *
160                            sizeof(unsigned char *));
161            if (g->nplus > 0 && m->lastpos == NULL) {
162                free((char *)m->pmatch);
163                STATETEARDOWN(m);
164                return(REG_ESPACE);
165            }
166            NOTE("backref dissect");
167            dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
168        }
169        if (dp != NULL)
170            break;
171
172        /* uh-oh... we couldn't find a subexpression-level match */
173        assert(g->backrefs);    /* must be back references doing it */
174        assert(g->nplus == 0 || m->lastpos != NULL);
175        for (;;) {
176            if (dp != NULL || endp <= m->coldp)
177                break;      /* defeat */
178            NOTE("backoff");
179            endp = slow(m, m->coldp, endp-1, gf, gl);
180            if (endp == NULL)
181                break;      /* defeat */
182            /* try it on a shorter possibility */
183#ifndef NDEBUG
184            for (i = 1; i <= m->g->nsub; i++) {
185                assert(m->pmatch[i].rm_so == -1);
186                assert(m->pmatch[i].rm_eo == -1);
187            }
188#endif
189            NOTE("backoff dissect");
190            dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
191        }
192        assert(dp == NULL || dp == endp);
193        if (dp != NULL)     /* found a shorter one */
194            break;
195
196        /* despite initial appearances, there is no match here */
197        NOTE("false alarm");
198        start = m->coldp + 1;   /* recycle starting later */
199        assert(start <= stop);
200    }
201
202    /* fill in the details if requested */
203    if (nmatch > 0) {
204        pmatch[0].rm_so = m->coldp - m->offp;
205        pmatch[0].rm_eo = endp - m->offp;
206    }
207    if (nmatch > 1) {
208        assert(m->pmatch != NULL);
209        for (i = 1; i < nmatch; i++)
210            if (i <= m->g->nsub)
211                pmatch[i] = m->pmatch[i];
212            else {
213                pmatch[i].rm_so = -1;
214                pmatch[i].rm_eo = -1;
215            }
216    }
217
218    if (m->pmatch != NULL)
219        free((char *)m->pmatch);
220    if (m->lastpos != NULL)
221        free((char *)m->lastpos);
222    STATETEARDOWN(m);
223    return(0);
224}
225
226/*
227 - dissect - figure out what matched what, no back references
228 == static unsigned char *dissect(register struct match *m, unsigned char *start, \
229 == unsigned char *stop, sopno startst, sopno stopst);
230 */
231static unsigned char *          /* == stop (success) always */
232dissect(m, start, stop, startst, stopst)
233register struct match *m;
234unsigned char *start;
235unsigned char *stop;
236sopno startst;
237sopno stopst;
238{
239    register int i;
240    register sopno ss;  /* start sop of current subRE */
241    register sopno es;  /* end sop of current subRE */
242    register unsigned char *sp; /* start of string matched by it */
243    register unsigned char *stp;    /* string matched by it cannot pass here */
244    register unsigned char *rest;   /* start of rest of string */
245    register unsigned char *tail;   /* string unmatched by rest of RE */
246    register sopno ssub;    /* start sop of subsubRE */
247    register sopno esub;    /* end sop of subsubRE */
248    register unsigned char *ssp;    /* start of string matched by subsubRE */
249    register unsigned char *sep;    /* end of string matched by subsubRE */
250    register unsigned char *oldssp; /* previous ssp */
251    register unsigned char *dp;
252
253    AT("diss", start, stop, startst, stopst);
254    sp = start;
255    for (ss = startst; ss < stopst; ss = es) {
256        /* identify end of subRE */
257        es = ss;
258        switch (OP(m->g->strip[es])) {
259        case OPLUS_:
260        case OQUEST_:
261            es += OPND(m->g->strip[es]);
262            break;
263        case OCH_:
264            while (OP(m->g->strip[es]) != O_CH)
265                es += OPND(m->g->strip[es]);
266            break;
267        }
268        es++;
269
270        /* figure out what it matched */
271        switch (OP(m->g->strip[ss])) {
272        case OEND:
273            assert(PHP_REGEX_NOPE);
274            break;
275        case OCHAR:
276            sp++;
277            break;
278        case OBOL:
279        case OEOL:
280        case OBOW:
281        case OEOW:
282            break;
283        case OANY:
284        case OANYOF:
285            sp++;
286            break;
287        case OBACK_:
288        case O_BACK:
289            assert(PHP_REGEX_NOPE);
290            break;
291        /* cases where length of match is hard to find */
292        case OQUEST_:
293            stp = stop;
294            for (;;) {
295                /* how long could this one be? */
296                rest = slow(m, sp, stp, ss, es);
297                assert(rest != NULL);   /* it did match */
298                /* could the rest match the rest? */
299                tail = slow(m, rest, stop, es, stopst);
300                if (tail == stop)
301                    break;      /* yes! */
302                /* no -- try a shorter match for this one */
303                stp = rest - 1;
304                assert(stp >= sp);  /* it did work */
305            }
306            ssub = ss + 1;
307            esub = es - 1;
308            /* did innards match? */
309            if (slow(m, sp, rest, ssub, esub) != NULL) {
310                dp = dissect(m, sp, rest, ssub, esub);
311                assert(dp == rest);
312            } else      /* no */
313                assert(sp == rest);
314            sp = rest;
315            break;
316        case OPLUS_:
317            stp = stop;
318            for (;;) {
319                /* how long could this one be? */
320                rest = slow(m, sp, stp, ss, es);
321                assert(rest != NULL);   /* it did match */
322                /* could the rest match the rest? */
323                tail = slow(m, rest, stop, es, stopst);
324                if (tail == stop)
325                    break;      /* yes! */
326                /* no -- try a shorter match for this one */
327                stp = rest - 1;
328                assert(stp >= sp);  /* it did work */
329            }
330            ssub = ss + 1;
331            esub = es - 1;
332            ssp = sp;
333            oldssp = ssp;
334            for (;;) {  /* find last match of innards */
335                sep = slow(m, ssp, rest, ssub, esub);
336                if (sep == NULL || sep == ssp)
337                    break;  /* failed or matched null */
338                oldssp = ssp;   /* on to next try */
339                ssp = sep;
340            }
341            if (sep == NULL) {
342                /* last successful match */
343                sep = ssp;
344                ssp = oldssp;
345            }
346            assert(sep == rest);    /* must exhaust substring */
347            assert(slow(m, ssp, sep, ssub, esub) == rest);
348            dp = dissect(m, ssp, sep, ssub, esub);
349            assert(dp == sep);
350            sp = rest;
351            break;
352        case OCH_:
353            stp = stop;
354            for (;;) {
355                /* how long could this one be? */
356                rest = slow(m, sp, stp, ss, es);
357                assert(rest != NULL);   /* it did match */
358                /* could the rest match the rest? */
359                tail = slow(m, rest, stop, es, stopst);
360                if (tail == stop)
361                    break;      /* yes! */
362                /* no -- try a shorter match for this one */
363                stp = rest - 1;
364                assert(stp >= sp);  /* it did work */
365            }
366            ssub = ss + 1;
367            esub = ss + OPND(m->g->strip[ss]) - 1;
368            assert(OP(m->g->strip[esub]) == OOR1);
369            for (;;) {  /* find first matching branch */
370                if (slow(m, sp, rest, ssub, esub) == rest)
371                    break;  /* it matched all of it */
372                /* that one missed, try next one */
373                assert(OP(m->g->strip[esub]) == OOR1);
374                esub++;
375                assert(OP(m->g->strip[esub]) == OOR2);
376                ssub = esub + 1;
377                esub += OPND(m->g->strip[esub]);
378                if (OP(m->g->strip[esub]) == OOR2)
379                    esub--;
380                else
381                    assert(OP(m->g->strip[esub]) == O_CH);
382            }
383            dp = dissect(m, sp, rest, ssub, esub);
384            assert(dp == rest);
385            sp = rest;
386            break;
387        case O_PLUS:
388        case O_QUEST:
389        case OOR1:
390        case OOR2:
391        case O_CH:
392            assert(PHP_REGEX_NOPE);
393            break;
394        case OLPAREN:
395            i = OPND(m->g->strip[ss]);
396            assert(0 < i && i <= m->g->nsub);
397            m->pmatch[i].rm_so = sp - m->offp;
398            break;
399        case ORPAREN:
400            i = OPND(m->g->strip[ss]);
401            assert(0 < i && i <= m->g->nsub);
402            m->pmatch[i].rm_eo = sp - m->offp;
403            break;
404        default:        /* uh oh */
405            assert(PHP_REGEX_NOPE);
406            break;
407        }
408    }
409
410    assert(sp == stop);
411    return(sp);
412}
413
414/*
415 - backref - figure out what matched what, figuring in back references
416 == static unsigned char *backref(register struct match *m, unsigned char *start, \
417 == unsigned char *stop, sopno startst, sopno stopst, sopno lev);
418 */
419static unsigned char *          /* == stop (success) or NULL (failure) */
420backref(m, start, stop, startst, stopst, lev)
421register struct match *m;
422unsigned char *start;
423unsigned char *stop;
424sopno startst;
425sopno stopst;
426sopno lev;          /* PLUS nesting level */
427{
428    register int i;
429    register sopno ss;  /* start sop of current subRE */
430    register unsigned char *sp; /* start of string matched by it */
431    register sopno ssub;    /* start sop of subsubRE */
432    register sopno esub;    /* end sop of subsubRE */
433    register unsigned char *ssp;    /* start of string matched by subsubRE */
434    register unsigned char *dp;
435    register size_t len;
436    register int hard;
437    register sop s;
438    register regoff_t offsave;
439    register cset *cs;
440
441    AT("back", start, stop, startst, stopst);
442    sp = start;
443
444    /* get as far as we can with easy stuff */
445    hard = 0;
446    for (ss = startst; !hard && ss < stopst; ss++)
447        switch (OP(s = m->g->strip[ss])) {
448        case OCHAR:
449            if (sp == stop || *sp++ != (unsigned char)OPND(s))
450                return(NULL);
451            break;
452        case OANY:
453            if (sp == stop)
454                return(NULL);
455            sp++;
456            break;
457        case OANYOF:
458            cs = &m->g->sets[OPND(s)];
459            if (sp == stop || !CHIN(cs, *sp++))
460                return(NULL);
461            break;
462        case OBOL:
463            if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
464                    (sp < m->endp && *(sp-1) == '\n' &&
465                        (m->g->cflags&REG_NEWLINE)) )
466                { /* yes */ }
467            else
468                return(NULL);
469            break;
470        case OEOL:
471            if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
472                    (sp < m->endp && *sp == '\n' &&
473                        (m->g->cflags&REG_NEWLINE)) )
474                { /* yes */ }
475            else
476                return(NULL);
477            break;
478        case OBOW:
479            if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
480                    (sp < m->endp && *(sp-1) == '\n' &&
481                        (m->g->cflags&REG_NEWLINE)) ||
482                    (sp > m->beginp &&
483                            !ISWORD(*(sp-1))) ) &&
484                    (sp < m->endp && ISWORD(*sp)) )
485                { /* yes */ }
486            else
487                return(NULL);
488            break;
489        case OEOW:
490            if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
491                    (sp < m->endp && *sp == '\n' &&
492                        (m->g->cflags&REG_NEWLINE)) ||
493                    (sp < m->endp && !ISWORD(*sp)) ) &&
494                    (sp > m->beginp && ISWORD(*(sp-1))) )
495                { /* yes */ }
496            else
497                return(NULL);
498            break;
499        case O_QUEST:
500            break;
501        case OOR1:  /* matches null but needs to skip */
502            ss++;
503            s = m->g->strip[ss];
504            do {
505                assert(OP(s) == OOR2);
506                ss += OPND(s);
507            } while (OP(s = m->g->strip[ss]) != O_CH);
508            /* note that the ss++ gets us past the O_CH */
509            break;
510        default:    /* have to make a choice */
511            hard = 1;
512            break;
513        }
514    if (!hard) {        /* that was it! */
515        if (sp != stop)
516            return(NULL);
517        return(sp);
518    }
519    ss--;           /* adjust for the for's final increment */
520
521    /* the hard stuff */
522    AT("hard", sp, stop, ss, stopst);
523    s = m->g->strip[ss];
524    switch (OP(s)) {
525    case OBACK_:        /* the vilest depths */
526        i = OPND(s);
527        assert(0 < i && i <= m->g->nsub);
528        if (m->pmatch[i].rm_eo == -1)
529            return(NULL);
530        assert(m->pmatch[i].rm_so != -1);
531        len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
532        assert(stop - m->beginp >= len);
533        if (sp > stop - len)
534            return(NULL);   /* not enough left to match */
535        ssp = m->offp + m->pmatch[i].rm_so;
536        if (memcmp(sp, ssp, len) != 0)
537            return(NULL);
538        while (m->g->strip[ss] != SOP(O_BACK, i))
539            ss++;
540        return(backref(m, sp+len, stop, ss+1, stopst, lev));
541        break;
542    case OQUEST_:       /* to null or not */
543        dp = backref(m, sp, stop, ss+1, stopst, lev);
544        if (dp != NULL)
545            return(dp); /* not */
546        return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
547        break;
548    case OPLUS_:
549        assert(m->lastpos != NULL);
550        assert(lev+1 <= m->g->nplus);
551        m->lastpos[lev+1] = sp;
552        return(backref(m, sp, stop, ss+1, stopst, lev+1));
553        break;
554    case O_PLUS:
555        if (sp == m->lastpos[lev])  /* last pass matched null */
556            return(backref(m, sp, stop, ss+1, stopst, lev-1));
557        /* try another pass */
558        m->lastpos[lev] = sp;
559        dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
560        if (dp == NULL)
561            return(backref(m, sp, stop, ss+1, stopst, lev-1));
562        else
563            return(dp);
564        break;
565    case OCH_:      /* find the right one, if any */
566        ssub = ss + 1;
567        esub = ss + OPND(s) - 1;
568        assert(OP(m->g->strip[esub]) == OOR1);
569        for (;;) {  /* find first matching branch */
570            dp = backref(m, sp, stop, ssub, esub, lev);
571            if (dp != NULL)
572                return(dp);
573            /* that one missed, try next one */
574            if (OP(m->g->strip[esub]) == O_CH)
575                return(NULL);   /* there is none */
576            esub++;
577            assert(OP(m->g->strip[esub]) == OOR2);
578            ssub = esub + 1;
579            esub += OPND(m->g->strip[esub]);
580            if (OP(m->g->strip[esub]) == OOR2)
581                esub--;
582            else
583                assert(OP(m->g->strip[esub]) == O_CH);
584        }
585        break;
586    case OLPAREN:       /* must undo assignment if rest fails */
587        i = OPND(s);
588        assert(0 < i && i <= m->g->nsub);
589        offsave = m->pmatch[i].rm_so;
590        m->pmatch[i].rm_so = sp - m->offp;
591        dp = backref(m, sp, stop, ss+1, stopst, lev);
592        if (dp != NULL)
593            return(dp);
594        m->pmatch[i].rm_so = offsave;
595        return(NULL);
596        break;
597    case ORPAREN:       /* must undo assignment if rest fails */
598        i = OPND(s);
599        assert(0 < i && i <= m->g->nsub);
600        offsave = m->pmatch[i].rm_eo;
601        m->pmatch[i].rm_eo = sp - m->offp;
602        dp = backref(m, sp, stop, ss+1, stopst, lev);
603        if (dp != NULL)
604            return(dp);
605        m->pmatch[i].rm_eo = offsave;
606        return(NULL);
607        break;
608    default:        /* uh oh */
609        assert(PHP_REGEX_NOPE);
610        break;
611    }
612
613    /* "can't happen" */
614    assert(PHP_REGEX_NOPE);
615    /* NOTREACHED */
616    return((unsigned char *)NULL);  /* dummy */
617}
618
619/*
620 - fast - step through the string at top speed
621 == static unsigned char *fast(register struct match *m, unsigned char *start, \
622 == unsigned char *stop, sopno startst, sopno stopst);
623 */
624static unsigned char *          /* where tentative match ended, or NULL */
625fast(m, start, stop, startst, stopst)
626register struct match *m;
627unsigned char *start;
628unsigned char *stop;
629sopno startst;
630sopno stopst;
631{
632    register states st = m->st;
633    register states fresh = m->fresh;
634    register states tmp = m->tmp;
635    register unsigned char *p = start;
636    register int c = (start == m->beginp) ? OUT : *(start-1);
637    register int lastc; /* previous c */
638    register int flagch;
639    register int i;
640    register unsigned char *coldp;  /* last p after which no match was underway */
641
642    CLEAR(st);
643    SET1(st, startst);
644    st = step(m->g, startst, stopst, st, NOTHING, st);
645    ASSIGN(fresh, st);
646    SP("start", st, *p);
647    coldp = NULL;
648    for (;;) {
649        /* next character */
650        lastc = c;
651        c = (p == m->endp) ? OUT : *p;
652        if (EQ(st, fresh))
653            coldp = p;
654
655        /* is there an EOL and/or BOL between lastc and c? */
656        flagch = '\0';
657        i = 0;
658        if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
659                (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
660            flagch = BOL;
661            i = m->g->nbol;
662        }
663        if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
664                (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
665            flagch = (flagch == BOL) ? BOLEOL : EOL;
666            i += m->g->neol;
667        }
668        if (i != 0) {
669            for (; i > 0; i--)
670                st = step(m->g, startst, stopst, st, flagch, st);
671            SP("boleol", st, c);
672        }
673
674        /* how about a word boundary? */
675        if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
676                    (c != OUT && ISWORD(c)) ) {
677            flagch = BOW;
678        }
679        if ( (lastc != OUT && ISWORD(lastc)) &&
680                (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
681            flagch = EOW;
682        }
683        if (flagch == BOW || flagch == EOW) {
684            st = step(m->g, startst, stopst, st, flagch, st);
685            SP("boweow", st, c);
686        }
687
688        /* are we done? */
689        if (ISSET(st, stopst) || p == stop)
690            break;      /* NOTE BREAK OUT */
691
692        /* no, we must deal with this character */
693        ASSIGN(tmp, st);
694        ASSIGN(st, fresh);
695        assert(c != OUT);
696        st = step(m->g, startst, stopst, tmp, c, st);
697        SP("aft", st, c);
698        assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
699        p++;
700    }
701
702    assert(coldp != NULL);
703    m->coldp = coldp;
704    if (ISSET(st, stopst))
705        return(p+1);
706    else
707        return(NULL);
708}
709
710/*
711 - slow - step through the string more deliberately
712 == static unsigned char *slow(register struct match *m, unsigned char *start, \
713 == unsigned char *stop, sopno startst, sopno stopst);
714 */
715static unsigned char *          /* where it ended */
716slow(m, start, stop, startst, stopst)
717register struct match *m;
718unsigned char *start;
719unsigned char *stop;
720sopno startst;
721sopno stopst;
722{
723    register states st = m->st;
724    register states empty = m->empty;
725    register states tmp = m->tmp;
726    register unsigned char *p = start;
727    register int c = (start == m->beginp) ? OUT : *(start-1);
728    register int lastc; /* previous c */
729    register int flagch;
730    register int i;
731    register unsigned char *matchp; /* last p at which a match ended */
732
733    AT("slow", start, stop, startst, stopst);
734    CLEAR(st);
735    SET1(st, startst);
736    SP("sstart", st, *p);
737    st = step(m->g, startst, stopst, st, NOTHING, st);
738    matchp = NULL;
739    for (;;) {
740        /* next character */
741        lastc = c;
742        c = (p == m->endp) ? OUT : *p;
743
744        /* is there an EOL and/or BOL between lastc and c? */
745        flagch = '\0';
746        i = 0;
747        if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
748                (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
749            flagch = BOL;
750            i = m->g->nbol;
751        }
752        if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
753                (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
754            flagch = (flagch == BOL) ? BOLEOL : EOL;
755            i += m->g->neol;
756        }
757        if (i != 0) {
758            for (; i > 0; i--)
759                st = step(m->g, startst, stopst, st, flagch, st);
760            SP("sboleol", st, c);
761        }
762
763        /* how about a word boundary? */
764        if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
765                    (c != OUT && ISWORD(c)) ) {
766            flagch = BOW;
767        }
768        if ( (lastc != OUT && ISWORD(lastc)) &&
769                (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
770            flagch = EOW;
771        }
772        if (flagch == BOW || flagch == EOW) {
773            st = step(m->g, startst, stopst, st, flagch, st);
774            SP("sboweow", st, c);
775        }
776
777        /* are we done? */
778        if (ISSET(st, stopst))
779            matchp = p;
780        if (EQ(st, empty) || p == stop)
781            break;      /* NOTE BREAK OUT */
782
783        /* no, we must deal with this character */
784        ASSIGN(tmp, st);
785        ASSIGN(st, empty);
786        assert(c != OUT);
787        st = step(m->g, startst, stopst, tmp, c, st);
788        SP("saft", st, c);
789        assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
790        p++;
791    }
792
793    return(matchp);
794}
795
796
797/*
798 - step - map set of states reachable before char to set reachable after
799 == static states step(register struct re_guts *g, sopno start, sopno stop, \
800 == register states bef, int ch, register states aft);
801 == #define BOL (OUT+1)
802 == #define EOL (BOL+1)
803 == #define BOLEOL  (BOL+2)
804 == #define NOTHING (BOL+3)
805 == #define BOW (BOL+4)
806 == #define EOW (BOL+5)
807 == #define CODEMAX (BOL+5)     // highest code used
808 == #define NONCHAR(c)  ((c) > UCHAR_MAX)
809 == #define NNONCHAR    (CODEMAX-UCHAR_MAX)
810 */
811static states
812step(g, start, stop, bef, ch, aft)
813register struct re_guts *g;
814sopno start;            /* start state within strip */
815sopno stop;         /* state after stop state within strip */
816register states bef;        /* states reachable before */
817int ch;             /* character or NONCHAR code */
818register states aft;        /* states already known reachable after */
819{
820    register cset *cs;
821    register sop s;
822    register sopno pc;
823    register onestate here;     /* note, macros know this name */
824    register sopno look;
825    register long i;
826
827    for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
828        s = g->strip[pc];
829        switch (OP(s)) {
830        case OEND:
831            assert(pc == stop-1);
832            break;
833        case OCHAR:
834            /* only characters can match */
835            assert(!NONCHAR(ch) || ch != (unsigned char)OPND(s));
836            if (ch == (unsigned char)OPND(s))
837                FWD(aft, bef, 1);
838            break;
839        case OBOL:
840            if (ch == BOL || ch == BOLEOL)
841                FWD(aft, bef, 1);
842            break;
843        case OEOL:
844            if (ch == EOL || ch == BOLEOL)
845                FWD(aft, bef, 1);
846            break;
847        case OBOW:
848            if (ch == BOW)
849                FWD(aft, bef, 1);
850            break;
851        case OEOW:
852            if (ch == EOW)
853                FWD(aft, bef, 1);
854            break;
855        case OANY:
856            if (!NONCHAR(ch))
857                FWD(aft, bef, 1);
858            break;
859        case OANYOF:
860            cs = &g->sets[OPND(s)];
861            if (!NONCHAR(ch) && CHIN(cs, ch))
862                FWD(aft, bef, 1);
863            break;
864        case OBACK_:        /* ignored here */
865        case O_BACK:
866            FWD(aft, aft, 1);
867            break;
868        case OPLUS_:        /* forward, this is just an empty */
869            FWD(aft, aft, 1);
870            break;
871        case O_PLUS:        /* both forward and back */
872            FWD(aft, aft, 1);
873            i = ISSETBACK(aft, OPND(s));
874            BACK(aft, aft, OPND(s));
875            if (!i && ISSETBACK(aft, OPND(s))) {
876                /* oho, must reconsider loop body */
877                pc -= OPND(s) + 1;
878                INIT(here, pc);
879            }
880            break;
881        case OQUEST_:       /* two branches, both forward */
882            FWD(aft, aft, 1);
883            FWD(aft, aft, OPND(s));
884            break;
885        case O_QUEST:       /* just an empty */
886            FWD(aft, aft, 1);
887            break;
888        case OLPAREN:       /* not significant here */
889        case ORPAREN:
890            FWD(aft, aft, 1);
891            break;
892        case OCH_:      /* mark the first two branches */
893            FWD(aft, aft, 1);
894            assert(OP(g->strip[pc+OPND(s)]) == OOR2);
895            FWD(aft, aft, OPND(s));
896            break;
897        case OOR1:      /* done a branch, find the O_CH */
898            if (ISSTATEIN(aft, here)) {
899                for (look = 1;
900                        OP(s = g->strip[pc+look]) != O_CH;
901                        look += OPND(s))
902                    assert(OP(s) == OOR2);
903                FWD(aft, aft, look);
904            }
905            break;
906        case OOR2:      /* propagate OCH_'s marking */
907            FWD(aft, aft, 1);
908            if (OP(g->strip[pc+OPND(s)]) != O_CH) {
909                assert(OP(g->strip[pc+OPND(s)]) == OOR2);
910                FWD(aft, aft, OPND(s));
911            }
912            break;
913        case O_CH:      /* just empty */
914            FWD(aft, aft, 1);
915            break;
916        default:        /* ooooops... */
917            assert(PHP_REGEX_NOPE);
918            break;
919        }
920    }
921
922    return(aft);
923}
924
925#ifdef REDEBUG
926/*
927 - print - print a set of states
928 == #ifdef REDEBUG
929 == static void print(struct match *m, unsigned char *caption, states st, \
930 == int ch, FILE *d);
931 == #endif
932 */
933static void
934print(m, caption, st, ch, d)
935struct match *m;
936unsigned char *caption;
937states st;
938int ch;
939FILE *d;
940{
941    register struct re_guts *g = m->g;
942    register int i;
943    register int first = 1;
944
945    if (!(m->eflags&REG_TRACE))
946        return;
947
948    fprintf(d, "%s", caption);
949    if (ch != '\0')
950        fprintf(d, " %s", pchar(ch));
951    for (i = 0; i < g->nstates; i++)
952        if (ISSET(st, i)) {
953            fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
954            first = 0;
955        }
956    fprintf(d, "\n");
957}
958
959/*
960 - at - print current situation
961 == #ifdef REDEBUG
962 == static void at(struct match *m, unsigned char *title, unsigned char *start, unsigned char *stop, \
963 ==                     sopno startst, sopno stopst);
964 == #endif
965 */
966static void
967at(m, title, start, stop, startst, stopst)
968struct match *m;
969unsigned char *title;
970unsigned char *start;
971unsigned char *stop;
972sopno startst;
973sopno stopst;
974{
975    if (!(m->eflags&REG_TRACE))
976        return;
977
978    printf("%s %s-", title, pchar(*start));
979    printf("%s ", pchar(*stop));
980    printf("%ld-%ld\n", (long)startst, (long)stopst);
981}
982
983#ifndef PCHARDONE
984#define PCHARDONE   /* never again */
985/*
986 - pchar - make a character printable
987 == #ifdef REDEBUG
988 == static unsigned char *pchar(int ch);
989 == #endif
990 *
991 * Is this identical to regchar() over in debug.c?  Well, yes.  But a
992 * duplicate here avoids having a debugging-capable regexec.o tied to
993 * a matching debug.o, and this is convenient.  It all disappears in
994 * the non-debug compilation anyway, so it doesn't matter much.
995 */
996static unsigned char *          /* -> representation */
997pchar(ch)
998int ch;
999{
1000    static unsigned char pbuf[10];
1001
1002    if (isprint(ch) || ch == ' ')
1003        sprintf(pbuf, "%c", ch);
1004    else
1005        sprintf(pbuf, "\\%o", ch);
1006    return(pbuf);
1007}
1008#endif
1009#endif
1010
1011#undef  matcher
1012#undef  fast
1013#undef  slow
1014#undef  dissect
1015#undef  backref
1016#undef  step
1017#undef  print
1018#undef  at
1019#undef  match
1020