1#include <sys/types.h> 2#include <stdio.h> 3#include <string.h> 4#include <ctype.h> 5#include <limits.h> 6#include <stdlib.h> 7 8#define POSIX_MISTAKE 9 10#include "utils.h" 11#include "regex.h" 12#include "regex2.h" 13 14#include "cclass.h" 15#include "cname.h" 16 17/* 18 * parse structure, passed up and down to avoid global variables and 19 * other clumsinesses 20 */ 21struct parse { 22 unsigned char *next; /* next character in RE */ 23 unsigned char *end; /* end of string (-> NUL normally) */ 24 int error; /* has an error been seen? */ 25 sop *strip; /* malloced strip */ 26 sopno ssize; /* malloced strip size (allocated) */ 27 sopno slen; /* malloced strip length (used) */ 28 int ncsalloc; /* number of csets allocated */ 29 struct re_guts *g; 30# define NPAREN 10 /* we need to remember () 1-9 for back refs */ 31 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 32 sopno pend[NPAREN]; /* -> ) ([0] unused) */ 33}; 34 35#include "regcomp.ih" 36 37static unsigned char nuls[10]; /* place to point scanner in event of error */ 38 39/* 40 * macros for use with parse structure 41 * BEWARE: these know that the parse structure is named `p' !!! 42 */ 43#define PEEK() (*p->next) 44#define PEEK2() (*(p->next+1)) 45#define MORE() (p->next < p->end) 46#define MORE2() (p->next+1 < p->end) 47#define SEE(c) (MORE() && PEEK() == (c)) 48#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 49#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 50#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 51#define NEXT() (p->next++) 52#define NEXT2() (p->next += 2) 53#define NEXTn(n) (p->next += (n)) 54#define GETNEXT() (*p->next++) 55#define SETERROR(e) seterr(p, (e)) 56#define REQUIRE(co, e) (void) ((co) || SETERROR(e)) 57#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) 58#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) 59#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) 60#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) 61#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 62#define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 63#define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 64#define HERE() (p->slen) 65#define THERE() (p->slen - 1) 66#define THERETHERE() (p->slen - 2) 67#define DROP(n) (p->slen -= (n)) 68 69#ifndef NDEBUG 70static int never = 0; /* for use in asserts; shuts lint up */ 71#else 72#define never 0 /* some <assert.h>s have bugs too */ 73#endif 74 75/* 76 - regcomp - interface for parser and compilation 77 = API_EXPORT(int) regcomp(regex_t *, const char *, int); 78 = #define REG_BASIC 0000 79 = #define REG_EXTENDED 0001 80 = #define REG_ICASE 0002 81 = #define REG_NOSUB 0004 82 = #define REG_NEWLINE 0010 83 = #define REG_NOSPEC 0020 84 = #define REG_PEND 0040 85 = #define REG_DUMP 0200 86 */ 87API_EXPORT(int) /* 0 success, otherwise REG_something */ 88regcomp(preg, pattern, cflags) 89regex_t *preg; 90const char *pattern; 91int cflags; 92{ 93 struct parse pa; 94 register struct re_guts *g; 95 register struct parse *p = &pa; 96 register int i; 97 register size_t len; 98#ifdef REDEBUG 99# define GOODFLAGS(f) (f) 100#else 101# define GOODFLAGS(f) ((f)&~REG_DUMP) 102#endif 103 104 cflags = GOODFLAGS(cflags); 105 if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 106 return(REG_INVARG); 107 108 if (cflags®_PEND) { 109 if (preg->re_endp < pattern) 110 return(REG_INVARG); 111 len = preg->re_endp - pattern; 112 } else 113 len = strlen((char *)pattern); 114 115 /* do the mallocs early so failure handling is easy */ 116 g = (struct re_guts *)malloc(sizeof(struct re_guts) + 117 (NC-1)*sizeof(cat_t)); 118 if (g == NULL) 119 return(REG_ESPACE); 120 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 121 p->strip = (sop *)malloc(p->ssize * sizeof(sop)); 122 p->slen = 0; 123 if (p->strip == NULL) { 124 free((char *)g); 125 return(REG_ESPACE); 126 } 127 128 /* set things up */ 129 p->g = g; 130 p->next = (unsigned char *)pattern; /* convenience; we do not modify it */ 131 p->end = p->next + len; 132 p->error = 0; 133 p->ncsalloc = 0; 134 for (i = 0; i < NPAREN; i++) { 135 p->pbegin[i] = 0; 136 p->pend[i] = 0; 137 } 138 g->csetsize = NC; 139 g->sets = NULL; 140 g->setbits = NULL; 141 g->ncsets = 0; 142 g->cflags = cflags; 143 g->iflags = 0; 144 g->nbol = 0; 145 g->neol = 0; 146 g->must = NULL; 147 g->mlen = 0; 148 g->nsub = 0; 149 g->ncategories = 1; /* category 0 is "everything else" */ 150 g->categories = &g->catspace[0]; 151 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); 152 g->backrefs = 0; 153 154 /* do it */ 155 EMIT(OEND, 0); 156 g->firststate = THERE(); 157 if (cflags®_EXTENDED) 158 p_ere(p, OUT); 159 else if (cflags®_NOSPEC) 160 p_str(p); 161 else 162 p_bre(p, OUT, OUT); 163 EMIT(OEND, 0); 164 g->laststate = THERE(); 165 166 /* tidy up loose ends and fill things in */ 167 categorize(p, g); 168 stripsnug(p, g); 169 findmust(p, g); 170 g->nplus = pluscount(p, g); 171 g->magic = MAGIC2; 172 preg->re_nsub = g->nsub; 173 preg->re_g = g; 174 preg->re_magic = MAGIC1; 175#ifndef REDEBUG 176 /* not debugging, so can't rely on the assert() in regexec() */ 177 if (g->iflags&BAD) 178 SETERROR(REG_ASSERT); 179#endif 180 181 /* win or lose, we're done */ 182 if (p->error != 0) /* lose */ 183 regfree(preg); 184 return(p->error); 185} 186 187/* 188 - p_ere - ERE parser top level, concatenation and alternation 189 == static void p_ere(register struct parse *p, int stop); 190 */ 191static void 192p_ere(p, stop) 193register struct parse *p; 194int stop; /* character this ERE should end at */ 195{ 196 register unsigned char c; 197 register sopno prevback = 0; 198 register sopno prevfwd = 0; 199 register sopno conc; 200 register int first = 1; /* is this the first alternative? */ 201 202 for (;;) { 203 /* do a bunch of concatenated expressions */ 204 conc = HERE(); 205 while (MORE() && (c = PEEK()) != '|' && c != stop) 206 p_ere_exp(p); 207 (void) REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 208 209 if (!EAT('|')) 210 break; /* NOTE BREAK OUT */ 211 212 if (first) { 213 INSERT(OCH_, conc); /* offset is wrong */ 214 prevfwd = conc; 215 prevback = conc; 216 first = 0; 217 } 218 ASTERN(OOR1, prevback); 219 prevback = THERE(); 220 AHEAD(prevfwd); /* fix previous offset */ 221 prevfwd = HERE(); 222 EMIT(OOR2, 0); /* offset is very wrong */ 223 } 224 225 if (!first) { /* tail-end fixups */ 226 AHEAD(prevfwd); 227 ASTERN(O_CH, prevback); 228 } 229 230 assert(!MORE() || SEE(stop)); 231} 232 233/* 234 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 235 == static void p_ere_exp(register struct parse *p); 236 */ 237static void 238p_ere_exp(p) 239register struct parse *p; 240{ 241 register unsigned char c; 242 register sopno pos; 243 register int count; 244 register int count2; 245 register sopno subno; 246 int wascaret = 0; 247 248 assert(MORE()); /* caller should have ensured this */ 249 c = GETNEXT(); 250 251 pos = HERE(); 252 switch (c) { 253 case '(': 254 REQUIRE(MORE(), REG_EPAREN); 255 p->g->nsub++; 256 subno = p->g->nsub; 257 if (subno < NPAREN) 258 p->pbegin[subno] = HERE(); 259 EMIT(OLPAREN, subno); 260 if (!SEE(')')) 261 p_ere(p, ')'); 262 if (subno < NPAREN) { 263 p->pend[subno] = HERE(); 264 assert(p->pend[subno] != 0); 265 } 266 EMIT(ORPAREN, subno); 267 MUSTEAT(')', REG_EPAREN); 268 break; 269#ifndef POSIX_MISTAKE 270 case ')': /* happens only if no current unmatched ( */ 271 /* 272 * You may ask, why the ifndef? Because I didn't notice 273 * this until slightly too late for 1003.2, and none of the 274 * other 1003.2 regular-expression reviewers noticed it at 275 * all. So an unmatched ) is legal POSIX, at least until 276 * we can get it fixed. 277 */ 278 SETERROR(REG_EPAREN); 279 break; 280#endif 281 case '^': 282 EMIT(OBOL, 0); 283 p->g->iflags |= USEBOL; 284 p->g->nbol++; 285 wascaret = 1; 286 break; 287 case '$': 288 EMIT(OEOL, 0); 289 p->g->iflags |= USEEOL; 290 p->g->neol++; 291 break; 292 case '|': 293 SETERROR(REG_EMPTY); 294 break; 295 case '*': 296 case '+': 297 case '?': 298 SETERROR(REG_BADRPT); 299 break; 300 case '.': 301 if (p->g->cflags®_NEWLINE) 302 nonnewline(p); 303 else 304 EMIT(OANY, 0); 305 break; 306 case '[': 307 p_bracket(p); 308 break; 309 case '\\': 310 REQUIRE(MORE(), REG_EESCAPE); 311 c = GETNEXT(); 312 ordinary(p, c); 313 break; 314 case '{': /* okay as ordinary except if digit follows */ 315 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); 316 /* FALLTHROUGH */ 317 default: 318 ordinary(p, c); 319 break; 320 } 321 322 if (!MORE()) 323 return; 324 c = PEEK(); 325 /* we call { a repetition if followed by a digit */ 326 if (!( c == '*' || c == '+' || c == '?' || 327 (c == '{' && MORE2() && isdigit(PEEK2())) )) 328 return; /* no repetition, we're done */ 329 NEXT(); 330 331 REQUIRE(!wascaret, REG_BADRPT); 332 switch (c) { 333 case '*': /* implemented as +? */ 334 /* this case does not require the (y|) trick, noKLUDGE */ 335 INSERT(OPLUS_, pos); 336 ASTERN(O_PLUS, pos); 337 INSERT(OQUEST_, pos); 338 ASTERN(O_QUEST, pos); 339 break; 340 case '+': 341 INSERT(OPLUS_, pos); 342 ASTERN(O_PLUS, pos); 343 break; 344 case '?': 345 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 346 INSERT(OCH_, pos); /* offset slightly wrong */ 347 ASTERN(OOR1, pos); /* this one's right */ 348 AHEAD(pos); /* fix the OCH_ */ 349 EMIT(OOR2, 0); /* offset very wrong... */ 350 AHEAD(THERE()); /* ...so fix it */ 351 ASTERN(O_CH, THERETHERE()); 352 break; 353 case '{': 354 count = p_count(p); 355 if (EAT(',')) { 356 if (isdigit(PEEK())) { 357 count2 = p_count(p); 358 REQUIRE(count <= count2, REG_BADBR); 359 } else /* single number with comma */ 360 count2 = INFINITY; 361 } else /* just a single number */ 362 count2 = count; 363 repeat(p, pos, count, count2); 364 if (!EAT('}')) { /* error heuristics */ 365 while (MORE() && PEEK() != '}') 366 NEXT(); 367 REQUIRE(MORE(), REG_EBRACE); 368 SETERROR(REG_BADBR); 369 } 370 break; 371 } 372 373 if (!MORE()) 374 return; 375 c = PEEK(); 376 if (!( c == '*' || c == '+' || c == '?' || 377 (c == '{' && MORE2() && isdigit(PEEK2())) ) ) 378 return; 379 SETERROR(REG_BADRPT); 380} 381 382/* 383 - p_str - string (no metacharacters) "parser" 384 == static void p_str(register struct parse *p); 385 */ 386static void 387p_str(p) 388register struct parse *p; 389{ 390 REQUIRE(MORE(), REG_EMPTY); 391 while (MORE()) 392 ordinary(p, GETNEXT()); 393} 394 395/* 396 - p_bre - BRE parser top level, anchoring and concatenation 397 == static void p_bre(register struct parse *p, register int end1, \ 398 == register int end2); 399 * Giving end1 as OUT essentially eliminates the end1/end2 check. 400 * 401 * This implementation is a bit of a kludge, in that a trailing $ is first 402 * taken as an ordinary character and then revised to be an anchor. The 403 * only undesirable side effect is that '$' gets included as a character 404 * category in such cases. This is fairly harmless; not worth fixing. 405 * The amount of lookahead needed to avoid this kludge is excessive. 406 */ 407static void 408p_bre(p, end1, end2) 409register struct parse *p; 410register int end1; /* first terminating character */ 411register int end2; /* second terminating character */ 412{ 413 register sopno start = HERE(); 414 register int first = 1; /* first subexpression? */ 415 register int wasdollar = 0; 416 417 if (EAT('^')) { 418 EMIT(OBOL, 0); 419 p->g->iflags |= USEBOL; 420 p->g->nbol++; 421 } 422 while (MORE() && !SEETWO(end1, end2)) { 423 wasdollar = p_simp_re(p, first); 424 first = 0; 425 } 426 if (wasdollar) { /* oops, that was a trailing anchor */ 427 DROP(1); 428 EMIT(OEOL, 0); 429 p->g->iflags |= USEEOL; 430 p->g->neol++; 431 } 432 433 REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 434} 435 436/* 437 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 438 == static int p_simp_re(register struct parse *p, int starordinary); 439 */ 440static int /* was the simple RE an unbackslashed $? */ 441p_simp_re(p, starordinary) 442register struct parse *p; 443int starordinary; /* is a leading * an ordinary character? */ 444{ 445 register int c; 446 register int count; 447 register int count2; 448 register sopno pos; 449 register int i; 450 register sopno subno; 451# define BACKSL (1<<CHAR_BIT) 452 453 pos = HERE(); /* repetion op, if any, covers from here */ 454 455 assert(MORE()); /* caller should have ensured this */ 456 c = GETNEXT(); 457 if (c == '\\') { 458 REQUIRE(MORE(), REG_EESCAPE); 459 c = BACKSL | (unsigned char)GETNEXT(); 460 } 461 switch (c) { 462 case '.': 463 if (p->g->cflags®_NEWLINE) 464 nonnewline(p); 465 else 466 EMIT(OANY, 0); 467 break; 468 case '[': 469 p_bracket(p); 470 break; 471 case BACKSL|'{': 472 SETERROR(REG_BADRPT); 473 break; 474 case BACKSL|'(': 475 p->g->nsub++; 476 subno = p->g->nsub; 477 if (subno < NPAREN) 478 p->pbegin[subno] = HERE(); 479 EMIT(OLPAREN, subno); 480 /* the MORE here is an error heuristic */ 481 if (MORE() && !SEETWO('\\', ')')) 482 p_bre(p, '\\', ')'); 483 if (subno < NPAREN) { 484 p->pend[subno] = HERE(); 485 assert(p->pend[subno] != 0); 486 } 487 EMIT(ORPAREN, subno); 488 REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 489 break; 490 case BACKSL|')': /* should not get here -- must be user */ 491 case BACKSL|'}': 492 SETERROR(REG_EPAREN); 493 break; 494 case BACKSL|'1': 495 case BACKSL|'2': 496 case BACKSL|'3': 497 case BACKSL|'4': 498 case BACKSL|'5': 499 case BACKSL|'6': 500 case BACKSL|'7': 501 case BACKSL|'8': 502 case BACKSL|'9': 503 i = (c&~BACKSL) - '0'; 504 assert(i < NPAREN); 505 if (p->pend[i] != 0) { 506 assert(i <= p->g->nsub); 507 EMIT(OBACK_, i); 508 assert(p->pbegin[i] != 0); 509 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 510 assert(OP(p->strip[p->pend[i]]) == ORPAREN); 511 (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 512 EMIT(O_BACK, i); 513 } else 514 SETERROR(REG_ESUBREG); 515 p->g->backrefs = 1; 516 break; 517 case '*': 518 REQUIRE(starordinary, REG_BADRPT); 519 /* FALLTHROUGH */ 520 default: 521 ordinary(p, (unsigned char)c); /* takes off BACKSL, if any */ 522 break; 523 } 524 525 if (EAT('*')) { /* implemented as +? */ 526 /* this case does not require the (y|) trick, noKLUDGE */ 527 INSERT(OPLUS_, pos); 528 ASTERN(O_PLUS, pos); 529 INSERT(OQUEST_, pos); 530 ASTERN(O_QUEST, pos); 531 } else if (EATTWO('\\', '{')) { 532 count = p_count(p); 533 if (EAT(',')) { 534 if (MORE() && isdigit(PEEK())) { 535 count2 = p_count(p); 536 REQUIRE(count <= count2, REG_BADBR); 537 } else /* single number with comma */ 538 count2 = INFINITY; 539 } else /* just a single number */ 540 count2 = count; 541 repeat(p, pos, count, count2); 542 if (!EATTWO('\\', '}')) { /* error heuristics */ 543 while (MORE() && !SEETWO('\\', '}')) 544 NEXT(); 545 REQUIRE(MORE(), REG_EBRACE); 546 SETERROR(REG_BADBR); 547 } 548 } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ 549 return(1); 550 551 return(0); 552} 553 554/* 555 - p_count - parse a repetition count 556 == static int p_count(register struct parse *p); 557 */ 558static int /* the value */ 559p_count(p) 560register struct parse *p; 561{ 562 register int count = 0; 563 register int ndigits = 0; 564 565 while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { 566 count = count*10 + (GETNEXT() - '0'); 567 ndigits++; 568 } 569 570 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 571 return(count); 572} 573 574/* 575 - p_bracket - parse a bracketed character list 576 == static void p_bracket(register struct parse *p); 577 * 578 * Note a significant property of this code: if the allocset() did SETERROR, 579 * no set operations are done. 580 */ 581static void 582p_bracket(p) 583register struct parse *p; 584{ 585 register cset *cs = allocset(p); 586 register int invert = 0; 587 588 /* Dept of Truly Sickening Special-Case Kludges */ 589 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { 590 EMIT(OBOW, 0); 591 NEXTn(6); 592 return; 593 } 594 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { 595 EMIT(OEOW, 0); 596 NEXTn(6); 597 return; 598 } 599 600 if (EAT('^')) 601 invert++; /* make note to invert set at end */ 602 if (EAT(']')) 603 CHadd(cs, ']'); 604 else if (EAT('-')) 605 CHadd(cs, '-'); 606 while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 607 p_b_term(p, cs); 608 if (EAT('-')) 609 CHadd(cs, '-'); 610 MUSTEAT(']', REG_EBRACK); 611 612 if (p->error != 0) /* don't mess things up further */ 613 return; 614 615 if (p->g->cflags®_ICASE) { 616 register int i; 617 register int ci; 618 619 for (i = p->g->csetsize - 1; i >= 0; i--) 620 if (CHIN(cs, i) && isalpha(i)) { 621 ci = othercase(i); 622 if (ci != i) 623 CHadd(cs, ci); 624 } 625 if (cs->multis != NULL) 626 mccase(p, cs); 627 } 628 if (invert) { 629 register int i; 630 631 for (i = p->g->csetsize - 1; i >= 0; i--) 632 if (CHIN(cs, i)) 633 CHsub(cs, i); 634 else 635 CHadd(cs, i); 636 if (p->g->cflags®_NEWLINE) 637 CHsub(cs, '\n'); 638 if (cs->multis != NULL) 639 mcinvert(p, cs); 640 } 641 642 assert(cs->multis == NULL); /* xxx */ 643 644 if (nch(p, cs) == 1) { /* optimize singleton sets */ 645 ordinary(p, firstch(p, cs)); 646 freeset(p, cs); 647 } else 648 EMIT(OANYOF, freezeset(p, cs)); 649} 650 651/* 652 - p_b_term - parse one term of a bracketed character list 653 == static void p_b_term(register struct parse *p, register cset *cs); 654 */ 655static void 656p_b_term(p, cs) 657register struct parse *p; 658register cset *cs; 659{ 660 register unsigned char c; 661 register unsigned char start, finish; 662 register int i; 663 664 /* classify what we've got */ 665 switch ((MORE()) ? PEEK() : '\0') { 666 case '[': 667 c = (MORE2()) ? PEEK2() : '\0'; 668 break; 669 case '-': 670 SETERROR(REG_ERANGE); 671 return; /* NOTE RETURN */ 672 break; 673 default: 674 c = '\0'; 675 break; 676 } 677 678 switch (c) { 679 case ':': /* character class */ 680 NEXT2(); 681 REQUIRE(MORE(), REG_EBRACK); 682 c = PEEK(); 683 REQUIRE(c != '-' && c != ']', REG_ECTYPE); 684 p_b_cclass(p, cs); 685 REQUIRE(MORE(), REG_EBRACK); 686 REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 687 break; 688 case '=': /* equivalence class */ 689 NEXT2(); 690 REQUIRE(MORE(), REG_EBRACK); 691 c = PEEK(); 692 REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 693 p_b_eclass(p, cs); 694 REQUIRE(MORE(), REG_EBRACK); 695 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 696 break; 697 default: /* symbol, ordinary character, or range */ 698/* xxx revision needed for multichar stuff */ 699 start = p_b_symbol(p); 700 if (SEE('-') && MORE2() && PEEK2() != ']') { 701 /* range */ 702 NEXT(); 703 if (EAT('-')) 704 finish = '-'; 705 else 706 finish = p_b_symbol(p); 707 } else 708 finish = start; 709/* xxx what about signed chars here... */ 710 REQUIRE(start <= finish, REG_ERANGE); 711 for (i = start; i <= finish; i++) 712 CHadd(cs, i); 713 break; 714 } 715} 716 717/* 718 - p_b_cclass - parse a character-class name and deal with it 719 == static void p_b_cclass(register struct parse *p, register cset *cs); 720 */ 721static void 722p_b_cclass(p, cs) 723register struct parse *p; 724register cset *cs; 725{ 726 register unsigned char *sp = p->next; 727 register const struct cclass *cp; 728 register size_t len; 729 register const unsigned char *u; 730 register unsigned char c; 731 732 while (MORE() && isalpha(PEEK())) 733 NEXT(); 734 len = p->next - sp; 735 for (cp = cclasses; cp->name != NULL; cp++) 736 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 737 break; 738 if (cp->name == NULL) { 739 /* oops, didn't find it */ 740 SETERROR(REG_ECTYPE); 741 return; 742 } 743 744 u = cp->chars; 745 while ((c = *u++) != '\0') 746 CHadd(cs, c); 747 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 748 MCadd(p, cs, u); 749} 750 751/* 752 - p_b_eclass - parse an equivalence-class name and deal with it 753 == static void p_b_eclass(register struct parse *p, register cset *cs); 754 * 755 * This implementation is incomplete. xxx 756 */ 757static void 758p_b_eclass(p, cs) 759register struct parse *p; 760register cset *cs; 761{ 762 register unsigned char c; 763 764 c = p_b_coll_elem(p, '='); 765 CHadd(cs, c); 766} 767 768/* 769 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 770 == static char p_b_symbol(register struct parse *p); 771 */ 772static unsigned char /* value of symbol */ 773p_b_symbol(p) 774register struct parse *p; 775{ 776 register unsigned char value; 777 778 REQUIRE(MORE(), REG_EBRACK); 779 if (!EATTWO('[', '.')) 780 return(GETNEXT()); 781 782 /* collating symbol */ 783 value = p_b_coll_elem(p, '.'); 784 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 785 return(value); 786} 787 788/* 789 - p_b_coll_elem - parse a collating-element name and look it up 790 == static char p_b_coll_elem(register struct parse *p, int endc); 791 */ 792static unsigned char /* value of collating element */ 793p_b_coll_elem(p, endc) 794register struct parse *p; 795int endc; /* name ended by endc,']' */ 796{ 797 register unsigned char *sp = p->next; 798 register const struct cname *cp; 799 register int len; 800 801 while (MORE() && !SEETWO(endc, ']')) 802 NEXT(); 803 if (!MORE()) { 804 SETERROR(REG_EBRACK); 805 return(0); 806 } 807 len = p->next - sp; 808 for (cp = cnames; cp->name != NULL; cp++) 809 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 810 return(cp->code); /* known name */ 811 if (len == 1) 812 return(*sp); /* single character */ 813 SETERROR(REG_ECOLLATE); /* neither */ 814 return(0); 815} 816 817/* 818 - othercase - return the case counterpart of an alphabetic 819 == static char othercase(int ch); 820 */ 821static unsigned char /* if no counterpart, return ch */ 822othercase(ch) 823int ch; 824{ 825 assert(isalpha(ch)); 826 if (isupper(ch)) 827 return(tolower(ch)); 828 else if (islower(ch)) 829 return(toupper(ch)); 830 else /* peculiar, but could happen */ 831 return(ch); 832} 833 834/* 835 - bothcases - emit a dualcase version of a two-case character 836 == static void bothcases(register struct parse *p, int ch); 837 * 838 * Boy, is this implementation ever a kludge... 839 */ 840static void 841bothcases(p, ch) 842register struct parse *p; 843int ch; 844{ 845 register unsigned char *oldnext = p->next; 846 register unsigned char *oldend = p->end; 847 unsigned char bracket[3]; 848 849 assert(othercase(ch) != ch); /* p_bracket() would recurse */ 850 p->next = bracket; 851 p->end = bracket+2; 852 bracket[0] = ch; 853 bracket[1] = ']'; 854 bracket[2] = '\0'; 855 p_bracket(p); 856 assert(p->next == bracket+2); 857 p->next = oldnext; 858 p->end = oldend; 859} 860 861/* 862 - ordinary - emit an ordinary character 863 == static void ordinary(register struct parse *p, register int ch); 864 */ 865static void 866ordinary(p, ch) 867register struct parse *p; 868register int ch; 869{ 870 register cat_t *cap = p->g->categories; 871 872 if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) 873 bothcases(p, ch); 874 else { 875 EMIT(OCHAR, (unsigned char)ch); 876 if (cap[ch] == 0) 877 cap[ch] = p->g->ncategories++; 878 } 879} 880 881/* 882 - nonnewline - emit REG_NEWLINE version of OANY 883 == static void nonnewline(register struct parse *p); 884 * 885 * Boy, is this implementation ever a kludge... 886 */ 887static void 888nonnewline(p) 889register struct parse *p; 890{ 891 register unsigned char *oldnext = p->next; 892 register unsigned char *oldend = p->end; 893 unsigned char bracket[4]; 894 895 p->next = bracket; 896 p->end = bracket+3; 897 bracket[0] = '^'; 898 bracket[1] = '\n'; 899 bracket[2] = ']'; 900 bracket[3] = '\0'; 901 p_bracket(p); 902 assert(p->next == bracket+3); 903 p->next = oldnext; 904 p->end = oldend; 905} 906 907/* 908 - repeat - generate code for a bounded repetition, recursively if needed 909 == static void repeat(register struct parse *p, sopno start, int from, int to); 910 */ 911static void 912repeat(p, start, from, to) 913register struct parse *p; 914sopno start; /* operand from here to end of strip */ 915int from; /* repeated from this number */ 916int to; /* to this number of times (maybe INFINITY) */ 917{ 918 register sopno finish = HERE(); 919# define N 2 920# define INF 3 921# define REP(f, t) ((f)*8 + (t)) 922# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 923 register sopno copy; 924 925 if (p->error != 0) /* head off possible runaway recursion */ 926 return; 927 928 assert(from <= to); 929 930 switch (REP(MAP(from), MAP(to))) { 931 case REP(0, 0): /* must be user doing this */ 932 DROP(finish-start); /* drop the operand */ 933 break; 934 case REP(0, 1): /* as x{1,1}? */ 935 case REP(0, N): /* as x{1,n}? */ 936 case REP(0, INF): /* as x{1,}? */ 937 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 938 INSERT(OCH_, start); /* offset is wrong... */ 939 repeat(p, start+1, 1, to); 940 ASTERN(OOR1, start); 941 AHEAD(start); /* ... fix it */ 942 EMIT(OOR2, 0); 943 AHEAD(THERE()); 944 ASTERN(O_CH, THERETHERE()); 945 break; 946 case REP(1, 1): /* trivial case */ 947 /* done */ 948 break; 949 case REP(1, N): /* as x?x{1,n-1} */ 950 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 951 INSERT(OCH_, start); 952 ASTERN(OOR1, start); 953 AHEAD(start); 954 EMIT(OOR2, 0); /* offset very wrong... */ 955 AHEAD(THERE()); /* ...so fix it */ 956 ASTERN(O_CH, THERETHERE()); 957 copy = dupl(p, start+1, finish+1); 958 assert(copy == finish+4); 959 repeat(p, copy, 1, to-1); 960 break; 961 case REP(1, INF): /* as x+ */ 962 INSERT(OPLUS_, start); 963 ASTERN(O_PLUS, start); 964 break; 965 case REP(N, N): /* as xx{m-1,n-1} */ 966 copy = dupl(p, start, finish); 967 repeat(p, copy, from-1, to-1); 968 break; 969 case REP(N, INF): /* as xx{n-1,INF} */ 970 copy = dupl(p, start, finish); 971 repeat(p, copy, from-1, to); 972 break; 973 default: /* "can't happen" */ 974 SETERROR(REG_ASSERT); /* just in case */ 975 break; 976 } 977} 978 979/* 980 - seterr - set an error condition 981 == static int seterr(register struct parse *p, int e); 982 */ 983static int /* useless but makes type checking happy */ 984seterr(p, e) 985register struct parse *p; 986int e; 987{ 988 if (p->error == 0) /* keep earliest error condition */ 989 p->error = e; 990 p->next = nuls; /* try to bring things to a halt */ 991 p->end = nuls; 992 return(0); /* make the return value well-defined */ 993} 994 995/* 996 - allocset - allocate a set of characters for [] 997 == static cset *allocset(register struct parse *p); 998 */ 999static cset * 1000allocset(p) 1001register struct parse *p; 1002{ 1003 register int no = p->g->ncsets++; 1004 register size_t nc; 1005 register size_t nbytes; 1006 register cset *cs; 1007 register size_t css = (size_t)p->g->csetsize; 1008 register int i; 1009 1010 if (no >= p->ncsalloc) { /* need another column of space */ 1011 p->ncsalloc += CHAR_BIT; 1012 nc = p->ncsalloc; 1013 assert(nc % CHAR_BIT == 0); 1014 nbytes = nc / CHAR_BIT * css; 1015 if (p->g->sets == NULL) 1016 p->g->sets = (cset *)malloc(nc * sizeof(cset)); 1017 else 1018 p->g->sets = (cset *)realloc((unsigned char *)p->g->sets, 1019 nc * sizeof(cset)); 1020 if (p->g->setbits == NULL) 1021 p->g->setbits = (uch *)malloc(nbytes); 1022 else { 1023 p->g->setbits = (uch *)realloc((unsigned char *)p->g->setbits, 1024 nbytes); 1025 /* xxx this isn't right if setbits is now NULL */ 1026 for (i = 0; i < no; i++) 1027 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); 1028 } 1029 if (p->g->sets != NULL && p->g->setbits != NULL) 1030 (void) memset((unsigned char *)p->g->setbits + (nbytes - css), 1031 0, css); 1032 else { 1033 no = 0; 1034 SETERROR(REG_ESPACE); 1035 /* caller's responsibility not to do set ops */ 1036 } 1037 } 1038 1039 assert(p->g->sets != NULL); /* xxx */ 1040 cs = &p->g->sets[no]; 1041 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 1042 cs->mask = 1 << ((no) % CHAR_BIT); 1043 cs->hash = 0; 1044 cs->smultis = 0; 1045 cs->multis = NULL; 1046 1047 return(cs); 1048} 1049 1050/* 1051 - freeset - free a now-unused set 1052 == static void freeset(register struct parse *p, register cset *cs); 1053 */ 1054static void 1055freeset(p, cs) 1056register struct parse *p; 1057register cset *cs; 1058{ 1059 register size_t i; 1060 register cset *top = &p->g->sets[p->g->ncsets]; 1061 register size_t css = (size_t)p->g->csetsize; 1062 1063 for (i = 0; i < css; i++) 1064 CHsub(cs, i); 1065 if (cs == top-1) /* recover only the easy case */ 1066 p->g->ncsets--; 1067} 1068 1069/* 1070 - freezeset - final processing on a set of characters 1071 == static int freezeset(register struct parse *p, register cset *cs); 1072 * 1073 * The main task here is merging identical sets. This is usually a waste 1074 * of time (although the hash code minimizes the overhead), but can win 1075 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 1076 * is done using addition rather than xor -- all ASCII [aA] sets xor to 1077 * the same value! 1078 */ 1079static int /* set number */ 1080freezeset(p, cs) 1081register struct parse *p; 1082register cset *cs; 1083{ 1084 register uch h = cs->hash; 1085 register size_t i; 1086 register cset *top = &p->g->sets[p->g->ncsets]; 1087 register cset *cs2; 1088 register size_t css = (size_t)p->g->csetsize; 1089 1090 /* look for an earlier one which is the same */ 1091 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 1092 if (cs2->hash == h && cs2 != cs) { 1093 /* maybe */ 1094 for (i = 0; i < css; i++) 1095 if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 1096 break; /* no */ 1097 if (i == css) 1098 break; /* yes */ 1099 } 1100 1101 if (cs2 < top) { /* found one */ 1102 freeset(p, cs); 1103 cs = cs2; 1104 } 1105 1106 return((int)(cs - p->g->sets)); 1107} 1108 1109/* 1110 - firstch - return first character in a set (which must have at least one) 1111 == static int firstch(register struct parse *p, register cset *cs); 1112 */ 1113static int /* character; there is no "none" value */ 1114firstch(p, cs) 1115register struct parse *p; 1116register cset *cs; 1117{ 1118 register size_t i; 1119 register size_t css = (size_t)p->g->csetsize; 1120 1121 for (i = 0; i < css; i++) 1122 if (CHIN(cs, i)) 1123 return((unsigned char)i); 1124 assert(never); 1125 return(0); /* arbitrary */ 1126} 1127 1128/* 1129 - nch - number of characters in a set 1130 == static int nch(register struct parse *p, register cset *cs); 1131 */ 1132static int 1133nch(p, cs) 1134register struct parse *p; 1135register cset *cs; 1136{ 1137 register size_t i; 1138 register size_t css = (size_t)p->g->csetsize; 1139 register int n = 0; 1140 1141 for (i = 0; i < css; i++) 1142 if (CHIN(cs, i)) 1143 n++; 1144 return(n); 1145} 1146 1147/* 1148 - mcadd - add a collating element to a cset 1149 == static void mcadd(register struct parse *p, register cset *cs, \ 1150 == register char *cp); 1151 */ 1152static void 1153mcadd(p, cs, cp) 1154register struct parse *p; 1155register cset *cs; 1156register const unsigned char *cp; 1157{ 1158 register size_t oldend = cs->smultis; 1159 1160 cs->smultis += strlen(cp) + 1; 1161 if (cs->multis == NULL) 1162 cs->multis = malloc(cs->smultis); 1163 else 1164 cs->multis = realloc(cs->multis, cs->smultis); 1165 if (cs->multis == NULL) { 1166 SETERROR(REG_ESPACE); 1167 return; 1168 } 1169 1170 (void) strcpy(cs->multis + oldend - 1, cp); 1171 cs->multis[cs->smultis - 1] = '\0'; 1172} 1173 1174#if 0 1175/* 1176 - mcsub - subtract a collating element from a cset 1177 == static void mcsub(register cset *cs, register unsigned char *cp); 1178 */ 1179static void 1180mcsub(cs, cp) 1181register unsigned cset *cs; 1182register unsigned char *cp; 1183{ 1184 register unsigned char *fp = mcfind(cs, cp); 1185 register size_t len = strlen(fp); 1186 1187 assert(fp != NULL); 1188 (void) memmove(fp, fp + len + 1, 1189 cs->smultis - (fp + len + 1 - cs->multis)); 1190 cs->smultis -= len; 1191 1192 if (cs->smultis == 0) { 1193 free(cs->multis); 1194 cs->multis = NULL; 1195 return; 1196 } 1197 1198 cs->multis = realloc(cs->multis, cs->smultis); 1199 assert(cs->multis != NULL); 1200} 1201 1202/* 1203 - mcin - is a collating element in a cset? 1204 == static int mcin(register cset *cs, register unsigned char *cp); 1205 */ 1206static int 1207mcin(cs, cp) 1208register cset *cs; 1209register unsigned char *cp; 1210{ 1211 return(mcfind(cs, cp) != NULL); 1212} 1213 1214 1215/* 1216 - mcfind - find a collating element in a cset 1217 == static unsigned char *mcfind(register cset *cs, register unsigned char *cp); 1218 */ 1219static unsigned char * 1220mcfind(cs, cp) 1221register cset *cs; 1222register unsigned char *cp; 1223{ 1224 register unsigned char *p; 1225 1226 if (cs->multis == NULL) 1227 return(NULL); 1228 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) 1229 if (strcmp(cp, p) == 0) 1230 return(p); 1231 return(NULL); 1232} 1233#endif 1234 1235/* 1236 - mcinvert - invert the list of collating elements in a cset 1237 == static void mcinvert(register struct parse *p, register cset *cs); 1238 * 1239 * This would have to know the set of possibilities. Implementation 1240 * is deferred. 1241 */ 1242static void 1243mcinvert(p, cs) 1244register struct parse *p; 1245register cset *cs; 1246{ 1247 assert(cs->multis == NULL); /* xxx */ 1248} 1249 1250/* 1251 - mccase - add case counterparts of the list of collating elements in a cset 1252 == static void mccase(register struct parse *p, register cset *cs); 1253 * 1254 * This would have to know the set of possibilities. Implementation 1255 * is deferred. 1256 */ 1257static void 1258mccase(p, cs) 1259register struct parse *p; 1260register cset *cs; 1261{ 1262 assert(cs->multis == NULL); /* xxx */ 1263} 1264 1265/* 1266 - isinsets - is this character in any sets? 1267 == static int isinsets(register struct re_guts *g, int c); 1268 */ 1269static int /* predicate */ 1270isinsets(g, c) 1271register struct re_guts *g; 1272int c; 1273{ 1274 register uch *col; 1275 register int i; 1276 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1277 register unsigned uc = (unsigned char)c; 1278 1279 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1280 if (col[uc] != 0) 1281 return(1); 1282 return(0); 1283} 1284 1285/* 1286 - samesets - are these two characters in exactly the same sets? 1287 == static int samesets(register struct re_guts *g, int c1, int c2); 1288 */ 1289static int /* predicate */ 1290samesets(g, c1, c2) 1291register struct re_guts *g; 1292int c1; 1293int c2; 1294{ 1295 register uch *col; 1296 register int i; 1297 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1298 register unsigned uc1 = (unsigned char)c1; 1299 register unsigned uc2 = (unsigned char)c2; 1300 1301 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1302 if (col[uc1] != col[uc2]) 1303 return(0); 1304 return(1); 1305} 1306 1307/* 1308 - categorize - sort out character categories 1309 == static void categorize(struct parse *p, register struct re_guts *g); 1310 */ 1311static void 1312categorize(p, g) 1313struct parse *p; 1314register struct re_guts *g; 1315{ 1316 register cat_t *cats = g->categories; 1317 register int c; 1318 register int c2; 1319 register cat_t cat; 1320 1321 /* avoid making error situations worse */ 1322 if (p->error != 0) 1323 return; 1324 1325 for (c = 0; c <= UCHAR_MAX; c++) 1326 if (cats[c] == 0 && isinsets(g, c)) { 1327 cat = g->ncategories++; 1328 cats[c] = cat; 1329 for (c2 = c+1; c2 <= UCHAR_MAX; c2++) 1330 if (cats[c2] == 0 && samesets(g, c, c2)) 1331 cats[c2] = cat; 1332 } 1333} 1334 1335/* 1336 - dupl - emit a duplicate of a bunch of sops 1337 == static sopno dupl(register struct parse *p, sopno start, sopno finish); 1338 */ 1339static sopno /* start of duplicate */ 1340dupl(p, start, finish) 1341register struct parse *p; 1342sopno start; /* from here */ 1343sopno finish; /* to this less one */ 1344{ 1345 register sopno ret = HERE(); 1346 register sopno len = finish - start; 1347 1348 assert(finish >= start); 1349 if (len == 0) 1350 return(ret); 1351 enlarge(p, p->ssize + len); /* this many unexpected additions */ 1352 assert(p->ssize >= p->slen + len); 1353 (void) memcpy((char *)(p->strip + p->slen), 1354 (char *)(p->strip + start), (size_t)len*sizeof(sop)); 1355 p->slen += len; 1356 return(ret); 1357} 1358 1359/* 1360 - doemit - emit a strip operator 1361 == static void doemit(register struct parse *p, sop op, size_t opnd); 1362 * 1363 * It might seem better to implement this as a macro with a function as 1364 * hard-case backup, but it's just too big and messy unless there are 1365 * some changes to the data structures. Maybe later. 1366 */ 1367static void 1368doemit(p, op, opnd) 1369register struct parse *p; 1370sop op; 1371size_t opnd; 1372{ 1373 /* avoid making error situations worse */ 1374 if (p->error != 0) 1375 return; 1376 1377 /* deal with oversize operands ("can't happen", more or less) */ 1378 assert(opnd < 1<<OPSHIFT); 1379 1380 /* deal with undersized strip */ 1381 if (p->slen >= p->ssize) 1382 enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ 1383 assert(p->slen < p->ssize); 1384 1385 /* finally, it's all reduced to the easy case */ 1386 p->strip[p->slen++] = SOP(op, opnd); 1387} 1388 1389/* 1390 - doinsert - insert a sop into the strip 1391 == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); 1392 */ 1393static void 1394doinsert(p, op, opnd, pos) 1395register struct parse *p; 1396sop op; 1397size_t opnd; 1398sopno pos; 1399{ 1400 register sopno sn; 1401 register sop s; 1402 register int i; 1403 1404 /* avoid making error situations worse */ 1405 if (p->error != 0) 1406 return; 1407 1408 sn = HERE(); 1409 EMIT(op, opnd); /* do checks, ensure space */ 1410 assert(HERE() == sn+1); 1411 s = p->strip[sn]; 1412 1413 /* adjust paren pointers */ 1414 assert(pos > 0); 1415 for (i = 1; i < NPAREN; i++) { 1416 if (p->pbegin[i] >= pos) { 1417 p->pbegin[i]++; 1418 } 1419 if (p->pend[i] >= pos) { 1420 p->pend[i]++; 1421 } 1422 } 1423 1424 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 1425 (HERE()-pos-1)*sizeof(sop)); 1426 p->strip[pos] = s; 1427} 1428 1429/* 1430 - dofwd - complete a forward reference 1431 == static void dofwd(register struct parse *p, sopno pos, sop value); 1432 */ 1433static void 1434dofwd(p, pos, value) 1435register struct parse *p; 1436register sopno pos; 1437sop value; 1438{ 1439 /* avoid making error situations worse */ 1440 if (p->error != 0) 1441 return; 1442 1443 assert(value < 1<<OPSHIFT); 1444 p->strip[pos] = OP(p->strip[pos]) | value; 1445} 1446 1447/* 1448 - enlarge - enlarge the strip 1449 == static void enlarge(register struct parse *p, sopno size); 1450 */ 1451static void 1452enlarge(p, size) 1453register struct parse *p; 1454register sopno size; 1455{ 1456 register sop *sp; 1457 1458 if (p->ssize >= size) 1459 return; 1460 1461 sp = (sop *)realloc(p->strip, size*sizeof(sop)); 1462 if (sp == NULL) { 1463 SETERROR(REG_ESPACE); 1464 return; 1465 } 1466 p->strip = sp; 1467 p->ssize = size; 1468} 1469 1470/* 1471 - stripsnug - compact the strip 1472 == static void stripsnug(register struct parse *p, register struct re_guts *g); 1473 */ 1474static void 1475stripsnug(p, g) 1476register struct parse *p; 1477register struct re_guts *g; 1478{ 1479 g->nstates = p->slen; 1480 g->strip = (sop *)realloc((unsigned char *)p->strip, p->slen * sizeof(sop)); 1481 if (g->strip == NULL) { 1482 SETERROR(REG_ESPACE); 1483 g->strip = p->strip; 1484 } 1485} 1486 1487/* 1488 - findmust - fill in must and mlen with longest mandatory literal string 1489 == static void findmust(register struct parse *p, register struct re_guts *g); 1490 * 1491 * This algorithm could do fancy things like analyzing the operands of | 1492 * for common subsequences. Someday. This code is simple and finds most 1493 * of the interesting cases. 1494 * 1495 * Note that must and mlen got initialized during setup. 1496 */ 1497static void 1498findmust(p, g) 1499struct parse *p; 1500register struct re_guts *g; 1501{ 1502 register sop *scan; 1503 sop *start = NULL; 1504 register sop *newstart = NULL; 1505 register sopno newlen; 1506 register sop s; 1507 register unsigned char *cp; 1508 register sopno i; 1509 1510 /* avoid making error situations worse */ 1511 if (p->error != 0) 1512 return; 1513 1514 /* find the longest OCHAR sequence in strip */ 1515 newlen = 0; 1516 scan = g->strip + 1; 1517 do { 1518 s = *scan++; 1519 switch (OP(s)) { 1520 case OCHAR: /* sequence member */ 1521 if (newlen == 0) /* new sequence */ 1522 newstart = scan - 1; 1523 newlen++; 1524 break; 1525 case OPLUS_: /* things that don't break one */ 1526 case OLPAREN: 1527 case ORPAREN: 1528 break; 1529 case OQUEST_: /* things that must be skipped */ 1530 case OCH_: 1531 scan--; 1532 do { 1533 scan += OPND(s); 1534 s = *scan; 1535 /* assert() interferes w debug printouts */ 1536 if (OP(s) != O_QUEST && OP(s) != O_CH && 1537 OP(s) != OOR2) { 1538 g->iflags |= BAD; 1539 return; 1540 } 1541 } while (OP(s) != O_QUEST && OP(s) != O_CH); 1542 /* fallthrough */ 1543 default: /* things that break a sequence */ 1544 if (newlen > g->mlen) { /* ends one */ 1545 start = newstart; 1546 g->mlen = newlen; 1547 } 1548 newlen = 0; 1549 break; 1550 } 1551 } while (OP(s) != OEND); 1552 1553 if (g->mlen == 0) /* there isn't one */ 1554 return; 1555 1556 if (!start) { 1557 g->mlen = 0; 1558 return; 1559 } 1560 1561 /* turn it into a character string */ 1562 g->must = malloc((size_t)g->mlen + 1); 1563 if (g->must == NULL) { /* argh; just forget it */ 1564 g->mlen = 0; 1565 return; 1566 } 1567 cp = g->must; 1568 scan = start; 1569 for (i = g->mlen; i > 0; i--) { 1570 while (OP(s = *scan++) != OCHAR) 1571 continue; 1572 assert(cp < g->must + g->mlen); 1573 *cp++ = (unsigned char)OPND(s); 1574 } 1575 assert(cp == g->must + g->mlen); 1576 *cp++ = '\0'; /* just on general principles */ 1577} 1578 1579/* 1580 - pluscount - count + nesting 1581 == static sopno pluscount(register struct parse *p, register struct re_guts *g); 1582 */ 1583static sopno /* nesting depth */ 1584pluscount(p, g) 1585struct parse *p; 1586register struct re_guts *g; 1587{ 1588 register sop *scan; 1589 register sop s; 1590 register sopno plusnest = 0; 1591 register sopno maxnest = 0; 1592 1593 if (p->error != 0) 1594 return(0); /* there may not be an OEND */ 1595 1596 scan = g->strip + 1; 1597 do { 1598 s = *scan++; 1599 switch (OP(s)) { 1600 case OPLUS_: 1601 plusnest++; 1602 break; 1603 case O_PLUS: 1604 if (plusnest > maxnest) 1605 maxnest = plusnest; 1606 plusnest--; 1607 break; 1608 } 1609 } while (OP(s) != OEND); 1610 if (plusnest != 0) 1611 g->iflags |= BAD; 1612 return(maxnest); 1613} 1614