/home/arjun/llvm-project/llvm/lib/Support/regcomp.c
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | /*- | 
| 2 |  |  * This code is derived from OpenBSD's libc/regex, original license follows: | 
| 3 |  |  * | 
| 4 |  |  * Copyright (c) 1992, 1993, 1994 Henry Spencer. | 
| 5 |  |  * Copyright (c) 1992, 1993, 1994 | 
| 6 |  |  *  The Regents of the University of California.  All rights reserved. | 
| 7 |  |  * | 
| 8 |  |  * This code is derived from software contributed to Berkeley by | 
| 9 |  |  * Henry Spencer. | 
| 10 |  |  * | 
| 11 |  |  * Redistribution and use in source and binary forms, with or without | 
| 12 |  |  * modification, are permitted provided that the following conditions | 
| 13 |  |  * are met: | 
| 14 |  |  * 1. Redistributions of source code must retain the above copyright | 
| 15 |  |  *    notice, this list of conditions and the following disclaimer. | 
| 16 |  |  * 2. Redistributions in binary form must reproduce the above copyright | 
| 17 |  |  *    notice, this list of conditions and the following disclaimer in the | 
| 18 |  |  *    documentation and/or other materials provided with the distribution. | 
| 19 |  |  * 3. Neither the name of the University nor the names of its contributors | 
| 20 |  |  *    may be used to endorse or promote products derived from this software | 
| 21 |  |  *    without specific prior written permission. | 
| 22 |  |  * | 
| 23 |  |  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 
| 24 |  |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
| 25 |  |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
| 26 |  |  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 
| 27 |  |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 
| 28 |  |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 
| 29 |  |  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 
| 30 |  |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 
| 31 |  |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 
| 32 |  |  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 
| 33 |  |  * SUCH DAMAGE. | 
| 34 |  |  * | 
| 35 |  |  *  @(#)regcomp.c 8.5 (Berkeley) 3/20/94 | 
| 36 |  |  */ | 
| 37 |  |  | 
| 38 |  | #include <sys/types.h> | 
| 39 |  | #include <stdint.h> | 
| 40 |  | #include <stdio.h> | 
| 41 |  | #include <string.h> | 
| 42 |  | #include <ctype.h> | 
| 43 |  | #include <limits.h> | 
| 44 |  | #include <stdlib.h> | 
| 45 |  | #include "regex_impl.h" | 
| 46 |  |  | 
| 47 |  | #include "regutils.h" | 
| 48 |  | #include "regex2.h" | 
| 49 |  |  | 
| 50 |  | #include "llvm/Config/config.h" | 
| 51 |  | #include "llvm/Support/Compiler.h" | 
| 52 |  |  | 
| 53 |  | /* character-class table */ | 
| 54 |  | static struct cclass { | 
| 55 |  |   const char *name; | 
| 56 |  |   const char *chars; | 
| 57 |  |   const char *multis; | 
| 58 |  | } cclasses[] = { | 
| 59 |  |   { "alnum",  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ | 
| 60 |  | 0123456789",        ""} , | 
| 61 |  |   { "alpha",  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", | 
| 62 |  |           ""} , | 
| 63 |  |   { "blank",  " \t",    ""} , | 
| 64 |  |   { "cntrl",  "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ | 
| 65 |  | \25\26\27\30\31\32\33\34\35\36\37\177", ""} , | 
| 66 |  |   { "digit",  "0123456789", ""} , | 
| 67 |  |   { "graph",  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ | 
| 68 |  | 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", | 
| 69 |  |           ""} , | 
| 70 |  |   { "lower",  "abcdefghijklmnopqrstuvwxyz", | 
| 71 |  |           ""} , | 
| 72 |  |   { "print",  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ | 
| 73 |  | 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", | 
| 74 |  |           ""} , | 
| 75 |  |   { "punct",  "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", | 
| 76 |  |           ""} , | 
| 77 |  |   { "space",  "\t\n\v\f\r ",  ""} , | 
| 78 |  |   { "upper",  "ABCDEFGHIJKLMNOPQRSTUVWXYZ", | 
| 79 |  |           ""} , | 
| 80 |  |   { "xdigit", "0123456789ABCDEFabcdef", | 
| 81 |  |           ""} , | 
| 82 |  |   { NULL,   0,    "" } | 
| 83 |  | }; | 
| 84 |  |  | 
| 85 |  | /* character-name table */ | 
| 86 |  | static struct cname { | 
| 87 |  |   const char *name; | 
| 88 |  |   char code; | 
| 89 |  | } cnames[] = { | 
| 90 |  |   { "NUL",      '\0' }, | 
| 91 |  |   { "SOH",      '\001' }, | 
| 92 |  |   { "STX",      '\002' }, | 
| 93 |  |   { "ETX",      '\003' }, | 
| 94 |  |   { "EOT",      '\004' }, | 
| 95 |  |   { "ENQ",      '\005' }, | 
| 96 |  |   { "ACK",      '\006' }, | 
| 97 |  |   { "BEL",      '\007' }, | 
| 98 |  |   { "alert",      '\007' }, | 
| 99 |  |   { "BS",       '\010' }, | 
| 100 |  |   { "backspace",      '\b' }, | 
| 101 |  |   { "HT",       '\011' }, | 
| 102 |  |   { "tab",      '\t' }, | 
| 103 |  |   { "LF",       '\012' }, | 
| 104 |  |   { "newline",      '\n' }, | 
| 105 |  |   { "VT",       '\013' }, | 
| 106 |  |   { "vertical-tab",   '\v' }, | 
| 107 |  |   { "FF",       '\014' }, | 
| 108 |  |   { "form-feed",      '\f' }, | 
| 109 |  |   { "CR",       '\015' }, | 
| 110 |  |   { "carriage-return",    '\r' }, | 
| 111 |  |   { "SO",       '\016' }, | 
| 112 |  |   { "SI",       '\017' }, | 
| 113 |  |   { "DLE",      '\020' }, | 
| 114 |  |   { "DC1",      '\021' }, | 
| 115 |  |   { "DC2",      '\022' }, | 
| 116 |  |   { "DC3",      '\023' }, | 
| 117 |  |   { "DC4",      '\024' }, | 
| 118 |  |   { "NAK",      '\025' }, | 
| 119 |  |   { "SYN",      '\026' }, | 
| 120 |  |   { "ETB",      '\027' }, | 
| 121 |  |   { "CAN",      '\030' }, | 
| 122 |  |   { "EM",       '\031' }, | 
| 123 |  |   { "SUB",      '\032' }, | 
| 124 |  |   { "ESC",      '\033' }, | 
| 125 |  |   { "IS4",      '\034' }, | 
| 126 |  |   { "FS",       '\034' }, | 
| 127 |  |   { "IS3",      '\035' }, | 
| 128 |  |   { "GS",       '\035' }, | 
| 129 |  |   { "IS2",      '\036' }, | 
| 130 |  |   { "RS",       '\036' }, | 
| 131 |  |   { "IS1",      '\037' }, | 
| 132 |  |   { "US",       '\037' }, | 
| 133 |  |   { "space",      ' ' }, | 
| 134 |  |   { "exclamation-mark",   '!' }, | 
| 135 |  |   { "quotation-mark",   '"' }, | 
| 136 |  |   { "number-sign",    '#' }, | 
| 137 |  |   { "dollar-sign",    '$' }, | 
| 138 |  |   { "percent-sign",   '%' }, | 
| 139 |  |   { "ampersand",      '&' }, | 
| 140 |  |   { "apostrophe",     '\'' }, | 
| 141 |  |   { "left-parenthesis",   '(' }, | 
| 142 |  |   { "right-parenthesis",    ')' }, | 
| 143 |  |   { "asterisk",     '*' }, | 
| 144 |  |   { "plus-sign",      '+' }, | 
| 145 |  |   { "comma",      ',' }, | 
| 146 |  |   { "hyphen",     '-' }, | 
| 147 |  |   { "hyphen-minus",   '-' }, | 
| 148 |  |   { "period",     '.' }, | 
| 149 |  |   { "full-stop",      '.' }, | 
| 150 |  |   { "slash",      '/' }, | 
| 151 |  |   { "solidus",      '/' }, | 
| 152 |  |   { "zero",     '0' }, | 
| 153 |  |   { "one",      '1' }, | 
| 154 |  |   { "two",      '2' }, | 
| 155 |  |   { "three",      '3' }, | 
| 156 |  |   { "four",     '4' }, | 
| 157 |  |   { "five",     '5' }, | 
| 158 |  |   { "six",      '6' }, | 
| 159 |  |   { "seven",      '7' }, | 
| 160 |  |   { "eight",      '8' }, | 
| 161 |  |   { "nine",     '9' }, | 
| 162 |  |   { "colon",      ':' }, | 
| 163 |  |   { "semicolon",      ';' }, | 
| 164 |  |   { "less-than-sign",   '<' }, | 
| 165 |  |   { "equals-sign",    '=' }, | 
| 166 |  |   { "greater-than-sign",    '>' }, | 
| 167 |  |   { "question-mark",    '?' }, | 
| 168 |  |   { "commercial-at",    '@' }, | 
| 169 |  |   { "left-square-bracket",  '[' }, | 
| 170 |  |   { "backslash",      '\\' }, | 
| 171 |  |   { "reverse-solidus",    '\\' }, | 
| 172 |  |   { "right-square-bracket", ']' }, | 
| 173 |  |   { "circumflex",     '^' }, | 
| 174 |  |   { "circumflex-accent",    '^' }, | 
| 175 |  |   { "underscore",     '_' }, | 
| 176 |  |   { "low-line",     '_' }, | 
| 177 |  |   { "grave-accent",   '`' }, | 
| 178 |  |   { "left-brace",     '{' }, | 
| 179 |  |   { "left-curly-bracket",   '{' }, | 
| 180 |  |   { "vertical-line",    '|' }, | 
| 181 |  |   { "right-brace",    '}' }, | 
| 182 |  |   { "right-curly-bracket",  '}' }, | 
| 183 |  |   { "tilde",      '~' }, | 
| 184 |  |   { "DEL",      '\177' }, | 
| 185 |  |   { NULL,       0 } | 
| 186 |  | }; | 
| 187 |  |  | 
| 188 |  | /* | 
| 189 |  |  * parse structure, passed up and down to avoid global variables and | 
| 190 |  |  * other clumsinesses | 
| 191 |  |  */ | 
| 192 |  | struct parse { | 
| 193 |  |   char *next;   /* next character in RE */ | 
| 194 |  |   char *end;    /* end of string (-> NUL normally) */ | 
| 195 |  |   int error;    /* has an error been seen? */ | 
| 196 |  |   sop *strip;   /* malloced strip */ | 
| 197 |  |   sopno ssize;    /* malloced strip size (allocated) */ | 
| 198 |  |   sopno slen;   /* malloced strip length (used) */ | 
| 199 |  |   int ncsalloc;   /* number of csets allocated */ | 
| 200 |  |   struct re_guts *g; | 
| 201 | 0 | # define  NPAREN  10  /* we need to remember () 1-9 for back refs */ | 
| 202 |  |   sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ | 
| 203 |  |   sopno pend[NPAREN]; /* -> ) ([0] unused) */ | 
| 204 |  | }; | 
| 205 |  |  | 
| 206 |  | static void p_ere(struct parse *, int); | 
| 207 |  | static void p_ere_exp(struct parse *); | 
| 208 |  | static void p_str(struct parse *); | 
| 209 |  | static void p_bre(struct parse *, int, int); | 
| 210 |  | static int p_simp_re(struct parse *, int); | 
| 211 |  | static int p_count(struct parse *); | 
| 212 |  | static void p_bracket(struct parse *); | 
| 213 |  | static void p_b_term(struct parse *, cset *); | 
| 214 |  | static void p_b_cclass(struct parse *, cset *); | 
| 215 |  | static void p_b_eclass(struct parse *, cset *); | 
| 216 |  | static char p_b_symbol(struct parse *); | 
| 217 |  | static char p_b_coll_elem(struct parse *, int); | 
| 218 |  | static char othercase(int); | 
| 219 |  | static void bothcases(struct parse *, int); | 
| 220 |  | static void ordinary(struct parse *, int); | 
| 221 |  | static void nonnewline(struct parse *); | 
| 222 |  | static void repeat(struct parse *, sopno, int, int); | 
| 223 |  | static int seterr(struct parse *, int); | 
| 224 |  | static cset *allocset(struct parse *); | 
| 225 |  | static void freeset(struct parse *, cset *); | 
| 226 |  | static int freezeset(struct parse *, cset *); | 
| 227 |  | static int firstch(struct parse *, cset *); | 
| 228 |  | static int nch(struct parse *, cset *); | 
| 229 |  | static void mcadd(struct parse *, cset *, const char *); | 
| 230 |  | static void mcinvert(struct parse *, cset *); | 
| 231 |  | static void mccase(struct parse *, cset *); | 
| 232 |  | static int isinsets(struct re_guts *, int); | 
| 233 |  | static int samesets(struct re_guts *, int, int); | 
| 234 |  | static void categorize(struct parse *, struct re_guts *); | 
| 235 |  | static sopno dupl(struct parse *, sopno, sopno); | 
| 236 |  | static void doemit(struct parse *, sop, size_t); | 
| 237 |  | static void doinsert(struct parse *, sop, size_t, sopno); | 
| 238 |  | static void dofwd(struct parse *, sopno, sop); | 
| 239 |  | static void enlarge(struct parse *, sopno); | 
| 240 |  | static void stripsnug(struct parse *, struct re_guts *); | 
| 241 |  | static void findmust(struct parse *, struct re_guts *); | 
| 242 |  | static sopno pluscount(struct parse *, struct re_guts *); | 
| 243 |  |  | 
| 244 |  | static char nuls[10];   /* place to point scanner in event of error */ | 
| 245 |  |  | 
| 246 |  | /* | 
| 247 |  |  * macros for use with parse structure | 
| 248 |  |  * BEWARE:  these know that the parse structure is named `p' !!! | 
| 249 |  |  */ | 
| 250 | 0 | #define PEEK()  (*p->next) | 
| 251 | 0 | #define PEEK2() (*(p->next+1)) | 
| 252 | 0 | #define MORE()  (p->next < p->end) | 
| 253 | 0 | #define MORE2() (p->next+1 < p->end) | 
| 254 | 0 | #define SEE(c)  (MORE() && PEEK() == (c)) | 
| 255 | 0 | #define SEETWO(a, b)  (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) | 
| 256 | 0 | #define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0) | 
| 257 | 0 | #define EATTWO(a, b)  ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) | 
| 258 | 0 | #define NEXT()  (p->next++) | 
| 259 | 0 | #define NEXT2() (p->next += 2) | 
| 260 | 0 | #define NEXTn(n)  (p->next += (n)) | 
| 261 | 0 | #define GETNEXT() (*p->next++) | 
| 262 | 0 | #define SETERROR(e) seterr(p, (e)) | 
| 263 | 0 | #define REQUIRE(co, e)  (void)((co) || SETERROR(e)) | 
| 264 |  | #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) | 
| 265 | 0 | #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) | 
| 266 |  | #define MUSTNOTSEE(c, e)  (REQUIRE(!MORE() || PEEK() != (c), e)) | 
| 267 | 0 | #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) | 
| 268 | 0 | #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) | 
| 269 | 0 | #define AHEAD(pos)    dofwd(p, pos, HERE()-(pos)) | 
| 270 | 0 | #define ASTERN(sop, pos)  EMIT(sop, HERE()-pos) | 
| 271 | 0 | #define HERE()    (p->slen) | 
| 272 | 0 | #define THERE()   (p->slen - 1) | 
| 273 |  | #define THERETHERE()  (p->slen - 2) | 
| 274 | 0 | #define DROP(n) (p->slen -= (n)) | 
| 275 |  |  | 
| 276 |  | #ifdef  _POSIX2_RE_DUP_MAX | 
| 277 | 0 | #define DUPMAX  _POSIX2_RE_DUP_MAX | 
| 278 |  | #else | 
| 279 |  | #define DUPMAX  255 | 
| 280 |  | #endif | 
| 281 | 0 | #define INFINITY  (DUPMAX + 1) | 
| 282 |  |  | 
| 283 |  | #ifndef NDEBUG | 
| 284 |  | static int never = 0;   /* for use in asserts; shuts lint up */ | 
| 285 |  | #else | 
| 286 |  | #define never 0   /* some <assert.h>s have bugs too */ | 
| 287 |  | #endif | 
| 288 |  |  | 
| 289 |  | /* | 
| 290 |  |  - llvm_regcomp - interface for parser and compilation | 
| 291 |  |  */ | 
| 292 |  | int       /* 0 success, otherwise REG_something */ | 
| 293 |  | llvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags) | 
| 294 | 0 | { | 
| 295 | 0 |   struct parse pa; | 
| 296 | 0 |   struct re_guts *g; | 
| 297 | 0 |   struct parse *p = &pa; | 
| 298 | 0 |   int i; | 
| 299 | 0 |   size_t len; | 
| 300 |  | #ifdef REDEBUG | 
| 301 |  | # define  GOODFLAGS(f)  (f) | 
| 302 |  | #else | 
| 303 | 0 | # define  GOODFLAGS(f)  ((f)&~REG_DUMP) | 
| 304 | 0 | #endif | 
| 305 | 0 | 
 | 
| 306 | 0 |   cflags = GOODFLAGS(cflags); | 
| 307 | 0 |   if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) | 
| 308 | 0 |     return(REG_INVARG); | 
| 309 | 0 | 
 | 
| 310 | 0 |   if (cflags®_PEND) { | 
| 311 | 0 |     if (preg->re_endp < pattern) | 
| 312 | 0 |       return(REG_INVARG); | 
| 313 | 0 |     len = preg->re_endp - pattern; | 
| 314 | 0 |   } else | 
| 315 | 0 |     len = strlen((const char *)pattern); | 
| 316 | 0 | 
 | 
| 317 | 0 |   /* do the mallocs early so failure handling is easy */ | 
| 318 | 0 |   g = (struct re_guts *)malloc(sizeof(struct re_guts) + | 
| 319 | 0 |               (NC-1)*sizeof(cat_t)); | 
| 320 | 0 |   if (g == NULL) | 
| 321 | 0 |     return(REG_ESPACE); | 
| 322 | 0 |   p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ | 
| 323 | 0 |   p->strip = (sop *)calloc(p->ssize, sizeof(sop)); | 
| 324 | 0 |   p->slen = 0; | 
| 325 | 0 |   if (p->strip == NULL) { | 
| 326 | 0 |     free((char *)g); | 
| 327 | 0 |     return(REG_ESPACE); | 
| 328 | 0 |   } | 
| 329 | 0 | 
 | 
| 330 | 0 |   /* set things up */ | 
| 331 | 0 |   p->g = g; | 
| 332 | 0 |   p->next = (char *)pattern;  /* convenience; we do not modify it */ | 
| 333 | 0 |   p->end = p->next + len; | 
| 334 | 0 |   p->error = 0; | 
| 335 | 0 |   p->ncsalloc = 0; | 
| 336 | 0 |   for (i = 0; i < NPAREN; i++) { | 
| 337 | 0 |     p->pbegin[i] = 0; | 
| 338 | 0 |     p->pend[i] = 0; | 
| 339 | 0 |   } | 
| 340 | 0 |   g->csetsize = NC; | 
| 341 | 0 |   g->sets = NULL; | 
| 342 | 0 |   g->setbits = NULL; | 
| 343 | 0 |   g->ncsets = 0; | 
| 344 | 0 |   g->cflags = cflags; | 
| 345 | 0 |   g->iflags = 0; | 
| 346 | 0 |   g->nbol = 0; | 
| 347 | 0 |   g->neol = 0; | 
| 348 | 0 |   g->must = NULL; | 
| 349 | 0 |   g->mlen = 0; | 
| 350 | 0 |   g->nsub = 0; | 
| 351 | 0 |   g->ncategories = 1; /* category 0 is "everything else" */ | 
| 352 | 0 |   g->categories = &g->catspace[-(CHAR_MIN)]; | 
| 353 | 0 |   (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); | 
| 354 | 0 |   g->backrefs = 0; | 
| 355 | 0 | 
 | 
| 356 | 0 |   /* do it */ | 
| 357 | 0 |   EMIT(OEND, 0); | 
| 358 | 0 |   g->firststate = THERE(); | 
| 359 | 0 |   if (cflags®_EXTENDED) | 
| 360 | 0 |     p_ere(p, OUT); | 
| 361 | 0 |   else if (cflags®_NOSPEC) | 
| 362 | 0 |     p_str(p); | 
| 363 | 0 |   else | 
| 364 | 0 |     p_bre(p, OUT, OUT); | 
| 365 | 0 |   EMIT(OEND, 0); | 
| 366 | 0 |   g->laststate = THERE(); | 
| 367 | 0 | 
 | 
| 368 | 0 |   /* tidy up loose ends and fill things in */ | 
| 369 | 0 |   categorize(p, g); | 
| 370 | 0 |   stripsnug(p, g); | 
| 371 | 0 |   findmust(p, g); | 
| 372 | 0 |   g->nplus = pluscount(p, g); | 
| 373 | 0 |   g->magic = MAGIC2; | 
| 374 | 0 |   preg->re_nsub = g->nsub; | 
| 375 | 0 |   preg->re_g = g; | 
| 376 | 0 |   preg->re_magic = MAGIC1; | 
| 377 | 0 | #ifndef REDEBUG | 
| 378 | 0 |   /* not debugging, so can't rely on the assert() in llvm_regexec() */ | 
| 379 | 0 |   if (g->iflags®EX_BAD) | 
| 380 | 0 |     SETERROR(REG_ASSERT); | 
| 381 | 0 | #endif | 
| 382 | 0 | 
 | 
| 383 | 0 |   /* win or lose, we're done */ | 
| 384 | 0 |   if (p->error != 0) /* lose */ | 
| 385 | 0 |     llvm_regfree(preg); | 
| 386 | 0 |   return(p->error); | 
| 387 | 0 | } | 
| 388 |  |  | 
| 389 |  | /* | 
| 390 |  |  - p_ere - ERE parser top level, concatenation and alternation | 
| 391 |  |  */ | 
| 392 |  | static void | 
| 393 |  | p_ere(struct parse *p, int stop)  /* character this ERE should end at */ | 
| 394 | 0 | { | 
| 395 | 0 |   char c; | 
| 396 | 0 |   sopno prevback = 0; | 
| 397 | 0 |   sopno prevfwd = 0; | 
| 398 | 0 |   sopno conc; | 
| 399 | 0 |   int first = 1;    /* is this the first alternative? */ | 
| 400 | 0 | 
 | 
| 401 | 0 |   for (;;) { | 
| 402 | 0 |     /* do a bunch of concatenated expressions */ | 
| 403 | 0 |     conc = HERE(); | 
| 404 | 0 |     while (MORE() && (c = PEEK()) != '|' && c != stop) | 
| 405 | 0 |       p_ere_exp(p); | 
| 406 | 0 |     REQUIRE(HERE() != conc, REG_EMPTY);  /* require nonempty */ | 
| 407 | 0 | 
 | 
| 408 | 0 |     if (!EAT('|')) | 
| 409 | 0 |       break;   /* NOTE BREAK OUT */ | 
| 410 | 0 |  | 
| 411 | 0 |     if (first) { | 
| 412 | 0 |       INSERT(OCH_, conc); /* offset is wrong */ | 
| 413 | 0 |       prevfwd = conc; | 
| 414 | 0 |       prevback = conc; | 
| 415 | 0 |       first = 0; | 
| 416 | 0 |     } | 
| 417 | 0 |     ASTERN(OOR1, prevback); | 
| 418 | 0 |     prevback = THERE(); | 
| 419 | 0 |     AHEAD(prevfwd);      /* fix previous offset */ | 
| 420 | 0 |     prevfwd = HERE(); | 
| 421 | 0 |     EMIT(OOR2, 0);      /* offset is very wrong */ | 
| 422 | 0 |   } | 
| 423 | 0 | 
 | 
| 424 | 0 |   if (!first) {   /* tail-end fixups */ | 
| 425 | 0 |     AHEAD(prevfwd); | 
| 426 | 0 |     ASTERN(O_CH, prevback); | 
| 427 | 0 |   } | 
| 428 | 0 | 
 | 
| 429 | 0 |   assert(!MORE() || SEE(stop)); | 
| 430 | 0 | } | 
| 431 |  |  | 
| 432 |  | /* | 
| 433 |  |  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op | 
| 434 |  |  */ | 
| 435 |  | static void | 
| 436 |  | p_ere_exp(struct parse *p) | 
| 437 | 0 | { | 
| 438 | 0 |   char c; | 
| 439 | 0 |   sopno pos; | 
| 440 | 0 |   int count; | 
| 441 | 0 |   int count2; | 
| 442 | 0 |   int backrefnum; | 
| 443 | 0 |   sopno subno; | 
| 444 | 0 |   int wascaret = 0; | 
| 445 | 0 | 
 | 
| 446 | 0 |   assert(MORE());   /* caller should have ensured this */ | 
| 447 | 0 |   c = GETNEXT(); | 
| 448 | 0 | 
 | 
| 449 | 0 |   pos = HERE(); | 
| 450 | 0 |   switch (c) { | 
| 451 | 0 |   case '(': | 
| 452 | 0 |     REQUIRE(MORE(), REG_EPAREN); | 
| 453 | 0 |     p->g->nsub++; | 
| 454 | 0 |     subno = p->g->nsub; | 
| 455 | 0 |     if (subno < NPAREN) | 
| 456 | 0 |       p->pbegin[subno] = HERE(); | 
| 457 | 0 |     EMIT(OLPAREN, subno); | 
| 458 | 0 |     if (!SEE(')')) | 
| 459 | 0 |       p_ere(p, ')'); | 
| 460 | 0 |     if (subno < NPAREN) { | 
| 461 | 0 |       p->pend[subno] = HERE(); | 
| 462 | 0 |       assert(p->pend[subno] != 0); | 
| 463 | 0 |     } | 
| 464 | 0 |     EMIT(ORPAREN, subno); | 
| 465 | 0 |     MUSTEAT(')', REG_EPAREN); | 
| 466 | 0 |     break; | 
| 467 | 0 | #ifndef POSIX_MISTAKE | 
| 468 | 0 |   case ')':   /* happens only if no current unmatched ( */ | 
| 469 | 0 |     /* | 
| 470 | 0 |      * You may ask, why the ifndef?  Because I didn't notice | 
| 471 | 0 |      * this until slightly too late for 1003.2, and none of the | 
| 472 | 0 |      * other 1003.2 regular-expression reviewers noticed it at | 
| 473 | 0 |      * all.  So an unmatched ) is legal POSIX, at least until | 
| 474 | 0 |      * we can get it fixed. | 
| 475 | 0 |      */ | 
| 476 | 0 |     SETERROR(REG_EPAREN); | 
| 477 | 0 |     break; | 
| 478 | 0 | #endif | 
| 479 | 0 |   case '^': | 
| 480 | 0 |     EMIT(OBOL, 0); | 
| 481 | 0 |     p->g->iflags |= USEBOL; | 
| 482 | 0 |     p->g->nbol++; | 
| 483 | 0 |     wascaret = 1; | 
| 484 | 0 |     break; | 
| 485 | 0 |   case '$': | 
| 486 | 0 |     EMIT(OEOL, 0); | 
| 487 | 0 |     p->g->iflags |= USEEOL; | 
| 488 | 0 |     p->g->neol++; | 
| 489 | 0 |     break; | 
| 490 | 0 |   case '|': | 
| 491 | 0 |     SETERROR(REG_EMPTY); | 
| 492 | 0 |     break; | 
| 493 | 0 |   case '*': | 
| 494 | 0 |   case '+': | 
| 495 | 0 |   case '?': | 
| 496 | 0 |     SETERROR(REG_BADRPT); | 
| 497 | 0 |     break; | 
| 498 | 0 |   case '.': | 
| 499 | 0 |     if (p->g->cflags®_NEWLINE) | 
| 500 | 0 |       nonnewline(p); | 
| 501 | 0 |     else | 
| 502 | 0 |       EMIT(OANY, 0); | 
| 503 | 0 |     break; | 
| 504 | 0 |   case '[': | 
| 505 | 0 |     p_bracket(p); | 
| 506 | 0 |     break; | 
| 507 | 0 |   case '\\': | 
| 508 | 0 |     REQUIRE(MORE(), REG_EESCAPE); | 
| 509 | 0 |     c = GETNEXT(); | 
| 510 | 0 |     if (c >= '1' && c <= '9') { | 
| 511 | 0 |       /* \[0-9] is taken to be a back-reference to a previously specified | 
| 512 | 0 |        * matching group. backrefnum will hold the number. The matching | 
| 513 | 0 |        * group must exist (i.e. if \4 is found there must have been at | 
| 514 | 0 |        * least 4 matching groups specified in the pattern previously). | 
| 515 | 0 |        */ | 
| 516 | 0 |       backrefnum = c - '0'; | 
| 517 | 0 |       if (p->pend[backrefnum] == 0) { | 
| 518 | 0 |         SETERROR(REG_ESUBREG); | 
| 519 | 0 |         break; | 
| 520 | 0 |       } | 
| 521 | 0 | 
 | 
| 522 | 0 |       /* Make sure everything checks out and emit the sequence | 
| 523 | 0 |        * that marks a back-reference to the parse structure. | 
| 524 | 0 |        */ | 
| 525 | 0 |       assert(backrefnum <= p->g->nsub); | 
| 526 | 0 |       EMIT(OBACK_, backrefnum); | 
| 527 | 0 |       assert(p->pbegin[backrefnum] != 0); | 
| 528 | 0 |       assert(OP(p->strip[p->pbegin[backrefnum]]) != OLPAREN); | 
| 529 | 0 |       assert(OP(p->strip[p->pend[backrefnum]]) != ORPAREN); | 
| 530 | 0 |       (void) dupl(p, p->pbegin[backrefnum]+1, p->pend[backrefnum]); | 
| 531 | 0 |       EMIT(O_BACK, backrefnum); | 
| 532 | 0 |       p->g->backrefs = 1; | 
| 533 | 0 |     } else { | 
| 534 | 0 |       /* Other chars are simply themselves when escaped with a backslash. | 
| 535 | 0 |        */ | 
| 536 | 0 |       ordinary(p, c); | 
| 537 | 0 |     } | 
| 538 | 0 |     break; | 
| 539 | 0 |   case '{':   /* okay as ordinary except if digit follows */ | 
| 540 | 0 |     REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); | 
| 541 | 0 |     LLVM_FALLTHROUGH; | 
| 542 | 0 |   default: | 
| 543 | 0 |     ordinary(p, c); | 
| 544 | 0 |     break; | 
| 545 | 0 |   } | 
| 546 | 0 |  | 
| 547 | 0 |   if (!MORE()) | 
| 548 | 0 |     return; | 
| 549 | 0 |   c = PEEK(); | 
| 550 | 0 |   /* we call { a repetition if followed by a digit */ | 
| 551 | 0 |   if (!( c == '*' || c == '+' || c == '?' || | 
| 552 | 0 |         (c == '{' && MORE2() && isdigit((uch)PEEK2())) )) | 
| 553 | 0 |     return;   /* no repetition, we're done */ | 
| 554 | 0 |   NEXT(); | 
| 555 | 0 | 
 | 
| 556 | 0 |   REQUIRE(!wascaret, REG_BADRPT); | 
| 557 | 0 |   switch (c) { | 
| 558 | 0 |   case '*': /* implemented as +? */ | 
| 559 | 0 |     /* this case does not require the (y|) trick, noKLUDGE */ | 
| 560 | 0 |     INSERT(OPLUS_, pos); | 
| 561 | 0 |     ASTERN(O_PLUS, pos); | 
| 562 | 0 |     INSERT(OQUEST_, pos); | 
| 563 | 0 |     ASTERN(O_QUEST, pos); | 
| 564 | 0 |     break; | 
| 565 | 0 |   case '+': | 
| 566 | 0 |     INSERT(OPLUS_, pos); | 
| 567 | 0 |     ASTERN(O_PLUS, pos); | 
| 568 | 0 |     break; | 
| 569 | 0 |   case '?': | 
| 570 | 0 |     /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ | 
| 571 | 0 |     INSERT(OCH_, pos);    /* offset slightly wrong */ | 
| 572 | 0 |     ASTERN(OOR1, pos);    /* this one's right */ | 
| 573 | 0 |     AHEAD(pos);      /* fix the OCH_ */ | 
| 574 | 0 |     EMIT(OOR2, 0);      /* offset very wrong... */ | 
| 575 | 0 |     AHEAD(THERE());      /* ...so fix it */ | 
| 576 | 0 |     ASTERN(O_CH, THERETHERE()); | 
| 577 | 0 |     break; | 
| 578 | 0 |   case '{': | 
| 579 | 0 |     count = p_count(p); | 
| 580 | 0 |     if (EAT(',')) { | 
| 581 | 0 |       if (isdigit((uch)PEEK())) { | 
| 582 | 0 |         count2 = p_count(p); | 
| 583 | 0 |         REQUIRE(count <= count2, REG_BADBR); | 
| 584 | 0 |       } else    /* single number with comma */ | 
| 585 | 0 |         count2 = INFINITY; | 
| 586 | 0 |     } else    /* just a single number */ | 
| 587 | 0 |       count2 = count; | 
| 588 | 0 |     repeat(p, pos, count, count2); | 
| 589 | 0 |     if (!EAT('}')) { /* error heuristics */ | 
| 590 | 0 |       while (MORE() && PEEK() != '}') | 
| 591 | 0 |         NEXT(); | 
| 592 | 0 |       REQUIRE(MORE(), REG_EBRACE); | 
| 593 | 0 |       SETERROR(REG_BADBR); | 
| 594 | 0 |     } | 
| 595 | 0 |     break; | 
| 596 | 0 |   } | 
| 597 | 0 | 
 | 
| 598 | 0 |   if (!MORE()) | 
| 599 | 0 |     return; | 
| 600 | 0 |   c = PEEK(); | 
| 601 | 0 |   if (!( c == '*' || c == '+' || c == '?' || | 
| 602 | 0 |         (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) | 
| 603 | 0 |     return; | 
| 604 | 0 |   SETERROR(REG_BADRPT); | 
| 605 | 0 | } | 
| 606 |  |  | 
| 607 |  | /* | 
| 608 |  |  - p_str - string (no metacharacters) "parser" | 
| 609 |  |  */ | 
| 610 |  | static void | 
| 611 |  | p_str(struct parse *p) | 
| 612 | 0 | { | 
| 613 | 0 |   REQUIRE(MORE(), REG_EMPTY); | 
| 614 | 0 |   while (MORE()) | 
| 615 | 0 |     ordinary(p, GETNEXT()); | 
| 616 | 0 | } | 
| 617 |  |  | 
| 618 |  | /* | 
| 619 |  |  - p_bre - BRE parser top level, anchoring and concatenation | 
| 620 |  |  * Giving end1 as OUT essentially eliminates the end1/end2 check. | 
| 621 |  |  * | 
| 622 |  |  * This implementation is a bit of a kludge, in that a trailing $ is first | 
| 623 |  |  * taken as an ordinary character and then revised to be an anchor.  The | 
| 624 |  |  * only undesirable side effect is that '$' gets included as a character | 
| 625 |  |  * category in such cases.  This is fairly harmless; not worth fixing. | 
| 626 |  |  * The amount of lookahead needed to avoid this kludge is excessive. | 
| 627 |  |  */ | 
| 628 |  | static void | 
| 629 |  | p_bre(struct parse *p, | 
| 630 |  |     int end1,   /* first terminating character */ | 
| 631 |  |     int end2)   /* second terminating character */ | 
| 632 | 0 | { | 
| 633 | 0 |   sopno start = HERE(); | 
| 634 | 0 |   int first = 1;      /* first subexpression? */ | 
| 635 | 0 |   int wasdollar = 0; | 
| 636 | 0 | 
 | 
| 637 | 0 |   if (EAT('^')) { | 
| 638 | 0 |     EMIT(OBOL, 0); | 
| 639 | 0 |     p->g->iflags |= USEBOL; | 
| 640 | 0 |     p->g->nbol++; | 
| 641 | 0 |   } | 
| 642 | 0 |   while (MORE() && !SEETWO(end1, end2)) { | 
| 643 | 0 |     wasdollar = p_simp_re(p, first); | 
| 644 | 0 |     first = 0; | 
| 645 | 0 |   } | 
| 646 | 0 |   if (wasdollar) { /* oops, that was a trailing anchor */ | 
| 647 | 0 |     DROP(1); | 
| 648 | 0 |     EMIT(OEOL, 0); | 
| 649 | 0 |     p->g->iflags |= USEEOL; | 
| 650 | 0 |     p->g->neol++; | 
| 651 | 0 |   } | 
| 652 | 0 | 
 | 
| 653 | 0 |   REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ | 
| 654 | 0 | } | 
| 655 |  |  | 
| 656 |  | /* | 
| 657 |  |  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition | 
| 658 |  |  */ | 
| 659 |  | static int      /* was the simple RE an unbackslashed $? */ | 
| 660 |  | p_simp_re(struct parse *p, | 
| 661 |  |     int starordinary)   /* is a leading * an ordinary character? */ | 
| 662 | 0 | { | 
| 663 | 0 |   int c; | 
| 664 | 0 |   int count; | 
| 665 | 0 |   int count2; | 
| 666 | 0 |   sopno pos; | 
| 667 | 0 |   int i; | 
| 668 | 0 |   sopno subno; | 
| 669 | 0 | # define  BACKSL  (1<<CHAR_BIT) | 
| 670 | 0 | 
 | 
| 671 | 0 |         pos = HERE(); /* repetition op, if any, covers from here */ | 
| 672 | 0 | 
 | 
| 673 | 0 |         assert(MORE()); /* caller should have ensured this */ | 
| 674 | 0 |         c = GETNEXT(); | 
| 675 | 0 |   if (c == '\\') { | 
| 676 | 0 |     REQUIRE(MORE(), REG_EESCAPE); | 
| 677 | 0 |     c = BACKSL | GETNEXT(); | 
| 678 | 0 |   } | 
| 679 | 0 |   switch (c) { | 
| 680 | 0 |   case '.': | 
| 681 | 0 |     if (p->g->cflags®_NEWLINE) | 
| 682 | 0 |       nonnewline(p); | 
| 683 | 0 |     else | 
| 684 | 0 |       EMIT(OANY, 0); | 
| 685 | 0 |     break; | 
| 686 | 0 |   case '[': | 
| 687 | 0 |     p_bracket(p); | 
| 688 | 0 |     break; | 
| 689 | 0 |   case BACKSL|'{': | 
| 690 | 0 |     SETERROR(REG_BADRPT); | 
| 691 | 0 |     break; | 
| 692 | 0 |   case BACKSL|'(': | 
| 693 | 0 |     p->g->nsub++; | 
| 694 | 0 |     subno = p->g->nsub; | 
| 695 | 0 |     if (subno < NPAREN) | 
| 696 | 0 |       p->pbegin[subno] = HERE(); | 
| 697 | 0 |     EMIT(OLPAREN, subno); | 
| 698 | 0 |     /* the MORE here is an error heuristic */ | 
| 699 | 0 |     if (MORE() && !SEETWO('\\', ')')) | 
| 700 | 0 |       p_bre(p, '\\', ')'); | 
| 701 | 0 |     if (subno < NPAREN) { | 
| 702 | 0 |       p->pend[subno] = HERE(); | 
| 703 | 0 |       assert(p->pend[subno] != 0); | 
| 704 | 0 |     } | 
| 705 | 0 |     EMIT(ORPAREN, subno); | 
| 706 | 0 |     REQUIRE(EATTWO('\\', ')'), REG_EPAREN); | 
| 707 | 0 |     break; | 
| 708 | 0 |   case BACKSL|')': /* should not get here -- must be user */ | 
| 709 | 0 |   case BACKSL|'}': | 
| 710 | 0 |     SETERROR(REG_EPAREN); | 
| 711 | 0 |     break; | 
| 712 | 0 |   case BACKSL|'1': | 
| 713 | 0 |   case BACKSL|'2': | 
| 714 | 0 |   case BACKSL|'3': | 
| 715 | 0 |   case BACKSL|'4': | 
| 716 | 0 |   case BACKSL|'5': | 
| 717 | 0 |   case BACKSL|'6': | 
| 718 | 0 |   case BACKSL|'7': | 
| 719 | 0 |   case BACKSL|'8': | 
| 720 | 0 |   case BACKSL|'9': | 
| 721 | 0 |     i = (c&~BACKSL) - '0'; | 
| 722 | 0 |     assert(i < NPAREN); | 
| 723 | 0 |     if (p->pend[i] != 0) { | 
| 724 | 0 |       assert(i <= p->g->nsub); | 
| 725 | 0 |       EMIT(OBACK_, i); | 
| 726 | 0 |       assert(p->pbegin[i] != 0); | 
| 727 | 0 |       assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); | 
| 728 | 0 |       assert(OP(p->strip[p->pend[i]]) == ORPAREN); | 
| 729 | 0 |       (void) dupl(p, p->pbegin[i]+1, p->pend[i]); | 
| 730 | 0 |       EMIT(O_BACK, i); | 
| 731 | 0 |     } else | 
| 732 | 0 |       SETERROR(REG_ESUBREG); | 
| 733 | 0 |     p->g->backrefs = 1; | 
| 734 | 0 |     break; | 
| 735 | 0 |   case '*': | 
| 736 | 0 |     REQUIRE(starordinary, REG_BADRPT); | 
| 737 | 0 |     LLVM_FALLTHROUGH; | 
| 738 | 0 |   default: | 
| 739 | 0 |     ordinary(p, (char)c); | 
| 740 | 0 |     break; | 
| 741 | 0 |   } | 
| 742 | 0 |  | 
| 743 | 0 |   if (EAT('*')) {   /* implemented as +? */ | 
| 744 | 0 |     /* this case does not require the (y|) trick, noKLUDGE */ | 
| 745 | 0 |     INSERT(OPLUS_, pos); | 
| 746 | 0 |     ASTERN(O_PLUS, pos); | 
| 747 | 0 |     INSERT(OQUEST_, pos); | 
| 748 | 0 |     ASTERN(O_QUEST, pos); | 
| 749 | 0 |   } else if (EATTWO('\\', '{')) { | 
| 750 | 0 |     count = p_count(p); | 
| 751 | 0 |     if (EAT(',')) { | 
| 752 | 0 |       if (MORE() && isdigit((uch)PEEK())) { | 
| 753 | 0 |         count2 = p_count(p); | 
| 754 | 0 |         REQUIRE(count <= count2, REG_BADBR); | 
| 755 | 0 |       } else    /* single number with comma */ | 
| 756 | 0 |         count2 = INFINITY; | 
| 757 | 0 |     } else    /* just a single number */ | 
| 758 | 0 |       count2 = count; | 
| 759 | 0 |     repeat(p, pos, count, count2); | 
| 760 | 0 |     if (!EATTWO('\\', '}')) { /* error heuristics */ | 
| 761 | 0 |       while (MORE() && !SEETWO('\\', '}')) | 
| 762 | 0 |         NEXT(); | 
| 763 | 0 |       REQUIRE(MORE(), REG_EBRACE); | 
| 764 | 0 |       SETERROR(REG_BADBR); | 
| 765 | 0 |     } | 
| 766 | 0 |   } else if (c == '$') /* $ (but not \$) ends it */ | 
| 767 | 0 |     return(1); | 
| 768 | 0 |  | 
| 769 | 0 |   return(0); | 
| 770 | 0 | } | 
| 771 |  |  | 
| 772 |  | /* | 
| 773 |  |  - p_count - parse a repetition count | 
| 774 |  |  */ | 
| 775 |  | static int      /* the value */ | 
| 776 |  | p_count(struct parse *p) | 
| 777 | 0 | { | 
| 778 | 0 |   int count = 0; | 
| 779 | 0 |   int ndigits = 0; | 
| 780 | 0 | 
 | 
| 781 | 0 |   while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) { | 
| 782 | 0 |     count = count*10 + (GETNEXT() - '0'); | 
| 783 | 0 |     ndigits++; | 
| 784 | 0 |   } | 
| 785 | 0 | 
 | 
| 786 | 0 |   REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); | 
| 787 | 0 |   return(count); | 
| 788 | 0 | } | 
| 789 |  |  | 
| 790 |  | /* | 
| 791 |  |  - p_bracket - parse a bracketed character list | 
| 792 |  |  * | 
| 793 |  |  * Note a significant property of this code:  if the allocset() did SETERROR, | 
| 794 |  |  * no set operations are done. | 
| 795 |  |  */ | 
| 796 |  | static void | 
| 797 |  | p_bracket(struct parse *p) | 
| 798 | 0 | { | 
| 799 | 0 |   cset *cs; | 
| 800 | 0 |   int invert = 0; | 
| 801 | 0 | 
 | 
| 802 | 0 |   /* Dept of Truly Sickening Special-Case Kludges */ | 
| 803 | 0 |   if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { | 
| 804 | 0 |     EMIT(OBOW, 0); | 
| 805 | 0 |     NEXTn(6); | 
| 806 | 0 |     return; | 
| 807 | 0 |   } | 
| 808 | 0 |   if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { | 
| 809 | 0 |     EMIT(OEOW, 0); | 
| 810 | 0 |     NEXTn(6); | 
| 811 | 0 |     return; | 
| 812 | 0 |   } | 
| 813 | 0 | 
 | 
| 814 | 0 |   if ((cs = allocset(p)) == NULL) { | 
| 815 | 0 |     /* allocset did set error status in p */ | 
| 816 | 0 |     return; | 
| 817 | 0 |   } | 
| 818 | 0 |  | 
| 819 | 0 |   if (EAT('^')) | 
| 820 | 0 |     invert++; /* make note to invert set at end */ | 
| 821 | 0 |   if (EAT(']')) | 
| 822 | 0 |     CHadd(cs, ']'); | 
| 823 | 0 |   else if (EAT('-')) | 
| 824 | 0 |     CHadd(cs, '-'); | 
| 825 | 0 |   while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) | 
| 826 | 0 |     p_b_term(p, cs); | 
| 827 | 0 |   if (EAT('-')) | 
| 828 | 0 |     CHadd(cs, '-'); | 
| 829 | 0 |   MUSTEAT(']', REG_EBRACK); | 
| 830 | 0 | 
 | 
| 831 | 0 |   if (p->error != 0) { /* don't mess things up further */ | 
| 832 | 0 |     freeset(p, cs); | 
| 833 | 0 |     return; | 
| 834 | 0 |   } | 
| 835 | 0 |  | 
| 836 | 0 |   if (p->g->cflags®_ICASE) { | 
| 837 | 0 |     int i; | 
| 838 | 0 |     int ci; | 
| 839 | 0 | 
 | 
| 840 | 0 |     for (i = p->g->csetsize - 1; i >= 0; i--) | 
| 841 | 0 |       if (CHIN(cs, i) && isalpha(i)) { | 
| 842 | 0 |         ci = othercase(i); | 
| 843 | 0 |         if (ci != i) | 
| 844 | 0 |           CHadd(cs, ci); | 
| 845 | 0 |       } | 
| 846 | 0 |     if (cs->multis != NULL) | 
| 847 | 0 |       mccase(p, cs); | 
| 848 | 0 |   } | 
| 849 | 0 |   if (invert) { | 
| 850 | 0 |     int i; | 
| 851 | 0 | 
 | 
| 852 | 0 |     for (i = p->g->csetsize - 1; i >= 0; i--) | 
| 853 | 0 |       if (CHIN(cs, i)) | 
| 854 | 0 |         CHsub(cs, i); | 
| 855 | 0 |       else | 
| 856 | 0 |         CHadd(cs, i); | 
| 857 | 0 |     if (p->g->cflags®_NEWLINE) | 
| 858 | 0 |       CHsub(cs, '\n'); | 
| 859 | 0 |     if (cs->multis != NULL) | 
| 860 | 0 |       mcinvert(p, cs); | 
| 861 | 0 |   } | 
| 862 | 0 | 
 | 
| 863 | 0 |   assert(cs->multis == NULL);   /* xxx */ | 
| 864 | 0 | 
 | 
| 865 | 0 |   if (nch(p, cs) == 1) {   /* optimize singleton sets */ | 
| 866 | 0 |     ordinary(p, firstch(p, cs)); | 
| 867 | 0 |     freeset(p, cs); | 
| 868 | 0 |   } else | 
| 869 | 0 |     EMIT(OANYOF, freezeset(p, cs)); | 
| 870 | 0 | } | 
| 871 |  |  | 
| 872 |  | /* | 
| 873 |  |  - p_b_term - parse one term of a bracketed character list | 
| 874 |  |  */ | 
| 875 |  | static void | 
| 876 |  | p_b_term(struct parse *p, cset *cs) | 
| 877 | 0 | { | 
| 878 | 0 |   char c; | 
| 879 | 0 |   char start, finish; | 
| 880 | 0 |   int i; | 
| 881 | 0 | 
 | 
| 882 | 0 |   /* classify what we've got */ | 
| 883 | 0 |   switch ((MORE()) ? PEEK() : '\0') { | 
| 884 | 0 |   case '[': | 
| 885 | 0 |     c = (MORE2()) ? PEEK2() : '\0'; | 
| 886 | 0 |     break; | 
| 887 | 0 |   case '-': | 
| 888 | 0 |     SETERROR(REG_ERANGE); | 
| 889 | 0 |     return;     /* NOTE RETURN */ | 
| 890 | 0 |     break; | 
| 891 | 0 |   default: | 
| 892 | 0 |     c = '\0'; | 
| 893 | 0 |     break; | 
| 894 | 0 |   } | 
| 895 | 0 |  | 
| 896 | 0 |   switch (c) { | 
| 897 | 0 |   case ':':   /* character class */ | 
| 898 | 0 |     NEXT2(); | 
| 899 | 0 |     REQUIRE(MORE(), REG_EBRACK); | 
| 900 | 0 |     c = PEEK(); | 
| 901 | 0 |     REQUIRE(c != '-' && c != ']', REG_ECTYPE); | 
| 902 | 0 |     p_b_cclass(p, cs); | 
| 903 | 0 |     REQUIRE(MORE(), REG_EBRACK); | 
| 904 | 0 |     REQUIRE(EATTWO(':', ']'), REG_ECTYPE); | 
| 905 | 0 |     break; | 
| 906 | 0 |   case '=':   /* equivalence class */ | 
| 907 | 0 |     NEXT2(); | 
| 908 | 0 |     REQUIRE(MORE(), REG_EBRACK); | 
| 909 | 0 |     c = PEEK(); | 
| 910 | 0 |     REQUIRE(c != '-' && c != ']', REG_ECOLLATE); | 
| 911 | 0 |     p_b_eclass(p, cs); | 
| 912 | 0 |     REQUIRE(MORE(), REG_EBRACK); | 
| 913 | 0 |     REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); | 
| 914 | 0 |     break; | 
| 915 | 0 |   default:    /* symbol, ordinary character, or range */ | 
| 916 | 0 | /* xxx revision needed for multichar stuff */ | 
| 917 | 0 |     start = p_b_symbol(p); | 
| 918 | 0 |     if (SEE('-') && MORE2() && PEEK2() != ']') { | 
| 919 | 0 |       /* range */ | 
| 920 | 0 |       NEXT(); | 
| 921 | 0 |       if (EAT('-')) | 
| 922 | 0 |         finish = '-'; | 
| 923 | 0 |       else | 
| 924 | 0 |         finish = p_b_symbol(p); | 
| 925 | 0 |     } else | 
| 926 | 0 |       finish = start; | 
| 927 | 0 | /* xxx what about signed chars here... */ | 
| 928 | 0 |     REQUIRE(start <= finish, REG_ERANGE); | 
| 929 | 0 |     for (i = start; i <= finish; i++) | 
| 930 | 0 |       CHadd(cs, i); | 
| 931 | 0 |     break; | 
| 932 | 0 |   } | 
| 933 | 0 | } | 
| 934 |  |  | 
| 935 |  | /* | 
| 936 |  |  - p_b_cclass - parse a character-class name and deal with it | 
| 937 |  |  */ | 
| 938 |  | static void | 
| 939 |  | p_b_cclass(struct parse *p, cset *cs) | 
| 940 | 0 | { | 
| 941 | 0 |   char *sp = p->next; | 
| 942 | 0 |   struct cclass *cp; | 
| 943 | 0 |   size_t len; | 
| 944 | 0 |   const char *u; | 
| 945 | 0 |   char c; | 
| 946 | 0 | 
 | 
| 947 | 0 |   while (MORE() && isalpha((uch)PEEK())) | 
| 948 | 0 |     NEXT(); | 
| 949 | 0 |   len = p->next - sp; | 
| 950 | 0 |   for (cp = cclasses; cp->name != NULL; cp++) | 
| 951 | 0 |     if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') | 
| 952 | 0 |       break; | 
| 953 | 0 |   if (cp->name == NULL) { | 
| 954 | 0 |     /* oops, didn't find it */ | 
| 955 | 0 |     SETERROR(REG_ECTYPE); | 
| 956 | 0 |     return; | 
| 957 | 0 |   } | 
| 958 | 0 | 
 | 
| 959 | 0 |   u = cp->chars; | 
| 960 | 0 |   while ((c = *u++) != '\0') | 
| 961 | 0 |     CHadd(cs, c); | 
| 962 | 0 |   for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) | 
| 963 | 0 |     MCadd(p, cs, u); | 
| 964 | 0 | } | 
| 965 |  |  | 
| 966 |  | /* | 
| 967 |  |  - p_b_eclass - parse an equivalence-class name and deal with it | 
| 968 |  |  * | 
| 969 |  |  * This implementation is incomplete. xxx | 
| 970 |  |  */ | 
| 971 |  | static void | 
| 972 |  | p_b_eclass(struct parse *p, cset *cs) | 
| 973 | 0 | { | 
| 974 | 0 |   char c; | 
| 975 | 0 | 
 | 
| 976 | 0 |   c = p_b_coll_elem(p, '='); | 
| 977 | 0 |   CHadd(cs, c); | 
| 978 | 0 | } | 
| 979 |  |  | 
| 980 |  | /* | 
| 981 |  |  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol | 
| 982 |  |  */ | 
| 983 |  | static char     /* value of symbol */ | 
| 984 |  | p_b_symbol(struct parse *p) | 
| 985 | 0 | { | 
| 986 | 0 |   char value; | 
| 987 | 0 | 
 | 
| 988 | 0 |   REQUIRE(MORE(), REG_EBRACK); | 
| 989 | 0 |   if (!EATTWO('[', '.')) | 
| 990 | 0 |     return(GETNEXT()); | 
| 991 | 0 | 
 | 
| 992 | 0 |   /* collating symbol */ | 
| 993 | 0 |   value = p_b_coll_elem(p, '.'); | 
| 994 | 0 |   REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); | 
| 995 | 0 |   return(value); | 
| 996 | 0 | } | 
| 997 |  |  | 
| 998 |  | /* | 
| 999 |  |  - p_b_coll_elem - parse a collating-element name and look it up | 
| 1000 |  |  */ | 
| 1001 |  | static char     /* value of collating element */ | 
| 1002 |  | p_b_coll_elem(struct parse *p, | 
| 1003 |  |     int endc)     /* name ended by endc,']' */ | 
| 1004 | 0 | { | 
| 1005 | 0 |   char *sp = p->next; | 
| 1006 | 0 |   struct cname *cp; | 
| 1007 | 0 |   size_t len; | 
| 1008 | 0 | 
 | 
| 1009 | 0 |   while (MORE() && !SEETWO(endc, ']')) | 
| 1010 | 0 |     NEXT(); | 
| 1011 | 0 |   if (!MORE()) { | 
| 1012 | 0 |     SETERROR(REG_EBRACK); | 
| 1013 | 0 |     return(0); | 
| 1014 | 0 |   } | 
| 1015 | 0 |   len = p->next - sp; | 
| 1016 | 0 |   for (cp = cnames; cp->name != NULL; cp++) | 
| 1017 | 0 |     if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len) | 
| 1018 | 0 |       return(cp->code); /* known name */ | 
| 1019 | 0 |   if (len == 1) | 
| 1020 | 0 |     return(*sp); /* single character */ | 
| 1021 | 0 |   SETERROR(REG_ECOLLATE);     /* neither */ | 
| 1022 | 0 |   return(0); | 
| 1023 | 0 | } | 
| 1024 |  |  | 
| 1025 |  | /* | 
| 1026 |  |  - othercase - return the case counterpart of an alphabetic | 
| 1027 |  |  */ | 
| 1028 |  | static char     /* if no counterpart, return ch */ | 
| 1029 |  | othercase(int ch) | 
| 1030 | 0 | { | 
| 1031 | 0 |   ch = (uch)ch; | 
| 1032 | 0 |   assert(isalpha(ch)); | 
| 1033 | 0 |   if (isupper(ch)) | 
| 1034 | 0 |     return ((uch)tolower(ch)); | 
| 1035 | 0 |   else if (islower(ch)) | 
| 1036 | 0 |     return ((uch)toupper(ch)); | 
| 1037 | 0 |   else      /* peculiar, but could happen */ | 
| 1038 | 0 |     return(ch); | 
| 1039 | 0 | } | 
| 1040 |  |  | 
| 1041 |  | /* | 
| 1042 |  |  - bothcases - emit a dualcase version of a two-case character | 
| 1043 |  |  * | 
| 1044 |  |  * Boy, is this implementation ever a kludge... | 
| 1045 |  |  */ | 
| 1046 |  | static void | 
| 1047 |  | bothcases(struct parse *p, int ch) | 
| 1048 | 0 | { | 
| 1049 | 0 |   char *oldnext = p->next; | 
| 1050 | 0 |   char *oldend = p->end; | 
| 1051 | 0 |   char bracket[3]; | 
| 1052 | 0 | 
 | 
| 1053 | 0 |   ch = (uch)ch; | 
| 1054 | 0 |   assert(othercase(ch) != ch);  /* p_bracket() would recurse */ | 
| 1055 | 0 |   p->next = bracket; | 
| 1056 | 0 |   p->end = bracket+2; | 
| 1057 | 0 |   bracket[0] = ch; | 
| 1058 | 0 |   bracket[1] = ']'; | 
| 1059 | 0 |   bracket[2] = '\0'; | 
| 1060 | 0 |   p_bracket(p); | 
| 1061 | 0 |   assert(p->next == bracket+2); | 
| 1062 | 0 |   p->next = oldnext; | 
| 1063 | 0 |   p->end = oldend; | 
| 1064 | 0 | } | 
| 1065 |  |  | 
| 1066 |  | /* | 
| 1067 |  |  - ordinary - emit an ordinary character | 
| 1068 |  |  */ | 
| 1069 |  | static void | 
| 1070 |  | ordinary(struct parse *p, int ch) | 
| 1071 | 0 | { | 
| 1072 | 0 |   cat_t *cap = p->g->categories; | 
| 1073 | 0 | 
 | 
| 1074 | 0 |   if ((p->g->cflags®_ICASE) && isalpha((uch)ch) && othercase(ch) != ch) | 
| 1075 | 0 |     bothcases(p, ch); | 
| 1076 | 0 |   else { | 
| 1077 | 0 |     EMIT(OCHAR, (uch)ch); | 
| 1078 | 0 |     if (cap[ch] == 0) | 
| 1079 | 0 |       cap[ch] = p->g->ncategories++; | 
| 1080 | 0 |   } | 
| 1081 | 0 | } | 
| 1082 |  |  | 
| 1083 |  | /* | 
| 1084 |  |  - nonnewline - emit REG_NEWLINE version of OANY | 
| 1085 |  |  * | 
| 1086 |  |  * Boy, is this implementation ever a kludge... | 
| 1087 |  |  */ | 
| 1088 |  | static void | 
| 1089 |  | nonnewline(struct parse *p) | 
| 1090 | 0 | { | 
| 1091 | 0 |   char *oldnext = p->next; | 
| 1092 | 0 |   char *oldend = p->end; | 
| 1093 | 0 |   char bracket[4]; | 
| 1094 | 0 | 
 | 
| 1095 | 0 |   p->next = bracket; | 
| 1096 | 0 |   p->end = bracket+3; | 
| 1097 | 0 |   bracket[0] = '^'; | 
| 1098 | 0 |   bracket[1] = '\n'; | 
| 1099 | 0 |   bracket[2] = ']'; | 
| 1100 | 0 |   bracket[3] = '\0'; | 
| 1101 | 0 |   p_bracket(p); | 
| 1102 | 0 |   assert(p->next == bracket+3); | 
| 1103 | 0 |   p->next = oldnext; | 
| 1104 | 0 |   p->end = oldend; | 
| 1105 | 0 | } | 
| 1106 |  |  | 
| 1107 |  | /* | 
| 1108 |  |  - repeat - generate code for a bounded repetition, recursively if needed | 
| 1109 |  |  */ | 
| 1110 |  | static void | 
| 1111 |  | repeat(struct parse *p, | 
| 1112 |  |     sopno start,    /* operand from here to end of strip */ | 
| 1113 |  |     int from,     /* repeated from this number */ | 
| 1114 |  |     int to)     /* to this number of times (maybe INFINITY) */ | 
| 1115 | 0 | { | 
| 1116 | 0 |   sopno finish = HERE(); | 
| 1117 | 0 | # define  N 2 | 
| 1118 | 0 | # define  INF 3 | 
| 1119 | 0 | # define  REP(f, t) ((f)*8 + (t)) | 
| 1120 | 0 | # define  MAP(n)  (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) | 
| 1121 | 0 |   sopno copy; | 
| 1122 | 0 | 
 | 
| 1123 | 0 |   if (p->error != 0) /* head off possible runaway recursion */ | 
| 1124 | 0 |     return; | 
| 1125 | 0 |  | 
| 1126 | 0 |   assert(from <= to); | 
| 1127 | 0 | 
 | 
| 1128 | 0 |   switch (REP(MAP(from), MAP(to))) { | 
| 1129 | 0 |   case REP(0, 0):     /* must be user doing this */ | 
| 1130 | 0 |     DROP(finish-start); /* drop the operand */ | 
| 1131 | 0 |     break; | 
| 1132 | 0 |   case REP(0, 1):     /* as x{1,1}? */ | 
| 1133 | 0 |   case REP(0, N):     /* as x{1,n}? */ | 
| 1134 | 0 |   case REP(0, INF):   /* as x{1,}? */ | 
| 1135 | 0 |     /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ | 
| 1136 | 0 |     INSERT(OCH_, start);    /* offset is wrong... */ | 
| 1137 | 0 |     repeat(p, start+1, 1, to); | 
| 1138 | 0 |     ASTERN(OOR1, start); | 
| 1139 | 0 |     AHEAD(start);      /* ... fix it */ | 
| 1140 | 0 |     EMIT(OOR2, 0); | 
| 1141 | 0 |     AHEAD(THERE()); | 
| 1142 | 0 |     ASTERN(O_CH, THERETHERE()); | 
| 1143 | 0 |     break; | 
| 1144 | 0 |   case REP(1, 1):     /* trivial case */ | 
| 1145 | 0 |     /* done */ | 
| 1146 | 0 |     break; | 
| 1147 | 0 |   case REP(1, N):     /* as x?x{1,n-1} */ | 
| 1148 | 0 |     /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ | 
| 1149 | 0 |     INSERT(OCH_, start); | 
| 1150 | 0 |     ASTERN(OOR1, start); | 
| 1151 | 0 |     AHEAD(start); | 
| 1152 | 0 |     EMIT(OOR2, 0);      /* offset very wrong... */ | 
| 1153 | 0 |     AHEAD(THERE());      /* ...so fix it */ | 
| 1154 | 0 |     ASTERN(O_CH, THERETHERE()); | 
| 1155 | 0 |     copy = dupl(p, start+1, finish+1); | 
| 1156 | 0 |     assert(copy == finish+4); | 
| 1157 | 0 |     repeat(p, copy, 1, to-1); | 
| 1158 | 0 |     break; | 
| 1159 | 0 |   case REP(1, INF):   /* as x+ */ | 
| 1160 | 0 |     INSERT(OPLUS_, start); | 
| 1161 | 0 |     ASTERN(O_PLUS, start); | 
| 1162 | 0 |     break; | 
| 1163 | 0 |   case REP(N, N):     /* as xx{m-1,n-1} */ | 
| 1164 | 0 |     copy = dupl(p, start, finish); | 
| 1165 | 0 |     repeat(p, copy, from-1, to-1); | 
| 1166 | 0 |     break; | 
| 1167 | 0 |   case REP(N, INF):   /* as xx{n-1,INF} */ | 
| 1168 | 0 |     copy = dupl(p, start, finish); | 
| 1169 | 0 |     repeat(p, copy, from-1, to); | 
| 1170 | 0 |     break; | 
| 1171 | 0 |   default:      /* "can't happen" */ | 
| 1172 | 0 |     SETERROR(REG_ASSERT); /* just in case */ | 
| 1173 | 0 |     break; | 
| 1174 | 0 |   } | 
| 1175 | 0 | } | 
| 1176 |  |  | 
| 1177 |  | /* | 
| 1178 |  |  - seterr - set an error condition | 
| 1179 |  |  */ | 
| 1180 |  | static int      /* useless but makes type checking happy */ | 
| 1181 |  | seterr(struct parse *p, int e) | 
| 1182 | 0 | { | 
| 1183 | 0 |   if (p->error == 0) /* keep earliest error condition */ | 
| 1184 | 0 |     p->error = e; | 
| 1185 | 0 |   p->next = nuls;   /* try to bring things to a halt */ | 
| 1186 | 0 |   p->end = nuls; | 
| 1187 | 0 |   return(0);    /* make the return value well-defined */ | 
| 1188 | 0 | } | 
| 1189 |  |  | 
| 1190 |  | /* | 
| 1191 |  |  - allocset - allocate a set of characters for [] | 
| 1192 |  |  */ | 
| 1193 |  | static cset * | 
| 1194 |  | allocset(struct parse *p) | 
| 1195 | 0 | { | 
| 1196 | 0 |   int no = p->g->ncsets++; | 
| 1197 | 0 |   size_t nc; | 
| 1198 | 0 |   size_t nbytes; | 
| 1199 | 0 |   cset *cs; | 
| 1200 | 0 |   size_t css = (size_t)p->g->csetsize; | 
| 1201 | 0 |   int i; | 
| 1202 | 0 | 
 | 
| 1203 | 0 |   if (no >= p->ncsalloc) { /* need another column of space */ | 
| 1204 | 0 |     void *ptr; | 
| 1205 | 0 | 
 | 
| 1206 | 0 |     p->ncsalloc += CHAR_BIT; | 
| 1207 | 0 |     nc = p->ncsalloc; | 
| 1208 | 0 |     if (nc > SIZE_MAX / sizeof(cset)) | 
| 1209 | 0 |       goto nomem; | 
| 1210 | 0 |     assert(nc % CHAR_BIT == 0); | 
| 1211 | 0 |     nbytes = nc / CHAR_BIT * css; | 
| 1212 | 0 | 
 | 
| 1213 | 0 |     ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset)); | 
| 1214 | 0 |     if (ptr == NULL) | 
| 1215 | 0 |       goto nomem; | 
| 1216 | 0 |     p->g->sets = ptr; | 
| 1217 | 0 | 
 | 
| 1218 | 0 |     ptr = (uch *)realloc((char *)p->g->setbits, nbytes); | 
| 1219 | 0 |     if (ptr == NULL) | 
| 1220 | 0 |       goto nomem; | 
| 1221 | 0 |     p->g->setbits = ptr; | 
| 1222 | 0 | 
 | 
| 1223 | 0 |     for (i = 0; i < no; i++) | 
| 1224 | 0 |       p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); | 
| 1225 | 0 | 
 | 
| 1226 | 0 |     (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); | 
| 1227 | 0 |   } | 
| 1228 | 0 |   /* XXX should not happen */ | 
| 1229 | 0 |   if (p->g->sets == NULL || p->g->setbits == NULL) | 
| 1230 | 0 |     goto nomem; | 
| 1231 | 0 |  | 
| 1232 | 0 |   cs = &p->g->sets[no]; | 
| 1233 | 0 |   cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); | 
| 1234 | 0 |   cs->mask = 1 << ((no) % CHAR_BIT); | 
| 1235 | 0 |   cs->hash = 0; | 
| 1236 | 0 |   cs->smultis = 0; | 
| 1237 | 0 |   cs->multis = NULL; | 
| 1238 | 0 | 
 | 
| 1239 | 0 |   return(cs); | 
| 1240 | 0 | nomem: | 
| 1241 | 0 |   free(p->g->sets); | 
| 1242 | 0 |   p->g->sets = NULL; | 
| 1243 | 0 |   free(p->g->setbits); | 
| 1244 | 0 |   p->g->setbits = NULL; | 
| 1245 | 0 | 
 | 
| 1246 | 0 |   SETERROR(REG_ESPACE); | 
| 1247 | 0 |   /* caller's responsibility not to do set ops */ | 
| 1248 | 0 |   return(NULL); | 
| 1249 | 0 | } | 
| 1250 |  |  | 
| 1251 |  | /* | 
| 1252 |  |  - freeset - free a now-unused set | 
| 1253 |  |  */ | 
| 1254 |  | static void | 
| 1255 |  | freeset(struct parse *p, cset *cs) | 
| 1256 | 0 | { | 
| 1257 | 0 |   size_t i; | 
| 1258 | 0 |   cset *top = &p->g->sets[p->g->ncsets]; | 
| 1259 | 0 |   size_t css = (size_t)p->g->csetsize; | 
| 1260 | 0 | 
 | 
| 1261 | 0 |   for (i = 0; i < css; i++) | 
| 1262 | 0 |     CHsub(cs, i); | 
| 1263 | 0 |   if (cs == top-1) /* recover only the easy case */ | 
| 1264 | 0 |     p->g->ncsets--; | 
| 1265 | 0 | } | 
| 1266 |  |  | 
| 1267 |  | /* | 
| 1268 |  |  - freezeset - final processing on a set of characters | 
| 1269 |  |  * | 
| 1270 |  |  * The main task here is merging identical sets.  This is usually a waste | 
| 1271 |  |  * of time (although the hash code minimizes the overhead), but can win | 
| 1272 |  |  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash | 
| 1273 |  |  * is done using addition rather than xor -- all ASCII [aA] sets xor to | 
| 1274 |  |  * the same value! | 
| 1275 |  |  */ | 
| 1276 |  | static int      /* set number */ | 
| 1277 |  | freezeset(struct parse *p, cset *cs) | 
| 1278 | 0 | { | 
| 1279 | 0 |   uch h = cs->hash; | 
| 1280 | 0 |   size_t i; | 
| 1281 | 0 |   cset *top = &p->g->sets[p->g->ncsets]; | 
| 1282 | 0 |   cset *cs2; | 
| 1283 | 0 |   size_t css = (size_t)p->g->csetsize; | 
| 1284 | 0 | 
 | 
| 1285 | 0 |   /* look for an earlier one which is the same */ | 
| 1286 | 0 |   for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) | 
| 1287 | 0 |     if (cs2->hash == h && cs2 != cs) { | 
| 1288 | 0 |       /* maybe */ | 
| 1289 | 0 |       for (i = 0; i < css; i++) | 
| 1290 | 0 |         if (!!CHIN(cs2, i) != !!CHIN(cs, i)) | 
| 1291 | 0 |           break;   /* no */ | 
| 1292 | 0 |       if (i == css) | 
| 1293 | 0 |         break;     /* yes */ | 
| 1294 | 0 |     } | 
| 1295 | 0 | 
 | 
| 1296 | 0 |   if (cs2 < top) { /* found one */ | 
| 1297 | 0 |     freeset(p, cs); | 
| 1298 | 0 |     cs = cs2; | 
| 1299 | 0 |   } | 
| 1300 | 0 | 
 | 
| 1301 | 0 |   return((int)(cs - p->g->sets)); | 
| 1302 | 0 | } | 
| 1303 |  |  | 
| 1304 |  | /* | 
| 1305 |  |  - firstch - return first character in a set (which must have at least one) | 
| 1306 |  |  */ | 
| 1307 |  | static int      /* character; there is no "none" value */ | 
| 1308 |  | firstch(struct parse *p, cset *cs) | 
| 1309 | 0 | { | 
| 1310 | 0 |   size_t i; | 
| 1311 | 0 |   size_t css = (size_t)p->g->csetsize; | 
| 1312 | 0 | 
 | 
| 1313 | 0 |   for (i = 0; i < css; i++) | 
| 1314 | 0 |     if (CHIN(cs, i)) | 
| 1315 | 0 |       return((char)i); | 
| 1316 | 0 |   assert(never); | 
| 1317 | 0 |   return(0);   /* arbitrary */ | 
| 1318 | 0 | } | 
| 1319 |  |  | 
| 1320 |  | /* | 
| 1321 |  |  - nch - number of characters in a set | 
| 1322 |  |  */ | 
| 1323 |  | static int | 
| 1324 |  | nch(struct parse *p, cset *cs) | 
| 1325 | 0 | { | 
| 1326 | 0 |   size_t i; | 
| 1327 | 0 |   size_t css = (size_t)p->g->csetsize; | 
| 1328 | 0 |   int n = 0; | 
| 1329 | 0 | 
 | 
| 1330 | 0 |   for (i = 0; i < css; i++) | 
| 1331 | 0 |     if (CHIN(cs, i)) | 
| 1332 | 0 |       n++; | 
| 1333 | 0 |   return(n); | 
| 1334 | 0 | } | 
| 1335 |  |  | 
| 1336 |  | /* | 
| 1337 |  |  - mcadd - add a collating element to a cset | 
| 1338 |  |  */ | 
| 1339 |  | static void | 
| 1340 |  | mcadd( struct parse *p, cset *cs, const char *cp) | 
| 1341 | 0 | { | 
| 1342 | 0 |   size_t oldend = cs->smultis; | 
| 1343 | 0 |   void *np; | 
| 1344 | 0 | 
 | 
| 1345 | 0 |   cs->smultis += strlen(cp) + 1; | 
| 1346 | 0 |   np = realloc(cs->multis, cs->smultis); | 
| 1347 | 0 |   if (np == NULL) { | 
| 1348 | 0 |     if (cs->multis) | 
| 1349 | 0 |       free(cs->multis); | 
| 1350 | 0 |     cs->multis = NULL; | 
| 1351 | 0 |     SETERROR(REG_ESPACE); | 
| 1352 | 0 |     return; | 
| 1353 | 0 |   } | 
| 1354 | 0 |   cs->multis = np; | 
| 1355 | 0 | 
 | 
| 1356 | 0 |   llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); | 
| 1357 | 0 | } | 
| 1358 |  |  | 
| 1359 |  | /* | 
| 1360 |  |  - mcinvert - invert the list of collating elements in a cset | 
| 1361 |  |  * | 
| 1362 |  |  * This would have to know the set of possibilities.  Implementation | 
| 1363 |  |  * is deferred. | 
| 1364 |  |  */ | 
| 1365 |  | /* ARGSUSED */ | 
| 1366 |  | static void | 
| 1367 |  | mcinvert(struct parse *p, cset *cs) | 
| 1368 | 0 | { | 
| 1369 | 0 |   assert(cs->multis == NULL); /* xxx */ | 
| 1370 | 0 | } | 
| 1371 |  |  | 
| 1372 |  | /* | 
| 1373 |  |  - mccase - add case counterparts of the list of collating elements in a cset | 
| 1374 |  |  * | 
| 1375 |  |  * This would have to know the set of possibilities.  Implementation | 
| 1376 |  |  * is deferred. | 
| 1377 |  |  */ | 
| 1378 |  | /* ARGSUSED */ | 
| 1379 |  | static void | 
| 1380 |  | mccase(struct parse *p, cset *cs) | 
| 1381 | 0 | { | 
| 1382 | 0 |   assert(cs->multis == NULL); /* xxx */ | 
| 1383 | 0 | } | 
| 1384 |  |  | 
| 1385 |  | /* | 
| 1386 |  |  - isinsets - is this character in any sets? | 
| 1387 |  |  */ | 
| 1388 |  | static int      /* predicate */ | 
| 1389 |  | isinsets(struct re_guts *g, int c) | 
| 1390 | 0 | { | 
| 1391 | 0 |   uch *col; | 
| 1392 | 0 |   int i; | 
| 1393 | 0 |   int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; | 
| 1394 | 0 |   unsigned uc = (uch)c; | 
| 1395 | 0 | 
 | 
| 1396 | 0 |   for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) | 
| 1397 | 0 |     if (col[uc] != 0) | 
| 1398 | 0 |       return(1); | 
| 1399 | 0 |   return(0); | 
| 1400 | 0 | } | 
| 1401 |  |  | 
| 1402 |  | /* | 
| 1403 |  |  - samesets - are these two characters in exactly the same sets? | 
| 1404 |  |  */ | 
| 1405 |  | static int      /* predicate */ | 
| 1406 |  | samesets(struct re_guts *g, int c1, int c2) | 
| 1407 | 0 | { | 
| 1408 | 0 |   uch *col; | 
| 1409 | 0 |   int i; | 
| 1410 | 0 |   int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; | 
| 1411 | 0 |   unsigned uc1 = (uch)c1; | 
| 1412 | 0 |   unsigned uc2 = (uch)c2; | 
| 1413 | 0 | 
 | 
| 1414 | 0 |   for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) | 
| 1415 | 0 |     if (col[uc1] != col[uc2]) | 
| 1416 | 0 |       return(0); | 
| 1417 | 0 |   return(1); | 
| 1418 | 0 | } | 
| 1419 |  |  | 
| 1420 |  | /* | 
| 1421 |  |  - categorize - sort out character categories | 
| 1422 |  |  */ | 
| 1423 |  | static void | 
| 1424 |  | categorize(struct parse *p, struct re_guts *g) | 
| 1425 | 0 | { | 
| 1426 | 0 |   cat_t *cats = g->categories; | 
| 1427 | 0 |   int c; | 
| 1428 | 0 |   int c2; | 
| 1429 | 0 |   cat_t cat; | 
| 1430 | 0 | 
 | 
| 1431 | 0 |   /* avoid making error situations worse */ | 
| 1432 | 0 |   if (p->error != 0) | 
| 1433 | 0 |     return; | 
| 1434 | 0 |  | 
| 1435 | 0 |   for (c = CHAR_MIN; c <= CHAR_MAX; c++) | 
| 1436 | 0 |     if (cats[c] == 0 && isinsets(g, c)) { | 
| 1437 | 0 |       cat = g->ncategories++; | 
| 1438 | 0 |       cats[c] = cat; | 
| 1439 | 0 |       for (c2 = c+1; c2 <= CHAR_MAX; c2++) | 
| 1440 | 0 |         if (cats[c2] == 0 && samesets(g, c, c2)) | 
| 1441 | 0 |           cats[c2] = cat; | 
| 1442 | 0 |     } | 
| 1443 | 0 | } | 
| 1444 |  |  | 
| 1445 |  | /* | 
| 1446 |  |  - dupl - emit a duplicate of a bunch of sops | 
| 1447 |  |  */ | 
| 1448 |  | static sopno      /* start of duplicate */ | 
| 1449 |  | dupl(struct parse *p, | 
| 1450 |  |     sopno start,    /* from here */ | 
| 1451 |  |     sopno finish)   /* to this less one */ | 
| 1452 | 0 | { | 
| 1453 | 0 |   sopno ret = HERE(); | 
| 1454 | 0 |   sopno len = finish - start; | 
| 1455 | 0 | 
 | 
| 1456 | 0 |   assert(finish >= start); | 
| 1457 | 0 |   if (len == 0) | 
| 1458 | 0 |     return(ret); | 
| 1459 | 0 |   enlarge(p, p->ssize + len); /* this many unexpected additions */ | 
| 1460 | 0 |   assert(p->ssize >= p->slen + len); | 
| 1461 | 0 |   (void) memmove((char *)(p->strip + p->slen), | 
| 1462 | 0 |     (char *)(p->strip + start), (size_t)len*sizeof(sop)); | 
| 1463 | 0 |   p->slen += len; | 
| 1464 | 0 |   return(ret); | 
| 1465 | 0 | } | 
| 1466 |  |  | 
| 1467 |  | /* | 
| 1468 |  |  - doemit - emit a strip operator | 
| 1469 |  |  * | 
| 1470 |  |  * It might seem better to implement this as a macro with a function as | 
| 1471 |  |  * hard-case backup, but it's just too big and messy unless there are | 
| 1472 |  |  * some changes to the data structures.  Maybe later. | 
| 1473 |  |  */ | 
| 1474 |  | static void | 
| 1475 |  | doemit(struct parse *p, sop op, size_t opnd) | 
| 1476 | 0 | { | 
| 1477 | 0 |   /* avoid making error situations worse */ | 
| 1478 | 0 |   if (p->error != 0) | 
| 1479 | 0 |     return; | 
| 1480 | 0 |  | 
| 1481 | 0 |   /* deal with oversize operands ("can't happen", more or less) */ | 
| 1482 | 0 |   assert(opnd < 1<<OPSHIFT); | 
| 1483 | 0 | 
 | 
| 1484 | 0 |   /* deal with undersized strip */ | 
| 1485 | 0 |   if (p->slen >= p->ssize) | 
| 1486 | 0 |     enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ | 
| 1487 | 0 |   assert(p->slen < p->ssize); | 
| 1488 | 0 | 
 | 
| 1489 | 0 |   /* finally, it's all reduced to the easy case */ | 
| 1490 | 0 |   p->strip[p->slen++] = SOP(op, opnd); | 
| 1491 | 0 | } | 
| 1492 |  |  | 
| 1493 |  | /* | 
| 1494 |  |  - doinsert - insert a sop into the strip | 
| 1495 |  |  */ | 
| 1496 |  | static void | 
| 1497 |  | doinsert(struct parse *p, sop op, size_t opnd, sopno pos) | 
| 1498 | 0 | { | 
| 1499 | 0 |   sopno sn; | 
| 1500 | 0 |   sop s; | 
| 1501 | 0 |   int i; | 
| 1502 | 0 | 
 | 
| 1503 | 0 |   /* avoid making error situations worse */ | 
| 1504 | 0 |   if (p->error != 0) | 
| 1505 | 0 |     return; | 
| 1506 | 0 |  | 
| 1507 | 0 |   sn = HERE(); | 
| 1508 | 0 |   EMIT(op, opnd);   /* do checks, ensure space */ | 
| 1509 | 0 |   assert(HERE() == sn+1); | 
| 1510 | 0 |   s = p->strip[sn]; | 
| 1511 | 0 | 
 | 
| 1512 | 0 |   /* adjust paren pointers */ | 
| 1513 | 0 |   assert(pos > 0); | 
| 1514 | 0 |   for (i = 1; i < NPAREN; i++) { | 
| 1515 | 0 |     if (p->pbegin[i] >= pos) { | 
| 1516 | 0 |       p->pbegin[i]++; | 
| 1517 | 0 |     } | 
| 1518 | 0 |     if (p->pend[i] >= pos) { | 
| 1519 | 0 |       p->pend[i]++; | 
| 1520 | 0 |     } | 
| 1521 | 0 |   } | 
| 1522 | 0 | 
 | 
| 1523 | 0 |   memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], | 
| 1524 | 0 |             (HERE()-pos-1)*sizeof(sop)); | 
| 1525 | 0 |   p->strip[pos] = s; | 
| 1526 | 0 | } | 
| 1527 |  |  | 
| 1528 |  | /* | 
| 1529 |  |  - dofwd - complete a forward reference | 
| 1530 |  |  */ | 
| 1531 |  | static void | 
| 1532 |  | dofwd(struct parse *p, sopno pos, sop value) | 
| 1533 | 0 | { | 
| 1534 | 0 |   /* avoid making error situations worse */ | 
| 1535 | 0 |   if (p->error != 0) | 
| 1536 | 0 |     return; | 
| 1537 | 0 |  | 
| 1538 | 0 |   assert(value < 1<<OPSHIFT); | 
| 1539 | 0 |   p->strip[pos] = OP(p->strip[pos]) | value; | 
| 1540 | 0 | } | 
| 1541 |  |  | 
| 1542 |  | /* | 
| 1543 |  |  - enlarge - enlarge the strip | 
| 1544 |  |  */ | 
| 1545 |  | static void | 
| 1546 |  | enlarge(struct parse *p, sopno size) | 
| 1547 | 0 | { | 
| 1548 | 0 |   sop *sp; | 
| 1549 | 0 | 
 | 
| 1550 | 0 |   if (p->ssize >= size) | 
| 1551 | 0 |     return; | 
| 1552 | 0 |  | 
| 1553 | 0 |   if ((uintptr_t)size > SIZE_MAX / sizeof(sop)) { | 
| 1554 | 0 |     SETERROR(REG_ESPACE); | 
| 1555 | 0 |     return; | 
| 1556 | 0 |   } | 
| 1557 | 0 | 
 | 
| 1558 | 0 |   sp = (sop *)realloc(p->strip, size*sizeof(sop)); | 
| 1559 | 0 |   if (sp == NULL) { | 
| 1560 | 0 |     SETERROR(REG_ESPACE); | 
| 1561 | 0 |     return; | 
| 1562 | 0 |   } | 
| 1563 | 0 |   p->strip = sp; | 
| 1564 | 0 |   p->ssize = size; | 
| 1565 | 0 | } | 
| 1566 |  |  | 
| 1567 |  | /* | 
| 1568 |  |  - stripsnug - compact the strip | 
| 1569 |  |  */ | 
| 1570 |  | static void | 
| 1571 |  | stripsnug(struct parse *p, struct re_guts *g) | 
| 1572 | 0 | { | 
| 1573 | 0 |   g->nstates = p->slen; | 
| 1574 | 0 |   if ((uintptr_t)p->slen > SIZE_MAX / sizeof(sop)) { | 
| 1575 | 0 |     g->strip = p->strip; | 
| 1576 | 0 |     SETERROR(REG_ESPACE); | 
| 1577 | 0 |     return; | 
| 1578 | 0 |   } | 
| 1579 | 0 | 
 | 
| 1580 | 0 |   g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); | 
| 1581 | 0 |   if (g->strip == NULL) { | 
| 1582 | 0 |     SETERROR(REG_ESPACE); | 
| 1583 | 0 |     g->strip = p->strip; | 
| 1584 | 0 |   } | 
| 1585 | 0 | } | 
| 1586 |  |  | 
| 1587 |  | /* | 
| 1588 |  |  - findmust - fill in must and mlen with longest mandatory literal string | 
| 1589 |  |  * | 
| 1590 |  |  * This algorithm could do fancy things like analyzing the operands of | | 
| 1591 |  |  * for common subsequences.  Someday.  This code is simple and finds most | 
| 1592 |  |  * of the interesting cases. | 
| 1593 |  |  * | 
| 1594 |  |  * Note that must and mlen got initialized during setup. | 
| 1595 |  |  */ | 
| 1596 |  | static void | 
| 1597 |  | findmust(struct parse *p, struct re_guts *g) | 
| 1598 | 0 | { | 
| 1599 | 0 |   sop *scan; | 
| 1600 | 0 |   sop *start = 0; /* start initialized in the default case, after that */ | 
| 1601 | 0 |   sop *newstart = 0; /* newstart was initialized in the OCHAR case */ | 
| 1602 | 0 |   sopno newlen; | 
| 1603 | 0 |   sop s; | 
| 1604 | 0 |   char *cp; | 
| 1605 | 0 |   sopno i; | 
| 1606 | 0 | 
 | 
| 1607 | 0 |   /* avoid making error situations worse */ | 
| 1608 | 0 |   if (p->error != 0) | 
| 1609 | 0 |     return; | 
| 1610 | 0 |  | 
| 1611 | 0 |   /* find the longest OCHAR sequence in strip */ | 
| 1612 | 0 |   newlen = 0; | 
| 1613 | 0 |   scan = g->strip + 1; | 
| 1614 | 0 |   do { | 
| 1615 | 0 |     s = *scan++; | 
| 1616 | 0 |     switch (OP(s)) { | 
| 1617 | 0 |     case OCHAR:   /* sequence member */ | 
| 1618 | 0 |       if (newlen == 0)   /* new sequence */ | 
| 1619 | 0 |         newstart = scan - 1; | 
| 1620 | 0 |       newlen++; | 
| 1621 | 0 |       break; | 
| 1622 | 0 |     case OPLUS_:   /* things that don't break one */ | 
| 1623 | 0 |     case OLPAREN: | 
| 1624 | 0 |     case ORPAREN: | 
| 1625 | 0 |       break; | 
| 1626 | 0 |     case OQUEST_:   /* things that must be skipped */ | 
| 1627 | 0 |     case OCH_: | 
| 1628 | 0 |       scan--; | 
| 1629 | 0 |       do { | 
| 1630 | 0 |         scan += OPND(s); | 
| 1631 | 0 |         s = *scan; | 
| 1632 | 0 |         /* assert() interferes w debug printouts */ | 
| 1633 | 0 |         if (OP(s) != O_QUEST && OP(s) != O_CH && | 
| 1634 | 0 |               OP(s) != OOR2) { | 
| 1635 | 0 |           g->iflags |= REGEX_BAD; | 
| 1636 | 0 |           return; | 
| 1637 | 0 |         } | 
| 1638 | 0 |       } while (OP(s) != O_QUEST && OP(s) != O_CH); | 
| 1639 | 0 |       LLVM_FALLTHROUGH; | 
| 1640 | 0 |     default:    /* things that break a sequence */ | 
| 1641 | 0 |       if (newlen > g->mlen) {   /* ends one */ | 
| 1642 | 0 |         start = newstart; | 
| 1643 | 0 |         g->mlen = newlen; | 
| 1644 | 0 |       } | 
| 1645 | 0 |       newlen = 0; | 
| 1646 | 0 |       break; | 
| 1647 | 0 |     } | 
| 1648 | 0 |   } while (OP(s) != OEND); | 
| 1649 | 0 | 
 | 
| 1650 | 0 |   if (g->mlen == 0)   /* there isn't one */ | 
| 1651 | 0 |     return; | 
| 1652 | 0 |  | 
| 1653 | 0 |   /* turn it into a character string */ | 
| 1654 | 0 |   g->must = malloc((size_t)g->mlen + 1); | 
| 1655 | 0 |   if (g->must == NULL) {   /* argh; just forget it */ | 
| 1656 | 0 |     g->mlen = 0; | 
| 1657 | 0 |     return; | 
| 1658 | 0 |   } | 
| 1659 | 0 |   cp = g->must; | 
| 1660 | 0 |   scan = start; | 
| 1661 | 0 |   for (i = g->mlen; i > 0; i--) { | 
| 1662 | 0 |     while (OP(s = *scan++) != OCHAR) | 
| 1663 | 0 |       continue; | 
| 1664 | 0 |     assert(cp < g->must + g->mlen); | 
| 1665 | 0 |     *cp++ = (char)OPND(s); | 
| 1666 | 0 |   } | 
| 1667 | 0 |   assert(cp == g->must + g->mlen); | 
| 1668 | 0 |   *cp++ = '\0';   /* just on general principles */ | 
| 1669 | 0 | } | 
| 1670 |  |  | 
| 1671 |  | /* | 
| 1672 |  |  - pluscount - count + nesting | 
| 1673 |  |  */ | 
| 1674 |  | static sopno      /* nesting depth */ | 
| 1675 |  | pluscount(struct parse *p, struct re_guts *g) | 
| 1676 | 0 | { | 
| 1677 | 0 |   sop *scan; | 
| 1678 | 0 |   sop s; | 
| 1679 | 0 |   sopno plusnest = 0; | 
| 1680 | 0 |   sopno maxnest = 0; | 
| 1681 | 0 | 
 | 
| 1682 | 0 |   if (p->error != 0) | 
| 1683 | 0 |     return(0); /* there may not be an OEND */ | 
| 1684 | 0 |  | 
| 1685 | 0 |   scan = g->strip + 1; | 
| 1686 | 0 |   do { | 
| 1687 | 0 |     s = *scan++; | 
| 1688 | 0 |     switch (OP(s)) { | 
| 1689 | 0 |     case OPLUS_: | 
| 1690 | 0 |       plusnest++; | 
| 1691 | 0 |       break; | 
| 1692 | 0 |     case O_PLUS: | 
| 1693 | 0 |       if (plusnest > maxnest) | 
| 1694 | 0 |         maxnest = plusnest; | 
| 1695 | 0 |       plusnest--; | 
| 1696 | 0 |       break; | 
| 1697 | 0 |     } | 
| 1698 | 0 |   } while (OP(s) != OEND); | 
| 1699 | 0 |   if (plusnest != 0) | 
| 1700 | 0 |     g->iflags |= REGEX_BAD; | 
| 1701 | 0 |   return(maxnest); | 
| 1702 | 0 | } |