Coverage Report

Created: 2020-06-26 05:44

/home/arjun/llvm-project/llvm/lib/Support/regengine.inc
Line
Count
Source (jump to first uncovered line)
1
/*-
2
 * This code is derived from OpenBSD's libc/regex, original license follows:
3
 *
4
 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5
 * Copyright (c) 1992, 1993, 1994
6
 *  The Regents of the University of California.  All rights reserved.
7
 *
8
 * This code is derived from software contributed to Berkeley by
9
 * Henry Spencer.
10
 *
11
 * Redistribution and use in source and binary forms, with or without
12
 * modification, are permitted provided that the following conditions
13
 * are met:
14
 * 1. Redistributions of source code must retain the above copyright
15
 *    notice, this list of conditions and the following disclaimer.
16
 * 2. Redistributions in binary form must reproduce the above copyright
17
 *    notice, this list of conditions and the following disclaimer in the
18
 *    documentation and/or other materials provided with the distribution.
19
 * 3. Neither the name of the University nor the names of its contributors
20
 *    may be used to endorse or promote products derived from this software
21
 *    without specific prior written permission.
22
 *
23
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33
 * SUCH DAMAGE.
34
 *
35
 *  @(#)engine.c  8.5 (Berkeley) 3/20/94
36
 */
37
38
/*
39
 * The matching engine and friends.  This file is #included by regexec.c
40
 * after suitable #defines of a variety of macros used herein, so that
41
 * different state representations can be used without duplicating masses
42
 * of code.
43
 */
44
45
#ifdef SNAMES
46
#define matcher smatcher
47
0
#define fast  sfast
48
0
#define slow  sslow
49
0
#define dissect sdissect
50
0
#define backref sbackref
51
0
#define step  sstep
52
#define print sprint
53
#define at  sat
54
#define match smat
55
#define nope  snope
56
#endif
57
#ifdef LNAMES
58
#define matcher lmatcher
59
0
#define fast  lfast
60
0
#define slow  lslow
61
0
#define dissect ldissect
62
0
#define backref lbackref
63
0
#define step  lstep
64
#define print lprint
65
#define at  lat
66
#define match lmat
67
#define nope  lnope
68
#endif
69
70
/* another structure passed up and down to avoid zillions of parameters */
71
struct match {
72
  struct re_guts *g;
73
  int eflags;
74
  llvm_regmatch_t *pmatch;  /* [nsub+1] (0 element unused) */
75
  const char *offp;   /* offsets work from here */
76
  const char *beginp;   /* start of string -- virtual NUL precedes */
77
  const char *endp;   /* end of string -- virtual NUL here */
78
  const char *coldp;    /* can be no match starting before here */
79
  const char **lastpos;   /* [nplus+1] */
80
  STATEVARS;
81
  states st;    /* current states */
82
  states fresh;   /* states for a fresh start */
83
  states tmp;   /* temporary */
84
  states empty;   /* empty set of states */
85
};
86
87
static int matcher(struct re_guts *, const char *, size_t,
88
                   llvm_regmatch_t[], int);
89
static const char *dissect(struct match *, const char *, const char *, sopno,
90
                           sopno);
91
static const char *backref(struct match *, const char *, const char *, sopno,
92
                           sopno, sopno, int);
93
static const char *fast(struct match *, const char *, const char *, sopno, sopno);
94
static const char *slow(struct match *, const char *, const char *, sopno, sopno);
95
static states step(struct re_guts *, sopno, sopno, states, int, states);
96
0
#define MAX_RECURSION 100
97
0
#define BOL (OUT+1)
98
0
#define EOL (BOL+1)
99
0
#define BOLEOL  (BOL+2)
100
0
#define NOTHING (BOL+3)
101
0
#define BOW (BOL+4)
102
0
#define EOW (BOL+5)
103
#define CODEMAX (BOL+5)   /* highest code used */
104
0
#define NONCHAR(c)  ((c) > CHAR_MAX)
105
#define NNONCHAR  (CODEMAX-CHAR_MAX)
106
#ifdef REDEBUG
107
static void print(struct match *, char *, states, int, FILE *);
108
#endif
109
#ifdef REDEBUG
110
static void at(struct match *, char *, char *, char *, sopno, sopno);
111
#endif
112
#ifdef REDEBUG
113
static char *pchar(int);
114
#endif
115
116
#ifdef REDEBUG
117
#define SP(t, s, c) print(m, t, s, c, stdout)
118
#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
119
#define NOTE(str) { if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
120
static int nope = 0;
121
#else
122
#define SP(t, s, c) /* nothing */
123
#define AT(t, p1, p2, s1, s2) /* nothing */
124
#define NOTE(s) /* nothing */
125
#endif
126
127
/*
128
 - matcher - the actual matching engine
129
 */
130
static int      /* 0 success, REG_NOMATCH failure */
131
matcher(struct re_guts *g, const char *string, size_t nmatch,
132
        llvm_regmatch_t pmatch[],
133
    int eflags)
134
0
{
135
0
  const char *endp;
136
0
  size_t i;
137
0
  struct match mv;
138
0
  struct match *m = &mv;
139
0
  const char *dp;
140
0
  const sopno gf = g->firststate+1; /* +1 for OEND */
141
0
  const sopno gl = g->laststate;
142
0
  const char *start;
143
0
  const char *stop;
144
0
145
0
  /* simplify the situation where possible */
146
0
  if (g->cflags&REG_NOSUB)
147
0
    nmatch = 0;
148
0
  if (eflags&REG_STARTEND) {
149
0
    start = string + pmatch[0].rm_so;
150
0
    stop = string + pmatch[0].rm_eo;
151
0
  } else {
152
0
    start = string;
153
0
    stop = start + strlen(start);
154
0
  }
155
0
  if (stop < start)
156
0
    return(REG_INVARG);
157
0
158
0
  /* prescreening; this does wonders for this rather slow code */
159
0
  if (g->must != NULL) {
160
0
    for (dp = start; dp < stop; dp++)
161
0
      if (*dp == g->must[0] && stop - dp >= g->mlen &&
162
0
        memcmp(dp, g->must, (size_t)g->mlen) == 0)
163
0
        break;
164
0
    if (dp == stop)   /* we didn't find g->must */
165
0
      return(REG_NOMATCH);
166
0
  }
167
0
168
0
  /* match struct setup */
169
0
  m->g = g;
170
0
  m->eflags = eflags;
171
0
  m->pmatch = NULL;
172
0
  m->lastpos = NULL;
173
0
  m->offp = string;
174
0
  m->beginp = start;
175
0
  m->endp = stop;
176
0
  STATESETUP(m, 4);
177
0
  SETUP(m->st);
178
0
  SETUP(m->fresh);
179
0
  SETUP(m->tmp);
180
0
  SETUP(m->empty);
181
0
  CLEAR(m->empty);
182
0
183
0
  /* this loop does only one repetition except for backrefs */
184
0
  for (;;) {
185
0
    endp = fast(m, start, stop, gf, gl);
186
0
    if (endp == NULL) {   /* a miss */
187
0
      free(m->pmatch);
188
0
      free((void*)m->lastpos);
189
0
      STATETEARDOWN(m);
190
0
      return(REG_NOMATCH);
191
0
    }
192
0
    if (nmatch == 0 && !g->backrefs)
193
0
      break;   /* no further info needed */
194
0
195
0
    /* where? */
196
0
    assert(m->coldp != NULL);
197
0
    for (;;) {
198
0
      NOTE("finding start");
199
0
      endp = slow(m, m->coldp, stop, gf, gl);
200
0
      if (endp != NULL)
201
0
        break;
202
0
      assert(m->coldp < m->endp);
203
0
      m->coldp++;
204
0
    }
205
0
    if (nmatch == 1 && !g->backrefs)
206
0
      break;   /* no further info needed */
207
0
208
0
    /* oh my, they want the subexpressions... */
209
0
    if (m->pmatch == NULL)
210
0
      m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) *
211
0
              sizeof(llvm_regmatch_t));
212
0
    if (m->pmatch == NULL) {
213
0
      STATETEARDOWN(m);
214
0
      return(REG_ESPACE);
215
0
    }
216
0
    for (i = 1; i <= m->g->nsub; i++)
217
0
      m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
218
0
    if (!g->backrefs && !(m->eflags&REG_BACKR)) {
219
0
      NOTE("dissecting");
220
0
      dp = dissect(m, m->coldp, endp, gf, gl);
221
0
    } else {
222
0
      if (g->nplus > 0 && m->lastpos == NULL)
223
0
        m->lastpos = (const char **)malloc((g->nplus+1) *
224
0
              sizeof(char *));
225
0
      if (g->nplus > 0 && m->lastpos == NULL) {
226
0
        free(m->pmatch);
227
0
        STATETEARDOWN(m);
228
0
        return(REG_ESPACE);
229
0
      }
230
0
      NOTE("backref dissect");
231
0
      dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
232
0
    }
233
0
    if (dp != NULL)
234
0
      break;
235
0
236
0
    /* uh-oh... we couldn't find a subexpression-level match */
237
0
    assert(g->backrefs);  /* must be back references doing it */
238
0
    assert(g->nplus == 0 || m->lastpos != NULL);
239
0
    for (;;) {
240
0
      if (dp != NULL || endp <= m->coldp)
241
0
        break;   /* defeat */
242
0
      NOTE("backoff");
243
0
      endp = slow(m, m->coldp, endp-1, gf, gl);
244
0
      if (endp == NULL)
245
0
        break;   /* defeat */
246
0
      /* try it on a shorter possibility */
247
#ifndef NDEBUG
248
      for (i = 1; i <= m->g->nsub; i++) {
249
        assert(m->pmatch[i].rm_so == -1);
250
        assert(m->pmatch[i].rm_eo == -1);
251
      }
252
#endif
253
0
      NOTE("backoff dissect");
254
0
      dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
255
0
    }
256
0
    assert(dp == NULL || dp == endp);
257
0
    if (dp != NULL)   /* found a shorter one */
258
0
      break;
259
0
260
0
    /* despite initial appearances, there is no match here */
261
0
    NOTE("false alarm");
262
0
    if (m->coldp == stop)
263
0
      break;
264
0
    start = m->coldp + 1; /* recycle starting later */
265
0
  }
266
0
267
0
  /* fill in the details if requested */
268
0
  if (nmatch > 0) {
269
0
    pmatch[0].rm_so = m->coldp - m->offp;
270
0
    pmatch[0].rm_eo = endp - m->offp;
271
0
  }
272
0
  if (nmatch > 1) {
273
0
    assert(m->pmatch != NULL);
274
0
    for (i = 1; i < nmatch; i++)
275
0
      if (i <= m->g->nsub)
276
0
        pmatch[i] = m->pmatch[i];
277
0
      else {
278
0
        pmatch[i].rm_so = -1;
279
0
        pmatch[i].rm_eo = -1;
280
0
      }
281
0
  }
282
0
283
0
  if (m->pmatch != NULL)
284
0
    free((char *)m->pmatch);
285
0
  if (m->lastpos != NULL)
286
0
    free((char *)m->lastpos);
287
0
  STATETEARDOWN(m);
288
0
  return(0);
289
0
}
Unexecuted instantiation: regexec.c:smatcher
Unexecuted instantiation: regexec.c:lmatcher
290
291
/*
292
 - dissect - figure out what matched what, no back references
293
 */
294
static const char *     /* == stop (success) always */
295
dissect(struct match *m, const char *start, const char *stop, sopno startst,
296
        sopno stopst)
297
0
{
298
0
  int i;
299
0
  sopno ss; /* start sop of current subRE */
300
0
  sopno es; /* end sop of current subRE */
301
0
  const char *sp; /* start of string matched by it */
302
0
  const char *stp;  /* string matched by it cannot pass here */
303
0
  const char *rest; /* start of rest of string */
304
0
  const char *tail; /* string unmatched by rest of RE */
305
0
  sopno ssub; /* start sop of subsubRE */
306
0
  sopno esub; /* end sop of subsubRE */
307
0
  const char *ssp;  /* start of string matched by subsubRE */
308
0
  const char *sep;  /* end of string matched by subsubRE */
309
0
  const char *oldssp; /* previous ssp */
310
0
311
0
  AT("diss", start, stop, startst, stopst);
312
0
  sp = start;
313
0
  for (ss = startst; ss < stopst; ss = es) {
314
0
    /* identify end of subRE */
315
0
    es = ss;
316
0
    switch (OP(m->g->strip[es])) {
317
0
    case OPLUS_:
318
0
    case OQUEST_:
319
0
      es += OPND(m->g->strip[es]);
320
0
      break;
321
0
    case OCH_:
322
0
      while (OP(m->g->strip[es]) != O_CH)
323
0
        es += OPND(m->g->strip[es]);
324
0
      break;
325
0
    }
326
0
    es++;
327
0
328
0
    /* figure out what it matched */
329
0
    switch (OP(m->g->strip[ss])) {
330
0
    case OEND:
331
0
      assert(nope);
332
0
      break;
333
0
    case OCHAR:
334
0
      sp++;
335
0
      break;
336
0
    case OBOL:
337
0
    case OEOL:
338
0
    case OBOW:
339
0
    case OEOW:
340
0
      break;
341
0
    case OANY:
342
0
    case OANYOF:
343
0
      sp++;
344
0
      break;
345
0
    case OBACK_:
346
0
    case O_BACK:
347
0
      assert(nope);
348
0
      break;
349
0
    /* cases where length of match is hard to find */
350
0
    case OQUEST_:
351
0
      stp = stop;
352
0
      for (;;) {
353
0
        /* how long could this one be? */
354
0
        rest = slow(m, sp, stp, ss, es);
355
0
        assert(rest != NULL); /* it did match */
356
0
        /* could the rest match the rest? */
357
0
        tail = slow(m, rest, stop, es, stopst);
358
0
        if (tail == stop)
359
0
          break;   /* yes! */
360
0
        /* no -- try a shorter match for this one */
361
0
        stp = rest - 1;
362
0
        assert(stp >= sp);  /* it did work */
363
0
      }
364
0
      ssub = ss + 1;
365
0
      esub = es - 1;
366
0
      /* did innards match? */
367
0
      if (slow(m, sp, rest, ssub, esub) != NULL) {
368
0
        const char *dp = dissect(m, sp, rest, ssub, esub);
369
0
        (void)dp; /* avoid warning if assertions off */
370
0
        assert(dp == rest);
371
0
      } else    /* no */
372
0
        assert(sp == rest);
373
0
      sp = rest;
374
0
      break;
375
0
    case OPLUS_:
376
0
      stp = stop;
377
0
      for (;;) {
378
0
        /* how long could this one be? */
379
0
        rest = slow(m, sp, stp, ss, es);
380
0
        assert(rest != NULL); /* it did match */
381
0
        /* could the rest match the rest? */
382
0
        tail = slow(m, rest, stop, es, stopst);
383
0
        if (tail == stop)
384
0
          break;   /* yes! */
385
0
        /* no -- try a shorter match for this one */
386
0
        stp = rest - 1;
387
0
        assert(stp >= sp);  /* it did work */
388
0
      }
389
0
      ssub = ss + 1;
390
0
      esub = es - 1;
391
0
      ssp = sp;
392
0
      oldssp = ssp;
393
0
      for (;;) { /* find last match of innards */
394
0
        sep = slow(m, ssp, rest, ssub, esub);
395
0
        if (sep == NULL || sep == ssp)
396
0
          break; /* failed or matched null */
397
0
        oldssp = ssp; /* on to next try */
398
0
        ssp = sep;
399
0
      }
400
0
      if (sep == NULL) {
401
0
        /* last successful match */
402
0
        sep = ssp;
403
0
        ssp = oldssp;
404
0
      }
405
0
      assert(sep == rest);  /* must exhaust substring */
406
0
      assert(slow(m, ssp, sep, ssub, esub) == rest);
407
0
      {
408
0
        const char *dp = dissect(m, ssp, sep, ssub, esub);
409
0
        (void)dp; /* avoid warning if assertions off */
410
0
        assert(dp == sep);
411
0
      }
412
0
      sp = rest;
413
0
      break;
414
0
    case OCH_:
415
0
      stp = stop;
416
0
      for (;;) {
417
0
        /* how long could this one be? */
418
0
        rest = slow(m, sp, stp, ss, es);
419
0
        assert(rest != NULL); /* it did match */
420
0
        /* could the rest match the rest? */
421
0
        tail = slow(m, rest, stop, es, stopst);
422
0
        if (tail == stop)
423
0
          break;   /* yes! */
424
0
        /* no -- try a shorter match for this one */
425
0
        stp = rest - 1;
426
0
        assert(stp >= sp);  /* it did work */
427
0
      }
428
0
      ssub = ss + 1;
429
0
      esub = ss + OPND(m->g->strip[ss]) - 1;
430
0
      assert(OP(m->g->strip[esub]) == OOR1);
431
0
      for (;;) { /* find first matching branch */
432
0
        if (slow(m, sp, rest, ssub, esub) == rest)
433
0
          break; /* it matched all of it */
434
0
        /* that one missed, try next one */
435
0
        assert(OP(m->g->strip[esub]) == OOR1);
436
0
        esub++;
437
0
        assert(OP(m->g->strip[esub]) == OOR2);
438
0
        ssub = esub + 1;
439
0
        esub += OPND(m->g->strip[esub]);
440
0
        if (OP(m->g->strip[esub]) == OOR2)
441
0
          esub--;
442
0
        else
443
0
          assert(OP(m->g->strip[esub]) == O_CH);
444
0
      }
445
0
      {
446
0
        const char *dp = dissect(m, sp, rest, ssub, esub);
447
0
        (void)dp; /* avoid warning if assertions off */
448
0
        assert(dp == rest);
449
0
      }
450
0
      sp = rest;
451
0
      break;
452
0
    case O_PLUS:
453
0
    case O_QUEST:
454
0
    case OOR1:
455
0
    case OOR2:
456
0
    case O_CH:
457
0
      assert(nope);
458
0
      break;
459
0
    case OLPAREN:
460
0
      i = OPND(m->g->strip[ss]);
461
0
      assert(0 < i && i <= m->g->nsub);
462
0
      m->pmatch[i].rm_so = sp - m->offp;
463
0
      break;
464
0
    case ORPAREN:
465
0
      i = OPND(m->g->strip[ss]);
466
0
      assert(0 < i && i <= m->g->nsub);
467
0
      m->pmatch[i].rm_eo = sp - m->offp;
468
0
      break;
469
0
    default:    /* uh oh */
470
0
      assert(nope);
471
0
      break;
472
0
    }
473
0
  }
474
0
475
0
  assert(sp == stop);
476
0
  return(sp);
477
0
}
Unexecuted instantiation: regexec.c:sdissect
Unexecuted instantiation: regexec.c:ldissect
478
479
/*
480
 - backref - figure out what matched what, figuring in back references
481
 */
482
static const char *     /* == stop (success) or NULL (failure) */
483
backref(struct match *m, const char *start, const char *stop, sopno startst,
484
        sopno stopst, sopno lev, int rec)     /* PLUS nesting level */
485
0
{
486
0
  int i;
487
0
  sopno ss; /* start sop of current subRE */
488
0
  const char *sp; /* start of string matched by it */
489
0
  sopno ssub; /* start sop of subsubRE */
490
0
  sopno esub; /* end sop of subsubRE */
491
0
  const char *ssp;  /* start of string matched by subsubRE */
492
0
  const char *dp;
493
0
  size_t len;
494
0
  int hard;
495
0
  sop s;
496
0
  llvm_regoff_t offsave;
497
0
  cset *cs;
498
0
499
0
  AT("back", start, stop, startst, stopst);
500
0
  sp = start;
501
0
502
0
  /* get as far as we can with easy stuff */
503
0
  hard = 0;
504
0
  for (ss = startst; !hard && ss < stopst; ss++)
505
0
    switch (OP(s = m->g->strip[ss])) {
506
0
    case OCHAR:
507
0
      if (sp == stop || *sp++ != (char)OPND(s))
508
0
        return(NULL);
509
0
      break;
510
0
    case OANY:
511
0
      if (sp == stop)
512
0
        return(NULL);
513
0
      sp++;
514
0
      break;
515
0
    case OANYOF:
516
0
      cs = &m->g->sets[OPND(s)];
517
0
      if (sp == stop || !CHIN(cs, *sp++))
518
0
        return(NULL);
519
0
      break;
520
0
    case OBOL:
521
0
      if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
522
0
          (sp < m->endp && *(sp-1) == '\n' &&
523
0
            (m->g->cflags&REG_NEWLINE)) )
524
0
        { /* yes */ }
525
0
      else
526
0
        return(NULL);
527
0
      break;
528
0
    case OEOL:
529
0
      if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
530
0
          (sp < m->endp && *sp == '\n' &&
531
0
            (m->g->cflags&REG_NEWLINE)) )
532
0
        { /* yes */ }
533
0
      else
534
0
        return(NULL);
535
0
      break;
536
0
    case OBOW:
537
0
      if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
538
0
          (sp < m->endp && *(sp-1) == '\n' &&
539
0
            (m->g->cflags&REG_NEWLINE)) ||
540
0
          (sp > m->beginp &&
541
0
              !ISWORD(*(sp-1))) ) &&
542
0
          (sp < m->endp && ISWORD(*sp)) )
543
0
        { /* yes */ }
544
0
      else
545
0
        return(NULL);
546
0
      break;
547
0
    case OEOW:
548
0
      if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
549
0
          (sp < m->endp && *sp == '\n' &&
550
0
            (m->g->cflags&REG_NEWLINE)) ||
551
0
          (sp < m->endp && !ISWORD(*sp)) ) &&
552
0
          (sp > m->beginp && ISWORD(*(sp-1))) )
553
0
        { /* yes */ }
554
0
      else
555
0
        return(NULL);
556
0
      break;
557
0
    case O_QUEST:
558
0
      break;
559
0
    case OOR1: /* matches null but needs to skip */
560
0
      ss++;
561
0
      s = m->g->strip[ss];
562
0
      do {
563
0
        assert(OP(s) == OOR2);
564
0
        ss += OPND(s);
565
0
      } while (OP(s = m->g->strip[ss]) != O_CH);
566
0
      /* note that the ss++ gets us past the O_CH */
567
0
      break;
568
0
    default:  /* have to make a choice */
569
0
      hard = 1;
570
0
      break;
571
0
    }
572
0
  if (!hard) {   /* that was it! */
573
0
    if (sp != stop)
574
0
      return(NULL);
575
0
    return(sp);
576
0
  }
577
0
  ss--;     /* adjust for the for's final increment */
578
0
579
0
  /* the hard stuff */
580
0
  AT("hard", sp, stop, ss, stopst);
581
0
  s = m->g->strip[ss];
582
0
  switch (OP(s)) {
583
0
  case OBACK_:   /* the vilest depths */
584
0
    i = OPND(s);
585
0
    assert(0 < i && i <= m->g->nsub);
586
0
    if (m->pmatch[i].rm_eo == -1)
587
0
      return(NULL);
588
0
    assert(m->pmatch[i].rm_so != -1);
589
0
    len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
590
0
    if (len == 0 && rec++ > MAX_RECURSION)
591
0
      return(NULL);
592
0
    assert(stop - m->beginp >= len);
593
0
    if (sp > stop - len)
594
0
      return(NULL); /* not enough left to match */
595
0
    ssp = m->offp + m->pmatch[i].rm_so;
596
0
    if (memcmp(sp, ssp, len) != 0)
597
0
      return(NULL);
598
0
    while (m->g->strip[ss] != SOP(O_BACK, i))
599
0
      ss++;
600
0
    return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
601
0
    break;
602
0
  case OQUEST_:   /* to null or not */
603
0
    dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
604
0
    if (dp != NULL)
605
0
      return(dp); /* not */
606
0
    return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
607
0
    break;
608
0
  case OPLUS_:
609
0
    assert(m->lastpos != NULL);
610
0
    assert(lev+1 <= m->g->nplus);
611
0
    m->lastpos[lev+1] = sp;
612
0
    return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
613
0
    break;
614
0
  case O_PLUS:
615
0
    if (sp == m->lastpos[lev]) /* last pass matched null */
616
0
      return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
617
0
    /* try another pass */
618
0
    m->lastpos[lev] = sp;
619
0
    dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
620
0
    if (dp == NULL)
621
0
      return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
622
0
    else
623
0
      return(dp);
624
0
    break;
625
0
  case OCH_:   /* find the right one, if any */
626
0
    ssub = ss + 1;
627
0
    esub = ss + OPND(s) - 1;
628
0
    assert(OP(m->g->strip[esub]) == OOR1);
629
0
    for (;;) { /* find first matching branch */
630
0
      dp = backref(m, sp, stop, ssub, esub, lev, rec);
631
0
      if (dp != NULL)
632
0
        return(dp);
633
0
      /* that one missed, try next one */
634
0
      if (OP(m->g->strip[esub]) == O_CH)
635
0
        return(NULL); /* there is none */
636
0
      esub++;
637
0
      assert(OP(m->g->strip[esub]) == OOR2);
638
0
      ssub = esub + 1;
639
0
      esub += OPND(m->g->strip[esub]);
640
0
      if (OP(m->g->strip[esub]) == OOR2)
641
0
        esub--;
642
0
      else
643
0
        assert(OP(m->g->strip[esub]) == O_CH);
644
0
    }
645
0
    break;
646
0
  case OLPAREN:   /* must undo assignment if rest fails */
647
0
    i = OPND(s);
648
0
    assert(0 < i && i <= m->g->nsub);
649
0
    offsave = m->pmatch[i].rm_so;
650
0
    m->pmatch[i].rm_so = sp - m->offp;
651
0
    dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
652
0
    if (dp != NULL)
653
0
      return(dp);
654
0
    m->pmatch[i].rm_so = offsave;
655
0
    return(NULL);
656
0
    break;
657
0
  case ORPAREN:   /* must undo assignment if rest fails */
658
0
    i = OPND(s);
659
0
    assert(0 < i && i <= m->g->nsub);
660
0
    offsave = m->pmatch[i].rm_eo;
661
0
    m->pmatch[i].rm_eo = sp - m->offp;
662
0
    dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
663
0
    if (dp != NULL)
664
0
      return(dp);
665
0
    m->pmatch[i].rm_eo = offsave;
666
0
    return(NULL);
667
0
    break;
668
0
  default:    /* uh oh */
669
0
    assert(nope);
670
0
    break;
671
0
  }
672
0
673
0
  /* "can't happen" */
674
0
  assert(nope);
675
0
  /* NOTREACHED */
676
0
        return NULL;
677
0
}
Unexecuted instantiation: regexec.c:sbackref
Unexecuted instantiation: regexec.c:lbackref
678
679
/*
680
 - fast - step through the string at top speed
681
 */
682
static const char *     /* where tentative match ended, or NULL */
683
fast(struct match *m, const char *start, const char *stop, sopno startst,
684
     sopno stopst)
685
0
{
686
0
  states st = m->st;
687
0
  states fresh = m->fresh;
688
0
  states tmp = m->tmp;
689
0
  const char *p = start;
690
0
  int c = (start == m->beginp) ? OUT : *(start-1);
691
0
  int lastc;  /* previous c */
692
0
  int flagch;
693
0
  int i;
694
0
  const char *coldp;  /* last p after which no match was underway */
695
0
696
0
  CLEAR(st);
697
0
  SET1(st, startst);
698
0
  st = step(m->g, startst, stopst, st, NOTHING, st);
699
0
  ASSIGN(fresh, st);
700
0
  SP("start", st, *p);
701
0
  coldp = NULL;
702
0
  for (;;) {
703
0
    /* next character */
704
0
    lastc = c;
705
0
    c = (p == m->endp) ? OUT : *p;
706
0
    if (EQ(st, fresh))
707
0
      coldp = p;
708
0
709
0
    /* is there an EOL and/or BOL between lastc and c? */
710
0
    flagch = '\0';
711
0
    i = 0;
712
0
    if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
713
0
        (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
714
0
      flagch = BOL;
715
0
      i = m->g->nbol;
716
0
    }
717
0
    if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
718
0
        (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
719
0
      flagch = (flagch == BOL) ? BOLEOL : EOL;
720
0
      i += m->g->neol;
721
0
    }
722
0
    if (i != 0) {
723
0
      for (; i > 0; i--)
724
0
        st = step(m->g, startst, stopst, st, flagch, st);
725
0
      SP("boleol", st, c);
726
0
    }
727
0
728
0
    /* how about a word boundary? */
729
0
    if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
730
0
          (c != OUT && ISWORD(c)) ) {
731
0
      flagch = BOW;
732
0
    }
733
0
    if ( (lastc != OUT && ISWORD(lastc)) &&
734
0
        (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
735
0
      flagch = EOW;
736
0
    }
737
0
    if (flagch == BOW || flagch == EOW) {
738
0
      st = step(m->g, startst, stopst, st, flagch, st);
739
0
      SP("boweow", st, c);
740
0
    }
741
0
742
0
    /* are we done? */
743
0
    if (ISSET(st, stopst) || p == stop)
744
0
      break;   /* NOTE BREAK OUT */
745
0
746
0
    /* no, we must deal with this character */
747
0
    ASSIGN(tmp, st);
748
0
    ASSIGN(st, fresh);
749
0
    assert(c != OUT);
750
0
    st = step(m->g, startst, stopst, tmp, c, st);
751
0
    SP("aft", st, c);
752
0
    assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
753
0
    p++;
754
0
  }
755
0
756
0
  assert(coldp != NULL);
757
0
  m->coldp = coldp;
758
0
  if (ISSET(st, stopst))
759
0
    return(p+1);
760
0
  else
761
0
    return(NULL);
762
0
}
Unexecuted instantiation: regexec.c:sfast
Unexecuted instantiation: regexec.c:lfast
763
764
/*
765
 - slow - step through the string more deliberately
766
 */
767
static const char *     /* where it ended */
768
slow(struct match *m, const char *start, const char *stop, sopno startst,
769
     sopno stopst)
770
0
{
771
0
  states st = m->st;
772
0
  states empty = m->empty;
773
0
  states tmp = m->tmp;
774
0
  const char *p = start;
775
0
  int c = (start == m->beginp) ? OUT : *(start-1);
776
0
  int lastc;  /* previous c */
777
0
  int flagch;
778
0
  int i;
779
0
  const char *matchp; /* last p at which a match ended */
780
0
781
0
  AT("slow", start, stop, startst, stopst);
782
0
  CLEAR(st);
783
0
  SET1(st, startst);
784
0
  SP("sstart", st, *p);
785
0
  st = step(m->g, startst, stopst, st, NOTHING, st);
786
0
  matchp = NULL;
787
0
  for (;;) {
788
0
    /* next character */
789
0
    lastc = c;
790
0
    c = (p == m->endp) ? OUT : *p;
791
0
792
0
    /* is there an EOL and/or BOL between lastc and c? */
793
0
    flagch = '\0';
794
0
    i = 0;
795
0
    if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
796
0
        (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
797
0
      flagch = BOL;
798
0
      i = m->g->nbol;
799
0
    }
800
0
    if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
801
0
        (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
802
0
      flagch = (flagch == BOL) ? BOLEOL : EOL;
803
0
      i += m->g->neol;
804
0
    }
805
0
    if (i != 0) {
806
0
      for (; i > 0; i--)
807
0
        st = step(m->g, startst, stopst, st, flagch, st);
808
0
      SP("sboleol", st, c);
809
0
    }
810
0
811
0
    /* how about a word boundary? */
812
0
    if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
813
0
          (c != OUT && ISWORD(c)) ) {
814
0
      flagch = BOW;
815
0
    }
816
0
    if ( (lastc != OUT && ISWORD(lastc)) &&
817
0
        (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
818
0
      flagch = EOW;
819
0
    }
820
0
    if (flagch == BOW || flagch == EOW) {
821
0
      st = step(m->g, startst, stopst, st, flagch, st);
822
0
      SP("sboweow", st, c);
823
0
    }
824
0
825
0
    /* are we done? */
826
0
    if (ISSET(st, stopst))
827
0
      matchp = p;
828
0
    if (EQ(st, empty) || p == stop)
829
0
      break;   /* NOTE BREAK OUT */
830
0
831
0
    /* no, we must deal with this character */
832
0
    ASSIGN(tmp, st);
833
0
    ASSIGN(st, empty);
834
0
    assert(c != OUT);
835
0
    st = step(m->g, startst, stopst, tmp, c, st);
836
0
    SP("saft", st, c);
837
0
    assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
838
0
    p++;
839
0
  }
840
0
841
0
  return(matchp);
842
0
}
Unexecuted instantiation: regexec.c:sslow
Unexecuted instantiation: regexec.c:lslow
843
844
845
/*
846
 - step - map set of states reachable before char to set reachable after
847
 */
848
static states
849
step(struct re_guts *g,
850
    sopno start,    /* start state within strip */
851
    sopno stop,     /* state after stop state within strip */
852
    states bef,     /* states reachable before */
853
    int ch,     /* character or NONCHAR code */
854
    states aft)     /* states already known reachable after */
855
0
{
856
0
  cset *cs;
857
0
  sop s;
858
0
  sopno pc;
859
0
  onestate here;    /* note, macros know this name */
860
0
  sopno look;
861
0
  int i;
862
0
863
0
  for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
864
0
    s = g->strip[pc];
865
0
    switch (OP(s)) {
866
0
    case OEND:
867
0
      assert(pc == stop-1);
868
0
      break;
869
0
    case OCHAR:
870
0
      /* only characters can match */
871
0
      assert(!NONCHAR(ch) || ch != (char)OPND(s));
872
0
      if (ch == (char)OPND(s))
873
0
        FWD(aft, bef, 1);
874
0
      break;
875
0
    case OBOL:
876
0
      if (ch == BOL || ch == BOLEOL)
877
0
        FWD(aft, bef, 1);
878
0
      break;
879
0
    case OEOL:
880
0
      if (ch == EOL || ch == BOLEOL)
881
0
        FWD(aft, bef, 1);
882
0
      break;
883
0
    case OBOW:
884
0
      if (ch == BOW)
885
0
        FWD(aft, bef, 1);
886
0
      break;
887
0
    case OEOW:
888
0
      if (ch == EOW)
889
0
        FWD(aft, bef, 1);
890
0
      break;
891
0
    case OANY:
892
0
      if (!NONCHAR(ch))
893
0
        FWD(aft, bef, 1);
894
0
      break;
895
0
    case OANYOF:
896
0
      cs = &g->sets[OPND(s)];
897
0
      if (!NONCHAR(ch) && CHIN(cs, ch))
898
0
        FWD(aft, bef, 1);
899
0
      break;
900
0
    case OBACK_:   /* ignored here */
901
0
    case O_BACK:
902
0
      FWD(aft, aft, 1);
903
0
      break;
904
0
    case OPLUS_:   /* forward, this is just an empty */
905
0
      FWD(aft, aft, 1);
906
0
      break;
907
0
    case O_PLUS:   /* both forward and back */
908
0
      FWD(aft, aft, 1);
909
0
      i = ISSETBACK(aft, OPND(s));
910
0
      BACK(aft, aft, OPND(s));
911
0
      if (!i && ISSETBACK(aft, OPND(s))) {
912
0
        /* oho, must reconsider loop body */
913
0
        pc -= OPND(s) + 1;
914
0
        INIT(here, pc);
915
0
      }
916
0
      break;
917
0
    case OQUEST_:   /* two branches, both forward */
918
0
      FWD(aft, aft, 1);
919
0
      FWD(aft, aft, OPND(s));
920
0
      break;
921
0
    case O_QUEST:   /* just an empty */
922
0
      FWD(aft, aft, 1);
923
0
      break;
924
0
    case OLPAREN:   /* not significant here */
925
0
    case ORPAREN:
926
0
      FWD(aft, aft, 1);
927
0
      break;
928
0
    case OCH_:   /* mark the first two branches */
929
0
      FWD(aft, aft, 1);
930
0
      assert(OP(g->strip[pc+OPND(s)]) == OOR2);
931
0
      FWD(aft, aft, OPND(s));
932
0
      break;
933
0
    case OOR1:   /* done a branch, find the O_CH */
934
0
      if (ISSTATEIN(aft, here)) {
935
0
        for (look = 1;
936
0
            OP(s = g->strip[pc+look]) != O_CH;
937
0
            look += OPND(s))
938
0
          assert(OP(s) == OOR2);
939
0
        FWD(aft, aft, look);
940
0
      }
941
0
      break;
942
0
    case OOR2:   /* propagate OCH_'s marking */
943
0
      FWD(aft, aft, 1);
944
0
      if (OP(g->strip[pc+OPND(s)]) != O_CH) {
945
0
        assert(OP(g->strip[pc+OPND(s)]) == OOR2);
946
0
        FWD(aft, aft, OPND(s));
947
0
      }
948
0
      break;
949
0
    case O_CH:   /* just empty */
950
0
      FWD(aft, aft, 1);
951
0
      break;
952
0
    default:    /* ooooops... */
953
0
      assert(nope);
954
0
      break;
955
0
    }
956
0
  }
957
0
958
0
  return(aft);
959
0
}
Unexecuted instantiation: regexec.c:sstep
Unexecuted instantiation: regexec.c:lstep
960
961
#ifdef REDEBUG
962
/*
963
 - print - print a set of states
964
 */
965
static void
966
print(struct match *m, char *caption, states st, int ch, FILE *d)
967
{
968
  struct re_guts *g = m->g;
969
  int i;
970
  int first = 1;
971
972
  if (!(m->eflags&REG_TRACE))
973
    return;
974
975
  (void)fprintf(d, "%s", caption);
976
  if (ch != '\0')
977
    (void)fprintf(d, " %s", pchar(ch));
978
  for (i = 0; i < g->nstates; i++)
979
    if (ISSET(st, i)) {
980
      (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
981
      first = 0;
982
    }
983
  (void)fprintf(d, "\n");
984
}
985
986
/* 
987
 - at - print current situation
988
 */
989
static void
990
at(struct match *m, char *title, char *start, char *stop, sopno startst,
991
    sopno stopst)
992
{
993
  if (!(m->eflags&REG_TRACE))
994
    return;
995
996
  (void)printf("%s %s-", title, pchar(*start));
997
  (void)printf("%s ", pchar(*stop));
998
  (void)printf("%ld-%ld\n", (long)startst, (long)stopst);
999
}
1000
1001
#ifndef PCHARDONE
1002
#define PCHARDONE /* never again */
1003
/*
1004
 - pchar - make a character printable
1005
 *
1006
 * Is this identical to regchar() over in debug.c?  Well, yes.  But a
1007
 * duplicate here avoids having a debugging-capable regexec.o tied to
1008
 * a matching debug.o, and this is convenient.  It all disappears in
1009
 * the non-debug compilation anyway, so it doesn't matter much.
1010
 */
1011
static char *     /* -> representation */
1012
pchar(int ch)
1013
{
1014
  static char pbuf[10];
1015
1016
  if (isPrint(ch) || ch == ' ')
1017
    (void)snprintf(pbuf, sizeof pbuf, "%c", ch);
1018
  else
1019
    (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
1020
  return(pbuf);
1021
}
1022
#endif
1023
#endif
1024
1025
#undef  matcher
1026
#undef  fast
1027
#undef  slow
1028
#undef  dissect
1029
#undef  backref
1030
#undef  step
1031
#undef  print
1032
#undef  at
1033
#undef  match
1034
#undef  nope