Re[5]: Исходники Perl...
От: SergeCpp Россия http://zoozahita.ru
Дата: 08.02.07 09:31
Оценка: :))
Кстати, не удержусь...

Вот функция, имеющая отношение к оптимизации регулярного выражения...

Ровно 1000 строк!

Ну, чуть больше: просто я сейчас смотрю в SciTE и свернул её «плюсиком» —
открывающая скобка на 674-й строке, а следующая за свёрнутым блоком строка — 1674-я.

Модераторы — не удаляйте... Пусть Ларри Уоллу будет стыдно

/* REx optimizer.  Converts nodes into quicker variants "in place".
   Finds fixed substrings.  */

/* Stops at toplevel WHILEM as well as at "last".  On return, *scanp is
   set to the position after the last scanned node, or to NULL. */

STATIC I32
S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 flags)
            /* scanp: Start here (read-write). */
            /* deltap: Write maxlen-minlen here. */
            /* last: Stop before this one. */
{
    I32 min = 0, pars = 0, code;
    regnode *scan = *scanp, *next;
    I32 delta = 0;
    int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
    int is_inf_internal = 0;        /* The studied chunk is infinite */
    I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
    scan_data_t data_fake;
    struct regnode_charclass_class and_with; /* Valid if flags & SCF_DO_STCLASS_OR */

    while (scan && OP(scan) != END && scan < last) {
    /* Peephole optimizer: */

    if (PL_regkind[(U8)OP(scan)] == EXACT) {
        /* Merge several consecutive EXACTish nodes into one. */
        regnode *n = regnext(scan);
        U32 stringok = 1;
#ifdef DEBUGGING
        regnode *stop = scan;
#endif

        next = scan + NODE_SZ_STR(scan);
        /* Skip NOTHING, merge EXACT*. */
        while (n &&
           ( PL_regkind[(U8)OP(n)] == NOTHING ||
             (stringok && (OP(n) == OP(scan))))
           && NEXT_OFF(n)
           && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX) {
        if (OP(n) == TAIL || n > next)
            stringok = 0;
        if (PL_regkind[(U8)OP(n)] == NOTHING) {
            NEXT_OFF(scan) += NEXT_OFF(n);
            next = n + NODE_STEP_REGNODE;
#ifdef DEBUGGING
            if (stringok)
            stop = n;
#endif
            n = regnext(n);
        }
        else if (stringok) {
            const int oldl = STR_LEN(scan);
            regnode *nnext = regnext(n);

            if (oldl + STR_LEN(n) > U8_MAX)
            break;
            NEXT_OFF(scan) += NEXT_OFF(n);
            STR_LEN(scan) += STR_LEN(n);
            next = n + NODE_SZ_STR(n);
            /* Now we can overwrite *n : */
            Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
#ifdef DEBUGGING
            stop = next - 1;
#endif
            n = nnext;
        }
        }

        if (UTF && OP(scan) == EXACTF && STR_LEN(scan) >= 6) {
/*
  Two problematic code points in Unicode casefolding of EXACT nodes:

   U+0390 - GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
   U+03B0 - GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS

   which casefold to

   Unicode            UTF-8

   U+03B9 U+0308 U+0301        0xCE 0xB9 0xCC 0x88 0xCC 0x81
   U+03C5 U+0308 U+0301        0xCF 0x85 0xCC 0x88 0xCC 0x81

   This means that in case-insensitive matching (or "loose matching",
   as Unicode calls it), an EXACTF of length six (the UTF-8 encoded byte
   length of the above casefolded versions) can match a target string
   of length two (the byte length of UTF-8 encoded U+0390 or U+03B0).
   This would rather mess up the minimum length computation.

   What we'll do is to look for the tail four bytes, and then peek
   at the preceding two bytes to see whether we need to decrease
   the minimum length by four (six minus two).

   Thanks to the design of UTF-8, there cannot be false matches:
   A sequence of valid UTF-8 bytes cannot be a subsequence of
   another valid sequence of UTF-8 bytes.

*/
         char *s0 = STRING(scan), *s, *t;
         char *s1 = s0 + STR_LEN(scan) - 1, *s2 = s1 - 4;
         const char * const t0 = "\xcc\x88\xcc\x81";
         const char * const t1 = t0 + 3;

         for (s = s0 + 2;
              s < s2 && (t = ninstr(s, s1, t0, t1));
              s = t + 4) {
              if (((U8)t[-1] == 0xB9 && (U8)t[-2] == 0xCE) ||
              ((U8)t[-1] == 0x85 && (U8)t[-2] == 0xCF))
               min -= 4;
         }
        }

#ifdef DEBUGGING
        /* Allow dumping */
        n = scan + NODE_SZ_STR(scan);
        while (n <= stop) {
        if (PL_regkind[(U8)OP(n)] != NOTHING || OP(n) == NOTHING) {
            OP(n) = OPTIMIZED;
            NEXT_OFF(n) = 0;
        }
        n++;
        }
#endif
    }
    /* Follow the next-chain of the current node and optimize
       away all the NOTHINGs from it.  */
    if (OP(scan) != CURLYX) {
        const int max = (reg_off_by_arg[OP(scan)]
               ? I32_MAX
               /* I32 may be smaller than U16 on CRAYs! */
               : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
        int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
        int noff;
        regnode *n = scan;
    
        /* Skip NOTHING and LONGJMP. */
        while ((n = regnext(n))
           && ((PL_regkind[(U8)OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
               || ((OP(n) == LONGJMP) && (noff = ARG(n))))
           && off + noff < max)
        off += noff;
        if (reg_off_by_arg[OP(scan)])
        ARG(scan) = off;
        else
        NEXT_OFF(scan) = off;
    }
    /* The principal pseudo-switch.  Cannot be a switch, since we
       look into several different things.  */
    if (OP(scan) == BRANCH || OP(scan) == BRANCHJ
           || OP(scan) == IFTHEN || OP(scan) == SUSPEND) {
        next = regnext(scan);
        code = OP(scan);
    
        if (OP(next) == code || code == IFTHEN || code == SUSPEND) {
        I32 max1 = 0, min1 = I32_MAX, num = 0;
        struct regnode_charclass_class accum;
        
        if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
            scan_commit(pRExC_state, data); /* Cannot merge strings after this. */
        if (flags & SCF_DO_STCLASS)
            cl_init_zero(pRExC_state, &accum);
        while (OP(scan) == code) {
            I32 deltanext, minnext, f = 0, fake;
            struct regnode_charclass_class this_class;

            num++;
            data_fake.flags = 0;
            if (data) {        
            data_fake.whilem_c = data->whilem_c;
            data_fake.last_closep = data->last_closep;
            }
            else
            data_fake.last_closep = &fake;
            next = regnext(scan);
            scan = NEXTOPER(scan);
            if (code != BRANCH)
            scan = NEXTOPER(scan);
            if (flags & SCF_DO_STCLASS) {
            cl_init(pRExC_state, &this_class);
            data_fake.start_class = &this_class;
            f = SCF_DO_STCLASS_AND;
            }        
            if (flags & SCF_WHILEM_VISITED_POS)
            f |= SCF_WHILEM_VISITED_POS;
            /* we suppose the run is continuous, last=next...*/
            minnext = study_chunk(pRExC_state, &scan, &deltanext,
                      next, &data_fake, f);
            if (min1 > minnext)
            min1 = minnext;
            if (max1 < minnext + deltanext)
            max1 = minnext + deltanext;
            if (deltanext == I32_MAX)
            is_inf = is_inf_internal = 1;
            scan = next;
            if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
            pars++;
            if (data && (data_fake.flags & SF_HAS_EVAL))
            data->flags |= SF_HAS_EVAL;
            if (data)
            data->whilem_c = data_fake.whilem_c;
            if (flags & SCF_DO_STCLASS)
            cl_or(pRExC_state, &accum, &this_class);
            if (code == SUSPEND)
            break;
        }
        if (code == IFTHEN && num < 2) /* Empty ELSE branch */
            min1 = 0;
        if (flags & SCF_DO_SUBSTR) {
            data->pos_min += min1;
            data->pos_delta += max1 - min1;
            if (max1 != min1 || is_inf)
            data->longest = &(data->longest_float);
        }
        min += min1;
        delta += max1 - min1;
        if (flags & SCF_DO_STCLASS_OR) {
            cl_or(pRExC_state, data->start_class, &accum);
            if (min1) {
            cl_and(data->start_class, &and_with);
            flags &= ~SCF_DO_STCLASS;
            }
        }
        else if (flags & SCF_DO_STCLASS_AND) {
            if (min1) {
            cl_and(data->start_class, &accum);
            flags &= ~SCF_DO_STCLASS;
            }
            else {
            /* Switch to OR mode: cache the old value of
             * data->start_class */
            StructCopy(data->start_class, &and_with,
                   struct regnode_charclass_class);
            flags &= ~SCF_DO_STCLASS_AND;
            StructCopy(&accum, data->start_class,
                   struct regnode_charclass_class);
            flags |= SCF_DO_STCLASS_OR;
            data->start_class->flags |= ANYOF_EOS;
            }
        }

        }
        else if (code == BRANCHJ)    /* single branch is optimized. */
        scan = NEXTOPER(NEXTOPER(scan));
        else            /* single branch is optimized. */
        scan = NEXTOPER(scan);
        continue;
    }
    else if (OP(scan) == EXACT) {
        I32 l = STR_LEN(scan);
        UV uc = *((U8*)STRING(scan));
        if (UTF) {
        const U8 * const s = (U8*)STRING(scan);
        l = utf8_length((U8 *)s, (U8 *)s + l);
        uc = utf8_to_uvchr((U8 *)s, NULL);
        }
        min += l;
        if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
        /* The code below prefers earlier match for fixed
           offset, later match for variable offset.  */
        if (data->last_end == -1) { /* Update the start info. */
            data->last_start_min = data->pos_min;
             data->last_start_max = is_inf
             ? I32_MAX : data->pos_min + data->pos_delta;
        }
        sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
        {
            SV * const sv = data->last_found;
            MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
            mg_find(sv, PERL_MAGIC_utf8) : NULL;
            if (mg && mg->mg_len >= 0)
            mg->mg_len += utf8_length((U8*)STRING(scan),
                          (U8*)STRING(scan)+STR_LEN(scan));
        }
        if (UTF)
            SvUTF8_on(data->last_found);
        data->last_end = data->pos_min + l;
        data->pos_min += l; /* As in the first entry. */
        data->flags &= ~SF_BEFORE_EOL;
        }
        if (flags & SCF_DO_STCLASS_AND) {
        /* Check whether it is compatible with what we know already! */
        int compat = 1;

        if (uc >= 0x100 ||
            (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
            && !ANYOF_BITMAP_TEST(data->start_class, uc)
            && (!(data->start_class->flags & ANYOF_FOLD)
            || !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
                    )
            compat = 0;
        ANYOF_CLASS_ZERO(data->start_class);
        ANYOF_BITMAP_ZERO(data->start_class);
        if (compat)
            ANYOF_BITMAP_SET(data->start_class, uc);
        data->start_class->flags &= ~ANYOF_EOS;
        if (uc < 0x100)
          data->start_class->flags &= ~ANYOF_UNICODE_ALL;
        }
        else if (flags & SCF_DO_STCLASS_OR) {
        /* false positive possible if the class is case-folded */
        if (uc < 0x100)
            ANYOF_BITMAP_SET(data->start_class, uc);
        else
            data->start_class->flags |= ANYOF_UNICODE_ALL;
        data->start_class->flags &= ~ANYOF_EOS;
        cl_and(data->start_class, &and_with);
        }
        flags &= ~SCF_DO_STCLASS;
    }
    else if (PL_regkind[(U8)OP(scan)] == EXACT) { /* But OP != EXACT! */
        I32 l = STR_LEN(scan);
        UV uc = *((U8*)STRING(scan));

        /* Search for fixed substrings supports EXACT only. */
        if (flags & SCF_DO_SUBSTR)
        scan_commit(pRExC_state, data);
        if (UTF) {
        U8 *s = (U8 *)STRING(scan);
        l = utf8_length(s, s + l);
        uc = utf8_to_uvchr(s, NULL);
        }
        min += l;
        if (data && (flags & SCF_DO_SUBSTR))
        data->pos_min += l;
        if (flags & SCF_DO_STCLASS_AND) {
        /* Check whether it is compatible with what we know already! */
        int compat = 1;

        if (uc >= 0x100 ||
            (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
            && !ANYOF_BITMAP_TEST(data->start_class, uc)
             && !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
            compat = 0;
        ANYOF_CLASS_ZERO(data->start_class);
        ANYOF_BITMAP_ZERO(data->start_class);
        if (compat) {
            ANYOF_BITMAP_SET(data->start_class, uc);
            data->start_class->flags &= ~ANYOF_EOS;
            data->start_class->flags |= ANYOF_FOLD;
            if (OP(scan) == EXACTFL)
            data->start_class->flags |= ANYOF_LOCALE;
        }
        }
        else if (flags & SCF_DO_STCLASS_OR) {
        if (data->start_class->flags & ANYOF_FOLD) {
            /* false positive possible if the class is case-folded.
               Assume that the locale settings are the same... */
            if (uc < 0x100)
            ANYOF_BITMAP_SET(data->start_class, uc);
            data->start_class->flags &= ~ANYOF_EOS;
        }
        cl_and(data->start_class, &and_with);
        }
        flags &= ~SCF_DO_STCLASS;
    }
    else if (strchr((const char*)PL_varies,OP(scan))) {
        I32 mincount, maxcount, minnext, deltanext, fl = 0;
        I32 f = flags, pos_before = 0;
        regnode *oscan = scan;
        struct regnode_charclass_class this_class;
        struct regnode_charclass_class *oclass = NULL;
        I32 next_is_eval = 0;

        switch (PL_regkind[(U8)OP(scan)]) {
        case WHILEM:        /* End of (?:...)* . */
        scan = NEXTOPER(scan);
        goto finish;
        case PLUS:
        if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
            next = NEXTOPER(scan);
            if (OP(next) == EXACT || (flags & SCF_DO_STCLASS)) {
            mincount = 1;
            maxcount = REG_INFTY;
            next = regnext(scan);
            scan = NEXTOPER(scan);
            goto do_curly;
            }
        }
        if (flags & SCF_DO_SUBSTR)
            data->pos_min++;
        min++;
        /* Fall through. */
        case STAR:
        if (flags & SCF_DO_STCLASS) {
            mincount = 0;
            maxcount = REG_INFTY;
            next = regnext(scan);
            scan = NEXTOPER(scan);
            goto do_curly;
        }
        is_inf = is_inf_internal = 1;
        scan = regnext(scan);
        if (flags & SCF_DO_SUBSTR) {
            scan_commit(pRExC_state, data); /* Cannot extend fixed substrings */
            data->longest = &(data->longest_float);
        }
        goto optimize_curly_tail;
        case CURLY:
        mincount = ARG1(scan);
        maxcount = ARG2(scan);
        next = regnext(scan);
        if (OP(scan) == CURLYX) {
            I32 lp = (data ? *(data->last_closep) : 0);

            scan->flags = ((lp <= U8_MAX) ? lp : U8_MAX);
        }
        scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
        next_is_eval = (OP(scan) == EVAL);
          do_curly:
        if (flags & SCF_DO_SUBSTR) {
            if (mincount == 0) scan_commit(pRExC_state,data); /* Cannot extend fixed substrings */
            pos_before = data->pos_min;
        }
        if (data) {
            fl = data->flags;
            data->flags &= ~(SF_HAS_PAR|SF_IN_PAR|SF_HAS_EVAL);
            if (is_inf)
            data->flags |= SF_IS_INF;
        }
        if (flags & SCF_DO_STCLASS) {
            cl_init(pRExC_state, &this_class);
            oclass = data->start_class;
            data->start_class = &this_class;
            f |= SCF_DO_STCLASS_AND;
            f &= ~SCF_DO_STCLASS_OR;
        }
        /* These are the cases when once a subexpression
           fails at a particular position, it cannot succeed
           even after backtracking at the enclosing scope.
        
           XXXX what if minimal match and we are at the
                initial run of {n,m}? */
        if ((mincount != maxcount - 1) && (maxcount != REG_INFTY))
            f &= ~SCF_WHILEM_VISITED_POS;

        /* This will finish on WHILEM, setting scan, or on NULL: */
        minnext = study_chunk(pRExC_state, &scan, &deltanext, last, data,
                      mincount == 0
                    ? (f & ~SCF_DO_SUBSTR) : f);

        if (flags & SCF_DO_STCLASS)
            data->start_class = oclass;
        if (mincount == 0 || minnext == 0) {
            if (flags & SCF_DO_STCLASS_OR) {
            cl_or(pRExC_state, data->start_class, &this_class);
            }
            else if (flags & SCF_DO_STCLASS_AND) {
            /* Switch to OR mode: cache the old value of
             * data->start_class */
            StructCopy(data->start_class, &and_with,
                   struct regnode_charclass_class);
            flags &= ~SCF_DO_STCLASS_AND;
            StructCopy(&this_class, data->start_class,
                   struct regnode_charclass_class);
            flags |= SCF_DO_STCLASS_OR;
            data->start_class->flags |= ANYOF_EOS;
            }
        } else {        /* Non-zero len */
            if (flags & SCF_DO_STCLASS_OR) {
            cl_or(pRExC_state, data->start_class, &this_class);
            cl_and(data->start_class, &and_with);
            }
            else if (flags & SCF_DO_STCLASS_AND)
            cl_and(data->start_class, &this_class);
            flags &= ~SCF_DO_STCLASS;
        }
        if (!scan)         /* It was not CURLYX, but CURLY. */
            scan = next;
        if ( /* ? quantifier ok, except for (?{ ... }) */
            (next_is_eval || !(mincount == 0 && maxcount == 1))
            && (minnext == 0) && (deltanext == 0)
            && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
            && maxcount <= REG_INFTY/3 /* Complement check for big count */
            && ckWARN(WARN_REGEXP))
        {
            vWARN(RExC_parse,
              "Quantifier unexpected on zero-length expression");
        }

        min += minnext * mincount;
        is_inf_internal |= ((maxcount == REG_INFTY
                     && (minnext + deltanext) > 0)
                    || deltanext == I32_MAX);
        is_inf |= is_inf_internal;
        delta += (minnext + deltanext) * maxcount - minnext * mincount;

        /* Try powerful optimization CURLYX => CURLYN. */
        if (  OP(oscan) == CURLYX && data
              && data->flags & SF_IN_PAR
              && !(data->flags & SF_HAS_EVAL)
              && !deltanext && minnext == 1 ) {
            /* Try to optimize to CURLYN.  */
            regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
            regnode *nxt1 = nxt;
#ifdef DEBUGGING
            regnode *nxt2;
#endif

            /* Skip open. */
            nxt = regnext(nxt);
            if (!strchr((const char*)PL_simple,OP(nxt))
            && !(PL_regkind[(U8)OP(nxt)] == EXACT
                 && STR_LEN(nxt) == 1))
            goto nogo;
#ifdef DEBUGGING
            nxt2 = nxt;
#endif
            nxt = regnext(nxt);
            if (OP(nxt) != CLOSE)
            goto nogo;
            /* Now we know that nxt2 is the only contents: */
            oscan->flags = (U8)ARG(nxt);
            OP(oscan) = CURLYN;
            OP(nxt1) = NOTHING;    /* was OPEN. */
#ifdef DEBUGGING
            OP(nxt1 + 1) = OPTIMIZED; /* was count. */
            NEXT_OFF(nxt1+ 1) = 0; /* just for consistancy. */
            NEXT_OFF(nxt2) = 0;    /* just for consistancy with CURLY. */
            OP(nxt) = OPTIMIZED;    /* was CLOSE. */
            OP(nxt + 1) = OPTIMIZED; /* was count. */
            NEXT_OFF(nxt+ 1) = 0; /* just for consistancy. */
#endif
        }
          nogo:

        /* Try optimization CURLYX => CURLYM. */
        if (  OP(oscan) == CURLYX && data
              && !(data->flags & SF_HAS_PAR)
              && !(data->flags & SF_HAS_EVAL)
              && !deltanext    /* atom is fixed width */
              && minnext != 0    /* CURLYM can't handle zero width */
        ) {
            /* XXXX How to optimize if data == 0? */
            /* Optimize to a simpler form.  */
            regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN */
            regnode *nxt2;

            OP(oscan) = CURLYM;
            while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
                && (OP(nxt2) != WHILEM))
            nxt = nxt2;
            OP(nxt2)  = SUCCEED; /* Whas WHILEM */
            /* Need to optimize away parenths. */
            if (data->flags & SF_IN_PAR) {
            /* Set the parenth number.  */
            regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/

            if (OP(nxt) != CLOSE)
                FAIL("Panic opt close");
            oscan->flags = (U8)ARG(nxt);
            OP(nxt1) = OPTIMIZED;    /* was OPEN. */
            OP(nxt) = OPTIMIZED;    /* was CLOSE. */
#ifdef DEBUGGING
            OP(nxt1 + 1) = OPTIMIZED; /* was count. */
            OP(nxt + 1) = OPTIMIZED; /* was count. */
            NEXT_OFF(nxt1 + 1) = 0; /* just for consistancy. */
            NEXT_OFF(nxt + 1) = 0; /* just for consistancy. */
#endif
#if 0
            while ( nxt1 && (OP(nxt1) != WHILEM)) {
                regnode *nnxt = regnext(nxt1);
            
                if (nnxt == nxt) {
                if (reg_off_by_arg[OP(nxt1)])
                    ARG_SET(nxt1, nxt2 - nxt1);
                else if (nxt2 - nxt1 < U16_MAX)
                    NEXT_OFF(nxt1) = nxt2 - nxt1;
                else
                    OP(nxt) = NOTHING;    /* Cannot beautify */
                }
                nxt1 = nnxt;
            }
#endif
            /* Optimize again: */
            study_chunk(pRExC_state, &nxt1, &deltanext, nxt,
                    NULL, 0);
            }
            else
            oscan->flags = 0;
        }
        else if ((OP(oscan) == CURLYX)
             && (flags & SCF_WHILEM_VISITED_POS)
             /* See the comment on a similar expression above.
                However, this time it not a subexpression
                we care about, but the expression itself. */
             && (maxcount == REG_INFTY)
             && data && ++data->whilem_c < 16) {
            /* This stays as CURLYX, we can put the count/of pair. */
            /* Find WHILEM (as in regexec.c) */
            regnode *nxt = oscan + NEXT_OFF(oscan);

            if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
            nxt += ARG(nxt);
            PREVOPER(nxt)->flags = (U8)(data->whilem_c
            | (RExC_whilem_seen << 4)); /* On WHILEM */
        }
        if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
            pars++;
        if (flags & SCF_DO_SUBSTR) {
            SV *last_str = Nullsv;
            int counted = mincount != 0;

            if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */
#if defined(SPARC64_GCC_WORKAROUND)
            I32 b = 0;
            STRLEN l = 0;
            const char *s = NULL;
            I32 old = 0;

            if (pos_before >= data->last_start_min)
                b = pos_before;
            else
                b = data->last_start_min;

            l = 0;
            s = SvPV_const(data->last_found, l);
            old = b - data->last_start_min;

#else
            I32 b = pos_before >= data->last_start_min
                ? pos_before : data->last_start_min;
            STRLEN l;
            const char *s = SvPV_const(data->last_found, l);
            I32 old = b - data->last_start_min;
#endif

            if (UTF)
                old = utf8_hop((U8*)s, old) - (U8*)s;
            
            l -= old;
            /* Get the added string: */
            last_str = newSVpvn(s  + old, l);
            if (UTF)
                SvUTF8_on(last_str);
            if (deltanext == 0 && pos_before == b) {
                /* What was added is a constant string */
                if (mincount > 1) {
                SvGROW(last_str, (mincount * l) + 1);
                repeatcpy(SvPVX(last_str) + l,
                      SvPVX_const(last_str), l, mincount - 1);
                SvCUR_set(last_str, SvCUR(last_str) * mincount);
                /* Add additional parts. */
                SvCUR_set(data->last_found,
                      SvCUR(data->last_found) - l);
                sv_catsv(data->last_found, last_str);
                {
                    SV * sv = data->last_found;
                    MAGIC *mg =
                    SvUTF8(sv) && SvMAGICAL(sv) ?
                    mg_find(sv, PERL_MAGIC_utf8) : NULL;
                    if (mg && mg->mg_len >= 0)
                    mg->mg_len += CHR_SVLEN(last_str);
                }
                data->last_end += l * (mincount - 1);
                }
            } else {
                /* start offset must point into the last copy */
                data->last_start_min += minnext * (mincount - 1);
                data->last_start_max += is_inf ? I32_MAX
                : (maxcount - 1) * (minnext + data->pos_delta);
            }
            }
            /* It is counted once already... */
            data->pos_min += minnext * (mincount - counted);
            data->pos_delta += - counted * deltanext +
            (minnext + deltanext) * maxcount - minnext * mincount;
            if (mincount != maxcount) {
             /* Cannot extend fixed substrings found inside
                the group.  */
            scan_commit(pRExC_state,data);
            if (mincount && last_str) {
                sv_setsv(data->last_found, last_str);
                data->last_end = data->pos_min;
                data->last_start_min =
                data->pos_min - CHR_SVLEN(last_str);
                data->last_start_max = is_inf
                ? I32_MAX
                : data->pos_min + data->pos_delta
                - CHR_SVLEN(last_str);
            }
            data->longest = &(data->longest_float);
            }
            SvREFCNT_dec(last_str);
        }
        if (data && (fl & SF_HAS_EVAL))
            data->flags |= SF_HAS_EVAL;
          optimize_curly_tail:
        if (OP(oscan) != CURLYX) {
            while (PL_regkind[(U8)OP(next = regnext(oscan))] == NOTHING
               && NEXT_OFF(next))
            NEXT_OFF(oscan) += NEXT_OFF(next);
        }
        continue;
        default:            /* REF and CLUMP only? */
        if (flags & SCF_DO_SUBSTR) {
            scan_commit(pRExC_state,data);    /* Cannot expect anything... */
            data->longest = &(data->longest_float);
        }
        is_inf = is_inf_internal = 1;
        if (flags & SCF_DO_STCLASS_OR)
            cl_anything(pRExC_state, data->start_class);
        flags &= ~SCF_DO_STCLASS;
        break;
        }
    }
    else if (strchr((const char*)PL_simple,OP(scan))) {
        int value = 0;

        if (flags & SCF_DO_SUBSTR) {
        scan_commit(pRExC_state,data);
        data->pos_min++;
        }
        min++;
        if (flags & SCF_DO_STCLASS) {
        data->start_class->flags &= ~ANYOF_EOS;    /* No match on empty */

        /* Some of the logic below assumes that switching
           locale on will only add false positives. */
        switch (PL_regkind[(U8)OP(scan)]) {
        case SANY:
        default:
          do_default:
            /* Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan)); */
            if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
            cl_anything(pRExC_state, data->start_class);
            break;
        case REG_ANY:
            if (OP(scan) == SANY)
            goto do_default;
            if (flags & SCF_DO_STCLASS_OR) { /* Everything but \n */
            value = (ANYOF_BITMAP_TEST(data->start_class,'\n')
                 || (data->start_class->flags & ANYOF_CLASS));
            cl_anything(pRExC_state, data->start_class);
            }
            if (flags & SCF_DO_STCLASS_AND || !value)
            ANYOF_BITMAP_CLEAR(data->start_class,'\n');
            break;
        case ANYOF:
            if (flags & SCF_DO_STCLASS_AND)
            cl_and(data->start_class,
                   (struct regnode_charclass_class*)scan);
            else
            cl_or(pRExC_state, data->start_class,
                  (struct regnode_charclass_class*)scan);
            break;
        case ALNUM:
            if (flags & SCF_DO_STCLASS_AND) {
            if (!(data->start_class->flags & ANYOF_LOCALE)) {
                ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
                for (value = 0; value < 256; value++)
                if (!isALNUM(value))
                    ANYOF_BITMAP_CLEAR(data->start_class, value);
            }
            }
            else {
            if (data->start_class->flags & ANYOF_LOCALE)
                ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
            else {
                for (value = 0; value < 256; value++)
                if (isALNUM(value))
                    ANYOF_BITMAP_SET(data->start_class, value);            
            }
            }
            break;
        case ALNUML:
            if (flags & SCF_DO_STCLASS_AND) {
            if (data->start_class->flags & ANYOF_LOCALE)
                ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
            }
            else {
            ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
            data->start_class->flags |= ANYOF_LOCALE;
            }
            break;
        case NALNUM:
            if (flags & SCF_DO_STCLASS_AND) {
            if (!(data->start_class->flags & ANYOF_LOCALE)) {
                ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
                for (value = 0; value < 256; value++)
                if (isALNUM(value))
                    ANYOF_BITMAP_CLEAR(data->start_class, value);
            }
            }
            else {
            if (data->start_class->flags & ANYOF_LOCALE)
                ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
            else {
                for (value = 0; value < 256; value++)
                if (!isALNUM(value))
                    ANYOF_BITMAP_SET(data->start_class, value);            
            }
            }
            break;
        case NALNUML:
            if (flags & SCF_DO_STCLASS_AND) {
            if (data->start_class->flags & ANYOF_LOCALE)
                ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
            }
            else {
            data->start_class->flags |= ANYOF_LOCALE;
            ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
            }
            break;
        case SPACE:
            if (flags & SCF_DO_STCLASS_AND) {
            if (!(data->start_class->flags & ANYOF_LOCALE)) {
                ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
                for (value = 0; value < 256; value++)
                if (!isSPACE(value))
                    ANYOF_BITMAP_CLEAR(data->start_class, value);
            }
            }
            else {
            if (data->start_class->flags & ANYOF_LOCALE)
                ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
            else {
                for (value = 0; value < 256; value++)
                if (isSPACE(value))
                    ANYOF_BITMAP_SET(data->start_class, value);            
            }
            }
            break;
        case SPACEL:
            if (flags & SCF_DO_STCLASS_AND) {
            if (data->start_class->flags & ANYOF_LOCALE)
                ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
            }
            else {
            data->start_class->flags |= ANYOF_LOCALE;
            ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
            }
            break;
        case NSPACE:
            if (flags & SCF_DO_STCLASS_AND) {
            if (!(data->start_class->flags & ANYOF_LOCALE)) {
                ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
                for (value = 0; value < 256; value++)
                if (isSPACE(value))
                    ANYOF_BITMAP_CLEAR(data->start_class, value);
            }
            }
            else {
            if (data->start_class->flags & ANYOF_LOCALE)
                ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
            else {
                for (value = 0; value < 256; value++)
                if (!isSPACE(value))
                    ANYOF_BITMAP_SET(data->start_class, value);            
            }
            }
            break;
        case NSPACEL:
            if (flags & SCF_DO_STCLASS_AND) {
            if (data->start_class->flags & ANYOF_LOCALE) {
                ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
                for (value = 0; value < 256; value++)
                if (!isSPACE(value))
                    ANYOF_BITMAP_CLEAR(data->start_class, value);
            }
            }
            else {
            data->start_class->flags |= ANYOF_LOCALE;
            ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
            }
            break;
        case DIGIT:
            if (flags & SCF_DO_STCLASS_AND) {
            ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NDIGIT);
            for (value = 0; value < 256; value++)
                if (!isDIGIT(value))
                ANYOF_BITMAP_CLEAR(data->start_class, value);
            }
            else {
            if (data->start_class->flags & ANYOF_LOCALE)
                ANYOF_CLASS_SET(data->start_class,ANYOF_DIGIT);
            else {
                for (value = 0; value < 256; value++)
                if (isDIGIT(value))
                    ANYOF_BITMAP_SET(data->start_class, value);            
            }
            }
            break;
        case NDIGIT:
            if (flags & SCF_DO_STCLASS_AND) {
            ANYOF_CLASS_CLEAR(data->start_class,ANYOF_DIGIT);
            for (value = 0; value < 256; value++)
                if (isDIGIT(value))
                ANYOF_BITMAP_CLEAR(data->start_class, value);
            }
            else {
            if (data->start_class->flags & ANYOF_LOCALE)
                ANYOF_CLASS_SET(data->start_class,ANYOF_NDIGIT);
            else {
                for (value = 0; value < 256; value++)
                if (!isDIGIT(value))
                    ANYOF_BITMAP_SET(data->start_class, value);            
            }
            }
            break;
        }
        if (flags & SCF_DO_STCLASS_OR)
            cl_and(data->start_class, &and_with);
        flags &= ~SCF_DO_STCLASS;
        }
    }
    else if (PL_regkind[(U8)OP(scan)] == EOL && flags & SCF_DO_SUBSTR) {
        data->flags |= (OP(scan) == MEOL
                ? SF_BEFORE_MEOL
                : SF_BEFORE_SEOL);
    }
    else if (  PL_regkind[(U8)OP(scan)] == BRANCHJ
         /* Lookbehind, or need to calculate parens/evals/stclass: */
           && (scan->flags || data || (flags & SCF_DO_STCLASS))
           && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) {
        /* Lookahead/lookbehind */
        I32 deltanext, minnext, fake = 0;
        regnode *nscan;
        struct regnode_charclass_class intrnl;
        int f = 0;

        data_fake.flags = 0;
        if (data) {        
        data_fake.whilem_c = data->whilem_c;
        data_fake.last_closep = data->last_closep;
        }
        else
        data_fake.last_closep = &fake;
        if ( flags & SCF_DO_STCLASS && !scan->flags
         && OP(scan) == IFMATCH ) { /* Lookahead */
        cl_init(pRExC_state, &intrnl);
        data_fake.start_class = &intrnl;
        f |= SCF_DO_STCLASS_AND;
        }
        if (flags & SCF_WHILEM_VISITED_POS)
        f |= SCF_WHILEM_VISITED_POS;
        next = regnext(scan);
        nscan = NEXTOPER(NEXTOPER(scan));
        minnext = study_chunk(pRExC_state, &nscan, &deltanext, last, &data_fake, f);
        if (scan->flags) {
        if (deltanext) {
            vFAIL("Variable length lookbehind not implemented");
        }
        else if (minnext > U8_MAX) {
            vFAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
        }
        scan->flags = (U8)minnext;
        }
        if (data && data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
        pars++;
        if (data && (data_fake.flags & SF_HAS_EVAL))
        data->flags |= SF_HAS_EVAL;
        if (data)
        data->whilem_c = data_fake.whilem_c;
        if (f & SCF_DO_STCLASS_AND) {
        const int was = (data->start_class->flags & ANYOF_EOS);

        cl_and(data->start_class, &intrnl);
        if (was)
            data->start_class->flags |= ANYOF_EOS;
        }
    }
    else if (OP(scan) == OPEN) {
        pars++;
    }
    else if (OP(scan) == CLOSE) {
        if ((I32)ARG(scan) == is_par) {
        next = regnext(scan);

        if ( next && (OP(next) != WHILEM) && next < last)
            is_par = 0;        /* Disable optimization */
        }
        if (data)
        *(data->last_closep) = ARG(scan);
    }
    else if (OP(scan) == EVAL) {
        if (data)
            data->flags |= SF_HAS_EVAL;
    }
    else if (OP(scan) == LOGICAL && scan->flags == 2) { /* Embedded follows */
        if (flags & SCF_DO_SUBSTR) {
            scan_commit(pRExC_state,data);
            data->longest = &(data->longest_float);
        }
        is_inf = is_inf_internal = 1;
        if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
            cl_anything(pRExC_state, data->start_class);
        flags &= ~SCF_DO_STCLASS;
    }
    /* Else: zero-length, ignore. */
    scan = regnext(scan);
    }

  finish:
    *scanp = scan;
    *deltap = is_inf_internal ? I32_MAX : delta;
    if (flags & SCF_DO_SUBSTR && is_inf)
    data->pos_delta = I32_MAX - data->pos_min;
    if (is_par > U8_MAX)
    is_par = 0;
    if (is_par && pars==1 && data) {
    data->flags |= SF_IN_PAR;
    data->flags &= ~SF_HAS_PAR;
    }
    else if (pars && data) {
    data->flags |= SF_HAS_PAR;
    data->flags &= ~SF_IN_PAR;
    }
    if (flags & SCF_DO_STCLASS_OR)
    cl_and(data->start_class, &and_with);
    return min;
}

А метка в конце — finish
http://zoozahita.ru — Бездомные животные Екатеринбурга ищут хозяев
 
Подождите ...
Wait...
Пока на собственное сообщение не было ответов, его можно удалить.