home *** CD-ROM | disk | FTP | other *** search
- To: vim_dev@googlegroups.com
- Subject: Patch 7.3.1137
- Fcc: outbox
- From: Bram Moolenaar <Bram@moolenaar.net>
- Mime-Version: 1.0
- Content-Type: text/plain; charset=UTF-8
- Content-Transfer-Encoding: 8bit
- ------------
-
- Patch 7.3.1137
- Problem: New regexp engine: collections are slow.
- Solution: Handle all characters in one go.
- Files: src/regexp_nfa.c
-
-
- *** ../vim-7.3.1136/src/regexp_nfa.c 2013-06-06 18:46:00.000000000 +0200
- --- src/regexp_nfa.c 2013-06-07 13:40:58.000000000 +0200
- ***************
- *** 34,48 ****
- NFA_SPLIT = -1024,
- NFA_MATCH,
- NFA_SKIP_CHAR, /* matches a 0-length char */
- - NFA_END_NEG_RANGE, /* Used when expanding [^ab] */
-
- ! NFA_CONCAT,
- NFA_OR,
- NFA_STAR, /* greedy * */
- NFA_STAR_NONGREEDY, /* non-greedy * */
- NFA_QUEST, /* greedy \? */
- NFA_QUEST_NONGREEDY, /* non-greedy \? */
- - NFA_NOT, /* used for [^ab] negated char ranges */
-
- NFA_BOL, /* ^ Begin line */
- NFA_EOL, /* $ End line */
- --- 34,56 ----
- NFA_SPLIT = -1024,
- NFA_MATCH,
- NFA_SKIP_CHAR, /* matches a 0-length char */
-
- ! NFA_START_COLL, /* [abc] start */
- ! NFA_END_COLL, /* [abc] end */
- ! NFA_START_NEG_COLL, /* [^abc] start */
- ! NFA_END_NEG_COLL, /* [^abc] end (only used in postfix) */
- ! NFA_RANGE, /* range of the two previous items (only
- ! * used in postfix) */
- ! NFA_RANGE_MIN, /* low end of a range */
- ! NFA_RANGE_MAX, /* high end of a range */
- !
- ! NFA_CONCAT, /* concatenate two previous items (only
- ! * used in postfix) */
- NFA_OR,
- NFA_STAR, /* greedy * */
- NFA_STAR_NONGREEDY, /* non-greedy * */
- NFA_QUEST, /* greedy \? */
- NFA_QUEST_NONGREEDY, /* non-greedy \? */
-
- NFA_BOL, /* ^ Begin line */
- NFA_EOL, /* $ End line */
- ***************
- *** 260,266 ****
- static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
- static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
- static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
- ! static int nfa_emit_equi_class __ARGS((int c, int neg));
- static int nfa_regatom __ARGS((void));
- static int nfa_regpiece __ARGS((void));
- static int nfa_regconcat __ARGS((void));
- --- 268,274 ----
- static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
- static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
- static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
- ! static int nfa_emit_equi_class __ARGS((int c));
- static int nfa_regatom __ARGS((void));
- static int nfa_regpiece __ARGS((void));
- static int nfa_regconcat __ARGS((void));
- ***************
- *** 664,684 ****
- * NOTE! When changing this function, also update reg_equi_class()
- */
- static int
- ! nfa_emit_equi_class(c, neg)
- int c;
- - int neg;
- {
- ! int first = TRUE;
- ! int glue = neg == TRUE ? NFA_CONCAT : NFA_OR;
- ! #define EMIT2(c) \
- ! EMIT(c); \
- ! if (neg == TRUE) { \
- ! EMIT(NFA_NOT); \
- ! } \
- ! if (first == FALSE) \
- ! EMIT(glue); \
- ! else \
- ! first = FALSE; \
-
- #ifdef FEAT_MBYTE
- if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
- --- 672,681 ----
- * NOTE! When changing this function, also update reg_equi_class()
- */
- static int
- ! nfa_emit_equi_class(c)
- int c;
- {
- ! #define EMIT2(c) EMIT(c); EMIT(NFA_CONCAT);
-
- #ifdef FEAT_MBYTE
- if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
- ***************
- *** 687,770 ****
- {
- switch (c)
- {
- ! case 'A': case '\300': case '\301': case '\302':
- ! case '\303': case '\304': case '\305':
- ! EMIT2('A'); EMIT2('\300'); EMIT2('\301');
- ! EMIT2('\302'); EMIT2('\303'); EMIT2('\304');
- ! EMIT2('\305');
- return OK;
-
- ! case 'C': case '\307':
- ! EMIT2('C'); EMIT2('\307');
- return OK;
-
- ! case 'E': case '\310': case '\311': case '\312': case '\313':
- ! EMIT2('E'); EMIT2('\310'); EMIT2('\311');
- ! EMIT2('\312'); EMIT2('\313');
- return OK;
-
- ! case 'I': case '\314': case '\315': case '\316': case '\317':
- ! EMIT2('I'); EMIT2('\314'); EMIT2('\315');
- ! EMIT2('\316'); EMIT2('\317');
- return OK;
-
- ! case 'N': case '\321':
- ! EMIT2('N'); EMIT2('\321');
- return OK;
-
- ! case 'O': case '\322': case '\323': case '\324': case '\325':
- ! case '\326':
- ! EMIT2('O'); EMIT2('\322'); EMIT2('\323');
- ! EMIT2('\324'); EMIT2('\325'); EMIT2('\326');
- return OK;
-
- ! case 'U': case '\331': case '\332': case '\333': case '\334':
- ! EMIT2('U'); EMIT2('\331'); EMIT2('\332');
- ! EMIT2('\333'); EMIT2('\334');
- return OK;
-
- ! case 'Y': case '\335':
- ! EMIT2('Y'); EMIT2('\335');
- return OK;
-
- ! case 'a': case '\340': case '\341': case '\342':
- ! case '\343': case '\344': case '\345':
- ! EMIT2('a'); EMIT2('\340'); EMIT2('\341');
- ! EMIT2('\342'); EMIT2('\343'); EMIT2('\344');
- ! EMIT2('\345');
- return OK;
-
- ! case 'c': case '\347':
- ! EMIT2('c'); EMIT2('\347');
- return OK;
-
- ! case 'e': case '\350': case '\351': case '\352': case '\353':
- ! EMIT2('e'); EMIT2('\350'); EMIT2('\351');
- ! EMIT2('\352'); EMIT2('\353');
- return OK;
-
- ! case 'i': case '\354': case '\355': case '\356': case '\357':
- ! EMIT2('i'); EMIT2('\354'); EMIT2('\355');
- ! EMIT2('\356'); EMIT2('\357');
- return OK;
-
- ! case 'n': case '\361':
- ! EMIT2('n'); EMIT2('\361');
- return OK;
-
- ! case 'o': case '\362': case '\363': case '\364': case '\365':
- ! case '\366':
- ! EMIT2('o'); EMIT2('\362'); EMIT2('\363');
- ! EMIT2('\364'); EMIT2('\365'); EMIT2('\366');
- return OK;
-
- ! case 'u': case '\371': case '\372': case '\373': case '\374':
- ! EMIT2('u'); EMIT2('\371'); EMIT2('\372');
- ! EMIT2('\373'); EMIT2('\374');
- return OK;
-
- ! case 'y': case '\375': case '\377':
- ! EMIT2('y'); EMIT2('\375'); EMIT2('\377');
- return OK;
-
- default:
- --- 684,767 ----
- {
- switch (c)
- {
- ! case 'A': case 0300: case 0301: case 0302:
- ! case 0303: case 0304: case 0305:
- ! EMIT2('A'); EMIT2(0300); EMIT2(0301);
- ! EMIT2(0302); EMIT2(0303); EMIT2(0304);
- ! EMIT2(0305);
- return OK;
-
- ! case 'C': case 0307:
- ! EMIT2('C'); EMIT2(0307);
- return OK;
-
- ! case 'E': case 0310: case 0311: case 0312: case 0313:
- ! EMIT2('E'); EMIT2(0310); EMIT2(0311);
- ! EMIT2(0312); EMIT2(0313);
- return OK;
-
- ! case 'I': case 0314: case 0315: case 0316: case 0317:
- ! EMIT2('I'); EMIT2(0314); EMIT2(0315);
- ! EMIT2(0316); EMIT2(0317);
- return OK;
-
- ! case 'N': case 0321:
- ! EMIT2('N'); EMIT2(0321);
- return OK;
-
- ! case 'O': case 0322: case 0323: case 0324: case 0325:
- ! case 0326:
- ! EMIT2('O'); EMIT2(0322); EMIT2(0323);
- ! EMIT2(0324); EMIT2(0325); EMIT2(0326);
- return OK;
-
- ! case 'U': case 0331: case 0332: case 0333: case 0334:
- ! EMIT2('U'); EMIT2(0331); EMIT2(0332);
- ! EMIT2(0333); EMIT2(0334);
- return OK;
-
- ! case 'Y': case 0335:
- ! EMIT2('Y'); EMIT2(0335);
- return OK;
-
- ! case 'a': case 0340: case 0341: case 0342:
- ! case 0343: case 0344: case 0345:
- ! EMIT2('a'); EMIT2(0340); EMIT2(0341);
- ! EMIT2(0342); EMIT2(0343); EMIT2(0344);
- ! EMIT2(0345);
- return OK;
-
- ! case 'c': case 0347:
- ! EMIT2('c'); EMIT2(0347);
- return OK;
-
- ! case 'e': case 0350: case 0351: case 0352: case 0353:
- ! EMIT2('e'); EMIT2(0350); EMIT2(0351);
- ! EMIT2(0352); EMIT2(0353);
- return OK;
-
- ! case 'i': case 0354: case 0355: case 0356: case 0357:
- ! EMIT2('i'); EMIT2(0354); EMIT2(0355);
- ! EMIT2(0356); EMIT2(0357);
- return OK;
-
- ! case 'n': case 0361:
- ! EMIT2('n'); EMIT2(0361);
- return OK;
-
- ! case 'o': case 0362: case 0363: case 0364: case 0365:
- ! case 0366:
- ! EMIT2('o'); EMIT2(0362); EMIT2(0363);
- ! EMIT2(0364); EMIT2(0365); EMIT2(0366);
- return OK;
-
- ! case 'u': case 0371: case 0372: case 0373: case 0374:
- ! EMIT2('u'); EMIT2(0371); EMIT2(0372);
- ! EMIT2(0373); EMIT2(0374);
- return OK;
-
- ! case 'y': case 0375: case 0377:
- ! EMIT2('y'); EMIT2(0375); EMIT2(0377);
- return OK;
-
- default:
- ***************
- *** 811,824 ****
- char_u *old_regparse = regparse;
- #endif
- int extra = 0;
- - int first;
- int emit_range;
- int negated;
- int result;
- int startc = -1;
- int endc = -1;
- int oldstartc = -1;
- - int glue; /* ID that will "glue" nodes together */
-
- c = getchr();
- switch (c)
- --- 808,819 ----
- ***************
- *** 927,934 ****
-
- case Magic('n'):
- if (reg_string)
- ! /* In a string "\n" matches a newline character. */
- ! EMIT(NL);
- else
- {
- /* In buffer text "\n" matches the end of a line. */
- --- 922,929 ----
-
- case Magic('n'):
- if (reg_string)
- ! /* In a string "\n" matches a newline character. */
- ! EMIT(NL);
- else
- {
- /* In buffer text "\n" matches the end of a line. */
- ***************
- *** 1160,1191 ****
- case Magic('['):
- collection:
- /*
- ! * Glue is emitted between several atoms from the [].
- ! * It is either NFA_OR, or NFA_CONCAT.
- ! *
- ! * [abc] expands to 'a b NFA_OR c NFA_OR' (in postfix notation)
- ! * [^abc] expands to 'a NFA_NOT b NFA_NOT NFA_CONCAT c NFA_NOT
- ! * NFA_CONCAT NFA_END_NEG_RANGE NFA_CONCAT' (in postfix
- ! * notation)
- ! *
- */
-
- -
- - /* Emit negation atoms, if needed.
- - * The CONCAT below merges the NOT with the previous node. */
- - #define TRY_NEG() \
- - if (negated == TRUE) \
- - { \
- - EMIT(NFA_NOT); \
- - }
- -
- - /* Emit glue between important nodes : CONCAT or OR. */
- - #define EMIT_GLUE() \
- - if (first == FALSE) \
- - EMIT(glue); \
- - else \
- - first = FALSE;
- -
- p = regparse;
- endp = skip_anyof(p);
- if (*endp == ']')
- --- 1155,1169 ----
- case Magic('['):
- collection:
- /*
- ! * [abc] uses NFA_START_COLL - NFA_END_COLL
- ! * [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL
- ! * Each character is produced as a regular state, using
- ! * NFA_CONCAT to bind them together.
- ! * Besides normal characters there can be:
- ! * - character classes NFA_CLASS_*
- ! * - ranges, two characters followed by NFA_RANGE.
- */
-
- p = regparse;
- endp = skip_anyof(p);
- if (*endp == ']')
- ***************
- *** 1216,1236 ****
- * version that turns [abc] into 'a' OR 'b' OR 'c'
- */
- startc = endc = oldstartc = -1;
- - first = TRUE; /* Emitting first atom in this sequence? */
- negated = FALSE;
- - glue = NFA_OR;
- if (*regparse == '^') /* negated range */
- {
- negated = TRUE;
- - glue = NFA_CONCAT;
- mb_ptr_adv(regparse);
- }
- if (*regparse == '-')
- {
- startc = '-';
- EMIT(startc);
- ! TRY_NEG();
- ! EMIT_GLUE();
- mb_ptr_adv(regparse);
- }
- /* Emit the OR branches for each character in the [] */
- --- 1194,1213 ----
- * version that turns [abc] into 'a' OR 'b' OR 'c'
- */
- startc = endc = oldstartc = -1;
- negated = FALSE;
- if (*regparse == '^') /* negated range */
- {
- negated = TRUE;
- mb_ptr_adv(regparse);
- + EMIT(NFA_START_NEG_COLL);
- }
- + else
- + EMIT(NFA_START_COLL);
- if (*regparse == '-')
- {
- startc = '-';
- EMIT(startc);
- ! EMIT(NFA_CONCAT);
- mb_ptr_adv(regparse);
- }
- /* Emit the OR branches for each character in the [] */
- ***************
- *** 1306,1325 ****
- EMIT(NFA_CLASS_ESCAPE);
- break;
- }
- ! TRY_NEG();
- ! EMIT_GLUE();
- continue;
- }
- /* Try equivalence class [=a=] and the like */
- if (equiclass != 0)
- {
- ! result = nfa_emit_equi_class(equiclass, negated);
- if (result == FAIL)
- {
- /* should never happen */
- EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
- }
- - EMIT_GLUE();
- continue;
- }
- /* Try collating class like [. .] */
- --- 1283,1300 ----
- EMIT(NFA_CLASS_ESCAPE);
- break;
- }
- ! EMIT(NFA_CONCAT);
- continue;
- }
- /* Try equivalence class [=a=] and the like */
- if (equiclass != 0)
- {
- ! result = nfa_emit_equi_class(equiclass);
- if (result == FAIL)
- {
- /* should never happen */
- EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
- }
- continue;
- }
- /* Try collating class like [. .] */
- ***************
- *** 1391,1409 ****
- startc = oldstartc;
- if (startc > endc)
- EMSG_RET_FAIL(_(e_invrange));
- #ifdef FEAT_MBYTE
- ! if (has_mbyte && ((*mb_char2len)(startc) > 1
- || (*mb_char2len)(endc) > 1))
- {
- ! if (endc > startc + 256)
- ! EMSG_RET_FAIL(_(e_invrange));
- ! /* Emit the range. "startc" was already emitted, so
- ! * skip it. */
- for (c = startc + 1; c <= endc; c++)
- {
- EMIT(c);
- ! TRY_NEG();
- ! EMIT_GLUE();
- }
- }
- else
- --- 1366,1397 ----
- startc = oldstartc;
- if (startc > endc)
- EMSG_RET_FAIL(_(e_invrange));
- +
- + if (endc > startc + 2)
- + {
- + /* Emit a range instead of the sequence of
- + * individual characters. */
- + if (startc == 0)
- + /* \x00 is translated to \x0a, start at \x01. */
- + EMIT(1);
- + else
- + --post_ptr; /* remove NFA_CONCAT */
- + EMIT(endc);
- + EMIT(NFA_RANGE);
- + EMIT(NFA_CONCAT);
- + }
- + else
- #ifdef FEAT_MBYTE
- ! if (has_mbyte && ((*mb_char2len)(startc) > 1
- || (*mb_char2len)(endc) > 1))
- {
- ! /* Emit the characters in the range.
- ! * "startc" was already emitted, so skip it.
- ! * */
- for (c = startc + 1; c <= endc; c++)
- {
- EMIT(c);
- ! EMIT(NFA_CONCAT);
- }
- }
- else
- ***************
- *** 1425,1432 ****
- #endif
- {
- EMIT(c);
- ! TRY_NEG();
- ! EMIT_GLUE();
- }
- }
- emit_range = FALSE;
- --- 1413,1419 ----
- #endif
- {
- EMIT(c);
- ! EMIT(NFA_CONCAT);
- }
- }
- emit_range = FALSE;
- ***************
- *** 1434,1456 ****
- }
- else
- {
- ! /*
- ! * This char (startc) is not part of a range. Just
- * emit it.
- - *
- * Normally, simply emit startc. But if we get char
- * code=0 from a collating char, then replace it with
- * 0x0a.
- - *
- * This is needed to completely mimic the behaviour of
- ! * the backtracking engine.
- ! */
- ! if (got_coll_char == TRUE && startc == 0)
- ! EMIT(0x0a);
- else
- ! EMIT(startc);
- ! TRY_NEG();
- ! EMIT_GLUE();
- }
-
- mb_ptr_adv(regparse);
- --- 1421,1449 ----
- }
- else
- {
- ! /* This char (startc) is not part of a range. Just
- * emit it.
- * Normally, simply emit startc. But if we get char
- * code=0 from a collating char, then replace it with
- * 0x0a.
- * This is needed to completely mimic the behaviour of
- ! * the backtracking engine. */
- ! if (startc == NFA_NEWL)
- ! {
- ! /* Line break can't be matched as part of the
- ! * collection, add an OR below. But not for negated
- ! * range. */
- ! if (!negated)
- ! extra = ADD_NL;
- ! }
- else
- ! {
- ! if (got_coll_char == TRUE && startc == 0)
- ! EMIT(0x0a);
- ! else
- ! EMIT(startc);
- ! EMIT(NFA_CONCAT);
- ! }
- }
-
- mb_ptr_adv(regparse);
- ***************
- *** 1460,1479 ****
- if (*regparse == '-') /* if last, '-' is just a char */
- {
- EMIT('-');
- ! TRY_NEG();
- ! EMIT_GLUE();
- }
- mb_ptr_adv(regparse);
-
- /* skip the trailing ] */
- regparse = endp;
- mb_ptr_adv(regparse);
- if (negated == TRUE)
- ! {
- ! /* Mark end of negated char range */
- ! EMIT(NFA_END_NEG_RANGE);
- ! EMIT(NFA_CONCAT);
- ! }
-
- /* \_[] also matches \n but it's not negated */
- if (extra == ADD_NL)
- --- 1453,1471 ----
- if (*regparse == '-') /* if last, '-' is just a char */
- {
- EMIT('-');
- ! EMIT(NFA_CONCAT);
- }
- mb_ptr_adv(regparse);
-
- /* skip the trailing ] */
- regparse = endp;
- mb_ptr_adv(regparse);
- +
- + /* Mark end of the collection. */
- if (negated == TRUE)
- ! EMIT(NFA_END_NEG_COLL);
- ! else
- ! EMIT(NFA_END_COLL);
-
- /* \_[] also matches \n but it's not negated */
- if (extra == ADD_NL)
- ***************
- *** 1532,1540 ****
- }
- }
-
- - #undef TRY_NEG
- - #undef EMIT_GLUE
- -
- return OK;
- }
-
- --- 1524,1529 ----
- ***************
- *** 2091,2100 ****
- case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
- case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break;
- case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
- - case NFA_NOT: STRCPY(code, "NFA_NOT "); break;
- case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break;
- case NFA_OR: STRCPY(code, "NFA_OR"); break;
- ! case NFA_END_NEG_RANGE: STRCPY(code, "NFA_END_NEG_RANGE"); break;
- case NFA_CLASS_ALNUM: STRCPY(code, "NFA_CLASS_ALNUM"); break;
- case NFA_CLASS_ALPHA: STRCPY(code, "NFA_CLASS_ALPHA"); break;
- case NFA_CLASS_BLANK: STRCPY(code, "NFA_CLASS_BLANK"); break;
- --- 2080,2096 ----
- case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
- case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break;
- case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
- case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break;
- case NFA_OR: STRCPY(code, "NFA_OR"); break;
- !
- ! case NFA_START_COLL: STRCPY(code, "NFA_START_COLL"); break;
- ! case NFA_END_COLL: STRCPY(code, "NFA_END_COLL"); break;
- ! case NFA_START_NEG_COLL: STRCPY(code, "NFA_START_NEG_COLL"); break;
- ! case NFA_END_NEG_COLL: STRCPY(code, "NFA_END_NEG_COLL"); break;
- ! case NFA_RANGE: STRCPY(code, "NFA_RANGE"); break;
- ! case NFA_RANGE_MIN: STRCPY(code, "NFA_RANGE_MIN"); break;
- ! case NFA_RANGE_MAX: STRCPY(code, "NFA_RANGE_MAX"); break;
- !
- case NFA_CLASS_ALNUM: STRCPY(code, "NFA_CLASS_ALNUM"); break;
- case NFA_CLASS_ALPHA: STRCPY(code, "NFA_CLASS_ALPHA"); break;
- case NFA_CLASS_BLANK: STRCPY(code, "NFA_CLASS_BLANK"); break;
- ***************
- *** 2231,2238 ****
- fprintf(debugf, " %s", p);
-
- nfa_set_code(state->c);
- ! fprintf(debugf, "%s%s (%d) (id=%d)\n",
- ! state->negated ? "NOT " : "", code, state->c, abs(state->id));
- if (state->id < 0)
- return;
-
- --- 2227,2238 ----
- fprintf(debugf, " %s", p);
-
- nfa_set_code(state->c);
- ! fprintf(debugf, "%s%s (%d) (id=%d) val=%d\n",
- ! state->negated ? "NOT " : "",
- ! code,
- ! state->c,
- ! abs(state->id),
- ! state->val);
- if (state->id < 0)
- return;
-
- ***************
- *** 2325,2330 ****
- --- 2325,2331 ----
- s->c = c;
- s->out = out;
- s->out1 = out1;
- + s->val = 0;
-
- s->id = istate;
- s->lastlist[0] = 0;
- ***************
- *** 2565,2577 ****
- switch (*p)
- {
- case NFA_CONCAT:
- ! /* Catenation.
- ! * Pay attention: this operator does not exist
- ! * in the r.e. itself (it is implicit, really).
- ! * It is added when r.e. is translated to postfix
- ! * form in re2post().
- ! *
- ! * No new state added here. */
- if (nfa_calc_size == TRUE)
- {
- /* nstate += 0; */
- --- 2566,2575 ----
- switch (*p)
- {
- case NFA_CONCAT:
- ! /* Concatenation.
- ! * Pay attention: this operator does not exist in the r.e. itself
- ! * (it is implicit, really). It is added when r.e. is translated
- ! * to postfix form in re2post(). */
- if (nfa_calc_size == TRUE)
- {
- /* nstate += 0; */
- ***************
- *** 2583,2604 ****
- PUSH(frag(e1.start, e2.out));
- break;
-
- - case NFA_NOT:
- - /* Negation of a character */
- - if (nfa_calc_size == TRUE)
- - {
- - /* nstate += 0; */
- - break;
- - }
- - e1 = POP();
- - e1.start->negated = TRUE;
- - #ifdef FEAT_MBYTE
- - if (e1.start->c == NFA_COMPOSING)
- - e1.start->out1->negated = TRUE;
- - #endif
- - PUSH(e1);
- - break;
- -
- case NFA_OR:
- /* Alternation */
- if (nfa_calc_size == TRUE)
- --- 2581,2586 ----
- ***************
- *** 2672,2677 ****
- --- 2654,2696 ----
- PUSH(frag(s, append(e.out, list1(&s->out))));
- break;
-
- + case NFA_END_COLL:
- + case NFA_END_NEG_COLL:
- + /* On the stack is the sequence starting with NFA_START_COLL or
- + * NFA_START_NEG_COLL and all possible characters. Patch it to
- + * add the output to the start. */
- + if (nfa_calc_size == TRUE)
- + {
- + nstate++;
- + break;
- + }
- + e = POP();
- + s = alloc_state(NFA_END_COLL, NULL, NULL);
- + if (s == NULL)
- + goto theend;
- + patch(e.out, s);
- + e.start->out1 = s;
- + PUSH(frag(e.start, list1(&s->out)));
- + break;
- +
- + case NFA_RANGE:
- + /* Before this are two characters, the low and high end of a
- + * range. Turn them into two states with MIN and MAX. */
- + if (nfa_calc_size == TRUE)
- + {
- + /* nstate += 0; */
- + break;
- + }
- + e2 = POP();
- + e1 = POP();
- + e2.start->val = e2.start->c;
- + e2.start->c = NFA_RANGE_MAX;
- + e1.start->val = e1.start->c;
- + e1.start->c = NFA_RANGE_MIN;
- + patch(e1.out, e2.start);
- + PUSH(frag(e1.start, e2.out));
- + break;
- +
- case NFA_SKIP_CHAR:
- /* Symbol of 0-length, Used in a repetition
- * with max/min count of 0 */
- ***************
- *** 2990,2995 ****
- --- 3009,3016 ----
- matchstate = &state_ptr[istate++]; /* the match state */
- matchstate->c = NFA_MATCH;
- matchstate->out = matchstate->out1 = NULL;
- + matchstate->negated = FALSE;
- + matchstate->id = 0;
-
- patch(e.out, matchstate);
- ret = e.start;
- ***************
- *** 3308,3314 ****
- switch (state->c)
- {
- case NFA_SPLIT:
- - case NFA_NOT:
- case NFA_NOPEN:
- case NFA_SKIP_CHAR:
- case NFA_NCLOSE:
- --- 3329,3334 ----
- ***************
- *** 3782,3788 ****
-
- default:
- /* should not be here :P */
- ! EMSG_RET_FAIL(_("E877: (NFA regexp) Invalid character class "));
- }
- return FAIL;
- }
- --- 3802,3809 ----
-
- default:
- /* should not be here :P */
- ! EMSGN("E877: (NFA regexp) Invalid character class: %ld", class);
- ! return FAIL;
- }
- return FAIL;
- }
- ***************
- *** 4320,4327 ****
- addstate(thislist, start, m, 0);
-
- /* There are two cases when the NFA advances: 1. input char matches the
- ! * NFA node and 2. input char does not match the NFA node, but the next
- ! * node is NFA_NOT. The following macro calls addstate() according to
- * these rules. It is used A LOT, so use the "listtbl" table for speed */
- listtbl[0][0] = NULL;
- listtbl[0][1] = neglist;
- --- 4341,4348 ----
- addstate(thislist, start, m, 0);
-
- /* There are two cases when the NFA advances: 1. input char matches the
- ! * NFA node and 2. input char does not match the NFA node and the state
- ! * has the negated flag. The following macro calls addstate() according to
- * these rules. It is used A LOT, so use the "listtbl" table for speed */
- listtbl[0][0] = NULL;
- listtbl[0][1] = neglist;
- ***************
- *** 4845,4860 ****
- ADD_POS_NEG_STATE(t->state);
- break;
-
- ! case NFA_END_NEG_RANGE:
- ! /* This follows a series of negated nodes, like:
- ! * NOT CHAR(x), NOT CHAR(y), etc. */
- ! if (curc > 0)
- {
- ll = nextlist;
- ! add_state = t->state->out;
- add_off = clen;
- }
- break;
-
- case NFA_ANY:
- /* Any char except '\0', (end of input) does not match. */
- --- 4866,4944 ----
- ADD_POS_NEG_STATE(t->state);
- break;
-
- ! case NFA_START_COLL:
- ! case NFA_START_NEG_COLL:
- ! {
- ! /* What follows is a list of characters, until NFA_END_COLL.
- ! * One of them must match or none of them must match. */
- ! nfa_state_T *state;
- ! int result_if_matched;
- ! int c1, c2;
- !
- ! /* Never match EOL. If it's part of the collection it is added
- ! * as a separate state with an OR. */
- ! if (curc == NUL)
- ! break;
- !
- ! state = t->state->out;
- ! result_if_matched = (t->state->c == NFA_START_COLL);
- ! for (;;)
- {
- + if (state->c == NFA_END_COLL)
- + {
- + result = !result_if_matched;
- + break;
- + }
- + if (state->c == NFA_RANGE_MIN)
- + {
- + c1 = state->val;
- + state = state->out; /* advance to NFA_RANGE_MAX */
- + c2 = state->val;
- + #ifdef ENABLE_LOG
- + fprintf(log_fd, "NFA_RANGE_MIN curc=%d c1=%d c2=%d\n",
- + curc, c1, c2);
- + #endif
- + if (curc >= c1 && curc <= c2)
- + {
- + result = result_if_matched;
- + break;
- + }
- + if (ireg_ic)
- + {
- + int curc_low = MB_TOLOWER(curc);
- + int done = FALSE;
- +
- + for ( ; c1 <= c2; ++c1)
- + if (MB_TOLOWER(c1) == curc_low)
- + {
- + result = result_if_matched;
- + done = TRUE;
- + break;
- + }
- + if (done)
- + break;
- + }
- + }
- + else if (state->c < 0 ? check_char_class(state->c, curc)
- + : (curc == state->c
- + || (ireg_ic && MB_TOLOWER(curc)
- + == MB_TOLOWER(state->c))))
- + {
- + result = result_if_matched;
- + break;
- + }
- + state = state->out;
- + }
- + if (result)
- + {
- + /* next state is in out of the NFA_END_COLL, out1 of
- + * START points to the END state */
- ll = nextlist;
- ! add_state = t->state->out1->out;
- add_off = clen;
- }
- break;
- + }
-
- case NFA_ANY:
- /* Any char except '\0', (end of input) does not match. */
- *** ../vim-7.3.1136/src/version.c 2013-06-06 21:31:02.000000000 +0200
- --- src/version.c 2013-06-07 13:21:57.000000000 +0200
- ***************
- *** 730,731 ****
- --- 730,733 ----
- { /* Add new patch number below this line */
- + /**/
- + 1137,
- /**/
-
- --
- From "know your smileys":
- :.-( Crying
-
- /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
- /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
- \\\ an exciting new programming language -- http://www.Zimbu.org ///
- \\\ help me help AIDS victims -- http://ICCF-Holland.org ///
-