home *** CD-ROM | disk | FTP | other *** search
- To: vim_dev@googlegroups.com
- Subject: Patch 7.3.1133
- Fcc: outbox
- From: Bram Moolenaar <Bram@moolenaar.net>
- Mime-Version: 1.0
- Content-Type: text/plain; charset=UTF-8
- Content-Transfer-Encoding: 8bit
- ------------
-
- Patch 7.3.1133
- Problem: New regexp engine is a bit slow.
- Solution: Skip ahead to a character that must match. Don't try matching a
- "^" patter past the start of line.
- Files: src/regexp_nfa.c, src/regexp.h
-
-
- *** ../vim-7.3.1132/src/regexp_nfa.c 2013-06-06 18:04:47.000000000 +0200
- --- src/regexp_nfa.c 2013-06-06 18:37:23.000000000 +0200
- ***************
- *** 257,262 ****
- --- 257,264 ----
- static int nfa_ll_index = 0;
-
- static int nfa_regcomp_start __ARGS((char_u *expr, int re_flags));
- + static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
- + static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
- static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
- static int nfa_emit_equi_class __ARGS((int c, int neg));
- static int nfa_regatom __ARGS((void));
- ***************
- *** 331,336 ****
- --- 333,487 ----
- }
-
- /*
- + * Figure out if the NFA state list starts with an anchor, must match at start
- + * of the line.
- + */
- + static int
- + nfa_get_reganch(start, depth)
- + nfa_state_T *start;
- + int depth;
- + {
- + nfa_state_T *p = start;
- +
- + if (depth > 4)
- + return 0;
- +
- + while (p != NULL)
- + {
- + switch (p->c)
- + {
- + case NFA_BOL:
- + case NFA_BOF:
- + return 1; /* yes! */
- +
- + case NFA_ZSTART:
- + case NFA_ZEND:
- + case NFA_CURSOR:
- + case NFA_VISUAL:
- +
- + case NFA_MOPEN:
- + case NFA_MOPEN1:
- + case NFA_MOPEN2:
- + case NFA_MOPEN3:
- + case NFA_MOPEN4:
- + case NFA_MOPEN5:
- + case NFA_MOPEN6:
- + case NFA_MOPEN7:
- + case NFA_MOPEN8:
- + case NFA_MOPEN9:
- + case NFA_NOPEN:
- + #ifdef FEAT_SYN_HL
- + case NFA_ZOPEN:
- + case NFA_ZOPEN1:
- + case NFA_ZOPEN2:
- + case NFA_ZOPEN3:
- + case NFA_ZOPEN4:
- + case NFA_ZOPEN5:
- + case NFA_ZOPEN6:
- + case NFA_ZOPEN7:
- + case NFA_ZOPEN8:
- + case NFA_ZOPEN9:
- + #endif
- + p = p->out;
- + break;
- +
- + case NFA_SPLIT:
- + return nfa_get_reganch(p->out, depth + 1)
- + && nfa_get_reganch(p->out1, depth + 1);
- +
- + default:
- + return 0; /* noooo */
- + }
- + }
- + return 0;
- + }
- +
- + /*
- + * Figure out if the NFA state list starts with a character which must match
- + * at start of the match.
- + */
- + static int
- + nfa_get_regstart(start, depth)
- + nfa_state_T *start;
- + int depth;
- + {
- + nfa_state_T *p = start;
- +
- + if (depth > 4)
- + return 0;
- +
- + while (p != NULL)
- + {
- + switch (p->c)
- + {
- + /* all kinds of zero-width matches */
- + case NFA_BOL:
- + case NFA_BOF:
- + case NFA_BOW:
- + case NFA_EOW:
- + case NFA_ZSTART:
- + case NFA_ZEND:
- + case NFA_CURSOR:
- + case NFA_VISUAL:
- + case NFA_LNUM:
- + case NFA_LNUM_GT:
- + case NFA_LNUM_LT:
- + case NFA_COL:
- + case NFA_COL_GT:
- + case NFA_COL_LT:
- + case NFA_VCOL:
- + case NFA_VCOL_GT:
- + case NFA_VCOL_LT:
- + case NFA_MARK:
- + case NFA_MARK_GT:
- + case NFA_MARK_LT:
- +
- + case NFA_MOPEN:
- + case NFA_MOPEN1:
- + case NFA_MOPEN2:
- + case NFA_MOPEN3:
- + case NFA_MOPEN4:
- + case NFA_MOPEN5:
- + case NFA_MOPEN6:
- + case NFA_MOPEN7:
- + case NFA_MOPEN8:
- + case NFA_MOPEN9:
- + case NFA_NOPEN:
- + #ifdef FEAT_SYN_HL
- + case NFA_ZOPEN:
- + case NFA_ZOPEN1:
- + case NFA_ZOPEN2:
- + case NFA_ZOPEN3:
- + case NFA_ZOPEN4:
- + case NFA_ZOPEN5:
- + case NFA_ZOPEN6:
- + case NFA_ZOPEN7:
- + case NFA_ZOPEN8:
- + case NFA_ZOPEN9:
- + #endif
- + p = p->out;
- + break;
- +
- + case NFA_SPLIT:
- + {
- + int c1 = nfa_get_regstart(p->out, depth + 1);
- + int c2 = nfa_get_regstart(p->out1, depth + 1);
- +
- + if (c1 == c2)
- + return c1; /* yes! */
- + return 0;
- + }
- +
- + default:
- + if (p->c > 0 && !p->negated)
- + return p->c; /* yes! */
- + return 0;
- + }
- + }
- + return 0;
- + }
- +
- + /*
- * Allocate more space for post_start. Called when
- * running above the estimated number of states.
- */
- ***************
- *** 2121,2126 ****
- --- 2272,2281 ----
- if (debugf != NULL)
- {
- nfa_print_state(debugf, prog->start);
- +
- + fprintf(debugf, "reganch: %d\n", prog->reganch);
- + fprintf(debugf, "regstart: %d\n", prog->regstart);
- +
- fclose(debugf);
- }
- }
- ***************
- *** 4248,4253 ****
- --- 4403,4412 ----
- t = &neglist->t[0];
- neglist->n--;
- listidx--;
- + #ifdef ENABLE_LOG
- + fprintf(log_fd, " using neglist entry, %d remaining\n",
- + neglist->n);
- + #endif
- }
- else
- t = &thislist->t[listidx];
- ***************
- *** 4688,4694 ****
-
- case NFA_END_NEG_RANGE:
- /* This follows a series of negated nodes, like:
- ! * CHAR(x), NFA_NOT, CHAR(y), NFA_NOT etc. */
- if (curc > 0)
- {
- ll = nextlist;
- --- 4847,4853 ----
-
- case NFA_END_NEG_RANGE:
- /* This follows a series of negated nodes, like:
- ! * NOT CHAR(x), NOT CHAR(y), etc. */
- if (curc > 0)
- {
- ll = nextlist;
- ***************
- *** 5304,5316 ****
- * Returns 0 for failure, number of lines contained in the match otherwise.
- */
- static long
- ! nfa_regexec_both(line, col)
- char_u *line;
- ! colnr_T col; /* column to start looking for match */
- {
- nfa_regprog_T *prog;
- long retval = 0L;
- int i;
-
- if (REG_MULTI)
- {
- --- 5463,5476 ----
- * Returns 0 for failure, number of lines contained in the match otherwise.
- */
- static long
- ! nfa_regexec_both(line, startcol)
- char_u *line;
- ! colnr_T startcol; /* column to start looking for match */
- {
- nfa_regprog_T *prog;
- long retval = 0L;
- int i;
- + colnr_T col = startcol;
-
- if (REG_MULTI)
- {
- ***************
- *** 5333,5342 ****
- goto theend;
- }
-
- - /* If the start column is past the maximum column: no need to try. */
- - if (ireg_maxcol > 0 && col >= ireg_maxcol)
- - goto theend;
- -
- /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
- if (prog->regflags & RF_ICASE)
- ireg_ic = TRUE;
- --- 5493,5498 ----
- ***************
- *** 5361,5366 ****
- --- 5517,5548 ----
- nfa_regengine.expr = prog->pattern;
- #endif
-
- + if (prog->reganch && col > 0)
- + return 0L;
- +
- + if (prog->regstart != NUL)
- + {
- + char_u *s;
- +
- + /* Skip until the char we know it must start with.
- + * Used often, do some work to avoid call overhead. */
- + if (!ireg_ic
- + #ifdef FEAT_MBYTE
- + && !has_mbyte
- + #endif
- + )
- + s = vim_strbyte(regline + col, prog->regstart);
- + else
- + s = cstrchr(regline + col, prog->regstart);
- + if (s == NULL)
- + return 0L;
- + col = (int)(s - regline);
- + }
- +
- + /* If the start column is past the maximum column: no need to try. */
- + if (ireg_maxcol > 0 && col >= ireg_maxcol)
- + goto theend;
- +
- nstate = prog->nstate;
- for (i = 0; i < nstate; ++i)
- {
- ***************
- *** 5459,5464 ****
- --- 5641,5650 ----
- prog->has_zend = nfa_has_zend;
- prog->has_backref = nfa_has_backref;
- prog->nsubexp = regnpar;
- +
- + prog->reganch = nfa_get_reganch(prog->start, 0);
- + prog->regstart = nfa_get_regstart(prog->start, 0);
- +
- #ifdef ENABLE_LOG
- nfa_postfix_dump(expr, OK);
- nfa_dump(prog);
- *** ../vim-7.3.1132/src/regexp.h 2013-06-03 12:17:00.000000000 +0200
- --- src/regexp.h 2013-06-06 17:19:23.000000000 +0200
- ***************
- *** 87,92 ****
- --- 87,96 ----
- unsigned regflags;
-
- nfa_state_T *start; /* points into state[] */
- +
- + int reganch; /* pattern starts with ^ */
- + int regstart; /* char at start of pattern */
- +
- int has_zend; /* pattern contains \ze */
- int has_backref; /* pattern contains \1 .. \9 */
- #ifdef FEAT_SYN_HL
- *** ../vim-7.3.1132/src/version.c 2013-06-06 18:04:47.000000000 +0200
- --- src/version.c 2013-06-06 18:43:55.000000000 +0200
- ***************
- *** 730,731 ****
- --- 730,733 ----
- { /* Add new patch number below this line */
- + /**/
- + 1133,
- /**/
-
- --
- From "know your smileys":
- % Bike accident. A bit far-fetched, I suppose; although...
- o _ _ _
- _o /\_ _ \\o (_)\__/o (_)
- _< \_ _>(_) (_)/<_ \_| \ _|/' \/
- (_)>(_) (_) (_) (_) (_)' _\o_
-
- /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
- /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
- \\\ an exciting new programming language -- http://www.Zimbu.org ///
- \\\ help me help AIDS victims -- http://ICCF-Holland.org ///
-