home *** CD-ROM | disk | FTP | other *** search
- /* Routines to compute the current syntactic context, for font-lock mode.
- Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
- Copyright (C) 1995 Sun Microsystems.
-
- This file is part of XEmacs.
-
- XEmacs is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the
- Free Software Foundation; either version 2, or (at your option) any
- later version.
-
- XEmacs is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- for more details.
-
- You should have received a copy of the GNU General Public License
- along with XEmacs; see the file COPYING. If not, write to the Free
- Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
-
- /* Synched up with: Not in FSF. */
-
/* This code computes the syntactic context of the current point, that is,
   whether point is within a comment, a string, what have you.  It does
   this by picking a point "known" to be outside of any syntactic constructs
   and moving forward, examining the syntax of each character.

   Two caches are used: one caches the last point computed, and the other
   caches the last point at the beginning of a line.  As a result, there is
   little penalty for moving left-to-right on a line a character at a
   time; starting over on a line is cheap; and random access within a
   line is relatively cheap.

   When we move to a different line farther down in the file (but within the
   current top-level form) we simply continue computing forward.  If we move
   backward more than a line, or move beyond the end of the current
   top-level form, or switch buffers, then we call `beginning-of-defun'
   and start over from there.

   #### We should really rewrite this to keep extents over the buffer
   that hold the current syntactic information.  This would be a big win.
   This way there would be no guessing or incorrect results.
 */
-
- #include <config.h>
- #include "lisp.h"
-
- #include "buffer.h"
- #include "insdel.h"
- #include "syntax.h"
-
- Lisp_Object Qcomment;
- Lisp_Object Qblock_comment;
- Lisp_Object Qbeginning_of_defun;
-
/* What kind of construct the scanned position is inside.

   In all three enumerations below the "none" member is deliberately the
   zero value: reset_context_cache () clears a cache with memset (), and a
   cleared cache must read as "no context".  */
enum syntactic_context
{
  context_none = 0,             /* not inside anything special */
  context_string,               /* inside a string */
  context_comment,              /* inside a comment opened by one character */
  context_block_comment         /* inside a comment opened by two characters */
};

/* Progress through a two-character comment delimiter.  */
enum block_comment_context
{
  ccontext_none = 0,            /* no delimiter in progress */
  ccontext_start1,              /* saw the first character of a starter */
  ccontext_start2,              /* saw both characters of a starter */
  ccontext_end1                 /* saw the first character of an ender */
};

/* Which of the two comment groups (A or B) the open comment belongs to.  */
enum comment_style
{
  comment_style_none = 0,
  comment_style_a,
  comment_style_b
};
-
- struct context_cache {
- Bufpos start_point; /* beginning of defun */
- Bufpos cur_point; /* cache location */
- Bufpos end_point; /* end of defun */
- struct buffer *buffer; /* does this need to be staticpro'd? */
- enum syntactic_context context; /* single-char-syntax state */
- enum block_comment_context ccontext; /* block-comment state */
- enum comment_style style; /* which comment group */
- unsigned char scontext; /* active string delimiter */
- int depth; /* depth in parens */
- int backslash_p; /* just read a backslash */
- };
-
- /* We have two caches; one for the current point and one for
- the beginning of line. We used to rely on the caller to
- tell us when to invalidate them, but now we do it ourselves;
- it lets us be smarter. */
-
- static struct context_cache context_cache;
-
- static struct context_cache bol_context_cache;
-
- int font_lock_debug;
-
- #define reset_context_cache(cc) memset (cc, 0, sizeof (struct context_cache))
-
- /* This function is called from signal_after_change() to tell us when
- textual changes are made so we can flush our caches when necessary.
-
- We make the following somewhat heuristic assumptions:
-
- (remember that current_point is always >= start_point, but may be
- less than or greater than end_point (we might not be inside any
- top-level form)).
-
- 1) Textual changes before the beginning of the current top-level form
- don't affect anything; all we need to do is offset the caches
- appropriately.
- 2) Textual changes right at the beginning of the current
- top-level form messes things up and requires that we flush
- the caches.
- 3) Textual changes after the beginning of the current top-level form
- and before one or both or the caches invalidates the corresponding
- cache(s).
- 4) Textual changes after the caches and before the end of the
- current top-level form don't affect anything; all we need to do is
- offset the caches appropriately.
- 5) Textual changes right at the end of the current top-level form
- necessitate recomputing that end value.
- 6) Textual changes after the end of the current top-level form
- are ignored. */
-
-
- void
- font_lock_maybe_update_syntactic_caches (struct buffer *buf, Bufpos start,
- Bufpos orig_end, Bufpos new_end)
- {
- /* Note: either both context_cache and bol_context_cache are valid and
- point to the same buffer, or both are invalid. If we have to
- invalidate just context_cache, we recopy it from bol_context_cache.
- */
- if (context_cache.buffer != buf)
- /* caches don't apply */
- return;
- /* NOTE: The order of the if statements below is important. If you
- change them around unthinkingly, you will probably break something. */
- if (orig_end <= context_cache.start_point - 1)
- {
- /* case 1: before the beginning of the current top-level form */
- Charcount diff = new_end - orig_end;
- if (font_lock_debug)
- stderr_out ("font-lock; Case 1\n");
- context_cache.start_point += diff;
- context_cache.cur_point += diff;
- context_cache.end_point += diff;
- bol_context_cache.start_point += diff;
- bol_context_cache.cur_point += diff;
- bol_context_cache.end_point += diff;
- }
- else if (start <= context_cache.start_point)
- {
- if (font_lock_debug)
- stderr_out ("font-lock; Case 2\n");
- /* case 2: right at the current top-level form (paren that starts
- top level form got deleted or moved away from the newline it
- was touching) */
- reset_context_cache (&context_cache);
- reset_context_cache (&bol_context_cache);
- }
- /* OK, now we know that the start is after the beginning of the
- current top-level form. */
- else if (start < bol_context_cache.cur_point)
- {
- if (font_lock_debug)
- stderr_out ("font-lock; Case 3 (1)\n");
- /* case 3: after the beginning of the current top-level form
- and before both of the caches */
- reset_context_cache (&context_cache);
- reset_context_cache (&bol_context_cache);
- }
- else if (start < context_cache.cur_point)
- {
- if (font_lock_debug)
- stderr_out ("font-lock; Case 3 (2)\n");
- /* case 3: but only need to invalidate one cache */
- context_cache = bol_context_cache;
- }
- /* OK, now we know that the start is after the caches. */
- else if (start >= context_cache.end_point)
- {
- if (font_lock_debug)
- stderr_out ("font-lock; Case 6\n");
- /* case 6: after the end of the current top-level form
- and after the caches. */
- }
- else if (orig_end <= context_cache.end_point - 2)
- {
- /* case 4: after the caches and before the end of the
- current top-level form */
- Charcount diff = new_end - orig_end;
- if (font_lock_debug)
- stderr_out ("font-lock; Case 4\n");
- context_cache.end_point += diff;
- bol_context_cache.end_point += diff;
- }
- else
- {
- if (font_lock_debug)
- stderr_out ("font-lock; Case 5\n");
- /* case 5: right at the end of the current top-level form */
- context_cache.end_point = context_cache.start_point - 1;
- bol_context_cache.end_point = context_cache.start_point - 1;
- }
- }
-
- /* This function is called from Fkill_buffer(). */
-
- void
- font_lock_buffer_was_killed (struct buffer *buf)
- {
- if (context_cache.buffer == buf)
- {
- reset_context_cache (&context_cache);
- reset_context_cache (&bol_context_cache);
- }
- }
-
- static Bufpos
- beginning_of_defun (struct buffer *buf, Bufpos pt)
- {
- /* This function can GC */
- Bufpos opt = BUF_PT (buf);
- if (pt == BUF_BEGV (buf))
- return pt;
- BUF_SET_PT (buf, pt);
- /* There used to be some kludginess to call c++-beginning-of-defun
- if we're in C++ mode. There's no point in this any more;
- we're using cc-mode. If you really want to get the old c++
- mode working, fix it rather than the C code. */
- call0_in_buffer (buf, Qbeginning_of_defun);
- pt = BUF_PT (buf);
- BUF_SET_PT (buf, opt);
- return pt;
- }
-
- static Bufpos
- end_of_defun (struct buffer *buf, Bufpos pt)
- {
- Lisp_Object retval = scan_lists (buf, pt, 1, 0, 0, 1);
- if (NILP (retval))
- return BUF_ZV (buf);
- else
- return XINT (retval);
- }
-
- /* Set up context_cache for attempting to determine the syntactic context
- in buffer BUF at point PT. */
-
- static void
- setup_context_cache (struct buffer *buf, Bufpos pt)
- {
- /* This function can GC */
- if (context_cache.buffer != buf || pt < context_cache.start_point)
- {
- #if 0
- start_over:
- #endif
- if (font_lock_debug)
- stderr_out ("reset context cache\n");
- /* OK, completely invalid. */
- reset_context_cache (&context_cache);
- reset_context_cache (&bol_context_cache);
- }
- if (!context_cache.buffer)
- {
- /* Need to recompute the start point. */
- if (font_lock_debug)
- stderr_out ("recompute start\n");
- context_cache.start_point = beginning_of_defun (buf, pt);
- bol_context_cache.start_point = context_cache.start_point;
- bol_context_cache.buffer = context_cache.buffer = buf;
- }
- if (context_cache.end_point < context_cache.start_point)
- {
- /* Need to recompute the end point. */
- if (font_lock_debug)
- stderr_out ("recompute end\n");
- context_cache.end_point = end_of_defun (buf, context_cache.start_point);
- bol_context_cache.end_point = context_cache.end_point;
- }
- if (bol_context_cache.cur_point == 0 ||
- pt < bol_context_cache.cur_point)
- {
- if (font_lock_debug)
- stderr_out ("reset to start\n");
- pt = context_cache.start_point;
- /* Reset current point to start of buffer. */
- context_cache.cur_point = pt;
- context_cache.context = context_none;
- context_cache.ccontext = ccontext_none;
- context_cache.style = comment_style_none;
- context_cache.scontext = '\000';
- context_cache.depth = 0;
- context_cache.backslash_p = ((pt > 1) &&
- (BUF_FETCH_CHAR (buf, pt - 1) == '\\'));
- bol_context_cache = context_cache;
- return;
- }
- else if (pt < context_cache.cur_point)
- {
- if (font_lock_debug)
- stderr_out ("reset to bol\n");
- /* bol cache is OK but current_cache is not. */
- context_cache = bol_context_cache;
- return;
- }
- else if (pt <= context_cache.end_point)
- {
- if (font_lock_debug)
- stderr_out ("everything is OK\n");
- /* in same top-level form. */
- return;
- }
- {
- /* OK, we're past the end of the top-level form. */
- Bufpos maxpt = max (context_cache.end_point, context_cache.cur_point);
- #if 0
- int shortage;
- #endif
-
- if (font_lock_debug)
- stderr_out ("past end\n");
- if (pt <= maxpt)
- /* OK, fine. */
- return;
- #if 0
- /* This appears to cause huge slowdowns in files like
- emacsfns.h, which have no top-level forms.
-
- In any case, it's not really necessary that we know for
- sure the top-level form we're in; if we're in a form
- but the form we have recorded is the previous one,
- it will be OK. */
-
- scan_buffer (buf, '\n', maxpt, pt, 1, &shortage, 1);
- if (!shortage)
- /* If there was a newline in the region past the known universe,
- we might be inside another top-level form, so start over.
- Otherwise, we're outside of any top-level forms and we know
- the one directly before us, so it's OK. */
- goto start_over;
- #endif
- }
- }
-
/* Each of the three macros below yields an `enum comment_style':
   comment_style_a if the characters match under SYNTAX_COMMENT_STYLE_A,
   otherwise comment_style_b if they match under SYNTAX_COMMENT_STYLE_B,
   otherwise comment_style_none.  TABLE is the syntax table to consult.  */

/* Style of the two-character comment starter C1 C2.  */
#define SYNTAX_START_STYLE(table, c1, c2)                                   \
  (SYNTAX_STYLES_MATCH_START_P (table, c1, c2, SYNTAX_COMMENT_STYLE_A)      \
   ? comment_style_a                                                        \
   : (SYNTAX_STYLES_MATCH_START_P (table, c1, c2, SYNTAX_COMMENT_STYLE_B)   \
      ? comment_style_b                                                     \
      : comment_style_none))

/* Style of the two-character comment ender C1 C2.  */
#define SYNTAX_END_STYLE(table, c1, c2)                                     \
  (SYNTAX_STYLES_MATCH_END_P (table, c1, c2, SYNTAX_COMMENT_STYLE_A)        \
   ? comment_style_a                                                        \
   : (SYNTAX_STYLES_MATCH_END_P (table, c1, c2, SYNTAX_COMMENT_STYLE_B)     \
      ? comment_style_b                                                     \
      : comment_style_none))

/* Style of the single-character comment delimiter C.  */
#define SINGLE_SYNTAX_STYLE(table, c)                                       \
  (SYNTAX_STYLES_MATCH_1CHAR_P (table, c, SYNTAX_COMMENT_STYLE_A)           \
   ? comment_style_a                                                        \
   : (SYNTAX_STYLES_MATCH_1CHAR_P (table, c, SYNTAX_COMMENT_STYLE_B)        \
      ? comment_style_b                                                     \
      : comment_style_none))
-
- /* Set up context_cache for position PT in BUF. */
-
- static void
- find_context (struct buffer *buf, Bufpos pt)
- {
- /* This function can GC */
- Lisp_Object syntax_table = buf->syntax_table;
- Bufbyte prev_c, c;
- Bufpos target = pt;
- setup_context_cache (buf, pt);
- pt = context_cache.cur_point;
-
- if (pt > BUF_BEGV (buf))
- c = BUF_FETCH_CHAR (buf, pt - 1);
- else
- c = '\n'; /* to get bol_context_cache at point-min */
-
- for (; pt < target; pt++, context_cache.cur_point = pt)
- {
- prev_c = c;
- c = BUF_FETCH_CHAR (buf, pt);
-
- if (prev_c == '\n')
- bol_context_cache = context_cache;
-
- if (context_cache.backslash_p)
- {
- context_cache.backslash_p = 0;
- continue;
- }
-
- switch (SYNTAX (syntax_table, c))
- {
- case Sescape:
- context_cache.backslash_p = 1;
- break;
-
- case Sopen:
- if (context_cache.context == context_none)
- context_cache.depth++;
- break;
-
- case Sclose:
- if (context_cache.context == context_none)
- context_cache.depth--;
- break;
-
- case Scomment:
- if (context_cache.context == context_none)
- {
- context_cache.context = context_comment;
- context_cache.ccontext = ccontext_none;
- context_cache.style = SINGLE_SYNTAX_STYLE (syntax_table, c);
- if (context_cache.style == comment_style_none) abort ();
- }
- break;
-
- case Sendcomment:
- if (context_cache.style != SINGLE_SYNTAX_STYLE (syntax_table, c))
- ;
- else
- if (context_cache.context == context_comment)
- {
- context_cache.context = context_none;
- context_cache.style = comment_style_none;
- }
- else if (context_cache.context == context_block_comment &&
- (context_cache.ccontext == ccontext_start2 ||
- context_cache.ccontext == ccontext_end1))
- {
- context_cache.context = context_none;
- context_cache.ccontext = ccontext_none;
- context_cache.style = comment_style_none;
- }
- break;
-
- case Sstring:
- {
- if (context_cache.context == context_string &&
- context_cache.scontext == c)
- {
- context_cache.context = context_none;
- context_cache.scontext = '\000';
- }
- else if (context_cache.context == context_none)
- {
- unsigned char stringterm = SYNTAX_MATCH (syntax_table, c);
- if (stringterm == 0) stringterm = c;
- context_cache.context = context_string;
- context_cache.scontext = stringterm;
- context_cache.ccontext = ccontext_none;
- }
- break;
- }
- default:
- ;
- }
-
- /* That takes care of the characters with manifest syntax.
- Now we've got to hack multi-char sequences that start
- and end block comments.
- */
- if ((SYNTAX_COMMENT_BITS (syntax_table, c) &
- SYNTAX_SECOND_CHAR_START) &&
- context_cache.context == context_none &&
- context_cache.ccontext == ccontext_start1 &&
- SYNTAX_START_P (syntax_table, prev_c, c) /* the two chars match */
- )
- {
- context_cache.ccontext = ccontext_start2;
- context_cache.style = SYNTAX_START_STYLE (syntax_table, prev_c, c);
- if (context_cache.style == comment_style_none) abort ();
- }
- else if ((SYNTAX_COMMENT_BITS (syntax_table, c) &
- SYNTAX_FIRST_CHAR_START) &&
- context_cache.context == context_none &&
- (context_cache.ccontext == ccontext_none ||
- context_cache.ccontext == ccontext_start1))
- {
- context_cache.ccontext = ccontext_start1;
- context_cache.style = comment_style_none; /* should be this already*/
- }
- else if ((SYNTAX_COMMENT_BITS (syntax_table, c) &
- SYNTAX_SECOND_CHAR_END) &&
- context_cache.context == context_block_comment &&
- context_cache.ccontext == ccontext_end1 &&
- SYNTAX_END_P (syntax_table, prev_c, c) &&
- /* the two chars match */
- context_cache.style ==
- SYNTAX_END_STYLE (syntax_table, prev_c, c)
- )
- {
- context_cache.context = context_none;
- context_cache.ccontext = ccontext_none;
- context_cache.style = comment_style_none;
- }
- else if ((SYNTAX_COMMENT_BITS (syntax_table, c) &
- SYNTAX_FIRST_CHAR_END) &&
- context_cache.context == context_block_comment &&
- (context_cache.style ==
- SYNTAX_END_STYLE (syntax_table, c,
- BUF_FETCH_CHAR (buf, pt+1))) &&
- (context_cache.ccontext == ccontext_start2 ||
- context_cache.ccontext == ccontext_end1))
- /* #### is it right to check for end1 here?? */
- {
- if (context_cache.style == comment_style_none) abort ();
- context_cache.ccontext = ccontext_end1;
- }
-
- else if (context_cache.ccontext == ccontext_start1)
- {
- if (context_cache.context != context_none) abort ();
- context_cache.ccontext = ccontext_none;
- }
- else if (context_cache.ccontext == ccontext_end1)
- {
- if (context_cache.context != context_block_comment) abort ();
- context_cache.context = context_none;
- context_cache.ccontext = ccontext_start2;
- }
-
- if (context_cache.ccontext == ccontext_start2 &&
- context_cache.context == context_none)
- {
- context_cache.context = context_block_comment;
- if (context_cache.style == comment_style_none) abort ();
- }
- else if (context_cache.ccontext == ccontext_none &&
- context_cache.context == context_block_comment)
- {
- context_cache.context = context_none;
- }
- }
- }
-
- static Lisp_Object
- context_to_symbol (enum syntactic_context context)
- {
- switch (context)
- {
- case context_none: return (Qnil);
- case context_string: return (Qstring);
- case context_comment: return (Qcomment);
- case context_block_comment: return (Qblock_comment);
- default: abort ();
- }
- return Qnil; /* suppress compiler warning */
- }
-
- DEFUN ("buffer-syntactic-context", Fbuffer_syntactic_context,
- Sbuffer_syntactic_context, 0, 1, 0,
- "Return the syntactic context of BUFFER at point.\n\
- If BUFFER is nil or omitted, the current buffer is assumed.\n\
- The returned value is one of the following symbols:\n\
- \n\
- nil ; meaning no special interpretation\n\
- string ; meaning point is within a string\n\
- comment ; meaning point is within a line comment\n\
- block-comment ; meaning point is within a block comment\n\
- \n\
- See also the function `buffer-syntactic-context-depth', which returns\n\
- the current nesting-depth within all parenthesis-syntax delimiters\n\
- and the function `syntactically-sectionize', which will map a function\n\
- over each syntactic context in a region.\n\
- \n\
- WARNING: this may alter match-data.")
- (buffer)
- Lisp_Object buffer;
- {
- /* This function can GC */
- struct buffer *buf = decode_buffer (buffer, 0);
- find_context (buf, BUF_PT (buf));
- return context_to_symbol (context_cache.context);
- }
-
- DEFUN ("buffer-syntactic-context-depth", Fbuffer_syntactic_context_depth,
- Sbuffer_syntactic_context_depth, 0, 1, 0,
- "Return the depth within all parenthesis-syntax delimiters at point.\n\
- If BUFFER is nil or omitted, the current buffer is assumed.\n\
- WARNING: this may alter match-data.")
- (buffer)
- Lisp_Object buffer;
- {
- /* This function can GC */
- struct buffer *buf = decode_buffer (buffer, 0);
- find_context (buf, BUF_PT (buf));
- return make_number (context_cache.depth);
- }
-
-
- DEFUN ("syntactically-sectionize", Fsyntactically_sectionize,
- Ssyntactically_sectionize, 3, 4, 0,
- "Calls FUNCTION for each contiguous syntactic context in the region.\n\
- Calls the given function with four arguments: the start and end of the\n\
- region, a symbol representing the syntactic context, and the current\n\
- depth (as returned by the functions `buffer-syntactic-context' and\n\
- `buffer-syntactic-context-depth'). When this function is called, the\n\
- current buffer will be set to BUFFER.\n\
- \n\
- WARNING: this may alter match-data.")
- (function, start, end, buffer)
- Lisp_Object function, start, end, buffer;
- {
- /* This function can GC */
- Bufpos pt, e;
- int edepth;
- enum syntactic_context this_context;
- Lisp_Object extent = Qnil;
- struct gcpro gcpro1;
- struct buffer *buf = decode_buffer (buffer, 0);
-
- get_bufrange (buf, start, end, &pt, &e, 0);
-
- find_context (buf, pt);
-
- GCPRO1 (extent);
- while (pt < e)
- {
- Bufpos estart, eend;
- /* skip over "blank" areas, and bug out at end-of-buffer. */
- while (context_cache.context == context_none)
- {
- pt++;
- if (pt >= e) goto DONE_LABEL;
- find_context (buf, pt);
- }
- /* We've found a non-blank area; keep going until we reach its end */
- this_context = context_cache.context;
- estart = pt;
-
- /* Minor kludge: consider the comment-start character(s) a part of
- the comment.
- */
- if (this_context == context_block_comment &&
- context_cache.ccontext == ccontext_start2)
- estart -= 2;
- else if (this_context == context_comment)
- estart -= 1;
-
- edepth = context_cache.depth;
- while (context_cache.context == this_context && pt < e)
- {
- pt++;
- find_context (buf, pt);
- }
-
- eend = pt;
-
- /* Minor kludge: consider the character which terminated the comment
- a part of the comment.
- */
- if ((this_context == context_block_comment ||
- this_context == context_comment)
- && pt < e)
- eend++;
-
- if (estart == eend)
- continue;
- call4_in_buffer (buf, function, make_number (estart),
- make_number (eend == e ? e : eend - 1),
- context_to_symbol (this_context),
- make_number (edepth));
- }
- DONE_LABEL:
- UNGCPRO;
- return Qnil;
- }
-
- void
- syms_of_font_lock (void)
- {
- defsymbol (&Qcomment, "comment");
- defsymbol (&Qblock_comment, "block-comment");
- defsymbol (&Qbeginning_of_defun, "beginning-of-defun");
-
- defsubr (&Sbuffer_syntactic_context);
- defsubr (&Sbuffer_syntactic_context_depth);
- defsubr (&Ssyntactically_sectionize);
- }
-
- void
- vars_of_font_lock (void)
- {
- memset (&context_cache, 0, sizeof (context_cache));
- memset (&bol_context_cache, 0, sizeof (bol_context_cache));
- }
-