Tools

home *** CD-ROM | disk | FTP | other *** search

/ Tools / WinSN5.0Ver.iso / NETSCAP.50 / WIN1998.ZIP / ns / lib / xp / xp_wrap.c < prev next >

Wrap

C/C++ Source or Header | 1998-04-08 | 22.4 KB | 805 lines

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * The contents of this file are subject to the Netscape Public License * Version 1.0 (the "NPL"); you may not use this file except in * compliance with the NPL. You may obtain a copy of the NPL at * http://www.mozilla.org/NPL/ * * Software distributed under the NPL is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL * for the specific language governing rights and limitations under the * NPL. * * The Initial Developer of this code under the NPL is Netscape * Communications Corporation. Portions created by Netscape are * Copyright (C) 1998 Netscape Communications Corporation. All Rights * Reserved. */ /* * * * * xp_wrap.c --- Word wrapping. * * Created: Jamie Zawinski <jwz@netscape.com>, 24-Jan-95. */ #include "xp.h" #include "libi18n.h" #include <ctype.h> #ifndef NOT_NULL #define NOT_NULL(x)x #endif #define XP_WORD_WRAP_NEW_CODE 1 #ifdef XP_WORD_WRAP_NEW_CODE #undef OUTPUT #define OUTPUT(b) \ do \ { \ if (out - ret >= size) \ { \ unsigned char *old; \ size *= 2; \ old = ret; \ ret = XP_REALLOC(old, size); \ if (!ret) \ { \ XP_FREE(old); \ return NULL; \ } \ out = ret + (size / 2); \ } \ *out++ = b; \ } while (0) #undef OUTPUT_MACHINE_NEW_LINE #if defined(XP_WIN) || defined(XP_OS2) #define OUTPUT_MACHINE_NEW_LINE() \ do \ { \ OUTPUT(CR); \ OUTPUT(LF); \ } while (0) #else #ifdef XP_MAC #define OUTPUT_MACHINE_NEW_LINE() OUTPUT(CR) #else #define OUTPUT_MACHINE_NEW_LINE() OUTPUT(LF) #endif #endif #undef OUTPUT_NEW_LINE #define OUTPUT_NEW_LINE() \ do \ { \ OUTPUT_MACHINE_NEW_LINE(); \ beginningOfLine = in; \ currentColumn = 0; \ lastBreakablePos = NULL; \ } while (0) #undef NEW_LINE #define NEW_LINE() \ do \ { \ if ((*in == CR) && (*(in + 1) == LF)) \ { \ in += 2; \ } \ else \ { \ in++; \ } \ OUTPUT_NEW_LINE(); \ } while (0) unsigned char * XP_WordWrap(int charset, unsigned char *str, int maxColumn, int checkQuoting) { unsigned char *beginningOfLine; int byteWidth; int columnWidth; int currentColumn; unsigned char *end; unsigned char *in; unsigned char *lastBreakablePos; unsigned char *out; unsigned char *p; unsigned char *ret; int32 size; if (!str) { return NULL; } size = 1; ret = XP_ALLOC(size); if (!ret) { return NULL; } in = str; out = ret; beginningOfLine = str; currentColumn = 0; lastBreakablePos = NULL; while (*in) { if (checkQuoting && (in == beginningOfLine) && (*in == '>')) { while (*in && (*in != CR) && (*in != LF)) { OUTPUT(*in++); } if (*in) { NEW_LINE(); } else { /* * to prevent writing out line again after * the main while loop */ beginningOfLine = in; } } else { if ((*in == CR) || (*in == LF)) { if (in != beginningOfLine) { p = beginningOfLine; while (p < in) { OUTPUT(*p++); } } NEW_LINE(); continue; } byteWidth = INTL_CharLen(charset, in); columnWidth = INTL_ColumnWidth(charset, in); if (currentColumn + columnWidth > (maxColumn + (((*in == ' ') || (*in == '\t')) ? 1 : 0))) { if (lastBreakablePos) { p = beginningOfLine; end = lastBreakablePos - 1; if ((*end != ' ') && (*end != '\t')) { end++; } while (p < end) { OUTPUT(*p++); } in = lastBreakablePos; OUTPUT_NEW_LINE(); continue; } } if ( ( ((in[0] == ' ') || (in[0] == '\t')) && ((in[1] != ' ') && (in[1] != '\t')) ) || ( (INTL_CharSetType(charset) == MULTIBYTE) && ( ((!(in[0] & 0x80)) && (in[1] & 0x80)) || ( (in[0] & 0x80) && (INTL_KinsokuClass(charset, in) != PROHIBIT_END_OF_LINE) && (!(in[byteWidth] & 0x80)) ) || ( (in[0] & 0x80) && (INTL_KinsokuClass(charset, in) != PROHIBIT_END_OF_LINE) && (in[byteWidth] & 0x80) && (INTL_KinsokuClass(charset, &in[byteWidth]) != PROHIBIT_BEGIN_OF_LINE) ) ) ) ) { lastBreakablePos = in + byteWidth; } in += byteWidth; currentColumn += columnWidth; } } if (in != beginningOfLine) { p = beginningOfLine; while (*p) { OUTPUT(*p++); } } OUTPUT(0); return ret; } #else /* XP_WORD_WRAP_NEW_CODE */ /* There are any number of approaches one can take to wrapping long lines in a body of text, and this code can be configured to do several of them, mainly as a legacy to the experimentation I did while writing it. The states are as follows: NEWLINES_ARE_SIGNIFICANT If this is defined, then all line breaks in the input cause line breaks in the output as well. If it is not defined, then single newlines in the input are considered to be simple whitespace, and a blank line (that is, two consecutive line breaks) is needed to break a paragraph. HACK_PREFIX If this is defined, then lines beginning with ">" will be treated specially. NEWLINES_ARE_SIGNIFICANT && !HACK_PREFIX In this state, all lines which are longer than the fill column will be wrapped, and all other lines will be left alone. NEWLINES_ARE_SIGNIFICANT && HACK_PREFIX In this state, all lines which are longer than the fill column will be wrapped unless they begin with ">" in the first column. Lines which are shorter than the fill column, or lines which begin with ">", will be left alone. !NEWLINES_ARE_SIGNIFICANT && !HACK_PREFIX In this state, all paragraphs will be re-wrapped to the fill column, regardless of where the line breaks were in the input. A blank line delimits a paragraph. !NEWLINES_ARE_SIGNIFICANT && HACK_PREFIX In this state, all paragraphs will be re-wrapped to the fill column, regardless of where the line breaks were in the input. A blank line delimits a paragraph. Fill prefixes on paragraphs are recognised, if they begin with ">" followed by any number of >'s or whitespace, and the paragraphs will be re-filled using that prefix. Paragraphs are delimited by blank lines, or by changes in the prefix sequence. NOTE: The code for this state is not yet fully debugged. The current plan is that, when the user generates a mail or news message, we will use a text editor which does display-time wrapping of lines; and then, when the time comes to send the message, we will wrap it to a fill column equal to the width of the user's window. In this way, we will (hopefully) insert newlines in the text in the same places where the text editor had wrapped them for display: what the user saw on the screen is what the person at the other end will see as well. However, this has the drawback that no line will ever be longer than the fill column, even when that would be better. A common situation is this: = The user cites a message; this causes the message to be inserted in their editor with a "> " prefix on each line. = If a line was 79 columns in the original text, it will now be 81 columns. = That means that the text editor will wrap the line: it will look something like this: > This is a body of text that was just shorter than the > fill column, and is now just longer than it. Which causes > the text to be displayed with alternating long and short > lines, which is extremely unpleasant. Now, if the user resized their window to be 81 columns wide instead of 80, that text would look "normal" again; but there are two problems with this, the first and most obvious being that most people aren't going to do that, and the second being that that would cause the user's new text to be filled to 81 columns as well, which would be a bad thing. = So, a good approach to solving this problem would be to re-fill the cited text before inserting it in the text area, to ensure that no line is longer than 79 columns after the prefix has been added; and then, when the user sends the message, to wrap the lines as display did. This would mean applying (!NEWLINES_ARE_SIGNIFICANT && HACK_PREFIX) to the cited text, and then applying (NEWLINES_ARE_SIGNIFICANT && !HACK_PREFIX) to the outgoing text. = However, I couldn't get the (!NEWLINES_ARE_SIGNIFICANT && HACK_PREFIX) state to work well enough. Which is the reason for the existance of the (NEWLINES_ARE_SIGNIFICANT && HACK_PREFIX) state. the current plan is this: = The user cites a message; this causes the message to be inserted in their editor with a "> " prefix on each line. = Some lines may be wider than the window, causing the display to wrap them unattractively. = When the user sends the message, we insert newlines in the text at the same places where the display would have wrapped the lines, *except* that we don't touch lines which begin with ">". So, our line wrapping is WYSIWYG for new text, but not for cited text, which is passed through unaltered. I think this should do the right thing most of the time. Problems: = This will mess up on lines/paragraphs which just happen to begin with ">" as their first character. That could cause us to send extremely long lines. This should be pretty rare, howeveer. = It is possible that the user might edit a message which *looked* like it had a paragraph with ">" at the beginning of each line, but that really didn't contain any newlines at all; the implicit line wrapping done by display might be all that there is. This also seems pretty unlikely. = It's fairly important that the algorithm implemented by this code be the same as the algorithm used in redisplay by the various text editors. This code is pretty tough to read because of all the ifdefs; once we're really sure how it all should work, we should rip out the unused states, or live with duplicated code. */ #define NEWLINES_ARE_SIGNIFICANT #define HACK_PREFIX #undef DEBUG_WRAP /* Oops, what was I thinking. I wrote this thing to alter the string in place, forgetting that on the PC, the transformation "A B" -> "A\nB" actually increases the length of the string ("A\r\nB"). So... On Unix and Mac, this produces usable strings, but on the PC, lone \015 characters will need to be expanded to \015\012. */ #undef LINEBREAK #if defined(XP_MAC) # define LINEBREAK "\015" #elif defined(XP_WIN) # define LINEBREAK "\015" #elif defined(XP_OS2) # define LINEBREAK "\015" #elif defined(XP_UNIX) # define LINEBREAK "\012" #endif void XP_WordWrap (char *string, int fill_column) { char *s; char *input = string; char *output = string; char *word_start = input; /* beginning of last word seen */ char *word_end = input; /* end of last word seen */ int column = 0; /* current column */ Bool in_word = FALSE; Bool done = FALSE; #ifdef NEWLINES_ARE_SIGNIFICANT # ifdef HACK_PREFIX Bool cited_line_p = (*string == '>'); # endif /* !HACK_PREFIX */ #else /* !NEWLINES_ARE_SIGNIFICANT */ Bool white_has_newline = FALSE; # ifdef HACK_PREFIX char last_prefix [20]; char new_prefix [20]; # endif /* !HACK_PREFIX */ #endif /* !NEWLINES_ARE_SIGNIFICANT */ #ifdef DEBUG_WRAP int L = strlen (input) + 1; char *I = input; char *O = (char *) calloc (sizeof (char), L); output = O; #endif /* DEBUG_WRAP */ if (!string || !*string) return; #if !defined(NEWLINES_ARE_SIGNIFICANT) && defined(HACK_PREFIX) # define FIND_PREFIX() \ do { \ char *si = input; \ char *so = new_prefix; \ while (so < new_prefix + sizeof (new_prefix) && \ (*si == ' ' || *si == '\t' || *si == '>')) \ *so++ = *si++; \ so--; \ while (so > new_prefix && isspace (*so)) \ so--; \ so++; \ *so++ = 0; \ if (*so) \ input = si; \ } while (0) #endif /* !NEWLINES_ARE_SIGNIFICANT && HACK_PREFIX */ #if !defined(NEWLINES_ARE_SIGNIFICANT) && defined(HACK_PREFIX) FIND_PREFIX (); XP_STRCPY (last_prefix, new_prefix); for (s = new_prefix; *s; s++) { *output++ = *s; column++; } #endif /* !NEWLINES_ARE_SIGNIFICANT && HACK_PREFIX */ word_start = input; word_end = input; while (!done) { if (in_word && (XP_IS_SPACE (*input) || !*input)) /* If we are in a word and have reached whitespace, remember this position in `input' as the end of the word. */ { #ifdef DEBUG_WRAP printf ("WHITE: \""); for (s = word_end; s < word_start; s++) printf ("%c", *s); printf ("\"\n"); printf ("WORD: \""); for (s = word_start; s < input; s++) printf ("%c", *s); printf ("\"\n"); #endif /* DEBUG_WRAP */ if ( #if defined(NEWLINES_ARE_SIGNIFICANT) && defined(HACK_PREFIX) !cited_line_p && #endif /* NEWLINES_ARE_SIGNIFICANT && HACK_PREFIX */ (column + ( #ifndef NEWLINES_ARE_SIGNIFICANT white_has_newline ? (input - word_start + 1) : #endif /* !NEWLINES_ARE_SIGNIFICANT */ (input - word_end)) > fill_column) /* If the word is itself longer than a line is allowed to be, then don't wrap - same behavior as if this is a cited line. */ && ((input - word_end) < fill_column) ) { for (s = LINEBREAK; *s; s++) *output++ = *s; column = 0; #if !defined(NEWLINES_ARE_SIGNIFICANT) && defined(HACK_PREFIX) for (s = new_prefix; *s; s++) *output++ = *s, column++; #endif /* NEWLINES_ARE_SIGNIFICANT && HACK_PREFIX */ #ifdef DEBUG_WRAP printf ("NEWLINE\n"); #endif /* DEBUG_WRAP */ } else { #ifndef NEWLINES_ARE_SIGNIFICANT if (white_has_newline) { *output++ = ' '; column++; } else #endif /* !NEWLINES_ARE_SIGNIFICANT */ { while (word_end < word_start) { *output++ = *word_end++; column++; } } } while (word_start < input) { *output++ = *word_start++; column++; } word_end = input; in_word = FALSE; #ifdef NEWLINES_ARE_SIGNIFICANT if (*input == CR || *input == LF) goto HARD_BREAK; #else /* !NEWLINES_ARE_SIGNIFICANT */ white_has_newline = (*input == CR || *input == LF); /* Treat CRLF as CR */ if (white_has_newline && *input == CR && input[1] == LF) input++; if (white_has_newline) { input++; # ifdef HACK_PREFIX FIND_PREFIX (); /* If the prefixes differ, treat this as a paragraph break. */ if (XP_STRCMP (new_prefix, last_prefix)) goto PARA_BREAK; XP_STRCPY (last_prefix, new_prefix); # endif /* HACK_PREFIX */ continue; } #endif /* !NEWLINES_ARE_SIGNIFICANT */ if (!*input) done = TRUE; } else if (!in_word && !*input) { break; } else if (!in_word && (!XP_IS_SPACE (*input))) /* If we are between words and have reached non-whitespace, remember this position in `input' as the beginning of the word. */ { in_word = TRUE; word_start = input; } else if (!in_word) { if (*input == CR || *input == LF) { #ifdef NEWLINES_ARE_SIGNIFICANT HARD_BREAK: # ifdef DEBUG_WRAP printf ("HARD BREAK\n"); # endif /* DEBUG_WRAP */ in_word = FALSE; column = 0; while (word_end < input) /* whitespace at eol */ *output++ = *word_end++; *output++ = *input++; word_end = input; word_start = input; # ifdef HACK_PREFIX s = input; /* while (isspace (s)) s++; */ cited_line_p = (*s == '>'); # endif /* !HACK_PREFIX */ continue; #else /* !NEWLINES_ARE_SIGNIFICANT */ /* Treat CRLF as CR */ if (*input == CR && input[1] == LF) input++; # ifdef HACK_PREFIX input++; FIND_PREFIX (); input--; /* If the prefixes differ, treat this as a paragraph break. */ if (XP_STRCMP (new_prefix, last_prefix)) white_has_newline = TRUE; PARA_BREAK: XP_STRCPY (last_prefix, new_prefix); # endif /* HACK_PREFIX */ if (white_has_newline) { /* This is the second newline - paragraph break. */ column = 0; for (s = LINEBREAK; *s; s++) *output++ = *s; # ifdef HACK_PREFIX for (s = new_prefix; *s; s++) *output++ = *s; # endif /* !HACK_PREFIX */ for (s = LINEBREAK; *s; s++) *output++ = *s; # ifdef HACK_PREFIX for (s = new_prefix; *s; s++) *output++ = *s, column++; # endif /* !HACK_PREFIX */ while (isspace (*input)) input++; white_has_newline = FALSE; in_word = TRUE; word_end = input; word_start = input; continue; } else { white_has_newline = TRUE; } #endif /* !NEWLINES_ARE_SIGNIFICANT */ } } input++; } *output = 0; #undef FIND_PREFIX #ifdef DEBUG_WRAP memcpy (I, O, L); #endif } #endif /* XP_WORD_WRAP_NEW_CODE */ #if 0 #include <stdio.h> int INTL_CharLen(int charsetid, unsigned char *str) { return 1; } int INTL_ColumnWidth(int charsetid, unsigned char *str) { return 1; } void main () { char *s; #ifdef XP_WORD_WRAP_NEW_CODE # define TEST(STR) \ s = strdup (STR); \ printf ("----------\n"); \ printf ("%s\n", s); \ printf ("----------\n"); \ printf ("%s\n", XP_WordWrap(CS_LATIN1, s, 79, 1)); \ printf ("----------\n"); \ free (s) #else # define TEST(STR) \ s = strdup (STR); \ printf ("----------\n"); \ printf ("%s\n", s); \ XP_WordWrap (s, 79); \ printf ("----------\n"); \ printf ("%s\n", s); \ printf ("----------\n"); \ free (s) #endif #if 1 TEST ("This is a test of the new wrapping code. This test does not " "have any newlines in it, but it does have a number of long words " "like unfathomably and nevertheless and perspicacious. Let's see " "how this works. Let's try making it a bit longer."); TEST ("This is a test of the new wrapping code. This test does not " "have any newlines in it, but it does have a number of long words " "like unfathomably and nevertheless and perspicacious. Let's see " "how this works. Let's try making it a bit longer. "); TEST ("This is a test of the new wrapping code. This test does not " "have any newlines in it, but it does have a number of long words " "like unfathomably and nevertheless and perspicacious. Let's see " "how this works. Let's try making it a bit longer. " " " ); TEST ("This is a test of the new wrapping code. This test does not " "have any newlines in it, but it does have a number of long words " "like unfathomably and nevertheless and perspicacious. Let's see " "how this works. Let's try making it a bit longer. " " " ); TEST ("This is a test of the new wrapping code. This test DOES contain\n" "newlines, so let's see \n" "how we deal with that. And (for good measure) we'll throw in\n" "the afforementioned long words: unfathomably, nevertheless, and\n" " perspicacious. Oh, and now `afforementioned.'"); TEST ("This is a test of the new wrapping code. This test DOES contain\n" "newlines, so let's see \n" "how we deal with that. And (for good measure) we'll throw in\n" "the afforementioned long words: unfathomably, nevertheless, and\n" " perspicacious. Oh, and now `afforementioned.'\n" "\n" "And now we'll try multiple paragraphs. This text should not\n" "be wrapped in with the preceeding text. Blah blah blah.\n" "\n" "And now we'll try multiple paragraphs. This text should not\n" "be wrapped in with the preceeding text. Blah blah blah.\n" " \n \n \n \n " "And now we'll try multiple paragraphs. This text should not " "be wrapped in with the preceeding text. Blah blah blah." ); #endif TEST (">> This is a test of the new wrapping code. This test DOES contain\n" ">> newlines, so let's see \n" ">> how we deal with that. And (for good measure) we'll throw in\n" ">> the afforementioned long words: unfathomably, nevertheless, and\n" ">> perspicacious. Oh, and now `afforementioned.'\n" "\n" "And now we'll try multiple paragraphs. This text should not\n" "be wrapped in with the preceeding text. Blah blah blah.\n" "Blab blab blab. Blab blab blab. Blab blab blab. Blab blab.\n" "\n" "> And now we'll try multiple paragraphs. This text should not\n" "> be wrapped in with the preceeding text. Blah blah blah.\n" "> Blab blab blab. Blab blab blab. Blab blab blab. Blab blab.\n" " \n \n \n \n " "And now we'll try multiple paragraphs. This text should not " "be wrapped in with the preceeding text. Blah blah blah." "Blab blab blab. Blab blab blab. Blab blab blab. Blab blab.\n" "\n" "> And here's a line that begins with > in column 0 which is longer" " than the fill column. This line should be left alone.\n" "> This is the second such line: it's short.\n" ">> This is the third such line, but much like the first line, it" " is long. Run and hide. Run and hide. Run and hide.\n" "> You can hide. All your life. Run and hide. Run and hide. " "but I'll have. Your hide. Run and hide. Run and hide.\n" "\n" "This line doesn't begin with > and is long and thus should be" " wrapped (like a good line should.) But this next line:\n" ">does begin with that hateful > character, and should be left" " alone no matter how it rambles on. No escape. run and hide.\n" "And now we're back in wrappable line land. On. And on and on. " "And on and on. And on.\n" "\n" "And now a test of a word that is longer than the line: " "12345689-12345689-12345689-12345689-12345689-12345689-12345689-" "12345689-12345689-12345689-12345689-12345689-12345689-12345689. " "12345689-12345689-12345689-12345689-12345689-12345689-12345689-" "12345689-12345689-12345689-12345689-12345689-12345689-12345689." ); # undef TEST } #endif /* 0 */